Skip to content

Commit 89d65d2

Browse files
authored
[GH-2365] Modernize Sedona Python project by switching to pyproject.toml and uv (#2393)
1 parent 76efab9 commit 89d65d2

File tree

11 files changed

+236
-286
lines changed

11 files changed

+236
-286
lines changed

.github/workflows/pyflink.yml

Lines changed: 12 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -59,18 +59,17 @@ jobs:
5959
- uses: actions/setup-python@v5
6060
with:
6161
python-version: ${{ matrix.python }}
62-
- run: sudo apt-get -y install python3-pip python-dev-is-python3
62+
- name: Install uv
63+
uses: astral-sh/setup-uv@v6
6364
- run: mvn package -pl "org.apache.sedona:sedona-flink-shaded_2.12" -am -DskipTests
64-
- run: sudo pip3 install -U setuptools
65-
- run: sudo pip3 install -U wheel
66-
- run: sudo pip3 install -U virtualenvwrapper
67-
- run: python3 -m pip install uv
68-
- run: cd python
69-
- run: rm pyproject.toml
70-
- run: uv init --no-workspace
71-
- run: uv add apache-flink==1.20.1 shapely attr setuptools
72-
- run: uv add pytest --dev
73-
- run: |
74-
wget https://repo1.maven.org/maven2/org/datasyslab/geotools-wrapper/1.8.0-33.1-rc1/geotools-wrapper-1.8.0-33.1-rc1.jar
65+
- name: Install python package + flink extra
66+
run: |
67+
cd python
68+
uv add apache-flink==1.20.1
69+
uv sync
70+
- name: Run PyFlink tests
71+
run: |
72+
wget -q https://repo1.maven.org/maven2/org/datasyslab/geotools-wrapper/1.8.0-33.1-rc1/geotools-wrapper-1.8.0-33.1-rc1.jar
7573
export SEDONA_PYFLINK_EXTRA_JARS=${PWD}/$(find flink-shaded/target -name sedona-flink*.jar),${PWD}/geotools-wrapper-1.8.0-33.1-rc1.jar
76-
(cd python; PYTHONPATH=$(pwd) uv run pytest -v -s ./tests/flink)
74+
cd python
75+
PYTHONPATH=$(pwd) uv run pytest -v -s ./tests/flink

.github/workflows/python-extension.yml

Lines changed: 22 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -56,46 +56,39 @@ jobs:
5656
defaults:
5757
run:
5858
shell: bash
59+
working-directory: python
5960
steps:
6061
- uses: actions/checkout@v4
6162
- uses: actions/setup-python@v5
6263
with:
6364
python-version: ${{ matrix.python }}
64-
- name: Install pipenv
65-
run: pip install -U pipenv
66-
- name: Install dependencies
65+
- name: Install uv
66+
uses: astral-sh/setup-uv@v6
67+
- name: Install dependencies (dev)
6768
run: |
68-
cd python
69-
if [[ "$RUNNER_OS" == "Windows" ]]; then
70-
PYTHON_EXE_PATH="$pythonLocation/python.exe"
71-
else
72-
PYTHON_EXE_PATH="$pythonLocation/python"
73-
fi
74-
echo "Using Python executable at: $PYTHON_EXE_PATH"
75-
pipenv install --dev --python "$PYTHON_EXE_PATH"
76-
- name: Build extension
69+
uv sync
70+
- name: Build extension (explicit)
7771
run: |
78-
cd python
79-
pipenv run python setup.py build_ext --inplace
72+
uv pip install -e .
8073
- name: Run tests
8174
run: |
82-
cd python
83-
pipenv run pytest tests/utils/test_geomserde_speedup.py
84-
- name: Run tests on Shapely 2.0
75+
uv run pytest tests/utils/test_geomserde_speedup.py
76+
- name: Install pip
77+
run: uv pip install pip
78+
- name: Run tests on Shapely 2
8579
run: |
86-
cd python
87-
pipenv install shapely~=2.0
88-
pipenv run pytest tests/utils/test_geomserde_speedup.py
89-
- name: Run tests on Shapley 1.7
90-
# Shapely 1.7 only provides wheels for cp36 ~ cp39, so we'll skip running
91-
# this test for recent python versions.
80+
uv add "shapely~=2.0"
81+
uv run pytest tests/utils/test_geomserde_speedup.py
82+
- name: Run tests on Shapely 1.8
83+
run: |
84+
uv add "shapely~=1.8"
85+
uv run pytest tests/utils/test_geomserde_speedup.py
86+
- name: Run tests on Shapely 1.7
9287
if: ${{ matrix.python == '3.9' || matrix.python == '3.8' }}
9388
run: |
94-
cd python
95-
pipenv install shapely~=1.7
96-
pipenv run pytest tests/utils/test_geomserde_speedup.py
89+
uv add "shapely==1.7.1"
90+
uv run pytest tests/utils/test_geomserde_speedup.py
9791
- name: Install from sdist
9892
run: |
99-
cd python
100-
pipenv run python setup.py sdist
101-
pipenv run python -m pip install dist/*sedona-*.tar.gz
93+
uv build
94+
uv pip install dist/*sedona-*.tar.gz --force-reinstall

.github/workflows/python.yml

Lines changed: 43 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -99,8 +99,6 @@ jobs:
9999
python: '3.8'
100100
shapely: '1'
101101

102-
env:
103-
VENV_PATH: /home/runner/.local/share/virtualenvs/python-${{ matrix.python }}
104102
steps:
105103
- uses: actions/checkout@v4
106104
- uses: actions/setup-java@v4
@@ -110,6 +108,8 @@ jobs:
110108
- uses: actions/setup-python@v5
111109
with:
112110
python-version: ${{ matrix.python }}
111+
- name: Install uv
112+
uses: astral-sh/setup-uv@v6
113113
- name: Cache Maven packages
114114
uses: actions/cache@v4
115115
with:
@@ -122,71 +122,65 @@ jobs:
122122
run: |
123123
SPARK_COMPAT_VERSION=${SPARK_VERSION:0:3}
124124
mvn -q clean install -DskipTests -Dspark=${SPARK_COMPAT_VERSION} -Dscala=${SCALA_VERSION:0:4} -Dgeotools
125-
- run: sudo apt-get -y install python3-pip python-dev-is-python3
126-
- run: sudo pip3 install -U setuptools
127-
- run: sudo pip3 install -U wheel
128-
- run: sudo pip3 install -U virtualenvwrapper
129-
- run: python3 -m pip install pipenv
130-
- run: cd python; python3 setup.py build_ext --inplace
131-
- env:
125+
- name: Setup Python build environment
126+
env:
132127
SPARK_VERSION: ${{ matrix.spark }}
133-
PYTHON_VERSION: ${{ matrix.python }}
134128
SHAPELY_VERSION: ${{ matrix.shapely }}
135-
PANDAS_VERSION: ${{ matrix.pandas }}
136129
run: |
137130
cd python
131+
132+
# Conditional shapely version adjustments
138133
if [ "${SHAPELY_VERSION}" == "1" ]; then
139-
echo "Patching Pipfile to use Shapely 1.x"
140-
sed -i 's/^shapely.*$/shapely="<2.0.0"/g' Pipfile
134+
uv add "shapely<2.0.0"
141135
fi
142-
if [ "${PANDAS_VERSION}" == "1" ]; then
143-
echo "Patching Pipfile to use Pandas 1.x"
144-
sed -i 's/^pandas.*$/pandas="<2.0.0"/g' Pipfile
136+
137+
if [ "${SPARK_VERSION:0:1}" == "4" ]; then
138+
# Spark 4.0 requires Python 3.9+, and we remove flink since it conflicts with pyspark 4.0
139+
uv remove apache-flink --optional flink
140+
uv add "pyspark==4.0.0; python_version >= '3.9'"
141+
else
142+
# Install specific pyspark version matching matrix
143+
uv add pyspark==${SPARK_VERSION}
145144
fi
146-
export PIPENV_CUSTOM_VENV_NAME=python-${PYTHON_VERSION}
147-
pipenv --python ${PYTHON_VERSION}
148-
pipenv install pyspark==${SPARK_VERSION}
149-
pipenv install --dev
150-
pipenv graph
151-
- env:
152-
PYTHON_VERSION: ${{ matrix.python }}
145+
146+
uv sync
147+
148+
uv run python -c "import pyspark,sys;print('Using pyspark', pyspark.__version__)"
149+
- name: Install sedona package
150+
run: cd python; uv pip install -e .
151+
- name: Prepare Sedona Spark Dependencies
153152
run: |
154153
wget --retry-connrefused --waitretry=10 --read-timeout=20 --timeout=15 --tries=5 https://repo.osgeo.org/repository/release/javax/media/jai_core/${JAI_CORE_VERSION}/jai_core-${JAI_CORE_VERSION}.jar
155154
wget --retry-connrefused --waitretry=10 --read-timeout=20 --timeout=15 --tries=5 https://repo.osgeo.org/repository/release/javax/media/jai_codec/${JAI_CODEC_VERSION}/jai_codec-${JAI_CODEC_VERSION}.jar
156155
wget --retry-connrefused --waitretry=10 --read-timeout=20 --timeout=15 --tries=5 https://repo.osgeo.org/repository/release/javax/media/jai_imageio/${JAI_IMAGEIO_VERSION}/jai_imageio-${JAI_IMAGEIO_VERSION}.jar
157-
mv -v jai_core-${JAI_CORE_VERSION}.jar ${VENV_PATH}/lib/python${PYTHON_VERSION}/site-packages/pyspark/jars
158-
mv -v jai_codec-${JAI_CODEC_VERSION}.jar ${VENV_PATH}/lib/python${PYTHON_VERSION}/site-packages/pyspark/jars
159-
mv -v jai_imageio-${JAI_IMAGEIO_VERSION}.jar ${VENV_PATH}/lib/python${PYTHON_VERSION}/site-packages/pyspark/jars
160-
- env:
161-
PYTHON_VERSION: ${{ matrix.python }}
162-
run: find spark-shaded/target -name sedona-*.jar -exec cp {} ${VENV_PATH}/lib/python${PYTHON_VERSION}/site-packages/pyspark/jars/ \;
156+
157+
PY_SITE=$(cd python; uv run python -c "import site; print(site.getsitepackages()[0])")
158+
echo "Python site-packages: $PY_SITE"
159+
mv -v jai_core-${JAI_CORE_VERSION}.jar ${PY_SITE}/pyspark/jars
160+
mv -v jai_codec-${JAI_CODEC_VERSION}.jar ${PY_SITE}/pyspark/jars
161+
mv -v jai_imageio-${JAI_IMAGEIO_VERSION}.jar ${PY_SITE}/pyspark/jars
162+
- name: Copy Sedona Spark JARs
163+
run: |
164+
PY_SITE=$(cd python; uv run python -c "import site; print(site.getsitepackages()[0])")
165+
find spark-shaded/target -name sedona-*.jar -exec cp {} ${PY_SITE}/pyspark/jars/ \;
163166
- name: Run tests
164-
env:
165-
PYTHON_VERSION: ${{ matrix.python }}
166167
run: |
167-
export SPARK_HOME=${VENV_PATH}/lib/python${PYTHON_VERSION}/site-packages/pyspark
168168
cd python
169-
source ${VENV_PATH}/bin/activate
170-
pytest -v tests
169+
export SPARK_HOME=$(uv run python -c "import site; print(site.getsitepackages()[0]+'/pyspark')")
170+
uv run pytest -v tests
171171
- name: Run basic tests without rasterio
172-
env:
173-
PYTHON_VERSION: ${{ matrix.python }}
174172
run: |
175-
export SPARK_HOME=${VENV_PATH}/lib/python${PYTHON_VERSION}/site-packages/pyspark
176173
cd python
177-
source ${VENV_PATH}/bin/activate
178-
pip uninstall -y rasterio
179-
pytest -v tests/core/test_rdd.py tests/sql/test_dataframe_api.py
174+
export SPARK_HOME=$(uv run python -c "import site; print(site.getsitepackages()[0]+'/pyspark')")
175+
uv remove rasterio --optional all
176+
uv remove rasterio --dev
177+
uv sync
178+
uv run pytest -v tests/core/test_rdd.py tests/sql/test_dataframe_api.py
180179
- name: Run Spark Connect tests
181-
env:
182-
PYTHON_VERSION: ${{ matrix.python }}
183-
SPARK_VERSION: ${{ matrix.spark }}
184180
if: ${{ matrix.spark >= '3.4.0' }}
185181
run: |
186-
export SPARK_HOME=${VENV_PATH}/lib/python${PYTHON_VERSION}/site-packages/pyspark
187-
export SPARK_REMOTE=local
188-
189182
cd python
190-
source ${VENV_PATH}/bin/activate
191-
pip install "pyspark[connect]==${SPARK_VERSION}"
192-
pytest -v tests/sql/test_dataframe_api.py
183+
export SPARK_REMOTE=local
184+
export SPARK_HOME=$(uv run python -c "import site; print(site.getsitepackages()[0]+'/pyspark')")
185+
uv pip install "pyspark[connect]==${{ matrix.spark }}" --reinstall
186+
uv run pytest -v tests/sql/test_dataframe_api.py

docs/community/develop.md

Lines changed: 29 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -138,25 +138,47 @@ If you want to test changes with different Spark/Scala versions, you can select
138138

139139
We recommend [PyCharm](https://www.jetbrains.com/pycharm/).
140140

141-
### Run Python tests
141+
### Run tests
142142

143143
#### Run all Python tests
144144

145145
To run all Python test cases, follow steps mentioned [here](../setup/compile.md#run-python-test).
146146

147-
#### Run all Python tests in a single test file
147+
Once the environment is set up, you can run all tests using the following command in the `python` directory:
148148

149-
To run a particular Python test file, specify the path of the `.py` file to `pipenv`.
150-
151-
For example, to run all tests in `test_function.py` located in `python/tests/sql/`, use: `pipenv run pytest tests/sql/test_function.py`.
149+
```bash
150+
cd python
151+
uv run pytest -v tests
152+
```
152153

153154
#### Run a single test
154155

156+
To run a particular Python test file, specify the path of the `.py` file.
157+
158+
For example, to run all tests in `test_function.py` located in `python/tests/sql/`, use:
159+
160+
```bash
161+
cd python
162+
uv run pytest -v tests/sql/test_function.py
163+
```
164+
155165
To run a particular test in a particular `.py` test file, specify `file_name::class_name::test_name` to the `pytest` command.
156166

157-
For example, to run the test on `ST_Contains` function located in `sql/test_predicate.py`, use: `pipenv run pytest tests/sql/test_predicate.py::TestPredicate::test_st_contains`
167+
For example, to run the test on `ST_Contains` function located in `sql/test_predicate.py`, use:
158168

159-
### Import the project
169+
```bash
170+
cd python
171+
uv run pytest -v tests/sql/test_predicate.py::TestPredicate::test_st_contains
172+
```
173+
174+
### Build packages
175+
176+
The following command will build the sdist and whl packages in the `dist` folder.
177+
178+
```bash
179+
cd python
180+
uv build
181+
```
160182

161183
## R developers
162184

docs/setup/compile.md

Lines changed: 24 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -77,27 +77,20 @@ Sedona uses GitHub Actions to automatically generate jars per commit. You can go
7777

7878
## Run Python test
7979

80-
1) Set up the environment variable SPARK_HOME and PYTHONPATH
80+
1) Set up Spark (download if needed) and environment variables
8181

82-
For example,
83-
84-
```
85-
export SPARK_VERSION=3.4.0
86-
export SPARK_HOME=$PWD/spark-${SPARK_VERSION}-bin-hadoop3
87-
export PYTHONPATH=$SPARK_HOME/python
88-
```
89-
90-
2) Install Spark if you haven't already
91-
92-
```
82+
```bash
83+
export SPARK_VERSION=3.4.0 # or another supported version
9384
wget https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop3.tgz
9485
tar -xvzf spark-${SPARK_VERSION}-bin-hadoop3.tgz
9586
rm spark-${SPARK_VERSION}-bin-hadoop3.tgz
87+
export SPARK_HOME=$PWD/spark-${SPARK_VERSION}-bin-hadoop3
88+
export PYTHONPATH=$SPARK_HOME/python
9689
```
9790

98-
3) Put JAI jars to ==SPARK_HOME/jars/== folder.
91+
2) Add required JAI jars into $SPARK_HOME/jars
9992

100-
```
93+
```bash
10194
export JAI_CORE_VERSION="1.1.3"
10295
export JAI_CODEC_VERSION="1.1.3"
10396
export JAI_IMAGEIO_VERSION="1.1"
@@ -106,52 +99,38 @@ wget -P $SPARK_HOME/jars/ https://repo.osgeo.org/repository/release/javax/media/
10699
wget -P $SPARK_HOME/jars/ https://repo.osgeo.org/repository/release/javax/media/jai_imageio/${JAI_IMAGEIO_VERSION}/jai_imageio-${JAI_IMAGEIO_VERSION}.jar
107100
```
108101

109-
4) Compile the Sedona Scala and Java code with `-Dgeotools` and then copy the ==sedona-spark-shaded-{{ sedona.current_version }}.jar== to ==SPARK_HOME/jars/== folder.
102+
3) Build Sedona Scala/Java jars with GeoTools shaded (from repo root)
110103

111-
```
104+
```bash
105+
mvn clean install -DskipTests -Dgeotools
112106
cp spark-shaded/target/sedona-spark-shaded-*.jar $SPARK_HOME/jars/
113107
```
114108

115-
5) Install the following libraries
109+
4) Set up the Python development environment
116110

117-
```
118-
sudo apt-get -y install python3-pip python-dev libgeos-dev
119-
sudo pip3 install -U setuptools
120-
sudo pip3 install -U wheel
121-
sudo pip3 install -U virtualenvwrapper
122-
sudo pip3 install -U pipenv
123-
```
124-
125-
Homebrew can be used to install libgeos-dev in macOS:
126-
127-
```
128-
brew install geos
129-
```
111+
The Python package uses `pyproject.toml` (PEP 517/518) with setuptools as the build backend. We recommend using [uv](https://docs.astral.sh/uv/) to manage virtual environments and dependencies.
130112

131-
6) Set up pipenv to the desired Python version: 3.8, 3.9, or 3.10
132-
133-
```
113+
```bash
134114
cd python
135-
pipenv --python 3.8
115+
python -m pip install --upgrade uv
116+
uv venv --python 3.10 # or any supported version (>=3.8)
136117
```
137118

138-
7) Install the PySpark version and the other dependency
119+
5) Install the matching PySpark version and the other dependencies
139120

140-
```
121+
```bash
141122
cd python
142-
pipenv install pyspark==${SPARK_VERSION}
143-
pipenv install --dev
123+
# Use the correct PySpark version; otherwise the latest version will be installed
124+
uv add pyspark==${SPARK_VERSION} --optional spark
125+
uv sync
144126
```
145127

146-
`pipenv install pyspark` installs the latest version of pyspark.
147-
In order to remain consistent with the installed spark version, use `pipenv install pyspark==<spark_version>`
128+
6) Install Sedona (editable) and run the Python tests
148129

149-
8) Run the Python tests
150-
151-
```
130+
```bash
152131
cd python
153-
pipenv run python setup.py build_ext --inplace
154-
pipenv run pytest tests
132+
uv pip install -e .
133+
uv run pytest -v tests
155134
```
156135

157136
## Compile the documentation

docs/setup/install-python.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ Clone Sedona GitHub source code and run the following command
4545

4646
```bash
4747
cd python
48-
python3 setup.py install
48+
python3 -m pip install .
4949
```
5050

5151
### Prepare sedona-spark jar

0 commit comments

Comments
 (0)