Skip to content

Commit

Permalink
Working in pyodide! (#753)
Browse files Browse the repository at this point in the history
- Add feature flags to the python distribution to remove async APIs and
tokio code.
- minimal pyodide docs
- Add CI test that we can build in pyodide
- Update pyo3-arrow to 0.2


![image](https://github.com/user-attachments/assets/b0e68162-0f01-4ec5-be8e-18978834a9f8)
  • Loading branch information
kylebarron committed Sep 3, 2024
1 parent d00dfde commit e09f4af
Show file tree
Hide file tree
Showing 26 changed files with 365 additions and 212 deletions.
27 changes: 27 additions & 0 deletions .github/workflows/python.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,33 @@ jobs:
- name: Test
run: cargo test

emscripten:
name: Build pyodide wheel
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- run: pip install pyodide-build
- name: Get Emscripten and Python version info
shell: bash
run: |
echo EMSCRIPTEN_VERSION=$(pyodide config get emscripten_version) >> $GITHUB_ENV
echo PYTHON_VERSION=$(pyodide config get python_version | cut -d '.' -f 1-2) >> $GITHUB_ENV
pip uninstall -y pyodide-build
- uses: mymindstorm/setup-emsdk@v14
with:
version: ${{ env.EMSCRIPTEN_VERSION }}
actions-cache-folder: emsdk-cache
- uses: actions/setup-python@v5
with:
python-version: ${{ env.PYTHON_VERSION }}
- run: pip install pyodide-build
- name: Build wheels
uses: PyO3/maturin-action@v1
with:
target: wasm32-unknown-emscripten
args: --no-default-features --out dist -m python/core/Cargo.toml
rust-toolchain: nightly

# lint-python:
# name: Lint Python code
# runs-on: ubuntu-latest
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
emsdk
vcpkg
vcpkg_installed
.pyodide*
Expand Down
1 change: 1 addition & 0 deletions python/core/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
emsdk/
12 changes: 3 additions & 9 deletions python/core/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

45 changes: 32 additions & 13 deletions python/core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,45 +16,64 @@ rust-version = "1.80"
name = "_rust"
crate-type = ["cdylib"]

[features]
default = ["async", "libc", "rayon"]
async = [
"dep:futures",
"dep:object_store",
"parquet/object_store",
"dep:pyo3-asyncio-0-21",
"geoarrow/flatgeobuf_async",
"geoarrow/parquet_async",
"geoarrow/postgis",
"dep:sqlx",
"dep:tokio",
]
libc = ["geoarrow/polylabel"]
rayon = ["geoarrow/rayon"]

[dependencies]
arrow = "52"
arrow-array = "52"
arrow-buffer = "52"
bytes = "1"
flatgeobuf = { version = "4.2.0", default-features = false }
futures = "0.3"
object_store = { version = "0.10", features = ["aws", "azure", "gcp", "http"] }
parquet = { version = "52", features = ["object_store"] }
futures = { version = "0.3", optional = true }
object_store = { version = "0.10", features = [
"aws",
"azure",
"gcp",
"http",
], optional = true }
parquet = "52"
pyo3 = { version = "0.21.0", features = [
"abi3-py38",
"multiple-pymethods",
"hashbrown",
"serde",
"anyhow",
] }
pyo3-arrow = { git = "https://github.com/kylebarron/arro3", rev = "d0d737a03c141ff316e3e354d85828edb42338d4" }
pyo3-asyncio-0-21 = { version = "0.21", features = ["tokio-runtime"] }
pyo3-arrow = "0.2"
pyo3-asyncio-0-21 = { version = "0.21", features = [
"tokio-runtime",
], optional = true }
pythonize = "0.21"
geo = "0.28"
geoarrow = { path = "../../", features = [
"csv",
"flatgeobuf_async",
"flatgeobuf",
"geozero",
"ipc_compression",
"parquet_async",
"parquet_compression",
"parquet",
"polylabel",
"postgis",
"rayon",
] }
geozero = { version = "0.13", features = ["with-svg"] }
numpy = "0.21"
serde_json = "1"
sqlx = { version = "0.7", default-features = false, features = ["postgres"] }
sqlx = { version = "0.7", default-features = false, features = [
"postgres",
], optional = true }
thiserror = "1"
tokio = { version = "1.9", features = ["rt"] }
tokio = { version = "1.9", features = ["rt"], optional = true }
url = "2.5"

# reqwest is pulled in by object store, but not used by python binding itself
Expand Down
42 changes: 42 additions & 0 deletions python/core/DEVELOP.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
## Pyodide


Install Rust nightly and add the `wasm32-unknown-emscripten` target:

```
rustup toolchain install nightly
rustup target add --toolchain nightly wasm32-unknown-emscripten
```

Install dependencies. The `pyodide-build` version must match the `pyodide` release you are building the wheel for.

```
pip install -U maturin
pip install pyodide-build
```

Install emsdk.

```
git clone https://github.com/emscripten-core/emsdk.git
cd emsdk
PYODIDE_EMSCRIPTEN_VERSION=$(pyodide config get emscripten_version)
./emsdk install ${PYODIDE_EMSCRIPTEN_VERSION}
./emsdk activate ${PYODIDE_EMSCRIPTEN_VERSION}
source emsdk_env.sh
cd ..
```

- The `RUSTFLAGS` setting is a temporary workaround for a compiler bug.
- You must use rust nightly
- You must use `--no-default-features` to remove any async support. `tokio` does not compile for emscripten.

```bash
RUSTFLAGS='-Zinline-mir=no' \
RUSTUP_TOOLCHAIN=nightly \
maturin build \
--no-default-features \
--release \
-o dist \
--target wasm32-unknown-emscripten
```
10 changes: 6 additions & 4 deletions python/core/src/algorithm/geo/area.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
use std::sync::Arc;

use crate::error::PyGeoArrowResult;
use crate::ffi::from_python::AnyGeometryInput;
use geoarrow::algorithm::geo::{Area, ChamberlainDuquetteArea, GeodesicArea};
Expand Down Expand Up @@ -38,7 +40,7 @@ pub fn area(py: Python, input: AnyGeometryInput, method: AreaMethod) -> PyGeoArr
AreaMethod::Euclidean => arr.as_ref().unsigned_area()?,
AreaMethod::Geodesic => arr.as_ref().geodesic_area_unsigned()?,
};
Ok(PyArray::from_array(out).to_arro3(py)?)
Ok(PyArray::from_array_ref(Arc::new(out)).to_arro3(py)?)
}
AnyGeometryInput::Chunked(arr) => {
let out = match method {
Expand All @@ -48,7 +50,7 @@ pub fn area(py: Python, input: AnyGeometryInput, method: AreaMethod) -> PyGeoArr
AreaMethod::Euclidean => arr.as_ref().unsigned_area()?,
AreaMethod::Geodesic => arr.as_ref().geodesic_area_unsigned()?,
};
Ok(PyChunkedArray::from_arrays(out.chunks())?.to_arro3(py)?)
Ok(PyChunkedArray::from_array_refs(out.chunk_refs())?.to_arro3(py)?)
}
}
}
Expand All @@ -72,7 +74,7 @@ pub fn signed_area(
AreaMethod::Euclidean => arr.as_ref().signed_area()?,
AreaMethod::Geodesic => arr.as_ref().geodesic_area_signed()?,
};
Ok(PyArray::from_array(out).to_arro3(py)?)
Ok(PyArray::from_array_ref(Arc::new(out)).to_arro3(py)?)
}
AnyGeometryInput::Chunked(arr) => {
let out = match method {
Expand All @@ -82,7 +84,7 @@ pub fn signed_area(
AreaMethod::Euclidean => arr.as_ref().signed_area()?,
AreaMethod::Geodesic => arr.as_ref().geodesic_area_signed()?,
};
Ok(PyChunkedArray::from_arrays(out.chunks())?.to_arro3(py)?)
Ok(PyChunkedArray::from_array_refs(out.chunk_refs())?.to_arro3(py)?)
}
}
}
6 changes: 4 additions & 2 deletions python/core/src/algorithm/geo/dimensions.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
use std::sync::Arc;

use crate::error::PyGeoArrowResult;
use crate::ffi::from_python::AnyGeometryInput;
use geoarrow::algorithm::geo::HasDimensions;
Expand All @@ -9,11 +11,11 @@ pub fn is_empty(py: Python, input: AnyGeometryInput) -> PyGeoArrowResult<PyObjec
match input {
AnyGeometryInput::Array(arr) => {
let out = HasDimensions::is_empty(&arr.as_ref())?;
Ok(PyArray::from_array(out).to_arro3(py)?)
Ok(PyArray::from_array_ref(Arc::new(out)).to_arro3(py)?)
}
AnyGeometryInput::Chunked(arr) => {
let out = HasDimensions::is_empty(&arr.as_ref())?;
Ok(PyChunkedArray::from_arrays(out.chunks())?.to_arro3(py)?)
Ok(PyChunkedArray::from_array_refs(out.chunk_refs())?.to_arro3(py)?)
}
}
}
10 changes: 6 additions & 4 deletions python/core/src/algorithm/geo/frechet_distance.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
use std::sync::Arc;

use crate::error::PyGeoArrowResult;
use crate::ffi::from_python::input::AnyGeometryBroadcastInput;
use crate::ffi::from_python::AnyGeometryInput;
Expand All @@ -15,21 +17,21 @@ pub fn frechet_distance(
match (input, other) {
(AnyGeometryInput::Array(left), AnyGeometryBroadcastInput::Array(right)) => {
let result = FrechetDistance::frechet_distance(&left.as_ref(), &right.as_ref())?;
Ok(PyArray::from_array(result).to_arro3(py)?)
Ok(PyArray::from_array_ref(Arc::new(result)).to_arro3(py)?)
}
(AnyGeometryInput::Chunked(left), AnyGeometryBroadcastInput::Chunked(right)) => {
let result = FrechetDistance::frechet_distance(&left.as_ref(), &right.as_ref())?;
Ok(PyChunkedArray::from_arrays(result.chunks())?.to_arro3(py)?)
Ok(PyChunkedArray::from_array_refs(result.chunk_refs())?.to_arro3(py)?)
}
(AnyGeometryInput::Array(left), AnyGeometryBroadcastInput::Scalar(right)) => {
let scalar = right.to_geo_line_string()?;
let result = FrechetDistanceLineString::frechet_distance(&left.as_ref(), &scalar)?;
Ok(PyArray::from_array(result).to_arro3(py)?)
Ok(PyArray::from_array_ref(Arc::new(result)).to_arro3(py)?)
}
(AnyGeometryInput::Chunked(left), AnyGeometryBroadcastInput::Scalar(right)) => {
let scalar = right.to_geo_line_string()?;
let result = FrechetDistanceLineString::frechet_distance(&left.as_ref(), &scalar)?;
Ok(PyChunkedArray::from_arrays(result.chunks())?.to_arro3(py)?)
Ok(PyChunkedArray::from_array_refs(result.chunk_refs())?.to_arro3(py)?)
}
_ => Err(PyValueError::new_err("Unsupported input types.").into()),
}
Expand Down
6 changes: 4 additions & 2 deletions python/core/src/algorithm/geo/geodesic_area.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
use std::sync::Arc;

use crate::error::PyGeoArrowResult;
use crate::ffi::from_python::AnyGeometryInput;
use geoarrow::algorithm::geo::GeodesicArea;
Expand All @@ -9,11 +11,11 @@ pub fn geodesic_perimeter(py: Python, input: AnyGeometryInput) -> PyGeoArrowResu
match input {
AnyGeometryInput::Array(arr) => {
let out = arr.as_ref().geodesic_perimeter()?;
Ok(PyArray::from_array(out).to_arro3(py)?)
Ok(PyArray::from_array_ref(Arc::new(out)).to_arro3(py)?)
}
AnyGeometryInput::Chunked(arr) => {
let out = arr.as_ref().geodesic_perimeter()?;
Ok(PyChunkedArray::from_arrays(out.chunks())?.to_arro3(py)?)
Ok(PyChunkedArray::from_array_refs(out.chunk_refs())?.to_arro3(py)?)
}
}
}
6 changes: 4 additions & 2 deletions python/core/src/algorithm/geo/length.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
use std::sync::Arc;

use crate::error::PyGeoArrowResult;
use crate::ffi::from_python::AnyGeometryInput;
use geoarrow::algorithm::geo::{EuclideanLength, GeodesicLength, HaversineLength, VincentyLength};
Expand Down Expand Up @@ -43,7 +45,7 @@ pub fn length(
LengthMethod::Haversine => arr.as_ref().haversine_length()?,
LengthMethod::Vincenty => arr.as_ref().vincenty_length()?,
};
Ok(PyArray::from_array(out).to_arro3(py)?)
Ok(PyArray::from_array_ref(Arc::new(out)).to_arro3(py)?)
}
AnyGeometryInput::Chunked(arr) => {
let out = match method {
Expand All @@ -52,7 +54,7 @@ pub fn length(
LengthMethod::Haversine => arr.as_ref().haversine_length()?,
LengthMethod::Vincenty => arr.as_ref().vincenty_length()?,
};
Ok(PyChunkedArray::from_arrays(out.chunks())?.to_arro3(py)?)
Ok(PyChunkedArray::from_array_refs(out.chunk_refs())?.to_arro3(py)?)
}
}
}
10 changes: 6 additions & 4 deletions python/core/src/algorithm/geo/line_locate_point.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
use std::sync::Arc;

use crate::error::PyGeoArrowResult;
use crate::ffi::from_python::input::AnyGeometryBroadcastInput;
use crate::ffi::from_python::AnyGeometryInput;
Expand All @@ -15,21 +17,21 @@ pub fn line_locate_point(
match (input, point) {
(AnyGeometryInput::Array(arr), AnyGeometryBroadcastInput::Array(point)) => {
let result = LineLocatePoint::line_locate_point(&arr.as_ref(), point.as_ref())?;
Ok(PyArray::from_array(result).to_arro3(py)?)
Ok(PyArray::from_array_ref(Arc::new(result)).to_arro3(py)?)
}
(AnyGeometryInput::Chunked(arr), AnyGeometryBroadcastInput::Chunked(point)) => {
let result = LineLocatePoint::line_locate_point(&arr.as_ref(), point.as_ref())?;
Ok(PyChunkedArray::from_arrays(result.chunks())?.to_arro3(py)?)
Ok(PyChunkedArray::from_array_refs(result.chunk_refs())?.to_arro3(py)?)
}
(AnyGeometryInput::Array(arr), AnyGeometryBroadcastInput::Scalar(point)) => {
let scalar = point.to_geo_point()?;
let result = LineLocatePointScalar::line_locate_point(&arr.as_ref(), &scalar)?;
Ok(PyArray::from_array(result).to_arro3(py)?)
Ok(PyArray::from_array_ref(Arc::new(result)).to_arro3(py)?)
}
(AnyGeometryInput::Chunked(arr), AnyGeometryBroadcastInput::Scalar(point)) => {
let scalar = point.to_geo_point()?;
let result = LineLocatePointScalar::line_locate_point(&arr.as_ref(), &scalar)?;
Ok(PyChunkedArray::from_arrays(result.chunks())?.to_arro3(py)?)
Ok(PyChunkedArray::from_array_refs(result.chunk_refs())?.to_arro3(py)?)
}
_ => Err(PyValueError::new_err("Unsupported input types.").into()),
}
Expand Down
2 changes: 2 additions & 0 deletions python/core/src/algorithm/mod.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
pub mod geo;
pub mod native;

#[cfg(feature = "libc")]
pub mod polylabel;
Loading

0 comments on commit e09f4af

Please sign in to comment.