Skip to content

Commit

Permalink
Chunked WKT and WKB parsing (#761)
Browse files Browse the repository at this point in the history
  • Loading branch information
kylebarron committed Sep 5, 2024
1 parent 7351452 commit 3397434
Show file tree
Hide file tree
Showing 10 changed files with 250 additions and 35 deletions.
48 changes: 46 additions & 2 deletions python/geoarrow-core/python/geoarrow/rust/core/_rust.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ except ImportError:

from .enums import (
AreaMethod,
CoordType,
LengthMethod,
RotateOrigin,
SimplifyMethod,
Expand All @@ -37,6 +38,7 @@ from .types import (
AffineTransform,
AreaMethodT,
BroadcastGeometry,
CoordTypeT,
GeoInterfaceProtocol,
LengthMethodT,
NumpyArrayProtocolf64,
Expand Down Expand Up @@ -945,7 +947,23 @@ def from_shapely(input, *, crs: Any | None = None) -> GeometryArray:
A GeoArrow array
"""

def from_wkb(input: ArrowArrayExportable) -> GeometryArray:
@overload
def from_wkb(
input: ArrowArrayExportable,
*,
coord_type: CoordType | CoordTypeT = CoordType.Interleaved,
) -> GeometryArray: ...
@overload
def from_wkb(
input: ArrowStreamExportable,
*,
coord_type: CoordType | CoordTypeT = CoordType.Interleaved,
) -> ChunkedGeometryArray: ...
def from_wkb(
input: ArrowArrayExportable | ArrowStreamExportable,
*,
coord_type: CoordType | CoordTypeT = CoordType.Interleaved,
) -> GeometryArray | ChunkedGeometryArray:
"""
Parse an Arrow BinaryArray from WKB to its GeoArrow-native counterpart.
Expand All @@ -954,17 +972,39 @@ def from_wkb(input: ArrowArrayExportable) -> GeometryArray:
Args:
input: An Arrow array of Binary type holding WKB-formatted geometries.
Other args:
coord_type: Specify the coordinate type of the generated GeoArrow data.
Returns:
A GeoArrow-native geometry array
"""

def from_wkt(input: ArrowArrayExportable) -> GeometryArray:
@overload
def from_wkt(
    input: ArrowArrayExportable,
    *,
    coord_type: CoordType | CoordTypeT = CoordType.Interleaved,
) -> GeometryArray: ...
@overload
def from_wkt(
    input: ArrowStreamExportable,
    *,
    coord_type: CoordType | CoordTypeT = CoordType.Interleaved,
) -> ChunkedGeometryArray: ...
def from_wkt(
    input: ArrowArrayExportable | ArrowStreamExportable,
    *,
    coord_type: CoordType | CoordTypeT = CoordType.Interleaved,
) -> GeometryArray | ChunkedGeometryArray:
    """
    Parse Arrow string data from WKT to its GeoArrow-native counterpart.

    Args:
        input: An Arrow array, or an Arrow stream (e.g. a chunked array), of
            string type holding WKT-formatted geometries.

    Other args:
        coord_type: Specify the coordinate type of the generated GeoArrow data.

    Returns:
        A GeoArrow-native geometry array when `input` is a single array, or a
        chunked geometry array when `input` is a stream.
    """
Expand Down Expand Up @@ -997,6 +1037,10 @@ def to_shapely(
numpy array with Shapely objects
"""

@overload
def to_wkb(input: ArrowArrayExportable) -> GeometryArray: ...
@overload
def to_wkb(input: ArrowStreamExportable) -> ChunkedGeometryArray: ...
def to_wkb(input: ArrowArrayExportable) -> GeometryArray:
"""
Encode a GeoArrow-native geometry array to a WKBArray, holding ISO-formatted WKB geometries.
Expand Down
14 changes: 14 additions & 0 deletions python/geoarrow-core/python/geoarrow/rust/core/enums.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,20 @@ class AreaMethod(StrEnum):
"""


class CoordType(StrEnum):
    """Coordinate layout options for generated GeoArrow data.

    Accepted by the `coord_type` keyword of `from_wkb` and `from_wkt`.
    """

    Interleaved = auto()
    """Interleaved coordinate layout.
    All coordinates are stored in a single buffer, as `XYXYXY`.
    """

    Separated = auto()
    """Separated coordinate layout.
    Coordinates are stored in a separate buffer per dimension, e.g. `XXXX` and `YYYY`.
    """


class LengthMethod(StrEnum):
Ellipsoidal = auto()
"""Determine the length of a geometry on an ellipsoidal model of the earth.
Expand Down
3 changes: 3 additions & 0 deletions python/geoarrow-core/python/geoarrow/rust/core/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,9 @@
[`signed_area`][geoarrow.rust.core.signed_area].
"""

CoordTypeT = Literal["interleaved", "separated"]
"""Acceptable coord_type strings.
"""

LengthMethodT = Literal["ellipsoidal", "euclidean", "haversine", "vincenty"]
"""Acceptable strings to be passed into the `method` parameter for
Expand Down
29 changes: 29 additions & 0 deletions python/geoarrow-core/src/coord_type.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
use geoarrow::array::CoordType;
use pyo3::exceptions::PyValueError;
use pyo3::prelude::*;

/// Coordinate layout selector received from Python.
///
/// Built from a Python string by the `FromPyObject` impl in this module and
/// converted into [`geoarrow::array::CoordType`] through the `From` impl.
#[derive(Debug, Clone, Copy)]
pub enum PyCoordType {
    /// Selected by the string `"interleaved"`; maps to `CoordType::Interleaved`.
    Interleaved,
    /// Selected by the string `"separated"`; maps to `CoordType::Separated`.
    Separated,
}

impl<'a> FromPyObject<'a> for PyCoordType {
    /// Extract a coordinate type from a Python string, ignoring case.
    ///
    /// # Errors
    ///
    /// Propagates the extraction error when `ob` cannot be read as a string,
    /// and raises `ValueError` naming the rejected value when the string is
    /// neither `"interleaved"` nor `"separated"`.
    fn extract(ob: &'a PyAny) -> PyResult<Self> {
        let s: String = ob.extract()?;
        match s.to_lowercase().as_str() {
            "interleaved" => Ok(Self::Interleaved),
            "separated" => Ok(Self::Separated),
            // Echo the offending value and the accepted spellings so the
            // Python caller can correct the argument without reading the source.
            _ => Err(PyValueError::new_err(format!(
                "Unexpected coord type {:?}; expected 'interleaved' or 'separated'",
                s
            ))),
        }
    }
}

impl From<PyCoordType> for CoordType {
fn from(value: PyCoordType) -> Self {
match value {
PyCoordType::Interleaved => Self::Interleaved,
PyCoordType::Separated => Self::Separated,
}
}
}
44 changes: 34 additions & 10 deletions python/geoarrow-core/src/interop/wkb.rs
Original file line number Diff line number Diff line change
@@ -1,27 +1,36 @@
use std::sync::Arc;

use geoarrow::array::{AsGeometryArray, CoordType, GeometryArrayDyn};
use geoarrow::array::{AsChunkedGeometryArray, AsGeometryArray, GeometryArrayDyn};
use geoarrow::chunked_array::ChunkedGeometryArrayTrait;
use geoarrow::datatypes::GeoDataType;
use geoarrow::error::GeoArrowError;
use geoarrow::io::wkb::{to_wkb as _to_wkb, FromWKB};
use geoarrow::io::wkb::{to_wkb as _to_wkb, FromWKB, ToWKB};
use geoarrow::GeometryArrayTrait;
use pyo3::prelude::*;

use crate::array::*;
use crate::coord_type::PyCoordType;
use crate::error::PyGeoArrowResult;
use crate::ffi::from_python::AnyGeometryInput;
use crate::ffi::to_python::geometry_array_to_pyobject;
use crate::ffi::to_python::{chunked_geometry_array_to_pyobject, geometry_array_to_pyobject};

#[pyfunction]
pub fn from_wkb(py: Python, input: AnyGeometryInput) -> PyGeoArrowResult<PyObject> {
#[pyo3(
signature = (input, *, coord_type = PyCoordType::Interleaved),
text_signature = "(input, *, method = 'interleaved')")
]
pub fn from_wkb(
py: Python,
input: AnyGeometryInput,
coord_type: PyCoordType,
) -> PyGeoArrowResult<PyObject> {
let coord_type = coord_type.into();
match input {
AnyGeometryInput::Array(arr) => {
let geo_array: Arc<dyn GeometryArrayTrait> = match arr.0.data_type() {
GeoDataType::WKB => {
FromWKB::from_wkb(arr.as_ref().as_wkb(), CoordType::Interleaved)?
}
GeoDataType::WKB => FromWKB::from_wkb(arr.as_ref().as_wkb(), coord_type)?,
GeoDataType::LargeWKB => {
FromWKB::from_wkb(arr.as_ref().as_large_wkb(), CoordType::Interleaved)?
FromWKB::from_wkb(arr.as_ref().as_large_wkb(), coord_type)?
}
other => {
return Err(GeoArrowError::IncorrectType(
Expand All @@ -32,7 +41,19 @@ pub fn from_wkb(py: Python, input: AnyGeometryInput) -> PyGeoArrowResult<PyObjec
};
geometry_array_to_pyobject(py, geo_array)
}
AnyGeometryInput::Chunked(_) => todo!(),
AnyGeometryInput::Chunked(s) => {
let geo_array: Arc<dyn ChunkedGeometryArrayTrait> = match s.0.data_type() {
GeoDataType::WKB => FromWKB::from_wkb(s.as_ref().as_wkb(), coord_type)?,
GeoDataType::LargeWKB => FromWKB::from_wkb(s.as_ref().as_large_wkb(), coord_type)?,
other => {
return Err(GeoArrowError::IncorrectType(
format!("Unexpected array type {:?}", other).into(),
)
.into())
}
};
chunked_geometry_array_to_pyobject(py, geo_array)
}
}
}

Expand All @@ -43,6 +64,9 @@ pub fn to_wkb(py: Python, input: AnyGeometryInput) -> PyGeoArrowResult<PyObject>
_to_wkb::<i32>(arr.as_ref()),
)))
.into_py(py)),
AnyGeometryInput::Chunked(_) => todo!(),
AnyGeometryInput::Chunked(s) => {
let out = s.0.as_ref().to_wkb::<i32>();
chunked_geometry_array_to_pyobject(py, Arc::new(out))
}
}
}
95 changes: 72 additions & 23 deletions python/geoarrow-core/src/interop/wkt.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,35 +2,84 @@ use std::sync::Arc;

use arrow::datatypes::DataType;
use arrow_array::cast::AsArray;
use geoarrow::array::CoordType;
use geoarrow::array::metadata::ArrayMetadata;
use geoarrow::array::MixedGeometryArray;
use geoarrow::chunked_array::{ChunkedArray, ChunkedMixedGeometryArray};
use geoarrow::io::geozero::FromWKT;
use geoarrow::GeometryArrayTrait;
use pyo3::exceptions::PyTypeError;
use pyo3::prelude::*;
use pyo3_arrow::PyArray;
use pyo3_arrow::input::AnyArray;

use crate::coord_type::PyCoordType;
use crate::error::PyGeoArrowResult;
use crate::ffi::to_python::geometry_array_to_pyobject;
use crate::ffi::to_python::{chunked_geometry_array_to_pyobject, geometry_array_to_pyobject};

#[pyfunction]
pub fn from_wkt(py: Python, input: PyArray) -> PyGeoArrowResult<PyObject> {
let (array, _field) = input.into_inner();
let geo_array: Arc<dyn GeometryArrayTrait> = match array.data_type() {
DataType::Utf8 => FromWKT::from_wkt(
array.as_string::<i32>(),
CoordType::Interleaved,
Default::default(),
false,
)?,
DataType::LargeUtf8 => FromWKT::from_wkt(
array.as_string::<i64>(),
CoordType::Interleaved,
Default::default(),
false,
)?,
other => {
return Err(PyTypeError::new_err(format!("Unexpected array type {:?}", other)).into())
#[pyo3(
signature = (input, *, coord_type = PyCoordType::Interleaved),
text_signature = "(input, *, method = 'interleaved')")
]
pub fn from_wkt(
py: Python,
input: AnyArray,
coord_type: PyCoordType,
) -> PyGeoArrowResult<PyObject> {
let coord_type = coord_type.into();
match input {
AnyArray::Array(arr) => {
let (array, field) = arr.into_inner();
let metadata = Arc::new(ArrayMetadata::try_from(field.as_ref())?);
let geo_array: MixedGeometryArray<i32, 2> = match array.data_type() {
DataType::Utf8 => {
FromWKT::from_wkt(array.as_string::<i32>(), coord_type, metadata, false)?
}
DataType::LargeUtf8 => {
FromWKT::from_wkt(array.as_string::<i64>(), coord_type, metadata, false)?
}
other => {
return Err(
PyTypeError::new_err(format!("Unexpected array type {:?}", other)).into(),
)
}
};
geometry_array_to_pyobject(py, Arc::new(geo_array))
}
};
geometry_array_to_pyobject(py, geo_array)
AnyArray::Stream(s) => {
let chunked_arr = s.into_chunked_array()?;
let (chunks, field) = chunked_arr.into_inner();
let metadata = Arc::new(ArrayMetadata::try_from(field.as_ref())?);
let geo_array: ChunkedMixedGeometryArray<i32, 2> = match field.data_type() {
DataType::Utf8 => {
let string_chunks = chunks
.iter()
.map(|chunk| chunk.as_string::<i32>().clone())
.collect::<Vec<_>>();
FromWKT::from_wkt(
&ChunkedArray::new(string_chunks),
coord_type,
metadata,
false,
)?
}
DataType::LargeUtf8 => {
let string_chunks = chunks
.iter()
.map(|chunk| chunk.as_string::<i64>().clone())
.collect::<Vec<_>>();
FromWKT::from_wkt(
&ChunkedArray::new(string_chunks),
coord_type,
metadata,
false,
)?
}
other => {
return Err(
PyTypeError::new_err(format!("Unexpected array type {:?}", other)).into(),
)
}
};
chunked_geometry_array_to_pyobject(py, Arc::new(geo_array))
}
}
}
1 change: 1 addition & 0 deletions python/geoarrow-core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ pub mod algorithm;
pub mod array;
pub mod broadcasting;
pub mod chunked_array;
mod coord_type;
pub(crate) mod crs;
pub mod error;
pub mod ffi;
Expand Down
12 changes: 12 additions & 0 deletions python/tests/interop/test_wkb.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
import pyarrow as pa
import shapely
from geoarrow.rust.core import from_shapely, from_wkb, to_shapely, to_wkb
from shapely.testing import assert_geometries_equal


def test_wkb_round_trip():
    """Points encoded to ISO WKB and decoded again must match the shapely input."""
    xs, ys = [0, 1, 2, 3], [4, 5, 6, 7]
    geoms = shapely.points(xs, ys)

    wkb_arr = to_wkb(from_shapely(geoms))
    expected_wkb = pa.array(shapely.to_wkb(geoms, flavor="iso"))
    assert expected_wkb == pa.array(wkb_arr)

    decoded = from_wkb(wkb_arr)
    assert_geometries_equal(geoms, to_shapely(decoded))
27 changes: 27 additions & 0 deletions python/tests/interop/test_wkt.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import pyarrow as pa
import shapely
from geoarrow.rust.core import from_wkt, to_shapely
from shapely.testing import assert_geometries_equal


def test_from_wkt():
    """A single Arrow string array of WKT points parses to the same geometries as shapely."""
    wkt_strings = ["POINT (3 2)", "POINT (0 2)", "POINT (1 4)"] * 2

    expected = shapely.from_wkt(wkt_strings)
    parsed = from_wkt(pa.array(wkt_strings))

    assert_geometries_equal(expected, to_shapely(parsed))


def test_from_wkt_chunked():
    """A chunked Arrow string array of WKT points parses to the same geometries as shapely."""
    first = ["POINT (3 2)", "POINT (0 2)", "POINT (1 4)"]
    second = list(first)

    chunks = [pa.array(part) for part in (first, second)]
    parsed = from_wkt(pa.chunked_array(chunks))

    expected = shapely.from_wkt(first + second)
    assert_geometries_equal(expected, to_shapely(parsed))
Loading

0 comments on commit 3397434

Please sign in to comment.