From 6db20312c97c744e6e3b2e3b7847edca86a43e04 Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Fri, 1 Mar 2024 00:40:29 -0500 Subject: [PATCH] Zero-copy as_chunked --- .../src/algorithm/geo/frechet_distance.rs | 35 +++++++++++++------ src/algorithm/native/as_chunked.rs | 28 +++++++++++++++ src/algorithm/native/mod.rs | 2 ++ src/chunked_array/chunked_array.rs | 14 ++++++++ 4 files changed, 69 insertions(+), 10 deletions(-) create mode 100644 src/algorithm/native/as_chunked.rs diff --git a/python/core/src/algorithm/geo/frechet_distance.rs b/python/core/src/algorithm/geo/frechet_distance.rs index 8eec26639..37b74b5b2 100644 --- a/python/core/src/algorithm/geo/frechet_distance.rs +++ b/python/core/src/algorithm/geo/frechet_distance.rs @@ -4,6 +4,7 @@ use crate::error::PyGeoArrowResult; use crate::ffi::from_python::input::AnyGeometryBroadcastInput; use crate::ffi::from_python::AnyGeometryInput; use geoarrow::algorithm::geo::{FrechetDistance, FrechetDistanceLineString}; +use geoarrow::algorithm::native::as_chunked_geometry_array; use geoarrow::array::{AsChunkedGeometryArray, AsGeometryArray}; use geoarrow::datatypes::GeoDataType; use geoarrow::io::geo::geometry_to_geo; @@ -16,16 +17,6 @@ pub fn frechet_distance( other: AnyGeometryBroadcastInput, ) -> PyGeoArrowResult { match (input, other) { - (AnyGeometryInput::Array(left), AnyGeometryBroadcastInput::Array(right)) => { - let result = FrechetDistance::frechet_distance(&left.as_ref(), &right.as_ref())?; - let result = Float64Array::from(result); - Python::with_gil(|py| Ok(result.into_py(py))) - } - (AnyGeometryInput::Chunked(left), AnyGeometryBroadcastInput::Chunked(right)) => { - let result = FrechetDistance::frechet_distance(&left.as_ref(), &right.as_ref())?; - let result = ChunkedFloat64Array::from(result); - Python::with_gil(|py| Ok(result.into_py(py))) - } (AnyGeometryInput::Array(left), AnyGeometryBroadcastInput::Scalar(right)) => { let scalar = geo::LineString::try_from(geometry_to_geo(&right.0)) .map_err(|_| 
PyValueError::new_err("Expected type LineString"))?; @@ -33,6 +24,18 @@ pub fn frechet_distance( let result = Float64Array::from(result); Python::with_gil(|py| Ok(result.into_py(py))) } + (AnyGeometryInput::Array(left), AnyGeometryBroadcastInput::Array(right)) => { + let result = FrechetDistance::frechet_distance(&left.as_ref(), &right.as_ref())?; + let result = Float64Array::from(result); + Python::with_gil(|py| Ok(result.into_py(py))) + } + // TODO: Unknown whether this should be supported. I like "array in, array out". + // (AnyGeometryInput::Array(left), AnyGeometryBroadcastInput::Chunked(right)) => { + // let left_chunked = as_chunked_geometry_array(&left.as_ref(), &right.chunk_lengths())?; + // let result = FrechetDistance::frechet_distance(&left_chunked.as_ref(), &right.as_ref())?; + // let result = Float64Array::from(result); + // Python::with_gil(|py| Ok(result.into_py(py))) + // } (AnyGeometryInput::Chunked(left), AnyGeometryBroadcastInput::Scalar(right)) => { let scalar = geo::LineString::try_from(geometry_to_geo(&right.0)) .map_err(|_| PyValueError::new_err("Expected type LineString"))?; @@ -40,6 +43,18 @@ pub fn frechet_distance( let result = ChunkedFloat64Array::from(result); Python::with_gil(|py| Ok(result.into_py(py))) } + (AnyGeometryInput::Chunked(left), AnyGeometryBroadcastInput::Array(right)) => { + let right_chunked = as_chunked_geometry_array(right.as_ref(), &left.chunk_lengths())?; + let result = + FrechetDistance::frechet_distance(&left.as_ref(), &right_chunked.as_ref())?; + let result = ChunkedFloat64Array::from(result); + Python::with_gil(|py| Ok(result.into_py(py))) + } + (AnyGeometryInput::Chunked(left), AnyGeometryBroadcastInput::Chunked(right)) => { + let result = FrechetDistance::frechet_distance(&left.as_ref(), &right.as_ref())?; + let result = ChunkedFloat64Array::from(result); + Python::with_gil(|py| Ok(result.into_py(py))) + } _ => Err(PyValueError::new_err("Unsupported input types.").into()), } } diff --git 
a/src/algorithm/native/as_chunked.rs b/src/algorithm/native/as_chunked.rs
new file mode 100644
index 000000000..1a00abc63
--- /dev/null
+++ b/src/algorithm/native/as_chunked.rs
@@ -0,0 +1,37 @@
+use std::sync::Arc;
+
+use crate::chunked_array::{from_arrow_chunks, ChunkedGeometryArrayTrait};
+use crate::error::Result;
+use crate::GeometryArrayTrait;
+
+// TODO: don't go through Arc
+// Update geometry array trait to put slice on the main trait
+// Put slice() on each individual array directly, and delegate to it from geom trait
+/// Split `array` into a chunked geometry array, one chunk per entry of `chunk_lengths`.
+///
+/// Chunk `i` is the slice of `array` that starts at the sum of the preceding lengths and
+/// holds `chunk_lengths[i]` elements.
+///
+/// # Panics
+///
+/// Panics if the sum of `chunk_lengths` differs from `array.len()`.
+pub fn as_chunked_geometry_array(
+    array: &dyn GeometryArrayTrait,
+    chunk_lengths: &[usize],
+) -> Result<Arc<dyn ChunkedGeometryArrayTrait>> {
+    assert_eq!(array.len(), chunk_lengths.iter().sum::<usize>());
+
+    let mut new_chunks = Vec::with_capacity(chunk_lengths.len());
+    let mut offset = 0;
+    for length in chunk_lengths {
+        // Slice rather than push the whole array: each chunk must cover only its own range.
+        new_chunks.push(array.to_array_ref().slice(offset, *length));
+        offset += length;
+    }
+
+    let array_refs = new_chunks
+        .iter()
+        .map(|arr| arr.as_ref())
+        .collect::<Vec<_>>();
+    from_arrow_chunks(array_refs.as_slice(), array.extension_field().as_ref())
+}
diff --git a/src/algorithm/native/mod.rs b/src/algorithm/native/mod.rs index f3b6d7c29..5a25c6bd3 100644 --- a/src/algorithm/native/mod.rs +++ b/src/algorithm/native/mod.rs @@ -3,6 +3,7 @@ //! Where possible, operations on scalars are implemented in terms of [geometry //! traits](../../geo_traits). +mod as_chunked; mod binary; pub mod bounding_rect; mod cast; @@ -17,6 +18,7 @@ mod total_bounds; pub(crate) mod type_id; mod unary; +pub use as_chunked::as_chunked_geometry_array; pub use binary::Binary; pub use cast::Cast; pub use concatenate::Concatenate; diff --git a/src/chunked_array/chunked_array.rs b/src/chunked_array/chunked_array.rs index b600f0dbc..e655a71c0 100644 --- a/src/chunked_array/chunked_array.rs +++ b/src/chunked_array/chunked_array.rs @@ -232,6 +232,8 @@ pub trait ChunkedGeometryArrayTrait: std::fmt::Debug + Send + Sync { /// The number of chunks in this chunked array.
fn num_chunks(&self) -> usize; + + fn chunk_lengths(&self) -> Vec<usize>; } impl ChunkedGeometryArrayTrait for ChunkedPointArray { @@ -256,6 +258,10 @@ impl ChunkedGeometryArrayTrait for ChunkedPointArray { fn num_chunks(&self) -> usize { self.chunks.len() } + + fn chunk_lengths(&self) -> Vec<usize> { + self.chunks.iter().map(|chunk| chunk.len()).collect() + } } macro_rules! impl_trait { @@ -282,6 +288,10 @@ macro_rules! impl_trait { fn num_chunks(&self) -> usize { self.chunks.len() } + + fn chunk_lengths(&self) -> Vec<usize> { + self.chunks.iter().map(|chunk| chunk.len()).collect() + } } }; } @@ -317,6 +327,10 @@ impl ChunkedGeometryArrayTrait for ChunkedRectArray { fn num_chunks(&self) -> usize { self.chunks.len() } + + fn chunk_lengths(&self) -> Vec<usize> { + self.chunks.iter().map(|chunk| chunk.len()).collect() + } } /// Construct