From 107501c82dc00dc80fb1467c915ef605b8f74a51 Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Sun, 6 Oct 2024 11:42:30 -0400 Subject: [PATCH 1/4] Binary operations simplification --- src/algorithm/geo/within.rs | 330 ++++++++------------------------- src/algorithm/native/binary.rs | 112 ++++++++++- src/algorithm/native/mod.rs | 2 +- src/array/rect/array.rs | 13 +- src/scalar/point/scalar.rs | 6 + 5 files changed, 211 insertions(+), 252 deletions(-) diff --git a/src/algorithm/geo/within.rs b/src/algorithm/geo/within.rs index ae542eab..eaeacaea 100644 --- a/src/algorithm/geo/within.rs +++ b/src/algorithm/geo/within.rs @@ -1,8 +1,12 @@ +use crate::algorithm::native::binary::try_binary_boolean_native_geometry; +use crate::algorithm::native::Unary; use crate::array::*; -use crate::scalar::*; -use crate::trait_::ArrayAccessor; +use crate::datatypes::{Dimension, NativeType}; +use crate::error::{GeoArrowError, Result}; +use crate::geo_traits::GeometryTrait; +use crate::io::geo::geometry_to_geo; +use crate::trait_::NativeGeometryAccessor; use crate::trait_::NativeScalar; -use arrow_array::builder::BooleanBuilder; use arrow_array::BooleanArray; use geo::Within as _Within; @@ -40,269 +44,99 @@ use geo::Within as _Within; /// ``` /// /// [DE-9IM]: https://en.wikipedia.org/wiki/DE-9IM -pub trait Within { - fn is_within(&self, b: &Other) -> BooleanArray; +pub trait Within<'a, Other> { + fn is_within(&'a self, b: &'a Other) -> Result; } -// ┌────────────────────────────────┐ -// │ Implementations for RHS arrays │ -// └────────────────────────────────┘ - -// Note: this implementation is outside the macro because it is not generic over O -impl Within for PointArray<2> { - fn is_within(&self, rhs: &Self) -> BooleanArray { - assert_eq!(self.len(), rhs.len()); - - let mut output_array = BooleanBuilder::with_capacity(self.len()); - - self.iter_geo() - .zip(rhs.iter_geo()) - .for_each(|(first, second)| match (first, second) { - (Some(first), Some(second)) => output_array.append_value(first.is_within(&second)), - _ => output_array.append_null(), - }); - - output_array.finish() - } -} - -// Implementation that iterates over geo objects macro_rules! iter_geo_impl { - ($first:ty, $second:ty) => { - impl<'a> Within<$second> for $first { - fn is_within(&self, rhs: &$second) -> BooleanArray { - assert_eq!(self.len(), rhs.len()); - - let mut output_array = BooleanBuilder::with_capacity(self.len()); - - self.iter_geo() - .zip(rhs.iter_geo()) - .for_each(|(first, second)| match (first, second) { - (Some(first), Some(second)) => { - output_array.append_value(first.is_within(&second)) - } - _ => output_array.append_null(), - }); - - output_array.finish() + ($array_type:ty) => { + impl<'a, R: NativeGeometryAccessor<'a, 2>> Within<'a, R> for $array_type { + fn is_within(&'a self, rhs: &'a R) -> Result { + try_binary_boolean_native_geometry(self, rhs, |l, r| { + Ok(l.to_geo().is_within(&r.to_geo())) + }) } } }; } -// Implementations on PointArray -iter_geo_impl!(PointArray<2>, LineStringArray<2>); -iter_geo_impl!(PointArray<2>, PolygonArray<2>); -iter_geo_impl!(PointArray<2>, MultiPointArray<2>); -iter_geo_impl!(PointArray<2>, MultiLineStringArray<2>); -iter_geo_impl!(PointArray<2>, MultiPolygonArray<2>); - -// Implementations on LineStringArray -iter_geo_impl!(LineStringArray<2>, PointArray<2>); -iter_geo_impl!(LineStringArray<2>, LineStringArray<2>); -iter_geo_impl!(LineStringArray<2>, PolygonArray<2>); -iter_geo_impl!(LineStringArray<2>, MultiPointArray<2>); -iter_geo_impl!(LineStringArray<2>, MultiLineStringArray<2>); -iter_geo_impl!(LineStringArray<2>, MultiPolygonArray<2>); - -// Implementations on PolygonArray -iter_geo_impl!(PolygonArray<2>, PointArray<2>); -iter_geo_impl!(PolygonArray<2>, LineStringArray<2>); -iter_geo_impl!(PolygonArray<2>, PolygonArray<2>); -iter_geo_impl!(PolygonArray<2>, MultiPointArray<2>); -iter_geo_impl!(PolygonArray<2>, MultiLineStringArray<2>); -iter_geo_impl!(PolygonArray<2>, MultiPolygonArray<2>); - -// Implementations on MultiPointArray -iter_geo_impl!(MultiPointArray<2>, PointArray<2>); -iter_geo_impl!(MultiPointArray<2>, LineStringArray<2>); -iter_geo_impl!(MultiPointArray<2>, PolygonArray<2>); -iter_geo_impl!(MultiPointArray<2>, MultiPointArray<2>); -iter_geo_impl!(MultiPointArray<2>, MultiLineStringArray<2>); -iter_geo_impl!(MultiPointArray<2>, MultiPolygonArray<2>); - -// Implementations on MultiLineStringArray -iter_geo_impl!(MultiLineStringArray<2>, PointArray<2>); -iter_geo_impl!(MultiLineStringArray<2>, LineStringArray<2>); -iter_geo_impl!(MultiLineStringArray<2>, PolygonArray<2>); -iter_geo_impl!(MultiLineStringArray<2>, MultiPointArray<2>); -iter_geo_impl!(MultiLineStringArray<2>, MultiLineStringArray<2>); -iter_geo_impl!(MultiLineStringArray<2>, MultiPolygonArray<2>); - -// Implementations on MultiPolygonArray -iter_geo_impl!(MultiPolygonArray<2>, PointArray<2>); -iter_geo_impl!(MultiPolygonArray<2>, LineStringArray<2>); -iter_geo_impl!(MultiPolygonArray<2>, PolygonArray<2>); -iter_geo_impl!(MultiPolygonArray<2>, MultiPointArray<2>); -iter_geo_impl!(MultiPolygonArray<2>, MultiLineStringArray<2>); -iter_geo_impl!(MultiPolygonArray<2>, MultiPolygonArray<2>); - -// ┌──────────────────────────────────────────┐ -// │ Implementations for RHS geoarrow scalars │ -// └──────────────────────────────────────────┘ - -// Note: this implementation is outside the macro because it is not generic over O -impl<'a> Within> for PointArray<2> { - fn is_within(&self, rhs: &Point<'a, 2>) -> BooleanArray { - let mut output_array = BooleanBuilder::with_capacity(self.len()); - - self.iter_geo().for_each(|maybe_point| { - let output = maybe_point.map(|point| point.is_within(&rhs.to_geo())); - output_array.append_option(output) - }); - - output_array.finish() +iter_geo_impl!(PointArray<2>); +iter_geo_impl!(LineStringArray<2>); +iter_geo_impl!(PolygonArray<2>); +iter_geo_impl!(MultiPointArray<2>); +iter_geo_impl!(MultiLineStringArray<2>); +iter_geo_impl!(MultiPolygonArray<2>); +iter_geo_impl!(MixedGeometryArray<2>); +iter_geo_impl!(GeometryCollectionArray<2>); +iter_geo_impl!(RectArray<2>); + +impl<'a, R: NativeGeometryAccessor<'a, 2>> Within<'a, R> for &dyn NativeArray { + fn is_within(&'a self, rhs: &'a R) -> Result { + use Dimension::*; + use NativeType::*; + + match self.data_type() { + Point(_, XY) => Within::is_within(self.as_point::<2>(), rhs), + LineString(_, XY) => Within::is_within(self.as_line_string::<2>(), rhs), + Polygon(_, XY) => Within::is_within(self.as_polygon::<2>(), rhs), + MultiPoint(_, XY) => Within::is_within(self.as_multi_point::<2>(), rhs), + MultiLineString(_, XY) => Within::is_within(self.as_multi_line_string::<2>(), rhs), + MultiPolygon(_, XY) => Within::is_within(self.as_multi_polygon::<2>(), rhs), + Mixed(_, XY) => Within::is_within(self.as_mixed::<2>(), rhs), + GeometryCollection(_, XY) => Within::is_within(self.as_geometry_collection::<2>(), rhs), + Rect(XY) => Within::is_within(self.as_rect::<2>(), rhs), + _ => Err(GeoArrowError::IncorrectType("".into())), + } } } -/// Implementation that iterates over geo objects -macro_rules! iter_geo_impl_geoarrow_scalar { - ($first:ty, $second:ty) => { - impl<'a> Within<$second> for $first { - fn is_within(&self, rhs: &$second) -> BooleanArray { - let mut output_array = BooleanBuilder::with_capacity(self.len()); - let rhs_geo = rhs.to_geo(); - - self.iter_geo().for_each(|maybe_geom| { - let output = maybe_geom.map(|geom| geom.is_within(&rhs_geo)); - output_array.append_option(output) - }); - - output_array.finish() - } - } - }; +pub trait WithinScalar<'a, G: GeometryTrait> { + fn is_within(&'a self, b: &'a G) -> Result; } -// Implementations on PointArray -iter_geo_impl_geoarrow_scalar!(PointArray<2>, LineString<'a, 2>); -iter_geo_impl_geoarrow_scalar!(PointArray<2>, Polygon<'a, 2>); -iter_geo_impl_geoarrow_scalar!(PointArray<2>, MultiPoint<'a, 2>); -iter_geo_impl_geoarrow_scalar!(PointArray<2>, MultiLineString<'a, 2>); -iter_geo_impl_geoarrow_scalar!(PointArray<2>, MultiPolygon<'a, 2>); - -// Implementations on LineStringArray -iter_geo_impl_geoarrow_scalar!(LineStringArray<2>, Point<'a, 2>); -iter_geo_impl_geoarrow_scalar!(LineStringArray<2>, LineString<'a, 2>); -iter_geo_impl_geoarrow_scalar!(LineStringArray<2>, Polygon<'a, 2>); -iter_geo_impl_geoarrow_scalar!(LineStringArray<2>, MultiPoint<'a, 2>); -iter_geo_impl_geoarrow_scalar!(LineStringArray<2>, MultiLineString<'a, 2>); -iter_geo_impl_geoarrow_scalar!(LineStringArray<2>, MultiPolygon<'a, 2>); - -// Implementations on PolygonArray -iter_geo_impl_geoarrow_scalar!(PolygonArray<2>, Point<'a, 2>); -iter_geo_impl_geoarrow_scalar!(PolygonArray<2>, LineString<'a, 2>); -iter_geo_impl_geoarrow_scalar!(PolygonArray<2>, Polygon<'a, 2>); -iter_geo_impl_geoarrow_scalar!(PolygonArray<2>, MultiPoint<'a, 2>); -iter_geo_impl_geoarrow_scalar!(PolygonArray<2>, MultiLineString<'a, 2>); -iter_geo_impl_geoarrow_scalar!(PolygonArray<2>, MultiPolygon<'a, 2>); - -// Implementations on MultiPointArray -iter_geo_impl_geoarrow_scalar!(MultiPointArray<2>, Point<'a, 2>); -iter_geo_impl_geoarrow_scalar!(MultiPointArray<2>, LineString<'a, 2>); -iter_geo_impl_geoarrow_scalar!(MultiPointArray<2>, Polygon<'a, 2>); -iter_geo_impl_geoarrow_scalar!(MultiPointArray<2>, MultiPoint<'a, 2>); -iter_geo_impl_geoarrow_scalar!(MultiPointArray<2>, MultiLineString<'a, 2>); -iter_geo_impl_geoarrow_scalar!(MultiPointArray<2>, MultiPolygon<'a, 2>); - -// Implementations on MultiLineStringArray -iter_geo_impl_geoarrow_scalar!(MultiLineStringArray<2>, Point<'a, 2>); -iter_geo_impl_geoarrow_scalar!(MultiLineStringArray<2>, LineString<'a, 2>); -iter_geo_impl_geoarrow_scalar!(MultiLineStringArray<2>, Polygon<'a, 2>); -iter_geo_impl_geoarrow_scalar!(MultiLineStringArray<2>, MultiPoint<'a, 2>); -iter_geo_impl_geoarrow_scalar!(MultiLineStringArray<2>, MultiLineString<'a, 2>); -iter_geo_impl_geoarrow_scalar!(MultiLineStringArray<2>, MultiPolygon<'a, 2>); - -// Implementations on MultiPolygonArray -iter_geo_impl_geoarrow_scalar!(MultiPolygonArray<2>, Point<'a, 2>); -iter_geo_impl_geoarrow_scalar!(MultiPolygonArray<2>, LineString<'a, 2>); -iter_geo_impl_geoarrow_scalar!(MultiPolygonArray<2>, Polygon<'a, 2>); -iter_geo_impl_geoarrow_scalar!(MultiPolygonArray<2>, MultiPoint<'a, 2>); -iter_geo_impl_geoarrow_scalar!(MultiPolygonArray<2>, MultiLineString<'a, 2>); -iter_geo_impl_geoarrow_scalar!(MultiPolygonArray<2>, MultiPolygon<'a, 2>); - -// ┌─────────────────────────────────────┐ -// │ Implementations for RHS geo scalars │ -// └─────────────────────────────────────┘ - -macro_rules! non_generic_iter_geo_impl_geo_scalar { - ($first:ty, $second:ty) => { - impl<'a> Within<$second> for $first { - fn is_within(&self, rhs: &$second) -> BooleanArray { - let mut output_array = BooleanBuilder::with_capacity(self.len()); - - self.iter_geo().for_each(|maybe_geom| { - let output = maybe_geom.map(|geom| geom.is_within(rhs)); - output_array.append_option(output) - }); - - output_array.finish() +macro_rules! scalar_impl { + ($array_type:ty) => { + impl<'a, G: GeometryTrait> WithinScalar<'a, G> for $array_type { + fn is_within(&'a self, rhs: &'a G) -> Result { + let right = geometry_to_geo(rhs); + self.try_unary_boolean(|left| { + Ok::<_, GeoArrowError>(left.to_geo().is_within(&right)) + }) } } }; } -// Implementations on PointArray -non_generic_iter_geo_impl_geo_scalar!(PointArray<2>, geo::Point); -non_generic_iter_geo_impl_geo_scalar!(PointArray<2>, geo::LineString); -non_generic_iter_geo_impl_geo_scalar!(PointArray<2>, geo::Polygon); -non_generic_iter_geo_impl_geo_scalar!(PointArray<2>, geo::MultiPoint); -non_generic_iter_geo_impl_geo_scalar!(PointArray<2>, geo::MultiLineString); -non_generic_iter_geo_impl_geo_scalar!(PointArray<2>, geo::MultiPolygon); - -macro_rules! iter_geo_impl_geo_scalar { - ($first:ty, $second:ty) => { - impl<'a> Within<$second> for $first { - fn is_within(&self, rhs: &$second) -> BooleanArray { - let mut output_array = BooleanBuilder::with_capacity(self.len()); - - self.iter_geo().for_each(|maybe_geom| { - let output = maybe_geom.map(|geom| geom.is_within(rhs)); - output_array.append_option(output) - }); - - output_array.finish() +scalar_impl!(PointArray<2>); +scalar_impl!(LineStringArray<2>); +scalar_impl!(PolygonArray<2>); +scalar_impl!(MultiPointArray<2>); +scalar_impl!(MultiLineStringArray<2>); +scalar_impl!(MultiPolygonArray<2>); +scalar_impl!(MixedGeometryArray<2>); +scalar_impl!(GeometryCollectionArray<2>); +scalar_impl!(RectArray<2>); + +impl<'a, G: GeometryTrait> WithinScalar<'a, G> for &dyn NativeArray { + fn is_within(&'a self, rhs: &'a G) -> Result { + use Dimension::*; + use NativeType::*; + + match self.data_type() { + Point(_, XY) => WithinScalar::is_within(self.as_point::<2>(), rhs), + LineString(_, XY) => WithinScalar::is_within(self.as_line_string::<2>(), rhs), + Polygon(_, XY) => WithinScalar::is_within(self.as_polygon::<2>(), rhs), + MultiPoint(_, XY) => WithinScalar::is_within(self.as_multi_point::<2>(), rhs), + MultiLineString(_, XY) => { + WithinScalar::is_within(self.as_multi_line_string::<2>(), rhs) } + MultiPolygon(_, XY) => WithinScalar::is_within(self.as_multi_polygon::<2>(), rhs), + Mixed(_, XY) => WithinScalar::is_within(self.as_mixed::<2>(), rhs), + GeometryCollection(_, XY) => { + WithinScalar::is_within(self.as_geometry_collection::<2>(), rhs) + } + Rect(XY) => WithinScalar::is_within(self.as_rect::<2>(), rhs), + _ => Err(GeoArrowError::IncorrectType("".into())), } - }; + } } - -// Implementations on LineStringArray -iter_geo_impl_geo_scalar!(LineStringArray<2>, geo::Point); -iter_geo_impl_geo_scalar!(LineStringArray<2>, geo::LineString); -iter_geo_impl_geo_scalar!(LineStringArray<2>, geo::Polygon); -iter_geo_impl_geo_scalar!(LineStringArray<2>, geo::MultiPoint); -iter_geo_impl_geo_scalar!(LineStringArray<2>, geo::MultiLineString); -iter_geo_impl_geo_scalar!(LineStringArray<2>, geo::MultiPolygon); - -// Implementations on PolygonArray -iter_geo_impl_geo_scalar!(PolygonArray<2>, geo::Point); -iter_geo_impl_geo_scalar!(PolygonArray<2>, geo::LineString); -iter_geo_impl_geo_scalar!(PolygonArray<2>, geo::Polygon); -iter_geo_impl_geo_scalar!(PolygonArray<2>, geo::MultiPoint); -iter_geo_impl_geo_scalar!(PolygonArray<2>, geo::MultiLineString); -iter_geo_impl_geo_scalar!(PolygonArray<2>, geo::MultiPolygon); - -// Implementations on MultiPointArray -iter_geo_impl_geo_scalar!(MultiPointArray<2>, geo::Point); -iter_geo_impl_geo_scalar!(MultiPointArray<2>, geo::LineString); -iter_geo_impl_geo_scalar!(MultiPointArray<2>, geo::Polygon); -iter_geo_impl_geo_scalar!(MultiPointArray<2>, geo::MultiPoint); -iter_geo_impl_geo_scalar!(MultiPointArray<2>, geo::MultiLineString); -iter_geo_impl_geo_scalar!(MultiPointArray<2>, geo::MultiPolygon); - -// Implementations on MultiLineStringArray -iter_geo_impl_geo_scalar!(MultiLineStringArray<2>, geo::Point); -iter_geo_impl_geo_scalar!(MultiLineStringArray<2>, geo::LineString); -iter_geo_impl_geo_scalar!(MultiLineStringArray<2>, geo::Polygon); -iter_geo_impl_geo_scalar!(MultiLineStringArray<2>, geo::MultiPoint); -iter_geo_impl_geo_scalar!(MultiLineStringArray<2>, geo::MultiLineString); -iter_geo_impl_geo_scalar!(MultiLineStringArray<2>, geo::MultiPolygon); - -// Implementations on MultiPolygonArray -iter_geo_impl_geo_scalar!(MultiPolygonArray<2>, geo::Point); -iter_geo_impl_geo_scalar!(MultiPolygonArray<2>, geo::LineString); -iter_geo_impl_geo_scalar!(MultiPolygonArray<2>, geo::Polygon); -iter_geo_impl_geo_scalar!(MultiPolygonArray<2>, geo::MultiPoint); -iter_geo_impl_geo_scalar!(MultiPolygonArray<2>, geo::MultiLineString); -iter_geo_impl_geo_scalar!(MultiPolygonArray<2>, geo::MultiPolygon); diff --git a/src/algorithm/native/binary.rs b/src/algorithm/native/binary.rs index 53c44607..2954025f 100644 --- a/src/algorithm/native/binary.rs +++ b/src/algorithm/native/binary.rs @@ -7,7 +7,7 @@ use arrow_data::ArrayData; use crate::array::*; use crate::error::{GeoArrowError, Result}; -use crate::trait_::ArrayAccessor; +use crate::trait_::{ArrayAccessor, NativeGeometryAccessor}; pub trait Binary<'a, Rhs: ArrayAccessor<'a> = Self>: ArrayAccessor<'a> { fn binary_boolean(&'a self, rhs: &'a Rhs, op: F) -> Result @@ -205,3 +205,113 @@ impl<'a> Binary<'a, GeometryCollectionArray<2>> for MultiLineStringArray<2> {} impl<'a> Binary<'a, GeometryCollectionArray<2>> for MultiPolygonArray<2> {} impl<'a> Binary<'a, GeometryCollectionArray<2>> for MixedGeometryArray<2> {} impl<'a> Binary<'a, GeometryCollectionArray<2>> for GeometryCollectionArray<2> {} + +pub(crate) fn try_binary_boolean_native_geometry<'a, const D: usize, L, R, F>( + lhs: &'a L, + rhs: &'a R, + op: F, +) -> Result +where + L: NativeGeometryAccessor<'a, D>, + R: NativeGeometryAccessor<'a, D>, + F: Fn(crate::scalar::Geometry<'a, D>, crate::scalar::Geometry<'a, D>) -> Result, +{ + if lhs.len() != rhs.len() { + return Err(GeoArrowError::General( + "Cannot perform binary operation on arrays of different length".to_string(), + )); + } + + if lhs.is_empty() { + return Ok(BooleanBuilder::new().finish()); + } + let len = lhs.len(); + + if lhs.null_count() == 0 && rhs.null_count() == 0 { + let mut builder = BooleanBufferBuilder::new(len); + for idx in 0..len { + let (left, right) = unsafe { + ( + lhs.value_as_geometry_unchecked(idx), + rhs.value_as_geometry_unchecked(idx), + ) + }; + builder.append(op(left, right)?); + } + Ok(BooleanArray::new(builder.finish(), None)) + } else { + let nulls = NullBuffer::union(lhs.nulls(), rhs.nulls()).unwrap(); + + let mut buffer = BooleanBufferBuilder::new(len); + buffer.append_n(len, false); + + nulls.try_for_each_valid_idx(|idx| { + let (left, right) = unsafe { + ( + lhs.value_as_geometry_unchecked(idx), + rhs.value_as_geometry_unchecked(idx), + ) + }; + buffer.set_bit(idx, op(left, right)?); + Ok::<_, GeoArrowError>(()) + })?; + + Ok(BooleanArray::new(buffer.finish(), Some(nulls))) + } +} + +pub(crate) fn try_binary_primitive_native_geometry<'a, const D: usize, L, R, F, O>( + lhs: &'a L, + rhs: &'a R, + op: F, +) -> Result> +where + L: NativeGeometryAccessor<'a, D>, + R: NativeGeometryAccessor<'a, D>, + O: ArrowPrimitiveType, + F: Fn(crate::scalar::Geometry<'a, D>, crate::scalar::Geometry<'a, D>) -> Result, +{ + if lhs.len() != rhs.len() { + return Err(GeoArrowError::General( + "Cannot perform binary operation on arrays of different length".to_string(), + )); + } + + if lhs.is_empty() { + return Ok(PrimitiveArray::from(ArrayData::new_empty(&O::DATA_TYPE))); + } + + let len = lhs.len(); + + if lhs.null_count() == 0 && rhs.null_count() == 0 { + let mut buffer = MutableBuffer::new(len * O::Native::get_byte_width()); + for idx in 0..len { + unsafe { + buffer.push_unchecked(op( + lhs.value_as_geometry_unchecked(idx), + rhs.value_as_geometry_unchecked(idx), + )?); + }; + } + Ok(PrimitiveArray::new(buffer.into(), None)) + } else { + let nulls = NullBuffer::union(lhs.nulls(), rhs.nulls()).unwrap(); + + let mut buffer = BufferBuilder::::new(len); + buffer.append_n_zeroed(len); + let slice = buffer.as_slice_mut(); + + nulls.try_for_each_valid_idx(|idx| { + unsafe { + *slice.get_unchecked_mut(idx) = op( + lhs.value_as_geometry_unchecked(idx), + rhs.value_as_geometry_unchecked(idx), + )? + }; + Ok::<_, GeoArrowError>(()) + })?; + + let values = buffer.finish().into(); + Ok(PrimitiveArray::new(values, Some(nulls))) + } +} diff --git a/src/algorithm/native/mod.rs b/src/algorithm/native/mod.rs index 9509e355..0a6e09cc 100644 --- a/src/algorithm/native/mod.rs +++ b/src/algorithm/native/mod.rs @@ -3,7 +3,7 @@ //! Where possible, operations on scalars are implemented in terms of [geometry //! traits](../../geo_traits). -mod binary; +pub(crate) mod binary; pub mod bounding_rect; mod cast; mod concatenate; diff --git a/src/array/rect/array.rs b/src/array/rect/array.rs index b7af5a06..2b5eeae7 100644 --- a/src/array/rect/array.rs +++ b/src/array/rect/array.rs @@ -12,8 +12,8 @@ use crate::array::{CoordBuffer, CoordType, SeparatedCoordBuffer}; use crate::datatypes::{rect_fields, NativeType}; use crate::error::GeoArrowError; use crate::geo_traits::RectTrait; -use crate::scalar::Rect; -use crate::trait_::{ArrayAccessor, GeometryArraySelfMethods, IntoArrow}; +use crate::scalar::{Geometry, Rect}; +use crate::trait_::{ArrayAccessor, GeometryArraySelfMethods, IntoArrow, NativeGeometryAccessor}; use crate::util::owned_slice_validity; use crate::{ArrayBase, NativeArray}; @@ -178,6 +178,15 @@ impl GeometryArraySelfMethods for RectArray { } } +impl<'a, const D: usize> NativeGeometryAccessor<'a, D> for RectArray { + unsafe fn value_as_geometry_unchecked( + &'a self, + index: usize, + ) -> crate::scalar::Geometry<'a, D> { + Geometry::Rect(Rect::new(&self.lower, &self.upper, index)) + } +} + impl<'a, const D: usize> ArrayAccessor<'a> for RectArray { type Item = Rect<'a, D>; type ItemGeo = geo::Rect; diff --git a/src/scalar/point/scalar.rs b/src/scalar/point/scalar.rs index bd41a6b3..81be8c04 100644 --- a/src/scalar/point/scalar.rs +++ b/src/scalar/point/scalar.rs @@ -139,6 +139,12 @@ impl From> for geo::Geometry { } } +impl From<&Point<'_, D>> for geo::Geometry { + fn from(value: &Point<'_, D>) -> Self { + geo::Geometry::Point(value.into()) + } +} + impl RTreeObject for Point<'_, D> { type Envelope = AABB<[f64; 2]>; From bcdbb8cecebc80a6beab05dd8ade1dccbef997ee Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Sun, 6 Oct 2024 12:02:35 -0400 Subject: [PATCH 2/4] Update euclidean distance --- src/algorithm/geo/euclidean_distance.rs | 272 +++++++++--------------- 1 file changed, 104 insertions(+), 168 deletions(-) diff --git a/src/algorithm/geo/euclidean_distance.rs b/src/algorithm/geo/euclidean_distance.rs index 32f8c1ca..cb127e15 100644 --- a/src/algorithm/geo/euclidean_distance.rs +++ b/src/algorithm/geo/euclidean_distance.rs @@ -1,12 +1,16 @@ +use crate::algorithm::native::binary::try_binary_primitive_native_geometry; +use crate::algorithm::native::Unary; use crate::array::*; -use crate::scalar::*; -use crate::trait_::ArrayAccessor; +use crate::datatypes::{Dimension, NativeType}; +use crate::error::{GeoArrowError, Result}; +use crate::geo_traits::GeometryTrait; +use crate::io::geo::geometry_to_geo; +use crate::trait_::NativeGeometryAccessor; use crate::trait_::NativeScalar; -use arrow_array::builder::Float64Builder; use arrow_array::Float64Array; use geo::EuclideanDistance as _EuclideanDistance; -pub trait EuclideanDistance { +pub trait EuclideanDistance<'a, Rhs> { /// Returns the distance between two geometries /// /// If a `Point` is contained by a `Polygon`, the distance is `0.0` @@ -84,184 +88,116 @@ pub trait EuclideanDistance { /// /// assert_relative_eq!(distance, 1.1313708498984762); /// ``` - fn euclidean_distance(&self, rhs: &Rhs) -> Float64Array; + fn euclidean_distance(&'a self, rhs: &'a Rhs) -> Result; } -// ┌────────────────────────────────┐ -// │ Implementations for RHS arrays │ -// └────────────────────────────────┘ - -// Note: this implementation is outside the macro because it is not generic over O -impl EuclideanDistance> for PointArray<2> { - /// Minimum distance between two Points - fn euclidean_distance(&self, other: &PointArray<2>) -> Float64Array { - assert_eq!(self.len(), other.len()); - let mut output_array = Float64Builder::with_capacity(self.len()); - - self.iter_geo() - .zip(other.iter_geo()) - .for_each(|(first, second)| match (first, second) { - (Some(first), Some(second)) => { - output_array.append_value(first.euclidean_distance(&second)) - } - _ => output_array.append_null(), - }); - - output_array.finish() - } -} - -/// Implementation that iterates over geo objects macro_rules! iter_geo_impl { - ($first:ty, $second:ty) => { - impl<'a> EuclideanDistance<$second> for $first { - fn euclidean_distance(&self, other: &$second) -> Float64Array { - assert_eq!(self.len(), other.len()); - let mut output_array = Float64Builder::with_capacity(self.len()); - - self.iter_geo() - .zip(other.iter_geo()) - .for_each(|(first, second)| match (first, second) { - (Some(first), Some(second)) => { - output_array.append_value(first.euclidean_distance(&second)) - } - _ => output_array.append_null(), - }); - - output_array.finish() + ($array_type:ty) => { + impl<'a, R: NativeGeometryAccessor<'a, 2>> EuclideanDistance<'a, R> for $array_type { + fn euclidean_distance(&'a self, rhs: &'a R) -> Result { + try_binary_primitive_native_geometry(self, rhs, |l, r| { + Ok(l.to_geo().euclidean_distance(&r.to_geo())) + }) } } }; } -// Implementations on PointArray -iter_geo_impl!(PointArray<2>, LineStringArray<2>); -iter_geo_impl!(PointArray<2>, PolygonArray<2>); -iter_geo_impl!(PointArray<2>, MultiPointArray<2>); -iter_geo_impl!(PointArray<2>, MultiLineStringArray<2>); -iter_geo_impl!(PointArray<2>, MultiPolygonArray<2>); - -// Implementations on LineStringArray -iter_geo_impl!(LineStringArray<2>, PointArray<2>); -iter_geo_impl!(LineStringArray<2>, LineStringArray<2>); -iter_geo_impl!(LineStringArray<2>, PolygonArray<2>); -// iter_geo_impl!(LineStringArray<2>, MultiPointArray<2>); -// iter_geo_impl!(LineStringArray<2>, MultiLineStringArray<2>); -// iter_geo_impl!(LineStringArray<2>, MultiPolygonArray<2>); - -// Implementations on PolygonArray -iter_geo_impl!(PolygonArray<2>, PointArray<2>); -iter_geo_impl!(PolygonArray<2>, LineStringArray<2>); -iter_geo_impl!(PolygonArray<2>, PolygonArray<2>); -// iter_geo_impl!(PolygonArray<2>, MultiPointArray<2>); -// iter_geo_impl!(PolygonArray<2>, MultiLineStringArray<2>); -// iter_geo_impl!(PolygonArray<2>, MultiPolygonArray<2>); - -// Implementations on MultiPointArray -iter_geo_impl!(MultiPointArray<2>, PointArray<2>); -// iter_geo_impl!(MultiPointArray<2>, LineStringArray<2>); -// iter_geo_impl!(MultiPointArray<2>, PolygonArray<2>); -// iter_geo_impl!(MultiPointArray<2>, MultiPointArray<2>); -// iter_geo_impl!(MultiPointArray<2>, MultiLineStringArray<2>); -// iter_geo_impl!(MultiPointArray<2>, MultiPolygonArray<2>); - -// Implementations on MultiLineStringArray -iter_geo_impl!(MultiLineStringArray<2>, PointArray<2>); -// iter_geo_impl!(MultiLineStringArray<2>, LineStringArray<2>); -// iter_geo_impl!(MultiLineStringArray<2>, PolygonArray<2>); -// iter_geo_impl!(MultiLineStringArray<2>, MultiPointArray<2>); -// iter_geo_impl!(MultiLineStringArray<2>, MultiLineStringArray<2>); -// iter_geo_impl!(MultiLineStringArray<2>, MultiPolygonArray<2>); - -// Implementations on MultiPolygonArray -iter_geo_impl!(MultiPolygonArray<2>, PointArray<2>); -// iter_geo_impl!(MultiPolygonArray<2>, LineStringArray<2>); -// iter_geo_impl!(MultiPolygonArray<2>, PolygonArray<2>); -// iter_geo_impl!(MultiPolygonArray<2>, MultiPointArray<2>); -// iter_geo_impl!(MultiPolygonArray<2>, MultiLineStringArray<2>); -// iter_geo_impl!(MultiPolygonArray<2>, MultiPolygonArray<2>); - -// ┌─────────────────────────────────┐ -// │ Implementations for RHS scalars │ -// └─────────────────────────────────┘ - -// Note: this implementation is outside the macro because it is not generic over O -impl<'a> EuclideanDistance> for PointArray<2> { - /// Minimum distance between two Points - fn euclidean_distance(&self, other: &Point<'a, 2>) -> Float64Array { - let mut output_array = Float64Builder::with_capacity(self.len()); - - self.iter_geo().for_each(|maybe_point| { - let output = maybe_point.map(|point| point.euclidean_distance(&other.to_geo())); - output_array.append_option(output) - }); - - output_array.finish() +iter_geo_impl!(PointArray<2>); +iter_geo_impl!(LineStringArray<2>); +iter_geo_impl!(PolygonArray<2>); +iter_geo_impl!(MultiPointArray<2>); +iter_geo_impl!(MultiLineStringArray<2>); +iter_geo_impl!(MultiPolygonArray<2>); +iter_geo_impl!(MixedGeometryArray<2>); +iter_geo_impl!(GeometryCollectionArray<2>); +iter_geo_impl!(RectArray<2>); + +impl<'a, R: NativeGeometryAccessor<'a, 2>> EuclideanDistance<'a, R> for &dyn NativeArray { + fn euclidean_distance(&'a self, rhs: &'a R) -> Result { + use Dimension::*; + use NativeType::*; + + match self.data_type() { + Point(_, XY) => EuclideanDistance::euclidean_distance(self.as_point::<2>(), rhs), + LineString(_, XY) => { + EuclideanDistance::euclidean_distance(self.as_line_string::<2>(), rhs) + } + Polygon(_, XY) => EuclideanDistance::euclidean_distance(self.as_polygon::<2>(), rhs), + MultiPoint(_, XY) => { + EuclideanDistance::euclidean_distance(self.as_multi_point::<2>(), rhs) + } + MultiLineString(_, XY) => { + EuclideanDistance::euclidean_distance(self.as_multi_line_string::<2>(), rhs) + } + MultiPolygon(_, XY) => { + EuclideanDistance::euclidean_distance(self.as_multi_polygon::<2>(), rhs) + } + Mixed(_, XY) => EuclideanDistance::euclidean_distance(self.as_mixed::<2>(), rhs), + GeometryCollection(_, XY) => { + EuclideanDistance::euclidean_distance(self.as_geometry_collection::<2>(), rhs) + } + Rect(XY) => EuclideanDistance::euclidean_distance(self.as_rect::<2>(), rhs), + _ => Err(GeoArrowError::IncorrectType("".into())), + } } } -/// Implementation that iterates over geo objects -macro_rules! iter_geo_impl_scalar { - ($first:ty, $second:ty) => { - impl<'a> EuclideanDistance<$second> for $first { - fn euclidean_distance(&self, other: &$second) -> Float64Array { - let mut output_array = Float64Builder::with_capacity(self.len()); - let other_geo = other.to_geo(); - - self.iter_geo().for_each(|maybe_geom| { - let output = maybe_geom.map(|geom| geom.euclidean_distance(&other_geo)); - output_array.append_option(output) - }); +pub trait EuclideanDistanceScalar<'a, G: GeometryTrait> { + fn euclidean_distance(&'a self, rhs: &'a G) -> Result; +} - output_array.finish() +macro_rules! scalar_impl { + ($array_type:ty) => { + impl<'a, G: GeometryTrait> EuclideanDistanceScalar<'a, G> for $array_type { + fn euclidean_distance(&'a self, rhs: &'a G) -> Result { + let right = geometry_to_geo(rhs); + self.try_unary_primitive(|left| { + Ok::<_, GeoArrowError>(left.to_geo().euclidean_distance(&right)) + }) } } }; } -// Implementations on PointArray -iter_geo_impl_scalar!(PointArray<2>, LineString<'a, 2>); -iter_geo_impl_scalar!(PointArray<2>, Polygon<'a, 2>); -iter_geo_impl_scalar!(PointArray<2>, MultiPoint<'a, 2>); -iter_geo_impl_scalar!(PointArray<2>, MultiLineString<'a, 2>); -iter_geo_impl_scalar!(PointArray<2>, MultiPolygon<'a, 2>); - -// Implementations on LineStringArray -iter_geo_impl_scalar!(LineStringArray<2>, Point<'a, 2>); -iter_geo_impl_scalar!(LineStringArray<2>, LineString<'a, 2>); -iter_geo_impl_scalar!(LineStringArray<2>, Polygon<'a, 2>); -// iter_geo_impl_scalar!(LineStringArray<2>, MultiPoint<'a, 2>); -// iter_geo_impl_scalar!(LineStringArray<2>, MultiLineString<'a, 2>); -// iter_geo_impl_scalar!(LineStringArray<2>, MultiPolygon<'a, 2>); - -// Implementations on PolygonArray -iter_geo_impl_scalar!(PolygonArray<2>, Point<'a, 2>); -iter_geo_impl_scalar!(PolygonArray<2>, LineString<'a, 2>); -iter_geo_impl_scalar!(PolygonArray<2>, Polygon<'a, 2>); -// iter_geo_impl_scalar!(PolygonArray<2>, MultiPoint<'a, 2>); -// iter_geo_impl_scalar!(PolygonArray<2>, MultiLineString<'a, 2>); -// iter_geo_impl_scalar!(PolygonArray<2>, MultiPolygon<'a, 2>); - -// Implementations on MultiPointArray -iter_geo_impl_scalar!(MultiPointArray<2>, Point<'a, 2>); -// iter_geo_impl_scalar!(MultiPointArray<2>, LineString<'a, 2>); -// iter_geo_impl_scalar!(MultiPointArray<2>, Polygon<'a, 2>); -// iter_geo_impl_scalar!(MultiPointArray<2>, MultiPoint<'a, 2>); -// iter_geo_impl_scalar!(MultiPointArray<2>, MultiLineString<'a, 2>); -// iter_geo_impl_scalar!(MultiPointArray<2>, MultiPolygon<'a, 2>); - -// Implementations on MultiLineStringArray -iter_geo_impl_scalar!(MultiLineStringArray<2>, Point<'a, 2>); -// iter_geo_impl_scalar!(MultiLineStringArray<2>, LineString<'a, 2>); -// iter_geo_impl_scalar!(MultiLineStringArray<2>, Polygon<'a, 2>); -// iter_geo_impl_scalar!(MultiLineStringArray<2>, MultiPoint<'a, 2>); -// iter_geo_impl_scalar!(MultiLineStringArray<2>, MultiLineString<'a, 2>); -// iter_geo_impl_scalar!(MultiLineStringArray<2>, MultiPolygon<'a, 2>); - -// Implementations on MultiPolygonArray -iter_geo_impl_scalar!(MultiPolygonArray<2>, Point<'a, 2>); -// iter_geo_impl_scalar!(MultiPolygonArray<2>, LineString<'a, 2>); -// iter_geo_impl_scalar!(MultiPolygonArray<2>, Polygon<'a, 2>); -// iter_geo_impl_scalar!(MultiPolygonArray<2>, MultiPoint<'a, 2>); -// iter_geo_impl_scalar!(MultiPolygonArray<2>, MultiLineString<'a, 2>); -// iter_geo_impl_scalar!(MultiPolygonArray<2>, MultiPolygon<'a, 2>); +scalar_impl!(PointArray<2>); +scalar_impl!(LineStringArray<2>); +scalar_impl!(PolygonArray<2>); +scalar_impl!(MultiPointArray<2>); +scalar_impl!(MultiLineStringArray<2>); +scalar_impl!(MultiPolygonArray<2>); +scalar_impl!(MixedGeometryArray<2>); +scalar_impl!(GeometryCollectionArray<2>); +scalar_impl!(RectArray<2>); + +impl<'a, G: GeometryTrait> EuclideanDistanceScalar<'a, G> for &dyn NativeArray { + fn euclidean_distance(&'a self, rhs: &'a G) -> Result { + use Dimension::*; + use NativeType::*; + + match self.data_type() { + Point(_, XY) => EuclideanDistanceScalar::euclidean_distance(self.as_point::<2>(), rhs), + LineString(_, XY) => { + EuclideanDistanceScalar::euclidean_distance(self.as_line_string::<2>(), rhs) + } + Polygon(_, XY) => { + EuclideanDistanceScalar::euclidean_distance(self.as_polygon::<2>(), rhs) + } + MultiPoint(_, XY) => { + EuclideanDistanceScalar::euclidean_distance(self.as_multi_point::<2>(), rhs) + } + MultiLineString(_, XY) => { + EuclideanDistanceScalar::euclidean_distance(self.as_multi_line_string::<2>(), rhs) + } + MultiPolygon(_, XY) => { + EuclideanDistanceScalar::euclidean_distance(self.as_multi_polygon::<2>(), rhs) + } + Mixed(_, XY) => EuclideanDistanceScalar::euclidean_distance(self.as_mixed::<2>(), rhs), + GeometryCollection(_, XY) => { + EuclideanDistanceScalar::euclidean_distance(self.as_geometry_collection::<2>(), rhs) + } + Rect(XY) => EuclideanDistanceScalar::euclidean_distance(self.as_rect::<2>(), rhs), + _ => Err(GeoArrowError::IncorrectType("".into())), + } + } +} From de83eca121542b04a03bc8e31ce3332bf287f73f Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Sun, 6 Oct 2024 12:06:13 -0400 Subject: [PATCH 3/4] simplify --- src/algorithm/geo/affine_ops.rs | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/src/algorithm/geo/affine_ops.rs b/src/algorithm/geo/affine_ops.rs index e3863d8a..18d8ff4b 100644 --- a/src/algorithm/geo/affine_ops.rs +++ b/src/algorithm/geo/affine_ops.rs @@ -149,16 +149,6 @@ impl AffineOps<&AffineTransform> for &dyn NativeArray { } } -impl AffineOps<&AffineTransform> for ChunkedPointArray<2> { - type Output = Self; - - fn affine_transform(&self, transform: &AffineTransform) -> Self::Output { - self.map(|chunk| chunk.affine_transform(transform)) - .try_into() - .unwrap() - } -} - macro_rules! impl_chunked { ($struct_name:ty) => { impl AffineOps<&AffineTransform> for $struct_name { @@ -173,6 +163,7 @@ macro_rules! impl_chunked { }; } +impl_chunked!(ChunkedPointArray<2>); impl_chunked!(ChunkedLineStringArray<2>); impl_chunked!(ChunkedPolygonArray<2>); impl_chunked!(ChunkedMultiPointArray<2>); From b05bec0abbb53806f3ddc3cbc6cc018368c52b3c Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Mon, 7 Oct 2024 00:04:06 -0400 Subject: [PATCH 4/4] commit wip --- benches/nybb.rs | 8 +- src/algorithm/broadcasting/geometry.rs | 79 +------------------ src/algorithm/broadcasting/geometry_bak.rs | 78 +++++++++++++++++++ src/algorithm/broadcasting/mod.rs | 2 +- src/algorithm/geo/contains.rs | 89 +++++----------------- src/algorithm/geo/mod.rs | 2 +- 6 files changed, 107 insertions(+), 151 deletions(-) create mode 100644 src/algorithm/broadcasting/geometry_bak.rs diff --git a/benches/nybb.rs b/benches/nybb.rs index 8b8b4ed0..d6094d16 100644 --- a/benches/nybb.rs +++ b/benches/nybb.rs @@ -2,9 +2,9 @@ use std::fs::File; use arrow_ipc::reader::FileReader; use criterion::{criterion_group, criterion_main, Criterion}; -use geoarrow::algorithm::geo::EuclideanDistance; +use geoarrow::algorithm::geo::EuclideanDistanceScalar; use geoarrow::array::{MultiPolygonArray, PointArray}; -use geoarrow::trait_::ArrayAccessor; +use geoarrow::trait_::{ArrayAccessor, NativeGeometryAccessor}; fn load_nybb() -> MultiPolygonArray<2> { let file = File::open("fixtures/nybb.arrow").unwrap(); @@ -39,9 +39,9 @@ pub fn criterion_benchmark(c: &mut Criterion) { c.bench_function("euclidean distance to scalar point", |b| { b.iter(|| { let point = geo::Point::new(0.0f64, 0.0f64); - let point_array = PointArray::from(vec![point].as_slice()); + let point_array = PointArray::<2>::from(vec![point].as_slice()); - let _distances = array.euclidean_distance(&point_array.value(0)); + let _distances = array.euclidean_distance(&point_array.value_as_geometry(0)); }) }); } diff --git a/src/algorithm/broadcasting/geometry.rs b/src/algorithm/broadcasting/geometry.rs index 05891b3c..bbacd93a 100644 --- a/src/algorithm/broadcasting/geometry.rs +++ b/src/algorithm/broadcasting/geometry.rs @@ -1,78 +1,7 @@ -use crate::algorithm::broadcasting::linestring::BroadcastLineStringIter; -use crate::algorithm::broadcasting::multilinestring::BroadcastMultiLineStringIter; -use crate::algorithm::broadcasting::multipoint::BroadcastMultiPointIter; -use crate::algorithm::broadcasting::multipolygon::BroadcastMultiPolygonIter; -use crate::algorithm::broadcasting::point::BroadcastPointIter; -use crate::algorithm::broadcasting::polygon::BroadcastPolygonIter; -use crate::algorithm::broadcasting::{ - BroadcastableLineString, BroadcastableMultiLineString, BroadcastableMultiPoint, - BroadcastableMultiPolygon, BroadcastablePoint, BroadcastablePolygon, -}; -use crate::scalar::Geometry; -use arrow_array::OffsetSizeTrait; +use crate::trait_::NativeGeometryAccessor; -/// An enum over all broadcastable geometry types. -/// -/// [`IntoIterator`] is implemented for this, where it will iterate over the `Array` variant -/// normally but will iterate over the `Scalar` variant forever. #[derive(Debug)] -pub enum BroadcastableGeometry<'a, O: OffsetSizeTrait> { - Point(BroadcastablePoint<'a>), - LineString(BroadcastableLineString<'a, O>), - Polygon(BroadcastablePolygon<'a, O>), - MultiPoint(BroadcastableMultiPoint<'a, O>), - MultiLineString(BroadcastableMultiLineString<'a, O>), - MultiPolygon(BroadcastableMultiPolygon<'a, O>), -} - -pub enum BroadcastGeometryIter<'a, O: OffsetSizeTrait> { - Point(BroadcastPointIter<'a>), - LineString(BroadcastLineStringIter<'a, O>), - Polygon(BroadcastPolygonIter<'a, O>), - MultiPoint(BroadcastMultiPointIter<'a, O>), - MultiLineString(BroadcastMultiLineStringIter<'a, O>), - MultiPolygon(BroadcastMultiPolygonIter<'a, O>), -} - -impl<'a, O: OffsetSizeTrait> IntoIterator for &'a BroadcastableGeometry<'a, O> { - type Item = Option>; - type IntoIter = BroadcastGeometryIter<'a, O>; - - fn into_iter(self) -> Self::IntoIter { - match self { - BroadcastableGeometry::Point(p) => BroadcastGeometryIter::Point(p.into_iter()), - BroadcastableGeometry::LineString(p) => { - BroadcastGeometryIter::LineString(p.into_iter()) - } - BroadcastableGeometry::Polygon(p) => BroadcastGeometryIter::Polygon(p.into_iter()), - BroadcastableGeometry::MultiPoint(p) => { - BroadcastGeometryIter::MultiPoint(p.into_iter()) - } - BroadcastableGeometry::MultiLineString(p) => { - BroadcastGeometryIter::MultiLineString(p.into_iter()) - } - BroadcastableGeometry::MultiPolygon(p) => { - BroadcastGeometryIter::MultiPolygon(p.into_iter()) - } - } - } -} - -impl<'a, O: OffsetSizeTrait> Iterator for BroadcastGeometryIter<'a, O> { - type Item = Option>; - - fn next(&mut self) -> Option { - match self { - BroadcastGeometryIter::Point(p) => p.next().map(|g| g.map(Geometry::Point)), - BroadcastGeometryIter::LineString(p) => p.next().map(|g| g.map(Geometry::LineString)), - BroadcastGeometryIter::Polygon(p) => p.next().map(|g| g.map(Geometry::Polygon)), - BroadcastGeometryIter::MultiPoint(p) => p.next().map(|g| g.map(Geometry::MultiPoint)), - BroadcastGeometryIter::MultiLineString(p) => { - p.next().map(|g| g.map(Geometry::MultiLineString)) - } - BroadcastGeometryIter::MultiPolygon(p) => { - p.next().map(|g| g.map(Geometry::MultiPolygon)) - } - } - } +pub enum BroadcastableGeoGeometry<'a> { + Scalar(&'a geo::Geometry), + Array(&'a dyn NativeGeometryAccessor<'a, 2>), } diff --git a/src/algorithm/broadcasting/geometry_bak.rs b/src/algorithm/broadcasting/geometry_bak.rs new file mode 100644 index 00000000..05891b3c --- /dev/null +++ b/src/algorithm/broadcasting/geometry_bak.rs @@ -0,0 +1,78 @@ +use crate::algorithm::broadcasting::linestring::BroadcastLineStringIter; +use crate::algorithm::broadcasting::multilinestring::BroadcastMultiLineStringIter; +use crate::algorithm::broadcasting::multipoint::BroadcastMultiPointIter; +use crate::algorithm::broadcasting::multipolygon::BroadcastMultiPolygonIter; +use crate::algorithm::broadcasting::point::BroadcastPointIter; +use crate::algorithm::broadcasting::polygon::BroadcastPolygonIter; +use crate::algorithm::broadcasting::{ + BroadcastableLineString, BroadcastableMultiLineString, BroadcastableMultiPoint, + BroadcastableMultiPolygon, BroadcastablePoint, BroadcastablePolygon, +}; +use crate::scalar::Geometry; +use arrow_array::OffsetSizeTrait; + +/// An enum over all broadcastable geometry types. +/// +/// [`IntoIterator`] is implemented for this, where it will iterate over the `Array` variant +/// normally but will iterate over the `Scalar` variant forever. +#[derive(Debug)] +pub enum BroadcastableGeometry<'a, O: OffsetSizeTrait> { + Point(BroadcastablePoint<'a>), + LineString(BroadcastableLineString<'a, O>), + Polygon(BroadcastablePolygon<'a, O>), + MultiPoint(BroadcastableMultiPoint<'a, O>), + MultiLineString(BroadcastableMultiLineString<'a, O>), + MultiPolygon(BroadcastableMultiPolygon<'a, O>), +} + +pub enum BroadcastGeometryIter<'a, O: OffsetSizeTrait> { + Point(BroadcastPointIter<'a>), + LineString(BroadcastLineStringIter<'a, O>), + Polygon(BroadcastPolygonIter<'a, O>), + MultiPoint(BroadcastMultiPointIter<'a, O>), + MultiLineString(BroadcastMultiLineStringIter<'a, O>), + MultiPolygon(BroadcastMultiPolygonIter<'a, O>), +} + +impl<'a, O: OffsetSizeTrait> IntoIterator for &'a BroadcastableGeometry<'a, O> { + type Item = Option>; + type IntoIter = BroadcastGeometryIter<'a, O>; + + fn into_iter(self) -> Self::IntoIter { + match self { + BroadcastableGeometry::Point(p) => BroadcastGeometryIter::Point(p.into_iter()), + BroadcastableGeometry::LineString(p) => { + BroadcastGeometryIter::LineString(p.into_iter()) + } + BroadcastableGeometry::Polygon(p) => BroadcastGeometryIter::Polygon(p.into_iter()), + BroadcastableGeometry::MultiPoint(p) => { + BroadcastGeometryIter::MultiPoint(p.into_iter()) + } + BroadcastableGeometry::MultiLineString(p) => { + BroadcastGeometryIter::MultiLineString(p.into_iter()) + } + BroadcastableGeometry::MultiPolygon(p) => { + BroadcastGeometryIter::MultiPolygon(p.into_iter()) + } + } + } +} + +impl<'a, O: OffsetSizeTrait> Iterator for BroadcastGeometryIter<'a, O> { + type Item = Option>; + + fn next(&mut self) -> Option { + match self { + BroadcastGeometryIter::Point(p) => p.next().map(|g| g.map(Geometry::Point)), + BroadcastGeometryIter::LineString(p) => p.next().map(|g| g.map(Geometry::LineString)), + BroadcastGeometryIter::Polygon(p) => p.next().map(|g| g.map(Geometry::Polygon)), + BroadcastGeometryIter::MultiPoint(p) => p.next().map(|g| g.map(Geometry::MultiPoint)), + BroadcastGeometryIter::MultiLineString(p) => { + p.next().map(|g| g.map(Geometry::MultiLineString)) + } + BroadcastGeometryIter::MultiPolygon(p) => { + p.next().map(|g| g.map(Geometry::MultiPolygon)) + } + } + } +} diff --git a/src/algorithm/broadcasting/mod.rs b/src/algorithm/broadcasting/mod.rs index f55f4b8c..47411055 100644 --- a/src/algorithm/broadcasting/mod.rs +++ b/src/algorithm/broadcasting/mod.rs @@ -23,7 +23,7 @@ mod primitive; mod vec; -// pub use geometry::BroadcastableGeometry; +// pub use geometry::BroadcastableGeoGeometry; // pub use linestring::BroadcastableLineString; // pub use multilinestring::BroadcastableMultiLineString; // pub use multipoint::BroadcastableMultiPoint; diff --git a/src/algorithm/geo/contains.rs b/src/algorithm/geo/contains.rs index 2d8fb163..a21c6213 100644 --- a/src/algorithm/geo/contains.rs +++ b/src/algorithm/geo/contains.rs @@ -1,4 +1,5 @@ -use crate::algorithm::native::{Binary, Unary}; +use crate::algorithm::native::binary::try_binary_boolean_native_geometry; +use crate::algorithm::native::Unary; use crate::array::*; use crate::datatypes::{Dimension, NativeType}; use crate::error::GeoArrowError; @@ -10,7 +11,7 @@ use crate::io::geo::{ geometry_collection_to_geo, geometry_to_geo, line_string_to_geo, multi_line_string_to_geo, multi_point_to_geo, multi_polygon_to_geo, point_to_geo, polygon_to_geo, }; -use crate::trait_::{ArrayAccessor, NativeScalar}; +use crate::trait_::{ArrayAccessor, NativeGeometryAccessor, NativeScalar}; use crate::NativeArray; use arrow_array::builder::BooleanBuilder; use arrow_array::BooleanArray; @@ -50,31 +51,16 @@ use geo::Contains as _Contains; /// // Point in Polygon /// assert!(polygon.contains(&point!(x: 1., y: 1.))); /// ``` -pub trait Contains { - fn contains(&self, rhs: &Rhs) -> BooleanArray; -} - -// ┌────────────────────────────────┐ -// │ Implementations for RHS arrays │ -// └────────────────────────────────┘ - -// Note: this implementation is outside the macro because it is not generic over O -impl Contains for PointArray<2> { - fn contains(&self, rhs: &Self) -> BooleanArray { - self.try_binary_boolean(rhs, |left, right| { - Ok(left.to_geo().contains(&right.to_geo())) - }) - .unwrap() - } +pub trait Contains<'a, Rhs> { + fn contains(&'a self, rhs: &'a Rhs) -> BooleanArray; } -// Implementation that iterates over geo objects macro_rules! iter_geo_impl { - ($first:ty, $second:ty) => { - impl<'a> Contains<$second> for $first { - fn contains(&self, rhs: &$second) -> BooleanArray { - self.try_binary_boolean(rhs, |left, right| { - Ok(left.to_geo().contains(&right.to_geo())) + ($array_type:ty) => { + impl<'a, R: NativeGeometryAccessor<'a, 2>> Contains<'a, R> for $array_type { + fn contains(&'a self, rhs: &'a R) -> BooleanArray { + try_binary_boolean_native_geometry(self, rhs, |l, r| { + Ok(l.to_geo().contains(&r.to_geo())) }) .unwrap() } @@ -82,52 +68,15 @@ macro_rules! iter_geo_impl { }; } -// Implementations on PointArray -iter_geo_impl!(PointArray<2>, LineStringArray<2>); -iter_geo_impl!(PointArray<2>, PolygonArray<2>); -iter_geo_impl!(PointArray<2>, MultiPointArray<2>); -iter_geo_impl!(PointArray<2>, MultiLineStringArray<2>); -iter_geo_impl!(PointArray<2>, MultiPolygonArray<2>); - -// Implementations on LineStringArray -iter_geo_impl!(LineStringArray<2>, PointArray<2>); -iter_geo_impl!(LineStringArray<2>, LineStringArray<2>); -iter_geo_impl!(LineStringArray<2>, PolygonArray<2>); -iter_geo_impl!(LineStringArray<2>, MultiPointArray<2>); -iter_geo_impl!(LineStringArray<2>, MultiLineStringArray<2>); -iter_geo_impl!(LineStringArray<2>, MultiPolygonArray<2>); - -// Implementations on PolygonArray -iter_geo_impl!(PolygonArray<2>, PointArray<2>); -iter_geo_impl!(PolygonArray<2>, LineStringArray<2>); -iter_geo_impl!(PolygonArray<2>, PolygonArray<2>); -iter_geo_impl!(PolygonArray<2>, MultiPointArray<2>); -iter_geo_impl!(PolygonArray<2>, MultiLineStringArray<2>); -iter_geo_impl!(PolygonArray<2>, MultiPolygonArray<2>); - -// Implementations on MultiPointArray -iter_geo_impl!(MultiPointArray<2>, PointArray<2>); -iter_geo_impl!(MultiPointArray<2>, LineStringArray<2>); -iter_geo_impl!(MultiPointArray<2>, PolygonArray<2>); -iter_geo_impl!(MultiPointArray<2>, MultiPointArray<2>); -iter_geo_impl!(MultiPointArray<2>, MultiLineStringArray<2>); -iter_geo_impl!(MultiPointArray<2>, MultiPolygonArray<2>); - -// Implementations on MultiLineStringArray -iter_geo_impl!(MultiLineStringArray<2>, PointArray<2>); -iter_geo_impl!(MultiLineStringArray<2>, LineStringArray<2>); -iter_geo_impl!(MultiLineStringArray<2>, PolygonArray<2>); -iter_geo_impl!(MultiLineStringArray<2>, MultiPointArray<2>); -iter_geo_impl!(MultiLineStringArray<2>, MultiLineStringArray<2>); -iter_geo_impl!(MultiLineStringArray<2>, MultiPolygonArray<2>); - -// Implementations on MultiPolygonArray -iter_geo_impl!(MultiPolygonArray<2>, PointArray<2>); -iter_geo_impl!(MultiPolygonArray<2>, LineStringArray<2>); -iter_geo_impl!(MultiPolygonArray<2>, PolygonArray<2>); -iter_geo_impl!(MultiPolygonArray<2>, MultiPointArray<2>); -iter_geo_impl!(MultiPolygonArray<2>, MultiLineStringArray<2>); -iter_geo_impl!(MultiPolygonArray<2>, MultiPolygonArray<2>); +iter_geo_impl!(PointArray<2>); +iter_geo_impl!(LineStringArray<2>); +iter_geo_impl!(PolygonArray<2>); +iter_geo_impl!(MultiPointArray<2>); +iter_geo_impl!(MultiLineStringArray<2>); +iter_geo_impl!(MultiPolygonArray<2>); +iter_geo_impl!(MixedGeometryArray<2>); +iter_geo_impl!(GeometryCollectionArray<2>); +iter_geo_impl!(RectArray<2>); // ┌─────────────────────────────────┐ // │ Implementations for RHS scalars │ diff --git a/src/algorithm/geo/mod.rs b/src/algorithm/geo/mod.rs index 4d3ffb76..fe97347c 100644 --- a/src/algorithm/geo/mod.rs +++ b/src/algorithm/geo/mod.rs @@ -57,7 +57,7 @@ pub use euclidean_length::EuclideanLength; /// Calculate the minimum Euclidean distance between two `Geometries`. mod euclidean_distance; -pub use euclidean_distance::EuclideanDistance; +pub use euclidean_distance::{EuclideanDistance, EuclideanDistanceScalar}; mod frechet_distance; pub use frechet_distance::{FrechetDistance, FrechetDistanceLineString};