rapidsai · rapids-bot · Mar 18, 2023 · Feb 6, 2023 · Feb 7, 2023 · Feb 7, 2023
@@ -19,6 +19,8 @@
 #include <cudf/column/column_device_view.cuh>
 #include <cudf/types.hpp>
 
+#include <cuspatial/detail/utility/floating_point.cuh>
+
 namespace cuspatial {
 namespace detail {
 
@@ -32,6 +34,7 @@ inline __device__ bool is_point_in_polygon(T const x,
                                            cudf::column_device_view const& poly_points_y)
 {
   bool in_polygon     = false;
+  bool is_colinear    = false;
   uint32_t poly_begin = poly_offsets.element<uint32_t>(poly_idx);
   uint32_t poly_end   = poly_idx < poly_offsets.size() - 1
                           ? poly_offsets.element<uint32_t>(poly_idx + 1)
@@ -54,10 +57,26 @@ inline __device__ bool is_point_in_polygon(T const x,
       bool y_in_bounds     = y_between_ay_by || y_between_by_ay;  // is y in range [by, ay]
       T run                = x1 - x0;
       T rise               = y1 - y0;
-      T rise_to_point      = y - y0;
+
+      // Both endpoints of the line segment coincide, so the segment degenerates to a point.
+      // This can happen when the first and last vertices of a polygon ring are the same.
+      // In that case, skip the segment and do not attempt ray casting on the degenerate point.
+      T constexpr zero = 0.0;
+      if (float_equal(run, zero) && float_equal(rise, zero)) continue;
+
+      T rise_to_point = y - y0;
+      T run_to_point  = x - x0;
+
+      is_colinear = float_equal(run * rise_to_point, run_to_point * rise);
+      if (is_colinear) { break; }
 
       if (y_in_bounds && x < (run / rise) * rise_to_point + x0) { in_polygon = not in_polygon; }
     }
+    // A point found colinear with a polygon edge is treated as on the edge: not contained.
+    if (is_colinear) {
+      in_polygon = false;
+      break;
+    }
   }
 
   return in_polygon;

@@ -2,14 +2,20 @@
 
 from abc import ABC, abstractmethod
 
+import cupy as cp
+
 import cudf
 
 from cuspatial.core._column.geocolumn import GeoColumn
-from cuspatial.core.binpreds.contains import contains_properly
+from cuspatial.core.binpreds.contains import (
+    byte_limited_contains_properly,
+    quadtree_contains_properly,
+)
 from cuspatial.utils.column_utils import (
     contains_only_linestrings,
     contains_only_multipoints,
     contains_only_polygons,
+    has_multipolygons,
     has_same_geometry,
 )
 
@@ -129,6 +135,14 @@ def __call__(self) -> cudf.Series:
 
 
 class ContainsProperlyBinpred(BinaryPredicate):
+    def __init__(self, lhs, rhs, align=True, allpairs=False):
+        super().__init__(lhs, rhs, align=align)
+        if allpairs:
+            self.allpairs = True
+            self.align = False
+        else:
+            self.allpairs = False
+
     def preprocess(self, lhs, rhs):
         """Preprocess the input GeoSeries to ensure that they are of the
         correct type for the predicate."""
@@ -154,11 +168,14 @@ def preprocess(self, lhs, rhs):
         point_indices = geom.point_indices()
         from cuspatial.core.geoseries import GeoSeries
 
-        final_rhs = GeoSeries(
-            GeoColumn._from_points_xy(xy_points._column)
-        ).points
+        final_rhs = GeoSeries(GeoColumn._from_points_xy(xy_points._column))
         return (lhs, final_rhs, point_indices)
 
+    def _should_use_quadtree(self):
+        return (
+            len(self.lhs) >= 32 or has_multipolygons(self.lhs) or self.allpairs
+        )
+
     def _op(self, lhs, points):
         """Compute the contains_properly relationship between two GeoSeries.
         A feature A contains another feature B if no points of B lie in the
@@ -168,23 +185,71 @@ def _op(self, lhs, points):
             raise TypeError(
                 "`.contains` can only be called with polygon series."
             )
+        if self._should_use_quadtree():
+            point_result = quadtree_contains_properly(
+                points,
+                lhs,
+            )
+        else:
+            point_result = byte_limited_contains_properly(points, lhs)
+        return point_result
 
-        # call pip on the three subtypes on the right:
-        point_result = contains_properly(
-            points.x,
-            points.y,
-            lhs.polygons.part_offset,
-            lhs.polygons.ring_offset,
-            lhs.polygons.x,
-            lhs.polygons.y,
+    def _postprocess_quadtree_result(self, point_indices, point_result):
+        # If there are more than 31 polygons, the point indices are
+        # returned as a dataframe with two columns: part_index and
+        # point_index.
+
+        # This complex block of code is to create a dataframe that
+        # contains the polygon index and the point index for each
+        # point in the polygon. Quadtree pip returns the _part_index_
+        # of the polygon, not the polygon index.
+        rings_to_parts = cp.array(self.lhs.polygons.part_offset)
+        part_sizes = rings_to_parts[1:] - rings_to_parts[:-1]
+        parts_map = cudf.Series(
+            cp.arange(len(part_sizes)), name="part_index"
+        ).repeat(part_sizes)
+        parts_df = parts_map.reset_index(drop=True).reset_index()
+        # Mapping of parts to polygons
+        parts_to_geoms = cp.array(self.lhs.polygons.geometry_offset)
+        geometry_sizes = parts_to_geoms[1:] - parts_to_geoms[:-1]
+        geometry_map = cudf.Series(
+            cp.arange(len(geometry_sizes)), name="polygon_index"
+        ).repeat(geometry_sizes)
+        geom_df = geometry_map.reset_index(drop=True)
+        geom_df.index.name = "part_index"
+        geom_df = geom_df.reset_index()
+
+        # Replace the part index with the polygon index
+        part_result = parts_df.merge(point_result, on="part_index")
+        # Replace the polygon index with the row index
+        result = geom_df.merge(part_result, on="part_index")
+        result = result[["polygon_index", "point_index"]]
+        result = result.drop_duplicates()
+        # Replace the polygon index with the original index
+        result["polygon_index"] = result["polygon_index"].replace(
+            cudf.Series(self.lhs.index, index=cp.arange(len(self.lhs.index)))
         )
-        return point_result
+        # Using allpairs for all requests with more than 31 polygons.
+        if not self.allpairs:
+            if len(result) == 0:
+                return cudf.Series([False] * len(self.lhs))
+            final_result = cudf.Series([False] * len(point_indices))
+            grouped = result.groupby("polygon_index").count() == len(
+                point_indices
+            )
+            final_result.loc[grouped.index] = True
+            return final_result
+        else:
+            return result
 
-    def postprocess(self, point_indices, point_result):
-        """Postprocess the output GeoSeries to ensure that they are of the
-        correct type for the predicate."""
-        result = cudf.DataFrame({"idx": point_indices, "pip": point_result})
-        df_result = result
+    def _postprocess_brute_force_result(self, point_indices, point_result):
+        # If there are 31 or fewer polygons in the input, the result
+        # is a dataframe with one row per point and one column per
+        # polygon.
+
+        # Result can be:
+        # A Dataframe of booleans with n_points rows and up to 31 columns.
+        result = point_result
         # Discrete math recombination
         if (
             contains_only_linestrings(self.rhs)
@@ -193,15 +258,45 @@ def postprocess(self, point_indices, point_result):
         ):
             # process for completed linestrings, polygons, and multipoints.
             # Not necessary for points.
+            result["idx"] = point_indices
             df_result = (
                 result.groupby("idx").sum().sort_index()
                 == result.groupby("idx").count().sort_index()
             )
-        point_result = cudf.Series(
-            df_result["pip"], index=cudf.RangeIndex(0, len(df_result))
-        )
-        point_result.name = None
-        return point_result
+            result = df_result
+
+        final_result = cudf.Series([False] * len(self.lhs))
+
+        if len(result.columns) > 1:
+            final_result[result.index] = cp.diag(result.values)
+        else:
+            final_result[result.index] = result[result.columns[0]]
+        final_result.name = None
+        return final_result
+
+    def postprocess(self, point_indices, point_result):
+        """Postprocess the output GeoSeries to ensure that they are of the
+        correct type for the predicate.
+
+        Postprocess for contains_properly has to handle multiple input and
+        output configurations.
+
+        The input can be a single polygon, a single multipolygon, or a
+        GeoSeries containing a mix of polygons and multipolygons.
+
+        `point_result` can be either a cudf.DataFrame with one row per
+        point and one column per polygon (brute-force path) or a
+        cudf.DataFrame containing the point index and the part index for
+        each point in the polygon (quadtree path).
+        """
+        if self._should_use_quadtree():
+            return self._postprocess_quadtree_result(
+                point_indices, point_result
+            )
+        else:
+            return self._postprocess_brute_force_result(
+                point_indices, point_result
+            )
 
 
 class OverlapsBinpred(ContainsProperlyBinpred):
@@ -211,15 +306,14 @@ def postprocess(self, point_indices, point_result):
         # TODO: Maybe change this to intersection
         if not has_same_geometry(self.lhs, self.rhs):
             return cudf.Series([False] * len(self.lhs))
-        result = cudf.DataFrame({"idx": point_indices, "pip": point_result})
-        df_result = result
-        partial_result = result.groupby("idx").sum()
-        df_result = (partial_result > 0) & (partial_result < len(point_result))
-        point_result = cudf.Series(
-            df_result["pip"], index=cudf.RangeIndex(0, len(df_result))
-        )
-        point_result.name = None
-        return point_result
+        point_result["point_index"] = point_indices
+        hits = point_result.groupby("point_index").sum()
+        size = point_result.groupby("point_index").count()
+        partial_overlap = hits != size
+        non_empty = size > 0
+        at_least_one_overlap = hits > 0
+        group_result = partial_overlap & non_empty & at_least_one_overlap
+        return group_result
 
 
 class IntersectsBinpred(ContainsProperlyBinpred):