Skip to content

Commit

Permalink
[SEDONA-647] Add ST_RemoveRepeatedPoints (#1557)
Browse files Browse the repository at this point in the history
* feat: add ST_RemoveRepeatedPoints

* fix: lint errors

* fix: snowflake typos

* change variable names, divide the function tests, add detailed examples docs, add more tests in spark
  • Loading branch information
furqaankhan committed Aug 23, 2024
1 parent 809b5a7 commit 63a98e4
Show file tree
Hide file tree
Showing 22 changed files with 726 additions and 0 deletions.
8 changes: 8 additions & 0 deletions common/src/main/java/org/apache/sedona/common/Functions.java
Original file line number Diff line number Diff line change
Expand Up @@ -873,6 +873,14 @@ public static Geometry removePoint(Geometry linestring, int position) {
return null;
}

/**
 * Removes repeated points from a geometry, using the given tolerance.
 *
 * <p>This is a thin wrapper: all work is delegated to {@link
 * GeometryDuplicateCoordinateRemover#process(Geometry, double)}.
 *
 * @param geom the geometry to clean
 * @param tolerance passed through unchanged to the remover
 * @return whatever the remover returns for {@code geom}
 */
public static Geometry removeRepeatedPoints(Geometry geom, double tolerance) {
  final Geometry cleaned = GeometryDuplicateCoordinateRemover.process(geom, tolerance);
  return cleaned;
}

/**
 * Removes repeated points from a geometry with a tolerance of zero.
 *
 * <p>Equivalent to calling {@link #removeRepeatedPoints(Geometry, double)} with {@code 0}.
 *
 * @param geom the geometry to clean
 * @return the result of the two-argument overload for a zero tolerance
 */
public static Geometry removeRepeatedPoints(Geometry geom) {
  final double zeroTolerance = 0;
  return removeRepeatedPoints(geom, zeroTolerance);
}

public static Geometry setPoint(Geometry linestring, int position, Geometry point) {
if (linestring instanceof LineString) {
List<Coordinate> coordinates = new ArrayList<>(Arrays.asList(linestring.getCoordinates()));
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.sedona.common.utils;

import java.util.Arrays;
import java.util.LinkedHashSet;
import java.util.Objects;
import java.util.Set;
import org.locationtech.jts.geom.*;

/**
 * Removes repeated (or, with a positive tolerance, nearly repeated) coordinates from geometries.
 *
 * <p>The class holds no mutable state: the tolerance and the {@link GeometryFactory} are passed
 * explicitly through every helper. Earlier revisions kept both in static fields, which let
 * concurrent callers observe each other's tolerance/factory and let nested collection members
 * overwrite the factory used for the enclosing collection.
 */
public class GeometryDuplicateCoordinateRemover {

  /**
   * Legacy overload that removes only exact consecutive duplicates (tolerance {@code 0}).
   *
   * @param coords the coordinate sequence to clean; it is never modified
   * @param minPoints the minimum number of points the result should retain
   * @return a new array holding the retained coordinates
   * @see #removeDuplicates(Coordinate[], int, double)
   */
  public static Coordinate[] removeDuplicates(Coordinate[] coords, int minPoints) {
    return removeDuplicates(coords, minPoints, 0.0);
  }

  /**
   * Removes consecutive repeated points from an ordered coordinate sequence.
   *
   * <p>With a positive tolerance a point is dropped when it lies within {@code tolerance}
   * (inclusive) of the previously kept point; otherwise only points that are 2D-equal to the
   * previously kept point are dropped. The first and the last input points are always kept, and
   * points stop being dropped once dropping more would leave fewer than {@code minPoints}.
   *
   * <p>Inputs that already have {@code minPoints} or fewer coordinates are returned as an
   * unchanged copy. (They used to be replaced by an empty array, which turned e.g. a triangle
   * ring into an empty ring.)
   *
   * @param coords the coordinate sequence to clean; it is never modified
   * @param minPoints the minimum number of points the result should retain
   * @param tolerance distance threshold; values {@code <= 0} mean exact 2D matching
   * @return a new array holding the retained coordinates
   */
  public static Coordinate[] removeDuplicates(
      Coordinate[] coords, int minPoints, double tolerance) {
    int numPoints = coords.length;

    // Short inputs are a no-op; the length check also protects coords[0] below.
    if (numPoints == 0 || numPoints <= minPoints) {
      return coords.clone();
    }

    // Write into a fresh array: the caller's array may be the backing store of a live geometry
    // (JTS CoordinateArraySequence#toCoordinateArray exposes its internal array).
    Coordinate[] kept = new Coordinate[numPoints];
    kept[0] = coords[0];
    int keptCount = 1;

    Coordinate lastKept = coords[0];
    double distance = Double.MAX_VALUE;

    for (int i = 1; i < numPoints; i++) {
      boolean isLastPoint = (i == numPoints - 1);
      Coordinate current = coords[i];

      // Only consider dropping while enough input remains to still reach minPoints.
      if (numPoints + keptCount > minPoints + i) {
        if (tolerance > 0.0) {
          distance = current.distance(lastKept);
          // Any point but the last one may be dropped.
          if (!isLastPoint && distance <= tolerance) {
            continue;
          }
        } else if (current.equals2D(lastKept)) {
          continue;
        }

        // The last point is close to the previously kept one: keep the last point instead by
        // pulling the write position back one slot.
        if (isLastPoint && keptCount > 1 && tolerance > 0.0 && distance <= tolerance) {
          keptCount--;
        }
      }

      kept[keptCount++] = current;
      lastKept = current;
    }

    return Arrays.copyOf(kept, keptCount);
  }

  /**
   * Legacy overload that removes exact duplicates only, keeping first-occurrence order.
   *
   * @param coords the coordinates to clean; the array is not modified
   * @param recursion retained for signature compatibility; it no longer influences the result
   *     because this overload always performs exact de-duplication
   * @return the distinct coordinates in first-occurrence order
   * @see #removeDuplicatePointsMultiPoint(Coordinate[], double)
   */
  public static Coordinate[] removeDuplicatePointsMultiPoint(
      Coordinate[] coords, boolean recursion) {
    return removeDuplicatePointsMultiPoint(coords, 0.0);
  }

  /**
   * Removes duplicate points from an unordered set of coordinates.
   *
   * <p>When {@code tolerance} is {@code 0} the distinct coordinates are returned in
   * first-occurrence order. Otherwise the distinct coordinates are sorted by their natural order
   * and, scanning forward, every run of following coordinates strictly closer than {@code
   * tolerance} to the current survivor is discarded; the result is in sorted order.
   *
   * @param coords the coordinates to clean; the array is not modified
   * @param tolerance distance threshold (exclusive) below which a point counts as repeated
   * @return the surviving coordinates
   */
  public static Coordinate[] removeDuplicatePointsMultiPoint(
      Coordinate[] coords, double tolerance) {
    Set<Coordinate> uniqueCoords = new LinkedHashSet<>(Arrays.asList(coords));
    if (tolerance == 0) {
      return uniqueCoords.toArray(new Coordinate[0]);
    }

    Coordinate[] candidates = uniqueCoords.stream().sorted().toArray(Coordinate[]::new);

    for (int i = 0; i < candidates.length; i++) {
      if (candidates[i] == null) {
        continue; // already discarded by an earlier survivor
      }
      for (int j = i + 1; j < candidates.length; j++) {
        if (candidates[j] != null && candidates[i].distance(candidates[j]) < tolerance) {
          candidates[j] = null;
        } else {
          // Stop at the first sorted neighbour that is not within tolerance.
          break;
        }
      }
    }

    return Arrays.stream(candidates).filter(Objects::nonNull).toArray(Coordinate[]::new);
  }

  /**
   * Removes repeated points from a geometry of any supported type.
   *
   * <p>Empty geometries, points, and line strings with two or fewer points are returned as the
   * same instance. Everything else is rebuilt with the input geometry's own factory. Members of a
   * geometry collection are processed recursively, each with its own factory.
   *
   * @param geometry the geometry to clean; it is not modified
   * @param tolerance distance threshold; {@code 0} means exact matching
   * @return the cleaned geometry
   * @throws IllegalArgumentException if the geometry type is not one of the handled JTS types
   */
  public static Geometry process(Geometry geometry, double tolerance) {
    if (geometry.isEmpty()) {
      return geometry;
    }

    GeometryFactory factory = geometry.getFactory();
    String type = geometry.getGeometryType();

    if (Geometry.TYPENAME_POINT.equals(type)) {
      return geometry;
    }
    if (Geometry.TYPENAME_MULTIPOINT.equals(type)) {
      return processMultiPoint((MultiPoint) geometry, factory, tolerance);
    }
    if (Geometry.TYPENAME_LINEARRING.equals(type)) {
      return processLinearRing((LinearRing) geometry, factory, tolerance);
    }
    if (Geometry.TYPENAME_LINESTRING.equals(type)) {
      return processLineString((LineString) geometry, factory, tolerance);
    }
    if (Geometry.TYPENAME_MULTILINESTRING.equals(type)) {
      return processMultiLineString((MultiLineString) geometry, factory, tolerance);
    }
    if (Geometry.TYPENAME_POLYGON.equals(type)) {
      return processPolygon((Polygon) geometry, factory, tolerance);
    }
    if (Geometry.TYPENAME_MULTIPOLYGON.equals(type)) {
      return processMultiPolygon((MultiPolygon) geometry, factory, tolerance);
    }
    if (Geometry.TYPENAME_GEOMETRYCOLLECTION.equals(type)) {
      return processGeometryCollection((GeometryCollection) geometry, factory, tolerance);
    }

    throw new IllegalArgumentException(
        "Unknown Geometry subtype: " + geometry.getClass().getName());
  }

  /** Rebuilds a multipoint from its de-duplicated coordinates. */
  private static MultiPoint processMultiPoint(
      MultiPoint geometry, GeometryFactory factory, double tolerance) {
    Coordinate[] coords = geometry.getCoordinates();
    return factory.createMultiPointFromCoords(removeDuplicatePointsMultiPoint(coords, tolerance));
  }

  /** Rebuilds a ring, never reducing it below four coordinates. */
  private static LinearRing processLinearRing(
      LinearRing geometry, GeometryFactory factory, double tolerance) {
    Coordinate[] coords = geometry.getCoordinates();
    return factory.createLinearRing(removeDuplicates(coords, 4, tolerance));
  }

  /** Rebuilds a line string, never reducing it below two coordinates. */
  private static LineString processLineString(
      LineString geometry, GeometryFactory factory, double tolerance) {
    if (geometry.getNumPoints() <= 2) {
      return geometry;
    }
    Coordinate[] coords = geometry.getCoordinates();
    return factory.createLineString(removeDuplicates(coords, 2, tolerance));
  }

  /** Processes every member line string and reassembles the multi-line-string. */
  private static MultiLineString processMultiLineString(
      MultiLineString geometry, GeometryFactory factory, double tolerance) {
    LineString[] lineStrings = new LineString[geometry.getNumGeometries()];
    for (int i = 0; i < lineStrings.length; i++) {
      lineStrings[i] = processLineString((LineString) geometry.getGeometryN(i), factory, tolerance);
    }
    return factory.createMultiLineString(lineStrings);
  }

  /** Processes the shell and every hole, then reassembles the polygon. */
  private static Polygon processPolygon(
      Polygon geometry, GeometryFactory factory, double tolerance) {
    LinearRing shell = processLinearRing(geometry.getExteriorRing(), factory, tolerance);

    LinearRing[] holes = new LinearRing[geometry.getNumInteriorRing()];
    for (int i = 0; i < holes.length; i++) {
      holes[i] = processLinearRing(geometry.getInteriorRingN(i), factory, tolerance);
    }
    return factory.createPolygon(shell, holes);
  }

  /** Processes every member polygon and reassembles the multi-polygon. */
  private static MultiPolygon processMultiPolygon(
      MultiPolygon geometry, GeometryFactory factory, double tolerance) {
    Polygon[] polygons = new Polygon[geometry.getNumGeometries()];
    for (int i = 0; i < polygons.length; i++) {
      polygons[i] = processPolygon((Polygon) geometry.getGeometryN(i), factory, tolerance);
    }
    return factory.createMultiPolygon(polygons);
  }

  /**
   * Processes every member through {@link #process(Geometry, double)} and reassembles the
   * collection with the collection's own factory.
   */
  private static GeometryCollection processGeometryCollection(
      GeometryCollection geometry, GeometryFactory factory, double tolerance) {
    Geometry[] geometries = new Geometry[geometry.getNumGeometries()];
    for (int i = 0; i < geometries.length; i++) {
      geometries[i] = process(geometry.getGeometryN(i), tolerance);
    }
    return factory.createGeometryCollection(geometries);
  }
}
133 changes: 133 additions & 0 deletions common/src/test/java/org/apache/sedona/common/FunctionsTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -1622,6 +1622,139 @@ public void numPointsUnsupported() throws Exception {
assertEquals(expected, e.getMessage());
}

@Test
public void removeRepeatedPointsMultiPoint() throws ParseException {
  // A lone point: geometry and SRID are expected back unchanged.
  Geometry point = Constructors.geomFromWKT("POINT (10 23)", 4321);
  Geometry pointResult = Functions.removeRepeatedPoints(point);
  String pointWkt = Functions.asWKT(pointResult);
  assertEquals("POINT (10 23)", pointWkt);
  int pointSrid = Functions.getSRID(pointResult);
  assertEquals(4321, pointSrid);

  // A multipoint without repeats keeps its members, their order, and the SRID.
  Geometry distinctPoints =
      Constructors.geomFromWKT("MULTIPOINT ((1 1), (4 4), (2 2), (3 3))", 1000);
  Geometry distinctResult = Functions.removeRepeatedPoints(distinctPoints);
  String distinctWkt = Functions.asWKT(distinctResult);
  assertEquals("MULTIPOINT ((1 1), (4 4), (2 2), (3 3))", distinctWkt);
  int distinctSrid = Functions.getSRID(distinctResult);
  assertEquals(1000, distinctSrid);

  // The same input is cleaned twice: first with tolerance 20, then with the default tolerance.
  Geometry repeatedPoints =
      Constructors.geomFromWKT("MULTIPOINT (20 20, 10 10, 30 30, 40 40, 20 20, 30 30, 40 40)", 0);
  String tolerantWkt = Functions.asWKT(Functions.removeRepeatedPoints(repeatedPoints, 20));
  assertEquals("MULTIPOINT ((10 10), (30 30))", tolerantWkt);

  String exactWkt = Functions.asWKT(Functions.removeRepeatedPoints(repeatedPoints));
  assertEquals("MULTIPOINT ((20 20), (10 10), (30 30), (40 40))", exactWkt);

  // With tolerance 2000 only a single point is expected to remain.
  Geometry clustered =
      Constructors.geomFromWKT("MULTIPOINT ((1 1), (4 4), (2 2), (3 3), (3 3))", 0);
  String clusteredWkt = Functions.asWKT(Functions.removeRepeatedPoints(clustered, 2000));
  assertEquals("MULTIPOINT ((1 1))", clusteredWkt);
}

@Test
public void removeRepeatedPointsLineString() throws ParseException {
  // Default tolerance: only the consecutive exact repeat at the start is removed.
  Geometry exactInput =
      Constructors.geomFromWKT("LINESTRING (0 0, 0 0, 1 1, 0 0, 1 1, 2 2)", 2000);
  Geometry exactResult = Functions.removeRepeatedPoints(exactInput);
  String exactWkt = Functions.asWKT(exactResult);
  assertEquals("LINESTRING (0 0, 1 1, 0 0, 1 1, 2 2)", exactWkt);
  int exactSrid = Functions.getSRID(exactResult);
  assertEquals(2000, exactSrid);

  // The same line is cleaned twice, with tolerance 2 and then tolerance 6.
  Geometry tolerantInput =
      Constructors.geomFromWKT("LINESTRING (0 0, 0 0, 1 1, 5 5, 1 1, 2 2)", 0);
  String tolerance2Wkt = Functions.asWKT(Functions.removeRepeatedPoints(tolerantInput, 2));
  assertEquals("LINESTRING (0 0, 5 5, 2 2)", tolerance2Wkt);

  String tolerance6Wkt = Functions.asWKT(Functions.removeRepeatedPoints(tolerantInput, 6));
  assertEquals("LINESTRING (0 0, 2 2)", tolerance6Wkt);

  // Tolerance 20 on a line that doubles back on itself.
  Geometry zigzag =
      Constructors.geomFromWKT("LINESTRING (20 20, 10 10, 30 30, 40 40, 20 20, 30 30, 40 40)", 0);
  String zigzagWkt = Functions.asWKT(Functions.removeRepeatedPoints(zigzag, 20));
  assertEquals("LINESTRING (20 20, 40 40, 20 20, 40 40)", zigzagWkt);

  // A tolerance of 10000 is expected to leave just the two end points.
  Geometry diagonal =
      Constructors.geomFromWKT("LINESTRING (10 10, 20 20, 20 20, 30 30, 30 30, 40 40, 40 40)", 0);
  String diagonalWkt = Functions.asWKT(Functions.removeRepeatedPoints(diagonal, 10000));
  assertEquals("LINESTRING (10 10, 40 40)", diagonalWkt);

  // Each member of a multi-line-string is cleaned, and the SRID is carried over.
  Geometry multiLine =
      Constructors.geomFromWKT(
          "MULTILINESTRING ((10 10, 20 20, 20 20, 30 30), (40 40, 50 50, 50 50, 60 60))", 3000);
  Geometry multiLineResult = Functions.removeRepeatedPoints(multiLine);
  String multiLineWkt = Functions.asWKT(multiLineResult);
  assertEquals("MULTILINESTRING ((10 10, 20 20, 30 30), (40 40, 50 50, 60 60))", multiLineWkt);
  int multiLineSrid = Functions.getSRID(multiLineResult);
  assertEquals(3000, multiLineSrid);
}

@Test
public void removeRepeatedPointsPolygon() throws ParseException {
  // Default tolerance on a single-ring polygon; the SRID must be carried over.
  Geometry simplePolygon =
      Constructors.geomFromWKT(
          "POLYGON ((10 10, 20 20, 20 20, 30 30, 30 30, 40 40, 40 40, 10 10))", 4000);
  Geometry simpleResult = Functions.removeRepeatedPoints(simplePolygon);
  String simpleWkt = Functions.asWKT(simpleResult);
  assertEquals("POLYGON ((10 10, 20 20, 30 30, 40 40, 10 10))", simpleWkt);
  int simpleSrid = Functions.getSRID(simpleResult);
  assertEquals(4000, simpleSrid);

  // Tolerance 1000 on a polygon with two holes: every ring is expected to keep four points.
  Geometry polygonWithHoles =
      Constructors.geomFromWKT(
          "POLYGON ((10 10, 20 20, 20 20, 30 30, 30 30, 40 40, 40 40, 10 10),(15 15, 25 25, 25 25, 35 35, 35 35, 15 15),(25 25, 35 35, 35 35, 45 45, 45 45, 25 25))",
          0);
  String holesWkt = Functions.asWKT(Functions.removeRepeatedPoints(polygonWithHoles, 1000));
  assertEquals(
      "POLYGON ((10 10, 40 40, 40 40, 10 10), (15 15, 35 35, 35 35, 15 15), (25 25, 45 45, 45 45, 25 25))",
      holesWkt);

  // Tolerance 1000 on a multi-polygon of two single-ring polygons, including the SRID check.
  Geometry multiPolygon =
      Constructors.geomFromWKT(
          "MULTIPOLYGON (((10 10, 20 20, 20 20, 30 30, 30 30, 40 40, 40 40, 10 10)),((50 50, 60 60, 60 60, 70 70, 70 70, 80 80, 80 80, 50 50)))",
          5000);
  Geometry multiResult = Functions.removeRepeatedPoints(multiPolygon, 1000);
  String multiWkt = Functions.asWKT(multiResult);
  assertEquals(
      "MULTIPOLYGON (((10 10, 40 40, 40 40, 10 10)), ((50 50, 80 80, 80 80, 50 50)))", multiWkt);
  int multiSrid = Functions.getSRID(multiResult);
  assertEquals(5000, multiSrid);

  // Tolerance 1000 on a multi-polygon whose members each carry two holes.
  Geometry multiPolygonWithHoles =
      Constructors.geomFromWKT(
          "MULTIPOLYGON (((10 10, 20 20, 20 20, 30 30, 30 30, 40 40, 40 40, 10 10),(15 15, 25 25, 25 25, 35 35, 35 35, 15 15),(25 25, 35 35, 35 35, 45 45, 45 45, 25 25)),((50 50, 60 60, 60 60, 70 70, 70 70, 80 80, 80 80, 50 50),(55 55, 65 65, 65 65, 75 75, 75 75, 55 55),(65 65, 75 75, 75 75, 85 85, 85 85, 65 65)))",
          0);
  String multiHolesWkt =
      Functions.asWKT(Functions.removeRepeatedPoints(multiPolygonWithHoles, 1000));
  assertEquals(
      "MULTIPOLYGON (((10 10, 40 40, 40 40, 10 10), (15 15, 35 35, 35 35, 15 15), (25 25, 45 45, 45 45, 25 25)), ((50 50, 80 80, 80 80, 50 50), (55 55, 75 75, 75 75, 55 55), (65 65, 85 85, 85 85, 65 65)))",
      multiHolesWkt);
}

@Test
public void removeRepeatedPointsGeometryCollection() throws ParseException {
  // One member of each kind (point, line, polygon, multipoint), cleaned with default tolerance.
  String inputWkt =
      "GEOMETRYCOLLECTION (POINT (10 10),LINESTRING (20 20, 20 20, 30 30, 30 30),POLYGON ((40 40, 50 50, 50 50, 60 60, 60 60, 70 70, 70 70, 40 40)), MULTIPOINT ((80 80), (90 90), (90 90), (100 100)))";
  Geometry collection = Constructors.geomFromWKT(inputWkt, 6000);

  Geometry cleaned = Functions.removeRepeatedPoints(collection);

  String cleanedWkt = Functions.asWKT(cleaned);
  assertEquals(
      "GEOMETRYCOLLECTION (POINT (10 10), LINESTRING (20 20, 30 30), POLYGON ((40 40, 50 50, 60 60, 70 70, 40 40)), MULTIPOINT ((80 80), (90 90), (100 100)))",
      cleanedWkt);

  // The SRID given at construction must survive the rebuild.
  int cleanedSrid = Functions.getSRID(cleaned);
  assertEquals(6000, cleanedSrid);
}

@Test
public void simplifyVW() throws ParseException {
Geometry geom = Constructors.geomFromEWKT("LINESTRING(5 2, 3 8, 6 20, 7 25, 10 10)");
Expand Down
Loading

0 comments on commit 63a98e4

Please sign in to comment.