From 2df38554f9d47d3bb278c95711b4213de38ec734 Mon Sep 17 00:00:00 2001 From: panguixin Date: Tue, 6 Aug 2024 10:30:38 +0800 Subject: [PATCH] Support common format geo point (#2801) (#2896) --------- Signed-off-by: panguixin (cherry picked from commit 82ef68e2b25c7c10740e74968bbe960c000c1cee) --- .../sql/legacy/SQLIntegTestCase.java | 8 +- .../org/opensearch/sql/legacy/TestUtils.java | 5 ++ .../opensearch/sql/legacy/TestsConstants.java | 1 + .../opensearch/sql/sql/GeopointFormatsIT.java | 60 +++++++++++++ integ-test/src/test/resources/geopoints.json | 12 +++ .../geopoint_index_mapping.json | 9 ++ .../data/utils/OpenSearchJsonContent.java | 50 ++++------- .../value/OpenSearchExprValueFactory.java | 59 ++++++++++-- .../data/utils/OpenSearchJsonContentTest.java | 31 +++++++ .../value/OpenSearchExprValueFactoryTest.java | 89 +++++++++++++------ 10 files changed, 256 insertions(+), 68 deletions(-) create mode 100644 integ-test/src/test/java/org/opensearch/sql/sql/GeopointFormatsIT.java create mode 100644 integ-test/src/test/resources/geopoints.json create mode 100644 integ-test/src/test/resources/indexDefinitions/geopoint_index_mapping.json create mode 100644 opensearch/src/test/java/org/opensearch/sql/opensearch/data/utils/OpenSearchJsonContentTest.java diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java b/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java index d86b7c4c55..f30864d4b1 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java +++ b/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java @@ -20,6 +20,7 @@ import static org.opensearch.sql.legacy.TestUtils.getDogs3IndexMapping; import static org.opensearch.sql.legacy.TestUtils.getEmployeeNestedTypeIndexMapping; import static org.opensearch.sql.legacy.TestUtils.getGameOfThronesIndexMapping; +import static org.opensearch.sql.legacy.TestUtils.getGeopointIndexMapping; import static org.opensearch.sql.legacy.TestUtils.getJoinTypeIndexMapping; import static org.opensearch.sql.legacy.TestUtils.getLocationIndexMapping; import static org.opensearch.sql.legacy.TestUtils.getMappingFile; @@ -730,7 +731,12 @@ public enum Index { TestsConstants.TEST_INDEX_NESTED_WITH_NULLS, "multi_nested", getNestedTypeIndexMapping(), - "src/test/resources/nested_with_nulls.json"); + "src/test/resources/nested_with_nulls.json"), + GEOPOINTS( + TestsConstants.TEST_INDEX_GEOPOINT, + "dates", + getGeopointIndexMapping(), + "src/test/resources/geopoints.json"); private final String name; private final String type; diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/TestUtils.java b/integ-test/src/test/java/org/opensearch/sql/legacy/TestUtils.java index e02de782b4..a0f2bab716 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/TestUtils.java +++ b/integ-test/src/test/java/org/opensearch/sql/legacy/TestUtils.java @@ -244,6 +244,11 @@ public static String getDataTypeNonnumericIndexMapping() { return getMappingFile(mappingFile); } + public static String getGeopointIndexMapping() { + String mappingFile = "geopoint_index_mapping.json"; + return getMappingFile(mappingFile); + } + public static void loadBulk(Client client, String jsonPath, String defaultIndex) throws Exception { System.out.println(String.format("Loading file %s into opensearch cluster", jsonPath)); diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/TestsConstants.java b/integ-test/src/test/java/org/opensearch/sql/legacy/TestsConstants.java index 29bc9813fa..73838feb4f 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/TestsConstants.java +++ b/integ-test/src/test/java/org/opensearch/sql/legacy/TestsConstants.java @@ -57,6 +57,7 @@ public class TestsConstants { public static final String TEST_INDEX_WILDCARD = TEST_INDEX + "_wildcard"; public static final String TEST_INDEX_MULTI_NESTED_TYPE = TEST_INDEX + "_multi_nested"; public static final String TEST_INDEX_NESTED_WITH_NULLS = TEST_INDEX + "_nested_with_nulls"; + public static final String TEST_INDEX_GEOPOINT = TEST_INDEX + "_geopoint"; public static final String DATASOURCES = ".ql-datasources"; public static final String DATE_FORMAT = "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'"; diff --git a/integ-test/src/test/java/org/opensearch/sql/sql/GeopointFormatsIT.java b/integ-test/src/test/java/org/opensearch/sql/sql/GeopointFormatsIT.java new file mode 100644 index 0000000000..f25eeec241 --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/sql/GeopointFormatsIT.java @@ -0,0 +1,60 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.sql; + +import static org.opensearch.sql.util.MatcherUtils.rows; +import static org.opensearch.sql.util.MatcherUtils.schema; +import static org.opensearch.sql.util.MatcherUtils.verifyDataRows; +import static org.opensearch.sql.util.MatcherUtils.verifySchema; + +import java.io.IOException; +import java.util.Map; +import org.apache.commons.lang3.tuple.Pair; +import org.json.JSONArray; +import org.json.JSONObject; +import org.junit.jupiter.api.Test; +import org.opensearch.sql.legacy.SQLIntegTestCase; + +public class GeopointFormatsIT extends SQLIntegTestCase { + + @Override + public void init() throws Exception { + loadIndex(Index.GEOPOINTS); + } + + @Test + public void testReadingGeopoints() throws IOException { + String query = String.format("SELECT point FROM %s LIMIT 5", Index.GEOPOINTS.getName()); + JSONObject result = executeJdbcRequest(query); + verifySchema(result, schema("point", null, "geo_point")); + verifyDataRows( + result, + rows(Map.of("lon", 74, "lat", 40.71)), + rows(Map.of("lon", 74, "lat", 40.71)), + rows(Map.of("lon", 74, "lat", 40.71)), + rows(Map.of("lon", 74, "lat", 40.71)), + rows(Map.of("lon", 74, "lat", 40.71))); + } + + private static final double TOLERANCE = 1E-5; + + public void testReadingGeoHash() throws IOException { + String query = String.format("SELECT point FROM %s WHERE _id='6'", Index.GEOPOINTS.getName()); + JSONObject result = executeJdbcRequest(query); + verifySchema(result, schema("point", null, "geo_point")); + Pair point = getGeoValue(result); + assertEquals(40.71, point.getLeft(), TOLERANCE); + assertEquals(74, point.getRight(), TOLERANCE); + } + + private Pair getGeoValue(JSONObject result) { + JSONObject geoRaw = + (JSONObject) ((JSONArray) ((JSONArray) result.get("datarows")).get(0)).get(0); + double lat = geoRaw.getDouble("lat"); + double lon = geoRaw.getDouble("lon"); + return Pair.of(lat, lon); + } +} diff --git a/integ-test/src/test/resources/geopoints.json b/integ-test/src/test/resources/geopoints.json new file mode 100644 index 0000000000..95900fe811 --- /dev/null +++ b/integ-test/src/test/resources/geopoints.json @@ -0,0 +1,12 @@ +{"index": {"_id": "1"}} +{"point": {"lat": 40.71, "lon": 74.00}} +{"index": {"_id": "2"}} +{"point": "40.71,74.00"} +{"index": {"_id": "3"}} +{"point": [74.00, 40.71]} +{"index": {"_id": "4"}} +{"point": "POINT (74.00 40.71)"} +{"index": {"_id": "5"}} +{"point": {"type": "Point", "coordinates": [74.00, 40.71]}} +{"index": {"_id": "6"}} +{"point": "txhxegj0uyp3"} diff --git a/integ-test/src/test/resources/indexDefinitions/geopoint_index_mapping.json b/integ-test/src/test/resources/indexDefinitions/geopoint_index_mapping.json new file mode 100644 index 0000000000..61340530d8 --- /dev/null +++ b/integ-test/src/test/resources/indexDefinitions/geopoint_index_mapping.json @@ -0,0 +1,9 @@ +{ + "mappings": { + "properties": { + "point": { + "type": "geo_point" + } + } + } +} diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/data/utils/OpenSearchJsonContent.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/data/utils/OpenSearchJsonContent.java index f79c8a708b..4446c1f979 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/data/utils/OpenSearchJsonContent.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/data/utils/OpenSearchJsonContent.java @@ -7,11 +7,19 @@ import com.fasterxml.jackson.databind.JsonNode; import com.google.common.collect.Iterators; +import java.io.IOException; import java.util.Iterator; import java.util.LinkedHashMap; import java.util.Map; import lombok.RequiredArgsConstructor; import org.apache.commons.lang3.tuple.Pair; +import org.opensearch.OpenSearchParseException; +import org.opensearch.common.geo.GeoPoint; +import org.opensearch.common.geo.GeoUtils; +import org.opensearch.common.xcontent.json.JsonXContentParser; +import org.opensearch.core.xcontent.DeprecationHandler; +import org.opensearch.core.xcontent.NamedXContentRegistry; +import org.opensearch.core.xcontent.XContentParser; /** The Implementation of Content to represent {@link JsonNode}. */ @RequiredArgsConstructor @@ -122,25 +130,17 @@ public Object objectValue() { @Override public Pair geoValue() { final JsonNode value = value(); - if (value.has("lat") && value.has("lon")) { - Double lat = 0d; - Double lon = 0d; - try { - lat = extractDoubleValue(value.get("lat")); - } catch (Exception exception) { - throw new IllegalStateException( - "latitude must be number value, but got value: " + value.get("lat")); - } - try { - lon = extractDoubleValue(value.get("lon")); - } catch (Exception exception) { - throw new IllegalStateException( - "longitude must be number value, but got value: " + value.get("lon")); - } - return Pair.of(lat, lon); - } else { - throw new IllegalStateException( - "geo point must in format of {\"lat\": number, \"lon\": " + "number}"); + try (XContentParser parser = + new JsonXContentParser( + NamedXContentRegistry.EMPTY, + DeprecationHandler.IGNORE_DEPRECATIONS, + value.traverse())) { + parser.nextToken(); + GeoPoint point = new GeoPoint(); + GeoUtils.parseGeoPoint(parser, point, true); + return Pair.of(point.getLat(), point.getLon()); + } catch (IOException ex) { + throw new OpenSearchParseException("error parsing geo point", ex); } } @@ -148,16 +148,4 @@ public Pair geoValue() { private JsonNode value() { return value; } - - /** Get doubleValue from JsonNode if possible. */ - private Double extractDoubleValue(JsonNode node) { - if (node.isTextual()) { - return Double.valueOf(node.textValue()); - } - if (node.isNumber()) { - return node.doubleValue(); - } else { - throw new IllegalStateException("node must be a number"); - } - } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactory.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactory.java index b03220b29e..e736663f60 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactory.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactory.java @@ -40,6 +40,7 @@ import java.util.function.BiFunction; import lombok.Getter; import lombok.Setter; +import org.opensearch.OpenSearchParseException; import org.opensearch.common.time.DateFormatter; import org.opensearch.common.time.DateFormatters; import org.opensearch.common.time.FormatNames; @@ -63,7 +64,6 @@ import org.opensearch.sql.opensearch.data.type.OpenSearchBinaryType; import org.opensearch.sql.opensearch.data.type.OpenSearchDataType; import org.opensearch.sql.opensearch.data.type.OpenSearchDateType; -import org.opensearch.sql.opensearch.data.type.OpenSearchGeoPointType; import org.opensearch.sql.opensearch.data.type.OpenSearchIpType; import org.opensearch.sql.opensearch.data.utils.Content; import org.opensearch.sql.opensearch.data.utils.ObjectContent; @@ -137,10 +137,6 @@ public void extendTypeMapping(Map typeMapping) { .put( OpenSearchDataType.of(OpenSearchDataType.MappingType.Ip), (c, dt) -> new OpenSearchExprIpValue(c.stringValue())) - .put( - OpenSearchDataType.of(OpenSearchDataType.MappingType.GeoPoint), - (c, dt) -> - new OpenSearchExprGeoPointValue(c.geoValue().getLeft(), c.geoValue().getRight())) .put( OpenSearchDataType.of(OpenSearchDataType.MappingType.Binary), (c, dt) -> new OpenSearchExprBinaryValue(c.stringValue())) @@ -189,8 +185,11 @@ private ExprValue parse( return ExprNullValue.of(); } - ExprType type = fieldType.get(); - if (type.equals(OpenSearchDataType.of(OpenSearchDataType.MappingType.Nested)) + final ExprType type = fieldType.get(); + + if (type.equals(OpenSearchDataType.of(OpenSearchDataType.MappingType.GeoPoint))) { + return parseGeoPoint(content, supportArrays); + } else if (type.equals(OpenSearchDataType.of(OpenSearchDataType.MappingType.Nested)) || content.isArray()) { return parseArray(content, field, type, supportArrays); } else if (type.equals(OpenSearchDataType.of(OpenSearchDataType.MappingType.Object)) @@ -357,6 +356,49 @@ private ExprValue parseArray( return new ExprCollectionValue(result); } + /** + * Parse geo point content. + * + * @param content Content to parse. + * @param supportArrays Parsing the whole array or not + * @return Geo point value parsed from content. + */ + private ExprValue parseGeoPoint(Content content, boolean supportArrays) { + // there is only one point in doc. + if (content.isArray() == false) { + final var pair = content.geoValue(); + return new OpenSearchExprGeoPointValue(pair.getLeft(), pair.getRight()); + } + + var elements = content.array(); + var first = elements.next(); + // an array in the [longitude, latitude] format. + if (first.isNumber()) { + double lon = first.doubleValue(); + var second = elements.next(); + if (second.isNumber() == false) { + throw new OpenSearchParseException("lat must be a number, got " + second.objectValue()); + } + return new OpenSearchExprGeoPointValue(second.doubleValue(), lon); + } + + // there are multi points in doc + var pair = first.geoValue(); + var firstPoint = new OpenSearchExprGeoPointValue(pair.getLeft(), pair.getRight()); + if (supportArrays) { + List result = new ArrayList<>(); + result.add(firstPoint); + elements.forEachRemaining( + e -> { + var p = e.geoValue(); + result.add(new OpenSearchExprGeoPointValue(p.getLeft(), p.getRight())); + }); + return new ExprCollectionValue(result); + } else { + return firstPoint; + } + } + /** * Parse inner array value. Can be object type and recurse continues. * @@ -370,8 +412,7 @@ private ExprValue parseInnerArrayValue( Content content, String prefix, ExprType type, boolean supportArrays) { if (type instanceof OpenSearchIpType || type instanceof OpenSearchBinaryType - || type instanceof OpenSearchDateType - || type instanceof OpenSearchGeoPointType) { + || type instanceof OpenSearchDateType) { return parse(content, prefix, Optional.of(type), supportArrays); } else if (content.isString()) { return parse(content, prefix, Optional.of(OpenSearchDataType.of(STRING)), supportArrays); diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/data/utils/OpenSearchJsonContentTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/data/utils/OpenSearchJsonContentTest.java new file mode 100644 index 0000000000..c2cf0328bd --- /dev/null +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/data/utils/OpenSearchJsonContentTest.java @@ -0,0 +1,31 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.opensearch.data.utils; + +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import com.fasterxml.jackson.core.JsonParser; +import com.fasterxml.jackson.databind.JsonNode; +import java.io.IOException; +import org.junit.jupiter.api.Test; +import org.opensearch.OpenSearchParseException; + +public class OpenSearchJsonContentTest { + @Test + public void testGetValueWithIOException() throws IOException { + JsonNode jsonNode = mock(JsonNode.class); + JsonParser jsonParser = mock(JsonParser.class); + when(jsonNode.traverse()).thenReturn(jsonParser); + when(jsonParser.nextToken()).thenThrow(new IOException()); + OpenSearchJsonContent content = new OpenSearchJsonContent(jsonNode); + OpenSearchParseException exception = + assertThrows(OpenSearchParseException.class, content::geoValue); + assertTrue(exception.getMessage().contains("error parsing geo point")); + } +} diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactoryTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactoryTest.java index 1e913dfde2..d1bc3c500b 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactoryTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactoryTest.java @@ -48,6 +48,8 @@ import lombok.EqualsAndHashCode; import lombok.ToString; import org.junit.jupiter.api.Test; +import org.opensearch.OpenSearchParseException; +import org.opensearch.geometry.utils.Geohash; import org.opensearch.sql.data.model.ExprCollectionValue; import org.opensearch.sql.data.model.ExprDateValue; import org.opensearch.sql.data.model.ExprDatetimeValue; @@ -614,6 +616,18 @@ public void constructArrayOfGeoPoints() { .get("geoV")); } + @Test + public void constructArrayOfGeoPointsReturnsFirstIndex() { + assertEquals( + new OpenSearchExprGeoPointValue(42.60355556, -97.25263889), + tupleValue( + "{\"geoV\":[" + + "{\"lat\":42.60355556,\"lon\":-97.25263889}," + + "{\"lat\":-33.6123556,\"lon\":66.287449}" + + "]}") + .get("geoV")); + } + @Test public void constructArrayOfIPsReturnsFirstIndex() { assertEquals( @@ -688,14 +702,50 @@ public void constructIP() { tupleValue("{\"ipV\":\"192.168.0.1\"}").get("ipV")); } + private static final double TOLERANCE = 1E-5; + @Test public void constructGeoPoint() { + final double lat = 42.60355556; + final double lon = -97.25263889; + final var expectedGeoPointValue = new OpenSearchExprGeoPointValue(lat, lon); + // An object with a latitude and longitude. assertEquals( - new OpenSearchExprGeoPointValue(42.60355556, -97.25263889), - tupleValue("{\"geoV\":{\"lat\":42.60355556,\"lon\":-97.25263889}}").get("geoV")); + expectedGeoPointValue, + tupleValue(String.format("{\"geoV\":{\"lat\":%.8f,\"lon\":%.8f}}", lat, lon)).get("geoV")); + + // A string in the “latitude,longitude” format. assertEquals( - new OpenSearchExprGeoPointValue(42.60355556, -97.25263889), - tupleValue("{\"geoV\":{\"lat\":\"42.60355556\",\"lon\":\"-97.25263889\"}}").get("geoV")); + expectedGeoPointValue, + tupleValue(String.format("{\"geoV\":\"%.8f,%.8f\"}", lat, lon)).get("geoV")); + + // A geohash. + var point = + (OpenSearchExprGeoPointValue.GeoPoint) + tupleValue(String.format("{\"geoV\":\"%s\"}", Geohash.stringEncode(lon, lat))) + .get("geoV") + .value(); + assertEquals(lat, point.getLat(), TOLERANCE); + assertEquals(lon, point.getLon(), TOLERANCE); + + // An array in the [longitude, latitude] format. + assertEquals( + expectedGeoPointValue, + tupleValue(String.format("{\"geoV\":[%.8f, %.8f]}", lon, lat)).get("geoV")); + + // A Well-Known Text POINT in the “POINT(longitude latitude)” format. + assertEquals( + expectedGeoPointValue, + tupleValue(String.format("{\"geoV\":\"POINT (%.8f %.8f)\"}", lon, lat)).get("geoV")); + + // GeoJSON format, where the coordinates are in the [longitude, latitude] format + assertEquals( + expectedGeoPointValue, + tupleValue( + String.format( + "{\"geoV\":{\"type\":\"Point\",\"coordinates\":[%.8f,%.8f]}}", lon, lat)) + .get("geoV")); + assertEquals( new OpenSearchExprGeoPointValue(42.60355556, -97.25263889), constructFromObject("geoV", "42.60355556,-97.25263889")); @@ -703,38 +753,23 @@ public void constructGeoPoint() { @Test public void constructGeoPointFromUnsupportedFormatShouldThrowException() { - IllegalStateException exception = + OpenSearchParseException exception = assertThrows( - IllegalStateException.class, - () -> tupleValue("{\"geoV\":[42.60355556,-97.25263889]}").get("geoV")); - assertEquals( - "geo point must in format of {\"lat\": number, \"lon\": number}", exception.getMessage()); + OpenSearchParseException.class, + () -> tupleValue("{\"geoV\": [42.60355556, false]}").get("geoV")); + assertEquals("lat must be a number, got false", exception.getMessage()); exception = assertThrows( - IllegalStateException.class, + OpenSearchParseException.class, () -> tupleValue("{\"geoV\":{\"lon\":-97.25263889}}").get("geoV")); - assertEquals( - "geo point must in format of {\"lat\": number, \"lon\": number}", exception.getMessage()); - - exception = - assertThrows( - IllegalStateException.class, - () -> tupleValue("{\"geoV\":{\"lat\":-97.25263889}}").get("geoV")); - assertEquals( - "geo point must in format of {\"lat\": number, \"lon\": number}", exception.getMessage()); + assertEquals("field [lat] missing", exception.getMessage()); exception = assertThrows( - IllegalStateException.class, + OpenSearchParseException.class, () -> tupleValue("{\"geoV\":{\"lat\":true,\"lon\":-97.25263889}}").get("geoV")); - assertEquals("latitude must be number value, but got value: true", exception.getMessage()); - - exception = - assertThrows( - IllegalStateException.class, - () -> tupleValue("{\"geoV\":{\"lat\":42.60355556,\"lon\":false}}").get("geoV")); - assertEquals("longitude must be number value, but got value: false", exception.getMessage()); + assertEquals("lat must be a number", exception.getMessage()); } @Test