From 8e7b89514fee3c855b344126c4a3d0c9ac26c13c Mon Sep 17 00:00:00 2001 From: Igor Motov Date: Fri, 6 Apr 2018 14:33:30 -0400 Subject: [PATCH 1/2] Fix overflow error in parsing of long geohashes Fixes a possible overflow error that geohashes longer than 12 character can cause during parsing. --- .../common/geo/GeoHashUtils.java | 19 ++++++++++++++----- .../common/geo/GeoHashTests.java | 16 ++++++++++++++++ 2 files changed, 30 insertions(+), 5 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/common/geo/GeoHashUtils.java b/server/src/main/java/org/elasticsearch/common/geo/GeoHashUtils.java index acfb8970e684c..d2ca936740e27 100644 --- a/server/src/main/java/org/elasticsearch/common/geo/GeoHashUtils.java +++ b/server/src/main/java/org/elasticsearch/common/geo/GeoHashUtils.java @@ -72,15 +72,19 @@ public static final long longEncode(final double lon, final double lat, final in /** * Encode from geohash string to the geohash based long format (lon/lat interleaved, 4 least significant bits = level) */ - public static final long longEncode(final String hash) { - int level = hash.length()-1; + private static long longEncode(final String hash, int length) { + int level = length - 1; long b; long l = 0L; for(char c : hash.toCharArray()) { b = (long)(BASE_32_STRING.indexOf(c)); l |= (b<<(level--*5)); + if (level < 0) { + // We cannot handle more than 12 levels + break; + } } - return (l<<4)|hash.length(); + return (l << 4) | length; } /** @@ -173,6 +177,10 @@ public static final long mortonEncode(final String hash) { for(char c : hash.toCharArray()) { b = (long)(BASE_32_STRING.indexOf(c)); l |= (b<<((level--*5) + MORTON_OFFSET)); + if (level < 0) { + // We cannot handle more than 12 levels + break; + } } return BitUtil.flipFlop(l); } @@ -200,13 +208,14 @@ private static char encode(int x, int y) { public static Rectangle bbox(final String geohash) { // bottom left is the coordinate GeoPoint bottomLeft = GeoPoint.fromGeohash(geohash); - long ghLong = longEncode(geohash); + int len = Math.min(12, geohash.length()); + long ghLong = longEncode(geohash, len); // shift away the level ghLong >>>= 4; // deinterleave and add 1 to lat and lon to get topRight long lat = BitUtil.deinterleave(ghLong >>> 1) + 1; long lon = BitUtil.deinterleave(ghLong) + 1; - GeoPoint topRight = GeoPoint.fromGeohash(BitUtil.interleave((int)lon, (int)lat) << 4 | geohash.length()); + GeoPoint topRight = GeoPoint.fromGeohash(BitUtil.interleave((int)lon, (int)lat) << 4 | len); return new Rectangle(bottomLeft.lat(), topRight.lat(), bottomLeft.lon(), topRight.lon()); } diff --git a/server/src/test/java/org/elasticsearch/common/geo/GeoHashTests.java b/server/src/test/java/org/elasticsearch/common/geo/GeoHashTests.java index e4856fd01136b..2726380b7e3bc 100644 --- a/server/src/test/java/org/elasticsearch/common/geo/GeoHashTests.java +++ b/server/src/test/java/org/elasticsearch/common/geo/GeoHashTests.java @@ -82,4 +82,20 @@ public void testGeohashExtremes() { assertEquals("xbpbpbpbpbpb", GeoHashUtils.stringEncode(180, 0)); assertEquals("zzzzzzzzzzzz", GeoHashUtils.stringEncode(180, 90)); } + + public void testLongGeohashes() { + for (int i = 0; i < 100000; i++) { + String geohash = randomGeohash(12, 12); + GeoPoint expected = GeoPoint.fromGeohash(geohash); + // Adding some random geohash characters at the end + String extendedGeohash = geohash + randomGeohash(1, 10); + GeoPoint actual = GeoPoint.fromGeohash(extendedGeohash); + assertEquals("Additional data points above 12 should be ignored [" + extendedGeohash + "]" , expected, actual); + + Rectangle expectedBbox = GeoHashUtils.bbox(geohash); + Rectangle actualBbox = GeoHashUtils.bbox(extendedGeohash); + assertEquals("Additional data points above 12 should be ignored [" + extendedGeohash + "]" , expectedBbox, actualBbox); + + } + } } From 474eaa6e2206e39f6620fdc8c8a9214c5730d6cc Mon Sep 17 00:00:00 2001 From: Igor Motov Date: Tue, 10 Apr 2018 09:12:23 -0400 Subject: [PATCH 2/2] Add a note about geohash precision limitations --- docs/reference/mapping/types/geo-point.asciidoc | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/docs/reference/mapping/types/geo-point.asciidoc b/docs/reference/mapping/types/geo-point.asciidoc index ae81773e6a0a2..57faef2dbd7db 100644 --- a/docs/reference/mapping/types/geo-point.asciidoc +++ b/docs/reference/mapping/types/geo-point.asciidoc @@ -92,6 +92,16 @@ format was changed early on to conform to the format used by GeoJSON. ================================================== +[NOTE] +A point can be expressed as a http://en.wikipedia.org/wiki/Geohash[geohash]. +Geohashes are https://en.wikipedia.org/wiki/Base32[base32] encoded strings of +the bits of the latitude and longitude interleaved. Each character in a geohash +adds additional 5 bits to the precision. So the longer the hash, the more +precise it is. For the indexing purposed geohashs are translated into +latitude-longitude pairs. During this process only first 12 characters are +used, so specifying more than 12 characters in a geohash doesn't increase the +precision. The 12 characters provide 60 bits, which should reduce a possible +error to less than 2cm. [[geo-point-params]] ==== Parameters for `geo_point` fields