From aaa17faf9f3985608af0c71414218ffe879e12ec Mon Sep 17 00:00:00 2001 From: Brian Sperlongano Date: Sat, 18 Jun 2022 21:23:15 -0400 Subject: [PATCH 1/5] Allow more than 256 attributes per layer --- .../planetiler/collection/FeatureGroup.java | 8 ++-- .../planetiler/util/CommonStringEncoder.java | 46 ++++++++++++++++--- .../planetiler/FeatureCollectorTest.java | 29 ++++++++++++ .../util/CommonStringEncoderTest.java | 18 ++++---- 4 files changed, 82 insertions(+), 19 deletions(-) diff --git a/planetiler-core/src/main/java/com/onthegomap/planetiler/collection/FeatureGroup.java b/planetiler-core/src/main/java/com/onthegomap/planetiler/collection/FeatureGroup.java index c152f2d822..045418ad44 100644 --- a/planetiler-core/src/main/java/com/onthegomap/planetiler/collection/FeatureGroup.java +++ b/planetiler-core/src/main/java/com/onthegomap/planetiler/collection/FeatureGroup.java @@ -190,7 +190,7 @@ public void close() throws IOException { private long encodeKey(RenderedFeature feature) { var vectorTileFeature = feature.vectorTileFeature(); - byte encodedLayer = commonStrings.encode(vectorTileFeature.layer()); + byte encodedLayer = commonStrings.encodeByte(vectorTileFeature.layer()); return encodeKey( feature.tile().encoded(), encodedLayer, @@ -214,7 +214,7 @@ private byte[] encodeValue(VectorTile.Feature vectorTileFeature, RenderedFeature packer.packMapHeader((int) attrs.values().stream().filter(Objects::nonNull).count()); for (Map.Entry entry : attrs.entrySet()) { if (entry.getValue() != null) { - packer.packByte(commonStrings.encode(entry.getKey())); + packer.packInt(commonStrings.encodeInt(entry.getKey())); Object value = entry.getValue(); if (value instanceof String string) { packer.packValue(ValueFactory.newString(string)); @@ -427,7 +427,7 @@ private VectorTile.Feature decodeVectorTileFeature(SortableFeature entry) { int mapSize = unpacker.unpackMapHeader(); Map attrs = new HashMap<>(mapSize); for (int i = 0; i < mapSize; i++) { - String key = commonStrings.decode(unpacker.unpackByte()); + String key = commonStrings.decodeInt(unpacker.unpackInt()); Value v = unpacker.unpackValue(); if (v.isStringValue()) { attrs.put(key, v.asStringValue().asString()); @@ -444,7 +444,7 @@ private VectorTile.Feature decodeVectorTileFeature(SortableFeature entry) { for (int i = 0; i < commandSize; i++) { commands[i] = unpacker.unpackInt(); } - String layer = commonStrings.decode(extractLayerIdFromKey(entry.key())); + String layer = commonStrings.decodeByte(extractLayerIdFromKey(entry.key())); return new VectorTile.Feature( layer, id, diff --git a/planetiler-core/src/main/java/com/onthegomap/planetiler/util/CommonStringEncoder.java b/planetiler-core/src/main/java/com/onthegomap/planetiler/util/CommonStringEncoder.java index fe031c21bb..17941e800c 100644 --- a/planetiler-core/src/main/java/com/onthegomap/planetiler/util/CommonStringEncoder.java +++ b/planetiler-core/src/main/java/com/onthegomap/planetiler/util/CommonStringEncoder.java @@ -1,26 +1,30 @@ package com.onthegomap.planetiler.util; +import java.util.Map; import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ConcurrentMap; import java.util.concurrent.atomic.AtomicInteger; import javax.annotation.concurrent.ThreadSafe; /** - * A utility for compressing up to 250 commonly-used strings (i.e. layer name, tag attributes) into a single byte. + * A utility for compressing commonly-used strings (i.e. layer name, tag attributes). */ @ThreadSafe public class CommonStringEncoder { - private final ConcurrentMap stringToId = new ConcurrentHashMap<>(255); + private final Map stringToId = new ConcurrentHashMap<>(255); private final String[] idToString = new String[255]; private final AtomicInteger layerId = new AtomicInteger(0); + private final Map stringToIdMap = new ConcurrentHashMap<>(255); + private final Map idToStringMap = new ConcurrentHashMap<>(); + private final AtomicInteger intStringId = new AtomicInteger(0); + /** * Returns the string for {@code id}. * * @throws IllegalArgumentException if there is no value for {@code id}. */ - public String decode(byte id) { + public String decodeByte(byte id) { String str = idToString[id & 0xff]; if (str == null) { throw new IllegalArgumentException("No string for " + id); @@ -28,14 +32,27 @@ public String decode(byte id) { return str; } + /** + * Returns the string for {@code id}. + * + * @throws IllegalArgumentException if there is no value for {@code id}. + */ + public String decodeInt(int id) { + String str = idToStringMap.get(id); + if (str == null) { + throw new IllegalArgumentException("No string for " + id); + } + return str; + } + /** * Returns a byte value to each unique string passed in. * * @param string the string to store - * @return a byte that can be converted back to a string by {@link #decode(byte)}. + * @return a byte that can be converted back to a string by {@link #decodeByte(byte)}. * @throws IllegalArgumentException if called for too many values */ - public byte encode(String string) { + public byte encodeByte(String string) { // optimization to avoid more expensive computeIfAbsent call for the majority case when concurrent hash map already // contains the value. Byte result = stringToId.get(string); @@ -51,4 +68,21 @@ public byte encode(String string) { } return result; } + + /** + * Returns a int value to each unique string passed in. + * + * @param string the string to store + * @return an int that can be converted back to a string by {@link #decodeInt(int)}. + * @throws IllegalArgumentException if called for too many values + */ + public int encodeInt(String string) { + // optimization to avoid more expensive computeIfAbsent call for the majority case when concurrent hash map already + // contains the value. + return stringToIdMap.computeIfAbsent(string, s -> { + int id = intStringId.getAndIncrement(); + idToStringMap.put(id, string); + return id; + }); + } } diff --git a/planetiler-core/src/test/java/com/onthegomap/planetiler/FeatureCollectorTest.java b/planetiler-core/src/test/java/com/onthegomap/planetiler/FeatureCollectorTest.java index 75dabe7c48..7f91bb7dbf 100644 --- a/planetiler-core/src/test/java/com/onthegomap/planetiler/FeatureCollectorTest.java +++ b/planetiler-core/src/test/java/com/onthegomap/planetiler/FeatureCollectorTest.java @@ -13,6 +13,7 @@ import com.onthegomap.planetiler.stats.Stats; import com.onthegomap.planetiler.util.ZoomFunction; import java.util.Arrays; +import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.stream.StreamSupport; @@ -582,4 +583,32 @@ void testMultiLineStringCoercion() throws GeometryException { assertFalse(iter.hasNext()); } + + @Test + void testManyAttr() { + + Map tags = new HashMap<>(); + + for (int i = 0; i < 500; i++) { + tags.put("key" + i, "val" + i); + } + + var collector = factory.get(newReaderFeature(newPoint(0, 0), tags)); + var point = collector.point("layername"); + + for (int i = 0; i < 500; i++) { + point.setAttr("key" + i, tags.get("key" + i)); + } + + assertFeatures(13, List.of( + Map.of( + "key0", "val0", + "key10", "val10", + "key100", "val100", + "key256", "val256", + "key499", "val499" + ) + ), collector); + } + } diff --git a/planetiler-core/src/test/java/com/onthegomap/planetiler/util/CommonStringEncoderTest.java b/planetiler-core/src/test/java/com/onthegomap/planetiler/util/CommonStringEncoderTest.java index e161314dd7..d1063951ab 100644 --- a/planetiler-core/src/test/java/com/onthegomap/planetiler/util/CommonStringEncoderTest.java +++ b/planetiler-core/src/test/java/com/onthegomap/planetiler/util/CommonStringEncoderTest.java @@ -11,22 +11,22 @@ class CommonStringEncoderTest { @Test void testRoundTrip() { - byte a = commonStringEncoder.encode("a"); - byte b = commonStringEncoder.encode("b"); - assertEquals("a", commonStringEncoder.decode(a)); - assertEquals(a, commonStringEncoder.encode("a")); - assertEquals("b", commonStringEncoder.decode(b)); - assertThrows(IllegalArgumentException.class, () -> commonStringEncoder.decode((byte) (b + 1))); + byte a = commonStringEncoder.encodeByte("a"); + byte b = commonStringEncoder.encodeByte("b"); + assertEquals("a", commonStringEncoder.decodeByte(a)); + assertEquals(a, commonStringEncoder.encodeByte("a")); + assertEquals("b", commonStringEncoder.decodeByte(b)); + assertThrows(IllegalArgumentException.class, () -> commonStringEncoder.decodeByte((byte) (b + 1))); } @Test void testLimitsTo250() { for (int i = 0; i <= 250; i++) { String string = Integer.toString(i); - byte encoded = commonStringEncoder.encode(Integer.toString(i)); - String decoded = commonStringEncoder.decode(encoded); + byte encoded = commonStringEncoder.encodeByte(Integer.toString(i)); + String decoded = commonStringEncoder.decodeByte(encoded); assertEquals(string, decoded); } - assertThrows(IllegalArgumentException.class, () -> commonStringEncoder.encode("too many")); + assertThrows(IllegalArgumentException.class, () -> commonStringEncoder.encodeByte("too many")); } } From 3c46d562f74df90dbe2832e9048717aa0a892df8 Mon Sep 17 00:00:00 2001 From: Brian Sperlongano Date: Sun, 19 Jun 2022 14:10:31 -0400 Subject: [PATCH 2/5] Changes based on code review --- .../planetiler/collection/FeatureGroup.java | 14 ++-- .../planetiler/util/CommonStringEncoder.java | 80 +++++++------------ .../util/CommonStringEncoderTest.java | 37 ++++++--- 3 files changed, 62 insertions(+), 69 deletions(-) diff --git a/planetiler-core/src/main/java/com/onthegomap/planetiler/collection/FeatureGroup.java b/planetiler-core/src/main/java/com/onthegomap/planetiler/collection/FeatureGroup.java index 045418ad44..94b4be0432 100644 --- a/planetiler-core/src/main/java/com/onthegomap/planetiler/collection/FeatureGroup.java +++ b/planetiler-core/src/main/java/com/onthegomap/planetiler/collection/FeatureGroup.java @@ -56,12 +56,12 @@ public final class FeatureGroup implements Iterable, private static final Logger LOGGER = LoggerFactory.getLogger(FeatureGroup.class); private final FeatureSort sorter; private final Profile profile; - private final CommonStringEncoder commonStrings; + private final CommonStringEncoder.AsByte commonStrings; private final Stats stats; private final LayerStats layerStats = new LayerStats(); private volatile boolean prepared = false; - FeatureGroup(FeatureSort sorter, Profile profile, CommonStringEncoder commonStrings, Stats stats) { + FeatureGroup(FeatureSort sorter, Profile profile, CommonStringEncoder.AsByte commonStrings, Stats stats) { this.sorter = sorter; this.profile = profile; this.commonStrings = commonStrings; @@ -69,7 +69,7 @@ public final class FeatureGroup implements Iterable, } FeatureGroup(FeatureSort sorter, Profile profile, Stats stats) { - this(sorter, profile, new CommonStringEncoder(), stats); + this(sorter, profile, new CommonStringEncoder.AsByte(), stats); } /** Returns a feature grouper that stores all feature in-memory. Only suitable for toy use-cases like unit tests. */ @@ -190,7 +190,7 @@ public void close() throws IOException { private long encodeKey(RenderedFeature feature) { var vectorTileFeature = feature.vectorTileFeature(); - byte encodedLayer = commonStrings.encodeByte(vectorTileFeature.layer()); + byte encodedLayer = commonStrings.encode(vectorTileFeature.layer()); return encodeKey( feature.tile().encoded(), encodedLayer, @@ -214,7 +214,7 @@ private byte[] encodeValue(VectorTile.Feature vectorTileFeature, RenderedFeature packer.packMapHeader((int) attrs.values().stream().filter(Objects::nonNull).count()); for (Map.Entry entry : attrs.entrySet()) { if (entry.getValue() != null) { - packer.packInt(commonStrings.encodeInt(entry.getKey())); + packer.packInt(commonStrings.encode(entry.getKey())); Object value = entry.getValue(); if (value instanceof String string) { packer.packValue(ValueFactory.newString(string)); @@ -427,7 +427,7 @@ private VectorTile.Feature decodeVectorTileFeature(SortableFeature entry) { int mapSize = unpacker.unpackMapHeader(); Map attrs = new HashMap<>(mapSize); for (int i = 0; i < mapSize; i++) { - String key = commonStrings.decodeInt(unpacker.unpackInt()); + String key = commonStrings.decode(unpacker.unpackByte()); Value v = unpacker.unpackValue(); if (v.isStringValue()) { attrs.put(key, v.asStringValue().asString()); @@ -444,7 +444,7 @@ private VectorTile.Feature decodeVectorTileFeature(SortableFeature entry) { for (int i = 0; i < commandSize; i++) { commands[i] = unpacker.unpackInt(); } - String layer = commonStrings.decodeByte(extractLayerIdFromKey(entry.key())); + String layer = commonStrings.decode(extractLayerIdFromKey(entry.key())); return new VectorTile.Feature( layer, id, diff --git a/planetiler-core/src/main/java/com/onthegomap/planetiler/util/CommonStringEncoder.java b/planetiler-core/src/main/java/com/onthegomap/planetiler/util/CommonStringEncoder.java index 17941e800c..321013fd4a 100644 --- a/planetiler-core/src/main/java/com/onthegomap/planetiler/util/CommonStringEncoder.java +++ b/planetiler-core/src/main/java/com/onthegomap/planetiler/util/CommonStringEncoder.java @@ -11,21 +11,19 @@ @ThreadSafe public class CommonStringEncoder { - private final Map stringToId = new ConcurrentHashMap<>(255); - private final String[] idToString = new String[255]; - private final AtomicInteger layerId = new AtomicInteger(0); + private static final int MAX_STRINGS = 100_000; - private final Map stringToIdMap = new ConcurrentHashMap<>(255); - private final Map idToStringMap = new ConcurrentHashMap<>(); - private final AtomicInteger intStringId = new AtomicInteger(0); + private final Map stringToId = new ConcurrentHashMap<>(MAX_STRINGS); + private final String[] idToString = new String[MAX_STRINGS]; + private final AtomicInteger stringId = new AtomicInteger(0); /** * Returns the string for {@code id}. * * @throws IllegalArgumentException if there is no value for {@code id}. */ - public String decodeByte(byte id) { - String str = idToString[id & 0xff]; + public String decode(int id) { + String str = idToString[id]; if (str == null) { throw new IllegalArgumentException("No string for " + id); } @@ -33,56 +31,40 @@ public String decodeByte(byte id) { } /** - * Returns the string for {@code id}. - * - * @throws IllegalArgumentException if there is no value for {@code id}. - */ - public String decodeInt(int id) { - String str = idToStringMap.get(id); - if (str == null) { - throw new IllegalArgumentException("No string for " + id); - } - return str; - } - - /** - * Returns a byte value to each unique string passed in. + * Returns a int value to each unique string passed in. * * @param string the string to store - * @return a byte that can be converted back to a string by {@link #decodeByte(byte)}. + * @return an int that can be converted back to a string by {@link #decode(int)}. * @throws IllegalArgumentException if called for too many values */ - public byte encodeByte(String string) { + public int encode(String string) { // optimization to avoid more expensive computeIfAbsent call for the majority case when concurrent hash map already // contains the value. - Byte result = stringToId.get(string); - if (result == null) { - result = stringToId.computeIfAbsent(string, s -> { - int id = layerId.getAndIncrement(); - if (id > 250) { - throw new IllegalArgumentException("Too many string keys when inserting " + string); - } - idToString[id] = string; - return (byte) id; - }); - } - return result; + return stringToId.computeIfAbsent(string, s -> { + int id = stringId.getAndIncrement(); + if (id >= MAX_STRINGS) { + throw new IllegalArgumentException("Too many strings"); + } + idToString[id] = string; + return id; + }); } /** - * Returns a int value to each unique string passed in. - * - * @param string the string to store - * @return an int that can be converted back to a string by {@link #decodeInt(int)}. - * @throws IllegalArgumentException if called for too many values + * Variant of CommonStringEncoder based on byte rather than int for string indexing. */ - public int encodeInt(String string) { - // optimization to avoid more expensive computeIfAbsent call for the majority case when concurrent hash map already - // contains the value. - return stringToIdMap.computeIfAbsent(string, s -> { - int id = intStringId.getAndIncrement(); - idToStringMap.put(id, string); - return id; - }); + public static class AsByte { + private final CommonStringEncoder encoder = new CommonStringEncoder(); + + public String decode(byte id) { + return encoder.decode(id & 0xff); + } + + public byte encode(String string) { + if (encoder.stringId.get() > 255) { + throw new IllegalArgumentException("Too many strings"); + } + return (byte) encoder.encode(string); + } } } diff --git a/planetiler-core/src/test/java/com/onthegomap/planetiler/util/CommonStringEncoderTest.java b/planetiler-core/src/test/java/com/onthegomap/planetiler/util/CommonStringEncoderTest.java index d1063951ab..3d9d72b98a 100644 --- a/planetiler-core/src/test/java/com/onthegomap/planetiler/util/CommonStringEncoderTest.java +++ b/planetiler-core/src/test/java/com/onthegomap/planetiler/util/CommonStringEncoderTest.java @@ -7,26 +7,37 @@ class CommonStringEncoderTest { - private final CommonStringEncoder commonStringEncoder = new CommonStringEncoder(); + private final CommonStringEncoder commonStringEncoderInteger = new CommonStringEncoder(); + private final CommonStringEncoder.AsByte commonStringEncoderByte = new CommonStringEncoder.AsByte(); @Test - void testRoundTrip() { - byte a = commonStringEncoder.encodeByte("a"); - byte b = commonStringEncoder.encodeByte("b"); - assertEquals("a", commonStringEncoder.decodeByte(a)); - assertEquals(a, commonStringEncoder.encodeByte("a")); - assertEquals("b", commonStringEncoder.decodeByte(b)); - assertThrows(IllegalArgumentException.class, () -> commonStringEncoder.decodeByte((byte) (b + 1))); + void testRoundTripByte() { + byte a = commonStringEncoderByte.encode("a"); + byte b = commonStringEncoderByte.encode("b"); + assertEquals("a", commonStringEncoderByte.decode(a)); + assertEquals(a, commonStringEncoderByte.encode("a")); + assertEquals("b", commonStringEncoderByte.decode(b)); + assertThrows(IllegalArgumentException.class, () -> commonStringEncoderByte.decode((byte) (b + 1))); } @Test - void testLimitsTo250() { - for (int i = 0; i <= 250; i++) { + void testRoundTripInteger() { + int a = commonStringEncoderInteger.encode("a"); + int b = commonStringEncoderInteger.encode("b"); + assertEquals("a", commonStringEncoderInteger.decode(a)); + assertEquals(a, commonStringEncoderInteger.encode("a")); + assertEquals("b", commonStringEncoderInteger.decode(b)); + assertThrows(IllegalArgumentException.class, () -> commonStringEncoderInteger.decode(b + 1)); + } + + @Test + void testByteLimitsTo250() { + for (int i = 0; i <= 255; i++) { String string = Integer.toString(i); - byte encoded = commonStringEncoder.encodeByte(Integer.toString(i)); - String decoded = commonStringEncoder.decodeByte(encoded); + byte encoded = commonStringEncoderByte.encode(Integer.toString(i)); + String decoded = commonStringEncoderByte.decode(encoded); assertEquals(string, decoded); } - assertThrows(IllegalArgumentException.class, () -> commonStringEncoder.encodeByte("too many")); + assertThrows(IllegalArgumentException.class, () -> commonStringEncoderByte.encode("too many")); } } From 85ef266ebed216712234ea91c5c94f8aa1296c5f Mon Sep 17 00:00:00 2001 From: Brian Sperlongano Date: Sun, 19 Jun 2022 14:15:01 -0400 Subject: [PATCH 3/5] Add null check --- .../planetiler/util/CommonStringEncoder.java | 20 +++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/planetiler-core/src/main/java/com/onthegomap/planetiler/util/CommonStringEncoder.java b/planetiler-core/src/main/java/com/onthegomap/planetiler/util/CommonStringEncoder.java index 321013fd4a..b30a570d7b 100644 --- a/planetiler-core/src/main/java/com/onthegomap/planetiler/util/CommonStringEncoder.java +++ b/planetiler-core/src/main/java/com/onthegomap/planetiler/util/CommonStringEncoder.java @@ -40,14 +40,18 @@ public String decode(int id) { public int encode(String string) { // optimization to avoid more expensive computeIfAbsent call for the majority case when concurrent hash map already // contains the value. - return stringToId.computeIfAbsent(string, s -> { - int id = stringId.getAndIncrement(); - if (id >= MAX_STRINGS) { - throw new IllegalArgumentException("Too many strings"); - } - idToString[id] = string; - return id; - }); + Integer result = stringToId.get(string); + if (result == null) { + result = stringToId.computeIfAbsent(string, s -> { + int id = stringId.getAndIncrement(); + if (id >= MAX_STRINGS) { + throw new IllegalArgumentException("Too many strings"); + } + idToString[id] = string; + return id; + }); + } + return result; } /** From ef9245f8008b128591a55a88ae61e4b1a0cde94a Mon Sep 17 00:00:00 2001 From: Brian Sperlongano Date: Sun, 19 Jun 2022 21:32:50 -0400 Subject: [PATCH 4/5] improve byte/int variants of CommonStringEncoder --- .../planetiler/collection/FeatureGroup.java | 18 +++++++----------- .../planetiler/util/CommonStringEncoder.java | 19 +++++++++++-------- .../util/CommonStringEncoderTest.java | 4 ++-- 3 files changed, 20 insertions(+), 21 deletions(-) diff --git a/planetiler-core/src/main/java/com/onthegomap/planetiler/collection/FeatureGroup.java b/planetiler-core/src/main/java/com/onthegomap/planetiler/collection/FeatureGroup.java index 94b4be0432..46892bf13b 100644 --- a/planetiler-core/src/main/java/com/onthegomap/planetiler/collection/FeatureGroup.java +++ b/planetiler-core/src/main/java/com/onthegomap/planetiler/collection/FeatureGroup.java @@ -56,22 +56,18 @@ public final class FeatureGroup implements Iterable, private static final Logger LOGGER = LoggerFactory.getLogger(FeatureGroup.class); private final FeatureSort sorter; private final Profile profile; - private final CommonStringEncoder.AsByte commonStrings; + private final CommonStringEncoder.AsByte commonLayerStrings = new CommonStringEncoder.AsByte(); + private final CommonStringEncoder commonValueStrings = new CommonStringEncoder(100_000); private final Stats stats; private final LayerStats layerStats = new LayerStats(); private volatile boolean prepared = false; - FeatureGroup(FeatureSort sorter, Profile profile, CommonStringEncoder.AsByte commonStrings, Stats stats) { + FeatureGroup(FeatureSort sorter, Profile profile, Stats stats) { this.sorter = sorter; this.profile = profile; - this.commonStrings = commonStrings; this.stats = stats; } - FeatureGroup(FeatureSort sorter, Profile profile, Stats stats) { - this(sorter, profile, new CommonStringEncoder.AsByte(), stats); - } - /** Returns a feature grouper that stores all feature in-memory. Only suitable for toy use-cases like unit tests. */ public static FeatureGroup newInMemoryFeatureGroup(Profile profile, Stats stats) { return new FeatureGroup(FeatureSort.newInMemory(), profile, stats); @@ -190,7 +186,7 @@ public void close() throws IOException { private long encodeKey(RenderedFeature feature) { var vectorTileFeature = feature.vectorTileFeature(); - byte encodedLayer = commonStrings.encode(vectorTileFeature.layer()); + byte encodedLayer = commonLayerStrings.encode(vectorTileFeature.layer()); return encodeKey( feature.tile().encoded(), encodedLayer, @@ -214,7 +210,7 @@ private byte[] encodeValue(VectorTile.Feature vectorTileFeature, RenderedFeature packer.packMapHeader((int) attrs.values().stream().filter(Objects::nonNull).count()); for (Map.Entry entry : attrs.entrySet()) { if (entry.getValue() != null) { - packer.packInt(commonStrings.encode(entry.getKey())); + packer.packInt(commonValueStrings.encode(entry.getKey())); Object value = entry.getValue(); if (value instanceof String string) { packer.packValue(ValueFactory.newString(string)); @@ -427,7 +423,7 @@ private VectorTile.Feature decodeVectorTileFeature(SortableFeature entry) { int mapSize = unpacker.unpackMapHeader(); Map attrs = new HashMap<>(mapSize); for (int i = 0; i < mapSize; i++) { - String key = commonStrings.decode(unpacker.unpackByte()); + String key = commonValueStrings.decode(unpacker.unpackInt()); Value v = unpacker.unpackValue(); if (v.isStringValue()) { attrs.put(key, v.asStringValue().asString()); @@ -444,7 +440,7 @@ private VectorTile.Feature decodeVectorTileFeature(SortableFeature entry) { for (int i = 0; i < commandSize; i++) { commands[i] = unpacker.unpackInt(); } - String layer = commonStrings.decode(extractLayerIdFromKey(entry.key())); + String layer = commonLayerStrings.decode(extractLayerIdFromKey(entry.key())); return new VectorTile.Feature( layer, id, diff --git a/planetiler-core/src/main/java/com/onthegomap/planetiler/util/CommonStringEncoder.java b/planetiler-core/src/main/java/com/onthegomap/planetiler/util/CommonStringEncoder.java index b30a570d7b..783e77ad55 100644 --- a/planetiler-core/src/main/java/com/onthegomap/planetiler/util/CommonStringEncoder.java +++ b/planetiler-core/src/main/java/com/onthegomap/planetiler/util/CommonStringEncoder.java @@ -11,12 +11,18 @@ @ThreadSafe public class CommonStringEncoder { - private static final int MAX_STRINGS = 100_000; + private final int maxStrings; - private final Map stringToId = new ConcurrentHashMap<>(MAX_STRINGS); - private final String[] idToString = new String[MAX_STRINGS]; + private final Map stringToId; + private final String[] idToString; private final AtomicInteger stringId = new AtomicInteger(0); + public CommonStringEncoder(int maxStrings) { + this.maxStrings = maxStrings; + stringToId = new ConcurrentHashMap<>(maxStrings); + idToString = new String[maxStrings]; + } + /** * Returns the string for {@code id}. * @@ -44,7 +50,7 @@ public int encode(String string) { if (result == null) { result = stringToId.computeIfAbsent(string, s -> { int id = stringId.getAndIncrement(); - if (id >= MAX_STRINGS) { + if (id >= maxStrings) { throw new IllegalArgumentException("Too many strings"); } idToString[id] = string; @@ -58,16 +64,13 @@ public int encode(String string) { * Variant of CommonStringEncoder based on byte rather than int for string indexing. */ public static class AsByte { - private final CommonStringEncoder encoder = new CommonStringEncoder(); + private final CommonStringEncoder encoder = new CommonStringEncoder(256); public String decode(byte id) { return encoder.decode(id & 0xff); } public byte encode(String string) { - if (encoder.stringId.get() > 255) { - throw new IllegalArgumentException("Too many strings"); - } return (byte) encoder.encode(string); } } diff --git a/planetiler-core/src/test/java/com/onthegomap/planetiler/util/CommonStringEncoderTest.java b/planetiler-core/src/test/java/com/onthegomap/planetiler/util/CommonStringEncoderTest.java index 3d9d72b98a..023aaa609c 100644 --- a/planetiler-core/src/test/java/com/onthegomap/planetiler/util/CommonStringEncoderTest.java +++ b/planetiler-core/src/test/java/com/onthegomap/planetiler/util/CommonStringEncoderTest.java @@ -7,7 +7,7 @@ class CommonStringEncoderTest { - private final CommonStringEncoder commonStringEncoderInteger = new CommonStringEncoder(); + private final CommonStringEncoder commonStringEncoderInteger = new CommonStringEncoder(100_000); private final CommonStringEncoder.AsByte commonStringEncoderByte = new CommonStringEncoder.AsByte(); @Test @@ -31,7 +31,7 @@ void testRoundTripInteger() { } @Test - void testByteLimitsTo250() { + void testByteLimitsToMax() { for (int i = 0; i <= 255; i++) { String string = Integer.toString(i); byte encoded = commonStringEncoderByte.encode(Integer.toString(i)); From 8f5524d8dcde8aef1bbec3b9028d02aff0002f83 Mon Sep 17 00:00:00 2001 From: Mike Barry Date: Mon, 20 Jun 2022 05:26:10 -0400 Subject: [PATCH 5/5] add test --- .../planetiler/util/CommonStringEncoderTest.java | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/planetiler-core/src/test/java/com/onthegomap/planetiler/util/CommonStringEncoderTest.java b/planetiler-core/src/test/java/com/onthegomap/planetiler/util/CommonStringEncoderTest.java index 023aaa609c..5fad22b467 100644 --- a/planetiler-core/src/test/java/com/onthegomap/planetiler/util/CommonStringEncoderTest.java +++ b/planetiler-core/src/test/java/com/onthegomap/planetiler/util/CommonStringEncoderTest.java @@ -34,10 +34,21 @@ void testRoundTripInteger() { void testByteLimitsToMax() { for (int i = 0; i <= 255; i++) { String string = Integer.toString(i); - byte encoded = commonStringEncoderByte.encode(Integer.toString(i)); + byte encoded = commonStringEncoderByte.encode(string); String decoded = commonStringEncoderByte.decode(encoded); assertEquals(string, decoded); } assertThrows(IllegalArgumentException.class, () -> commonStringEncoderByte.encode("too many")); } + + @Test + void testIntDoesNotLimitTo250() { + for (int i = 0; i < 100_000; i++) { + String string = Integer.toString(i); + int encoded = commonStringEncoderInteger.encode(string); + String decoded = commonStringEncoderInteger.decode(encoded); + assertEquals(string, decoded); + } + assertThrows(IllegalArgumentException.class, () -> commonStringEncoderInteger.encode("too many")); + } }