Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow more than 256 attribute keys #275

Merged
merged 5 commits into from
Jun 20, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -56,22 +56,18 @@ public final class FeatureGroup implements Iterable<FeatureGroup.TileFeatures>,
private static final Logger LOGGER = LoggerFactory.getLogger(FeatureGroup.class);
private final FeatureSort sorter;
private final Profile profile;
private final CommonStringEncoder commonStrings;
private final CommonStringEncoder.AsByte commonLayerStrings = new CommonStringEncoder.AsByte();
private final CommonStringEncoder commonValueStrings = new CommonStringEncoder(100_000);
private final Stats stats;
private final LayerStats layerStats = new LayerStats();
private volatile boolean prepared = false;

FeatureGroup(FeatureSort sorter, Profile profile, CommonStringEncoder commonStrings, Stats stats) {
FeatureGroup(FeatureSort sorter, Profile profile, Stats stats) {
this.sorter = sorter;
this.profile = profile;
this.commonStrings = commonStrings;
this.stats = stats;
}

FeatureGroup(FeatureSort sorter, Profile profile, Stats stats) {
this(sorter, profile, new CommonStringEncoder(), stats);
}

/** Returns a feature grouper that stores all feature in-memory. Only suitable for toy use-cases like unit tests. */
public static FeatureGroup newInMemoryFeatureGroup(Profile profile, Stats stats) {
return new FeatureGroup(FeatureSort.newInMemory(), profile, stats);
Expand Down Expand Up @@ -190,7 +186,7 @@ public void close() throws IOException {

private long encodeKey(RenderedFeature feature) {
var vectorTileFeature = feature.vectorTileFeature();
byte encodedLayer = commonStrings.encode(vectorTileFeature.layer());
byte encodedLayer = commonLayerStrings.encode(vectorTileFeature.layer());
return encodeKey(
feature.tile().encoded(),
encodedLayer,
Expand All @@ -214,7 +210,7 @@ private byte[] encodeValue(VectorTile.Feature vectorTileFeature, RenderedFeature
packer.packMapHeader((int) attrs.values().stream().filter(Objects::nonNull).count());
for (Map.Entry<String, Object> entry : attrs.entrySet()) {
if (entry.getValue() != null) {
packer.packByte(commonStrings.encode(entry.getKey()));
packer.packInt(commonValueStrings.encode(entry.getKey()));
Object value = entry.getValue();
if (value instanceof String string) {
packer.packValue(ValueFactory.newString(string));
Expand Down Expand Up @@ -427,7 +423,7 @@ private VectorTile.Feature decodeVectorTileFeature(SortableFeature entry) {
int mapSize = unpacker.unpackMapHeader();
Map<String, Object> attrs = new HashMap<>(mapSize);
for (int i = 0; i < mapSize; i++) {
String key = commonStrings.decode(unpacker.unpackByte());
String key = commonValueStrings.decode(unpacker.unpackInt());
Value v = unpacker.unpackValue();
if (v.isStringValue()) {
attrs.put(key, v.asStringValue().asString());
Expand All @@ -444,7 +440,7 @@ private VectorTile.Feature decodeVectorTileFeature(SortableFeature entry) {
for (int i = 0; i < commandSize; i++) {
commands[i] = unpacker.unpackInt();
}
String layer = commonStrings.decode(extractLayerIdFromKey(entry.key()));
String layer = commonLayerStrings.decode(extractLayerIdFromKey(entry.key()));
return new VectorTile.Feature(
layer,
id,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,54 +1,77 @@
package com.onthegomap.planetiler.util;

import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.atomic.AtomicInteger;
import javax.annotation.concurrent.ThreadSafe;

/**
* A utility for compressing up to 250 commonly-used strings (i.e. layer name, tag attributes) into a single byte.
* A utility for compressing commonly-used strings (i.e. layer name, tag attributes).
*/
@ThreadSafe
public class CommonStringEncoder {
ZeLonewolf marked this conversation as resolved.
Show resolved Hide resolved

private final ConcurrentMap<String, Byte> stringToId = new ConcurrentHashMap<>(255);
private final String[] idToString = new String[255];
private final AtomicInteger layerId = new AtomicInteger(0);
private final int maxStrings;

private final Map<String, Integer> stringToId;
private final String[] idToString;
private final AtomicInteger stringId = new AtomicInteger(0);

public CommonStringEncoder(int maxStrings) {
this.maxStrings = maxStrings;
stringToId = new ConcurrentHashMap<>(maxStrings);
idToString = new String[maxStrings];
}

/**
* Returns the string for {@code id}.
*
* @throws IllegalArgumentException if there is no value for {@code id}.
*/
public String decode(byte id) {
String str = idToString[id & 0xff];
public String decode(int id) {
String str = idToString[id];
if (str == null) {
throw new IllegalArgumentException("No string for " + id);
}
return str;
}

/**
* Returns a byte value to each unique string passed in.
* Returns a int value to each unique string passed in.
*
* @param string the string to store
* @return a byte that can be converted back to a string by {@link #decode(byte)}.
* @return an int that can be converted back to a string by {@link #decode(int)}.
* @throws IllegalArgumentException if called for too many values
*/
public byte encode(String string) {
public int encode(String string) {
// optimization to avoid more expensive computeIfAbsent call for the majority case when concurrent hash map already
// contains the value.
Byte result = stringToId.get(string);
Integer result = stringToId.get(string);
if (result == null) {
result = stringToId.computeIfAbsent(string, s -> {
int id = layerId.getAndIncrement();
if (id > 250) {
throw new IllegalArgumentException("Too many string keys when inserting " + string);
int id = stringId.getAndIncrement();
if (id >= maxStrings) {
throw new IllegalArgumentException("Too many strings");
}
idToString[id] = string;
return (byte) id;
return id;
});
}
return result;
}

/**
* Variant of CommonStringEncoder based on byte rather than int for string indexing.
*/
public static class AsByte {
private final CommonStringEncoder encoder = new CommonStringEncoder(256);

public String decode(byte id) {
return encoder.decode(id & 0xff);
}

public byte encode(String string) {
return (byte) encoder.encode(string);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import com.onthegomap.planetiler.stats.Stats;
import com.onthegomap.planetiler.util.ZoomFunction;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.StreamSupport;
Expand Down Expand Up @@ -582,4 +583,32 @@ void testMultiLineStringCoercion() throws GeometryException {

assertFalse(iter.hasNext());
}

@Test
void testManyAttr() {

Map<String, Object> tags = new HashMap<>();

for (int i = 0; i < 500; i++) {
tags.put("key" + i, "val" + i);
}

var collector = factory.get(newReaderFeature(newPoint(0, 0), tags));
var point = collector.point("layername");

for (int i = 0; i < 500; i++) {
point.setAttr("key" + i, tags.get("key" + i));
}

assertFeatures(13, List.of(
Map.of(
"key0", "val0",
"key10", "val10",
"key100", "val100",
"key256", "val256",
"key499", "val499"
)
), collector);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -7,26 +7,48 @@

class CommonStringEncoderTest {

private final CommonStringEncoder commonStringEncoder = new CommonStringEncoder();
private final CommonStringEncoder commonStringEncoderInteger = new CommonStringEncoder(100_000);
private final CommonStringEncoder.AsByte commonStringEncoderByte = new CommonStringEncoder.AsByte();

@Test
void testRoundTrip() {
byte a = commonStringEncoder.encode("a");
byte b = commonStringEncoder.encode("b");
assertEquals("a", commonStringEncoder.decode(a));
assertEquals(a, commonStringEncoder.encode("a"));
assertEquals("b", commonStringEncoder.decode(b));
assertThrows(IllegalArgumentException.class, () -> commonStringEncoder.decode((byte) (b + 1)));
void testRoundTripByte() {
byte a = commonStringEncoderByte.encode("a");
byte b = commonStringEncoderByte.encode("b");
assertEquals("a", commonStringEncoderByte.decode(a));
assertEquals(a, commonStringEncoderByte.encode("a"));
assertEquals("b", commonStringEncoderByte.decode(b));
assertThrows(IllegalArgumentException.class, () -> commonStringEncoderByte.decode((byte) (b + 1)));
}

@Test
void testLimitsTo250() {
for (int i = 0; i <= 250; i++) {
void testRoundTripInteger() {
int a = commonStringEncoderInteger.encode("a");
int b = commonStringEncoderInteger.encode("b");
assertEquals("a", commonStringEncoderInteger.decode(a));
assertEquals(a, commonStringEncoderInteger.encode("a"));
assertEquals("b", commonStringEncoderInteger.decode(b));
assertThrows(IllegalArgumentException.class, () -> commonStringEncoderInteger.decode(b + 1));
}

@Test
void testByteLimitsToMax() {
for (int i = 0; i <= 255; i++) {
String string = Integer.toString(i);
byte encoded = commonStringEncoderByte.encode(string);
String decoded = commonStringEncoderByte.decode(encoded);
assertEquals(string, decoded);
}
assertThrows(IllegalArgumentException.class, () -> commonStringEncoderByte.encode("too many"));
}

@Test
void testIntDoesNotLimitTo250() {
for (int i = 0; i < 100_000; i++) {
String string = Integer.toString(i);
byte encoded = commonStringEncoder.encode(Integer.toString(i));
String decoded = commonStringEncoder.decode(encoded);
int encoded = commonStringEncoderInteger.encode(string);
String decoded = commonStringEncoderInteger.decode(encoded);
assertEquals(string, decoded);
}
assertThrows(IllegalArgumentException.class, () -> commonStringEncoder.encode("too many"));
assertThrows(IllegalArgumentException.class, () -> commonStringEncoderInteger.encode("too many"));
}
}