-
Notifications
You must be signed in to change notification settings - Fork 4.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add MultimapState API #23491
Add MultimapState API #23491
Changes from 7 commits
306db0f
4bd6e61
43831b2
bc8685e
3339044
1debe2b
838d18a
dde2c79
4c23ec6
e92f14d
a826d8c
ce8291f
e1d765b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,6 +17,7 @@ | |
*/ | ||
package org.apache.beam.runners.core; | ||
|
||
import java.util.AbstractMap; | ||
import java.util.ArrayList; | ||
import java.util.Arrays; | ||
import java.util.Collection; | ||
|
@@ -36,6 +37,7 @@ | |
import org.apache.beam.sdk.state.BagState; | ||
import org.apache.beam.sdk.state.CombiningState; | ||
import org.apache.beam.sdk.state.MapState; | ||
import org.apache.beam.sdk.state.MultimapState; | ||
import org.apache.beam.sdk.state.OrderedListState; | ||
import org.apache.beam.sdk.state.ReadableState; | ||
import org.apache.beam.sdk.state.ReadableStates; | ||
|
@@ -51,11 +53,13 @@ | |
import org.apache.beam.sdk.util.CoderUtils; | ||
import org.apache.beam.sdk.util.CombineFnUtil; | ||
import org.apache.beam.sdk.values.TimestampedValue; | ||
import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ArrayListMultimap; | ||
import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList; | ||
import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableSet; | ||
import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Iterables; | ||
import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Lists; | ||
import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Maps; | ||
import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Multimap; | ||
import org.checkerframework.checker.initialization.qual.Initialized; | ||
import org.checkerframework.checker.nullness.qual.NonNull; | ||
import org.checkerframework.checker.nullness.qual.UnknownKeyFor; | ||
|
@@ -153,6 +157,14 @@ public <KeyT, ValueT> MapState<KeyT, ValueT> bindMap( | |
return new InMemoryMap<>(mapKeyCoder, mapValueCoder); | ||
} | ||
|
||
@Override | ||
public <KeyT, ValueT> MultimapState<KeyT, ValueT> bindMultimap( | ||
StateTag<MultimapState<KeyT, ValueT>> spec, | ||
Coder<KeyT> keyCoder, | ||
Coder<ValueT> valueCoder) { | ||
return new InMemoryMultimap<>(keyCoder, valueCoder); | ||
} | ||
|
||
@Override | ||
public <T> OrderedListState<T> bindOrderedList( | ||
StateTag<OrderedListState<T>> spec, Coder<T> elemCoder) { | ||
|
@@ -463,6 +475,124 @@ public InMemoryBag<T> copy() { | |
} | ||
} | ||
|
||
/** An {@link InMemoryState} implementation of {@link MultimapState}. */ | ||
public static final class InMemoryMultimap<K, V> | ||
implements MultimapState<K, V>, InMemoryState<InMemoryMultimap<K, V>> { | ||
private final Coder<K> keyCoder; | ||
private final Coder<V> valueCoder; | ||
private Multimap<Object, V> contents = ArrayListMultimap.create(); | ||
private Map<Object, K> structuralKeysMapping = Maps.newHashMap(); | ||
|
||
public InMemoryMultimap(Coder<K> keyCoder, Coder<V> valueCoder) { | ||
this.keyCoder = keyCoder; | ||
this.valueCoder = valueCoder; | ||
} | ||
|
||
@Override | ||
public void clear() { | ||
contents = ArrayListMultimap.create(); | ||
structuralKeysMapping = Maps.newHashMap(); | ||
} | ||
|
||
@Override | ||
public void put(K key, V value) { | ||
Object structuralKey = keyCoder.structuralValue(key); | ||
structuralKeysMapping.put(structuralKey, key); | ||
contents.put(structuralKey, value); | ||
} | ||
|
||
@Override | ||
public void remove(K key) { | ||
Object structuralKey = keyCoder.structuralValue(key); | ||
structuralKeysMapping.remove(structuralKey); | ||
contents.removeAll(structuralKey); | ||
} | ||
|
||
@Override | ||
public @UnknownKeyFor @NonNull @Initialized ReadableState<Iterable<V>> get(K key) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Are these annotations auto generated? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I have removed the annotations. |
||
return CollectionViewState.of(contents.get(keyCoder.structuralValue(key))); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This returns a stale view in this order of operations:
I believe there are other combinations where you won't get what you want. Please ensure that ParDoTest covers:
Currently I see your covering entries before and after put/remove. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done, updated ParDoTest to include those as well. |
||
} | ||
|
||
@Override | ||
public ReadableState<Iterable<K>> keys() { | ||
return CollectionViewState.of(structuralKeysMapping.values()); | ||
} | ||
|
||
@Override | ||
public ReadableState<Iterable<Map.Entry<K, Iterable<V>>>> entries() { | ||
return new ReadableState<Iterable<Map.Entry<K, Iterable<V>>>>() { | ||
@Override | ||
public Iterable<Map.Entry<K, Iterable<V>>> read() { | ||
List<Map.Entry<K, Iterable<V>>> result = new ArrayList<>(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. = Lists.arrayListWithExpectedSize(structuralKeysMapping.size() There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done |
||
for (Map.Entry<Object, K> entry : structuralKeysMapping.entrySet()) { | ||
result.add( | ||
new AbstractMap.SimpleEntry( | ||
entry.getValue(), ImmutableList.copyOf(contents.get(entry.getKey())))); | ||
} | ||
return ImmutableList.copyOf(result); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Collections.unmodifiableList, since it doesn't copy There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done |
||
} | ||
|
||
@Override | ||
public ReadableState<Iterable<Map.Entry<K, Iterable<V>>>> readLater() { | ||
return this; | ||
} | ||
}; | ||
} | ||
|
||
@Override | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ditto on annotations There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done |
||
public @UnknownKeyFor @NonNull @Initialized ReadableState< | ||
@UnknownKeyFor @NonNull @Initialized Boolean> | ||
containsKey(K key) { | ||
return new ReadableState<Boolean>() { | ||
@Override | ||
public Boolean read() { | ||
return structuralKeysMapping.containsKey(keyCoder.structuralValue(key)); | ||
} | ||
|
||
@Override | ||
public @UnknownKeyFor @NonNull @Initialized ReadableState<Boolean> readLater() { | ||
return this; | ||
} | ||
}; | ||
} | ||
|
||
@Override | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. annotations There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done |
||
public @UnknownKeyFor @NonNull @Initialized ReadableState< | ||
@UnknownKeyFor @NonNull @Initialized Boolean> | ||
isEmpty() { | ||
return new ReadableState<Boolean>() { | ||
@Override | ||
public Boolean read() { | ||
return contents.isEmpty(); | ||
} | ||
|
||
@Override | ||
public @UnknownKeyFor @NonNull @Initialized ReadableState<Boolean> readLater() { | ||
return this; | ||
} | ||
}; | ||
} | ||
|
||
@Override | ||
public boolean isCleared() { | ||
return contents.isEmpty(); | ||
} | ||
|
||
@Override | ||
public InMemoryMultimap<K, V> copy() { | ||
InMemoryMultimap<K, V> that = new InMemoryMultimap<>(keyCoder, valueCoder); | ||
for (K key : this.structuralKeysMapping.values()) { | ||
// Make a copy of structuralKey as well | ||
Object structuralKey = keyCoder.structuralValue(key); | ||
that.structuralKeysMapping.put(structuralKey, uncheckedClone(keyCoder, key)); | ||
for (V value : this.contents.get(structuralKey)) { | ||
that.contents.put(structuralKey, uncheckedClone(valueCoder, value)); | ||
} | ||
} | ||
return that; | ||
} | ||
} | ||
|
||
/** An {@link InMemoryState} implementation of {@link OrderedListState}. */ | ||
public static final class InMemoryOrderedList<T> | ||
implements OrderedListState<T>, InMemoryState<InMemoryOrderedList<T>> { | ||
|
@@ -625,6 +755,28 @@ public InMemorySet<T> copy() { | |
} | ||
} | ||
|
||
private static class CollectionViewState<T> implements ReadableState<Iterable<T>> { | ||
private final Collection<T> collection; | ||
|
||
private CollectionViewState(Collection<T> collection) { | ||
this.collection = collection; | ||
} | ||
|
||
public static <T> CollectionViewState<T> of(Collection<T> collection) { | ||
return new CollectionViewState<>(collection); | ||
} | ||
|
||
@Override | ||
public Iterable<T> read() { | ||
return ImmutableList.copyOf(collection); | ||
} | ||
|
||
@Override | ||
public ReadableState<Iterable<T>> readLater() { | ||
return this; | ||
} | ||
} | ||
|
||
/** An {@link InMemoryState} implementation of {@link MapState}. */ | ||
public static final class InMemoryMap<K, V> | ||
implements MapState<K, V>, InMemoryState<InMemoryMap<K, V>> { | ||
|
@@ -685,28 +837,6 @@ public void remove(K key) { | |
contents.remove(key); | ||
} | ||
|
||
private static class CollectionViewState<T> implements ReadableState<Iterable<T>> { | ||
private final Collection<T> collection; | ||
|
||
private CollectionViewState(Collection<T> collection) { | ||
this.collection = collection; | ||
} | ||
|
||
public static <T> CollectionViewState<T> of(Collection<T> collection) { | ||
return new CollectionViewState<>(collection); | ||
} | ||
|
||
@Override | ||
public Iterable<T> read() { | ||
return ImmutableList.copyOf(collection); | ||
} | ||
|
||
@Override | ||
public ReadableState<Iterable<T>> readLater() { | ||
return this; | ||
} | ||
} | ||
|
||
@Override | ||
public ReadableState<Iterable<K>> keys() { | ||
return CollectionViewState.of(contents.keySet()); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
why not just clear the instantiated contents?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
See
beam/runners/core-java/src/main/java/org/apache/beam/runners/core/InMemoryStateInternals.java
Line 411 in af6a04c
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
+1
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think we should create a new container instead of
clear()
, this is also how Bag/Map/Set doclear()
.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The +1 was for your response that we need to keep stuff consistent for past reads that were returned.