Skip to content

Commit

Permalink
Fixing getRecordByKeys in HoodieBackedTableMetadata
Browse files Browse the repository at this point in the history
  • Loading branch information
nsivabalan committed Jun 6, 2022
1 parent 597733e commit f85071f
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
Expand Down Expand Up @@ -299,7 +300,7 @@ public void testReaderGetRecordIteratorByKeyPrefixes() throws Exception {
List<GenericRecord> expectedKey50and1s = allRecords.stream().filter(entry -> (entry.get("_row_key").toString()).contains("key1")
|| (entry.get("_row_key").toString()).contains("key50")).collect(Collectors.toList());
iterator =
hfileReader.getRecordsByKeyPrefixIterator(Arrays.asList("key1", "key50"), avroSchema);
hfileReader.getRecordsByKeyPrefixIterator(Arrays.asList("key50", "key1"), avroSchema);
recordsByPrefix =
StreamSupport.stream(Spliterators.spliteratorUnknownSize(iterator, Spliterator.ORDERED), false)
.collect(Collectors.toList());
Expand All @@ -309,21 +310,27 @@ public void testReaderGetRecordIteratorByKeyPrefixes() throws Exception {
List<GenericRecord> expectedKey50and0s = allRecords.stream().filter(entry -> (entry.get("_row_key").toString()).contains("key0")
|| (entry.get("_row_key").toString()).contains("key50")).collect(Collectors.toList());
iterator =
hfileReader.getRecordsByKeyPrefixIterator(Arrays.asList("key0", "key50"), avroSchema);
hfileReader.getRecordsByKeyPrefixIterator(Arrays.asList("key50", "key0"), avroSchema);
recordsByPrefix =
StreamSupport.stream(Spliterators.spliteratorUnknownSize(iterator, Spliterator.ORDERED), false)
.collect(Collectors.toList());
assertEquals(expectedKey50and0s, recordsByPrefix);

// filter for "key1" and "key0" : entries from 'key10 to key19' and 'key00 to key09' should be matched.
List<GenericRecord> expectedKey1sand0s = allRecords.stream()
.filter(entry -> (entry.get("_row_key").toString()).contains("key0") || (entry.get("_row_key").toString()).contains("key1"))
.filter(entry -> (entry.get("_row_key").toString()).contains("key1") || (entry.get("_row_key").toString()).contains("key0"))
.collect(Collectors.toList());
iterator =
hfileReader.getRecordsByKeyPrefixIterator(Arrays.asList("key0", "key1"), avroSchema);
hfileReader.getRecordsByKeyPrefixIterator(Arrays.asList("key1", "key0"), avroSchema);
recordsByPrefix =
StreamSupport.stream(Spliterators.spliteratorUnknownSize(iterator, Spliterator.ORDERED), false)
.collect(Collectors.toList());
Collections.sort(recordsByPrefix, new Comparator<GenericRecord>() {
@Override
public int compare(GenericRecord o1, GenericRecord o2) {
return o1.get("_row_key").toString().compareTo(o2.get("_row_key").toString());
}
});
assertEquals(expectedKey1sand0s, recordsByPrefix);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,9 @@ private static Iterator<GenericRecord> getRecordByKeyPrefixIteratorInternal(HFil
if (!scanner.next()) {
return Collections.emptyIterator();
}
} else if (val == -1) {
// Seek to the beginning of the file; this is a key-prefix search anyway.
scanner.seekTo();
}

class KeyPrefixIterator implements Iterator<GenericRecord> {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,6 @@

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
Expand Down Expand Up @@ -198,8 +197,12 @@ public HoodieData<HoodieRecord<HoodieMetadataPayload>> getRecordsByKeyPrefixes(L
}

@Override
public List<Pair<String, Option<HoodieRecord<HoodieMetadataPayload>>>> getRecordsByKeys(List<String> keys,
public List<Pair<String, Option<HoodieRecord<HoodieMetadataPayload>>>> getRecordsByKeys(List<String> keysUnsorted,
String partitionName) {
// Sort a copy of the keys so that they are looked up in order (the caller's list is left untouched)
List<String> keys = new ArrayList<>();
keys.addAll(keysUnsorted);
Collections.sort(keys);
Map<Pair<String, FileSlice>, List<String>> partitionFileSliceToKeysMap = getPartitionFileSliceToKeysMapping(partitionName, keys);
List<Pair<String, Option<HoodieRecord<HoodieMetadataPayload>>>> result = new ArrayList<>();
AtomicInteger fileSlicesKeysCount = new AtomicInteger();
Expand Down

0 comments on commit f85071f

Please sign in to comment.