Skip to content

Commit

Permalink
[HUDI-4192] HoodieHFileReader scan top cells after bottom cells throw…
Browse files Browse the repository at this point in the history
… NullPointerException (#5755)

Seek to the top cells to avoid a NullPointerException
  • Loading branch information
marchpure authored Jun 6, 2022
1 parent 5d18b80 commit 73b0be3
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -294,6 +294,38 @@ public void testReaderGetRecordIteratorByKeyPrefixes() throws Exception {
StreamSupport.stream(Spliterators.spliteratorUnknownSize(iterator, Spliterator.ORDERED), false)
.collect(Collectors.toList());
assertEquals(Collections.emptyList(), recordsByPrefix);

// filter for "key50" and "key1" : entries from key50 and 'key10 to key19' should be matched.
List<GenericRecord> expectedKey50and1s = allRecords.stream().filter(entry -> (entry.get("_row_key").toString()).contains("key1")
|| (entry.get("_row_key").toString()).contains("key50")).collect(Collectors.toList());
iterator =
hfileReader.getRecordsByKeyPrefixIterator(Arrays.asList("key50", "key1"), avroSchema);
recordsByPrefix =
StreamSupport.stream(Spliterators.spliteratorUnknownSize(iterator, Spliterator.ORDERED), false)
.collect(Collectors.toList());
assertEquals(expectedKey50and1s, recordsByPrefix);

// filter for "key50" and "key0" : entries from key50 and 'key00 to key09' should be matched.
List<GenericRecord> expectedKey50and0s = allRecords.stream().filter(entry -> (entry.get("_row_key").toString()).contains("key0")
|| (entry.get("_row_key").toString()).contains("key50")).collect(Collectors.toList());
iterator =
hfileReader.getRecordsByKeyPrefixIterator(Arrays.asList("key50", "key0"), avroSchema);
recordsByPrefix =
StreamSupport.stream(Spliterators.spliteratorUnknownSize(iterator, Spliterator.ORDERED), false)
.collect(Collectors.toList());
assertEquals(expectedKey50and0s, recordsByPrefix);

// filter for "key1" and "key0" : entries from 'key10 to key19' and 'key00 to key09' should be matched.
List<GenericRecord> expectedKey1sand0s = expectedKey1s;
expectedKey1sand0s.addAll(allRecords.stream()
.filter(entry -> (entry.get("_row_key").toString()).contains("key0"))
.collect(Collectors.toList()));
iterator =
hfileReader.getRecordsByKeyPrefixIterator(Arrays.asList("key1", "key0"), avroSchema);
recordsByPrefix =
StreamSupport.stream(Spliterators.spliteratorUnknownSize(iterator, Spliterator.ORDERED), false)
.collect(Collectors.toList());
assertEquals(expectedKey1sand0s, recordsByPrefix);
}

@ParameterizedTest
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,12 @@ private static Iterator<GenericRecord> getRecordByKeyPrefixIteratorInternal(HFil
if (!scanner.next()) {
return Collections.emptyIterator();
}
} else if (val == -1) {
// If the scanner is already on the first cell of the HFile, avoid triggering seekTo again.
Option<Cell> headerCell = Option.fromJavaOptional(scanner.getReader().getFirstKey());
if (headerCell.isPresent() && !headerCell.get().equals(scanner.getCell())) {
scanner.seekTo();
}
}

class KeyPrefixIterator implements Iterator<GenericRecord> {
Expand Down

0 comments on commit 73b0be3

Please sign in to comment.