From 6f4b59997c516024d980b18c4d75321622c3e159 Mon Sep 17 00:00:00 2001 From: haomarch Date: Mon, 6 Jun 2022 02:37:20 +0800 Subject: [PATCH] [HUDI-4192] HoodieHFileReader scan top cells after bottom cells throw NullPointerException --- .../storage/TestHoodieHFileReaderWriter.java | 32 +++++++++++++++++++ .../hudi/io/storage/HoodieHFileReader.java | 7 ++++ 2 files changed, 39 insertions(+) diff --git a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/io/storage/TestHoodieHFileReaderWriter.java b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/io/storage/TestHoodieHFileReaderWriter.java index da6f717258877..baede154c99e4 100644 --- a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/io/storage/TestHoodieHFileReaderWriter.java +++ b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/io/storage/TestHoodieHFileReaderWriter.java @@ -294,6 +294,38 @@ public void testReaderGetRecordIteratorByKeyPrefixes() throws Exception { StreamSupport.stream(Spliterators.spliteratorUnknownSize(iterator, Spliterator.ORDERED), false) .collect(Collectors.toList()); assertEquals(Collections.emptyList(), recordsByPrefix); + + // filter for "key50" and "key1" : entries from key50 and 'key10 to key19' should be matched. + List expectedKey50and1s = allRecords.stream().filter(entry -> (entry.get("_row_key").toString()).contains("key1") + || (entry.get("_row_key").toString()).contains("key50")).collect(Collectors.toList()); + iterator = + hfileReader.getRecordsByKeyPrefixIterator(Arrays.asList("key50", "key1"), avroSchema); + recordsByPrefix = + StreamSupport.stream(Spliterators.spliteratorUnknownSize(iterator, Spliterator.ORDERED), false) + .collect(Collectors.toList()); + assertEquals(expectedKey50and1s, recordsByPrefix); + + // filter for "key50" and "key0" : entries from key50 and 'key00 to key09' should be matched. 
+ List expectedKey50and0s = allRecords.stream().filter(entry -> (entry.get("_row_key").toString()).contains("key0") + || (entry.get("_row_key").toString()).contains("key50")).collect(Collectors.toList()); + iterator = + hfileReader.getRecordsByKeyPrefixIterator(Arrays.asList("key50", "key0"), avroSchema); + recordsByPrefix = + StreamSupport.stream(Spliterators.spliteratorUnknownSize(iterator, Spliterator.ORDERED), false) + .collect(Collectors.toList()); + assertEquals(expectedKey50and0s, recordsByPrefix); + + // filter for "key1" and "key0" : entries from 'key10 to key19' and 'key00 to key09' should be matched. + List expectedKey1sand0s = expectedKey1s; + expectedKey1sand0s.addAll(allRecords.stream() + .filter(entry -> (entry.get("_row_key").toString()).contains("key0")) + .collect(Collectors.toList())); + iterator = + hfileReader.getRecordsByKeyPrefixIterator(Arrays.asList("key1", "key0"), avroSchema); + recordsByPrefix = + StreamSupport.stream(Spliterators.spliteratorUnknownSize(iterator, Spliterator.ORDERED), false) + .collect(Collectors.toList()); + assertEquals(expectedKey1sand0s, recordsByPrefix); } @ParameterizedTest diff --git a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileReader.java b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileReader.java index 899c2475da26c..7776288ead41e 100644 --- a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileReader.java +++ b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileReader.java @@ -52,6 +52,7 @@ import java.util.Iterator; import java.util.List; import java.util.Objects; +import java.util.Optional; import java.util.Set; import java.util.TreeSet; import java.util.stream.Collectors; @@ -258,6 +259,12 @@ private static Iterator getRecordByKeyPrefixIteratorInternal(HFil if (!scanner.next()) { return Collections.emptyIterator(); } + } else if (val == -1) { + // If the scanner is already at the top of the hfile, avoid triggering seekTo again. 
+ Optional headerCell = scanner.getReader().getFirstKey(); + if (headerCell.isPresent() && !headerCell.get().equals(scanner.getCell())) { + scanner.seekTo(); + } } class KeyPrefixIterator implements Iterator {