From 73b0be3c962112efe541ae04fe0ea6f298558f17 Mon Sep 17 00:00:00 2001 From: marchpure Date: Mon, 6 Jun 2022 12:07:26 +0800 Subject: [PATCH] [HUDI-4192] HoodieHFileReader scan top cells after bottom cells throw NullPointerException (#5755) SeekTo top cells avoid NullPointerException --- .../storage/TestHoodieHFileReaderWriter.java | 32 +++++++++++++++++++ .../hudi/io/storage/HoodieHFileReader.java | 6 ++++ 2 files changed, 38 insertions(+) diff --git a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/io/storage/TestHoodieHFileReaderWriter.java b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/io/storage/TestHoodieHFileReaderWriter.java index da6f71725887..baede154c99e 100644 --- a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/io/storage/TestHoodieHFileReaderWriter.java +++ b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/io/storage/TestHoodieHFileReaderWriter.java @@ -294,6 +294,38 @@ public void testReaderGetRecordIteratorByKeyPrefixes() throws Exception { StreamSupport.stream(Spliterators.spliteratorUnknownSize(iterator, Spliterator.ORDERED), false) .collect(Collectors.toList()); assertEquals(Collections.emptyList(), recordsByPrefix); + + // filter for "key50" and "key1" : entries from key50 and 'key10 to key19' should be matched. + List expectedKey50and1s = allRecords.stream().filter(entry -> (entry.get("_row_key").toString()).contains("key1") + || (entry.get("_row_key").toString()).contains("key50")).collect(Collectors.toList()); + iterator = + hfileReader.getRecordsByKeyPrefixIterator(Arrays.asList("key50", "key1"), avroSchema); + recordsByPrefix = + StreamSupport.stream(Spliterators.spliteratorUnknownSize(iterator, Spliterator.ORDERED), false) + .collect(Collectors.toList()); + assertEquals(expectedKey50and1s, recordsByPrefix); + + // filter for "key50" and "key0" : entries from key50 and 'key00 to key09' should be matched. 
+ List expectedKey50and0s = allRecords.stream().filter(entry -> (entry.get("_row_key").toString()).contains("key0") + || (entry.get("_row_key").toString()).contains("key50")).collect(Collectors.toList()); + iterator = + hfileReader.getRecordsByKeyPrefixIterator(Arrays.asList("key50", "key0"), avroSchema); + recordsByPrefix = + StreamSupport.stream(Spliterators.spliteratorUnknownSize(iterator, Spliterator.ORDERED), false) + .collect(Collectors.toList()); + assertEquals(expectedKey50and0s, recordsByPrefix); + + // filter for "key1" and "key0" : entries from 'key10 to key19' and 'key00 to key09' should be matched. + List expectedKey1sand0s = expectedKey1s; + expectedKey1sand0s.addAll(allRecords.stream() + .filter(entry -> (entry.get("_row_key").toString()).contains("key0")) + .collect(Collectors.toList())); + iterator = + hfileReader.getRecordsByKeyPrefixIterator(Arrays.asList("key1", "key0"), avroSchema); + recordsByPrefix = + StreamSupport.stream(Spliterators.spliteratorUnknownSize(iterator, Spliterator.ORDERED), false) + .collect(Collectors.toList()); + assertEquals(expectedKey1sand0s, recordsByPrefix); } @ParameterizedTest diff --git a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileReader.java b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileReader.java index 899c2475da26..0bf31d2a2593 100644 --- a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileReader.java +++ b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileReader.java @@ -258,6 +258,12 @@ private static Iterator getRecordByKeyPrefixIteratorInternal(HFil if (!scanner.next()) { return Collections.emptyIterator(); } + } else if (val == -1) { + // If the scanner is already at the top of the HFile, avoid triggering seekTo again. + Option headerCell = Option.fromJavaOptional(scanner.getReader().getFirstKey()); + if (headerCell.isPresent() && !headerCell.get().equals(scanner.getCell())) { + scanner.seekTo(); + } } class KeyPrefixIterator implements Iterator {