From 6ddbda83e89c85bfe281458b53aeedfb9435ef44 Mon Sep 17 00:00:00 2001 From: Jeffrey Xiao Date: Wed, 14 Aug 2019 17:07:09 -0400 Subject: [PATCH] Fix range deletion tombstone ingestion with global seqno If we are writing a global seqno for an ingested file, the range tombstone meta-block gets accessed and put in the block cache during ingestion preparation. At the time, the global seqno has not been determined so the cached block does not have a global seqno. When the file's ingested and we read its range tombstone meta-block, it will be returned from the cache with no global seqno. In that case, we use the actual seqnos stored in the range tombstones, which are all zero, so the tombstones cover nothing. This commit adds a flag to disable filling the cache when the range tombstone meta-block is accessed for the first time and adds a regression test for the bug. --- db/external_sst_file_basic_test.cc | 39 ++++++++++++++++++++++++++ db/external_sst_file_ingestion_job.cc | 4 ++- db/plain_table_db_test.cc | 3 +- include/rocksdb/table.h | 3 +- table/adaptive_table_factory.cc | 3 +- table/adaptive_table_factory.h | 3 +- table/block_based_table_factory.cc | 7 +++-- table/block_based_table_factory.h | 3 +- table/block_based_table_reader.cc | 8 ++++-- table/block_based_table_reader.h | 4 ++- table/cuckoo_table_factory.cc | 3 +- table/cuckoo_table_factory.h | 3 +- table/mock_table.cc | 3 +- table/mock_table.h | 3 +- table/plain_table_factory.cc | 3 +- table/plain_table_factory.h | 3 +- utilities/options/options_util_test.cc | 3 +- 17 files changed, 80 insertions(+), 18 deletions(-) diff --git a/db/external_sst_file_basic_test.cc b/db/external_sst_file_basic_test.cc index 2c1b64078d1..ece95d0022f 100644 --- a/db/external_sst_file_basic_test.cc +++ b/db/external_sst_file_basic_test.cc @@ -689,6 +689,45 @@ TEST_F(ExternalSSTFileBasicTest, FadviseTrigger) { rocksdb::SyncPoint::GetInstance()->DisableProcessing(); } +TEST_F(ExternalSSTFileBasicTest, IngestRangeDeletionTombstoneWithGlobalSeqno) { + for (int i = 5; i < 25; i++) { + ASSERT_OK(db_->Put(WriteOptions(), db_->DefaultColumnFamily(), Key(i), Key(i) + "_val")); + } + + Options options = CurrentOptions(); + options.disable_auto_compactions = true; + Reopen(options); + SstFileWriter sst_file_writer(EnvOptions(), options); + + // file.sst (delete 0 => 30) + std::string file = sst_files_dir_ + "file.sst"; + ASSERT_OK(sst_file_writer.Open(file)); + ASSERT_OK(sst_file_writer.DeleteRange(Key(0), Key(30))); + ExternalSstFileInfo file_info; + Status s = sst_file_writer.Finish(&file_info); + ASSERT_TRUE(s.ok()) << s.ToString(); + ASSERT_EQ(file_info.file_path, file); + ASSERT_EQ(file_info.num_entries, 0); + ASSERT_EQ(file_info.smallest_key, ""); + ASSERT_EQ(file_info.largest_key, ""); + ASSERT_EQ(file_info.num_range_del_entries, 1); + ASSERT_EQ(file_info.smallest_range_del_key, Key(0)); + ASSERT_EQ(file_info.largest_range_del_key, Key(30)); + + IngestExternalFileOptions ifo; + ifo.move_files = true; + ifo.snapshot_consistency = true; + ifo.allow_global_seqno = true; + ifo.write_global_seqno = true; + ifo.verify_checksums_before_ingest = true; + s = db_->IngestExternalFile({file}, ifo); + + ASSERT_OK(s); + for (int i = 5; i < 25; i++) { + ASSERT_EQ(Get(Key(i)), "NOT_FOUND"); + } +} + TEST_P(ExternalSSTFileBasicTest, IngestionWithRangeDeletions) { int kNumLevels = 7; Options options = CurrentOptions(); diff --git a/db/external_sst_file_ingestion_job.cc b/db/external_sst_file_ingestion_job.cc index 3b52e0a5379..e12b3195af0 100644 --- a/db/external_sst_file_ingestion_job.cc +++ b/db/external_sst_file_ingestion_job.cc @@ -305,7 +305,9 @@ Status ExternalSstFileIngestionJob::GetIngestedFileInfo( TableReaderOptions(*cfd_->ioptions(), sv->mutable_cf_options.prefix_extractor.get(), env_options_, cfd_->internal_comparator()), - std::move(sst_file_reader), file_to_ingest->file_size, &table_reader); + std::move(sst_file_reader), file_to_ingest->file_size, &table_reader, + false /* prefetch_index_and_filter_in_cache */, + false /* fill_cache_with_range_del_blocks */); if (!status.ok()) { return status; } diff --git a/db/plain_table_db_test.cc b/db/plain_table_db_test.cc index 2dd0cff0b41..64f09dcc276 100644 --- a/db/plain_table_db_test.cc +++ b/db/plain_table_db_test.cc @@ -338,7 +338,8 @@ class TestPlainTableFactory : public PlainTableFactory { const TableReaderOptions& table_reader_options, std::unique_ptr&& file, uint64_t file_size, std::unique_ptr* table, - bool /*prefetch_index_and_filter_in_cache*/) const override { + bool /*prefetch_index_and_filter_in_cache*/, + bool /*fill_cache_with_range_del_blocks*/) const override { TableProperties* props = nullptr; auto s = ReadTableProperties(file.get(), file_size, kPlainTableMagicNumber, diff --git a/include/rocksdb/table.h b/include/rocksdb/table.h index 88fcc78ed8c..bf44cd3a43a 100644 --- a/include/rocksdb/table.h +++ b/include/rocksdb/table.h @@ -504,7 +504,8 @@ class TableFactory { const TableReaderOptions& table_reader_options, std::unique_ptr&& file, uint64_t file_size, std::unique_ptr* table_reader, - bool prefetch_index_and_filter_in_cache = true) const = 0; + bool prefetch_index_and_filter_in_cache = true, + bool fill_cache_with_range_del_blocks = true) const = 0; // Return a table builder to write to a file for this table type. // diff --git a/table/adaptive_table_factory.cc b/table/adaptive_table_factory.cc index d5dcbc5f585..a9d25b5bafb 100644 --- a/table/adaptive_table_factory.cc +++ b/table/adaptive_table_factory.cc @@ -45,7 +45,8 @@ Status AdaptiveTableFactory::NewTableReader( const TableReaderOptions& table_reader_options, std::unique_ptr&& file, uint64_t file_size, std::unique_ptr* table, - bool /*prefetch_index_and_filter_in_cache*/) const { + bool /*prefetch_index_and_filter_in_cache*/, + bool /*fill_cache_with_range_del_blocks*/) const { Footer footer; auto s = ReadFooterFromFile(file.get(), nullptr /* prefetch_buffer */, file_size, &footer); diff --git a/table/adaptive_table_factory.h b/table/adaptive_table_factory.h index 9d606362e1d..85c3a3f6517 100644 --- a/table/adaptive_table_factory.h +++ b/table/adaptive_table_factory.h @@ -37,7 +37,8 @@ class AdaptiveTableFactory : public TableFactory { const TableReaderOptions& table_reader_options, std::unique_ptr&& file, uint64_t file_size, std::unique_ptr* table, - bool prefetch_index_and_filter_in_cache = true) const override; + bool prefetch_index_and_filter_in_cache = true, + bool fill_cache_with_range_del_blocks = true) const override; TableBuilder* NewTableBuilder( const TableBuilderOptions& table_builder_options, diff --git a/table/block_based_table_factory.cc b/table/block_based_table_factory.cc index 47fe8e1b0e3..8dc9e34eaab 100644 --- a/table/block_based_table_factory.cc +++ b/table/block_based_table_factory.cc @@ -196,12 +196,15 @@ Status BlockBasedTableFactory::NewTableReader( const TableReaderOptions& table_reader_options, std::unique_ptr&& file, uint64_t file_size, std::unique_ptr* table_reader, - bool prefetch_index_and_filter_in_cache) const { + bool prefetch_index_and_filter_in_cache, + bool fill_cache_with_range_del_blocks) const { return BlockBasedTable::Open( table_reader_options.ioptions, table_reader_options.env_options, table_options_, table_reader_options.internal_comparator, std::move(file), file_size, table_reader, table_reader_options.prefix_extractor, - prefetch_index_and_filter_in_cache, table_reader_options.skip_filters, + prefetch_index_and_filter_in_cache, + fill_cache_with_range_del_blocks, + table_reader_options.skip_filters, table_reader_options.level, table_reader_options.immortal, table_reader_options.largest_seqno, &tail_prefetch_stats_); } diff --git a/table/block_based_table_factory.h b/table/block_based_table_factory.h index 83676e9b9c0..272e4186cd8 100644 --- a/table/block_based_table_factory.h +++ b/table/block_based_table_factory.h @@ -54,7 +54,8 @@ class BlockBasedTableFactory : public TableFactory { const TableReaderOptions& table_reader_options, std::unique_ptr&& file, uint64_t file_size, std::unique_ptr* table_reader, - bool prefetch_index_and_filter_in_cache = true) const override; + bool prefetch_index_and_filter_in_cache = true, + bool fill_cache_with_range_del_blocks = true) const override; TableBuilder* NewTableBuilder( const TableBuilderOptions& table_builder_options, diff --git a/table/block_based_table_reader.cc b/table/block_based_table_reader.cc index d6c9ab88796..cc76309dea8 100644 --- a/table/block_based_table_reader.cc +++ b/table/block_based_table_reader.cc @@ -777,6 +777,7 @@ Status BlockBasedTable::Open(const ImmutableCFOptions& ioptions, std::unique_ptr* table_reader, const SliceTransform* prefix_extractor, const bool prefetch_index_and_filter_in_cache, + const bool fill_cache_with_range_del_blocks, const bool skip_filters, const int level, const bool immortal_table, const SequenceNumber largest_seqno, @@ -857,7 +858,8 @@ Status BlockBasedTable::Open(const ImmutableCFOptions& ioptions, return s; } s = ReadRangeDelBlock(rep, prefetch_buffer.get(), meta_iter.get(), - internal_comparator); + internal_comparator, + fill_cache_with_range_del_blocks); if (!s.ok()) { return s; } @@ -1065,7 +1067,8 @@ Status BlockBasedTable::ReadPropertiesBlock( Status BlockBasedTable::ReadRangeDelBlock( Rep* rep, FilePrefetchBuffer* prefetch_buffer, InternalIterator* meta_iter, - const InternalKeyComparator& internal_comparator) { + const InternalKeyComparator& internal_comparator, + const bool fill_cache_with_range_del_blocks) { Status s; bool found_range_del_block; BlockHandle range_del_handle; @@ -1077,6 +1080,7 @@ Status BlockBasedTable::ReadRangeDelBlock( s.ToString().c_str()); } else if (found_range_del_block && !range_del_handle.IsNull()) { ReadOptions read_options; + read_options.fill_cache = fill_cache_with_range_del_blocks; std::unique_ptr iter(NewDataBlockIterator( rep, read_options, range_del_handle, nullptr /* input_iter */, false /* is_index */, true /* key_includes_seq */, diff --git a/table/block_based_table_reader.h b/table/block_based_table_reader.h index 1fcc8cbfa07..8e0de15f032 100644 --- a/table/block_based_table_reader.h +++ b/table/block_based_table_reader.h @@ -95,6 +95,7 @@ class BlockBasedTable : public TableReader { std::unique_ptr* table_reader, const SliceTransform* prefix_extractor = nullptr, bool prefetch_index_and_filter_in_cache = true, + bool fill_cache_with_range_del_blocks = true, bool skip_filters = false, int level = -1, const bool immortal_table = false, const SequenceNumber largest_seqno = 0, @@ -385,7 +386,8 @@ class BlockBasedTable : public TableReader { static Status ReadRangeDelBlock( Rep* rep, FilePrefetchBuffer* prefetch_buffer, InternalIterator* meta_iter, - const InternalKeyComparator& internal_comparator); + const InternalKeyComparator& internal_comparator, + bool fill_cache_with_range_del_blocks); static Status ReadCompressionDictBlock( Rep* rep, FilePrefetchBuffer* prefetch_buffer, std::unique_ptr* compression_dict_block); diff --git a/table/cuckoo_table_factory.cc b/table/cuckoo_table_factory.cc index 74d18d51213..2674ccc32eb 100644 --- a/table/cuckoo_table_factory.cc +++ b/table/cuckoo_table_factory.cc @@ -16,7 +16,8 @@ Status CuckooTableFactory::NewTableReader( const TableReaderOptions& table_reader_options, std::unique_ptr&& file, uint64_t file_size, std::unique_ptr* table, - bool /*prefetch_index_and_filter_in_cache*/) const { + bool /*prefetch_index_and_filter_in_cache*/, + bool /*fill_cache_with_range_del_blocks*/) const { std::unique_ptr new_reader(new CuckooTableReader( table_reader_options.ioptions, std::move(file), file_size, table_reader_options.internal_comparator.user_comparator(), nullptr)); diff --git a/table/cuckoo_table_factory.h b/table/cuckoo_table_factory.h index eb3c5e51768..52c45700a84 100644 --- a/table/cuckoo_table_factory.h +++ b/table/cuckoo_table_factory.h @@ -62,7 +62,8 @@ class CuckooTableFactory : public TableFactory { const TableReaderOptions& table_reader_options, std::unique_ptr&& file, uint64_t file_size, std::unique_ptr* table, - bool prefetch_index_and_filter_in_cache = true) const override; + bool prefetch_index_and_filter_in_cache = true, + bool fill_cache_with_range_del_blocks = true) const override; TableBuilder* NewTableBuilder( const TableBuilderOptions& table_builder_options, diff --git a/table/mock_table.cc b/table/mock_table.cc index 9b250604803..80de8627a1a 100644 --- a/table/mock_table.cc +++ b/table/mock_table.cc @@ -68,7 +68,8 @@ Status MockTableFactory::NewTableReader( const TableReaderOptions& /*table_reader_options*/, std::unique_ptr&& file, uint64_t /*file_size*/, std::unique_ptr* table_reader, - bool /*prefetch_index_and_filter_in_cache*/) const { + bool /*prefetch_index_and_filter_in_cache*/, + bool /*fill_cache_with_range_del_blocks*/) const { uint32_t id = GetIDFromFile(file.get()); MutexLock lock_guard(&file_system_.mutex); diff --git a/table/mock_table.h b/table/mock_table.h index 5bca14644d8..eaedefdaf80 100644 --- a/table/mock_table.h +++ b/table/mock_table.h @@ -161,7 +161,8 @@ class MockTableFactory : public TableFactory { const TableReaderOptions& table_reader_options, std::unique_ptr&& file, uint64_t file_size, std::unique_ptr* table_reader, - bool prefetch_index_and_filter_in_cache = true) const override; + bool prefetch_index_and_filter_in_cache = true, + bool fill_cache_with_range_del_blocks = true) const override; TableBuilder* NewTableBuilder( const TableBuilderOptions& table_builder_options, uint32_t column_familly_id, WritableFileWriter* file) const override; diff --git a/table/plain_table_factory.cc b/table/plain_table_factory.cc index 0dccec55242..9144863d8ab 100644 --- a/table/plain_table_factory.cc +++ b/table/plain_table_factory.cc @@ -22,7 +22,8 @@ Status PlainTableFactory::NewTableReader( const TableReaderOptions& table_reader_options, std::unique_ptr&& file, uint64_t file_size, std::unique_ptr* table, - bool /*prefetch_index_and_filter_in_cache*/) const { + bool /*prefetch_index_and_filter_in_cache*/, + bool /*fill_cache_with_range_del_blocks*/) const { return PlainTableReader::Open( table_reader_options.ioptions, table_reader_options.env_options, table_reader_options.internal_comparator, std::move(file), file_size, diff --git a/table/plain_table_factory.h b/table/plain_table_factory.h index dade1566096..36c670571ab 100644 --- a/table/plain_table_factory.h +++ b/table/plain_table_factory.h @@ -151,7 +151,8 @@ class PlainTableFactory : public TableFactory { Status NewTableReader(const TableReaderOptions& table_reader_options, std::unique_ptr&& file, uint64_t file_size, std::unique_ptr* table, - bool prefetch_index_and_filter_in_cache) const override; + bool prefetch_index_and_filter_in_cache, + bool fill_cache_with_range_del_blocks) const override; TableBuilder* NewTableBuilder( const TableBuilderOptions& table_builder_options, diff --git a/utilities/options/options_util_test.cc b/utilities/options/options_util_test.cc index ed7bfdfd6f7..78f37685967 100644 --- a/utilities/options/options_util_test.cc +++ b/utilities/options/options_util_test.cc @@ -152,7 +152,8 @@ class DummyTableFactory : public TableFactory { const TableReaderOptions& /*table_reader_options*/, std::unique_ptr&& /*file*/, uint64_t /*file_size*/, std::unique_ptr* /*table_reader*/, - bool /*prefetch_index_and_filter_in_cache*/) const override { + bool /*prefetch_index_and_filter_in_cache*/, + bool /*fill_cache_with_range_del_blocks*/) const override { return Status::NotSupported(); }