Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Prevent overlapping #175

Merged
merged 2 commits into from
Dec 12, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion librocksdb_sys/rocksdb/db/compaction_job.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1040,6 +1040,7 @@ Status CompactionJob::FinishCompactionOutputFile(
auto meta = &sub_compact->current_output()->meta;
if (s.ok()) {
Slice lower_bound_guard, upper_bound_guard;
std::string smallest_user_key;
const Slice *lower_bound, *upper_bound;
if (sub_compact->outputs.size() == 1) {
// For the first output table, include range tombstones before the min key
Expand All @@ -1049,7 +1050,8 @@ Status CompactionJob::FinishCompactionOutputFile(
// For subsequent output tables, only include range tombstones from min
// key onwards since the previous file was extended to contain range
// tombstones falling before min key.
lower_bound_guard = meta->smallest.user_key();
smallest_user_key = meta->smallest.user_key().ToString(false /*hex*/);
lower_bound_guard = Slice(smallest_user_key);
lower_bound = &lower_bound_guard;
} else {
lower_bound = nullptr;
Expand Down
54 changes: 54 additions & 0 deletions librocksdb_sys/rocksdb/db/db_compaction_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1439,6 +1439,60 @@ TEST_F(DBCompactionTest, DeleteFileRange) {
ASSERT_GT(old_num_files, new_num_files);
}

TEST_F(DBCompactionTest, DeleteFileRangeFileEndpointsOverlapBug) {
// regression test for #2833: groups of files whose user-keys overlap at the
// endpoints could be split by `DeleteFilesInRange`. This caused old data to
// reappear, either because a new version of the key was removed, or a range
// deletion was partially dropped. It could also cause non-overlapping
// invariant to be violated if the files dropped by DeleteFilesInRange were
// a subset of files that a range deletion spans.
const int kNumL0Files = 2;
const int kValSize = 8 << 10; // 8KB
Options options = CurrentOptions();
options.level0_file_num_compaction_trigger = kNumL0Files;
options.target_file_size_base = 1 << 10; // 1KB
DestroyAndReopen(options);

// The snapshot prevents key 1 from having its old version dropped. The low
// `target_file_size_base` ensures two keys will be in each output file.
const Snapshot* snapshot = nullptr;
Random rnd(301);
// The value indicates which flush the key belonged to, which is enough
// for us to determine the keys' relative ages. After L0 flushes finish,
// files look like:
//
// File 0: 0 -> vals[0], 1 -> vals[0]
// File 1: 1 -> vals[1], 2 -> vals[1]
//
// Then L0->L1 compaction happens, which outputs keys as follows:
//
// File 0: 0 -> vals[0], 1 -> vals[1]
// File 1: 1 -> vals[0], 2 -> vals[1]
//
// DeleteFilesInRange shouldn't be allowed to drop just file 0, as that
// would cause `1 -> vals[0]` (an older key) to reappear.
std::string vals[kNumL0Files];
for (int i = 0; i < kNumL0Files; ++i) {
vals[i] = RandomString(&rnd, kValSize);
Put(Key(i), vals[i]);
Put(Key(i + 1), vals[i]);
Flush();
if (i == 0) {
snapshot = db_->GetSnapshot();
}
}
dbfull()->TEST_WaitForCompact();

// Verify `DeleteFilesInRange` can't drop only file 0 which would cause
// "1 -> vals[0]" to reappear.
Slice begin = Key(0);
Slice end = Key(1);
ASSERT_OK(DeleteFilesInRange(db_, db_->DefaultColumnFamily(), &begin, &end));
ASSERT_EQ(vals[1], Get(Key(1)));

db_->ReleaseSnapshot(snapshot);
}

TEST_P(DBCompactionTestWithParam, TrivialMoveToLastLevelWithFiles) {
int32_t trivial_move = 0;
int32_t non_trivial_move = 0;
Expand Down
24 changes: 9 additions & 15 deletions librocksdb_sys/rocksdb/db/db_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2062,25 +2062,19 @@ Status DBImpl::DeleteFilesInRange(ColumnFamilyHandle* column_family,
end_key = &end_storage;
}

vstorage->GetOverlappingInputs(i, begin_key, end_key, &level_files, -1,
nullptr, false);
vstorage->GetCleanInputsWithinInterval(i, begin_key, end_key,
&level_files, -1 /* hint_index */,
nullptr /* file_index */);
FileMetaData* level_file;
for (uint32_t j = 0; j < level_files.size(); j++) {
level_file = level_files[j];
if (((begin == nullptr) ||
(cfd->internal_comparator().user_comparator()->Compare(
level_file->smallest.user_key(), *begin) >= 0)) &&
((end == nullptr) ||
(cfd->internal_comparator().user_comparator()->Compare(
level_file->largest.user_key(), *end) <= 0))) {
if (level_file->being_compacted) {
continue;
}
edit.SetColumnFamily(cfd->GetID());
edit.DeleteFile(i, level_file->fd.GetNumber());
deleted_files.push_back(level_file);
level_file->being_compacted = true;
if (level_file->being_compacted) {
continue;
}
edit.SetColumnFamily(cfd->GetID());
edit.DeleteFile(i, level_file->fd.GetNumber());
deleted_files.push_back(level_file);
level_file->being_compacted = true;
}
}
if (edit.GetDeletedFiles().empty()) {
Expand Down
32 changes: 19 additions & 13 deletions librocksdb_sys/rocksdb/db/version_set.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1777,27 +1777,33 @@ void VersionStorageInfo::GetOverlappingInputs(
void VersionStorageInfo::GetCleanInputsWithinInterval(
int level, const InternalKey* begin, const InternalKey* end,
std::vector<FileMetaData*>* inputs, int hint_index, int* file_index) const {
if (level >= num_non_empty_levels_) {
inputs->clear();
if (file_index) {
*file_index = -1;
}
if (level >= num_non_empty_levels_ || level == 0 ||
level_files_brief_[level].num_files == 0) {
// this level is empty, no inputs within range
// also don't support clean input interval within L0
return;
}

inputs->clear();
Slice user_begin, user_end;
if (begin != nullptr) {
const auto& level_files = level_files_brief_[level];
if (begin == nullptr) {
user_begin = ExtractUserKey(level_files.files[0].smallest_key);
} else {
user_begin = begin->user_key();
}
if (end != nullptr) {
if (end == nullptr) {
user_end = ExtractUserKey(
level_files.files[level_files.num_files - 1].largest_key);
} else {
user_end = end->user_key();
}
if (file_index) {
*file_index = -1;
}
if (begin != nullptr && end != nullptr && level > 0) {
GetOverlappingInputsRangeBinarySearch(level, user_begin, user_end, inputs,
hint_index, file_index,
true /* within_interval */);
}
GetOverlappingInputsRangeBinarySearch(level, user_begin, user_end, inputs,
hint_index, file_index,
true /* within_interval */);
}

// Store in "*inputs" all files in "level" that overlap [begin,end]
Expand Down Expand Up @@ -1860,8 +1866,8 @@ void VersionStorageInfo::GetOverlappingInputsRangeBinarySearch(
} else {
ExtendFileRangeOverlappingInterval(level, user_begin, user_end, mid,
&start_index, &end_index);
assert(end_index >= start_index);
}
assert(end_index >= start_index);
// insert overlapping files into vector
for (int i = start_index; i <= end_index; i++) {
inputs->push_back(files_[level][i]);
Expand Down
3 changes: 2 additions & 1 deletion librocksdb_sys/rocksdb/include/rocksdb/convenience.h
Original file line number Diff line number Diff line change
Expand Up @@ -325,7 +325,8 @@ void CancelAllBackgroundWork(DB* db, bool wait = false);

// Delete files which are entirely in the given range
// Could leave some keys in the range which are in files which are not
// entirely in the range.
// entirely in the range. Also leaves L0 files regardless of whether they're
// in the range.
// Snapshots before the delete might not see the data in the given range.
Status DeleteFilesInRange(DB* db, ColumnFamilyHandle* column_family,
const Slice* begin, const Slice* end);
Expand Down