Skip to content

Commit

Permalink
container: use btree_map for interval_set
Browse files Browse the repository at this point in the history
Signed-off-by: Noah Watkins <noahwatkins@gmail.com>
  • Loading branch information
dotnwat committed Jun 11, 2024
1 parent ae2046d commit db9b4bc
Show file tree
Hide file tree
Showing 3 changed files with 63 additions and 62 deletions.
61 changes: 26 additions & 35 deletions src/v/container/include/container/interval_set.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
*/
#pragma once

#include <absl/container/btree_set.h>
#include <absl/container/btree_map.h>

/**
* A container that contains non-empty, open intervals.
Expand All @@ -29,26 +29,9 @@
*/
template<std::integral T>
class interval_set {
struct key {
// Inclusive.
T start;

// Exclusive.
// It's important that this not be used in any comparisons.
mutable T end;
};
struct compare {
using is_transparent = void;

bool operator()(const key& a, const key& b) const {
return a.start < b.start;
}

bool operator()(const T& a, const key& b) const { return a < b.start; }
bool operator()(const key& a, const T& b) const { return a.start < b; }
bool operator()(const T& a, const T& b) const { return a < b; }
};
using set_t = absl::btree_set<key, compare>;
// Key = interval start (inclusive)
// Value = interval end (exclusive)
using set_t = absl::btree_map<T, T>;

public:
using const_iterator = set_t::const_iterator;
Expand Down Expand Up @@ -104,6 +87,12 @@ class interval_set {
*/
[[nodiscard]] size_t size() const;

/**
* Convenience wrappers.
*/
static auto to_start(const_iterator it) { return it->first; }
static auto to_end(const_iterator it) { return it->second; }

private:
/**
* Extend the interval being pointed at with any intervals that overlap
Expand All @@ -114,20 +103,22 @@ class interval_set {
std::pair<const_iterator, bool> merge_right(const_iterator start_it);

set_t set_;

static auto& to_end(iterator it) { return it->second; }
};

template<std::integral T>
std::pair<typename interval_set<T>::const_iterator, bool>
interval_set<T>::merge_right(const_iterator start_it) {
auto start = start_it->start;
auto merged_end = start_it->end;
auto start = to_start(start_it);
auto merged_end = to_end(start_it);
auto next_it = std::next(start_it);
auto merge_end_it = next_it;

// Seek forward as long as the next interval if it overlaps with our merged
// interval. NOTE: <= because these are open intervals.
while (merge_end_it != set_.end() && merge_end_it->start <= merged_end) {
merged_end = std::max(merge_end_it->end, merged_end);
while (merge_end_it != set_.end() && to_start(merge_end_it) <= merged_end) {
merged_end = std::max(to_end(merge_end_it), merged_end);
merge_end_it = std::next(merge_end_it);
}
if (merge_end_it == next_it) {
Expand All @@ -137,7 +128,7 @@ interval_set<T>::merge_right(const_iterator start_it) {
// Replace our initial iterator and subsequent intervals with a merged
// version.
set_.erase(start_it, merge_end_it);
return set_.emplace(key{start, merged_end});
return set_.emplace(start, merged_end);
}

template<std::integral T>
Expand All @@ -151,7 +142,7 @@ interval_set<T>::insert(interval interval) {
const auto input_start = interval.start;
const auto input_end = input_start + length;
if (set_.empty()) {
return set_.emplace(key{input_start, input_end});
return set_.emplace(input_start, input_end);
}

auto it = set_.lower_bound(input_start);
Expand All @@ -163,8 +154,8 @@ interval_set<T>::insert(interval interval) {
// [ ) case 2
// In either case, just merge the expand the bounds of the existing
// iterator.
if (it != set_.end() && input_start == it->start) {
it->end = std::max(input_end, it->end);
if (it != set_.end() && input_start == to_start(it)) {
to_end(it) = std::max(input_end, to_end(it));
return merge_right(it);
}

Expand All @@ -179,14 +170,14 @@ interval_set<T>::insert(interval interval) {
// with it.
if (it != set_.begin()) {
auto prev = std::prev(it);
if (prev->end >= input_start) {
prev->end = std::max(input_end, prev->end);
if (to_end(prev) >= input_start) {
to_end(prev) = std::max(input_end, to_end(prev));
return merge_right(prev);
}
// Intentional fallthrough.
}
// Case 2: there's no overlap to the left. Just insert and merge forward.
auto ret = set_.emplace(key{input_start, input_end});
auto ret = set_.emplace(input_start, input_end);
return merge_right(ret.first);
}

Expand All @@ -199,7 +190,7 @@ interval_set<T>::const_iterator interval_set<T>::find(T index) const {
}
it = std::prev(it);

} else if (it->start == index) {
} else if (to_start(it) == index) {
return it;
} else if (it == set_.cbegin()) {
// Equality condition failing before this means that the index is
Expand All @@ -209,8 +200,8 @@ interval_set<T>::const_iterator interval_set<T>::find(T index) const {
--it;
}

assert(it->start < index);
if (index < it->end) {
assert(to_start(it) < index);
if (index < to_end(it)) {
return it;
}

Expand Down
44 changes: 22 additions & 22 deletions src/v/container/tests/interval_set_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ namespace {
void check_no_overlap(const set_t& s) {
std::optional<uint64_t> prev_last;
for (const auto& interval : s) {
auto start = interval.start;
auto end = interval.end;
auto start = interval.first;
auto end = interval.second;
EXPECT_GT(end, start);
if (prev_last) {
EXPECT_GT(start, prev_last.value());
Expand Down Expand Up @@ -63,8 +63,8 @@ TEST(IntervalSet, InsertMergeOverlappingIntervals) {
// [i, i + 10)
const auto ret1 = set.insert({0, 10});
EXPECT_TRUE(ret1.second);
EXPECT_EQ(ret1.first->start, 0);
EXPECT_EQ(ret1.first->end, 10);
EXPECT_EQ(ret1.first->first, 0);
EXPECT_EQ(ret1.first->second, 10);

// Insertion of a sub-interval results in the first interval being
// extended. Note that because of this, the iterators are safe (i.e.
Expand All @@ -73,8 +73,8 @@ TEST(IntervalSet, InsertMergeOverlappingIntervals) {
auto expected_end = 10 + i;
EXPECT_TRUE(ret2.second);
EXPECT_EQ(ret1, ret2);
EXPECT_EQ(ret1.first->start, 0);
EXPECT_EQ(ret1.first->end, expected_end);
EXPECT_EQ(ret1.first->first, 0);
EXPECT_EQ(ret1.first->second, expected_end);

// Confirm we can still find the expanded intervals.
const auto found_iter_first = set.find(0);
Expand All @@ -99,8 +99,8 @@ TEST(IntervalSet, InsertMergesAdjacentIntervals) {

// Since the intervals were exactly adjacent, they should be merged.
EXPECT_EQ(ret1, ret2);
EXPECT_EQ(ret1.first->start, 0);
EXPECT_EQ(ret1.first->end, 20);
EXPECT_EQ(ret1.first->first, 0);
EXPECT_EQ(ret1.first->second, 20);
check_no_overlap(set);
}

Expand Down Expand Up @@ -139,8 +139,8 @@ TEST(IntervalSet, InsertOverlapFront) {
EXPECT_TRUE(ret3.second);
EXPECT_EQ(ret1, ret2);
EXPECT_EQ(ret1, ret3);
EXPECT_EQ(ret1.first->start, 0);
EXPECT_EQ(ret1.first->end, 100);
EXPECT_EQ(ret1.first->first, 0);
EXPECT_EQ(ret1.first->second, 100);
EXPECT_EQ(1, set.size());

check_no_overlap(set);
Expand All @@ -150,8 +150,8 @@ TEST(IntervalSet, InsertOverlapCompletely) {
set_t set;
EXPECT_TRUE(set.insert({10, 20}).second);
EXPECT_TRUE(set.insert({0, 100}).second);
EXPECT_EQ(set.begin()->start, 0);
EXPECT_EQ(set.begin()->end, 100);
EXPECT_EQ(set.begin()->first, 0);
EXPECT_EQ(set.begin()->second, 100);
check_no_overlap(set);
}

Expand Down Expand Up @@ -239,16 +239,16 @@ TEST(IntervalSet, EraseBeginToEnd) {
EXPECT_TRUE(set.insert({20, 10}).second);
EXPECT_TRUE(set.insert({40, 10}).second);
EXPECT_EQ(3, set.size());
EXPECT_TRUE(set.begin()->start == 0);
EXPECT_TRUE(set.begin()->end == 10);
EXPECT_TRUE(set.begin()->first == 0);
EXPECT_TRUE(set.begin()->second == 10);

set.erase(set.begin());
EXPECT_EQ(set.begin()->start, 20);
EXPECT_EQ(set.begin()->end, 30);
EXPECT_EQ(set.begin()->first, 20);
EXPECT_EQ(set.begin()->second, 30);

set.erase(set.begin());
EXPECT_EQ(set.begin()->start, 40);
EXPECT_EQ(set.begin()->end, 50);
EXPECT_EQ(set.begin()->first, 40);
EXPECT_EQ(set.begin()->second, 50);

set.erase(set.begin());
EXPECT_EQ(set.begin(), set.end());
Expand Down Expand Up @@ -306,11 +306,11 @@ TEST_P(RandomizedIntervalSetTest, RandomInsertsSequentialErase) {
while (!filled_intervals.empty()) {
auto it = filled_intervals.begin();
if (highest_so_far) {
EXPECT_GT(it->start, *highest_so_far);
EXPECT_GT(it->first, *highest_so_far);
}
highest_so_far = it->end;
interval_filled_size += it->end - it->start;
for (auto i = it->start; i < it->end; i++) {
highest_so_far = it->second;
interval_filled_size += it->second - it->first;
for (auto i = it->first; i < it->second; i++) {
EXPECT_EQ(buf[i], 'X');
}
filled_intervals.erase(it);
Expand Down
20 changes: 15 additions & 5 deletions src/v/storage/mvlog/segment_reader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,16 @@ segment_reader::segment_reader(
}
segment_reader::~segment_reader() { --segment_->num_readers_; }

namespace {
auto to_start(interval_set<size_t>::const_iterator it) {
return interval_set<size_t>::to_start(it);
}

auto to_end(interval_set<size_t>::const_iterator it) {
return interval_set<size_t>::to_end(it);
}
} // namespace

skipping_data_source::read_list_t
segment_reader::make_read_intervals(size_t start_pos, size_t length) const {
auto gap_it = gaps_.begin();
Expand All @@ -37,22 +47,22 @@ segment_reader::make_read_intervals(size_t start_pos, size_t length) const {
// intervals to read, and skipping any that are entirely below the start
// position.
while (gap_it != gaps_.end() && cur_iter_pos <= max_pos) {
const auto next_gap_max = gap_it->end - 1;
const auto next_gap_max = to_end(gap_it) - 1;
if (cur_iter_pos > next_gap_max) {
// We are ahead of the next gap. Drop it from the list to consider.
++gap_it;
continue;
}
if (cur_iter_pos >= gap_it->start) {
if (cur_iter_pos >= to_start(gap_it)) {
// We are in the middle of a gap. Skip to just past the end.
// NOTE: the gap end is exclusive.
cur_iter_pos = gap_it->end;
cur_iter_pos = to_end(gap_it);
++gap_it;
continue;
}
// The next gap is ahead of us. Read up to the start of it and skip
// over the gap.
const auto read_max_pos = std::min(max_pos, gap_it->start - 1);
const auto read_max_pos = std::min(max_pos, to_start(gap_it) - 1);
const auto read_length = read_max_pos - cur_iter_pos + 1;
read_intervals.emplace_back(cur_iter_pos, read_length);
vlog(
Expand All @@ -62,7 +72,7 @@ segment_reader::make_read_intervals(size_t start_pos, size_t length) const {
read_max_pos + 1,
start_pos,
max_pos + 1);
cur_iter_pos = gap_it->end;
cur_iter_pos = to_end(gap_it);
++gap_it;
}
// No more gaps, read the rest of the range.
Expand Down

0 comments on commit db9b4bc

Please sign in to comment.