Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[enhancement](be-ut) Add more indexed column reader BE unit tests #25652

Merged
merged 6 commits into from
Oct 23, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions be/src/olap/primary_key_index.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,9 @@ class PrimaryKeyIndexBuilder {

// Returns the current value of the `_disk_size` member.
uint64_t disk_size() const {
    return _disk_size;
}

// Exposed for BE unit tests: forwards to
// `_primary_key_index_builder->data_page_num()` and returns its result.
uint32_t data_page_num() const {
    const uint32_t page_count = _primary_key_index_builder->data_page_num();
    return page_count;
}

// Returns a Slice that starts at `_min_key.data()` and whose length is
// `_min_key.size() - _seq_col_length`.
// NOTE(review): presumably this drops a sequence-column suffix from the stored
// key; there is no guard for `_min_key.size() < _seq_col_length` -- confirm
// callers never hit that case.
Slice min_key() {
    const auto visible_len = _min_key.size() - _seq_col_length;
    return Slice(_min_key.data(), visible_len);
}
// Returns a Slice that starts at `_max_key.data()` and whose length is
// `_max_key.size() - _seq_col_length`.
// NOTE(review): presumably this drops a sequence-column suffix from the stored
// key; there is no guard for `_max_key.size() < _seq_col_length` -- confirm
// callers never hit that case.
Slice max_key() {
    const auto visible_len = _max_key.size() - _seq_col_length;
    return Slice(_max_key.data(), visible_len);
}

Expand Down
3 changes: 3 additions & 0 deletions be/src/olap/rowset/segment_v2/indexed_column_writer.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include <stdint.h>

#include <cstddef>
#include <cstdint>
#include <memory>

#include "common/status.h"
Expand Down Expand Up @@ -85,6 +86,8 @@ class IndexedColumnWriter {

// Returns the current value of the `_disk_size` member.
uint64_t disk_size() const {
    return _disk_size;
}

// Returns `_num_data_pages` plus one.
// NOTE(review): the extra one presumably accounts for the data page that is
// still being filled and has not been counted yet -- confirm against
// `_finish_current_data_page`.
uint32_t data_page_num() const {
    return 1 + _num_data_pages;
}

private:
Status _finish_current_data_page(size_t& num_val);

Expand Down
153 changes: 153 additions & 0 deletions be/test/olap/primary_key_index_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -167,4 +167,157 @@ TEST_F(PrimaryKeyIndexTest, builder) {
}
}

// Writes ten 5-character keys through PrimaryKeyIndexBuilder with
// config::primary_key_data_page_size set to 25, expects more than one data
// page, then reopens the file with PrimaryKeyIndexReader and verifies
// check_present() plus the iterator's seek_at_or_after() / seek_to_ordinal().
// NOTE(review): config::primary_key_data_page_size is assigned below and not
// restored inside this test; presumably it is a process-wide setting, so later
// tests may observe the changed value -- confirm.
TEST_F(PrimaryKeyIndexTest, multiple_pages) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

warning: all parameters should be named in a function [readability-named-parameter]

Suggested change
TEST_F(PrimaryKeyIndexTest, multiple_pages) {
TEST_F(PrimaryKeyIndexTest /*unused*/, multiple_pages /*unused*/) {

// Create the output file under kTestDir on the local filesystem.
std::string filename = kTestDir + "/multiple_pages";
io::FileWriterPtr file_writer;
auto fs = io::global_local_filesystem();
EXPECT_TRUE(fs->create_file(filename, &file_writer).ok());

// 5 * 5 = 25; with the ten 5-character keys below, the test expects this
// setting to produce more than one data page (asserted via data_page_num()).
config::primary_key_data_page_size = 5 * 5;
PrimaryKeyIndexBuilder builder(file_writer.get(), 0);
// NOTE(review): the results of init() and add_item() are discarded with
// static_cast<void>, so a failure here would only surface indirectly in the
// assertions that follow.
static_cast<void>(builder.init());
size_t num_rows = 0;
// Keys are the even numbers 0..18, zero-padded to 5 characters, in ascending order.
std::vector<std::string> keys {"00000", "00002", "00004", "00006", "00008",
"00010", "00012", "00014", "00016", "00018"};
for (const std::string& key : keys) {
static_cast<void>(builder.add_item(key));
num_rows++;
}
// First and last inserted keys are expected back as min/max.
EXPECT_EQ("00000", builder.min_key().to_string());
EXPECT_EQ("00018", builder.max_key().to_string());
// 2 * 5 * 5 = 50, which matches 10 keys x 5 characters.
// NOTE(review): presumably size() is the total key bytes added -- confirm.
EXPECT_EQ(builder.size(), 2 * 5 * 5);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

warning: 5 is a magic number; consider replacing it with a named constant [readability-magic-numbers]

    EXPECT_EQ(builder.size(), 2 * 5 * 5);
                                  ^

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

warning: 5 is a magic number; consider replacing it with a named constant [readability-magic-numbers]

    EXPECT_EQ(builder.size(), 2 * 5 * 5);
                                      ^

// The point of this test: the small page size must yield multiple data pages.
EXPECT_GT(builder.data_page_num(), 1);
segment_v2::PrimaryKeyIndexMetaPB index_meta;
// NOTE(review): unlike the neighbouring checks, this passes the result of
// finalize() to EXPECT_TRUE without calling .ok(); how that value converts to
// bool is not visible here -- confirm it means "success".
EXPECT_TRUE(builder.finalize(&index_meta));
// The builder's reported disk size must equal the bytes appended to the file.
EXPECT_EQ(builder.disk_size(), file_writer->bytes_appended());
EXPECT_TRUE(file_writer->close().ok());
EXPECT_EQ(num_rows, builder.num_rows());

// Reopen the file and load the index and bloom filter using the meta
// filled in by finalize().
PrimaryKeyIndexReader index_reader;
io::FileReaderSPtr file_reader;
EXPECT_TRUE(fs->open_file(filename, &file_reader).ok());
EXPECT_TRUE(index_reader.parse_index(file_reader, index_meta).ok());
EXPECT_TRUE(index_reader.parse_bf(file_reader, index_meta).ok());
EXPECT_EQ(num_rows, index_reader.num_rows());

std::unique_ptr<segment_v2::IndexedColumnIterator> index_iterator;
EXPECT_TRUE(index_reader.new_iterator(&index_iterator).ok());
bool exact_match = false;
// row_id has no initializer; it is assigned from get_current_ordinal() before
// every read below.
uint32_t row_id;
// Every inserted key must be reported present, be found as an exact match,
// and sit at the ordinal equal to its position in `keys`.
for (size_t i = 0; i < keys.size(); i++) {
bool exists = index_reader.check_present(keys[i]);
EXPECT_TRUE(exists);
auto status = index_iterator->seek_at_or_after(&keys[i], &exact_match);
EXPECT_TRUE(status.ok());
EXPECT_TRUE(exact_match);
row_id = index_iterator->get_current_ordinal();
EXPECT_EQ(i, row_id);
}
// Seeking directly by ordinal must land on that same ordinal.
for (size_t i = 0; i < keys.size(); i++) {
bool exists = index_reader.check_present(keys[i]);
EXPECT_TRUE(exists);
auto status = index_iterator->seek_to_ordinal(i);
EXPECT_TRUE(status.ok());
row_id = index_iterator->get_current_ordinal();
EXPECT_EQ(i, row_id);
}
{
// Ordinal 10 equals keys.size(), i.e. one past the last key's ordinal (9);
// the test expects the seek to succeed and the current ordinal to be 10.
auto status = index_iterator->seek_to_ordinal(10);
EXPECT_TRUE(status.ok());
row_id = index_iterator->get_current_ordinal();
EXPECT_EQ(10, row_id);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

warning: 10 is a magic number; consider replacing it with a named constant [readability-magic-numbers]

        EXPECT_EQ(10, row_id);
                  ^

}

// Odd-numbered keys were never inserted. non_exist_keys[i] sorts between
// keys[i] and keys[i + 1], so a seek is expected to report no exact match and
// stop at ordinal i + 1.
std::vector<std::string> non_exist_keys {"00001", "00003", "00005", "00007", "00009",
"00011", "00013", "00015", "00017"};
for (size_t i = 0; i < non_exist_keys.size(); i++) {
Slice slice(non_exist_keys[i]);
bool exists = index_reader.check_present(slice);
EXPECT_FALSE(exists);
auto status = index_iterator->seek_at_or_after(&slice, &exact_match);
EXPECT_TRUE(status.ok());
EXPECT_FALSE(exact_match);
row_id = index_iterator->get_current_ordinal();
EXPECT_EQ(i + 1, row_id);
}
{
// "00019" sorts after the largest inserted key ("00018"); the seek is
// expected to fail with ENTRY_NOT_FOUND.
// NOTE(review): `string` is unqualified here while the rest of the test
// spells std::string; this relies on a declaration outside this view.
string key("00019");
Slice slice(key);
bool exists = index_reader.check_present(slice);
EXPECT_FALSE(exists);
auto status = index_iterator->seek_at_or_after(&slice, &exact_match);
EXPECT_FALSE(exact_match);
EXPECT_TRUE(status.is<ErrorCode::ENTRY_NOT_FOUND>());
}
}

// Writes the same ten 5-character keys as the multiple_pages test, but with
// config::primary_key_data_page_size set to 32768, expects exactly one data
// page, then reopens the file with PrimaryKeyIndexReader and verifies
// check_present() and the iterator's seek_at_or_after().
// NOTE(review): config::primary_key_data_page_size is assigned below and not
// restored inside this test; presumably it is a process-wide setting, so later
// tests may observe the changed value -- confirm.
TEST_F(PrimaryKeyIndexTest, single_page) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

warning: all parameters should be named in a function [readability-named-parameter]

Suggested change
TEST_F(PrimaryKeyIndexTest, single_page) {
TEST_F(PrimaryKeyIndexTest /*unused*/, single_page /*unused*/) {

// Create the output file under kTestDir on the local filesystem.
std::string filename = kTestDir + "/single_page";
io::FileWriterPtr file_writer;
auto fs = io::global_local_filesystem();
EXPECT_TRUE(fs->create_file(filename, &file_writer).ok());
// A page size far larger than the 50 characters of key data added below; the
// test expects everything to fit in one data page (asserted via data_page_num()).
config::primary_key_data_page_size = 32768;

PrimaryKeyIndexBuilder builder(file_writer.get(), 0);
// NOTE(review): the results of init() and add_item() are discarded with
// static_cast<void>, so a failure here would only surface indirectly in the
// assertions that follow.
static_cast<void>(builder.init());
size_t num_rows = 0;
// Keys are the even numbers 0..18, zero-padded to 5 characters, in ascending order.
std::vector<std::string> keys {"00000", "00002", "00004", "00006", "00008",
"00010", "00012", "00014", "00016", "00018"};
for (const std::string& key : keys) {
static_cast<void>(builder.add_item(key));
num_rows++;
}
// First and last inserted keys are expected back as min/max.
EXPECT_EQ("00000", builder.min_key().to_string());
EXPECT_EQ("00018", builder.max_key().to_string());
// 2 * 5 * 5 = 50, which matches 10 keys x 5 characters.
// NOTE(review): presumably size() is the total key bytes added -- confirm.
EXPECT_EQ(builder.size(), 2 * 5 * 5);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

warning: 5 is a magic number; consider replacing it with a named constant [readability-magic-numbers]

    EXPECT_EQ(builder.size(), 2 * 5 * 5);
                                  ^

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

warning: 5 is a magic number; consider replacing it with a named constant [readability-magic-numbers]

    EXPECT_EQ(builder.size(), 2 * 5 * 5);
                                      ^

// The point of this test: the large page size must yield exactly one data page.
EXPECT_EQ(builder.data_page_num(), 1);
segment_v2::PrimaryKeyIndexMetaPB index_meta;
// NOTE(review): unlike the neighbouring checks, this passes the result of
// finalize() to EXPECT_TRUE without calling .ok(); how that value converts to
// bool is not visible here -- confirm it means "success".
EXPECT_TRUE(builder.finalize(&index_meta));
// The builder's reported disk size must equal the bytes appended to the file.
EXPECT_EQ(builder.disk_size(), file_writer->bytes_appended());
EXPECT_TRUE(file_writer->close().ok());
EXPECT_EQ(num_rows, builder.num_rows());

// Reopen the file and load the index and bloom filter using the meta
// filled in by finalize().
PrimaryKeyIndexReader index_reader;
io::FileReaderSPtr file_reader;
EXPECT_TRUE(fs->open_file(filename, &file_reader).ok());
EXPECT_TRUE(index_reader.parse_index(file_reader, index_meta).ok());
EXPECT_TRUE(index_reader.parse_bf(file_reader, index_meta).ok());
EXPECT_EQ(num_rows, index_reader.num_rows());

std::unique_ptr<segment_v2::IndexedColumnIterator> index_iterator;
EXPECT_TRUE(index_reader.new_iterator(&index_iterator).ok());
bool exact_match = false;
// row_id has no initializer; it is assigned from get_current_ordinal() before
// every read below.
uint32_t row_id;
// Every inserted key must be reported present, be found as an exact match,
// and sit at the ordinal equal to its position in `keys`.
for (size_t i = 0; i < keys.size(); i++) {
bool exists = index_reader.check_present(keys[i]);
EXPECT_TRUE(exists);
auto status = index_iterator->seek_at_or_after(&keys[i], &exact_match);
EXPECT_TRUE(status.ok());
EXPECT_TRUE(exact_match);
row_id = index_iterator->get_current_ordinal();
EXPECT_EQ(i, row_id);
}

// Odd-numbered keys were never inserted. non_exist_keys[i] sorts between
// keys[i] and keys[i + 1], so a seek is expected to report no exact match and
// stop at ordinal i + 1.
std::vector<std::string> non_exist_keys {"00001", "00003", "00005", "00007", "00009",
"00011", "00013", "00015", "00017"};
for (size_t i = 0; i < non_exist_keys.size(); i++) {
Slice slice(non_exist_keys[i]);
bool exists = index_reader.check_present(slice);
EXPECT_FALSE(exists);
auto status = index_iterator->seek_at_or_after(&slice, &exact_match);
EXPECT_TRUE(status.ok());
EXPECT_FALSE(exact_match);
row_id = index_iterator->get_current_ordinal();
EXPECT_EQ(i + 1, row_id);
}
{
// "00019" sorts after the largest inserted key ("00018"); the seek is
// expected to fail with ENTRY_NOT_FOUND.
// NOTE(review): `string` is unqualified here while the rest of the test
// spells std::string; this relies on a declaration outside this view.
string key("00019");
Slice slice(key);
bool exists = index_reader.check_present(slice);
EXPECT_FALSE(exists);
auto status = index_iterator->seek_at_or_after(&slice, &exact_match);
EXPECT_FALSE(exact_match);
EXPECT_TRUE(status.is<ErrorCode::ENTRY_NOT_FOUND>());
}
}
} // namespace doris