From a228d338919582e88925bb0a3a45722e588d22db Mon Sep 17 00:00:00 2001 From: airborne12 Date: Wed, 13 Nov 2024 18:14:29 +0800 Subject: [PATCH] refine --- be/src/olap/compaction.cpp | 6 +- .../segment_v2/inverted_index_compaction.cpp | 7 - .../segment_v2/inverted_index_file_writer.cpp | 87 +++------ .../segment_v2/inverted_index_file_writer.h | 18 +- .../segment_v2/inverted_index_writer.cpp | 4 +- .../inverted_index_file_writer_test.cpp | 167 +++++++++++------- 6 files changed, 155 insertions(+), 134 deletions(-) diff --git a/be/src/olap/compaction.cpp b/be/src/olap/compaction.cpp index a40e28669e90cc7..aba51e33d7055f7 100644 --- a/be/src/olap/compaction.cpp +++ b/be/src/olap/compaction.cpp @@ -666,9 +666,11 @@ Status Compaction::do_inverted_index_compaction() { DORIS_TRY(inverted_index_file_readers[src_segment_id]->open(index_meta)); } for (int dest_segment_id = 0; dest_segment_id < dest_segment_num; dest_segment_id++) { - auto* dest_dir = + auto dest_dir = DORIS_TRY(inverted_index_file_writers[dest_segment_id]->open(index_meta)); - dest_index_dirs[dest_segment_id] = dest_dir; + // Destination directories in dest_index_dirs do not need to be deconstructed, + // but their lifecycle must be managed by inverted_index_file_writers. + dest_index_dirs[dest_segment_id] = dest_dir.get(); } auto st = compact_column(index_meta->index_id(), src_idx_dirs, dest_index_dirs, index_tmp_path.native(), trans_vec, dest_segment_num_rows); diff --git a/be/src/olap/rowset/segment_v2/inverted_index_compaction.cpp b/be/src/olap/rowset/segment_v2/inverted_index_compaction.cpp index 88a8f2417228bc3..f988c46c027c268 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index_compaction.cpp +++ b/be/src/olap/rowset/segment_v2/inverted_index_compaction.cpp @@ -76,13 +76,6 @@ Status compact_column(int64_t index_id, // when index_writer is destroyed, if closeDir is set, dir will be close // _CLDECDELETE(dir) will try to ref_cnt--, when it decreases to 1, dir will be destroyed. _CLDECDELETE(dir) - for (auto* d : dest_index_dirs) { - if (d != nullptr) { - // NOTE: DO NOT close dest dir here, because it will be closed when dest index writer finalize. - //d->close(); - //_CLDELETE(d); - } - } // delete temporary segment_path, only when inverted_index_ram_dir_enable is false if (!config::inverted_index_ram_dir_enable) { diff --git a/be/src/olap/rowset/segment_v2/inverted_index_file_writer.cpp b/be/src/olap/rowset/segment_v2/inverted_index_file_writer.cpp index f180bb4841848cd..cf3eafc33993095 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index_file_writer.cpp +++ b/be/src/olap/rowset/segment_v2/inverted_index_file_writer.cpp @@ -19,17 +19,14 @@ #include +#include #include #include "common/status.h" -#include "io/fs/file_writer.h" -#include "io/fs/local_file_system.h" -#include "olap/rowset/segment_v2/inverted_index_cache.h" #include "olap/rowset/segment_v2/inverted_index_desc.h" #include "olap/rowset/segment_v2/inverted_index_fs_directory.h" #include "olap/rowset/segment_v2/inverted_index_reader.h" #include "olap/tablet_schema.h" -#include "runtime/exec_env.h" namespace doris::segment_v2 { @@ -38,32 +35,11 @@ Status InvertedIndexFileWriter::initialize(InvertedIndexDirectoryMap& indices_di return Status::OK(); } -Result InvertedIndexFileWriter::open(const TabletIndex* index_meta) { - auto tmp_file_dir = ExecEnv::GetInstance()->get_tmp_file_dirs()->get_tmp_file_dir(); - const auto& local_fs = io::global_local_filesystem(); - auto local_fs_index_path = InvertedIndexDescriptor::get_temporary_index_path( - tmp_file_dir.native(), _rowset_id, _seg_id, index_meta->index_id(), - index_meta->get_index_suffix()); - bool exists = false; - auto st = local_fs->exists(local_fs_index_path, &exists); - DBUG_EXECUTE_IF("InvertedIndexFileWriter::open_local_fs_exists_error", - { st = Status::Error("debug point: no such file error"); }) - if (!st.ok()) { - LOG(ERROR) << "index_path:" << local_fs_index_path << " exists error:" << st; - return ResultError(st); - } - DBUG_EXECUTE_IF("InvertedIndexFileWriter::open_local_fs_exists_true", { exists = true; }) - if (exists) { - LOG(ERROR) << "try to init a directory:" << local_fs_index_path << " already exists"; - return ResultError( - Status::InternalError("InvertedIndexFileWriter::open directory already exists")); - } - - bool can_use_ram_dir = true; - auto* dir = DorisFSDirectoryFactory::getDirectory(local_fs, local_fs_index_path.c_str(), - can_use_ram_dir); - auto key = std::make_pair(index_meta->index_id(), index_meta->get_index_suffix()); - auto [it, inserted] = _indices_dirs.emplace(key, std::unique_ptr(dir)); +Status InvertedIndexFileWriter::_insert_directory_into_map(int64_t index_id, + const std::string& index_suffix, + std::shared_ptr dir) { + auto key = std::make_pair(index_id, index_suffix); + auto [it, inserted] = _indices_dirs.emplace(key, std::move(dir)); if (!inserted) { LOG(ERROR) << "InvertedIndexFileWriter::open attempted to insert a duplicate key: (" << key.first << ", " << key.second << ")"; @@ -71,8 +47,23 @@ Result InvertedIndexFileWriter::open(const TabletIndex* index for (const auto& entry : _indices_dirs) { LOG(ERROR) << "Key: (" << entry.first.first << ", " << entry.first.second << ")"; } - return ResultError(Status::InternalError( - "InvertedIndexFileWriter::open attempted to insert a duplicate dir")); + return Status::InternalError( + "InvertedIndexFileWriter::open attempted to insert a duplicate dir"); + } + return Status::OK(); +} + +Result> InvertedIndexFileWriter::open( + const TabletIndex* index_meta) { + auto local_fs_index_path = InvertedIndexDescriptor::get_temporary_index_path( + _tmp_dir, _rowset_id, _seg_id, index_meta->index_id(), index_meta->get_index_suffix()); + bool can_use_ram_dir = true; + auto dir = std::shared_ptr(DorisFSDirectoryFactory::getDirectory( + _local_fs, local_fs_index_path.c_str(), can_use_ram_dir)); + auto st = + _insert_directory_into_map(index_meta->index_id(), index_meta->get_index_suffix(), dir); + if (!st.ok()) { + return ResultError(st); } return dir; @@ -222,7 +213,7 @@ void InvertedIndexFileWriter::copyFile(const char* fileName, lucene::store::Dire int64_t chunk = bufferLength; while (remainder > 0) { - int64_t len = std::min(std::min(chunk, length), remainder); + int64_t len = std::min({chunk, length, remainder}); input->readBytes(buffer, len); output->writeBytes(buffer, len); remainder -= len; @@ -509,40 +500,20 @@ InvertedIndexFileWriter::prepare_file_metadata_v2(int64_t& current_offset) { for (const auto& entry : _indices_dirs) { const int64_t index_id = entry.first.first; const auto& index_suffix = entry.first.second; - const auto& dir = entry.second.get(); + auto* dir = entry.second.get(); // Get sorted files - auto sorted_files = get_sorted_files_v2(dir); + auto sorted_files = prepare_sorted_files(dir); for (const auto& file : sorted_files) { - int64_t file_length = dir->fileLength(file.c_str()); - file_metadata.emplace_back(index_id, index_suffix, file, current_offset, file_length, - dir); - current_offset += file_length; // Update the data offset + file_metadata.emplace_back(index_id, index_suffix, file.filename, current_offset, + file.filesize, dir); + current_offset += file.filesize; // Update the data offset } } return file_metadata; } -std::vector InvertedIndexFileWriter::get_sorted_files_v2( - lucene::store::Directory* dir) { - std::vector files; - dir->list(&files); - - // Remove write.lock file - files.erase(std::remove(files.begin(), files.end(), DorisFSDirectory::WRITE_LOCK_FILE), - files.end()); - - // Sort files by file length - std::sort(files.begin(), files.end(), [dir](const std::string& a, const std::string& b) { - int64_t size_a = dir->fileLength(a.c_str()); - int64_t size_b = dir->fileLength(b.c_str()); - return size_a < size_b; - }); - - return files; -} - void InvertedIndexFileWriter::write_index_headers_and_metadata( lucene::store::IndexOutput* output, const std::vector& file_metadata) { // Group files by index_id and index_suffix diff --git a/be/src/olap/rowset/segment_v2/inverted_index_file_writer.h b/be/src/olap/rowset/segment_v2/inverted_index_file_writer.h index 38adfe7c3a75d3d..821507dfa62c3ab 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index_file_writer.h +++ b/be/src/olap/rowset/segment_v2/inverted_index_file_writer.h @@ -28,7 +28,9 @@ #include "io/fs/file_system.h" #include "io/fs/file_writer.h" +#include "io/fs/local_file_system.h" #include "olap/rowset/segment_v2/inverted_index_desc.h" +#include "runtime/exec_env.h" namespace doris { class TabletIndex; @@ -36,7 +38,7 @@ class TabletIndex; namespace segment_v2 { class DorisFSDirectory; using InvertedIndexDirectoryMap = - std::map, std::unique_ptr>; + std::map, std::shared_ptr>; class InvertedIndexFileWriter; using InvertedIndexFileWriterPtr = std::unique_ptr; @@ -58,9 +60,13 @@ class InvertedIndexFileWriter { _rowset_id(std::move(rowset_id)), _seg_id(seg_id), _storage_format(storage_format), - _idx_v2_writer(std::move(file_writer)) {} + _local_fs(io::global_local_filesystem()), + _idx_v2_writer(std::move(file_writer)) { + auto tmp_file_dir = ExecEnv::GetInstance()->get_tmp_file_dirs()->get_tmp_file_dir(); + _tmp_dir = tmp_file_dir.native(); + } - Result open(const TabletIndex* index_meta); + Result> open(const TabletIndex* index_meta); Status delete_index(const TabletIndex* index_meta); Status initialize(InvertedIndexDirectoryMap& indices_dirs); ~InvertedIndexFileWriter() = default; @@ -134,12 +140,12 @@ class InvertedIndexFileWriter { directory(dir) {} }; std::vector prepare_file_metadata_v2(int64_t& current_offset); - std::vector get_sorted_files_v2(lucene::store::Directory* dir); void write_index_headers_and_metadata(lucene::store::IndexOutput* output, const std::vector& file_metadata); void copy_files_data_v2(lucene::store::IndexOutput* output, const std::vector& file_metadata); - + Status _insert_directory_into_map(int64_t index_id, const std::string& index_suffix, + std::shared_ptr dir); // Member variables... InvertedIndexDirectoryMap _indices_dirs; const io::FileSystemSPtr _fs; @@ -147,6 +153,8 @@ class InvertedIndexFileWriter { std::string _rowset_id; int64_t _seg_id; InvertedIndexStorageFormatPB _storage_format; + std::string _tmp_dir; + const std::shared_ptr& _local_fs; // write to disk or stream io::FileWriterPtr _idx_v2_writer = nullptr; diff --git a/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp b/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp index 29fe4609e59e9ca..a4f3ca55dd11c0b 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp +++ b/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp @@ -197,7 +197,7 @@ class InvertedIndexColumnWriterImpl : public InvertedIndexColumnWriter { bool create_index = true; bool close_dir_on_shutdown = true; auto index_writer = std::make_unique( - _dir, _analyzer.get(), create_index, close_dir_on_shutdown); + _dir.get(), _analyzer.get(), create_index, close_dir_on_shutdown); DBUG_EXECUTE_IF("InvertedIndexColumnWriter::create_index_writer_setRAMBufferSizeMB_error", { index_writer->setRAMBufferSizeMB(-100); }) DBUG_EXECUTE_IF("InvertedIndexColumnWriter::create_index_writer_setMaxBufferedDocs_error", @@ -708,7 +708,7 @@ class InvertedIndexColumnWriterImpl : public InvertedIndexColumnWriter { std::unique_ptr _char_string_reader = nullptr; std::shared_ptr _bkd_writer = nullptr; InvertedIndexCtxSPtr _inverted_index_ctx = nullptr; - DorisFSDirectory* _dir = nullptr; + std::shared_ptr _dir = nullptr; const KeyCoder* _value_key_coder; const TabletIndex* _index_meta; InvertedIndexParserType _parser_type; diff --git a/be/test/olap/rowset/segment_v2/inverted_index_file_writer_test.cpp b/be/test/olap/rowset/segment_v2/inverted_index_file_writer_test.cpp index 66841ca281e278c..621fa9b947c0b26 100644 --- a/be/test/olap/rowset/segment_v2/inverted_index_file_writer_test.cpp +++ b/be/test/olap/rowset/segment_v2/inverted_index_file_writer_test.cpp @@ -33,6 +33,20 @@ using namespace doris::vectorized; class InvertedIndexFileWriterTest : public ::testing::Test { protected: + class MockDorisFSDirectoryFileLength : public DorisFSDirectory { + public: + //MOCK_METHOD(lucene::store::IndexOutput*, createOutput, (const char* name), (override)); + MOCK_METHOD(int64_t, fileLength, (const char* name), (const, override)); + //MOCK_METHOD(void, close, (), (override)); + //MOCK_METHOD(const char*, getObjectName, (), (const, override)); + }; + class MockDorisFSDirectoryOpenInput : public DorisFSDirectory { + public: + MOCK_METHOD(bool, openInput, + (const char* name, lucene::store::IndexInput*& ret, CLuceneError& err, + int32_t bufferSize), + (override)); + }; void SetUp() override { char buffer[MAX_PATH_LEN]; ASSERT_NE(getcwd(buffer, MAX_PATH_LEN), nullptr); @@ -68,9 +82,6 @@ class InvertedIndexFileWriterTest : public ::testing::Test { _index_path_prefix = _absolute_dir + "/index_test"; _rowset_id = "test_rowset"; _seg_id = 1; - _storage_format = InvertedIndexStorageFormatPB::V2; - - _indices_dirs = InvertedIndexDirectoryMap(); } void TearDown() override { @@ -95,8 +106,6 @@ class InvertedIndexFileWriterTest : public ::testing::Test { std::string _index_path_prefix; std::string _rowset_id; int64_t _seg_id; - InvertedIndexStorageFormatPB _storage_format; - InvertedIndexDirectoryMap _indices_dirs; StorageEngine* _engine_ref = nullptr; std::unique_ptr _data_dir = nullptr; std::unique_ptr _inverted_index_searcher_cache; @@ -107,7 +116,8 @@ class InvertedIndexFileWriterTest : public ::testing::Test { }; TEST_F(InvertedIndexFileWriterTest, InitializeTest) { - InvertedIndexFileWriter writer(_fs, _index_path_prefix, _rowset_id, _seg_id, _storage_format); + InvertedIndexFileWriter writer(_fs, _index_path_prefix, _rowset_id, _seg_id, + InvertedIndexStorageFormatPB::V2); InvertedIndexDirectoryMap indices_dirs; indices_dirs.emplace(std::make_pair(1, "suffix1"), std::make_unique()); @@ -116,11 +126,12 @@ TEST_F(InvertedIndexFileWriterTest, InitializeTest) { Status status = writer.initialize(indices_dirs); ASSERT_TRUE(status.ok()); - ASSERT_EQ(writer.get_storage_format(), _storage_format); + ASSERT_EQ(writer.get_storage_format(), InvertedIndexStorageFormatPB::V2); } TEST_F(InvertedIndexFileWriterTest, OpenTest) { - InvertedIndexFileWriter writer(_fs, _index_path_prefix, _rowset_id, _seg_id, _storage_format); + InvertedIndexFileWriter writer(_fs, _index_path_prefix, _rowset_id, _seg_id, + InvertedIndexStorageFormatPB::V2); int64_t index_id = 1; std::string index_suffix = "suffix1"; @@ -129,24 +140,29 @@ TEST_F(InvertedIndexFileWriterTest, OpenTest) { auto open_result = writer.open(index_meta.get()); ASSERT_TRUE(open_result.has_value()); - DorisFSDirectory* dir = open_result.value(); + auto dir = open_result.value(); ASSERT_NE(dir, nullptr); auto key = std::make_pair(index_id, index_suffix); ASSERT_TRUE(writer._indices_dirs.find(key) != writer._indices_dirs.end()); - ASSERT_TRUE(writer._indices_dirs.find(key)->second.get() == dir); + ASSERT_TRUE(writer._indices_dirs.find(key)->second.get() == dir.get()); } TEST_F(InvertedIndexFileWriterTest, DeleteIndexTest) { - InvertedIndexFileWriter writer(_fs, _index_path_prefix, _rowset_id, _seg_id, _storage_format); + InvertedIndexFileWriter writer(_fs, _index_path_prefix, _rowset_id, _seg_id, + InvertedIndexStorageFormatPB::V2); InvertedIndexDirectoryMap indices_dirs; int64_t index_id = 1; std::string index_suffix = "suffix1"; - indices_dirs.emplace(std::make_pair(index_id, index_suffix), - std::make_unique()); - Status init_status = writer.initialize(indices_dirs); - ASSERT_TRUE(init_status.ok()); + auto st = writer._insert_directory_into_map(index_id, index_suffix, + std::make_shared()); + if (!st.ok()) { + std::cerr << "_insert_directory_into_map error in DeleteIndexTest: " << st.msg() + << std::endl; + ASSERT_TRUE(false); + return; + } auto key = std::make_pair(index_id, index_suffix); ASSERT_TRUE(writer._indices_dirs.find(key) != writer._indices_dirs.end()); @@ -171,13 +187,14 @@ TEST_F(InvertedIndexFileWriterTest, WriteV1Test) { auto open_result = writer.open(index_meta.get()); ASSERT_TRUE(open_result.has_value()); - DorisFSDirectory* dir = open_result.value(); + auto dir = open_result.value(); auto out_file = std::unique_ptr(dir->createOutput("write_v1_test")); out_file->writeString("test1"); out_file->close(); dir->close(); Status close_status = writer.close(); + if (!close_status.ok()) std::cout << "close error:" << close_status.msg() << std::endl; ASSERT_TRUE(close_status.ok()); const InvertedIndexFileInfo* file_info = writer.get_index_file_info(); @@ -206,7 +223,7 @@ TEST_F(InvertedIndexFileWriterTest, WriteV2Test) { ASSERT_NE(index_meta_1, nullptr); auto open_result_1 = writer.open(index_meta_1.get()); ASSERT_TRUE(open_result_1.has_value()); - DorisFSDirectory* dir_1 = open_result_1.value(); + auto dir_1 = open_result_1.value(); auto out_file_1 = std::unique_ptr( dir_1->createOutput("write_v2_test_index_1")); out_file_1->writeString("test1"); @@ -218,7 +235,7 @@ TEST_F(InvertedIndexFileWriterTest, WriteV2Test) { ASSERT_NE(index_meta_2, nullptr); auto open_result_2 = writer.open(index_meta_2.get()); ASSERT_TRUE(open_result_2.has_value()); - DorisFSDirectory* dir_2 = open_result_2.value(); + auto dir_2 = open_result_2.value(); auto out_file_2 = std::unique_ptr( dir_2->createOutput("write_v2_test_index_2")); out_file_2->writeString("test2"); @@ -236,28 +253,11 @@ TEST_F(InvertedIndexFileWriterTest, WriteV2Test) { ASSERT_EQ(total_size, file_info->index_size()); std::cout << "total_size:" << total_size << std::endl; } -/*TEST_F(InvertedIndexFileWriterTest, OpenTest_CreateOutputFailure) { - InvertedIndexFileWriter writer(_fs, _index_path_prefix, _rowset_id, _seg_id, InvertedIndexStorageFormatPB::V1); - std::string local_fs_index_path = InvertedIndexDescriptor::get_temporary_index_path( - ExecEnv::GetInstance()->get_tmp_file_dirs()->get_tmp_file_dir().native(), - _rowset_id, _seg_id, 1, "suffix1"); - _mock_dir->init(_fs, local_fs_index_path.c_str()); - EXPECT_CALL(*_mock_dir, createOutput(::testing::_)) - .WillOnce(::testing::Return(nullptr)); - - InvertedIndexDirectoryMap indices_dirs; - indices_dirs.emplace(std::make_pair(1, "suffix1"), std::move(_mock_dir)); - - Status status = writer.initialize(indices_dirs); - ASSERT_TRUE(status.ok()); - Status close_status = writer.close(); - ASSERT_TRUE(close_status.ok()); - }*/ + TEST_F(InvertedIndexFileWriterTest, HeaderLengthTest) { - InvertedIndexFileWriter writer(_fs, _index_path_prefix, _rowset_id, _seg_id, _storage_format); InvertedIndexDirectoryMap indices_dirs; - auto mock_dir1 = std::make_unique(); - auto mock_dir2 = std::make_unique(); + auto mock_dir1 = std::make_shared(); + auto mock_dir2 = std::make_shared(); std::string local_fs_index_path_1 = InvertedIndexDescriptor::get_temporary_index_path( ExecEnv::GetInstance()->get_tmp_file_dirs()->get_tmp_file_dir().native(), _rowset_id, _seg_id, 1, "suffix1"); @@ -284,11 +284,22 @@ TEST_F(InvertedIndexFileWriterTest, HeaderLengthTest) { out_file_2->writeString("test2"); out_file_2->close(); } - indices_dirs.emplace(std::make_pair(1, "suffix1"), std::move(mock_dir1)); - indices_dirs.emplace(std::make_pair(2, "suffix2"), std::move(mock_dir2)); + auto insertDirectory = [&](InvertedIndexFileWriter& writer, int64_t index_id, + const std::string& suffix, + std::shared_ptr& mock_dir) { + Status st = writer._insert_directory_into_map(index_id, suffix, mock_dir); + if (!st.ok()) { + std::cerr << "_insert_directory_into_map error in HeaderLengthTest: " << st.msg() + << std::endl; + assert(false); + return; + } + }; - Status init_status = writer.initialize(indices_dirs); - ASSERT_TRUE(init_status.ok()); + InvertedIndexFileWriter writer(_fs, _index_path_prefix, _rowset_id, _seg_id, + InvertedIndexStorageFormatPB::V2); + insertDirectory(writer, 1, "suffix1", mock_dir1); + insertDirectory(writer, 2, "suffix2", mock_dir2); int64_t header_length = writer.headerLength(); @@ -315,15 +326,9 @@ TEST_F(InvertedIndexFileWriterTest, HeaderLengthTest) { ASSERT_EQ(header_length, expected_header_length); } -class MockDorisFSDirectoryFileLength : public DorisFSDirectory { -public: - //MOCK_METHOD(lucene::store::IndexOutput*, createOutput, (const char* name), (override)); - MOCK_METHOD(int64_t, fileLength, (const char* name), (const, override)); - //MOCK_METHOD(void, close, (), (override)); - //MOCK_METHOD(const char*, getObjectName, (), (const, override)); -}; + TEST_F(InvertedIndexFileWriterTest, PrepareSortedFilesTest) { - auto mock_dir = std::make_unique(); + auto mock_dir = std::make_shared(); std::string local_fs_index_path = InvertedIndexDescriptor::get_temporary_index_path( ExecEnv::GetInstance()->get_tmp_file_dirs()->get_tmp_file_dir().native(), _rowset_id, _seg_id, 1, "suffix1"); @@ -344,20 +349,23 @@ TEST_F(InvertedIndexFileWriterTest, PrepareSortedFilesTest) { EXPECT_CALL(*mock_dir, fileLength(testing::StrEq("0.tii"))).WillOnce(testing::Return(1500)); EXPECT_CALL(*mock_dir, fileLength(testing::StrEq("nullbitmap"))).WillOnce(testing::Return(500)); - _indices_dirs.emplace(std::make_pair(1, "suffix1"), std::move(mock_dir)); - - InvertedIndexFileWriter writer(_fs, _index_path_prefix, _rowset_id, _seg_id, _storage_format); - - Status init_status = writer.initialize(_indices_dirs); - ASSERT_TRUE(init_status.ok()); + InvertedIndexFileWriter writer(_fs, _index_path_prefix, _rowset_id, _seg_id, + InvertedIndexStorageFormatPB::V2); + auto st = writer._insert_directory_into_map(1, "suffix1", mock_dir); + if (!st.ok()) { + std::cerr << "_insert_directory_into_map error in PrepareSortedFilesTest: " << st.msg() + << std::endl; + ASSERT_TRUE(false); + return; + } std::vector sorted_files = writer.prepare_sorted_files(writer._indices_dirs[std::make_pair(1, "suffix1")].get()); - // 1. segments1.dat (priority 1, size 1000) - // 2. fnm1.dat (priority 2, size 2000) - // 3. tii1.dat (priority 3, size 1500) - // 4. other1.dat (priority 4, size 500) + // 1. 0.segments (priority 1, size 1000) + // 2. 0.fnm (priority 2, size 2000) + // 3. 0.tii (priority 3, size 1500) + // 4. nullbitmap (priority 4, size 500) std::vector expected_order = {"0.segments", "0.fnm", "0.tii", "nullbitmap"}; ASSERT_EQ(sorted_files.size(), expected_order.size()); @@ -375,5 +383,44 @@ TEST_F(InvertedIndexFileWriterTest, PrepareSortedFilesTest) { } } } +TEST_F(InvertedIndexFileWriterTest, CopyFileTest_OpenInputFailure) { + auto mock_dir = std::make_shared(); + std::string local_fs_index_path = InvertedIndexDescriptor::get_temporary_index_path( + ExecEnv::GetInstance()->get_tmp_file_dirs()->get_tmp_file_dir().native(), _rowset_id, + _seg_id, 1, "suffix1"); + EXPECT_TRUE(io::global_local_filesystem()->delete_directory(local_fs_index_path).ok()); + EXPECT_TRUE(io::global_local_filesystem()->create_directory(local_fs_index_path).ok()); + mock_dir->init(_fs, local_fs_index_path.c_str()); + std::vector files = {"0.segments", "0.fnm", "0.tii", "nullbitmap", "write.lock"}; + for (auto& file : files) { + auto out_file_1 = + std::unique_ptr(mock_dir->createOutput(file.c_str())); + out_file_1->writeString("test1"); + out_file_1->close(); + } + InvertedIndexFileWriter writer(_fs, _index_path_prefix, _rowset_id, _seg_id, + InvertedIndexStorageFormatPB::V2); + auto st = writer._insert_directory_into_map(1, "suffix1", mock_dir); + if (!st.ok()) { + std::cerr << "_insert_directory_into_map error in CopyFileTest_OpenInputFailure: " + << st.msg() << std::endl; + ASSERT_TRUE(false); + return; + } + + EXPECT_CALL(*mock_dir, + openInput(::testing::StrEq("0.segments"), ::testing::_, ::testing::_, ::testing::_)) + .WillOnce(::testing::Invoke([&](const char* name, lucene::store::IndexInput*& ret, + CLuceneError& err_ref, int bufferSize) { + err_ref.set(CL_ERR_IO, fmt::format("Could not open file, file is {}", name).data()); + return false; + })); + + uint8_t buffer[16384]; + ASSERT_THROW( + { writer.copyFile("0.segments", mock_dir.get(), nullptr, buffer, sizeof(buffer)); }, + CLuceneError); +} + } // namespace segment_v2 } // namespace doris \ No newline at end of file