From e074cfee146acdac4f87897d5b664377336cabf2 Mon Sep 17 00:00:00 2001 From: JaySon Date: Wed, 14 Sep 2022 18:20:59 +0800 Subject: [PATCH 01/17] *: Fix lint for files under src/Columns (#5883) ref pingcap/tiflash#4605 --- dbms/src/Columns/ColumnAggregateFunction.cpp | 4 ++-- dbms/src/Columns/ColumnAggregateFunction.h | 4 ++-- dbms/src/Columns/ColumnArray.cpp | 22 ++++++++++---------- dbms/src/Columns/ColumnConst.cpp | 2 +- dbms/src/Columns/ColumnDecimal.cpp | 20 +++++++++--------- dbms/src/Columns/ColumnDecimal.h | 2 +- dbms/src/Columns/ColumnFixedString.cpp | 6 +++--- dbms/src/Columns/ColumnNullable.cpp | 4 ++-- dbms/src/Columns/ColumnTuple.cpp | 2 +- dbms/src/Columns/ColumnVector.cpp | 14 ++++++------- dbms/src/Columns/FilterDescription.cpp | 10 ++++----- 11 files changed, 45 insertions(+), 45 deletions(-) diff --git a/dbms/src/Columns/ColumnAggregateFunction.cpp b/dbms/src/Columns/ColumnAggregateFunction.cpp index b4495d83ccb..e87ff6a059a 100644 --- a/dbms/src/Columns/ColumnAggregateFunction.cpp +++ b/dbms/src/Columns/ColumnAggregateFunction.cpp @@ -76,7 +76,7 @@ MutableColumnPtr ColumnAggregateFunction::convertToValues() const * AggregateFunction(quantileTiming(0.5), UInt64) * into UInt16 - already finished result of `quantileTiming`. */ - if (const AggregateFunctionState * function_state = typeid_cast(function)) + if (const auto * function_state = typeid_cast(function)) { auto res = createView(); res->set(function_state->getNestedFunction()); @@ -96,7 +96,7 @@ MutableColumnPtr ColumnAggregateFunction::convertToValues() const void ColumnAggregateFunction::insertRangeFrom(const IColumn & from, size_t start, size_t length) { - const ColumnAggregateFunction & from_concrete = static_cast(from); + const auto & from_concrete = static_cast(from); if (start + length > from_concrete.getData().size()) throw Exception( diff --git a/dbms/src/Columns/ColumnAggregateFunction.h b/dbms/src/Columns/ColumnAggregateFunction.h index 0620ffd8427..6dcbef61efe 100644 --- a/dbms/src/Columns/ColumnAggregateFunction.h +++ b/dbms/src/Columns/ColumnAggregateFunction.h @@ -151,9 +151,9 @@ class ColumnAggregateFunction final : public COWPtrHelper(res); + auto & res_arr = DB::get(res); for (size_t i = 0; i < size; ++i) getData().get(offset + i, res_arr[i]); @@ -268,7 +268,7 @@ void ColumnArray::updateWeakHash32(WeakHash32 & hash, const TiDB::TiDBCollatorPt void ColumnArray::insert(const Field & x) { - const Array & array = DB::get(x); + const auto & array = DB::get(x); size_t size = array.size(); for (size_t i = 0; i < size; ++i) getData().insert(array[i]); @@ -278,7 +278,7 @@ void ColumnArray::insert(const Field & x) void ColumnArray::insertFrom(const IColumn & src_, size_t n) { - const ColumnArray & src = static_cast(src_); + const auto & src = static_cast(src_); size_t size = src.sizeAt(n); size_t offset = src.offsetAt(n); @@ -305,7 +305,7 @@ void ColumnArray::popBack(size_t n) int ColumnArray::compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const { - const ColumnArray & rhs = static_cast(rhs_); + const auto & rhs = static_cast(rhs_); /// Suboptimal size_t lhs_size = sizeAt(n); @@ -425,7 +425,7 @@ void ColumnArray::insertRangeFrom(const IColumn & src, size_t start, size_t leng if (length == 0) return; - const ColumnArray & src_concrete = static_cast(src); + const auto & src_concrete = static_cast(src); if (start + length > src_concrete.getOffsets().size()) throw Exception("Parameter out of bound in ColumnArray::insertRangeFrom method.", @@ -619,7 +619,7 @@ ColumnPtr 
ColumnArray::filterNullable(const Filter & filt, ssize_t result_size_hint) const
     if (getOffsets().empty())
         return ColumnArray::create(data);
 
-    const ColumnNullable & nullable_elems = static_cast<const ColumnNullable &>(*data);
+    const auto & nullable_elems = static_cast<const ColumnNullable &>(*data);
 
     auto array_of_nested = ColumnArray::create(nullable_elems.getNestedColumnPtr(), offsets);
     auto filtered_array_of_nested_owner = array_of_nested->filter(filt, result_size_hint);
@@ -642,7 +642,7 @@ ColumnPtr ColumnArray::filterTuple(const Filter & filt, ssize_t result_size_hint) const
     if (getOffsets().empty())
         return ColumnArray::create(data);
 
-    const ColumnTuple & tuple = static_cast<const ColumnTuple &>(*data);
+    const auto & tuple = static_cast<const ColumnTuple &>(*data);
 
     /// Make temporary arrays for each components of Tuple, then filter and collect back.
 
@@ -824,7 +824,7 @@ ColumnPtr ColumnArray::replicateString(const Offsets & replicate_offsets) const
     if (0 == col_size)
         return res;
 
-    ColumnArray & array_res = static_cast<ColumnArray &>(*res);
+    auto & array_res = static_cast<ColumnArray &>(*res);
 
     const ColumnString & src_string = typeid_cast<const ColumnString &>(*data);
     const ColumnString::Chars_t & src_chars = src_string.getChars();
@@ -935,7 +935,7 @@ ColumnPtr ColumnArray::replicateGeneric(const Offsets & replicate_offsets) const
         throw Exception("Size of offsets doesn't match size of column.", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH);
 
     MutableColumnPtr res = cloneEmpty();
-    ColumnArray & res_concrete = static_cast<ColumnArray &>(*res);
+    auto & res_concrete = static_cast<ColumnArray &>(*res);
 
     if (0 == col_size)
         return res;
@@ -956,7 +956,7 @@ ColumnPtr ColumnArray::replicateGeneric(const Offsets & replicate_offsets) const
 ColumnPtr ColumnArray::replicateNullable(const Offsets & replicate_offsets) const
 {
-    const ColumnNullable & nullable = static_cast<const ColumnNullable &>(*data);
+    const auto & nullable = static_cast<const ColumnNullable &>(*data);
 
     /// Make temporary arrays for each components of Nullable. Then replicate them independently and collect back to result.
     /// NOTE Offsets are calculated twice and it is redundant.
@@ -976,7 +976,7 @@ ColumnPtr ColumnArray::replicateNullable(const Offsets & replicate_offsets) const
 ColumnPtr ColumnArray::replicateTuple(const Offsets & replicate_offsets) const
 {
-    const ColumnTuple & tuple = static_cast<const ColumnTuple &>(*data);
+    const auto & tuple = static_cast<const ColumnTuple &>(*data);
 
     /// Make temporary arrays for each components of Tuple. In the same way as for Nullable.
 
diff --git a/dbms/src/Columns/ColumnConst.cpp b/dbms/src/Columns/ColumnConst.cpp
index 1237110d5e1..75f58c624ff 100644
--- a/dbms/src/Columns/ColumnConst.cpp
+++ b/dbms/src/Columns/ColumnConst.cpp
@@ -31,7 +31,7 @@ ColumnConst::ColumnConst(const ColumnPtr & data_, size_t s)
     , s(s)
 {
     /// Squash Const of Const.
- while (const ColumnConst * const_data = typeid_cast(data.get())) + while (const auto * const_data = typeid_cast(data.get())) data = const_data->getDataColumnPtr(); if (data->size() != 1) diff --git a/dbms/src/Columns/ColumnDecimal.cpp b/dbms/src/Columns/ColumnDecimal.cpp index 96d54f320ca..9a372e4f5b6 100644 --- a/dbms/src/Columns/ColumnDecimal.cpp +++ b/dbms/src/Columns/ColumnDecimal.cpp @@ -95,7 +95,7 @@ const char * ColumnDecimal::deserializeAndInsertFromArena(const char * pos, c size_t offset = 0; bool s = unalignedLoad(pos + offset); offset += sizeof(bool); - size_t limb_count = unalignedLoad(pos + offset); + auto limb_count = unalignedLoad(pos + offset); offset += sizeof(size_t); val.resize(limb_count, limb_count); @@ -244,7 +244,7 @@ void ColumnDecimal::insertData(const char * src [[maybe_unused]], size_t /*le } else { - T tmp; + T tmp{}; memcpy(&tmp, src, sizeof(T)); data.emplace_back(tmp); } @@ -253,23 +253,23 @@ void ColumnDecimal::insertData(const char * src [[maybe_unused]], size_t /*le template bool ColumnDecimal::decodeTiDBRowV2Datum(size_t cursor, const String & raw_value, size_t /* length */, bool /* force_decode */) { - PrecType prec_ = raw_value[cursor++]; - ScaleType scale_ = raw_value[cursor++]; - auto type = createDecimal(prec_, scale_); - if (unlikely(!checkDecimal(*type))) + PrecType dec_prec = static_cast(raw_value[cursor++]); + ScaleType dec_scale = static_cast(raw_value[cursor++]); + auto dec_type = createDecimal(dec_prec, dec_scale); + if (unlikely(!checkDecimal(*dec_type))) { - throw Exception("Detected unmatched decimal value type: Decimal( " + std::to_string(prec_) + ", " + std::to_string(scale_) + ") when decoding with column type " + this->getName(), + throw Exception("Detected unmatched decimal value type: Decimal( " + std::to_string(dec_prec) + ", " + std::to_string(dec_scale) + ") when decoding with column type " + this->getName(), ErrorCodes::LOGICAL_ERROR); } - auto res = DecodeDecimalImpl(cursor, raw_value, prec_, scale_); - data.push_back(DecimalField(res, scale_)); + auto res = DecodeDecimalImpl(cursor, raw_value, dec_prec, dec_scale); + data.push_back(DecimalField(res, dec_scale)); return true; } template void ColumnDecimal::insertRangeFrom(const IColumn & src, size_t start, size_t length) { - const ColumnDecimal & src_vec = static_cast(src); + const auto & src_vec = static_cast(src); if (start + length > src_vec.data.size()) throw Exception("Parameters start = " + toString(start) + ", length = " + toString(length) + " are out of bound in ColumnDecimal::insertRangeFrom method (data.size() = " + toString(src_vec.data.size()) + ").", diff --git a/dbms/src/Columns/ColumnDecimal.h b/dbms/src/Columns/ColumnDecimal.h index b1d7f9c8c99..cd66bae1bdf 100644 --- a/dbms/src/Columns/ColumnDecimal.h +++ b/dbms/src/Columns/ColumnDecimal.h @@ -116,7 +116,7 @@ class ColumnDecimal final : public COWPtrHelper(src).getData()[n]); } - void insertData(const char * pos, size_t /*length*/) override; + void insertData(const char * src, size_t /*length*/) override; bool decodeTiDBRowV2Datum(size_t cursor, const String & raw_value, size_t length, bool force_decode) override; void insertDefault() override { data.push_back(T()); } void insert(const Field & x) override { data.push_back(DB::get::Type>(x)); } diff --git a/dbms/src/Columns/ColumnFixedString.cpp b/dbms/src/Columns/ColumnFixedString.cpp index db908b10f9a..51846cadc75 100644 --- a/dbms/src/Columns/ColumnFixedString.cpp +++ b/dbms/src/Columns/ColumnFixedString.cpp @@ -57,7 +57,7 @@ MutableColumnPtr 
ColumnFixedString::cloneResized(size_t size) const void ColumnFixedString::insert(const Field & x) { - const String & s = DB::get(x); + const auto & s = DB::get(x); if (s.size() > n) throw Exception("Too large string '" + s + "' for FixedString column", ErrorCodes::TOO_LARGE_STRING_SIZE); @@ -69,7 +69,7 @@ void ColumnFixedString::insert(const Field & x) void ColumnFixedString::insertFrom(const IColumn & src_, size_t index) { - const ColumnFixedString & src = static_cast(src_); + const auto & src = static_cast(src_); if (n != src.getN()) throw Exception("Size of FixedString doesn't match", ErrorCodes::SIZE_OF_FIXED_STRING_DOESNT_MATCH); @@ -180,7 +180,7 @@ void ColumnFixedString::getPermutation(bool reverse, size_t limit, int /*nan_dir void ColumnFixedString::insertRangeFrom(const IColumn & src, size_t start, size_t length) { - const ColumnFixedString & src_concrete = static_cast(src); + const auto & src_concrete = static_cast(src); if (start + length > src_concrete.size()) throw Exception("Parameters start = " diff --git a/dbms/src/Columns/ColumnNullable.cpp b/dbms/src/Columns/ColumnNullable.cpp index a3ef93f1ad3..558b5574675 100644 --- a/dbms/src/Columns/ColumnNullable.cpp +++ b/dbms/src/Columns/ColumnNullable.cpp @@ -486,8 +486,8 @@ void getExtremesFromNullableContent(const ColumnVector & col, const NullMap & if (has_not_null) { - min = typename NearestFieldType::Type(cur_min); - max = typename NearestFieldType::Type(cur_max); + min = static_cast::Type>(cur_min); + max = static_cast::Type>(cur_max); } } diff --git a/dbms/src/Columns/ColumnTuple.cpp b/dbms/src/Columns/ColumnTuple.cpp index 974069b9f93..60841a51cf4 100644 --- a/dbms/src/Columns/ColumnTuple.cpp +++ b/dbms/src/Columns/ColumnTuple.cpp @@ -117,7 +117,7 @@ void ColumnTuple::insert(const Field & x) void ColumnTuple::insertFrom(const IColumn & src_, size_t n) { - const ColumnTuple & src = static_cast(src_); + const auto & src = static_cast(src_); const size_t tuple_size = columns.size(); if (src.columns.size() != tuple_size) diff --git a/dbms/src/Columns/ColumnVector.cpp b/dbms/src/Columns/ColumnVector.cpp index 49f1290e692..92d8c81b0ae 100644 --- a/dbms/src/Columns/ColumnVector.cpp +++ b/dbms/src/Columns/ColumnVector.cpp @@ -181,19 +181,19 @@ UInt64 ColumnVector::get64(size_t n) const template UInt64 ColumnVector::getUInt(size_t n) const { - return UInt64(data[n]); + return static_cast(data[n]); } template Int64 ColumnVector::getInt(size_t n) const { - return Int64(data[n]); + return static_cast(data[n]); } template void ColumnVector::insertRangeFrom(const IColumn & src, size_t start, size_t length) { - const ColumnVector & src_vec = static_cast(src); + const auto & src_vec = static_cast(src); if (start + length > src_vec.data.size()) throw Exception( @@ -334,8 +334,8 @@ void ColumnVector::getExtremes(Field & min, Field & max) const if (size == 0) { - min = typename NearestFieldType::Type(0); - max = typename NearestFieldType::Type(0); + min = static_cast::Type>(0); + max = static_cast::Type>(0); return; } @@ -369,8 +369,8 @@ void ColumnVector::getExtremes(Field & min, Field & max) const cur_max = x; } - min = typename NearestFieldType::Type(cur_min); - max = typename NearestFieldType::Type(cur_max); + min = static_cast::Type>(cur_min); + max = static_cast::Type>(cur_max); } /// Explicit template instantiations - to avoid code bloat in headers. 
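[Editor's note between file diffs: the lint change applied throughout patch 01 is mechanical. Where the concrete type is already spelled in a typeid_cast/static_cast on the right-hand side, the declared type becomes auto (or const auto * / const auto &), and functional-style casts such as UInt64(data[n]) become static_cast<UInt64>(data[n]). A minimal, self-contained sketch of the pattern, using a hypothetical column type rather than TiFlash's real classes:

    // Hypothetical IColumn/ColumnFoo pair, for illustration only.
    struct IColumn
    {
        virtual ~IColumn() = default;
    };
    struct ColumnFoo : IColumn
    {
        long value = 0;
    };

    unsigned long long getUInt(const IColumn & col)
    {
        // Before: const ColumnFoo & concrete = static_cast<const ColumnFoo &>(col);
        // After the lint fix, the type name appears exactly once, inside the cast.
        const auto & concrete = static_cast<const ColumnFoo &>(col);
        // Before: return UInt64(concrete.value); -- a functional-style cast.
        return static_cast<unsigned long long>(concrete.value);
    }

The same rule explains the `T tmp{};` change in ColumnDecimal.cpp above: value-initializing before the memcpy silences the uninitialized-variable lint.]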
diff --git a/dbms/src/Columns/FilterDescription.cpp b/dbms/src/Columns/FilterDescription.cpp index d8626483101..585380740ae 100644 --- a/dbms/src/Columns/FilterDescription.cpp +++ b/dbms/src/Columns/FilterDescription.cpp @@ -38,12 +38,12 @@ ConstantFilterDescription::ConstantFilterDescription(const IColumn & column) if (column.isColumnConst()) { - const ColumnConst & column_const = static_cast(column); + const auto & column_const = static_cast(column); const IColumn & column_nested = column_const.getDataColumn(); if (!typeid_cast(&column_nested)) { - const ColumnNullable * column_nested_nullable = typeid_cast(&column_nested); + const auto * column_nested_nullable = typeid_cast(&column_nested); if (!column_nested_nullable || !typeid_cast(&column_nested_nullable->getNestedColumn())) { throw Exception( @@ -63,18 +63,18 @@ ConstantFilterDescription::ConstantFilterDescription(const IColumn & column) FilterDescription::FilterDescription(const IColumn & column) { - if (const ColumnUInt8 * concrete_column = typeid_cast(&column)) + if (const auto * concrete_column = typeid_cast(&column)) { data = &concrete_column->getData(); return; } - if (const ColumnNullable * nullable_column = typeid_cast(&column)) + if (const auto * nullable_column = typeid_cast(&column)) { ColumnPtr nested_column = nullable_column->getNestedColumnPtr(); MutableColumnPtr mutable_holder = (*std::move(nested_column)).mutate(); - ColumnUInt8 * concrete_column = typeid_cast(mutable_holder.get()); + auto * concrete_column = typeid_cast(mutable_holder.get()); if (!concrete_column) throw Exception( fmt::format("Illegal type {} of column for filter. Must be UInt8 or Nullable(UInt8).", column.getName()), From 8a6e4d14fd5a58887416fcc97b3988eb98b00bfa Mon Sep 17 00:00:00 2001 From: Wenxuan Date: Wed, 14 Sep 2022 20:10:59 +0800 Subject: [PATCH 02/17] storage: Use different names for block bg pool (#5867) ref pingcap/tiflash#5807 --- dbms/src/Interpreters/Context.cpp | 4 +- .../src/Storages/BackgroundProcessingPool.cpp | 16 +- dbms/src/Storages/BackgroundProcessingPool.h | 6 +- metrics/grafana/tiflash_summary.json | 293 +++++++++++++----- 4 files changed, 225 insertions(+), 94 deletions(-) diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index 49ee249d350..f1c04025bd1 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -1366,7 +1366,7 @@ BackgroundProcessingPool & Context::initializeBackgroundPool(UInt16 pool_size) { auto lock = getLock(); if (!shared->background_pool) - shared->background_pool = std::make_shared(pool_size); + shared->background_pool = std::make_shared(pool_size, "bg-"); return *shared->background_pool; } @@ -1380,7 +1380,7 @@ BackgroundProcessingPool & Context::initializeBlockableBackgroundPool(UInt16 poo { auto lock = getLock(); if (!shared->blockable_background_pool) - shared->blockable_background_pool = std::make_shared(pool_size); + shared->blockable_background_pool = std::make_shared(pool_size, "bg-block-"); return *shared->blockable_background_pool; } diff --git a/dbms/src/Storages/BackgroundProcessingPool.cpp b/dbms/src/Storages/BackgroundProcessingPool.cpp index 644e8d05b1a..6321d091fa1 100644 --- a/dbms/src/Storages/BackgroundProcessingPool.cpp +++ b/dbms/src/Storages/BackgroundProcessingPool.cpp @@ -82,15 +82,18 @@ void BackgroundProcessingPool::TaskInfo::wake() } -BackgroundProcessingPool::BackgroundProcessingPool(int size_) +BackgroundProcessingPool::BackgroundProcessingPool(int size_, std::string thread_prefix_) : size(size_) + , 
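// Note on the hunk above: each worker thread is now named
// "<thread_prefix><index>" in threadFunction() below, replacing the old
// process-wide "BkgPool<tid>" naming. Context.cpp passes "bg-" for the
// regular pool and "bg-block-" for the blockable pool, so the two pools'
// CPU usage can be told apart per thread; the dashboard changes later in
// this patch match them with the regexes bg_\\d+ and bg_block_\\d+,
// presumably the sanitized form of these names in the proxy's
// thread-CPU metrics.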
thread_prefix(thread_prefix_) , thread_ids_counter(size_) { - LOG_FMT_INFO(&Poco::Logger::get("BackgroundProcessingPool"), "Create BackgroundProcessingPool with {} threads", size); + LOG_FMT_INFO(Logger::get("BackgroundProcessingPool"), "Create BackgroundProcessingPool, prefix={} n_threads={}", thread_prefix, size); threads.resize(size); - for (auto & thread : threads) - thread = std::thread([this] { threadFunction(); }); + for (size_t i = 0; i < size; ++i) + { + threads[i] = std::thread([this, i] { threadFunction(i); }); + } } @@ -142,11 +145,10 @@ BackgroundProcessingPool::~BackgroundProcessingPool() } -void BackgroundProcessingPool::threadFunction() +void BackgroundProcessingPool::threadFunction(size_t thread_idx) { { - static std::atomic_uint64_t tid{0}; - const auto name = "BkgPool" + std::to_string(tid++); + const auto name = thread_prefix + std::to_string(thread_idx); setThreadName(name.data()); is_background_thread = true; addThreadId(getTid()); diff --git a/dbms/src/Storages/BackgroundProcessingPool.h b/dbms/src/Storages/BackgroundProcessingPool.h index 49a01b3a397..066a6ebaa10 100644 --- a/dbms/src/Storages/BackgroundProcessingPool.h +++ b/dbms/src/Storages/BackgroundProcessingPool.h @@ -81,7 +81,7 @@ class BackgroundProcessingPool using TaskHandle = std::shared_ptr; - explicit BackgroundProcessingPool(int size_); + explicit BackgroundProcessingPool(int size_, std::string thread_prefix_); size_t getNumberOfThreads() const { return size; } @@ -109,6 +109,7 @@ class BackgroundProcessingPool using Threads = std::vector; const size_t size; + const std::string thread_prefix; static constexpr double sleep_seconds = 10; static constexpr double sleep_seconds_random_part = 1.0; @@ -123,8 +124,7 @@ class BackgroundProcessingPool std::atomic shutdown{false}; std::condition_variable wake_event; - - void threadFunction(); + void threadFunction(size_t thread_idx); }; using BackgroundProcessingPoolPtr = std::shared_ptr; diff --git a/metrics/grafana/tiflash_summary.json b/metrics/grafana/tiflash_summary.json index c2c0e7496a5..601553dc966 100644 --- a/metrics/grafana/tiflash_summary.json +++ b/metrics/grafana/tiflash_summary.json @@ -52,7 +52,7 @@ "gnetId": null, "graphTooltip": 1, "id": null, - "iteration": 1662525920282, + "iteration": 1663121028653, "links": [], "panels": [ { @@ -1860,7 +1860,7 @@ "y": 16 }, "hiddenSeries": false, - "id": 151, + "id": 153, "legend": { "alignAsTable": true, "avg": false, @@ -1902,7 +1902,7 @@ "targets": [ { "exemplar": true, - "expr": "sum by (instance) (rate(tiflash_proxy_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", name=~\"BkgPool.*\"}[1m]))", + "expr": "sum by (instance) (rate(tiflash_proxy_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", name=~\"snap_sender.*\"}[1m]))", "format": "time_series", "hide": false, "instant": false, @@ -1914,7 +1914,7 @@ }, { "exemplar": true, - "expr": "count by (instance) (tiflash_proxy_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", name=~\"BkgPool.*\"})", + "expr": "count by (instance) (tiflash_proxy_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", name=~\"snap_sender.*\"})", "hide": false, "interval": "", "intervalFactor": 2, @@ -1926,7 +1926,7 @@ "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Storage Background Tasks", + "title": "Snapshot Sender", "tooltip": { "msResolution": false, "shared": true, @@ -1972,7 +1972,7 @@ "dashes": false, 
"datasource": "${DS_TEST-CLUSTER}", "decimals": null, - "description": "Involved when manually compacting the data.", + "description": "", "editable": true, "error": false, "fieldConfig": { @@ -1989,7 +1989,7 @@ "y": 23 }, "hiddenSeries": false, - "id": 149, + "id": 151, "legend": { "alignAsTable": true, "avg": false, @@ -2031,7 +2031,7 @@ "targets": [ { "exemplar": true, - "expr": "sum by (instance) (rate(tiflash_proxy_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", name=~\"m_compact_pool\"}[1m]))", + "expr": "sum by (instance) (rate(tiflash_proxy_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", name=~\"bg_\\\\d+\"}[1m]))", "format": "time_series", "hide": false, "instant": false, @@ -2043,7 +2043,7 @@ }, { "exemplar": true, - "expr": "count by (instance) (tiflash_proxy_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", name=~\"m_compact_pool\"})", + "expr": "count by (instance) (tiflash_proxy_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", name=~\"bg_\\\\d+\"})", "hide": false, "interval": "", "intervalFactor": 2, @@ -2055,7 +2055,7 @@ "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Manual Compaction", + "title": "Storage Background (Small Tasks)", "tooltip": { "msResolution": false, "shared": true, @@ -2118,7 +2118,7 @@ "y": 23 }, "hiddenSeries": false, - "id": 153, + "id": 156, "legend": { "alignAsTable": true, "avg": false, @@ -2160,7 +2160,7 @@ "targets": [ { "exemplar": true, - "expr": "sum by (instance) (rate(tiflash_proxy_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", name=~\"snap_sender.*\"}[1m]))", + "expr": "sum by (instance) (rate(tiflash_proxy_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", name=~\"bg_block_\\\\d+\"}[1m]))", "format": "time_series", "hide": false, "instant": false, @@ -2172,7 +2172,7 @@ }, { "exemplar": true, - "expr": "count by (instance) (tiflash_proxy_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", name=~\"snap_sender.*\"})", + "expr": "count by (instance) (tiflash_proxy_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", name=~\"bg_block_\\\\d+\"})", "hide": false, "interval": "", "intervalFactor": 2, @@ -2184,7 +2184,136 @@ "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Snapshot Sender", + "title": "Storage Background (Large Tasks)", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 1, + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "decimals": null, + "description": "Involved when manually compacting the data.", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 30 + }, + "hiddenSeries": false, + "id": 149, + 
"legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": 250, + "sort": "max", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "Limit", + "color": "#F2495C", + "hideTooltip": true, + "linewidth": 2, + "nullPointMode": "connected" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum by (instance) (rate(tiflash_proxy_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", name=~\"m_compact_pool\"}[1m]))", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{name}} {{instance}}", + "refId": "A", + "step": 40 + }, + { + "exemplar": true, + "expr": "count by (instance) (tiflash_proxy_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", name=~\"m_compact_pool\"})", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "Limit", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Manual Compaction", "tooltip": { "msResolution": false, "shared": true, @@ -2254,7 +2383,7 @@ "h": 7, "w": 12, "x": 0, - "y": 33 + "y": 3 }, "hiddenSeries": false, "id": 9, @@ -2352,7 +2481,7 @@ "h": 7, "w": 12, "x": 12, - "y": 33 + "y": 3 }, "hiddenSeries": false, "id": 2, @@ -2449,7 +2578,7 @@ "h": 7, "w": 12, "x": 0, - "y": 40 + "y": 10 }, "hiddenSeries": false, "id": 11, @@ -2567,7 +2696,7 @@ "h": 7, "w": 12, "x": 12, - "y": 40 + "y": 10 }, "hiddenSeries": false, "id": 12, @@ -2664,7 +2793,7 @@ "h": 7, "w": 12, "x": 0, - "y": 47 + "y": 17 }, "hiddenSeries": false, "id": 13, @@ -2782,7 +2911,7 @@ "h": 7, "w": 12, "x": 12, - "y": 47 + "y": 17 }, "hiddenSeries": false, "id": 14, @@ -2879,7 +3008,7 @@ "h": 7, "w": 12, "x": 0, - "y": 54 + "y": 24 }, "hiddenSeries": false, "id": 63, @@ -2995,7 +3124,7 @@ "h": 7, "w": 12, "x": 12, - "y": 54 + "y": 24 }, "hiddenSeries": false, "id": 77, @@ -3093,7 +3222,7 @@ "h": 7, "w": 12, "x": 0, - "y": 61 + "y": 31 }, "hiddenSeries": false, "id": 100, @@ -3192,7 +3321,7 @@ "h": 7, "w": 12, "x": 12, - "y": 61 + "y": 31 }, "hiddenSeries": false, "id": 101, @@ -3291,7 +3420,7 @@ "h": 7, "w": 12, "x": 0, - "y": 68 + "y": 38 }, "hiddenSeries": false, "id": 102, @@ -3390,7 +3519,7 @@ "h": 7, "w": 12, "x": 12, - "y": 68 + "y": 38 }, "hiddenSeries": false, "id": 103, @@ -3507,7 +3636,7 @@ "h": 8, "w": 12, "x": 0, - "y": 34 + "y": 4 }, "hiddenSeries": false, "id": 107, @@ -3609,7 +3738,7 @@ "h": 8, "w": 12, "x": 12, - "y": 34 + "y": 4 }, "hiddenSeries": false, "id": 109, @@ -3729,7 +3858,7 @@ "h": 8, "w": 12, "x": 0, - "y": 42 + "y": 12 }, "hiddenSeries": false, "id": 111, @@ -3840,7 +3969,7 @@ "h": 8, "w": 12, "x": 12, - "y": 42 + "y": 12 }, "hiddenSeries": false, "id": 113, @@ -3951,7 +4080,7 @@ "h": 8, "w": 12, "x": 0, - "y": 50 + "y": 20 }, "hiddenSeries": false, "id": 117, @@ -4052,7 +4181,7 @@ "h": 8, "w": 12, "x": 12, - "y": 50 + "y": 20 }, "hiddenSeries": false, "id": 115, @@ -4185,7 +4314,7 @@ "h": 7, "w": 12, "x": 0, - "y": 42 + "y": 5 }, "hiddenSeries": false, "id": 17, @@ -4284,7 +4413,7 @@ "h": 7, "w": 12, "x": 12, - 
"y": 42 + "y": 5 }, "hiddenSeries": false, "id": 18, @@ -4390,7 +4519,7 @@ "h": 7, "w": 12, "x": 0, - "y": 49 + "y": 12 }, "hiddenSeries": false, "id": 19, @@ -4511,7 +4640,7 @@ "h": 7, "w": 12, "x": 12, - "y": 49 + "y": 12 }, "hiddenSeries": false, "id": 20, @@ -4659,7 +4788,7 @@ "h": 8, "w": 12, "x": 0, - "y": 36 + "y": 6 }, "hiddenSeries": false, "id": 41, @@ -4772,7 +4901,7 @@ "h": 8, "w": 12, "x": 12, - "y": 36 + "y": 6 }, "hiddenSeries": false, "id": 38, @@ -4930,7 +5059,7 @@ "h": 8, "w": 24, "x": 0, - "y": 44 + "y": 14 }, "hiddenSeries": false, "id": 40, @@ -5030,7 +5159,7 @@ "h": 5, "w": 12, "x": 0, - "y": 52 + "y": 22 }, "hiddenSeries": false, "id": 39, @@ -5133,7 +5262,7 @@ "h": 5, "w": 12, "x": 12, - "y": 52 + "y": 22 }, "hiddenSeries": false, "id": 42, @@ -5237,7 +5366,7 @@ "h": 5, "w": 12, "x": 0, - "y": 57 + "y": 27 }, "hiddenSeries": false, "id": 130, @@ -5339,7 +5468,7 @@ "h": 5, "w": 12, "x": 12, - "y": 57 + "y": 27 }, "hiddenSeries": false, "id": 131, @@ -5443,7 +5572,7 @@ "h": 8, "w": 12, "x": 0, - "y": 62 + "y": 32 }, "hiddenSeries": false, "id": 43, @@ -5549,7 +5678,7 @@ "h": 8, "w": 12, "x": 12, - "y": 62 + "y": 32 }, "heatmap": {}, "hideZeroBuckets": true, @@ -5612,7 +5741,7 @@ "h": 8, "w": 12, "x": 0, - "y": 70 + "y": 40 }, "hiddenSeries": false, "id": 46, @@ -5735,7 +5864,7 @@ "h": 8, "w": 12, "x": 12, - "y": 70 + "y": 40 }, "hiddenSeries": false, "id": 47, @@ -5859,7 +5988,7 @@ "h": 8, "w": 12, "x": 0, - "y": 78 + "y": 48 }, "height": "", "hiddenSeries": false, @@ -5989,7 +6118,7 @@ "h": 8, "w": 12, "x": 12, - "y": 78 + "y": 48 }, "height": "", "hiddenSeries": false, @@ -6117,7 +6246,7 @@ "h": 8, "w": 12, "x": 0, - "y": 86 + "y": 56 }, "hiddenSeries": false, "id": 88, @@ -6317,7 +6446,7 @@ "h": 8, "w": 12, "x": 12, - "y": 86 + "y": 56 }, "hiddenSeries": false, "id": 67, @@ -6431,7 +6560,7 @@ "h": 8, "w": 12, "x": 0, - "y": 94 + "y": 64 }, "hiddenSeries": false, "id": 84, @@ -6531,7 +6660,7 @@ "h": 8, "w": 12, "x": 12, - "y": 94 + "y": 64 }, "hiddenSeries": false, "id": 86, @@ -6663,7 +6792,7 @@ "h": 8, "w": 12, "x": 0, - "y": 102 + "y": 72 }, "hiddenSeries": false, "id": 132, @@ -6811,7 +6940,7 @@ "h": 8, "w": 24, "x": 0, - "y": 37 + "y": 7 }, "hiddenSeries": false, "id": 62, @@ -6930,7 +7059,7 @@ "h": 8, "w": 12, "x": 0, - "y": 45 + "y": 15 }, "height": "", "hiddenSeries": false, @@ -7049,7 +7178,7 @@ "h": 8, "w": 12, "x": 12, - "y": 45 + "y": 15 }, "height": "", "hiddenSeries": false, @@ -7166,7 +7295,7 @@ "h": 9, "w": 24, "x": 0, - "y": 53 + "y": 23 }, "height": "", "hiddenSeries": false, @@ -7288,7 +7417,7 @@ "h": 9, "w": 24, "x": 0, - "y": 62 + "y": 32 }, "hiddenSeries": false, "id": 90, @@ -7416,7 +7545,7 @@ "h": 8, "w": 12, "x": 0, - "y": 118 + "y": 8 }, "hiddenSeries": false, "id": 85, @@ -7541,7 +7670,7 @@ "h": 8, "w": 12, "x": 12, - "y": 118 + "y": 8 }, "hiddenSeries": false, "id": 128, @@ -7684,7 +7813,7 @@ "h": 8, "w": 12, "x": 0, - "y": 126 + "y": 16 }, "hiddenSeries": false, "id": 129, @@ -7792,7 +7921,7 @@ "h": 8, "w": 12, "x": 12, - "y": 126 + "y": 16 }, "hiddenSeries": false, "id": 123, @@ -7918,7 +8047,7 @@ "h": 8, "w": 12, "x": 0, - "y": 134 + "y": 24 }, "heatmap": {}, "hideZeroBuckets": true, @@ -7995,7 +8124,7 @@ "h": 7, "w": 12, "x": 0, - "y": 39 + "y": 9 }, "hiddenSeries": false, "id": 35, @@ -8093,7 +8222,7 @@ "h": 7, "w": 12, "x": 12, - "y": 39 + "y": 9 }, "hiddenSeries": false, "id": 36, @@ -8211,7 +8340,7 @@ "h": 7, "w": 12, "x": 0, - "y": 46 + "y": 16 }, "hiddenSeries": false, "id": 37, @@ -8345,7 +8474,7 @@ "h": 7, "w": 12, "x": 12, - 
"y": 46 + "y": 16 }, "hiddenSeries": false, "id": 75, @@ -8449,7 +8578,7 @@ "h": 7, "w": 24, "x": 0, - "y": 53 + "y": 23 }, "hiddenSeries": false, "id": 82, @@ -8602,7 +8731,7 @@ "h": 7, "w": 12, "x": 0, - "y": 60 + "y": 30 }, "heatmap": {}, "hideZeroBuckets": true, @@ -8672,7 +8801,7 @@ "h": 7, "w": 12, "x": 12, - "y": 60 + "y": 30 }, "heatmap": {}, "hideZeroBuckets": true, @@ -8742,7 +8871,7 @@ "h": 7, "w": 12, "x": 0, - "y": 67 + "y": 37 }, "heatmap": {}, "hideZeroBuckets": true, @@ -8812,7 +8941,7 @@ "h": 7, "w": 12, "x": 12, - "y": 67 + "y": 37 }, "heatmap": {}, "hideZeroBuckets": true, @@ -8876,7 +9005,7 @@ "h": 7, "w": 24, "x": 0, - "y": 74 + "y": 44 }, "height": "", "hiddenSeries": false, @@ -8990,7 +9119,7 @@ "h": 7, "w": 12, "x": 0, - "y": 81 + "y": 51 }, "heatmap": {}, "hideZeroBuckets": true, @@ -9059,7 +9188,7 @@ "h": 7, "w": 12, "x": 12, - "y": 81 + "y": 51 }, "heatmap": {}, "hideZeroBuckets": true, @@ -9129,7 +9258,7 @@ "h": 7, "w": 12, "x": 0, - "y": 88 + "y": 58 }, "heatmap": {}, "hideZeroBuckets": true, @@ -9199,7 +9328,7 @@ "h": 7, "w": 12, "x": 12, - "y": 88 + "y": 58 }, "heatmap": {}, "hideZeroBuckets": true, @@ -9269,7 +9398,7 @@ "h": 7, "w": 12, "x": 0, - "y": 95 + "y": 65 }, "heatmap": {}, "hideZeroBuckets": true, @@ -9335,7 +9464,7 @@ "h": 7, "w": 12, "x": 12, - "y": 95 + "y": 65 }, "hiddenSeries": false, "id": 91, @@ -9478,7 +9607,7 @@ "h": 8, "w": 12, "x": 0, - "y": 40 + "y": 10 }, "hiddenSeries": false, "id": 99, @@ -9631,7 +9760,7 @@ "h": 8, "w": 12, "x": 12, - "y": 40 + "y": 10 }, "heatmap": {}, "hideZeroBuckets": true, From 047d91977195b69a3d169fb34c7571b78e5a6794 Mon Sep 17 00:00:00 2001 From: SeaRise Date: Thu, 15 Sep 2022 12:06:59 +0800 Subject: [PATCH 03/17] Test: refine Executor Unit Test (#5889) ref pingcap/tiflash#4609 --- .../tests/gtest_aggregation_executor.cpp | 29 +-- .../src/Flash/tests/gtest_filter_executor.cpp | 29 ++- dbms/src/Flash/tests/gtest_join_executor.cpp | 62 +++--- dbms/src/Flash/tests/gtest_limit_executor.cpp | 2 + .../Flash/tests/gtest_projection_executor.cpp | 183 ++++++++---------- dbms/src/Flash/tests/gtest_topn_executor.cpp | 17 +- .../src/Flash/tests/gtest_window_executor.cpp | 91 ++++----- dbms/src/TestUtils/ExecutorTestUtils.cpp | 60 ++++++ dbms/src/TestUtils/ExecutorTestUtils.h | 16 +- .../WindowFunctions/tests/gtest_lead_lag.cpp | 6 +- 10 files changed, 249 insertions(+), 246 deletions(-) diff --git a/dbms/src/Flash/tests/gtest_aggregation_executor.cpp b/dbms/src/Flash/tests/gtest_aggregation_executor.cpp index 3f1766769f5..b2645d6e18a 100644 --- a/dbms/src/Flash/tests/gtest_aggregation_executor.cpp +++ b/dbms/src/Flash/tests/gtest_aggregation_executor.cpp @@ -119,17 +119,6 @@ class ExecutorAggTestRunner : public DB::tests::ExecutorTest return context.scan(src.first, src.second).aggregation(agg_funcs, group_by_exprs).project(proj).build(context); } - void executeWithConcurrency(const std::shared_ptr & request, const ColumnsWithTypeAndName & expect_columns) - { - WRAP_FOR_DIS_ENABLE_PLANNER_BEGIN - for (size_t i = 1; i <= max_concurrency; i += step) - ASSERT_COLUMNS_EQ_UR(expect_columns, executeStreams(request, i)); - WRAP_FOR_DIS_ENABLE_PLANNER_END - } - - static const size_t max_concurrency = 10; - static const size_t step = 2; - const String db_name{"test_db"}; /// Prepare some data and names for tests of group by @@ -197,7 +186,7 @@ try for (size_t i = 0; i < test_num; ++i) { request = buildDAGRequest(std::make_pair(db_name, table_types), {}, group_by_exprs[i], projections[i]); - executeWithConcurrency(request, 
expect_cols[i]); + executeAndAssertColumnsEqual(request, expect_cols[i]); } } @@ -230,7 +219,7 @@ try for (size_t i = 0; i < test_num; ++i) { request = buildDAGRequest(std::make_pair(db_name, table_types), {}, group_by_exprs[i], projections[i]); - executeWithConcurrency(request, expect_cols[i]); + executeAndAssertColumnsEqual(request, expect_cols[i]); } } @@ -262,7 +251,7 @@ try for (size_t i = 0; i < test_num; ++i) { request = buildDAGRequest(std::make_pair(db_name, table_name), agg_funcs[i], group_by_exprs[i], projections[i]); - executeWithConcurrency(request, expect_cols[i]); + executeAndAssertColumnsEqual(request, expect_cols[i]); } /// Min function tests @@ -281,7 +270,7 @@ try for (size_t i = 0; i < test_num; ++i) { request = buildDAGRequest(std::make_pair(db_name, table_name), agg_funcs[i], group_by_exprs[i], projections[i]); - executeWithConcurrency(request, expect_cols[i]); + executeAndAssertColumnsEqual(request, expect_cols[i]); } } CATCH @@ -310,7 +299,7 @@ try for (size_t i = 0; i < test_num; ++i) { request = buildDAGRequest(std::make_pair(db_name, table_name), {agg_funcs[i]}, group_by_exprs[i], projections[i]); - executeWithConcurrency(request, expect_cols[i]); + executeAndAssertColumnsEqual(request, expect_cols[i]); } } CATCH @@ -322,13 +311,13 @@ try .scan("aggnull_test", "t1") .aggregation({Max(col("s1"))}, {}) .build(context); - executeWithConcurrency(request, {{toNullableVec({"banana"})}}); + executeAndAssertColumnsEqual(request, {{toNullableVec({"banana"})}}); request = context .scan("aggnull_test", "t1") .aggregation({}, {col("s1")}) .build(context); - executeWithConcurrency(request, {{toNullableVec("s1", {{}, "banana"})}}); + executeAndAssertColumnsEqual(request, {{toNullableVec("s1", {{}, "banana"})}}); } CATCH @@ -340,7 +329,7 @@ try .scan("test_db", "test_table") .aggregation({Max(col("s1")), Max(col("s1"))}, {}) .build(context); - executeWithConcurrency( + executeAndAssertColumnsEqual( request, {{toNullableVec({3})}, {toNullableVec({3})}}); @@ -349,7 +338,7 @@ try .scan("test_db", "test_table") .aggregation({Max(col("s1")), Max(col("s1")), Sum(col("s2"))}, {}) .build(context); - executeWithConcurrency( + executeAndAssertColumnsEqual( request, {{toNullableVec({3})}, {toNullableVec({3})}, {toVec({6})}}); } diff --git a/dbms/src/Flash/tests/gtest_filter_executor.cpp b/dbms/src/Flash/tests/gtest_filter_executor.cpp index 55f2279c954..ff3dc3ee893 100644 --- a/dbms/src/Flash/tests/gtest_filter_executor.cpp +++ b/dbms/src/Flash/tests/gtest_filter_executor.cpp @@ -39,26 +39,22 @@ class FilterExecutorTestRunner : public DB::tests::ExecutorTest TEST_F(FilterExecutorTestRunner, equals) try { - WRAP_FOR_DIS_ENABLE_PLANNER_BEGIN auto request = context .scan("test_db", "test_table") .filter(eq(col("s1"), col("s2"))) .build(context); - { - ASSERT_COLUMNS_EQ_R(executeStreams(request), - createColumns({toNullableVec({"banana"}), - toNullableVec({"banana"})})); - } + executeAndAssertColumnsEqual( + request, + {toNullableVec({"banana"}), + toNullableVec({"banana"})}); request = context.receive("exchange1") .filter(eq(col("s1"), col("s2"))) .build(context); - { - ASSERT_COLUMNS_EQ_R(executeStreams(request), - createColumns({toNullableVec({"banana"}), - toNullableVec({"banana"})})); - } - WRAP_FOR_DIS_ENABLE_PLANNER_END + executeAndAssertColumnsEqual( + request, + {toNullableVec({"banana"}), + toNullableVec({"banana"})}); } CATCH @@ -69,11 +65,10 @@ try .scan("test_db", "test_table") .filter(eq(col("test_table.s1"), col("test_table.s2"))) .build(context); - { - 
ASSERT_COLUMNS_EQ_R(executeStreams(request), - createColumns({toNullableVec({"banana"}), - toNullableVec({"banana"})})); - } + executeAndAssertColumnsEqual( + request, + {toNullableVec({"banana"}), + toNullableVec({"banana"})}); } CATCH diff --git a/dbms/src/Flash/tests/gtest_join_executor.cpp b/dbms/src/Flash/tests/gtest_join_executor.cpp index 124e04e4123..3f504ed29b6 100644 --- a/dbms/src/Flash/tests/gtest_join_executor.cpp +++ b/dbms/src/Flash/tests/gtest_join_executor.cpp @@ -23,8 +23,6 @@ namespace tests { class JoinExecutorTestRunner : public DB::tests::ExecutorTest { - static constexpr size_t max_concurrency_level = 10; - public: void initializeContext() override { @@ -60,16 +58,6 @@ class JoinExecutorTestRunner : public DB::tests::ExecutorTest toNullableVec("join_c", {"apple", "banana"})}); } - void executeWithConcurrency(const std::shared_ptr & request, const ColumnsWithTypeAndName & expect_columns) - { - WRAP_FOR_DIS_ENABLE_PLANNER_BEGIN - for (size_t i = 1; i <= max_concurrency_level; ++i) - { - ASSERT_COLUMNS_EQ_UR(expect_columns, executeStreams(request, i)); - } - WRAP_FOR_DIS_ENABLE_PLANNER_END - } - static constexpr size_t join_type_num = 7; static constexpr tipb::JoinType join_types[join_type_num] = { @@ -147,7 +135,7 @@ try .build(context); { - executeWithConcurrency(request, expected_cols[i * simple_test_num + j]); + executeAndAssertColumnsEqual(request, expected_cols[i * simple_test_num + j]); } } } @@ -287,7 +275,7 @@ try {col("b")}) .build(context); - executeWithConcurrency(request, expected_cols[i * join_type_num + j]); + executeAndAssertColumnsEqual(request, expected_cols[i * join_type_num + j]); } } } @@ -307,70 +295,70 @@ try context.addMockTable("cast", "t2", {{"a", TiDB::TP::TypeFloat}}, {toVec("a", {1.0})}); - executeWithConcurrency(cast_request(), {toNullableVec({1}), toNullableVec({1.0})}); + executeAndAssertColumnsEqual(cast_request(), {toNullableVec({1}), toNullableVec({1.0})}); /// int(1) == double(1.0) context.addMockTable("cast", "t1", {{"a", TiDB::TP::TypeLong}}, {toVec("a", {1})}); context.addMockTable("cast", "t2", {{"a", TiDB::TP::TypeDouble}}, {toVec("a", {1.0})}); - executeWithConcurrency(cast_request(), {toNullableVec({1}), toNullableVec({1.0})}); + executeAndAssertColumnsEqual(cast_request(), {toNullableVec({1}), toNullableVec({1.0})}); /// float(1) == double(1.0) context.addMockTable("cast", "t1", {{"a", TiDB::TP::TypeFloat}}, {toVec("a", {1})}); context.addMockTable("cast", "t2", {{"a", TiDB::TP::TypeDouble}}, {toVec("a", {1})}); - executeWithConcurrency(cast_request(), {toNullableVec({1}), toNullableVec({1})}); + executeAndAssertColumnsEqual(cast_request(), {toNullableVec({1}), toNullableVec({1})}); /// varchar('x') == char('x') context.addMockTable("cast", "t1", {{"a", TiDB::TP::TypeString}}, {toVec("a", {"x"})}); context.addMockTable("cast", "t2", {{"a", TiDB::TP::TypeVarchar}}, {toVec("a", {"x"})}); - executeWithConcurrency(cast_request(), {toNullableVec({"x"}), toNullableVec({"x"})}); + executeAndAssertColumnsEqual(cast_request(), {toNullableVec({"x"}), toNullableVec({"x"})}); /// tinyblob('x') == varchar('x') context.addMockTable("cast", "t1", {{"a", TiDB::TP::TypeTinyBlob}}, {toVec("a", {"x"})}); context.addMockTable("cast", "t2", {{"a", TiDB::TP::TypeVarchar}}, {toVec("a", {"x"})}); - executeWithConcurrency(cast_request(), {toNullableVec({"x"}), toNullableVec({"x"})}); + executeAndAssertColumnsEqual(cast_request(), {toNullableVec({"x"}), toNullableVec({"x"})}); /// mediumBlob('x') == varchar('x') context.addMockTable("cast", "t1", 
{{"a", TiDB::TP::TypeMediumBlob}}, {toVec("a", {"x"})}); context.addMockTable("cast", "t2", {{"a", TiDB::TP::TypeVarchar}}, {toVec("a", {"x"})}); - executeWithConcurrency(cast_request(), {toNullableVec({"x"}), toNullableVec({"x"})}); + executeAndAssertColumnsEqual(cast_request(), {toNullableVec({"x"}), toNullableVec({"x"})}); /// blob('x') == varchar('x') context.addMockTable("cast", "t1", {{"a", TiDB::TP::TypeBlob}}, {toVec("a", {"x"})}); context.addMockTable("cast", "t2", {{"a", TiDB::TP::TypeVarchar}}, {toVec("a", {"x"})}); - executeWithConcurrency(cast_request(), {toNullableVec({"x"}), toNullableVec({"x"})}); + executeAndAssertColumnsEqual(cast_request(), {toNullableVec({"x"}), toNullableVec({"x"})}); /// longBlob('x') == varchar('x') context.addMockTable("cast", "t1", {{"a", TiDB::TP::TypeLongBlob}}, {toVec("a", {"x"})}); context.addMockTable("cast", "t2", {{"a", TiDB::TP::TypeVarchar}}, {toVec("a", {"x"})}); - executeWithConcurrency(cast_request(), {toNullableVec({"x"}), toNullableVec({"x"})}); + executeAndAssertColumnsEqual(cast_request(), {toNullableVec({"x"}), toNullableVec({"x"})}); /// decimal with different scale context.addMockTable("cast", "t1", {{"a", TiDB::TP::TypeNewDecimal}}, {createColumn(std::make_tuple(9, 4), {"0.12"}, "a")}); context.addMockTable("cast", "t2", {{"a", TiDB::TP::TypeNewDecimal}}, {createColumn(std::make_tuple(9, 3), {"0.12"}, "a")}); - executeWithConcurrency(cast_request(), {createNullableColumn(std::make_tuple(65, 0), {"0.12"}, {0}), createNullableColumn(std::make_tuple(65, 0), {"0.12"}, {0})}); + executeAndAssertColumnsEqual(cast_request(), {createNullableColumn(std::make_tuple(65, 0), {"0.12"}, {0}), createNullableColumn(std::make_tuple(65, 0), {"0.12"}, {0})}); /// datetime(1970-01-01 00:00:01) == timestamp(1970-01-01 00:00:01) context.addMockTable("cast", "t1", {{"a", TiDB::TP::TypeDatetime}}, {createDateTimeColumn({{{1970, 1, 1, 0, 0, 1, 0}}}, 6)}); context.addMockTable("cast", "t2", {{"a", TiDB::TP::TypeTimestamp}}, {createDateTimeColumn({{{1970, 1, 1, 0, 0, 1, 0}}}, 6)}); - executeWithConcurrency(cast_request(), {createDateTimeColumn({{{1970, 1, 1, 0, 0, 1, 0}}}, 0), createDateTimeColumn({{{1970, 1, 1, 0, 0, 1, 0}}}, 0)}); + executeAndAssertColumnsEqual(cast_request(), {createDateTimeColumn({{{1970, 1, 1, 0, 0, 1, 0}}}, 0), createDateTimeColumn({{{1970, 1, 1, 0, 0, 1, 0}}}, 0)}); } CATCH @@ -398,7 +386,7 @@ try .aggregation({Max(col("a")), Min(col("a")), Count(col("a"))}, {col("b")}) .build(context); - executeWithConcurrency(request, expected_cols[i]); + executeAndAssertColumnsEqual(request, expected_cols[i]); } } CATCH @@ -440,7 +428,7 @@ try auto request = t1 .join(t2, tipb::JoinType::TypeInnerJoin, {}, {}, {}, {cond}, {}) .build(context); - executeWithConcurrency(request, expected_cols[i++]); + executeAndAssertColumnsEqual(request, expected_cols[i++]); } { @@ -448,7 +436,7 @@ try auto request = t1 .join(t2, tipb::JoinType::TypeLeftOuterJoin, {}, {cond}, {}, {}, {}) .build(context); - executeWithConcurrency(request, expected_cols[i++]); + executeAndAssertColumnsEqual(request, expected_cols[i++]); } { @@ -456,7 +444,7 @@ try auto request = t1 .join(t2, tipb::JoinType::TypeRightOuterJoin, {}, {}, {cond}, {}, {}) .build(context); - executeWithConcurrency(request, expected_cols[i++]); + executeAndAssertColumnsEqual(request, expected_cols[i++]); } { @@ -464,7 +452,7 @@ try auto request = t1 .join(t2, tipb::JoinType::TypeSemiJoin, {}, {}, {}, {cond}, {}) .build(context); - executeWithConcurrency(request, expected_cols[i++]); + 
executeAndAssertColumnsEqual(request, expected_cols[i++]); } { @@ -472,7 +460,7 @@ try auto request = t1 .join(t2, tipb::JoinType::TypeAntiSemiJoin, {}, {}, {}, {cond}, {}) .build(context); - executeWithConcurrency(request, expected_cols[i++]); + executeAndAssertColumnsEqual(request, expected_cols[i++]); } { @@ -480,7 +468,7 @@ try auto request = t1 .join(t2, tipb::JoinType::TypeLeftOuterSemiJoin, {}, {}, {}, {cond}, {}) .build(context); - executeWithConcurrency(request, expected_cols[i++]); + executeAndAssertColumnsEqual(request, expected_cols[i++]); } { @@ -488,7 +476,7 @@ try auto request = t1 .join(t2, tipb::JoinType::TypeAntiLeftOuterSemiJoin, {}, {}, {}, {cond}, {}) .build(context); - executeWithConcurrency(request, expected_cols[i++]); + executeAndAssertColumnsEqual(request, expected_cols[i++]); } } CATCH @@ -501,7 +489,7 @@ try .join(context.scan("test_db", "r_table"), tipb::JoinType::TypeLeftOuterJoin, {col("join_c")}) .build(context); { - executeWithConcurrency(request, {toNullableVec({"banana", "banana"}), toNullableVec({"apple", "banana"}), toNullableVec({"banana", "banana"}), toNullableVec({"apple", "banana"})}); + executeAndAssertColumnsEqual(request, {toNullableVec({"banana", "banana"}), toNullableVec({"apple", "banana"}), toNullableVec({"banana", "banana"}), toNullableVec({"apple", "banana"})}); } request = context @@ -510,7 +498,7 @@ try .project({"s", "join_c"}) .build(context); { - executeWithConcurrency(request, {toNullableVec({"banana", "banana"}), toNullableVec({"apple", "banana"})}); + executeAndAssertColumnsEqual(request, {toNullableVec({"banana", "banana"}), toNullableVec({"apple", "banana"})}); } request = context @@ -518,7 +506,7 @@ try .join(context.scan("test_db", "r_table_2"), tipb::JoinType::TypeLeftOuterJoin, {col("join_c")}) .build(context); { - executeWithConcurrency(request, {toNullableVec({"banana", "banana", "banana", "banana"}), toNullableVec({"apple", "apple", "apple", "banana"}), toNullableVec({"banana", "banana", "banana", {}}), toNullableVec({"apple", "apple", "apple", {}})}); + executeAndAssertColumnsEqual(request, {toNullableVec({"banana", "banana", "banana", "banana"}), toNullableVec({"apple", "apple", "apple", "banana"}), toNullableVec({"banana", "banana", "banana", {}}), toNullableVec({"apple", "apple", "apple", {}})}); } } CATCH @@ -531,7 +519,7 @@ try .join(context.receive("exchange_r_table"), tipb::JoinType::TypeLeftOuterJoin, {col("join_c")}) .build(context); { - executeWithConcurrency(request, {toNullableVec({"banana", "banana"}), toNullableVec({"apple", "banana"}), toNullableVec({"banana", "banana"}), toNullableVec({"apple", "banana"})}); + executeAndAssertColumnsEqual(request, {toNullableVec({"banana", "banana"}), toNullableVec({"apple", "banana"}), toNullableVec({"banana", "banana"}), toNullableVec({"apple", "banana"})}); } } CATCH @@ -544,7 +532,7 @@ try .join(context.receive("exchange_r_table"), tipb::JoinType::TypeLeftOuterJoin, {col("join_c")}) .build(context); { - executeWithConcurrency(request, {toNullableVec({"banana", "banana"}), toNullableVec({"apple", "banana"}), toNullableVec({"banana", "banana"}), toNullableVec({"apple", "banana"})}); + executeAndAssertColumnsEqual(request, {toNullableVec({"banana", "banana"}), toNullableVec({"apple", "banana"}), toNullableVec({"banana", "banana"}), toNullableVec({"apple", "banana"})}); } } CATCH diff --git a/dbms/src/Flash/tests/gtest_limit_executor.cpp b/dbms/src/Flash/tests/gtest_limit_executor.cpp index 8f77b036bf0..239df250306 100644 --- a/dbms/src/Flash/tests/gtest_limit_executor.cpp 
+++ b/dbms/src/Flash/tests/gtest_limit_executor.cpp @@ -71,6 +71,8 @@ try WRAP_FOR_DIS_ENABLE_PLANNER_BEGIN ASSERT_COLUMNS_EQ_R(executeStreams(request), expect_cols); WRAP_FOR_DIS_ENABLE_PLANNER_END + + executeAndAssertRowsEqual(request, std::min(limit_num, col_data_num)); } } CATCH diff --git a/dbms/src/Flash/tests/gtest_projection_executor.cpp b/dbms/src/Flash/tests/gtest_projection_executor.cpp index 68fcfcc6657..d7e90b6cb51 100644 --- a/dbms/src/Flash/tests/gtest_projection_executor.cpp +++ b/dbms/src/Flash/tests/gtest_projection_executor.cpp @@ -69,18 +69,6 @@ class ExecutorProjectionTestRunner : public DB::tests::ExecutorTest return context.scan(db_name, table_name).project(param).build(context); }; - static const size_t max_concurrency_level = 10; - - void executeWithConcurrency(const std::shared_ptr & request, const ColumnsWithTypeAndName & expect_columns) - { - WRAP_FOR_DIS_ENABLE_PLANNER_BEGIN - for (size_t i = 1; i <= max_concurrency_level; i += 2) - { - ASSERT_COLUMNS_EQ_UR(expect_columns, executeStreams(request, i)); - } - WRAP_FOR_DIS_ENABLE_PLANNER_END - } - /// Prepare column data const ColDataString col0{"col0-0", "col0-1", "", "col0-2", {}, "col0-3", ""}; const ColDataString col1{"col1-0", {}, "", "col1-1", "", "col1-2", "col1-3"}; @@ -111,29 +99,28 @@ try { /// Check single column auto request = buildDAGRequest({col_names[4]}); - executeWithConcurrency(request, {toNullableVec(col_names[4], col4_sorted_asc)}); + executeAndAssertColumnsEqual(request, {toNullableVec(col_names[4], col4_sorted_asc)}); /// Check multi columns request = buildDAGRequest({col_names[0], col_names[4]}); - executeWithConcurrency(request, - { - toNullableVec(col_names[0], col0_sorted_asc), - toNullableVec(col_names[4], col4_sorted_asc), - }); + executeAndAssertColumnsEqual( + request, + {toNullableVec(col_names[0], col0_sorted_asc), + toNullableVec(col_names[4], col4_sorted_asc)}); /// Check multi columns request = buildDAGRequest({col_names[0], col_names[1], col_names[4]}); - executeWithConcurrency(request, - {toNullableVec(col_names[0], col0_sorted_asc), - toNullableVec(col_names[1], col1_sorted_asc), - toNullableVec(col_names[4], col4_sorted_asc)}); + executeAndAssertColumnsEqual(request, + {toNullableVec(col_names[0], col0_sorted_asc), + toNullableVec(col_names[1], col1_sorted_asc), + toNullableVec(col_names[4], col4_sorted_asc)}); /// Check duplicate columns request = buildDAGRequest({col_names[4], col_names[4], col_names[4]}); - executeWithConcurrency(request, - {toNullableVec(col_names[4], col4_sorted_asc), - toNullableVec(col_names[4], col4_sorted_asc), - toNullableVec(col_names[4], col4_sorted_asc)}); + executeAndAssertColumnsEqual(request, + {toNullableVec(col_names[4], col4_sorted_asc), + toNullableVec(col_names[4], col4_sorted_asc), + toNullableVec(col_names[4], col4_sorted_asc)}); { /// Check large number of columns @@ -149,7 +136,7 @@ try } request = buildDAGRequest(projection_input); - executeWithConcurrency(request, columns); + executeAndAssertColumnsEqual(request, columns); } } CATCH @@ -163,82 +150,82 @@ try /// Data type: TypeString request = buildDAGRequest({eq(col(col_names[0]), col(col_names[0])), col(col_names[4])}); - executeWithConcurrency(request, - {toNullableVec({{}, 1, 1, 1, 1, 1, 1}), - toNullableVec(col_names[4], col4_sorted_asc)}); + executeAndAssertColumnsEqual(request, + {toNullableVec({{}, 1, 1, 1, 1, 1, 1}), + toNullableVec(col_names[4], col4_sorted_asc)}); request = buildDAGRequest({eq(col(col_names[0]), col(col_names[1])), col(col_names[4])}); - 
executeWithConcurrency(request, - {toNullableVec({{}, 0, 1, 0, {}, 0, 0}), - toNullableVec(col_names[4], col4_sorted_asc)}); + executeAndAssertColumnsEqual(request, + {toNullableVec({{}, 0, 1, 0, {}, 0, 0}), + toNullableVec(col_names[4], col4_sorted_asc)}); /// Data type: TypeLong request = buildDAGRequest({eq(col(col_names[3]), col(col_names[4])), col(col_names[4])}); - executeWithConcurrency(request, - {toNullableVec({{}, 0, 0, 0, {}, 1, 0}), - toNullableVec(col_names[4], col4_sorted_asc)}); + executeAndAssertColumnsEqual(request, + {toNullableVec({{}, 0, 0, 0, {}, 1, 0}), + toNullableVec(col_names[4], col4_sorted_asc)}); /// Test "greater" function /// Data type: TypeString request = buildDAGRequest({gt(col(col_names[0]), col(col_names[1])), col(col_names[4])}); - executeWithConcurrency(request, - {toNullableVec({{}, 0, 0, 0, {}, 0, 0}), - toNullableVec(col_names[4], col4_sorted_asc)}); + executeAndAssertColumnsEqual(request, + {toNullableVec({{}, 0, 0, 0, {}, 0, 0}), + toNullableVec(col_names[4], col4_sorted_asc)}); request = buildDAGRequest({gt(col(col_names[1]), col(col_names[0])), col(col_names[4])}); - executeWithConcurrency(request, - {toNullableVec({{}, 1, 0, 1, {}, 1, 1}), - toNullableVec(col_names[4], col4_sorted_asc)}); + executeAndAssertColumnsEqual(request, + {toNullableVec({{}, 1, 0, 1, {}, 1, 1}), + toNullableVec(col_names[4], col4_sorted_asc)}); /// Data type: TypeLong request = buildDAGRequest({gt(col(col_names[3]), col(col_names[4])), col(col_names[4])}); - executeWithConcurrency(request, - {toNullableVec({{}, 0, 1, 1, {}, 0, 0}), - toNullableVec(col_names[4], col4_sorted_asc)}); + executeAndAssertColumnsEqual(request, + {toNullableVec({{}, 0, 1, 1, {}, 0, 0}), + toNullableVec(col_names[4], col4_sorted_asc)}); request = buildDAGRequest({gt(col(col_names[4]), col(col_names[3])), col(col_names[4])}); - executeWithConcurrency(request, - {toNullableVec({{}, 1, 0, 0, {}, 0, 1}), - toNullableVec(col_names[4], col4_sorted_asc)}); + executeAndAssertColumnsEqual(request, + {toNullableVec({{}, 1, 0, 0, {}, 0, 1}), + toNullableVec(col_names[4], col4_sorted_asc)}); /// Test "and" function /// Data type: TypeString request = buildDAGRequest({And(col(col_names[0]), col(col_names[0])), col(col_names[4])}); - executeWithConcurrency(request, - {toNullableVec({{}, 0, 0, 0, 0, 0, 0}), - toNullableVec(col_names[4], col4_sorted_asc)}); + executeAndAssertColumnsEqual(request, + {toNullableVec({{}, 0, 0, 0, 0, 0, 0}), + toNullableVec(col_names[4], col4_sorted_asc)}); request = buildDAGRequest({And(col(col_names[0]), col(col_names[1])), col(col_names[4])}); - executeWithConcurrency(request, - {toNullableVec({0, 0, 0, 0, 0, 0, 0}), - toNullableVec(col_names[4], col4_sorted_asc)}); + executeAndAssertColumnsEqual(request, + {toNullableVec({0, 0, 0, 0, 0, 0, 0}), + toNullableVec(col_names[4], col4_sorted_asc)}); /// Data type: TypeLong request = buildDAGRequest({And(col(col_names[3]), col(col_names[4])), col(col_names[4])}); - executeWithConcurrency(request, - {toNullableVec({{}, 1, 0, 0, {}, 1, 0}), - toNullableVec(col_names[4], col4_sorted_asc)}); + executeAndAssertColumnsEqual(request, + {toNullableVec({{}, 1, 0, 0, {}, 1, 0}), + toNullableVec(col_names[4], col4_sorted_asc)}); /// Test "not" function /// Data type: TypeString request = buildDAGRequest({NOT(col(col_names[0])), NOT(col(col_names[1])), NOT(col(col_names[2])), col(col_names[4])}); - executeWithConcurrency(request, - {toNullableVec({{}, 1, 1, 1, 1, 1, 1}), - toNullableVec({1, 1, 1, 1, {}, 1, 1}), - toNullableVec({1, {}, 1, 1, 1, 1, 
{}}), - toNullableVec(col_names[4], col4_sorted_asc)}); + executeAndAssertColumnsEqual(request, + {toNullableVec({{}, 1, 1, 1, 1, 1, 1}), + toNullableVec({1, 1, 1, 1, {}, 1, 1}), + toNullableVec({1, {}, 1, 1, 1, 1, {}}), + toNullableVec(col_names[4], col4_sorted_asc)}); /// Data type: TypeLong request = buildDAGRequest({NOT(col(col_names[3])), NOT(col(col_names[4])), col(col_names[4])}); - executeWithConcurrency(request, - {toNullableVec({{}, 0, 1, 0, {}, 0, 1}), - toNullableVec({{}, 0, 0, 1, 0, 0, 0}), - toNullableVec(col_names[4], col4_sorted_asc)}); + executeAndAssertColumnsEqual(request, + {toNullableVec({{}, 0, 1, 0, {}, 0, 1}), + toNullableVec({{}, 0, 0, 1, 0, 0, 0}), + toNullableVec(col_names[4], col4_sorted_asc)}); /// TODO more functions... } @@ -261,7 +248,7 @@ try .scan("test_db", "test_table2") .project({functions[index]}) .build(context); - executeWithConcurrency(req, {functions_result[index]}); + executeAndAssertColumnsEqual(req, {functions_result[index]}); }; for (size_t i = 0; i < functions.size(); ++i) test_single_function(i); @@ -295,8 +282,8 @@ try fs.push_back(functions[k]); fs_result.push_back(functions_result[k]); - executeWithConcurrency(multi_functions(fs), fs_result); - executeWithConcurrency(multi_functions_then_agg(fs), createColumns({toVec({5})})); + executeAndAssertColumnsEqual(multi_functions(fs), fs_result); + executeAndAssertColumnsEqual(multi_functions_then_agg(fs), createColumns({toVec({5})})); fs.pop_back(); fs_result.pop_back(); @@ -314,10 +301,10 @@ try plusInt(plusInt(col("s1"), col("s2")), col("s3")), plusInt(plusInt(plusInt(col("s1"), col("s2")), col("s3")), col("s4"))}) .build(context); - executeWithConcurrency(req, - {toNullableVec({2, 2, 2, 2, 2}), - toNullableVec({3, 3, 3, 3, 3}), - toNullableVec({4, 4, 4, 4, 4})}); + executeAndAssertColumnsEqual(req, + {toNullableVec({2, 2, 2, 2, 2}), + toNullableVec({3, 3, 3, 3, 3}), + toNullableVec({4, 4, 4, 4, 4})}); } CATCH @@ -331,7 +318,7 @@ try .project({col("s1"), col("s2")}) .project({col("s1")}) .build(context); - executeWithConcurrency(req, {toNullableVec({1, 1, 1, 1, 1})}); + executeAndAssertColumnsEqual(req, {toNullableVec({1, 1, 1, 1, 1})}); req = context .scan("test_db", "test_table3") @@ -339,24 +326,24 @@ try .project({col("s1"), col("s2"), col("s3"), col("s4"), plusInt(plusInt(col("s1"), col("s2")), col("s3"))}) .project({plusInt(plusInt(plusInt(col("s1"), col("s2")), col("s3")), col("s4"))}) .build(context); - executeWithConcurrency(req, {toNullableVec({4, 4, 4, 4, 4})}); + executeAndAssertColumnsEqual(req, {toNullableVec({4, 4, 4, 4, 4})}); req = context .scan("test_db", "test_table3") .project({col("s1"), col("s2"), col("s3"), col("s4")}) .project({col("s1"), col("s2"), col("s3"), col("s4")}) .build(context); - executeWithConcurrency(req, - {toNullableVec({1, 1, 1, 1, 1}), - toNullableVec({1, 1, 1, 1, 1}), - toNullableVec({1, 1, 1, 1, 1}), - toNullableVec({1, 1, 1, 1, 1})}); + executeAndAssertColumnsEqual(req, + {toNullableVec({1, 1, 1, 1, 1}), + toNullableVec({1, 1, 1, 1, 1}), + toNullableVec({1, 1, 1, 1, 1}), + toNullableVec({1, 1, 1, 1, 1})}); req = context .scan("test_db", "test_table3") .project({lit(Field(String("a")))}) .build(context); - executeWithConcurrency(req, {createColumns({toVec({"a", "a", "a", "a", "a"})})}); + executeAndAssertColumnsEqual(req, {createColumns({toVec({"a", "a", "a", "a", "a"})})}); req = context .scan("test_db", "test_table3") @@ -364,7 +351,7 @@ try .project(MockAstVec{}) .project({lit(Field(String("a")))}) .build(context); - executeWithConcurrency(req, 
{createColumns({toVec({"a", "a", "a", "a", "a"})})}); + executeAndAssertColumnsEqual(req, {createColumns({toVec({"a", "a", "a", "a", "a"})})}); req = context .scan("test_db", "test_table3") @@ -372,19 +359,19 @@ try .project(MockAstVec{}) .project({lit(Field(String("a")))}) .build(context); - executeWithConcurrency(req, {createColumns({toVec({"a", "a", "a", "a", "a"})})}); + executeAndAssertColumnsEqual(req, {createColumns({toVec({"a", "a", "a", "a", "a"})})}); req = context .scan("test_db", "test_table3") .project({col("s1")}) .project({col("s1"), col("s1"), col("s1"), col("s1"), col("s1")}) .build(context); - executeWithConcurrency(req, - {toNullableVec({1, 1, 1, 1, 1}), - toNullableVec({1, 1, 1, 1, 1}), - toNullableVec({1, 1, 1, 1, 1}), - toNullableVec({1, 1, 1, 1, 1}), - toNullableVec({1, 1, 1, 1, 1})}); + executeAndAssertColumnsEqual(req, + {toNullableVec({1, 1, 1, 1, 1}), + toNullableVec({1, 1, 1, 1, 1}), + toNullableVec({1, 1, 1, 1, 1}), + toNullableVec({1, 1, 1, 1, 1}), + toNullableVec({1, 1, 1, 1, 1})}); } CATCH @@ -396,7 +383,7 @@ try .project({col("s1"), col("s2"), col("s3"), col("s4")}) .aggregation({Count(lit(Field(static_cast(1))))}, {}) .build(context); - executeWithConcurrency(req, {createColumns({toVec({5})})}); + executeAndAssertColumnsEqual(req, {createColumns({toVec({5})})}); req = context.scan("test_db", "test_table3") .project({col("s1"), col("s2"), col("s3"), col("s4")}) @@ -405,7 +392,7 @@ try .project({col("s1")}) .aggregation({Count(lit(Field(static_cast(1))))}, {}) .build(context); - executeWithConcurrency(req, {createColumns({toVec({5})})}); + executeAndAssertColumnsEqual(req, {createColumns({toVec({5})})}); req = context.scan("test_db", "test_table3") .project({col("s1"), col("s2"), col("s3"), col("s4"), plusInt(col("s1"), col("s2"))}) @@ -413,7 +400,7 @@ try .project({plusInt(plusInt(plusInt(col("s1"), col("s2")), col("s3")), col("s4"))}) .aggregation({Count(lit(Field(static_cast(1))))}, {}) .build(context); - executeWithConcurrency(req, {createColumns({toVec({5})})}); + executeAndAssertColumnsEqual(req, {createColumns({toVec({5})})}); req = context .scan("test_db", "test_table3") @@ -422,18 +409,18 @@ try plusInt(plusInt(plusInt(col("s1"), col("s2")), col("s3")), col("s4"))}) .aggregation({Count(lit(Field(static_cast(1))))}, {}) .build(context); - executeWithConcurrency(req, {createColumns({toVec({5})})}); + executeAndAssertColumnsEqual(req, {createColumns({toVec({5})})}); req = context .scan("test_db", "test_table3") .project({col("s1")}) .aggregation({Count(col("s1")), Sum(col("s1")), Max(col("s1")), Min(col("s1"))}, {}) .build(context); - executeWithConcurrency(req, - {toVec({5}), - toVec({5}), - toNullableVec({1}), - toNullableVec({1})}); + executeAndAssertColumnsEqual(req, + {toVec({5}), + toVec({5}), + toNullableVec({1}), + toNullableVec({1})}); } CATCH diff --git a/dbms/src/Flash/tests/gtest_topn_executor.cpp b/dbms/src/Flash/tests/gtest_topn_executor.cpp index e18f7b8869d..461f0e8b4c9 100644 --- a/dbms/src/Flash/tests/gtest_topn_executor.cpp +++ b/dbms/src/Flash/tests/gtest_topn_executor.cpp @@ -78,7 +78,6 @@ class ExecutorTopNTestRunner : public DB::tests::ExecutorTest TEST_F(ExecutorTopNTestRunner, TopN) try { - WRAP_FOR_DIS_ENABLE_PLANNER_BEGIN std::shared_ptr request; std::vector expect_cols; @@ -104,10 +103,7 @@ try else expect_cols.push_back({toNullableVec(single_col_name, ColumnWithString(col0.begin(), col0.begin() + limit_num))}); - ASSERT_COLUMNS_EQ_R(executeStreams(request), expect_cols[0]); - ASSERT_COLUMNS_EQ_R(executeStreams(request, 2), 
expect_cols[0]); - ASSERT_COLUMNS_EQ_R(executeStreams(request, 4), expect_cols[0]); - ASSERT_COLUMNS_EQ_R(executeStreams(request, 8), expect_cols[0]); + executeAndAssertColumnsEqual(request, expect_cols.back()); } } } @@ -140,10 +136,9 @@ try for (size_t i = 0; i < test_num; ++i) { request = buildDAGRequest(table_name, order_by_items[i], 100); - ASSERT_COLUMNS_EQ_R(executeStreams(request), expect_cols[i]); + executeAndAssertColumnsEqual(request, expect_cols[i]); } } - WRAP_FOR_DIS_ENABLE_PLANNER_END } CATCH @@ -161,7 +156,6 @@ try ASTPtr col3_ast = col(col_name[3]); ASTPtr func_ast; - WRAP_FOR_DIS_ENABLE_PLANNER_BEGIN { /// "and" function expect_cols = {{toNullableVec(col_name[0], ColumnWithInt32{{}, {}, 32, 27, 36, 34}), @@ -176,7 +170,7 @@ try func_projection = {col0_ast, col1_ast, col2_ast, col3_ast, func_ast}; request = buildDAGRequest(table_name, order_by_items, 100, func_projection, output_projection); - ASSERT_COLUMNS_EQ_R(executeStreams(request), expect_cols[0]); + executeAndAssertColumnsEqual(request, expect_cols.back()); } } @@ -194,7 +188,7 @@ try func_projection = {col0_ast, col1_ast, col2_ast, col3_ast, func_ast}; request = buildDAGRequest(table_name, order_by_items, 100, func_projection, output_projection); - ASSERT_COLUMNS_EQ_R(executeStreams(request), expect_cols[0]); + executeAndAssertColumnsEqual(request, expect_cols.back()); } } @@ -212,12 +206,11 @@ try func_projection = {col0_ast, col1_ast, col2_ast, col3_ast, func_ast}; request = buildDAGRequest(table_name, order_by_items, 100, func_projection, output_projection); - ASSERT_COLUMNS_EQ_R(executeStreams(request), expect_cols[0]); + executeAndAssertColumnsEqual(request, expect_cols.back()); } } /// TODO more functions... - WRAP_FOR_DIS_ENABLE_PLANNER_END } CATCH diff --git a/dbms/src/Flash/tests/gtest_window_executor.cpp b/dbms/src/Flash/tests/gtest_window_executor.cpp index 147c26ff237..022ea2c5e32 100644 --- a/dbms/src/Flash/tests/gtest_window_executor.cpp +++ b/dbms/src/Flash/tests/gtest_window_executor.cpp @@ -67,27 +67,10 @@ class WindowExecutorTestRunner : public DB::tests::ExecutorTest toVec("value", {"a", "b", "c", "d", "e", "f", "g", "h"})}); } - void executeWithConcurrency(const std::shared_ptr & request, const ColumnsWithTypeAndName & expect_columns) - { - WRAP_FOR_DIS_ENABLE_PLANNER_BEGIN - ASSERT_COLUMNS_EQ_R(expect_columns, executeStreams(request)); - for (size_t i = 2; i <= max_concurrency_level; ++i) - { - ASSERT_COLUMNS_EQ_UR(expect_columns, executeStreams(request, i)); - } - WRAP_FOR_DIS_ENABLE_PLANNER_END - } - void executeWithTableScanAndConcurrency(const std::shared_ptr & request, const String & db, const String & table_name, const ColumnsWithTypeAndName & source_columns, const ColumnsWithTypeAndName & expect_columns) { context.addMockTableColumnData(db, table_name, source_columns); - ASSERT_COLUMNS_EQ_R(expect_columns, executeStreams(request)); - WRAP_FOR_DIS_ENABLE_PLANNER_BEGIN - for (size_t i = 2; i <= max_concurrency_level; ++i) - { - ASSERT_COLUMNS_EQ_UR(expect_columns, executeStreams(request, i)); - } - WRAP_FOR_DIS_ENABLE_PLANNER_END + executeAndAssertColumnsEqual(request, expect_columns); } }; @@ -101,7 +84,7 @@ try .sort({{"partition", false}, {"order", false}, {"partition", false}, {"order", false}}, true) .window(RowNumber(), {"order", false}, {"partition", false}, buildDefaultRowsFrame()) .build(context); - executeWithConcurrency( + executeAndAssertColumnsEqual( request, createColumns({toNullableVec("partition", {1, 1, 1, 1, 2, 2, 2, 2}), toNullableVec("order", {1, 1, 2, 2, 1, 1, 2, 2}), 
@@ -132,10 +115,10 @@ try .window(RowNumber(), {"order", false}, {"partition", false}, buildDefaultRowsFrame()) .build(context); - executeWithConcurrency(request, - createColumns({toNullableVec("partition", {"apple", "apple", "apple", "apple", "banana", "banana", "banana", "banana"}), - toNullableVec("order", {"apple", "apple", "banana", "banana", "apple", "apple", "banana", "banana"}), - toNullableVec("row_number", {1, 2, 3, 4, 1, 2, 3, 4})})); + executeAndAssertColumnsEqual(request, + createColumns({toNullableVec("partition", {"apple", "apple", "apple", "apple", "banana", "banana", "banana", "banana"}), + toNullableVec("order", {"apple", "apple", "banana", "banana", "apple", "apple", "banana", "banana"}), + toNullableVec("row_number", {1, 2, 3, 4, 1, 2, 3, 4})})); // nullable executeWithTableScanAndConcurrency(request, @@ -154,10 +137,10 @@ try .window(RowNumber(), {"order", false}, {"partition", false}, buildDefaultRowsFrame()) .build(context); - executeWithConcurrency(request, - createColumns({toNullableVec("partition", {1.00, 1.00, 1.00, 1.00, 2.00, 2.00, 2.00, 2.00}), - toNullableVec("order", {1.00, 1.00, 2.00, 2.00, 1.00, 1.00, 2.00, 2.00}), - toNullableVec("row_number", {1, 2, 3, 4, 1, 2, 3, 4})})); + executeAndAssertColumnsEqual(request, + createColumns({toNullableVec("partition", {1.00, 1.00, 1.00, 1.00, 2.00, 2.00, 2.00, 2.00}), + toNullableVec("order", {1.00, 1.00, 2.00, 2.00, 1.00, 1.00, 2.00, 2.00}), + toNullableVec("row_number", {1, 2, 3, 4, 1, 2, 3, 4})})); // nullable executeWithTableScanAndConcurrency(request, @@ -202,20 +185,20 @@ try .window(RowNumber(), {{"order1", false}, {"order2", false}}, {{"partition1", false}, {"partition2", false}}, buildDefaultRowsFrame()) .build(context); - executeWithConcurrency(request, - createColumns({toNullableVec("partition1", {1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2}), - toNullableVec("partition2", {1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2}), - toNullableVec("order1", {1, 1, 2, 1, 1, 2, 1, 1, 2, 1, 1, 2}), - toNullableVec("order2", {1, 2, 2, 1, 2, 2, 1, 2, 2, 1, 2, 2}), - toNullableVec("row_number", {1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3})})); + executeAndAssertColumnsEqual(request, + createColumns({toNullableVec("partition1", {1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2}), + toNullableVec("partition2", {1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2}), + toNullableVec("order1", {1, 1, 2, 1, 1, 2, 1, 1, 2, 1, 1, 2}), + toNullableVec("order2", {1, 2, 2, 1, 2, 2, 1, 2, 2, 1, 2, 2}), + toNullableVec("row_number", {1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3})})); /***** rank, dense_rank *****/ request = context.scan("test_db", "test_table_for_rank").sort({{"partition", false}, {"order", false}}, true).window({Rank(), DenseRank()}, {{"order", false}}, {{"partition", false}}, MockWindowFrame{}).build(context); - executeWithConcurrency(request, - createColumns({toNullableVec("partition", {1, 1, 1, 1, 2, 2, 2, 2}), - toNullableVec("order", {1, 1, 2, 2, 1, 1, 2, 2}), - toNullableVec("rank", {1, 1, 3, 3, 1, 1, 3, 3}), - toNullableVec("dense_rank", {1, 1, 2, 2, 1, 1, 2, 2})})); + executeAndAssertColumnsEqual(request, + createColumns({toNullableVec("partition", {1, 1, 1, 1, 2, 2, 2, 2}), + toNullableVec("order", {1, 1, 2, 2, 1, 1, 2, 2}), + toNullableVec("rank", {1, 1, 3, 3, 1, 1, 3, 3}), + toNullableVec("dense_rank", {1, 1, 2, 2, 1, 1, 2, 2})})); // nullable executeWithTableScanAndConcurrency(request, @@ -252,10 +235,10 @@ try .sort({{"partition", false}, {"order", false}}, true) .window(functions[index], {"order", false}, {"partition", false}, MockWindowFrame{}) .build(context); - 
executeWithConcurrency(request, - createColumns({toNullableVec("partition", {1, 1, 1, 1, 2, 2, 2, 2}), - toNullableVec("order", {1, 1, 2, 2, 1, 1, 2, 2}), - functions_result[index]})); + executeAndAssertColumnsEqual(request, + createColumns({toNullableVec("partition", {1, 1, 1, 1, 2, 2, 2, 2}), + toNullableVec("order", {1, 1, 2, 2, 1, 1, 2, 2}), + functions_result[index]})); }; for (size_t i = 0; i < functions.size(); ++i) test_single_window_function(i); @@ -292,8 +275,8 @@ try wfs.push_back(functions[k]); wfs_result.push_back(functions_result[k]); - executeWithConcurrency(gen_merge_window_request(wfs), wfs_result); - executeWithConcurrency(gen_split_window_request(wfs), wfs_result); + executeAndAssertColumnsEqual(gen_merge_window_request(wfs), wfs_result); + executeAndAssertColumnsEqual(gen_split_window_request(wfs), wfs_result); wfs.pop_back(); wfs_result.pop_back(); @@ -326,7 +309,7 @@ try .window(RowNumber(), {"order", true}, {"partition", false}, buildDefaultRowsFrame()) .aggregation({Count(lit(Field(static_cast(1))))}, {}) .build(context); - executeWithConcurrency(request, createColumns({toVec({8})})); + executeAndAssertColumnsEqual(request, createColumns({toVec({8})})); /* select count(1) from ( @@ -343,7 +326,7 @@ try .window(RowNumber(), {"order", false}, {"partition", false}, buildDefaultRowsFrame()) .aggregation({Count(lit(Field(static_cast(1))))}, {}) .build(context); - executeWithConcurrency(request, createColumns({toVec({8})})); + executeAndAssertColumnsEqual(request, createColumns({toVec({8})})); request = context .scan("test_db", "test_table") @@ -351,7 +334,7 @@ try .window({RowNumber(), RowNumber()}, {{"order", false}}, {{"partition", false}}, buildDefaultRowsFrame()) .aggregation({Count(lit(Field(static_cast(1))))}, {}) .build(context); - executeWithConcurrency(request, createColumns({toVec({8})})); + executeAndAssertColumnsEqual(request, createColumns({toVec({8})})); /* select count(1) from ( @@ -368,7 +351,7 @@ try .window(DenseRank(), {"order", false}, {"partition", false}, MockWindowFrame()) .aggregation({Count(lit(Field(static_cast(1))))}, {}) .build(context); - executeWithConcurrency(request, createColumns({toVec({8})})); + executeAndAssertColumnsEqual(request, createColumns({toVec({8})})); request = context .scan("test_db", "test_table") @@ -376,7 +359,7 @@ try .window({Rank(), DenseRank()}, {{"order", false}}, {{"partition", false}}, MockWindowFrame()) .aggregation({Count(lit(Field(static_cast(1))))}, {}) .build(context); - executeWithConcurrency(request, createColumns({toVec({8})})); + executeAndAssertColumnsEqual(request, createColumns({toVec({8})})); /* select count(1) from ( @@ -393,7 +376,7 @@ try .window({DenseRank(), DenseRank(), Rank()}, {{"order", false}}, {{"partition", false}}, MockWindowFrame()) .aggregation({Count(lit(Field(static_cast(1))))}, {}) .build(context); - executeWithConcurrency(request, createColumns({toVec({8})})); + executeAndAssertColumnsEqual(request, createColumns({toVec({8})})); request = context .scan("test_db", "test_table") @@ -403,7 +386,7 @@ try .window(Rank(), {"order", false}, {"partition", false}, MockWindowFrame()) .aggregation({Count(lit(Field(static_cast(1))))}, {}) .build(context); - executeWithConcurrency(request, createColumns({toVec({8})})); + executeAndAssertColumnsEqual(request, createColumns({toVec({8})})); } CATCH @@ -420,7 +403,7 @@ try .window(Lead1(concat(col("value"), col("value"))), {"order", false}, {"partition", false}, MockWindowFrame()) .build(context); result.emplace_back(toNullableVec({"bb", "cc", "dd", 
{}, "ff", "gg", "hh", {}})); - executeWithConcurrency(request, result); + executeAndAssertColumnsEqual(request, result); result.pop_back(); request = context @@ -429,7 +412,7 @@ try .window(Lag2(concat(col("value"), lit(Field(String("0")))), lit(Field(static_cast(2)))), {"order", false}, {"partition", false}, MockWindowFrame()) .build(context); result.emplace_back(toNullableVec({{}, {}, "a0", "b0", {}, {}, "e0", "f0"})); - executeWithConcurrency(request, result); + executeAndAssertColumnsEqual(request, result); result.pop_back(); request = context @@ -438,7 +421,7 @@ try .window(Lead2(concat(col("value"), concat(lit(Field(String("0"))), col("value"))), lit(Field(static_cast(1)))), {"order", false}, {"partition", false}, MockWindowFrame()) .build(context); result.emplace_back(toNullableVec({"b0b", "c0c", "d0d", {}, "f0f", "g0g", "h0h", {}})); - executeWithConcurrency(request, result); + executeAndAssertColumnsEqual(request, result); } CATCH diff --git a/dbms/src/TestUtils/ExecutorTestUtils.cpp b/dbms/src/TestUtils/ExecutorTestUtils.cpp index 3d8a028d88c..93e250284ff 100644 --- a/dbms/src/TestUtils/ExecutorTestUtils.cpp +++ b/dbms/src/TestUtils/ExecutorTestUtils.cpp @@ -104,6 +104,66 @@ void ExecutorTest::executeInterpreter(const String & expected_string, const std: ASSERT_EQ(Poco::trim(expected_string), Poco::trim(fb.toString())); } +void ExecutorTest::executeExecutor( + const std::shared_ptr & request, + std::function<::testing::AssertionResult(const ColumnsWithTypeAndName &)> assert_func) +{ + WRAP_FOR_DIS_ENABLE_PLANNER_BEGIN + std::vector concurrencies{1, 2, 10}; + for (auto concurrency : concurrencies) + { + std::vector block_sizes{1, 2, DEFAULT_BLOCK_SIZE}; + for (auto block_size : block_sizes) + { + context.context.setSetting("max_block_size", Field(static_cast(block_size))); + auto test_info_msg = [&]() { + const auto & test_info = testing::UnitTest::GetInstance()->current_test_info(); + assert(test_info); + return fmt::format( + "test info:\n" + " file: {}\n" + " line: {}\n" + " test_case_name: {}\n" + " test_func_name: {}\n" + " enable_planner: {}\n" + " concurrency: {}\n" + " block_size: {}\n" + " dag_request: \n{}", + test_info->file(), + test_info->line(), + test_info->test_case_name(), + test_info->name(), + enable_planner, + concurrency, + block_size, + ExecutorSerializer().serialize(request.get())); + }; + ASSERT_TRUE(assert_func(executeStreams(request, concurrency))) << test_info_msg(); + } + } + WRAP_FOR_DIS_ENABLE_PLANNER_END +} + +void ExecutorTest::executeAndAssertColumnsEqual(const std::shared_ptr & request, const ColumnsWithTypeAndName & expect_columns) +{ + executeExecutor(request, [&](const ColumnsWithTypeAndName & res) { + return columnsEqual(expect_columns, res, /*_restrict=*/false); + }); +} + +void ExecutorTest::executeAndAssertRowsEqual(const std::shared_ptr & request, size_t expect_rows) +{ + executeExecutor(request, [&](const ColumnsWithTypeAndName & res) { + auto actual_rows = Block(res).rows(); + if (expect_rows != actual_rows) + { + String msg = fmt::format("\nColumns rows mismatch\nexpected_rows: {}\nactual_rows: {}", expect_rows, actual_rows); + return testing::AssertionFailure() << msg; + } + return testing::AssertionSuccess(); + }); +} + namespace { Block mergeBlocks(Blocks blocks) diff --git a/dbms/src/TestUtils/ExecutorTestUtils.h b/dbms/src/TestUtils/ExecutorTestUtils.h index 025551281e1..1402caf4afa 100644 --- a/dbms/src/TestUtils/ExecutorTestUtils.h +++ b/dbms/src/TestUtils/ExecutorTestUtils.h @@ -23,20 +23,20 @@ #include #include +#include + 
namespace DB::tests { TiDB::TP dataTypeToTP(const DataTypePtr & type); -void executeInterpreter(const std::shared_ptr & request, Context & context); - DB::ColumnsWithTypeAndName readBlock(BlockInputStreamPtr stream); DB::ColumnsWithTypeAndName readBlocks(std::vector streams); #define WRAP_FOR_DIS_ENABLE_PLANNER_BEGIN \ std::vector bools{false, true}; \ - for (auto flag : bools) \ + for (auto enable_planner : bools) \ { \ - enablePlanner(flag); + enablePlanner(enable_planner); #define WRAP_FOR_DIS_ENABLE_PLANNER_END } @@ -68,6 +68,9 @@ class ExecutorTest : public ::testing::Test void executeInterpreter(const String & expected_string, const std::shared_ptr & request, size_t concurrency); + void executeAndAssertColumnsEqual(const std::shared_ptr & request, const ColumnsWithTypeAndName & expect_columns); + void executeAndAssertRowsEqual(const std::shared_ptr & request, size_t expect_rows); + enum SourceType { TableScan, @@ -94,6 +97,11 @@ class ExecutorTest : public ::testing::Test const std::shared_ptr & request, size_t concurrency = 1); +private: + void executeExecutor( + const std::shared_ptr & request, + std::function<::testing::AssertionResult(const ColumnsWithTypeAndName &)> assert_func); + protected: MockDAGRequestContext context; std::unique_ptr dag_context_ptr; diff --git a/dbms/src/WindowFunctions/tests/gtest_lead_lag.cpp b/dbms/src/WindowFunctions/tests/gtest_lead_lag.cpp index 2ef85f35597..1a8d9a7bcfc 100644 --- a/dbms/src/WindowFunctions/tests/gtest_lead_lag.cpp +++ b/dbms/src/WindowFunctions/tests/gtest_lead_lag.cpp @@ -41,10 +41,8 @@ class LeadLag : public DB::tests::ExecutorTest { context.context.setSetting("max_block_size", Field(static_cast(block_size))); ASSERT_COLUMNS_EQ_R(expect_columns, executeStreams(request)); - for (size_t i = 2; i <= max_concurrency_level; ++i) - { - ASSERT_COLUMNS_EQ_UR(expect_columns, executeStreams(request, i)); - } + ASSERT_COLUMNS_EQ_UR(expect_columns, executeStreams(request, 2)); + ASSERT_COLUMNS_EQ_UR(expect_columns, executeStreams(request, max_concurrency_level)); } WRAP_FOR_DIS_ENABLE_PLANNER_END } From f07fe50d19db40ee3244665213226d94288ab853 Mon Sep 17 00:00:00 2001 From: hehechen Date: Thu, 15 Sep 2022 13:02:59 +0800 Subject: [PATCH 04/17] Use safe_ts to determine whether the peer's data synchronization progress is far behind the leader. 
(#5866) ref pingcap/tiflash#4902 --- contrib/tiflash-proxy | 2 +- dbms/src/Storages/Transaction/ProxyFFI.cpp | 7 ++ dbms/src/Storages/Transaction/ProxyFFI.h | 2 + .../Transaction/ProxyFFIStatusService.cpp | 13 +++- dbms/src/Storages/Transaction/RegionTable.cpp | 6 ++ dbms/src/Storages/Transaction/RegionTable.h | 64 ++++++++++++++++--- 6 files changed, 81 insertions(+), 13 deletions(-) diff --git a/contrib/tiflash-proxy b/contrib/tiflash-proxy index 91c7f1ec241..1ffec9a6b5c 160000 --- a/contrib/tiflash-proxy +++ b/contrib/tiflash-proxy @@ -1 +1 @@ -Subproject commit 91c7f1ec241f1e40b76c55c1cde70b048adc4a2f +Subproject commit 1ffec9a6b5ce7f081151f4462ca6a6d618dde9eb diff --git a/dbms/src/Storages/Transaction/ProxyFFI.cpp b/dbms/src/Storages/Transaction/ProxyFFI.cpp index f0a8f35cc99..f2c5d1f8c3c 100644 --- a/dbms/src/Storages/Transaction/ProxyFFI.cpp +++ b/dbms/src/Storages/Transaction/ProxyFFI.cpp @@ -559,4 +559,11 @@ raft_serverpb::RegionLocalState TiFlashRaftProxyHelper::getRegionLocalState(uint return state; } +void HandleSafeTSUpdate(EngineStoreServerWrap * server, uint64_t region_id, uint64_t self_safe_ts, uint64_t leader_safe_ts) +{ + RegionTable & region_table = server->tmt->getRegionTable(); + region_table.updateSelfSafeTS(region_id, self_safe_ts); + region_table.updateLeaderSafeTS(region_id, leader_safe_ts); + LOG_FMT_TRACE(&Poco::Logger::get(__FUNCTION__), "update safe ts in region_id={}, leader_safe_ts={}, self_safe_ts={}", region_id, leader_safe_ts, self_safe_ts); +} } // namespace DB diff --git a/dbms/src/Storages/Transaction/ProxyFFI.h b/dbms/src/Storages/Transaction/ProxyFFI.h index a4607396942..7107d9f0c5c 100644 --- a/dbms/src/Storages/Transaction/ProxyFFI.h +++ b/dbms/src/Storages/Transaction/ProxyFFI.h @@ -147,6 +147,7 @@ BaseBuffView strIntoView(const std::string * str_ptr); CppStrWithView GetConfig(EngineStoreServerWrap *, uint8_t full); void SetStore(EngineStoreServerWrap *, BaseBuffView); void SetPBMsByBytes(MsgPBType type, RawVoidPtr ptr, BaseBuffView view); +void HandleSafeTSUpdate(EngineStoreServerWrap * server, uint64_t region_id, uint64_t self_safe_ts, uint64_t leader_safe_ts); } inline EngineStoreServerHelper GetEngineStoreServerHelper( @@ -175,6 +176,7 @@ inline EngineStoreServerHelper GetEngineStoreServerHelper( .fn_get_config = GetConfig, .fn_set_store = SetStore, .fn_set_pb_msg_by_bytes = SetPBMsByBytes, + .fn_handle_safe_ts_update = HandleSafeTSUpdate, }; } } // namespace DB diff --git a/dbms/src/Storages/Transaction/ProxyFFIStatusService.cpp b/dbms/src/Storages/Transaction/ProxyFFIStatusService.cpp index 792f149f588..d2eb4454b05 100644 --- a/dbms/src/Storages/Transaction/ProxyFFIStatusService.cpp +++ b/dbms/src/Storages/Transaction/ProxyFFIStatusService.cpp @@ -55,11 +55,18 @@ HttpRequestRes HandleHttpRequestSyncStatus( // if storage is not created in ch, flash replica should not be available. 
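+        // (Editorial note, not part of the original patch: in the loop below, a region is
+        //  excluded from the ready-region count when isSafeTSLag() reports its self_safe_ts
+        //  more than RegionTable::SafeTsDiffThreshold behind the leader's safe_ts.)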
if (tmt.getStorages().get(table_id)) { - tmt.getRegionTable().handleInternalRegionsByTable(table_id, [&](const RegionTable::InternalRegions & regions) { - count = regions.size(); + RegionTable & region_table = tmt.getRegionTable(); + region_table.handleInternalRegionsByTable(table_id, [&](const RegionTable::InternalRegions & regions) { region_list.reserve(regions.size()); for (const auto & region : regions) - region_list.push_back(region.first); + { + if (!region_table.isSafeTSLag(region.first)) + { + region_list.push_back(region.first); + } + } + count = region_list.size(); + LOG_FMT_DEBUG(&Poco::Logger::get(__FUNCTION__), "table_id={}, total_region_count={}, ready_region_count={}", table_id, regions.size(), count); }); } ss << count << std::endl; diff --git a/dbms/src/Storages/Transaction/RegionTable.cpp b/dbms/src/Storages/Transaction/RegionTable.cpp index 5ae36a4bd64..3bf2d8457cb 100644 --- a/dbms/src/Storages/Transaction/RegionTable.cpp +++ b/dbms/src/Storages/Transaction/RegionTable.cpp @@ -185,7 +185,11 @@ void RegionTable::removeTable(TableID table_id) // Remove from region list. for (const auto & region_info : table.regions) + { regions.erase(region_info.first); + leader_safe_ts.erase(region_info.first); + self_safe_ts.erase(region_info.first); + } // Remove from table map. tables.erase(it); @@ -263,6 +267,8 @@ void RegionTable::removeRegion(const RegionID region_id, bool remove_data, const handle_range = internal_region_it->second.range_in_table; regions.erase(it); + leader_safe_ts.erase(region_id); + self_safe_ts.erase(region_id); table.regions.erase(internal_region_it); if (table.regions.empty()) { diff --git a/dbms/src/Storages/Transaction/RegionTable.h b/dbms/src/Storages/Transaction/RegionTable.h index 717b1cd568f..171e3aba08c 100644 --- a/dbms/src/Storages/Transaction/RegionTable.h +++ b/dbms/src/Storages/Transaction/RegionTable.h @@ -23,9 +23,11 @@ #include #include #include +#include #include #include +#include #include #include #include @@ -55,6 +57,12 @@ struct RegionPtrWithSnapshotFiles; class RegionScanFilter; using RegionScanFilterPtr = std::shared_ptr; +using SafeTS = UInt64; +enum : SafeTS +{ + InvalidSafeTS = std::numeric_limits::max() +}; + class RegionTable : private boost::noncopyable { public: @@ -76,7 +84,7 @@ class RegionTable : private boost::noncopyable struct Table : boost::noncopyable { - Table(const TableID table_id_) + explicit Table(const TableID table_id_) : table_id(table_id_) {} TableID table_id; @@ -85,6 +93,7 @@ class RegionTable : private boost::noncopyable using TableMap = std::unordered_map; using RegionInfoMap = std::unordered_map; + using SafeTsMap = std::unordered_map; using DirtyRegions = std::unordered_set; using TableToOptimize = std::unordered_set; @@ -93,7 +102,7 @@ class RegionTable : private boost::noncopyable { using FlushThresholdsData = std::vector>; - FlushThresholds(FlushThresholdsData && data_) + explicit FlushThresholds(FlushThresholdsData && data_) : data(std::make_shared(std::move(data_))) {} @@ -117,7 +126,7 @@ class RegionTable : private boost::noncopyable mutable std::mutex mutex; }; - RegionTable(Context & context_); + explicit RegionTable(Context & context_); void restore(); void setFlushThresholds(const FlushThresholds::FlushThresholdsData & flush_thresholds_); @@ -127,14 +136,14 @@ class RegionTable : private boost::noncopyable /// This functional only shrink the table range of this region_id void shrinkRegionRange(const Region & region); - void removeRegion(const RegionID region_id, bool remove_data, const 
RegionTaskLock &);
+    void removeRegion(RegionID region_id, bool remove_data, const RegionTaskLock &);

     bool tryFlushRegions();
     RegionDataReadInfoList tryFlushRegion(RegionID region_id, bool try_persist = false);
     RegionDataReadInfoList tryFlushRegion(const RegionPtrWithBlock & region, bool try_persist);

-    void handleInternalRegionsByTable(const TableID table_id, std::function && callback) const;
-    std::vector> getRegionsByTable(const TableID table_id) const;
+    void handleInternalRegionsByTable(TableID table_id, std::function && callback) const;
+    std::vector> getRegionsByTable(TableID table_id) const;

     /// Write the data of the given region into the table with the given table ID, fill the data list for outer to remove.
     /// Will trigger schema sync on read error for only once,
@@ -159,17 +168,49 @@ class RegionTable : private boost::noncopyable
         Poco::Logger * log);

     /// extend range for possible InternalRegion or add one.
-    void extendRegionRange(const RegionID region_id, const RegionRangeKeys & region_range_keys);
+    void extendRegionRange(RegionID region_id, const RegionRangeKeys & region_range_keys);
+    void updateSelfSafeTS(UInt64 region_id, UInt64 safe_ts)
+    {
+        if (safe_ts == InvalidSafeTS)
+        {
+            return;
+        }
+        std::lock_guard lock(mutex);
+        self_safe_ts[region_id] = safe_ts;
+    }
+    void updateLeaderSafeTS(UInt64 region_id, UInt64 safe_ts)
+    {
+        if (safe_ts == InvalidSafeTS)
+        {
+            return;
+        }
+        std::lock_guard lock(mutex);
+        leader_safe_ts[region_id] = safe_ts;
+    }
+
+    // Unit: ms. If the safe_ts gap is larger than 2 minutes, we consider the learner's data synchronization progress far behind the leader's.
+    static const UInt64 SafeTsDiffThreshold = 2 * 60 * 1000;
+    bool isSafeTSLag(UInt64 region_id)
+    {
+        auto self_it = self_safe_ts.find(region_id);
+        auto leader_it = leader_safe_ts.find(region_id);
+        if (self_it == self_safe_ts.end() || leader_it == leader_safe_ts.end())
+        {
+            return false;
+        }
+        LOG_FMT_TRACE(log, "region_id:{}, table_id:{}, leader_safe_ts:{}, self_safe_ts:{}", region_id, regions[region_id], leader_it->second, self_it->second);
+        return (leader_it->second > self_it->second) && (leader_it->second - self_it->second > SafeTsDiffThreshold);
+    }

 private:
     friend class MockTiDB;
     friend class StorageDeltaMerge;

-    Table & getOrCreateTable(const TableID table_id);
+    Table & getOrCreateTable(TableID table_id);
     void removeTable(TableID table_id);

     InternalRegion & insertRegion(Table & table, const Region & region);
     InternalRegion & getOrInsertRegion(const Region & region);
-    InternalRegion & insertRegion(Table & table, const RegionRangeKeys & region_range_keys, const RegionID region_id);
+    InternalRegion & insertRegion(Table & table, const RegionRangeKeys & region_range_keys, RegionID region_id);
     InternalRegion & doGetInternalRegion(TableID table_id, RegionID region_id);

     RegionDataReadInfoList flushRegion(const RegionPtrWithBlock & region, bool try_persist) const;
@@ -179,6 +220,11 @@ class RegionTable : private boost::noncopyable
 private:
     TableMap tables;
     RegionInfoMap regions;
+    // safe_ts is maintained by the check_leader RPC (https://github.com/tikv/tikv/blob/1ea26a2ac8761af356cc5c0825eb89a0b8fc9749/components/resolved_ts/src/advance.rs#L262).
+    // leader_safe_ts is the safe_ts of the leader; the leader sends it to the learner to advance the learner's safe_ts, and TiFlash records the safe_ts carried in the check_leader RPC as leader_safe_ts.
+    SafeTsMap leader_safe_ts;
+    // self_safe_ts is the safe_ts of the learner. 
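+    // (Editorial worked example, not part of the original patch: SafeTsDiffThreshold is
+    //  2 * 60 * 1000 = 120000. A leader_safe_ts/self_safe_ts gap of 150000 therefore makes
+    //  isSafeTSLag() return true, while a gap of 60000 does not; a missing entry on either
+    //  side always yields false.)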
When TiFlash proxy receive from leader, TiFlash will update self_safe_ts when TiFlash has applied the raft log to applied_index. + SafeTsMap self_safe_ts; DirtyRegions dirty_regions; FlushThresholds flush_thresholds; From 0d0cde5a1057144a058386fc31b94bcbf343e500 Mon Sep 17 00:00:00 2001 From: Wenxuan Date: Thu, 15 Sep 2022 14:08:59 +0800 Subject: [PATCH 05/17] storage: require lock for segment operations (#5894) ref pingcap/tiflash#5237 --- dbms/src/Storages/DeltaMerge/DeltaMergeStore.h | 4 ++-- .../DeltaMergeStore_InternalSegment.cpp | 6 +++--- dbms/src/Storages/DeltaMerge/Segment.cpp | 15 +++++++++------ dbms/src/Storages/DeltaMerge/Segment.h | 15 ++++++++++++++- .../DeltaMerge/tests/gtest_dm_segment.cpp | 10 +++++----- 5 files changed, 33 insertions(+), 17 deletions(-) diff --git a/dbms/src/Storages/DeltaMerge/DeltaMergeStore.h b/dbms/src/Storages/DeltaMerge/DeltaMergeStore.h index 7d4f9a6c1d9..acef24e0c42 100644 --- a/dbms/src/Storages/DeltaMerge/DeltaMergeStore.h +++ b/dbms/src/Storages/DeltaMerge/DeltaMergeStore.h @@ -512,11 +512,11 @@ class DeltaMergeStore : private boost::noncopyable bool handleBackgroundTask(bool heavy); // isSegmentValid should be protected by lock on `read_write_mutex` - inline bool isSegmentValid(std::shared_lock &, const SegmentPtr & segment) + inline bool isSegmentValid(const std::shared_lock &, const SegmentPtr & segment) { return doIsSegmentValid(segment); } - inline bool isSegmentValid(std::unique_lock &, const SegmentPtr & segment) + inline bool isSegmentValid(const std::unique_lock &, const SegmentPtr & segment) { return doIsSegmentValid(segment); } diff --git a/dbms/src/Storages/DeltaMerge/DeltaMergeStore_InternalSegment.cpp b/dbms/src/Storages/DeltaMerge/DeltaMergeStore_InternalSegment.cpp index 39a6e58d0c6..5beaf23b3bb 100644 --- a/dbms/src/Storages/DeltaMerge/DeltaMergeStore_InternalSegment.cpp +++ b/dbms/src/Storages/DeltaMerge/DeltaMergeStore_InternalSegment.cpp @@ -116,7 +116,7 @@ SegmentPair DeltaMergeStore::segmentSplit(DMContext & dm_context, const SegmentP auto segment_lock = segment->mustGetUpdateLock(); - std::tie(new_left, new_right) = segment->applySplit(dm_context, segment_snap, wbs, split_info); + std::tie(new_left, new_right) = segment->applySplit(segment_lock, dm_context, segment_snap, wbs, split_info); wbs.writeMeta(); @@ -278,7 +278,7 @@ SegmentPtr DeltaMergeStore::segmentMerge(DMContext & dm_context, const std::vect for (const auto & seg : ordered_segments) locks.emplace_back(seg->mustGetUpdateLock()); - merged = Segment::applyMerge(dm_context, ordered_segments, ordered_snapshots, wbs, merged_stable); + merged = Segment::applyMerge(locks, dm_context, ordered_segments, ordered_snapshots, wbs, merged_stable); wbs.writeMeta(); @@ -412,7 +412,7 @@ SegmentPtr DeltaMergeStore::segmentMergeDelta( auto segment_lock = segment->mustGetUpdateLock(); - new_segment = segment->applyMergeDelta(dm_context, segment_snap, wbs, new_stable); + new_segment = segment->applyMergeDelta(segment_lock, dm_context, segment_snap, wbs, new_stable); wbs.writeMeta(); diff --git a/dbms/src/Storages/DeltaMerge/Segment.cpp b/dbms/src/Storages/DeltaMerge/Segment.cpp index e567322a0e8..fdaef7f8401 100644 --- a/dbms/src/Storages/DeltaMerge/Segment.cpp +++ b/dbms/src/Storages/DeltaMerge/Segment.cpp @@ -640,7 +640,7 @@ SegmentPtr Segment::mergeDelta(DMContext & dm_context, const ColumnDefinesPtr & SYNC_FOR("before_Segment::applyMergeDelta"); // pause without holding the lock on the segment auto lock = mustGetUpdateLock(); - auto new_segment = applyMergeDelta(dm_context, 
segment_snap, wbs, new_stable); + auto new_segment = applyMergeDelta(lock, dm_context, segment_snap, wbs, new_stable); wbs.writeAll(); return new_segment; @@ -674,7 +674,8 @@ StableValueSpacePtr Segment::prepareMergeDelta(DMContext & dm_context, return new_stable; } -SegmentPtr Segment::applyMergeDelta(DMContext & context, +SegmentPtr Segment::applyMergeDelta(const Segment::Lock &, // + DMContext & context, const SegmentSnapshotPtr & segment_snap, WriteBatches & wbs, const StableValueSpacePtr & new_stable) const @@ -731,7 +732,7 @@ SegmentPair Segment::split(DMContext & dm_context, const ColumnDefinesPtr & sche SYNC_FOR("before_Segment::applySplit"); // pause without holding the lock on the segment auto lock = mustGetUpdateLock(); - auto segment_pair = applySplit(dm_context, segment_snap, wbs, split_info); + auto segment_pair = applySplit(lock, dm_context, segment_snap, wbs, split_info); wbs.writeAll(); @@ -1141,7 +1142,8 @@ std::optional Segment::prepareSplitPhysical(DMContext & dm_c return {SplitInfo{false, split_point, my_new_stable, other_stable}}; } -SegmentPair Segment::applySplit(DMContext & dm_context, // +SegmentPair Segment::applySplit(const Segment::Lock &, // + DMContext & dm_context, const SegmentSnapshotPtr & segment_snap, WriteBatches & wbs, SplitInfo & split_info) const @@ -1241,7 +1243,7 @@ SegmentPtr Segment::merge(DMContext & dm_context, const ColumnDefinesPtr & schem for (const auto & seg : ordered_segments) locks.emplace_back(seg->mustGetUpdateLock()); - auto merged = applyMerge(dm_context, ordered_segments, ordered_snapshots, wbs, merged_stable); + auto merged = applyMerge(locks, dm_context, ordered_segments, ordered_snapshots, wbs, merged_stable); wbs.writeAll(); return merged; @@ -1326,7 +1328,8 @@ StableValueSpacePtr Segment::prepareMerge(DMContext & dm_context, // return merged_stable; } -SegmentPtr Segment::applyMerge(DMContext & dm_context, // +SegmentPtr Segment::applyMerge(const std::vector &, // + DMContext & dm_context, const std::vector & ordered_segments, const std::vector & ordered_snapshots, WriteBatches & wbs, diff --git a/dbms/src/Storages/DeltaMerge/Segment.h b/dbms/src/Storages/DeltaMerge/Segment.h index 4005c0fa431..422bb87b125 100644 --- a/dbms/src/Storages/DeltaMerge/Segment.h +++ b/dbms/src/Storages/DeltaMerge/Segment.h @@ -69,6 +69,7 @@ class Segment : private boost::noncopyable { public: using DeltaTree = DefaultDeltaTree; + using Lock = DeltaValueSpace::Lock; struct ReadInfo { @@ -207,7 +208,11 @@ class Segment : private boost::noncopyable const SegmentSnapshotPtr & segment_snap, WriteBatches & wbs) const; + /** + * Should be protected behind the Segment update lock. + */ [[nodiscard]] SegmentPair applySplit( + const Lock &, DMContext & dm_context, const SegmentSnapshotPtr & segment_snap, WriteBatches & wbs, @@ -229,7 +234,11 @@ class Segment : private boost::noncopyable const std::vector & ordered_snapshots, WriteBatches & wbs); + /** + * Should be protected behind the update lock for all related segments. + */ [[nodiscard]] static SegmentPtr applyMerge( + const std::vector &, DMContext & dm_context, const std::vector & ordered_segments, const std::vector & ordered_snapshots, @@ -247,7 +256,12 @@ class Segment : private boost::noncopyable const ColumnDefinesPtr & schema_snap, const SegmentSnapshotPtr & segment_snap, WriteBatches & wbs) const; + + /** + * Should be protected behind the Segment update lock. 
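+     * A minimal sketch of the intended call pattern (editorial addition, mirroring the
+     * call sites updated above in Segment.cpp and DeltaMergeStore_InternalSegment.cpp):
+     *
+     *     auto lock = segment->mustGetUpdateLock();
+     *     auto new_segment = segment->applyMergeDelta(lock, dm_context, segment_snap, wbs, new_stable);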
+ */ [[nodiscard]] SegmentPtr applyMergeDelta( + const Lock &, DMContext & dm_context, const SegmentSnapshotPtr & segment_snap, WriteBatches & wbs, @@ -283,7 +297,6 @@ class Segment : private boost::noncopyable static String simpleInfo(const std::vector & segments); static String info(const std::vector & segments); - using Lock = DeltaValueSpace::Lock; bool getUpdateLock(Lock & lock) const { return delta->getLock(lock); } Lock mustGetUpdateLock() const diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_segment.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_segment.cpp index 79ae30298ff..b5de675d2ee 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_segment.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_segment.cpp @@ -1128,7 +1128,7 @@ try split_info->other_stable->enableDMFilesGC(); auto lock = segment->mustGetUpdateLock(); - std::tie(segment, other_segment) = segment->applySplit(dmContext(), segment_snap, wbs, split_info.value()); + std::tie(segment, other_segment) = segment->applySplit(lock, dmContext(), segment_snap, wbs, split_info.value()); wbs.writeAll(); } @@ -1157,10 +1157,10 @@ try wbs.writeLogAndData(); merged_stable->enableDMFilesGC(); - auto left_lock = segment->mustGetUpdateLock(); - auto right_lock = other_segment->mustGetUpdateLock(); - - segment = Segment::applyMerge(dmContext(), {segment, other_segment}, {left_snap, right_snap}, wbs, merged_stable); + std::vector locks; + locks.emplace_back(segment->mustGetUpdateLock()); + locks.emplace_back(other_segment->mustGetUpdateLock()); + segment = Segment::applyMerge(locks, dmContext(), {segment, other_segment}, {left_snap, right_snap}, wbs, merged_stable); wbs.writeAll(); } From f4e976bc3a199f8d1ad0975c0d6de8835c862c85 Mon Sep 17 00:00:00 2001 From: S1mple Date: Thu, 15 Sep 2022 14:46:59 +0800 Subject: [PATCH 06/17] Introduce magic_enum into tiflash (#5843) ref pingcap/tiflash#5758 --- .gitmodules | 3 + contrib/CMakeLists.txt | 2 + contrib/magic_enum | 1 + dbms/CMakeLists.txt | 1 + dbms/src/Common/tests/gtest_magic_enum.cpp | 59 +++++++++++++++++ .../Storages/DeltaMerge/DeltaMergeStore.cpp | 9 +-- .../src/Storages/DeltaMerge/DeltaMergeStore.h | 65 ------------------- .../DeltaMerge/DeltaMergeStore_InternalBg.cpp | 8 ++- .../DeltaMergeStore_InternalSegment.cpp | 4 +- 9 files changed, 79 insertions(+), 73 deletions(-) create mode 160000 contrib/magic_enum create mode 100644 dbms/src/Common/tests/gtest_magic_enum.cpp diff --git a/.gitmodules b/.gitmodules index ebf82d2aa55..231a1fa779d 100644 --- a/.gitmodules +++ b/.gitmodules @@ -83,3 +83,6 @@ [submodule "contrib/arm-optimized-routines"] path = contrib/arm-optimized-routines url = https://github.com/ARM-software/optimized-routines +[submodule "contrib/magic_enum"] + path = contrib/magic_enum + url = https://github.com/Neargye/magic_enum.git diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 043327e54ab..46286aa204d 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -166,3 +166,5 @@ endif() if (ARCH_AARCH64 AND ARCH_LINUX) add_subdirectory(arm-optimized-routines-cmake) endif () + +add_subdirectory(magic_enum) diff --git a/contrib/magic_enum b/contrib/magic_enum new file mode 160000 index 00000000000..43a9272f450 --- /dev/null +++ b/contrib/magic_enum @@ -0,0 +1 @@ +Subproject commit 43a9272f450fd07e85868ed277ebd793e40806df diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt index 8ae86abea13..376c8b48ec4 100644 --- a/dbms/CMakeLists.txt +++ b/dbms/CMakeLists.txt @@ -192,6 +192,7 @@ target_link_libraries 
(clickhouse_common_io prometheus-cpp::push prometheus-cpp::pull cpptoml + magic_enum libsymbolization ) target_include_directories (clickhouse_common_io BEFORE PRIVATE ${kvClient_SOURCE_DIR}/include) diff --git a/dbms/src/Common/tests/gtest_magic_enum.cpp b/dbms/src/Common/tests/gtest_magic_enum.cpp new file mode 100644 index 00000000000..e0214e7d438 --- /dev/null +++ b/dbms/src/Common/tests/gtest_magic_enum.cpp @@ -0,0 +1,59 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include + +#include + +namespace DB::tests +{ +TEST(MagicEnumTest, EnumConversion) +{ + using crc64::Mode; + // mode_entries -> {{Mode::Table, "Table"}, {Mode::Auto, "Auto"}, {Mode::SIMD_128, "SIMD_128"}...} + // mode_entries[0].first -> Mode::Table + // mode_entries[0].second -> "Table" + constexpr auto mode_entries = magic_enum::enum_entries(); + ASSERT_EQ(mode_entries.size(), magic_enum::enum_names().size()); + ASSERT_EQ(mode_entries.size(), magic_enum::enum_values().size()); + ASSERT_EQ(mode_entries.size(), magic_enum::enum_count()); + + for (const auto & entry : mode_entries) + { + // enum value to string + ASSERT_EQ(magic_enum::enum_name(entry.first), entry.second); + // string to enum value + auto mode = magic_enum::enum_cast(entry.second); + ASSERT_TRUE(mode.has_value()); + ASSERT_EQ(entry.first, mode); + } + + // enum value to integer + int mode_integer = 2; + auto mode_from_int = magic_enum::enum_cast(mode_integer); + ASSERT_TRUE(mode_from_int.has_value()); + ASSERT_EQ(mode_from_int.value(), Mode::SIMD_128); + + // indexed access to enum value + std::size_t index = 1; + ASSERT_EQ(magic_enum::enum_value(index), Mode::Auto); + + // edge cases + ASSERT_FALSE(magic_enum::enum_cast("table").has_value()); + ASSERT_FALSE(magic_enum::enum_cast("auto").has_value()); + ASSERT_FALSE(magic_enum::enum_cast(-1).has_value()); + ASSERT_FALSE(magic_enum::enum_cast(99999).has_value()); +} +} // namespace DB::tests diff --git a/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp b/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp index 018c601d609..15babb12afa 100644 --- a/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp +++ b/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp @@ -43,6 +43,7 @@ #include #include +#include #include namespace ProfileEvents @@ -123,15 +124,15 @@ std::pair DeltaMergeStore::MergeDeltaTaskPool::tryAddTask(const Back light_tasks.push(task); break; default: - throw Exception(fmt::format("Unsupported task type: {}", toString(task.type))); + throw Exception(fmt::format("Unsupported task type: {}", magic_enum::enum_name(task.type))); } LOG_FMT_DEBUG( log_, "Segment task add to background task pool, segment={} task={} by_whom={}", task.segment->simpleInfo(), - toString(task.type), - toString(whom)); + magic_enum::enum_name(task.type), + magic_enum::enum_name(whom)); return std::make_pair(true, is_heavy); } @@ -145,7 +146,7 @@ DeltaMergeStore::BackgroundTask DeltaMergeStore::MergeDeltaTaskPool::nextTask(bo auto task = tasks.front(); tasks.pop(); - 
LOG_FMT_DEBUG(log_, "Segment task pop from background task pool, segment={} task={}", task.segment->simpleInfo(), toString(task.type)); + LOG_FMT_DEBUG(log_, "Segment task pop from background task pool, segment={} task={}", task.segment->simpleInfo(), magic_enum::enum_name(task.type)); return task; } diff --git a/dbms/src/Storages/DeltaMerge/DeltaMergeStore.h b/dbms/src/Storages/DeltaMerge/DeltaMergeStore.h index acef24e0c42..8fc0aefebc8 100644 --- a/dbms/src/Storages/DeltaMerge/DeltaMergeStore.h +++ b/dbms/src/Storages/DeltaMerge/DeltaMergeStore.h @@ -188,71 +188,6 @@ class DeltaMergeStore : private boost::noncopyable Manual, }; - static std::string toString(ThreadType type) - { - switch (type) - { - case Init: - return "Init"; - case Write: - return "Write"; - case Read: - return "Read"; - case BG_Split: - return "BG_Split"; - case BG_Merge: - return "BG_Merge"; - case BG_MergeDelta: - return "BG_MergeDelta"; - case BG_Compact: - return "BG_Compact"; - case BG_Flush: - return "BG_Flush"; - case BG_GC: - return "BG_GC"; - default: - return "Unknown"; - } - } - - static std::string toString(TaskType type) - { - switch (type) - { - case Split: - return "Split"; - case Merge: - return "Merge"; - case MergeDelta: - return "MergeDelta"; - case Compact: - return "Compact"; - case Flush: - return "Flush"; - case PlaceIndex: - return "PlaceIndex"; - default: - return "Unknown"; - } - } - - static std::string toString(MergeDeltaReason type) - { - switch (type) - { - case BackgroundThreadPool: - return "BackgroundThreadPool"; - case BackgroundGCThread: - return "BackgroundGCThread"; - case ForegroundWrite: - return "ForegroundWrite"; - case Manual: - return "Manual"; - default: - return "Unknown"; - } - } - struct BackgroundTask { TaskType type; diff --git a/dbms/src/Storages/DeltaMerge/DeltaMergeStore_InternalBg.cpp b/dbms/src/Storages/DeltaMerge/DeltaMergeStore_InternalBg.cpp index 43c10d3e2eb..0a7e9cd6c2c 100644 --- a/dbms/src/Storages/DeltaMerge/DeltaMergeStore_InternalBg.cpp +++ b/dbms/src/Storages/DeltaMerge/DeltaMergeStore_InternalBg.cpp @@ -18,6 +18,8 @@ #include #include +#include + namespace CurrentMetrics { extern const Metric DT_SnapshotOfDeltaMerge; @@ -174,7 +176,7 @@ bool DeltaMergeStore::handleBackgroundTask(bool heavy) { /// Note that `task.dm_context->db_context` will be free after query is finish. We should not use that in background task. task.dm_context->min_version = latest_gc_safe_point.load(std::memory_order_relaxed); - LOG_FMT_DEBUG(log, "Task {} GC safe point: {}", toString(task.type), task.dm_context->min_version); + LOG_FMT_DEBUG(log, "Task {} GC safe point: {}", magic_enum::enum_name(task.type), task.dm_context->min_version); } SegmentPtr left, right; @@ -216,7 +218,7 @@ bool DeltaMergeStore::handleBackgroundTask(bool heavy) task.segment->placeDeltaIndex(*task.dm_context); break; default: - throw Exception(fmt::format("Unsupported task type: {}", toString(task.type))); + throw Exception(fmt::format("Unsupported task type: {}", magic_enum::enum_name(task.type))); } } catch (const Exception & e) @@ -224,7 +226,7 @@ bool DeltaMergeStore::handleBackgroundTask(bool heavy) LOG_FMT_ERROR( log, "Execute task on segment failed, task={} segment={}{} err={}", - DeltaMergeStore::toString(task.type), + magic_enum::enum_name(task.type), task.segment->simpleInfo(), ((bool)task.next_segment ? 
(fmt::format(" next_segment={}", task.next_segment->simpleInfo())) : ""), e.message()); diff --git a/dbms/src/Storages/DeltaMerge/DeltaMergeStore_InternalSegment.cpp b/dbms/src/Storages/DeltaMerge/DeltaMergeStore_InternalSegment.cpp index 5beaf23b3bb..9f682ab4296 100644 --- a/dbms/src/Storages/DeltaMerge/DeltaMergeStore_InternalSegment.cpp +++ b/dbms/src/Storages/DeltaMerge/DeltaMergeStore_InternalSegment.cpp @@ -16,6 +16,8 @@ #include #include +#include + namespace CurrentMetrics { extern const Metric DT_DeltaMerge; @@ -321,7 +323,7 @@ SegmentPtr DeltaMergeStore::segmentMergeDelta( const MergeDeltaReason reason, SegmentSnapshotPtr segment_snap) { - LOG_FMT_INFO(log, "MergeDelta - Begin, reason={} safe_point={} segment={}", toString(reason), dm_context.min_version, segment->info()); + LOG_FMT_INFO(log, "MergeDelta - Begin, reason={} safe_point={} segment={}", magic_enum::enum_name(reason), dm_context.min_version, segment->info()); ColumnDefinesPtr schema_snap; From 89946b77f0d8c94180ccac20c8ef682908694ef8 Mon Sep 17 00:00:00 2001 From: SeaRise Date: Thu, 15 Sep 2022 16:04:59 +0800 Subject: [PATCH 07/17] .*: Separate `TemporaryFileStream` from `AggregatingBlockInputStream/ParallelAggregatingBlockInputStream` (#5886) close pingcap/tiflash#5887 --- .../AggregatingBlockInputStream.cpp | 15 ------- .../DataStreams/AggregatingBlockInputStream.h | 17 +------- .../ParallelAggregatingBlockInputStream.cpp | 17 -------- .../ParallelAggregatingBlockInputStream.h | 17 +------- dbms/src/DataStreams/TemporaryFileStream.cpp | 32 +++++++++++++++ dbms/src/DataStreams/TemporaryFileStream.h | 40 +++++++++++++++++++ 6 files changed, 76 insertions(+), 62 deletions(-) create mode 100644 dbms/src/DataStreams/TemporaryFileStream.cpp create mode 100644 dbms/src/DataStreams/TemporaryFileStream.h diff --git a/dbms/src/DataStreams/AggregatingBlockInputStream.cpp b/dbms/src/DataStreams/AggregatingBlockInputStream.cpp index 4cd09d1ea63..bbb56c9552e 100644 --- a/dbms/src/DataStreams/AggregatingBlockInputStream.cpp +++ b/dbms/src/DataStreams/AggregatingBlockInputStream.cpp @@ -12,10 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include #include #include -#include namespace DB { @@ -81,17 +79,4 @@ Block AggregatingBlockInputStream::readImpl() return impl->read(); } - -AggregatingBlockInputStream::TemporaryFileStream::TemporaryFileStream(const std::string & path, const FileProviderPtr & file_provider_) - : file_provider{file_provider_} - , file_in(file_provider, path, EncryptionPath(path, "")) - , compressed_in(file_in) - , block_in(std::make_shared(compressed_in, ClickHouseRevision::get())) -{} - -AggregatingBlockInputStream::TemporaryFileStream::~TemporaryFileStream() -{ - file_provider->deleteRegularFile(file_in.getFileName(), EncryptionPath(file_in.getFileName(), "")); -} - } // namespace DB diff --git a/dbms/src/DataStreams/AggregatingBlockInputStream.h b/dbms/src/DataStreams/AggregatingBlockInputStream.h index e0ba5bfacf3..f99ab268319 100644 --- a/dbms/src/DataStreams/AggregatingBlockInputStream.h +++ b/dbms/src/DataStreams/AggregatingBlockInputStream.h @@ -15,9 +15,7 @@ #pragma once #include -#include -#include -#include +#include #include namespace DB @@ -67,18 +65,7 @@ class AggregatingBlockInputStream : public IProfilingBlockInputStream bool executed = false; - /// To read the data that was flushed into the temporary data file. 
- struct TemporaryFileStream - { - FileProviderPtr file_provider; - ReadBufferFromFileProvider file_in; - CompressedReadBuffer<> compressed_in; - BlockInputStreamPtr block_in; - - TemporaryFileStream(const std::string & path, const FileProviderPtr & file_provider_); - ~TemporaryFileStream(); - }; - std::vector> temporary_inputs; + TemporaryFileStreams temporary_inputs; /** From here we will get the completed blocks after the aggregation. */ std::unique_ptr impl; diff --git a/dbms/src/DataStreams/ParallelAggregatingBlockInputStream.cpp b/dbms/src/DataStreams/ParallelAggregatingBlockInputStream.cpp index 4d85abcb3b2..60e97ee23ba 100644 --- a/dbms/src/DataStreams/ParallelAggregatingBlockInputStream.cpp +++ b/dbms/src/DataStreams/ParallelAggregatingBlockInputStream.cpp @@ -12,12 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include #include #include #include #include -#include #include namespace DB @@ -127,21 +125,6 @@ Block ParallelAggregatingBlockInputStream::readImpl() return impl->read(); } - -ParallelAggregatingBlockInputStream::TemporaryFileStream::TemporaryFileStream( - const std::string & path, - const FileProviderPtr & file_provider_) - : file_provider(file_provider_) - , file_in(file_provider, path, EncryptionPath(path, "")) - , compressed_in(file_in) - , block_in(std::make_shared(compressed_in, ClickHouseRevision::get())) -{} - -ParallelAggregatingBlockInputStream::TemporaryFileStream::~TemporaryFileStream() -{ - file_provider->deleteRegularFile(file_in.getFileName(), EncryptionPath(file_in.getFileName(), "")); -} - void ParallelAggregatingBlockInputStream::Handler::onBlock(Block & block, size_t thread_num) { parent.aggregator.executeOnBlock( diff --git a/dbms/src/DataStreams/ParallelAggregatingBlockInputStream.h b/dbms/src/DataStreams/ParallelAggregatingBlockInputStream.h index 907622c8364..41d9eb6752f 100644 --- a/dbms/src/DataStreams/ParallelAggregatingBlockInputStream.h +++ b/dbms/src/DataStreams/ParallelAggregatingBlockInputStream.h @@ -16,9 +16,7 @@ #include #include -#include -#include -#include +#include namespace DB { @@ -87,18 +85,7 @@ class ParallelAggregatingBlockInputStream : public IProfilingBlockInputStream std::atomic executed{false}; - /// To read the data stored into the temporary data file. - struct TemporaryFileStream - { - FileProviderPtr file_provider; - ReadBufferFromFileProvider file_in; - CompressedReadBuffer<> compressed_in; - BlockInputStreamPtr block_in; - - TemporaryFileStream(const std::string & path, const FileProviderPtr & file_provider_); - ~TemporaryFileStream(); - }; - std::vector> temporary_inputs; + TemporaryFileStreams temporary_inputs; ManyAggregatedDataVariants many_data; Exceptions exceptions; diff --git a/dbms/src/DataStreams/TemporaryFileStream.cpp b/dbms/src/DataStreams/TemporaryFileStream.cpp new file mode 100644 index 00000000000..1b8de947c66 --- /dev/null +++ b/dbms/src/DataStreams/TemporaryFileStream.cpp @@ -0,0 +1,32 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include + +namespace DB +{ +TemporaryFileStream::TemporaryFileStream(const std::string & path, const FileProviderPtr & file_provider_) + : file_provider{file_provider_} + , file_in(file_provider, path, EncryptionPath(path, "")) + , compressed_in(file_in) + , block_in(std::make_shared(compressed_in, ClickHouseRevision::get())) +{} + +TemporaryFileStream::~TemporaryFileStream() +{ + file_provider->deleteRegularFile(file_in.getFileName(), EncryptionPath(file_in.getFileName(), "")); +} +} // namespace DB diff --git a/dbms/src/DataStreams/TemporaryFileStream.h b/dbms/src/DataStreams/TemporaryFileStream.h new file mode 100644 index 00000000000..4316e4ee8a6 --- /dev/null +++ b/dbms/src/DataStreams/TemporaryFileStream.h @@ -0,0 +1,40 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include +#include +#include + +#include +#include + +namespace DB +{ +/// To read the data that was flushed into the temporary data file. +struct TemporaryFileStream +{ + FileProviderPtr file_provider; + ReadBufferFromFileProvider file_in; + CompressedReadBuffer<> compressed_in; + BlockInputStreamPtr block_in; + + TemporaryFileStream(const std::string & path, const FileProviderPtr & file_provider_); + ~TemporaryFileStream(); +}; + +using TemporaryFileStreams = std::vector>; +} // namespace DB From d540fbcb2819f18e5066fbed918b9aca03c7e58a Mon Sep 17 00:00:00 2001 From: Wenxuan Date: Thu, 15 Sep 2022 16:48:59 +0800 Subject: [PATCH 08/17] storage: Merge multiple segments in gc thread (#5863) ref pingcap/tiflash#5237 --- dbms/src/Common/FailPoint.cpp | 1 + dbms/src/Storages/DeltaMerge/DMContext.h | 6 + .../Storages/DeltaMerge/DeltaMergeStore.cpp | 56 +-- .../src/Storages/DeltaMerge/DeltaMergeStore.h | 23 +- .../DeltaMerge/DeltaMergeStore_InternalBg.cpp | 352 ++++++++++++------ .../DeltaMergeStore_InternalSegment.cpp | 3 + .../DeltaMerge/tests/MultiSegmentTestUtil.h | 2 +- .../tests/gtest_dm_delta_merge_store.cpp | 157 ++++++++ 8 files changed, 434 insertions(+), 166 deletions(-) diff --git a/dbms/src/Common/FailPoint.cpp b/dbms/src/Common/FailPoint.cpp index ce2f065711a..f14d7a2d91a 100644 --- a/dbms/src/Common/FailPoint.cpp +++ b/dbms/src/Common/FailPoint.cpp @@ -79,6 +79,7 @@ std::unordered_map> FailPointHelper::f #define APPLY_FOR_FAILPOINTS(M) \ M(skip_check_segment_update) \ + M(gc_skip_update_safe_point) \ M(force_set_page_file_write_errno) \ M(force_split_io_size_4k) \ M(minimum_block_size_for_cross_join) \ diff --git a/dbms/src/Storages/DeltaMerge/DMContext.h b/dbms/src/Storages/DeltaMerge/DMContext.h index 76da0714844..59320665011 100644 --- a/dbms/src/Storages/DeltaMerge/DMContext.h +++ b/dbms/src/Storages/DeltaMerge/DMContext.h @@ -72,6 +72,10 @@ struct DMContext : private boost::noncopyable const size_t delta_small_column_file_bytes; // The expected stable pack rows. 
const size_t stable_pack_rows;
+ // The threshold of rows for a segment to be regarded as small. Small segments will be merged.
+ const size_t small_segment_rows;
+ // The threshold of bytes for a segment to be regarded as small. Small segments will be merged.
+ const size_t small_segment_bytes;
// The number of points to check for calculating region split.
const size_t region_split_check_points = 128;
@@ -111,6 +115,8 @@ struct DMContext : private boost::noncopyable
, delta_small_column_file_rows(settings.dt_segment_delta_small_column_file_rows)
, delta_small_column_file_bytes(settings.dt_segment_delta_small_column_file_size)
, stable_pack_rows(settings.dt_segment_stable_pack_rows)
+ , small_segment_rows(settings.dt_segment_limit_rows / 3)
+ , small_segment_bytes(settings.dt_segment_limit_size / 3)
, enable_logical_split(settings.dt_enable_logical_split)
, read_delta_only(settings.dt_read_delta_only)
, read_stable_only(settings.dt_read_stable_only)
diff --git a/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp b/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp
index 15babb12afa..255997e1d5b 100644
--- a/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp
+++ b/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp
@@ -106,7 +106,6 @@ std::pair DeltaMergeStore::MergeDeltaTaskPool::tryAddTask(const Back
switch (task.type)
{
case TaskType::Split:
- case TaskType::Merge:
case TaskType::MergeDelta:
is_heavy = true;
// reserve some task space for light tasks
@@ -1179,8 +1178,6 @@ void DeltaMergeStore::checkSegmentUpdate(const DMContextPtr & dm_context, const
|| delta_bytes - delta_last_try_split_bytes >= delta_cache_limit_bytes))
|| (segment_rows >= segment_limit_rows * 3 || segment_bytes >= segment_limit_bytes * 3);
- bool should_merge = segment_rows < segment_limit_rows / 3 && segment_bytes < segment_limit_bytes / 3;
-
// Don't do compact on starting up.
bool should_compact = (thread_type != ThreadType::Init) && std::max(static_cast(column_file_count) - delta_last_try_compact_column_files, 0) >= 10;
@@ -1237,7 +1234,7 @@ void DeltaMergeStore::checkSegmentUpdate(const DMContextPtr & dm_context, const
{
delta_last_try_flush_rows = delta_rows;
delta_last_try_flush_bytes = delta_bytes;
- try_add_background_task(BackgroundTask{TaskType::Flush, dm_context, segment, {}});
+ try_add_background_task(BackgroundTask{TaskType::Flush, dm_context, segment});
}
}
}
@@ -1247,36 +1244,6 @@ void DeltaMergeStore::checkSegmentUpdate(const DMContextPtr & dm_context, const
if (segment->getDelta()->isUpdating())
return;
- /// Now start trying structure update.
-
- auto get_merge_sibling = [&]() -> SegmentPtr {
- /// For complexity reason, currently we only try to merge with next segment. Normally it is good enough.
-
- // The last segment cannot be merged.
- if (segment->getRowKeyRange().isEndInfinite())
- return {};
- SegmentPtr next_segment;
- {
- std::shared_lock read_write_lock(read_write_mutex);
-
- auto it = segments.find(segment->getRowKeyRange().getEnd());
- // check legality
- if (it == segments.end())
- return {};
- auto & cur_segment = it->second;
- if (cur_segment.get() != segment.get())
- return {};
- ++it;
- if (it == segments.end())
- return {};
- next_segment = it->second;
- auto limit = dm_context->segment_limit_rows / 5;
- if (next_segment->getEstimatedRows() >= limit)
- return {};
- }
- return next_segment;
- };
-
auto try_fg_merge_delta = [&]() -> SegmentPtr {
// If the table is already dropped, don't trigger foreground merge delta when executing `remove region peer`,
// or the raft-log apply threads may be blocked.
@@ -1304,7 +1271,7 @@ void DeltaMergeStore::checkSegmentUpdate(const DMContextPtr & dm_context, const if (should_background_merge_delta) { delta_last_try_merge_delta_rows = delta_rows; - try_add_background_task(BackgroundTask{TaskType::MergeDelta, dm_context, segment, {}}); + try_add_background_task(BackgroundTask{TaskType::MergeDelta, dm_context, segment}); return true; } return false; @@ -1314,12 +1281,12 @@ void DeltaMergeStore::checkSegmentUpdate(const DMContextPtr & dm_context, const { delta_last_try_split_rows = delta_rows; delta_last_try_split_bytes = delta_bytes; - try_add_background_task(BackgroundTask{TaskType::Split, dm_context, seg, {}}); + try_add_background_task(BackgroundTask{TaskType::Split, dm_context, seg}); return true; } return false; }; - auto try_fg_split = [&](const SegmentPtr & my_segment) -> bool { + auto try_fg_split = [&](const SegmentPtr & my_segment) { auto my_segment_size = my_segment->getEstimatedBytes(); auto my_should_split = my_segment_size >= dm_context->segment_force_split_bytes; if (my_should_split && !my_segment->isSplitForbidden()) @@ -1335,15 +1302,6 @@ void DeltaMergeStore::checkSegmentUpdate(const DMContextPtr & dm_context, const } return false; }; - auto try_bg_merge = [&]() { - SegmentPtr merge_sibling; - if (should_merge && (merge_sibling = get_merge_sibling())) - { - try_add_background_task(BackgroundTask{TaskType::Merge, dm_context, segment, merge_sibling}); - return true; - } - return false; - }; auto try_bg_compact = [&]() { /// Compact task should be a really low priority task. /// And if the segment is flushing, @@ -1353,7 +1311,7 @@ void DeltaMergeStore::checkSegmentUpdate(const DMContextPtr & dm_context, const if (should_compact && !segment->isFlushing()) { delta_last_try_compact_column_files = column_file_count; - try_add_background_task(BackgroundTask{TaskType::Compact, dm_context, segment, {}}); + try_add_background_task(BackgroundTask{TaskType::Compact, dm_context, segment}); return true; } return false; @@ -1362,7 +1320,7 @@ void DeltaMergeStore::checkSegmentUpdate(const DMContextPtr & dm_context, const if (should_place_delta_index) { delta_last_try_place_delta_index_rows = delta_rows; - try_add_background_task(BackgroundTask{TaskType::PlaceIndex, dm_context, segment, {}}); + try_add_background_task(BackgroundTask{TaskType::PlaceIndex, dm_context, segment}); return true; } return false; @@ -1407,8 +1365,6 @@ void DeltaMergeStore::checkSegmentUpdate(const DMContextPtr & dm_context, const if (try_bg_merge_delta()) return; } - if (try_bg_merge()) - return; if (try_bg_compact()) return; if (try_place_delta_index()) diff --git a/dbms/src/Storages/DeltaMerge/DeltaMergeStore.h b/dbms/src/Storages/DeltaMerge/DeltaMergeStore.h index 8fc0aefebc8..cc61ca052b9 100644 --- a/dbms/src/Storages/DeltaMerge/DeltaMergeStore.h +++ b/dbms/src/Storages/DeltaMerge/DeltaMergeStore.h @@ -173,7 +173,6 @@ class DeltaMergeStore : private boost::noncopyable enum TaskType { Split, - Merge, MergeDelta, Compact, Flush, @@ -194,7 +193,6 @@ class DeltaMergeStore : private boost::noncopyable DMContextPtr dm_context; SegmentPtr segment; - SegmentPtr next_segment; explicit operator bool() const { return segment != nullptr; } }; @@ -340,6 +338,27 @@ class DeltaMergeStore : private boost::noncopyable /// Iterator over all segments and apply gc jobs. UInt64 onSyncGc(Int64 limit); + /** + * Try to merge the segment in the current thread as the GC operation. + * This function may be blocking, and should be called in the GC background thread. 
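+ * Returns the merged segment if a merge was performed; otherwise returns nullptr.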
+ */ + SegmentPtr gcTrySegmentMerge(const DMContextPtr & dm_context, const SegmentPtr & segment); + + /** + * Try to merge delta in the current thread as the GC operation. + * This function may be blocking, and should be called in the GC background thread. + */ + SegmentPtr gcTrySegmentMergeDelta(const DMContextPtr & dm_context, const SegmentPtr & segment, const SegmentPtr & prev_segment, const SegmentPtr & next_segment, DB::Timestamp gc_safe_point); + + /** + * Starting from the given base segment, find continuous segments that could be merged. + * + * When there are mergeable segments, the baseSegment is returned in index 0 and mergeable segments are then placed in order. + * It is ensured that there are at least 2 elements in the returned vector. + * When there is no mergeable segment, the returned vector will be empty. + */ + std::vector getMergeableSegments(const DMContextPtr & context, const SegmentPtr & baseSegment); + /// Apply DDL `commands` on `table_columns` void applyAlters(const AlterCommands & commands, // OptionTableInfoConstRef table_info, diff --git a/dbms/src/Storages/DeltaMerge/DeltaMergeStore_InternalBg.cpp b/dbms/src/Storages/DeltaMerge/DeltaMergeStore_InternalBg.cpp index 0a7e9cd6c2c..cee4553cb7e 100644 --- a/dbms/src/Storages/DeltaMerge/DeltaMergeStore_InternalBg.cpp +++ b/dbms/src/Storages/DeltaMerge/DeltaMergeStore_InternalBg.cpp @@ -29,6 +29,7 @@ namespace DB { namespace FailPoints { +extern const char gc_skip_update_safe_point[]; extern const char pause_before_dt_background_delta_merge[]; extern const char pause_until_dt_background_delta_merge[]; } // namespace FailPoints @@ -149,6 +150,51 @@ void DeltaMergeStore::setUpBackgroundTask(const DMContextPtr & dm_context) blockable_background_pool_handle->wake(); } +std::vector DeltaMergeStore::getMergeableSegments(const DMContextPtr & context, const SegmentPtr & baseSegment) +{ + // Last segment cannot be merged. + if (baseSegment->getRowKeyRange().isEndInfinite()) + return {}; + + // We only merge small segments into a larger one. + // Note: it is possible that there is a very small segment close to a very large segment. + // In this case, the small segment will not get merged. It is possible that we can allow + // segment merging for this case in future. + auto max_total_rows = context->small_segment_rows; + auto max_total_bytes = context->small_segment_bytes; + + std::vector results; + { + std::shared_lock lock(read_write_mutex); + + if (!isSegmentValid(lock, baseSegment)) + return {}; + + results.reserve(4); // In most cases we will only find <= 4 segments to merge. 
+ results.emplace_back(baseSegment); + auto accumulated_rows = baseSegment->getEstimatedRows(); + auto accumulated_bytes = baseSegment->getEstimatedBytes(); + + auto it = segments.upper_bound(baseSegment->getRowKeyRange().getEnd()); + while (it != segments.end()) + { + const auto & this_seg = it->second; + const auto this_rows = this_seg->getEstimatedRows(); + const auto this_bytes = this_seg->getEstimatedBytes(); + if (accumulated_rows + this_rows >= max_total_rows || accumulated_bytes + this_bytes >= max_total_bytes) + break; + results.emplace_back(this_seg); + accumulated_rows += this_rows; + accumulated_bytes += this_bytes; + it++; + } + } + + if (results.size() < 2) + return {}; + + return results; +} bool DeltaMergeStore::updateGCSafePoint() { @@ -189,10 +235,6 @@ bool DeltaMergeStore::handleBackgroundTask(bool heavy) std::tie(left, right) = segmentSplit(*task.dm_context, task.segment, false); type = ThreadType::BG_Split; break; - case TaskType::Merge: - segmentMerge(*task.dm_context, {task.segment, task.next_segment}, false); - type = ThreadType::BG_Merge; - break; case TaskType::MergeDelta: { FAIL_POINT_PAUSE(FailPoints::pause_before_dt_background_delta_merge); @@ -225,10 +267,9 @@ bool DeltaMergeStore::handleBackgroundTask(bool heavy) { LOG_FMT_ERROR( log, - "Execute task on segment failed, task={} segment={}{} err={}", + "Execute task on segment failed, task={} segment={} err={}", magic_enum::enum_name(task.type), task.segment->simpleInfo(), - ((bool)task.next_segment ? (fmt::format(" next_segment={}", task.next_segment->simpleInfo())) : ""), e.message()); e.rethrow(); } @@ -248,8 +289,9 @@ bool DeltaMergeStore::handleBackgroundTask(bool heavy) } namespace GC + { -enum Type +enum class MergeDeltaReason { Unknown, TooManyDeleteRange, @@ -257,15 +299,15 @@ enum Type TooManyInvalidVersion, }; -static std::string toString(Type type) +static std::string toString(MergeDeltaReason type) { switch (type) { - case TooManyDeleteRange: + case MergeDeltaReason::TooManyDeleteRange: return "TooManyDeleteRange"; - case TooMuchOutOfRange: + case MergeDeltaReason::TooMuchOutOfRange: return "TooMuchOutOfRange"; - case TooManyInvalidVersion: + case MergeDeltaReason::TooManyInvalidVersion: return "TooManyInvalidVersion"; default: return "Unknown"; @@ -390,15 +432,183 @@ bool shouldCompactStableWithTooMuchDataOutOfSegmentRange(const DMContext & conte seg->setValidDataRatioChecked(); return (valid_rows < total_rows * (1 - invalid_data_ratio_threshold)) || (valid_bytes < total_bytes * (1 - invalid_data_ratio_threshold)); } + } // namespace GC +SegmentPtr DeltaMergeStore::gcTrySegmentMerge(const DMContextPtr & dm_context, const SegmentPtr & segment) +{ + auto segment_rows = segment->getEstimatedRows(); + auto segment_bytes = segment->getEstimatedBytes(); + if (segment_rows >= dm_context->small_segment_rows || segment_bytes >= dm_context->small_segment_bytes) + { + LOG_FMT_TRACE( + log, + "GC - Merge skipped because current segment is not small, segment={} table={}", + segment->simpleInfo(), + table_name); + return {}; + } + + auto segments_to_merge = getMergeableSegments(dm_context, segment); + if (segments_to_merge.size() < 2) + { + LOG_FMT_TRACE( + log, + "GC - Merge skipped because cannot find adjacent segments to merge, segment={} table={}", + segment->simpleInfo(), + table_name); + return {}; + } + + LOG_FMT_INFO( + log, + "GC - Trigger Merge, segment={} table={}", + segment->simpleInfo(), + table_name); + auto new_segment = segmentMerge(*dm_context, segments_to_merge, false); + if (new_segment) + { 
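+ // Merge succeeded. Continue to check whether we need to apply more tasks on this segment.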
+ checkSegmentUpdate(dm_context, segment, ThreadType::BG_GC);
+ }
+
+ return new_segment;
+}
+
+SegmentPtr DeltaMergeStore::gcTrySegmentMergeDelta(const DMContextPtr & dm_context, const SegmentPtr & segment, const SegmentPtr & prev_segment, const SegmentPtr & next_segment, DB::Timestamp gc_safe_point)
+{
+ SegmentSnapshotPtr segment_snap;
+ {
+ std::shared_lock lock(read_write_mutex);
+
+ // The segment we just retrieved may be dropped from the map. Let's verify it again before creating a snapshot.
+ if (!isSegmentValid(lock, segment))
+ {
+ LOG_FMT_TRACE(log, "GC - Skip checking MergeDelta because not valid, segment={} table={}", segment->simpleInfo(), table_name);
+ return {};
+ }
+
+ segment_snap = segment->createSnapshot(*dm_context, /* for_update */ true, CurrentMetrics::DT_SnapshotOfDeltaMerge);
+ if (!segment_snap)
+ {
+ LOG_FMT_TRACE(
+ log,
+ "GC - Skip checking MergeDelta because snapshot failed, segment={} table={}",
+ segment->simpleInfo(),
+ table_name);
+ return {};
+ }
+ }
+
+ RowKeyRange segment_range = segment->getRowKeyRange();
+
+ // Check whether we should apply compact on this segment
+ auto invalid_data_ratio_threshold = global_context.getSettingsRef().dt_bg_gc_delta_delete_ratio_to_trigger_gc;
+ RUNTIME_ASSERT(invalid_data_ratio_threshold >= 0 && invalid_data_ratio_threshold <= 1);
+
+ bool should_compact = false;
+ GC::MergeDeltaReason compact_reason = GC::MergeDeltaReason::Unknown;
+
+ if (GC::shouldCompactDeltaWithStable(
+ *dm_context,
+ segment_snap,
+ segment_range,
+ invalid_data_ratio_threshold,
+ log))
+ {
+ should_compact = true;
+ compact_reason = GC::MergeDeltaReason::TooManyDeleteRange;
+ }
+
+ if (!should_compact && segment->isValidDataRatioChecked())
+ {
+ if (GC::shouldCompactStableWithTooMuchDataOutOfSegmentRange(
+ *dm_context,
+ segment,
+ segment_snap,
+ prev_segment,
+ next_segment,
+ invalid_data_ratio_threshold,
+ log))
+ {
+ should_compact = true;
+ compact_reason = GC::MergeDeltaReason::TooMuchOutOfRange;
+ }
+ }
+
+ if (!should_compact && (segment->getLastCheckGCSafePoint() < gc_safe_point))
+ {
+ // Avoid rechecking this segment when gc_safe_point doesn't change, regardless of whether we trigger this segment's DeltaMerge or not.
+ // Because after we calculate StableProperty and compare it with this gc_safe_point,
+ // there is no need to recheck it again using the same gc_safe_point.
+ // On the other hand, if it should do DeltaMerge using this gc_safe_point, and the DeltaMerge is interrupted by another process,
+ // it's still worth waiting for another gc_safe_point to check this segment again.
+ segment->setLastCheckGCSafePoint(gc_safe_point); + dm_context->min_version = gc_safe_point; + + // calculate StableProperty if needed + if (!segment->getStable()->isStablePropertyCached()) + segment->getStable()->calculateStableProperty(*dm_context, segment_range, isCommonHandle()); + + if (GC::shouldCompactStableWithTooManyInvalidVersion( + segment, + gc_safe_point, + global_context.getSettingsRef().dt_bg_gc_ratio_threhold_to_trigger_gc, + log)) + { + should_compact = true; + compact_reason = GC::MergeDeltaReason::TooManyInvalidVersion; + } + } + + if (!should_compact) + { + LOG_FMT_TRACE( + log, + "GC - MergeDelta skipped, segment={} table={}", + segment->simpleInfo(), + table_name); + return {}; + } + + LOG_FMT_INFO( + log, + "GC - Trigger MergeDelta, compact_reason={} segment={} table={}", + GC::toString(compact_reason), + segment->simpleInfo(), + table_name); + auto new_segment = segmentMergeDelta(*dm_context, segment, MergeDeltaReason::BackgroundGCThread, segment_snap); + + if (!new_segment) + { + LOG_FMT_DEBUG( + log, + "GC - MergeDelta aborted, compact_reason={} segment={} table={}", + GC::toString(compact_reason), + segment->simpleInfo(), + table_name); + return {}; + } + + segment_snap = {}; + checkSegmentUpdate(dm_context, segment, ThreadType::BG_GC); + + return new_segment; +} + UInt64 DeltaMergeStore::onSyncGc(Int64 limit) { if (shutdown_called.load(std::memory_order_relaxed)) return 0; - if (!updateGCSafePoint()) - return 0; + bool skip_update_safe_point = false; + fiu_do_on(FailPoints::gc_skip_update_safe_point, { + skip_update_safe_point = true; + }); + if (!skip_update_safe_point) + { + if (!updateGCSafePoint()) + return 0; + } { std::shared_lock lock(read_write_mutex); @@ -431,7 +641,6 @@ UInt64 DeltaMergeStore::onSyncGc(Int64 limit) SegmentPtr segment; SegmentPtr prev_segment = nullptr; SegmentPtr next_segment = nullptr; - SegmentSnapshotPtr segment_snap; { std::shared_lock lock(read_write_mutex); @@ -446,7 +655,7 @@ UInt64 DeltaMergeStore::onSyncGc(Int64 limit) segment = segment_it->second; next_gc_check_key = segment_it->first.toRowKeyValue(); - segment_snap = segment->createSnapshot(*dm_context, /* for_update */ true, CurrentMetrics::DT_SnapshotOfDeltaMerge); + auto next_segment_it = next(segment_it, 1); if (next_segment_it != segments.end()) { @@ -460,122 +669,39 @@ UInt64 DeltaMergeStore::onSyncGc(Int64 limit) } assert(segment != nullptr); - if (segment->hasAbandoned() || segment_snap == nullptr) + if (segment->hasAbandoned()) continue; - const auto segment_id = segment->segmentId(); - RowKeyRange segment_range = segment->getRowKeyRange(); - - // meet empty segment, try merge it - if (segment_snap->getRows() == 0) - { - // release segment_snap before checkSegmentUpdate, otherwise this segment is still in update status. 
- segment_snap = {};
- checkSegmentUpdate(dm_context, segment, ThreadType::BG_GC);
- continue;
- }
-
try
{
- // Check whether we should apply gc on this segment
- auto invalid_data_ratio_threshold = global_context.getSettingsRef().dt_bg_gc_delta_delete_ratio_to_trigger_gc;
- RUNTIME_ASSERT(invalid_data_ratio_threshold >= 0 && invalid_data_ratio_threshold <= 1);
- bool should_compact = false;
- GC::Type gc_type = GC::Type::Unknown;
- if (GC::shouldCompactDeltaWithStable(
- *dm_context,
- segment_snap,
- segment_range,
- invalid_data_ratio_threshold,
- log))
- {
- should_compact = true;
- gc_type = GC::Type::TooManyDeleteRange;
- }
- else if (!segment->isValidDataRatioChecked())
- {
- if (GC::shouldCompactStableWithTooMuchDataOutOfSegmentRange(
- *dm_context,
- segment,
- segment_snap,
- prev_segment,
- next_segment,
- invalid_data_ratio_threshold,
- log))
- {
- should_compact = true;
- gc_type = GC::Type::TooMuchOutOfRange;
- }
- }
- else if (!should_compact && (segment->getLastCheckGCSafePoint() < gc_safe_point))
- {
- // Avoid recheck this segment when gc_safe_point doesn't change regardless whether we trigger this segment's DeltaMerge or not.
- // Because after we calculate StableProperty and compare it with this gc_safe_point,
- // there is no need to recheck it again using the same gc_safe_point.
- // On the other hand, if it should do DeltaMerge using this gc_safe_point, and the DeltaMerge is interruptted by other process,
- // it's still worth to wait another gc_safe_point to check this segment again.
- segment->setLastCheckGCSafePoint(gc_safe_point);
- dm_context->min_version = gc_safe_point;
-
- // calculate StableProperty if needed
- if (!segment->getStable()->isStablePropertyCached())
- segment->getStable()->calculateStableProperty(*dm_context, segment_range, isCommonHandle());
-
- if (GC::shouldCompactStableWithTooManyInvalidVersion(
- segment,
- gc_safe_point,
- global_context.getSettingsRef().dt_bg_gc_ratio_threhold_to_trigger_gc,
- log))
- {
- should_compact = true;
- gc_type = GC::Type::TooManyInvalidVersion;
- }
- }
- bool finish_gc_on_segment = false;
- if (should_compact)
+ SegmentPtr new_seg = gcTrySegmentMerge(dm_context, segment);
+ if (!new_seg)
+ new_seg = gcTrySegmentMergeDelta(dm_context, segment, prev_segment, next_segment, gc_safe_point);
+
+ if (!new_seg)
{
- if (segment = segmentMergeDelta(*dm_context, segment, MergeDeltaReason::BackgroundGCThread, segment_snap); segment)
- {
- // Continue to check whether we need to apply more tasks on this segment
- segment_snap = {};
- checkSegmentUpdate(dm_context, segment, ThreadType::BG_GC);
- gc_segments_num++;
- finish_gc_on_segment = true;
- LOG_FMT_DEBUG(
- log,
- "Finish GC-merge-delta, segment={} table={}, gc_type={}",
- segment->simpleInfo(),
- table_name,
- GC::toString(gc_type));
- }
- else
- {
- LOG_FMT_DEBUG(
- log,
- "GC aborted, segment={} table={}, gc_type={}",
- segment->simpleInfo(),
- table_name,
- GC::toString(gc_type));
- }
- }
- if (!finish_gc_on_segment)
LOG_FMT_TRACE(
log,
- "GC skipped, segment={} table={}",
+ "GC - Skipped segment, segment={} table={}",
segment->simpleInfo(),
table_name);
+ continue;
+ }
+
+ gc_segments_num++;
}
catch (Exception & e)
{
- e.addMessage(fmt::format("while apply gc Segment [{}] [range={}] [table={}]", segment_id, segment_range.toDebugString(), table_name));
+ e.addMessage(fmt::format("Error while applying GC on segment, segment={} table={}", segment->info(), table_name));
e.rethrow();
}
}

if (gc_segments_num != 0)
- {
LOG_FMT_DEBUG(log, "Finish
GC, gc_segments_num={}", gc_segments_num); - } + return gc_segments_num; } diff --git a/dbms/src/Storages/DeltaMerge/DeltaMergeStore_InternalSegment.cpp b/dbms/src/Storages/DeltaMerge/DeltaMergeStore_InternalSegment.cpp index 9f682ab4296..11616c9a9ae 100644 --- a/dbms/src/Storages/DeltaMerge/DeltaMergeStore_InternalSegment.cpp +++ b/dbms/src/Storages/DeltaMerge/DeltaMergeStore_InternalSegment.cpp @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include #include #include #include @@ -194,6 +195,8 @@ SegmentPtr DeltaMergeStore::segmentMerge(DMContext & dm_context, const std::vect if (seg->flushCache(dm_context)) break; + SYNC_FOR("before_DeltaMergeStore::segmentMerge|retry_flush"); + // Else: retry. Flush could fail. Typical cases: // #1. The segment is abandoned (due to an update is finished) // #2. There is another flush in progress, for example, triggered in background diff --git a/dbms/src/Storages/DeltaMerge/tests/MultiSegmentTestUtil.h b/dbms/src/Storages/DeltaMerge/tests/MultiSegmentTestUtil.h index 100f27912e6..429def67d41 100644 --- a/dbms/src/Storages/DeltaMerge/tests/MultiSegmentTestUtil.h +++ b/dbms/src/Storages/DeltaMerge/tests/MultiSegmentTestUtil.h @@ -84,7 +84,7 @@ class MultiSegmentTestUtil : private boost::noncopyable { UNUSED(_key); LOG_FMT_INFO(log, "Segment #{}: Range = {}", segment_idx, seg->getRowKeyRange().toDebugString()); - rows_by_segments[segment_idx] = seg->getStable()->getRows(); + rows_by_segments[segment_idx] = seg->getEstimatedRows(); expected_stable_rows[segment_idx] = seg->getStable()->getRows(); expected_delta_rows[segment_idx] = seg->getDelta()->getRows(); segment_idx++; diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_merge_store.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_merge_store.cpp index bca226c8b7b..c10dbd6df35 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_merge_store.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_merge_store.cpp @@ -41,6 +41,7 @@ namespace DB { namespace FailPoints { +extern const char gc_skip_update_safe_point[]; extern const char pause_before_dt_background_delta_merge[]; extern const char pause_until_dt_background_delta_merge[]; extern const char force_triggle_background_merge_delta[]; @@ -3384,6 +3385,162 @@ try CATCH +class DeltaMergeStoreBackgroundTest + : public DB::base::TiFlashStorageTestBasic +{ +public: + void SetUp() override + { + FailPointHelper::enableFailPoint(FailPoints::gc_skip_update_safe_point); + + try + { + TiFlashStorageTestBasic::SetUp(); + setupDMStore(); + // Split into 4 segments. 
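+ // Each prepared segment contains 50 rows, matching the expectations in the test cases below.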
+ helper = std::make_unique(*db_context);
+ helper->prepareSegments(store, 50, DMTestEnv::PkType::CommonHandle);
+ }
+ CATCH
+ }
+
+ void TearDown() override
+ {
+ TiFlashStorageTestBasic::TearDown();
+ FailPointHelper::disableFailPoint(FailPoints::gc_skip_update_safe_point);
+ }
+
+ void setupDMStore()
+ {
+ auto cols = DMTestEnv::getDefaultColumns(DMTestEnv::PkType::CommonHandle);
+ store = std::make_shared(*db_context,
+ false,
+ "test",
+ DB::base::TiFlashStorageTestBasic::getCurrentFullTestName(),
+ 101,
+ *cols,
+ (*cols)[0],
+ true,
+ 1,
+ DeltaMergeStore::Settings());
+ dm_context = store->newDMContext(*db_context, db_context->getSettingsRef(), DB::base::TiFlashStorageTestBasic::getCurrentFullTestName());
+ }
+
+protected:
+ std::unique_ptr helper{};
+ DeltaMergeStorePtr store;
+ DMContextPtr dm_context;
+};
+
+TEST_F(DeltaMergeStoreBackgroundTest, GCWillMergeMultipleSegments)
+try
+{
+ ASSERT_EQ(store->segments.size(), 4);
+ auto gc_n = store->onSyncGc(1);
+ ASSERT_EQ(store->segments.size(), 1);
+ ASSERT_EQ(gc_n, 1);
+}
+CATCH
+
+TEST_F(DeltaMergeStoreBackgroundTest, GCOnlyMergeSmallSegments)
+try
+{
+ UInt64 gc_n = 0;
+
+ // Note: initially we have 4 segments, each segment contains 50 rows.
+
+ ASSERT_EQ(store->segments.size(), 4);
+ db_context->getGlobalContext().getSettingsRef().dt_segment_limit_rows = 10;
+ gc_n = store->onSyncGc(100);
+ ASSERT_EQ(store->segments.size(), 4);
+ ASSERT_EQ(gc_n, 0);
+
+ // In this case, merging two segments would exceed small_segment_rows, so no merge will happen
+ db_context->getGlobalContext().getSettingsRef().dt_segment_limit_rows = 55 * 3;
+ gc_n = store->onSyncGc(100);
+ ASSERT_EQ(store->segments.size(), 4);
+ ASSERT_EQ(gc_n, 0);
+
+ // In this case, each merge combines only two segments, because merging a third one would exceed small_segment_rows.
+ // [50, 50, 50, 50] => [100, 100]
+ db_context->getGlobalContext().getSettingsRef().dt_segment_limit_rows = 105 * 3;
+ gc_n = store->onSyncGc(100);
+ ASSERT_EQ(store->segments.size(), 2);
+ ASSERT_EQ(gc_n, 2);
+ helper->resetExpectedRows();
+ ASSERT_EQ(helper->rows_by_segments[0], 100);
+ ASSERT_EQ(helper->rows_by_segments[1], 100);
+
+ gc_n = store->onSyncGc(100);
+ ASSERT_EQ(store->segments.size(), 2);
+ ASSERT_EQ(gc_n, 0);
+ helper->verifyExpectedRowsForAllSegments();
+}
+CATCH
+
+TEST_F(DeltaMergeStoreBackgroundTest, GCMergeAndStop)
+try
+{
+ UInt64 gc_n = 0;
+
+ // Note: initially we have 4 segments, each segment contains 50 rows.
+
+ ASSERT_EQ(store->segments.size(), 4);
+
+ // In this case, we will only merge two segments and then stop.
+ // [50, 50, 50, 50] => [100, 50, 50]
+ db_context->getGlobalContext().getSettingsRef().dt_segment_limit_rows = 105 * 3;
+ gc_n = store->onSyncGc(1);
+ ASSERT_EQ(store->segments.size(), 3);
+ ASSERT_EQ(gc_n, 1);
+ helper->resetExpectedRows();
+ ASSERT_EQ(helper->rows_by_segments[0], 100);
+ ASSERT_EQ(helper->rows_by_segments[1], 50);
+ ASSERT_EQ(helper->rows_by_segments[2], 50);
+}
+CATCH
+
+TEST_F(DeltaMergeStoreBackgroundTest, GCMergeWhileFlushing)
+try
+{
+ ASSERT_EQ(store->segments.size(), 4);
+
+ Block block = DMTestEnv::prepareSimpleWriteBlock(0, 500, false, DMTestEnv::PkType::CommonHandle, 10 /* new tso */);
+ store->write(*db_context, db_context->getSettingsRef(), block);
+
+ // Currently, when there is a flush in progress, the segment merge in GC thread will be blocked.
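+ // The test below verifies this behavior: pause the flush just before it commits,
+ // start the GC merge (which keeps retrying flushCache), then finish the flush and
+ // expect the merge to complete without further retries.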
+
+ auto sp_flush_commit = SyncPointCtl::enableInScope("before_ColumnFileFlushTask::commit");
+ auto sp_merge_flush_retry = SyncPointCtl::enableInScope("before_DeltaMergeStore::segmentMerge|retry_flush");
+
+ auto th_flush = std::async([&]() {
+ auto result = store->segments.begin()->second->flushCache(*dm_context);
+ ASSERT_TRUE(result);
+ });
+
+ sp_flush_commit.waitAndPause();
+
+ auto th_gc = std::async([&]() {
+ auto gc_n = store->onSyncGc(1);
+ ASSERT_EQ(gc_n, 1);
+ ASSERT_EQ(store->segments.size(), 1);
+ });
+
+ // Expect the merge triggered by GC to keep retrying, because there is a flush in progress.
+ sp_merge_flush_retry.waitAndPause();
+
+ // Finish the flush.
+ sp_flush_commit.next();
+ sp_flush_commit.disable();
+ th_flush.wait();
+
+ // The merge in GC should continue without any further retries.
+ sp_merge_flush_retry.next();
+ th_gc.wait();
+}
+CATCH
+
+
} // namespace tests
} // namespace DM
} // namespace DB
From 5ddccf4c8a1f7ed475be731daf0ecdfb24eed0f4 Mon Sep 17 00:00:00 2001
From: yanweiqi <592838129@qq.com>
Date: Thu, 15 Sep 2022 17:52:59 +0800
Subject: [PATCH 09/17] Test: Refine astToExecutor (#5895)

ref pingcap/tiflash#4609
---
 dbms/CMakeLists.txt | 1 +
 .../{ => MockExecutor}/astToExecutor.cpp | 168 ++----------------
 .../Debug/{ => MockExecutor}/astToExecutor.h | 29 +--
 .../Debug/MockExecutor/astToExecutorUtils.cpp | 74 ++++++++
 .../Debug/MockExecutor/astToExecutorUtils.h | 50 ++++++
 dbms/src/Debug/MockExecutor/funcSigs.h | 102 +++++++++++
 dbms/src/Debug/dbgFuncCoprocessor.cpp | 2 +-
 dbms/src/Debug/dbgFuncCoprocessor.h | 2 +-
 dbms/src/Server/FlashGrpcServerHolder.h | 2 +-
 dbms/src/TestUtils/ColumnsToTiPBExpr.cpp | 2 +-
 dbms/src/TestUtils/mockExecutor.cpp | 2 +-
 dbms/src/TestUtils/mockExecutor.h | 2 +-
 12 files changed, 253 insertions(+), 183 deletions(-)
 rename dbms/src/Debug/{ => MockExecutor}/astToExecutor.cpp (91%)
 rename dbms/src/Debug/{ => MockExecutor}/astToExecutor.h (95%)
 create mode 100644 dbms/src/Debug/MockExecutor/astToExecutorUtils.cpp
 create mode 100644 dbms/src/Debug/MockExecutor/astToExecutorUtils.h
 create mode 100644 dbms/src/Debug/MockExecutor/funcSigs.h

diff --git a/dbms/src/Debug/astToExecutor.cpp b/dbms/src/Debug/MockExecutor/astToExecutor.cpp
similarity index 91%
rename from dbms/src/Debug/astToExecutor.cpp
rename to dbms/src/Debug/MockExecutor/astToExecutor.cpp
index 0a8c78bfcd8..6a5f38de9e0 100644
--- a/dbms/src/Debug/astToExecutor.cpp
+++ b/dbms/src/Debug/MockExecutor/astToExecutor.cpp
@@ -16,7 +16,8 @@
#include
#include
#include
-#include
+#include
+#include
#include
#include
#include
@@ -31,8 +32,16 @@
namespace DB
{
+namespace ErrorCodes
+{
+extern const int BAD_ARGUMENTS;
+extern const int LOGICAL_ERROR;
+extern const int NO_SUCH_COLUMN_IN_TABLE;
+} // namespace ErrorCodes
+
using ASTPartitionByElement = ASTOrderByElement;
using MockComputeServerManager = tests::MockComputeServerManager;
+
void literalFieldToTiPBExpr(const ColumnInfo & ci, const Field & val_field, tipb::Expr * expr,
Int32 collator_id) { *(expr->mutable_field_type()) = columnInfoToFieldType(ci); @@ -120,96 +129,6 @@ void literalFieldToTiPBExpr(const ColumnInfo & ci, const Field & val_field, tipb } } -namespace -{ -std::unordered_map func_name_to_sig({ - {"plusint", tipb::ScalarFuncSig::PlusInt}, - {"minusint", tipb::ScalarFuncSig::MinusInt}, - {"equals", tipb::ScalarFuncSig::EQInt}, - {"notEquals", tipb::ScalarFuncSig::NEInt}, - {"and", tipb::ScalarFuncSig::LogicalAnd}, - {"or", tipb::ScalarFuncSig::LogicalOr}, - {"xor", tipb::ScalarFuncSig::LogicalXor}, - {"not", tipb::ScalarFuncSig::UnaryNotInt}, - {"greater", tipb::ScalarFuncSig::GTInt}, - {"greaterorequals", tipb::ScalarFuncSig::GEInt}, - {"less", tipb::ScalarFuncSig::LTInt}, - {"lessorequals", tipb::ScalarFuncSig::LEInt}, - {"in", tipb::ScalarFuncSig::InInt}, - {"notin", tipb::ScalarFuncSig::InInt}, - {"date_format", tipb::ScalarFuncSig::DateFormatSig}, - {"if", tipb::ScalarFuncSig::IfInt}, - {"from_unixtime", tipb::ScalarFuncSig::FromUnixTime2Arg}, - /// bit_and/bit_or/bit_xor is aggregated function in clickhouse/mysql - {"bitand", tipb::ScalarFuncSig::BitAndSig}, - {"bitor", tipb::ScalarFuncSig::BitOrSig}, - {"bitxor", tipb::ScalarFuncSig::BitXorSig}, - {"bitnot", tipb::ScalarFuncSig::BitNegSig}, - {"notequals", tipb::ScalarFuncSig::NEInt}, - {"like", tipb::ScalarFuncSig::LikeSig}, - {"cast_int_int", tipb::ScalarFuncSig::CastIntAsInt}, - {"cast_int_real", tipb::ScalarFuncSig::CastIntAsReal}, - {"cast_real_int", tipb::ScalarFuncSig::CastRealAsInt}, - {"cast_real_real", tipb::ScalarFuncSig::CastRealAsReal}, - {"cast_decimal_int", tipb::ScalarFuncSig::CastDecimalAsInt}, - {"cast_time_int", tipb::ScalarFuncSig::CastTimeAsInt}, - {"cast_string_int", tipb::ScalarFuncSig::CastStringAsInt}, - {"cast_int_decimal", tipb::ScalarFuncSig::CastIntAsDecimal}, - {"cast_real_decimal", tipb::ScalarFuncSig::CastRealAsDecimal}, - {"cast_decimal_decimal", tipb::ScalarFuncSig::CastDecimalAsDecimal}, - {"cast_time_decimal", tipb::ScalarFuncSig::CastTimeAsDecimal}, - {"cast_string_decimal", tipb::ScalarFuncSig::CastStringAsDecimal}, - {"cast_int_string", tipb::ScalarFuncSig::CastIntAsString}, - {"cast_real_string", tipb::ScalarFuncSig::CastRealAsString}, - {"cast_decimal_string", tipb::ScalarFuncSig::CastDecimalAsString}, - {"cast_time_string", tipb::ScalarFuncSig::CastTimeAsString}, - {"cast_string_string", tipb::ScalarFuncSig::CastStringAsString}, - {"cast_int_date", tipb::ScalarFuncSig::CastIntAsTime}, - {"cast_real_date", tipb::ScalarFuncSig::CastRealAsTime}, - {"cast_decimal_date", tipb::ScalarFuncSig::CastDecimalAsTime}, - {"cast_time_date", tipb::ScalarFuncSig::CastTimeAsTime}, - {"cast_string_date", tipb::ScalarFuncSig::CastStringAsTime}, - {"cast_int_datetime", tipb::ScalarFuncSig::CastIntAsTime}, - {"cast_real_datetime", tipb::ScalarFuncSig::CastRealAsTime}, - {"cast_decimal_datetime", tipb::ScalarFuncSig::CastDecimalAsTime}, - {"cast_time_datetime", tipb::ScalarFuncSig::CastTimeAsTime}, - {"cast_string_datetime", tipb::ScalarFuncSig::CastStringAsTime}, - {"concat", tipb::ScalarFuncSig::Concat}, - {"round_int", tipb::ScalarFuncSig::RoundInt}, - {"round_uint", tipb::ScalarFuncSig::RoundInt}, - {"round_dec", tipb::ScalarFuncSig::RoundDec}, - {"round_real", tipb::ScalarFuncSig::RoundReal}, - {"round_with_frac_int", tipb::ScalarFuncSig::RoundWithFracInt}, - {"round_with_frac_uint", tipb::ScalarFuncSig::RoundWithFracInt}, - {"round_with_frac_dec", tipb::ScalarFuncSig::RoundWithFracDec}, - {"round_with_frac_real", tipb::ScalarFuncSig::RoundWithFracReal}, -}); - 
-std::unordered_map agg_func_name_to_sig({ - {"min", tipb::ExprType::Min}, - {"max", tipb::ExprType::Max}, - {"count", tipb::ExprType::Count}, - {"sum", tipb::ExprType::Sum}, - {"first_row", tipb::ExprType::First}, - {"uniqRawRes", tipb::ExprType::ApproxCountDistinct}, - {"group_concat", tipb::ExprType::GroupConcat}, -}); - -std::unordered_map window_func_name_to_sig({ - {"RowNumber", tipb::ExprType::RowNumber}, - {"Rank", tipb::ExprType::Rank}, - {"DenseRank", tipb::ExprType::DenseRank}, - {"Lead", tipb::ExprType::Lead}, - {"Lag", tipb::ExprType::Lag}, -}); - -DAGColumnInfo toNullableDAGColumnInfo(const DAGColumnInfo & input) -{ - DAGColumnInfo output = input; - output.second.clearNotNullFlag(); - return output; -} - void literalToPB(tipb::Expr * expr, const Field & value, int32_t collator_id) { DataTypePtr type = applyVisitor(FieldToDataType(), value); @@ -311,19 +230,6 @@ void astToPB(const DAGSchema & input, ASTPtr ast, tipb::Expr * expr, int32_t col } } -auto checkSchema(const DAGSchema & input, String checked_column) -{ - auto ft = std::find_if(input.begin(), input.end(), [&](const auto & field) { - auto [checked_db_name, checked_table_name, checked_column_name] = splitQualifiedName(checked_column); - auto [db_name, table_name, column_name] = splitQualifiedName(field.first); - if (checked_table_name.empty()) - return column_name == checked_column_name; - else - return table_name == checked_table_name && column_name == checked_column_name; - }); - return ft; -} - void functionToPB(const DAGSchema & input, ASTFunction * func, tipb::Expr * expr, int32_t collator_id, const Context & context) { /// aggregation function is handled in Aggregation, so just treated as a column @@ -345,8 +251,8 @@ void functionToPB(const DAGSchema & input, ASTFunction * func, tipb::Expr * expr // TODO: Support more functions. // TODO: Support type inference. 
- const auto it_sig = func_name_to_sig.find(func_name_lowercase); - if (it_sig == func_name_to_sig.end()) + const auto it_sig = tests::func_name_to_sig.find(func_name_lowercase); + if (it_sig == tests::func_name_to_sig.end()) { throw Exception("Unsupported function: " + func_name_lowercase, ErrorCodes::LOGICAL_ERROR); } @@ -617,8 +523,8 @@ TiDB::ColumnInfo compileExpr(const DAGSchema & input, ASTPtr ast) { /// check function String func_name_lowercase = Poco::toLower(func->name); - const auto it_sig = func_name_to_sig.find(func_name_lowercase); - if (it_sig == func_name_to_sig.end()) + const auto it_sig = tests::func_name_to_sig.find(func_name_lowercase); + if (it_sig == tests::func_name_to_sig.end()) { throw Exception("Unsupported function: " + func_name_lowercase, ErrorCodes::LOGICAL_ERROR); } @@ -789,42 +695,6 @@ void compileFilter(const DAGSchema & input, ASTPtr ast, std::vector & co conditions.push_back(ast); compileExpr(input, ast); } -} // namespace - -namespace Debug -{ -String LOCAL_HOST = "127.0.0.1:3930"; - -void setServiceAddr(const std::string & addr) -{ - LOCAL_HOST = addr; -} -} // namespace Debug - -ColumnName splitQualifiedName(const String & s) -{ - ColumnName ret; - Poco::StringTokenizer string_tokens(s, "."); - - switch (string_tokens.count()) - { - case 1: - ret.column_name = s; - break; - case 2: - ret.table_name = string_tokens[0]; - ret.column_name = string_tokens[1]; - break; - case 3: - ret.db_name = string_tokens[0]; - ret.table_name = string_tokens[1]; - ret.column_name = string_tokens[2]; - break; - default: - throw Exception("Invalid identifier name " + s); - } - return ret; -} namespace mock { @@ -1038,8 +908,8 @@ bool Aggregation::toTiPBExecutor(tipb::Executor * tipb_executor, int32_t collato tipb::Expr * arg_expr = agg_func->add_children(); astToPB(input_schema, arg, arg_expr, collator_id, context); } - auto agg_sig_it = agg_func_name_to_sig.find(func->name); - if (agg_sig_it == agg_func_name_to_sig.end()) + auto agg_sig_it = tests::agg_func_name_to_sig.find(func->name); + if (agg_sig_it == tests::agg_func_name_to_sig.end()) throw Exception("Unsupported agg function " + func->name, ErrorCodes::LOGICAL_ERROR); auto agg_sig = agg_sig_it->second; agg_func->set_tp(agg_sig); @@ -1453,8 +1323,8 @@ bool Window::toTiPBExecutor(tipb::Executor * tipb_executor, int32_t collator_id, tipb::Expr * func = window_expr->add_children(); astToPB(input_schema, arg, func, collator_id, context); } - auto window_sig_it = window_func_name_to_sig.find(window_func->name); - if (window_sig_it == window_func_name_to_sig.end()) + auto window_sig_it = tests::window_func_name_to_sig.find(window_func->name); + if (window_sig_it == tests::window_func_name_to_sig.end()) throw Exception(fmt::format("Unsupported window function {}", window_func->name), ErrorCodes::LOGICAL_ERROR); auto window_sig = window_sig_it->second; window_expr->set_tp(window_sig); @@ -1905,7 +1775,7 @@ ExecutorPtr compileWindow(ExecutorPtr input, size_t & executor_index, ASTPtr fun } // TODO: add more window functions TiDB::ColumnInfo ci; - switch (window_func_name_to_sig[func->name]) + switch (tests::window_func_name_to_sig[func->name]) { case tipb::ExprType::RowNumber: case tipb::ExprType::Rank: diff --git a/dbms/src/Debug/astToExecutor.h b/dbms/src/Debug/MockExecutor/astToExecutor.h similarity index 95% rename from dbms/src/Debug/astToExecutor.h rename to dbms/src/Debug/MockExecutor/astToExecutor.h index caa9116f2e1..aa82121a707 100644 --- a/dbms/src/Debug/astToExecutor.h +++ 
b/dbms/src/Debug/MockExecutor/astToExecutor.h @@ -16,6 +16,7 @@ #include #include +#include #include #include #include @@ -35,34 +36,6 @@ namespace DB { -namespace ErrorCodes -{ -extern const int BAD_ARGUMENTS; -extern const int LOGICAL_ERROR; -extern const int NO_SUCH_COLUMN_IN_TABLE; -} // namespace ErrorCodes - -using DAGColumnInfo = std::pair; -using DAGSchema = std::vector; - -namespace Debug -{ -extern String LOCAL_HOST; -void setServiceAddr(const std::string & addr); -} // namespace Debug - -// We use qualified format like "db_name.table_name.column_name" -// to identify one column of a table. -// We can split the qualified format into the ColumnName struct. -struct ColumnName -{ - String db_name; - String table_name; - String column_name; -}; - -ColumnName splitQualifiedName(const String & s); - struct MPPCtx { Timestamp start_ts; diff --git a/dbms/src/Debug/MockExecutor/astToExecutorUtils.cpp b/dbms/src/Debug/MockExecutor/astToExecutorUtils.cpp new file mode 100644 index 00000000000..5b935776a5e --- /dev/null +++ b/dbms/src/Debug/MockExecutor/astToExecutorUtils.cpp @@ -0,0 +1,74 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include + +namespace DB +{ +ColumnName splitQualifiedName(const String & s) +{ + ColumnName ret; + Poco::StringTokenizer string_tokens(s, "."); + + switch (string_tokens.count()) + { + case 1: + ret.column_name = s; + break; + case 2: + ret.table_name = string_tokens[0]; + ret.column_name = string_tokens[1]; + break; + case 3: + ret.db_name = string_tokens[0]; + ret.table_name = string_tokens[1]; + ret.column_name = string_tokens[2]; + break; + default: + throw Exception("Invalid identifier name " + s); + } + return ret; +} + + +std::__wrap_iter *> checkSchema(const DAGSchema & input, String checked_column) +{ + auto ft = std::find_if(input.begin(), input.end(), [&](const auto & field) { + auto [checked_db_name, checked_table_name, checked_column_name] = splitQualifiedName(checked_column); + auto [db_name, table_name, column_name] = splitQualifiedName(field.first); + if (checked_table_name.empty()) + return column_name == checked_column_name; + else + return table_name == checked_table_name && column_name == checked_column_name; + }); + return ft; +} + +DAGColumnInfo toNullableDAGColumnInfo(const DAGColumnInfo & input) +{ + DAGColumnInfo output = input; + output.second.clearNotNullFlag(); + return output; +} + +namespace Debug +{ +String LOCAL_HOST = "127.0.0.1:3930"; + +void setServiceAddr(const std::string & addr) +{ + LOCAL_HOST = addr; +} +} // namespace Debug +} // namespace DB diff --git a/dbms/src/Debug/MockExecutor/astToExecutorUtils.h b/dbms/src/Debug/MockExecutor/astToExecutorUtils.h new file mode 100644 index 00000000000..95689bc0895 --- /dev/null +++ b/dbms/src/Debug/MockExecutor/astToExecutorUtils.h @@ -0,0 +1,50 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include +#include +#include + +namespace DB +{ + +using DAGColumnInfo = std::pair; +using DAGSchema = std::vector; + +// We use qualified format like "db_name.table_name.column_name" +// to identify one column of a table. +// We can split the qualified format into the ColumnName struct. +struct ColumnName +{ + String db_name; + String table_name; + String column_name; +}; + +ColumnName splitQualifiedName(const String & s); + +std::__wrap_iter *> checkSchema(const DAGSchema & input, String checked_column); + +DAGColumnInfo toNullableDAGColumnInfo(const DAGColumnInfo & input); + +namespace Debug +{ +extern String LOCAL_HOST; + +void setServiceAddr(const std::string & addr); +} // namespace Debug +} // namespace DB diff --git a/dbms/src/Debug/MockExecutor/funcSigs.h b/dbms/src/Debug/MockExecutor/funcSigs.h new file mode 100644 index 00000000000..4c45a4a5736 --- /dev/null +++ b/dbms/src/Debug/MockExecutor/funcSigs.h @@ -0,0 +1,102 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#pragma once + +#include +#include + +namespace DB::tests +{ +std::unordered_map func_name_to_sig({ + {"plusint", tipb::ScalarFuncSig::PlusInt}, + {"minusint", tipb::ScalarFuncSig::MinusInt}, + {"equals", tipb::ScalarFuncSig::EQInt}, + {"notEquals", tipb::ScalarFuncSig::NEInt}, + {"and", tipb::ScalarFuncSig::LogicalAnd}, + {"or", tipb::ScalarFuncSig::LogicalOr}, + {"xor", tipb::ScalarFuncSig::LogicalXor}, + {"not", tipb::ScalarFuncSig::UnaryNotInt}, + {"greater", tipb::ScalarFuncSig::GTInt}, + {"greaterorequals", tipb::ScalarFuncSig::GEInt}, + {"less", tipb::ScalarFuncSig::LTInt}, + {"lessorequals", tipb::ScalarFuncSig::LEInt}, + {"in", tipb::ScalarFuncSig::InInt}, + {"notin", tipb::ScalarFuncSig::InInt}, + {"date_format", tipb::ScalarFuncSig::DateFormatSig}, + {"if", tipb::ScalarFuncSig::IfInt}, + {"from_unixtime", tipb::ScalarFuncSig::FromUnixTime2Arg}, + /// bit_and/bit_or/bit_xor is aggregated function in clickhouse/mysql + {"bitand", tipb::ScalarFuncSig::BitAndSig}, + {"bitor", tipb::ScalarFuncSig::BitOrSig}, + {"bitxor", tipb::ScalarFuncSig::BitXorSig}, + {"bitnot", tipb::ScalarFuncSig::BitNegSig}, + {"notequals", tipb::ScalarFuncSig::NEInt}, + {"like", tipb::ScalarFuncSig::LikeSig}, + {"cast_int_int", tipb::ScalarFuncSig::CastIntAsInt}, + {"cast_int_real", tipb::ScalarFuncSig::CastIntAsReal}, + {"cast_real_int", tipb::ScalarFuncSig::CastRealAsInt}, + {"cast_real_real", tipb::ScalarFuncSig::CastRealAsReal}, + {"cast_decimal_int", tipb::ScalarFuncSig::CastDecimalAsInt}, + {"cast_time_int", tipb::ScalarFuncSig::CastTimeAsInt}, + {"cast_string_int", tipb::ScalarFuncSig::CastStringAsInt}, + {"cast_int_decimal", tipb::ScalarFuncSig::CastIntAsDecimal}, + {"cast_real_decimal", tipb::ScalarFuncSig::CastRealAsDecimal}, + {"cast_decimal_decimal", tipb::ScalarFuncSig::CastDecimalAsDecimal}, + {"cast_time_decimal", tipb::ScalarFuncSig::CastTimeAsDecimal}, + {"cast_string_decimal", tipb::ScalarFuncSig::CastStringAsDecimal}, + {"cast_int_string", tipb::ScalarFuncSig::CastIntAsString}, + {"cast_real_string", tipb::ScalarFuncSig::CastRealAsString}, + {"cast_decimal_string", tipb::ScalarFuncSig::CastDecimalAsString}, + {"cast_time_string", tipb::ScalarFuncSig::CastTimeAsString}, + {"cast_string_string", tipb::ScalarFuncSig::CastStringAsString}, + {"cast_int_date", tipb::ScalarFuncSig::CastIntAsTime}, + {"cast_real_date", tipb::ScalarFuncSig::CastRealAsTime}, + {"cast_decimal_date", tipb::ScalarFuncSig::CastDecimalAsTime}, + {"cast_time_date", tipb::ScalarFuncSig::CastTimeAsTime}, + {"cast_string_date", tipb::ScalarFuncSig::CastStringAsTime}, + {"cast_int_datetime", tipb::ScalarFuncSig::CastIntAsTime}, + {"cast_real_datetime", tipb::ScalarFuncSig::CastRealAsTime}, + {"cast_decimal_datetime", tipb::ScalarFuncSig::CastDecimalAsTime}, + {"cast_time_datetime", tipb::ScalarFuncSig::CastTimeAsTime}, + {"cast_string_datetime", tipb::ScalarFuncSig::CastStringAsTime}, + {"concat", tipb::ScalarFuncSig::Concat}, + {"round_int", tipb::ScalarFuncSig::RoundInt}, + {"round_uint", tipb::ScalarFuncSig::RoundInt}, + {"round_dec", tipb::ScalarFuncSig::RoundDec}, + {"round_real", tipb::ScalarFuncSig::RoundReal}, + {"round_with_frac_int", tipb::ScalarFuncSig::RoundWithFracInt}, + {"round_with_frac_uint", tipb::ScalarFuncSig::RoundWithFracInt}, + {"round_with_frac_dec", tipb::ScalarFuncSig::RoundWithFracDec}, + {"round_with_frac_real", tipb::ScalarFuncSig::RoundWithFracReal}, +}); + +std::unordered_map agg_func_name_to_sig({ + {"min", tipb::ExprType::Min}, + {"max", tipb::ExprType::Max}, + {"count", 
tipb::ExprType::Count}, + {"sum", tipb::ExprType::Sum}, + {"first_row", tipb::ExprType::First}, + {"uniqRawRes", tipb::ExprType::ApproxCountDistinct}, + {"group_concat", tipb::ExprType::GroupConcat}, +}); + +std::unordered_map window_func_name_to_sig({ + {"RowNumber", tipb::ExprType::RowNumber}, + {"Rank", tipb::ExprType::Rank}, + {"DenseRank", tipb::ExprType::DenseRank}, + {"Lead", tipb::ExprType::Lead}, + {"Lag", tipb::ExprType::Lag}, +}); +} // namespace DB::tests diff --git a/dbms/src/Debug/dbgFuncCoprocessor.cpp b/dbms/src/Debug/dbgFuncCoprocessor.cpp index 1af655825e3..dacc459b81c 100644 --- a/dbms/src/Debug/dbgFuncCoprocessor.cpp +++ b/dbms/src/Debug/dbgFuncCoprocessor.cpp @@ -20,8 +20,8 @@ #include #include #include +#include #include -#include #include #include #include diff --git a/dbms/src/Debug/dbgFuncCoprocessor.h b/dbms/src/Debug/dbgFuncCoprocessor.h index 59042dfc61e..51db6865011 100644 --- a/dbms/src/Debug/dbgFuncCoprocessor.h +++ b/dbms/src/Debug/dbgFuncCoprocessor.h @@ -16,7 +16,7 @@ #include #include -#include +#include #include #include #include diff --git a/dbms/src/Server/FlashGrpcServerHolder.h b/dbms/src/Server/FlashGrpcServerHolder.h index b939a6bd08e..ae1edddca40 100644 --- a/dbms/src/Server/FlashGrpcServerHolder.h +++ b/dbms/src/Server/FlashGrpcServerHolder.h @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/dbms/src/TestUtils/ColumnsToTiPBExpr.cpp b/dbms/src/TestUtils/ColumnsToTiPBExpr.cpp index ea19ff08dd3..73168ba2cec 100644 --- a/dbms/src/TestUtils/ColumnsToTiPBExpr.cpp +++ b/dbms/src/TestUtils/ColumnsToTiPBExpr.cpp @@ -13,7 +13,7 @@ // limitations under the License. #include -#include +#include #include #include #include diff --git a/dbms/src/TestUtils/mockExecutor.cpp b/dbms/src/TestUtils/mockExecutor.cpp index 6921538b1c2..0e4232ac7e7 100644 --- a/dbms/src/TestUtils/mockExecutor.cpp +++ b/dbms/src/TestUtils/mockExecutor.cpp @@ -13,7 +13,7 @@ // limitations under the License. 
#include
-#include
+#include
#include
#include
diff --git a/dbms/src/TestUtils/mockExecutor.h b/dbms/src/TestUtils/mockExecutor.h
index 2e9f8180034..55ad992ca06 100644
--- a/dbms/src/TestUtils/mockExecutor.h
+++ b/dbms/src/TestUtils/mockExecutor.h
@@ -15,8 +15,8 @@
#pragma once

#include
+#include
#include
-#include
#include
#include
#include
From a5c047f76ccf192acceedd64b8b179ef4110adbc Mon Sep 17 00:00:00 2001
From: lidezhu <47731263+lidezhu@users.noreply.github.com>
Date: Thu, 15 Sep 2022 19:44:59 +0800
Subject: [PATCH 10/17] Fix crash when do gc on table with empty stable (#5896)

close pingcap/tiflash#5892
---
 dbms/src/Storages/DeltaMerge/StableValueSpace.cpp | 6 ++++--
 dbms/src/Storages/DeltaMerge/tests/gtest_segment.cpp | 8 +++++++-
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/dbms/src/Storages/DeltaMerge/StableValueSpace.cpp b/dbms/src/Storages/DeltaMerge/StableValueSpace.cpp
index 2dc338b557e..1c3e8de30ab 100644
--- a/dbms/src/Storages/DeltaMerge/StableValueSpace.cpp
+++ b/dbms/src/Storages/DeltaMerge/StableValueSpace.cpp
@@ -420,11 +420,13 @@ std::pair StableValueSpace::Snapshot::isFirstAndLastPackIncludedInRa
const auto & use_packs = filter.getUsePacks();
if (i == 0)
{
- first_pack_included = use_packs[0];
+ // TODO: this check may not be correct when we support multiple files in a stable; just keep it this way for now for simplicity
+ first_pack_included = use_packs.empty() || use_packs[0];
}
if (i == stable->files.size() - 1)
{
- last_pack_included = use_packs[use_packs.size() - 1];
+ // TODO: this check may not be correct when we support multiple files in a stable; just keep it this way for now for simplicity
+ last_pack_included = use_packs.empty() || use_packs.back();
}
}

diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_segment.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_segment.cpp
index 3a776e82ad3..03bf00fcaee 100644
--- a/dbms/src/Storages/DeltaMerge/tests/gtest_segment.cpp
+++ b/dbms/src/Storages/DeltaMerge/tests/gtest_segment.cpp
@@ -455,10 +455,16 @@ try
reloadWithOptions(options);
}

+ auto invalid_data_ratio_threshold = dm_context->db_context.getSettingsRef().dt_bg_gc_delta_delete_ratio_to_trigger_gc;
+
+ {
+ auto segment = segments[DELTA_MERGE_FIRST_SEGMENT_ID];
+ auto snap = segment->createSnapshot(*dm_context, /* for_update */ true, CurrentMetrics::DT_SnapshotOfDeltaMerge);
+ ASSERT_FALSE(GC::shouldCompactStableWithTooMuchDataOutOfSegmentRange(*dm_context, segment, snap, /* prev_seg */ nullptr, /* next_seg */ nullptr, invalid_data_ratio_threshold, log));
+ }
+
writeSegment(DELTA_MERGE_FIRST_SEGMENT_ID, 1000);
flushSegmentCache(DELTA_MERGE_FIRST_SEGMENT_ID);
mergeSegmentDelta(DELTA_MERGE_FIRST_SEGMENT_ID);
- auto invalid_data_ratio_threshold = dm_context->db_context.getSettingsRef().dt_bg_gc_delta_delete_ratio_to_trigger_gc;
{
auto segment = segments[DELTA_MERGE_FIRST_SEGMENT_ID];
auto snap = segment->createSnapshot(*dm_context, /* for_update */ true, CurrentMetrics::DT_SnapshotOfDeltaMerge);
From f333965cc0e7e4d5aee399008f425659740cd253 Mon Sep 17 00:00:00 2001
From: Wenxuan
Date: Thu, 15 Sep 2022 20:56:59 +0800
Subject: [PATCH 11/17] storage: Allow specifying split point and split method
 in the segment API (#5890)

ref pingcap/tiflash#5237
---
 dbms/src/Common/FailPoint.cpp | 94 ++-
 dbms/src/Common/TiFlashMetrics.h | 7 +-
 .../Storages/DeltaMerge/DeltaMergeStore.cpp | 2 +-
 .../src/Storages/DeltaMerge/DeltaMergeStore.h | 60 +-
 .../DeltaMerge/DeltaMergeStore_InternalBg.cpp | 2 +-
 .../DeltaMergeStore_InternalSegment.cpp | 58 +-
dbms/src/Storages/DeltaMerge/Segment.cpp | 152 ++-- dbms/src/Storages/DeltaMerge/Segment.h | 91 ++- .../DeltaMerge/tests/MultiSegmentTestUtil.h | 2 +- .../tests/gtest_dm_delta_merge_store.cpp | 4 +- .../DeltaMerge/tests/gtest_segment.cpp | 692 +++++++++++++++--- .../tests/gtest_segment_test_basic.cpp | 280 ++++--- .../tests/gtest_segment_test_basic.h | 79 +- .../tests/gtest_segment_test_randomized.cpp | 261 +++++++ 14 files changed, 1329 insertions(+), 455 deletions(-) create mode 100644 dbms/src/Storages/DeltaMerge/tests/gtest_segment_test_randomized.cpp diff --git a/dbms/src/Common/FailPoint.cpp b/dbms/src/Common/FailPoint.cpp index f14d7a2d91a..14dbca82bcf 100644 --- a/dbms/src/Common/FailPoint.cpp +++ b/dbms/src/Common/FailPoint.cpp @@ -27,55 +27,51 @@ namespace DB { std::unordered_map> FailPointHelper::fail_point_wait_channels; -#define APPLY_FOR_FAILPOINTS_ONCE(M) \ - M(exception_between_drop_meta_and_data) \ - M(exception_between_alter_data_and_meta) \ - M(exception_drop_table_during_remove_meta) \ - M(exception_between_rename_table_data_and_metadata) \ - M(exception_between_create_database_meta_and_directory) \ - M(exception_before_rename_table_old_meta_removed) \ - M(exception_after_step_1_in_exchange_partition) \ - M(exception_before_step_2_rename_in_exchange_partition) \ - M(exception_after_step_2_in_exchange_partition) \ - M(exception_before_step_3_rename_in_exchange_partition) \ - M(exception_after_step_3_in_exchange_partition) \ - M(region_exception_after_read_from_storage_some_error) \ - M(region_exception_after_read_from_storage_all_error) \ - M(exception_before_dmfile_remove_encryption) \ - M(exception_before_dmfile_remove_from_disk) \ - M(force_enable_region_persister_compatible_mode) \ - M(force_disable_region_persister_compatible_mode) \ - M(force_triggle_background_merge_delta) \ - M(force_triggle_foreground_flush) \ - M(exception_before_mpp_register_non_root_mpp_task) \ - M(exception_before_mpp_register_tunnel_for_non_root_mpp_task) \ - M(exception_during_mpp_register_tunnel_for_non_root_mpp_task) \ - M(exception_before_mpp_non_root_task_run) \ - M(exception_during_mpp_non_root_task_run) \ - M(exception_before_mpp_register_root_mpp_task) \ - M(exception_before_mpp_register_tunnel_for_root_mpp_task) \ - M(exception_before_mpp_root_task_run) \ - M(exception_during_mpp_root_task_run) \ - M(exception_during_mpp_write_err_to_tunnel) \ - M(exception_during_mpp_close_tunnel) \ - M(exception_during_write_to_storage) \ - M(force_set_sst_to_dtfile_block_size) \ - M(force_set_sst_decode_rand) \ - M(exception_before_page_file_write_sync) \ - M(force_set_segment_ingest_packs_fail) \ - M(segment_merge_after_ingest_packs) \ - M(force_formal_page_file_not_exists) \ - M(force_legacy_or_checkpoint_page_file_exists) \ - M(exception_in_creating_set_input_stream) \ - M(exception_when_read_from_log) \ - M(exception_mpp_hash_build) \ - M(exception_before_drop_segment) \ - M(exception_after_drop_segment) \ - M(exception_between_schema_change_in_the_same_diff) \ - /* try to use logical split, could fall back to physical split */ \ - M(try_segment_logical_split) \ - /* must perform logical split, otherwise throw exception */ \ - M(force_segment_logical_split) +#define APPLY_FOR_FAILPOINTS_ONCE(M) \ + M(exception_between_drop_meta_and_data) \ + M(exception_between_alter_data_and_meta) \ + M(exception_drop_table_during_remove_meta) \ + M(exception_between_rename_table_data_and_metadata) \ + M(exception_between_create_database_meta_and_directory) \ + 
M(exception_before_rename_table_old_meta_removed) \ + M(exception_after_step_1_in_exchange_partition) \ + M(exception_before_step_2_rename_in_exchange_partition) \ + M(exception_after_step_2_in_exchange_partition) \ + M(exception_before_step_3_rename_in_exchange_partition) \ + M(exception_after_step_3_in_exchange_partition) \ + M(region_exception_after_read_from_storage_some_error) \ + M(region_exception_after_read_from_storage_all_error) \ + M(exception_before_dmfile_remove_encryption) \ + M(exception_before_dmfile_remove_from_disk) \ + M(force_enable_region_persister_compatible_mode) \ + M(force_disable_region_persister_compatible_mode) \ + M(force_triggle_background_merge_delta) \ + M(force_triggle_foreground_flush) \ + M(exception_before_mpp_register_non_root_mpp_task) \ + M(exception_before_mpp_register_tunnel_for_non_root_mpp_task) \ + M(exception_during_mpp_register_tunnel_for_non_root_mpp_task) \ + M(exception_before_mpp_non_root_task_run) \ + M(exception_during_mpp_non_root_task_run) \ + M(exception_before_mpp_register_root_mpp_task) \ + M(exception_before_mpp_register_tunnel_for_root_mpp_task) \ + M(exception_before_mpp_root_task_run) \ + M(exception_during_mpp_root_task_run) \ + M(exception_during_mpp_write_err_to_tunnel) \ + M(exception_during_mpp_close_tunnel) \ + M(exception_during_write_to_storage) \ + M(force_set_sst_to_dtfile_block_size) \ + M(force_set_sst_decode_rand) \ + M(exception_before_page_file_write_sync) \ + M(force_set_segment_ingest_packs_fail) \ + M(segment_merge_after_ingest_packs) \ + M(force_formal_page_file_not_exists) \ + M(force_legacy_or_checkpoint_page_file_exists) \ + M(exception_in_creating_set_input_stream) \ + M(exception_when_read_from_log) \ + M(exception_mpp_hash_build) \ + M(exception_before_drop_segment) \ + M(exception_after_drop_segment) \ + M(exception_between_schema_change_in_the_same_diff) #define APPLY_FOR_FAILPOINTS(M) \ M(skip_check_segment_update) \ diff --git a/dbms/src/Common/TiFlashMetrics.h b/dbms/src/Common/TiFlashMetrics.h index 5ab33efec85..20ad1e2482f 100644 --- a/dbms/src/Common/TiFlashMetrics.h +++ b/dbms/src/Common/TiFlashMetrics.h @@ -113,7 +113,9 @@ namespace DB F(type_delta_merge_manual, {"type", "delta_merge_manual"}), \ F(type_delta_compact, {"type", "delta_compact"}), \ F(type_delta_flush, {"type", "delta_flush"}), \ - F(type_seg_split, {"type", "seg_split"}), F(type_seg_split_fg, {"type", "seg_split_fg"}), \ + F(type_seg_split_bg, {"type", "seg_split_bg"}), \ + F(type_seg_split_fg, {"type", "seg_split_fg"}), \ + F(type_seg_split_ingest, {"type", "seg_split_ingest"}), \ F(type_seg_merge, {"type", "seg_merge"}), F(type_seg_merge_fg, {"type", "seg_merge_fg"}), \ F(type_place_index_update, {"type", "place_index_update"})) \ M(tiflash_storage_subtask_duration_seconds, "Bucketed histogram of storage's sub task duration", Histogram, \ @@ -123,8 +125,9 @@ namespace DB F(type_delta_merge_manual, {{"type", "delta_merge_manual"}}, ExpBuckets{0.001, 2, 20}), \ F(type_delta_compact, {{"type", "delta_compact"}}, ExpBuckets{0.001, 2, 20}), \ F(type_delta_flush, {{"type", "delta_flush"}}, ExpBuckets{0.001, 2, 20}), \ - F(type_seg_split, {{"type", "seg_split"}}, ExpBuckets{0.001, 2, 20}), \ + F(type_seg_split_bg, {{"type", "seg_split_bg"}}, ExpBuckets{0.001, 2, 20}), \ F(type_seg_split_fg, {{"type", "seg_split_fg"}}, ExpBuckets{0.001, 2, 20}), \ + F(type_seg_split_ingest, {{"type", "seg_split_ingest"}}, ExpBuckets{0.001, 2, 20}), \ F(type_seg_merge, {{"type", "seg_merge"}}, ExpBuckets{0.001, 2, 20}), \ F(type_seg_merge_fg, {{"type", 
"seg_merge_fg"}}, ExpBuckets{0.001, 2, 20}), \ F(type_place_index_update, {{"type", "place_index_update"}}, ExpBuckets{0.001, 2, 20})) \ diff --git a/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp b/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp index 255997e1d5b..fc6cbb49795 100644 --- a/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp +++ b/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp @@ -1298,7 +1298,7 @@ void DeltaMergeStore::checkSegmentUpdate(const DMContextPtr & dm_context, const GET_METRIC(tiflash_storage_write_stall_duration_seconds, type_split).Observe(watch.elapsedSeconds()); }); - return segmentSplit(*dm_context, my_segment, true).first != nullptr; + return segmentSplit(*dm_context, my_segment, SegmentSplitReason::ForegroundWrite).first != nullptr; } return false; }; diff --git a/dbms/src/Storages/DeltaMerge/DeltaMergeStore.h b/dbms/src/Storages/DeltaMerge/DeltaMergeStore.h index cc61ca052b9..d936920e422 100644 --- a/dbms/src/Storages/DeltaMerge/DeltaMergeStore.h +++ b/dbms/src/Storages/DeltaMerge/DeltaMergeStore.h @@ -179,14 +179,6 @@ class DeltaMergeStore : private boost::noncopyable PlaceIndex, }; - enum MergeDeltaReason - { - BackgroundThreadPool, - BackgroundGCThread, - ForegroundWrite, - Manual, - }; - struct BackgroundTask { TaskType type; @@ -437,11 +429,45 @@ class DeltaMergeStore : private boost::noncopyable */ void checkSegmentUpdate(const DMContextPtr & context, const SegmentPtr & segment, ThreadType thread_type); + enum class SegmentSplitReason + { + ForegroundWrite, + Background, + IngestBySplit, + }; + + /** + * Note: This enum simply shadows Segment::SplitMode without introducing the whole Segment into this header. + */ + enum class SegmentSplitMode + { + /** + * Split according to settings. + * + * If logical split is allowed in the settings, logical split will be tried first. + * Logical split may fall back to physical split when calculating split point failed. + */ + Auto, + + /** + * Do logical split. If split point is not specified and cannot be calculated out, + * the split will fail. + */ + Logical, + + /** + * Do physical split. + */ + Physical, + }; + /** * Split the segment into two. * After splitting, the segment will be abandoned (with `segment->hasAbandoned() == true`) and the new two segments will be returned. + * + * When `opt_split_at` is not specified, this function will try to find a mid point for splitting, and may lead to failures. */ - SegmentPair segmentSplit(DMContext & dm_context, const SegmentPtr & segment, bool is_foreground); + SegmentPair segmentSplit(DMContext & dm_context, const SegmentPtr & segment, SegmentSplitReason reason, std::optional opt_split_at = std::nullopt, SegmentSplitMode opt_split_mode = SegmentSplitMode::Auto); /** * Merge multiple continuous segments (order by segment start key) into one. @@ -451,6 +477,14 @@ class DeltaMergeStore : private boost::noncopyable */ SegmentPtr segmentMerge(DMContext & dm_context, const std::vector & ordered_segments, bool is_foreground); + enum class MergeDeltaReason + { + BackgroundThreadPool, + BackgroundGCThread, + ForegroundWrite, + Manual, + }; + /** * Merge the delta (major compaction) in the segment. * After delta-merging, the segment will be abandoned (with `segment->hasAbandoned() == true`) and a new segment will be returned. 
@@ -461,10 +495,6 @@ class DeltaMergeStore : private boost::noncopyable MergeDeltaReason reason, SegmentSnapshotPtr segment_snap = nullptr); - bool updateGCSafePoint(); - - bool handleBackgroundTask(bool heavy); - // isSegmentValid should be protected by lock on `read_write_mutex` inline bool isSegmentValid(const std::shared_lock &, const SegmentPtr & segment) { @@ -476,6 +506,10 @@ class DeltaMergeStore : private boost::noncopyable } bool doIsSegmentValid(const SegmentPtr & segment); + bool updateGCSafePoint(); + + bool handleBackgroundTask(bool heavy); + void restoreStableFiles(); SegmentReadTasks getReadTasksByRanges(DMContext & dm_context, diff --git a/dbms/src/Storages/DeltaMerge/DeltaMergeStore_InternalBg.cpp b/dbms/src/Storages/DeltaMerge/DeltaMergeStore_InternalBg.cpp index cee4553cb7e..4979bc793f3 100644 --- a/dbms/src/Storages/DeltaMerge/DeltaMergeStore_InternalBg.cpp +++ b/dbms/src/Storages/DeltaMerge/DeltaMergeStore_InternalBg.cpp @@ -232,7 +232,7 @@ bool DeltaMergeStore::handleBackgroundTask(bool heavy) switch (task.type) { case TaskType::Split: - std::tie(left, right) = segmentSplit(*task.dm_context, task.segment, false); + std::tie(left, right) = segmentSplit(*task.dm_context, task.segment, SegmentSplitReason::Background); type = ThreadType::BG_Split; break; case TaskType::MergeDelta: diff --git a/dbms/src/Storages/DeltaMerge/DeltaMergeStore_InternalSegment.cpp b/dbms/src/Storages/DeltaMerge/DeltaMergeStore_InternalSegment.cpp index 11616c9a9ae..1a0da119a7c 100644 --- a/dbms/src/Storages/DeltaMerge/DeltaMergeStore_InternalSegment.cpp +++ b/dbms/src/Storages/DeltaMerge/DeltaMergeStore_InternalSegment.cpp @@ -37,12 +37,14 @@ namespace DB namespace DM { -SegmentPair DeltaMergeStore::segmentSplit(DMContext & dm_context, const SegmentPtr & segment, bool is_foreground) +SegmentPair DeltaMergeStore::segmentSplit(DMContext & dm_context, const SegmentPtr & segment, SegmentSplitReason reason, std::optional opt_split_at, SegmentSplitMode opt_split_mode) { LOG_FMT_INFO( log, - "Split - Begin, is_foreground={} safe_point={} segment={}", - is_foreground, + "Split - Begin, mode={} reason={}{} safe_point={} segment={}", + magic_enum::enum_name(opt_split_mode), + magic_enum::enum_name(reason), + (opt_split_at.has_value() ? 
fmt::format(" force_split_at={}", opt_split_at->toDebugString()) : ""),
        dm_context.min_version,
        segment->info());

@@ -75,28 +77,62 @@ SegmentPair DeltaMergeStore::segmentSplit(DMContext & dm_context, const SegmentP
     size_t duplicated_rows = 0;

     CurrentMetrics::Increment cur_dm_segments{CurrentMetrics::DT_SegmentSplit};
-    if (is_foreground)
+    switch (reason)
+    {
+    case SegmentSplitReason::ForegroundWrite:
         GET_METRIC(tiflash_storage_subtask_count, type_seg_split_fg).Increment();
-    else
-        GET_METRIC(tiflash_storage_subtask_count, type_seg_split).Increment();
+        break;
+    case SegmentSplitReason::Background:
+        GET_METRIC(tiflash_storage_subtask_count, type_seg_split_bg).Increment();
+        break;
+    case SegmentSplitReason::IngestBySplit:
+        GET_METRIC(tiflash_storage_subtask_count, type_seg_split_ingest).Increment();
+        break;
+    }
+
     Stopwatch watch_seg_split;
     SCOPE_EXIT({
-        if (is_foreground)
+        switch (reason)
+        {
+        case SegmentSplitReason::ForegroundWrite:
             GET_METRIC(tiflash_storage_subtask_duration_seconds, type_seg_split_fg).Observe(watch_seg_split.elapsedSeconds());
-        else
-            GET_METRIC(tiflash_storage_subtask_duration_seconds, type_seg_split).Observe(watch_seg_split.elapsedSeconds());
+            break;
+        case SegmentSplitReason::Background:
+            GET_METRIC(tiflash_storage_subtask_duration_seconds, type_seg_split_bg).Observe(watch_seg_split.elapsedSeconds());
+            break;
+        case SegmentSplitReason::IngestBySplit:
+            GET_METRIC(tiflash_storage_subtask_duration_seconds, type_seg_split_ingest).Observe(watch_seg_split.elapsedSeconds());
+            break;
+        }
     });

     WriteBatches wbs(*storage_pool, dm_context.getWriteLimiter());

+    Segment::SplitMode seg_split_mode;
+    switch (opt_split_mode)
+    {
+    case SegmentSplitMode::Auto:
+        seg_split_mode = Segment::SplitMode::Auto;
+        break;
+    case SegmentSplitMode::Logical:
+        seg_split_mode = Segment::SplitMode::Logical;
+        break;
+    case SegmentSplitMode::Physical:
+        seg_split_mode = Segment::SplitMode::Physical;
+        break;
+    default:
+        seg_split_mode = Segment::SplitMode::Auto;
+        break;
+    }
+
     auto range = segment->getRowKeyRange();
-    auto split_info_opt = segment->prepareSplit(dm_context, schema_snap, segment_snap, wbs);
+    auto split_info_opt = segment->prepareSplit(dm_context, schema_snap, segment_snap, opt_split_at, seg_split_mode, wbs);

     if (!split_info_opt.has_value())
     {
         // Likely we cannot find an appropriate split point for this segment later either; forbid splitting until this segment gets updated through applying delta-merge. Otherwise it would slow down writes a lot.
         segment->forbidSplit();
-        LOG_FMT_WARNING(log, "Split - Give up segmentSplit and forbid later split because of prepare split failed, segment={}", segment->simpleInfo());
+        LOG_FMT_WARNING(log, "Split - Give up segmentSplit and forbid later auto split because prepare split failed, segment={}", segment->simpleInfo());
         return {};
     }

diff --git a/dbms/src/Storages/DeltaMerge/Segment.cpp b/dbms/src/Storages/DeltaMerge/Segment.cpp
index fdaef7f8401..ca702dc38f9 100644
--- a/dbms/src/Storages/DeltaMerge/Segment.cpp
+++ b/dbms/src/Storages/DeltaMerge/Segment.cpp
@@ -95,12 +95,6 @@ extern const int LOGICAL_ERROR;
 extern const int UNKNOWN_FORMAT_VERSION;
 } // namespace ErrorCodes

-namespace FailPoints
-{
-extern const char try_segment_logical_split[];
-extern const char force_segment_logical_split[];
-} // namespace FailPoints
-
 namespace DM
 {
 const static size_t SEGMENT_BUFFER_SIZE = 128; // More than enough.
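
The log lines above render the new enums through magic_enum; a tiny sketch of what `magic_enum::enum_name` does (assuming C++17 and the magic_enum single header):

    #include <magic_enum.hpp>

    enum class SegmentSplitMode { Auto, Logical, Physical };

    // enum_name() yields the enum identifier as a constexpr std::string_view,
    // which is what gets interpolated into the "Split - Begin" log above.
    static_assert(magic_enum::enum_name(SegmentSplitMode::Logical) == "Logical");
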
@@ -712,14 +706,14 @@ SegmentPtr Segment::applyMergeDelta(const Segment::Lock &, // return new_me; } -SegmentPair Segment::split(DMContext & dm_context, const ColumnDefinesPtr & schema_snap) const +SegmentPair Segment::split(DMContext & dm_context, const ColumnDefinesPtr & schema_snap, std::optional opt_split_at, SplitMode opt_split_mode) const { WriteBatches wbs(dm_context.storage_pool, dm_context.getWriteLimiter()); auto segment_snap = createSnapshot(dm_context, true, CurrentMetrics::DT_SnapshotOfSegmentSplit); if (!segment_snap) return {}; - auto split_info_opt = prepareSplit(dm_context, schema_snap, segment_snap, wbs); + auto split_info_opt = prepareSplit(dm_context, schema_snap, segment_snap, opt_split_at, opt_split_mode, wbs); if (!split_info_opt.has_value()) return {}; @@ -746,7 +740,7 @@ std::optional Segment::getSplitPointFast(DMContext & dm_context, co EventRecorder recorder(ProfileEvents::DMSegmentGetSplitPoint, ProfileEvents::DMSegmentGetSplitPointNS); auto stable_rows = stable_snap->getRows(); if (unlikely(!stable_rows)) - throw Exception("No stable rows"); + return {}; size_t split_row_index = stable_rows / 2; @@ -918,63 +912,104 @@ std::optional Segment::getSplitPointSlow( return {split_point}; } +bool isSplitPointValid(const RowKeyRange & segment_range, const RowKeyValueRef & split_point) +{ + return segment_range.check(split_point) && // + compare(split_point, segment_range.getStart()) != 0; +} + std::optional Segment::prepareSplit(DMContext & dm_context, const ColumnDefinesPtr & schema_snap, const SegmentSnapshotPtr & segment_snap, + std::optional opt_split_at, + Segment::SplitMode split_mode, WriteBatches & wbs) const { SYNC_FOR("before_Segment::prepareSplit"); - bool try_logical_split = dm_context.enable_logical_split // - && segment_snap->stable->getPacks() > 3 // - && segment_snap->delta->getRows() <= segment_snap->stable->getRows(); -#ifdef FIU_ENABLE - bool force_logical_split = false; - fiu_do_on(FailPoints::try_segment_logical_split, { try_logical_split = true; }); - fiu_do_on(FailPoints::force_segment_logical_split, { try_logical_split = true; force_logical_split = true; }); -#endif + if (opt_split_at.has_value()) + { + if (!isSplitPointValid(rowkey_range, opt_split_at->toRowKeyValueRef())) + { + LOG_FMT_WARNING(log, "Split - Split skipped because the specified split point is invalid, split_point={}", opt_split_at.value().toDebugString()); + return std::nullopt; + } + } - if (!try_logical_split) + SplitMode try_split_mode = split_mode; + // We will only try either LogicalSplit or PhysicalSplit. + if (split_mode == SplitMode::Auto) { - return prepareSplitPhysical(dm_context, schema_snap, segment_snap, wbs); + if (opt_split_at.has_value()) + { + if (dm_context.enable_logical_split) + try_split_mode = SplitMode::Logical; + else + try_split_mode = SplitMode::Physical; + } + else + { + // When split point is not specified, there are some preconditions in order to use logical split. 
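+            // Namely (mirroring the condition right below): the dt_enable_logical_split
+            // setting must be on, the stable must contain more than 3 packs, and the
+            // delta must not hold more rows than the stable.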
+ if (!dm_context.enable_logical_split // + || segment_snap->stable->getPacks() <= 3 // + || segment_snap->delta->getRows() > segment_snap->stable->getRows()) + { + try_split_mode = SplitMode::Physical; + } + else + { + try_split_mode = SplitMode::Logical; + } + } } - else + + switch (try_split_mode) + { + case SplitMode::Logical: { - auto split_point_opt = getSplitPointFast(dm_context, segment_snap->stable); + auto [split_info_or_null, status] = prepareSplitLogical(dm_context, schema_snap, segment_snap, opt_split_at, wbs); + if (status == PrepareSplitLogicalStatus::FailCalculateSplitPoint && split_mode == SplitMode::Auto) + // Fallback to use physical split if possible. + return prepareSplitPhysical(dm_context, schema_snap, segment_snap, std::nullopt, wbs); + else + return split_info_or_null; + } + case SplitMode::Physical: + return prepareSplitPhysical(dm_context, schema_snap, segment_snap, opt_split_at, wbs); + default: + RUNTIME_CHECK(false, try_split_mode); + } +} - bool bad_split_point = !split_point_opt.has_value() || !rowkey_range.check(split_point_opt->toRowKeyValueRef()) - || compare(split_point_opt->toRowKeyValueRef(), rowkey_range.getStart()) == 0; - if (bad_split_point) +std::pair, Segment::PrepareSplitLogicalStatus> // +Segment::prepareSplitLogical(DMContext & dm_context, // + const ColumnDefinesPtr & /*schema_snap*/, + const SegmentSnapshotPtr & segment_snap, + std::optional opt_split_point, + WriteBatches & wbs) const +{ + LOG_FMT_DEBUG(log, "Split - SplitLogical - Begin prepare, opt_split_point={}", opt_split_point.has_value() ? opt_split_point->toDebugString() : "(null)"); + + if (!opt_split_point.has_value()) + { + opt_split_point = getSplitPointFast(dm_context, segment_snap->stable); + if (!opt_split_point.has_value() || !isSplitPointValid(rowkey_range, opt_split_point->toRowKeyValueRef())) { LOG_FMT_INFO( log, - "Split - Got bad split point, fall back to split physical, split_point={} segment={}", - (split_point_opt.has_value() ? split_point_opt->toRowKeyValueRef().toDebugString() : "no value"), + "Split - SplitLogical - Fail to calculate out a valid split point, calculated_split_point={} segment={}", + (opt_split_point.has_value() ? 
opt_split_point->toDebugString() : "(null)"), info()); -#ifdef FIU_ENABLE - RUNTIME_CHECK_MSG(!force_logical_split, "Can not perform logical split while failpoint `force_segment_logical_split` is true"); -#endif - return prepareSplitPhysical(dm_context, schema_snap, segment_snap, wbs); + return {std::nullopt, PrepareSplitLogicalStatus::FailCalculateSplitPoint}; } - else - return prepareSplitLogical(dm_context, schema_snap, segment_snap, split_point_opt.value(), wbs); } -} - -std::optional Segment::prepareSplitLogical(DMContext & dm_context, - const ColumnDefinesPtr & /*schema_snap*/, - const SegmentSnapshotPtr & segment_snap, - RowKeyValue & split_point, - WriteBatches & wbs) const -{ - LOG_FMT_DEBUG(log, "Split - SplitLogical - Begin prepare"); EventRecorder recorder(ProfileEvents::DMSegmentSplit, ProfileEvents::DMSegmentSplitNS); auto & storage_pool = dm_context.storage_pool; - RowKeyRange my_range(rowkey_range.start, split_point, is_common_handle, rowkey_column_size); - RowKeyRange other_range(split_point, rowkey_range.end, is_common_handle, rowkey_column_size); + RowKeyRange my_range(rowkey_range.start, opt_split_point.value(), is_common_handle, rowkey_column_size); + RowKeyRange other_range(opt_split_point.value(), rowkey_range.end, is_common_handle, rowkey_column_size); if (my_range.none() || other_range.none()) { @@ -983,7 +1018,7 @@ std::optional Segment::prepareSplitLogical(DMContext & dm_co "Split - SplitLogical - Unexpected range, aborted, my_range: {}, other_range: {}", my_range.toDebugString(), other_range.toDebugString()); - return {}; + return {std::nullopt, PrepareSplitLogicalStatus::FailOther}; } GenPageId log_gen_page_id = [&]() { @@ -1035,26 +1070,34 @@ std::optional Segment::prepareSplitLogical(DMContext & dm_co my_stable->setFiles(my_stable_files, my_range, &dm_context); other_stable->setFiles(other_stable_files, other_range, &dm_context); - LOG_FMT_DEBUG(log, "Split - SplitLogical - Finish prepare, segment={} split_point={}", info(), split_point.toDebugString()); + LOG_FMT_DEBUG(log, "Split - SplitLogical - Finish prepare, segment={} split_point={}", info(), opt_split_point->toDebugString()); - return {SplitInfo{true, split_point, my_stable, other_stable}}; + return {SplitInfo{ + .is_logical = true, + .split_point = opt_split_point.value(), + .my_stable = my_stable, + .other_stable = other_stable}, + PrepareSplitLogicalStatus::Success}; } std::optional Segment::prepareSplitPhysical(DMContext & dm_context, const ColumnDefinesPtr & schema_snap, const SegmentSnapshotPtr & segment_snap, + std::optional opt_split_point, WriteBatches & wbs) const { - LOG_FMT_DEBUG(log, "Split - SplitPhysical - Begin prepare"); + LOG_FMT_DEBUG(log, "Split - SplitPhysical - Begin prepare, opt_split_point={}", opt_split_point.has_value() ? 
opt_split_point->toDebugString() : "(null)"); EventRecorder recorder(ProfileEvents::DMSegmentSplit, ProfileEvents::DMSegmentSplitNS); auto read_info = getReadInfo(dm_context, *schema_snap, segment_snap, {RowKeyRange::newAll(is_common_handle, rowkey_column_size)}); - auto split_point_opt = getSplitPointSlow(dm_context, read_info, segment_snap); - if (!split_point_opt.has_value()) + + if (!opt_split_point.has_value()) + opt_split_point = getSplitPointSlow(dm_context, read_info, segment_snap); + if (!opt_split_point.has_value()) return {}; - const auto & split_point = split_point_opt.value(); + const auto & split_point = opt_split_point.value(); RowKeyRange my_range(rowkey_range.start, split_point, is_common_handle, rowkey_column_size); RowKeyRange other_range(split_point, rowkey_range.end, is_common_handle, rowkey_column_size); @@ -1066,7 +1109,7 @@ std::optional Segment::prepareSplitPhysical(DMContext & dm_c "Split - SplitPhysical - Unexpected range, aborted, my_range: {}, other_range: {}", my_range.toDebugString(), other_range.toDebugString()); - return {}; + return std::nullopt; } StableValueSpacePtr my_new_stable; @@ -1139,7 +1182,12 @@ std::optional Segment::prepareSplitPhysical(DMContext & dm_c LOG_FMT_DEBUG(log, "Split - SplitPhysical - Finish prepare, segment={} split_point={}", info(), split_point.toDebugString()); - return {SplitInfo{false, split_point, my_new_stable, other_stable}}; + return SplitInfo{ + .is_logical = false, + .split_point = split_point, + .my_stable = my_new_stable, + .other_stable = other_stable, + }; } SegmentPair Segment::applySplit(const Segment::Lock &, // diff --git a/dbms/src/Storages/DeltaMerge/Segment.h b/dbms/src/Storages/DeltaMerge/Segment.h index 422bb87b125..318a5150068 100644 --- a/dbms/src/Storages/DeltaMerge/Segment.h +++ b/dbms/src/Storages/DeltaMerge/Segment.h @@ -196,18 +196,54 @@ class Segment : private boost::noncopyable /// For those split, merge and mergeDelta methods, we should use prepareXXX/applyXXX combo in real production. /// split(), merge() and mergeDelta() are only used in test cases. + /** + * Note: There is also DeltaMergeStore::SegmentSplitMode, which shadows this enum. + */ + enum class SplitMode + { + /** + * Split according to settings. + * + * If logical split is allowed in the settings, logical split will be tried first. + * Logical split may fall back to physical split when calculating split point failed. + */ + Auto, + + /** + * Do logical split. If split point is not specified and cannot be calculated out, + * the split will fail. + */ + Logical, + + /** + * Do physical split. + */ + Physical, + }; + /** * Only used in tests as a shortcut. * Normally you should use `prepareSplit` and `applySplit`. 
*/ - [[nodiscard]] SegmentPair split(DMContext & dm_context, const ColumnDefinesPtr & schema_snap) const; + [[nodiscard]] SegmentPair split(DMContext & dm_context, const ColumnDefinesPtr & schema_snap, std::optional opt_split_at = std::nullopt, SplitMode opt_split_mode = SplitMode::Auto) const; std::optional prepareSplit( DMContext & dm_context, const ColumnDefinesPtr & schema_snap, const SegmentSnapshotPtr & segment_snap, + std::optional opt_split_at, + SplitMode split_mode, WriteBatches & wbs) const; + std::optional prepareSplit( + DMContext & dm_context, + const ColumnDefinesPtr & schema_snap, + const SegmentSnapshotPtr & segment_snap, + WriteBatches & wbs) const + { + return prepareSplit(dm_context, schema_snap, segment_snap, std::nullopt, SplitMode::Auto, wbs); + } + /** * Should be protected behind the Segment update lock. */ @@ -218,6 +254,36 @@ class Segment : private boost::noncopyable WriteBatches & wbs, SplitInfo & split_info) const; + /// Merge delta & stable, and then take the middle one. + std::optional getSplitPointSlow( + DMContext & dm_context, + const ReadInfo & read_info, + const SegmentSnapshotPtr & segment_snap) const; + /// Only look up in the stable vs. + std::optional getSplitPointFast( + DMContext & dm_context, + const StableSnapshotPtr & stable_snap) const; + + enum class PrepareSplitLogicalStatus + { + Success, + FailCalculateSplitPoint, + FailOther, + }; + + std::pair, PrepareSplitLogicalStatus> prepareSplitLogical( + DMContext & dm_context, + const ColumnDefinesPtr & schema_snap, + const SegmentSnapshotPtr & segment_snap, + std::optional opt_split_point, + WriteBatches & wbs) const; + std::optional prepareSplitPhysical( + DMContext & dm_context, + const ColumnDefinesPtr & schema_snap, + const SegmentSnapshotPtr & segment_snap, + std::optional opt_split_point, + WriteBatches & wbs) const; + /** * Only used in tests as a shortcut. * Normally you should use `prepareMerge` and `applyMerge`. @@ -367,29 +433,6 @@ class Segment : private boost::noncopyable size_t expected_block_size, UInt64 max_version = std::numeric_limits::max()); - /// Merge delta & stable, and then take the middle one. - std::optional getSplitPointSlow( - DMContext & dm_context, - const ReadInfo & read_info, - const SegmentSnapshotPtr & segment_snap) const; - /// Only look up in the stable vs. - std::optional getSplitPointFast( - DMContext & dm_context, - const StableSnapshotPtr & stable_snap) const; - - std::optional prepareSplitLogical( - DMContext & dm_context, - const ColumnDefinesPtr & schema_snap, - const SegmentSnapshotPtr & segment_snap, - RowKeyValue & split_point, - WriteBatches & wbs) const; - std::optional prepareSplitPhysical( - DMContext & dm_context, - const ColumnDefinesPtr & schema_snap, - const SegmentSnapshotPtr & segment_snap, - WriteBatches & wbs) const; - - /// Make sure that all delta packs have been placed. /// Note that the index returned could be partial index, and cannot be updated to shared index. 
/// Returns diff --git a/dbms/src/Storages/DeltaMerge/tests/MultiSegmentTestUtil.h b/dbms/src/Storages/DeltaMerge/tests/MultiSegmentTestUtil.h index 429def67d41..be563c38053 100644 --- a/dbms/src/Storages/DeltaMerge/tests/MultiSegmentTestUtil.h +++ b/dbms/src/Storages/DeltaMerge/tests/MultiSegmentTestUtil.h @@ -150,7 +150,7 @@ class MultiSegmentTestUtil : private boost::noncopyable store->read_write_mutex.lock(); auto seg = std::next(store->segments.begin(), segment_idx)->second; store->read_write_mutex.unlock(); - auto result = store->segmentSplit(*dm_context, seg, /*is_foreground*/ true); + auto result = store->segmentSplit(*dm_context, seg, DeltaMergeStore::SegmentSplitReason::ForegroundWrite); if (result.first) { break; diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_merge_store.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_merge_store.cpp index c10dbd6df35..4422a9e368c 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_merge_store.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_merge_store.cpp @@ -1161,7 +1161,7 @@ try SegmentPtr seg; std::tie(std::ignore, seg) = *store->segments.begin(); - store->segmentSplit(*dm_context, seg, /*is_foreground*/ true); + store->segmentSplit(*dm_context, seg, DeltaMergeStore::SegmentSplitReason::ForegroundWrite); } const UInt64 tso2 = 10; @@ -3335,7 +3335,7 @@ try // Split segment1 into 2. auto dm_context = store->newDMContext(*db_context, db_context->getSettingsRef(), "test"); auto segment1 = std::next(store->segments.begin())->second; - auto result = store->segmentSplit(*dm_context, segment1, /*is_foreground*/ true); + auto result = store->segmentSplit(*dm_context, segment1, DeltaMergeStore::SegmentSplitReason::ForegroundWrite); ASSERT_NE(result.second, nullptr); helper->resetExpectedRows(); diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_segment.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_segment.cpp index 03bf00fcaee..814e5443258 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_segment.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_segment.cpp @@ -11,6 +11,7 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. + #include #include #include @@ -31,12 +32,6 @@ extern const Metric DT_SnapshotOfDeltaMerge; namespace DB { -namespace FailPoints -{ -extern const char try_segment_logical_split[]; -extern const char force_segment_logical_split[]; -} // namespace FailPoints - namespace DM { namespace GC @@ -54,22 +49,12 @@ namespace tests class SegmentOperationTest : public SegmentTestBasic { protected: - static void SetUpTestCase() {} - - void SetUp() override - { - log = DB::Logger::get("SegmentOperationTest"); - } - - DB::LoggerPtr log; + DB::LoggerPtr log = DB::Logger::get("SegmentOperationTest"); }; TEST_F(SegmentOperationTest, Issue4956) try { - SegmentTestOptions options; - reloadWithOptions(options); - // flush data, make the segment can be split. 
writeSegment(DELTA_MERGE_FIRST_SEGMENT_ID); flushSegmentCache(DELTA_MERGE_FIRST_SEGMENT_ID); @@ -86,8 +71,6 @@ CATCH TEST_F(SegmentOperationTest, TestSegment) try { - SegmentTestOptions options; - reloadWithOptions(options); writeSegment(DELTA_MERGE_FIRST_SEGMENT_ID); flushSegmentCache(DELTA_MERGE_FIRST_SEGMENT_ID); mergeSegmentDelta(DELTA_MERGE_FIRST_SEGMENT_ID); @@ -106,29 +89,9 @@ try } CATCH -TEST_F(SegmentOperationTest, TestSegmentMemTableDataAfterSplit) -try -{ - SegmentTestOptions options; - reloadWithOptions(options); - writeSegment(DELTA_MERGE_FIRST_SEGMENT_ID, 100); - flushSegmentCache(DELTA_MERGE_FIRST_SEGMENT_ID); - mergeSegmentDelta(DELTA_MERGE_FIRST_SEGMENT_ID); - - writeSegment(DELTA_MERGE_FIRST_SEGMENT_ID, 70); // Write data without flush - auto segment_id_2nd = splitSegment(DELTA_MERGE_FIRST_SEGMENT_ID); - ASSERT_TRUE(segment_id_2nd.has_value()); - ASSERT_EQ(segments.size(), 2); - // The mem table data may be fallen in either segment (as we write randomly). - ASSERT_EQ(getSegmentRowNum(DELTA_MERGE_FIRST_SEGMENT_ID) + getSegmentRowNum(*segment_id_2nd), 170); -} -CATCH - TEST_F(SegmentOperationTest, TestSegmentMergeTwo) try { - SegmentTestOptions options; - reloadWithOptions(options); writeSegment(DELTA_MERGE_FIRST_SEGMENT_ID, 100); flushSegmentCache(DELTA_MERGE_FIRST_SEGMENT_ID); mergeSegmentDelta(DELTA_MERGE_FIRST_SEGMENT_ID); @@ -160,8 +123,6 @@ CATCH TEST_F(SegmentOperationTest, TestSegmentMergeThree) try { - SegmentTestOptions options; - reloadWithOptions(options); writeSegment(DELTA_MERGE_FIRST_SEGMENT_ID, 100); flushSegmentCache(DELTA_MERGE_FIRST_SEGMENT_ID); mergeSegmentDelta(DELTA_MERGE_FIRST_SEGMENT_ID); @@ -191,8 +152,6 @@ CATCH TEST_F(SegmentOperationTest, TestSegmentMergeInvalid) try { - SegmentTestOptions options; - reloadWithOptions(options); writeSegment(DELTA_MERGE_FIRST_SEGMENT_ID, 100); flushSegmentCache(DELTA_MERGE_FIRST_SEGMENT_ID); mergeSegmentDelta(DELTA_MERGE_FIRST_SEGMENT_ID); @@ -205,21 +164,10 @@ try } CATCH -TEST_F(SegmentOperationTest, TestSegmentRandom) -try -{ - SegmentTestOptions options; - options.is_common_handle = true; - reloadWithOptions(options); - randomSegmentTest(100); -} -CATCH TEST_F(SegmentOperationTest, WriteDuringSegmentMergeDelta) try { - SegmentTestOptions options; - reloadWithOptions(options); writeSegment(DELTA_MERGE_FIRST_SEGMENT_ID, 100); flushSegmentCache(DELTA_MERGE_FIRST_SEGMENT_ID); mergeSegmentDelta(DELTA_MERGE_FIRST_SEGMENT_ID); @@ -276,8 +224,6 @@ CATCH TEST_F(SegmentOperationTest, WriteDuringSegmentSplit) try { - SegmentTestOptions options; - reloadWithOptions(options); writeSegment(DELTA_MERGE_FIRST_SEGMENT_ID, 100); flushSegmentCache(DELTA_MERGE_FIRST_SEGMENT_ID); mergeSegmentDelta(DELTA_MERGE_FIRST_SEGMENT_ID); @@ -289,7 +235,7 @@ try auto sp_seg_split_apply = SyncPointCtl::enableInScope("before_Segment::applySplit"); PageId new_seg_id; auto th_seg_split = std::async([&]() { - auto new_seg_id_opt = splitSegment(DELTA_MERGE_FIRST_SEGMENT_ID, /* check_rows */ false); + auto new_seg_id_opt = splitSegment(DELTA_MERGE_FIRST_SEGMENT_ID, Segment::SplitMode::Auto, /* check_rows */ false); ASSERT_TRUE(new_seg_id_opt.has_value()); new_seg_id = new_seg_id_opt.value(); }); @@ -338,8 +284,6 @@ CATCH TEST_F(SegmentOperationTest, WriteDuringSegmentMerge) try { - SegmentTestOptions options; - reloadWithOptions(options); writeSegment(DELTA_MERGE_FIRST_SEGMENT_ID, 100); flushSegmentCache(DELTA_MERGE_FIRST_SEGMENT_ID); mergeSegmentDelta(DELTA_MERGE_FIRST_SEGMENT_ID); @@ -397,16 +341,6 @@ try } CATCH -// run in CI weekly 
-TEST_F(SegmentOperationTest, DISABLED_TestSegmentRandomForCI) -try -{ - SegmentTestOptions options; - options.is_common_handle = true; - reloadWithOptions(options); - randomSegmentTest(10000); -} -CATCH TEST_F(SegmentOperationTest, SegmentLogicalSplit) try @@ -414,22 +348,21 @@ try { SegmentTestOptions options; options.db_settings.dt_segment_stable_pack_rows = 100; + options.db_settings.dt_enable_logical_split = true; reloadWithOptions(options); } - writeSegment(DELTA_MERGE_FIRST_SEGMENT_ID, 100); - writeSegment(DELTA_MERGE_FIRST_SEGMENT_ID, 100); - writeSegment(DELTA_MERGE_FIRST_SEGMENT_ID, 100); - writeSegment(DELTA_MERGE_FIRST_SEGMENT_ID, 100); + writeSegment(DELTA_MERGE_FIRST_SEGMENT_ID, 400, /* at */ 0); flushSegmentCache(DELTA_MERGE_FIRST_SEGMENT_ID); mergeSegmentDelta(DELTA_MERGE_FIRST_SEGMENT_ID); // non flushed pack before split, should be ref in new splitted segments - writeSegment(DELTA_MERGE_FIRST_SEGMENT_ID, 100); - FailPointHelper::enableFailPoint(FailPoints::force_segment_logical_split); - auto new_seg_id_opt = splitSegment(DELTA_MERGE_FIRST_SEGMENT_ID); + writeSegment(DELTA_MERGE_FIRST_SEGMENT_ID, 100, /* at */ 10); + auto new_seg_id_opt = splitSegment(DELTA_MERGE_FIRST_SEGMENT_ID, Segment::SplitMode::Logical); ASSERT_TRUE(new_seg_id_opt.has_value()); - + ASSERT_TRUE(areSegmentsSharingStable({DELTA_MERGE_FIRST_SEGMENT_ID, *new_seg_id_opt})); + ASSERT_EQ(300, getSegmentRowNumWithoutMVCC(DELTA_MERGE_FIRST_SEGMENT_ID)); + ASSERT_EQ(200, getSegmentRowNumWithoutMVCC(*new_seg_id_opt)); for (size_t test_round = 0; test_round < 20; ++test_round) { @@ -439,13 +372,12 @@ try LOG_FMT_TRACE(&Poco::Logger::root(), "test_round={} seg={} nrows={}", test_round, rand_seg_id, seg_nrows); writeSegment(rand_seg_id, 150); flushSegmentCache(rand_seg_id); - - FailPointHelper::enableFailPoint(FailPoints::try_segment_logical_split); - splitSegment(rand_seg_id); + splitSegment(rand_seg_id, Segment::SplitMode::Auto); } } CATCH + TEST_F(SegmentOperationTest, GCCheckAfterSegmentLogicalSplit) try { @@ -471,8 +403,7 @@ try ASSERT_FALSE(GC::shouldCompactStableWithTooMuchDataOutOfSegmentRange(*dm_context, segment, snap, /* prev_seg */ nullptr, /* next_seg */ nullptr, invalid_data_ratio_threshold, log)); } - FailPointHelper::enableFailPoint(FailPoints::force_segment_logical_split); - auto new_seg_id_opt = splitSegment(DELTA_MERGE_FIRST_SEGMENT_ID); + auto new_seg_id_opt = splitSegment(DELTA_MERGE_FIRST_SEGMENT_ID, Segment::SplitMode::Logical); ASSERT_TRUE(new_seg_id_opt.has_value()); auto left_segment_id = DELTA_MERGE_FIRST_SEGMENT_ID; auto right_segment_id = new_seg_id_opt.value(); @@ -485,8 +416,7 @@ try ASSERT_FALSE(GC::shouldCompactStableWithTooMuchDataOutOfSegmentRange(*dm_context, right_segment, right_snap, /* prev_seg */ left_segment, /* next_seg */ nullptr, invalid_data_ratio_threshold, log)); } - FailPointHelper::enableFailPoint(FailPoints::force_segment_logical_split); - auto new_seg_id_opt2 = splitSegment(DELTA_MERGE_FIRST_SEGMENT_ID); + auto new_seg_id_opt2 = splitSegment(DELTA_MERGE_FIRST_SEGMENT_ID, Segment::SplitMode::Logical); ASSERT_TRUE(new_seg_id_opt2.has_value()); auto middle_segment_id = new_seg_id_opt2.value(); { @@ -517,6 +447,7 @@ try } CATCH + TEST_F(SegmentOperationTest, Issue5570) try { @@ -527,14 +458,13 @@ try reloadWithOptions(options); } - writeSegment(DELTA_MERGE_FIRST_SEGMENT_ID, 100); - writeSegment(DELTA_MERGE_FIRST_SEGMENT_ID, 100); + writeSegment(DELTA_MERGE_FIRST_SEGMENT_ID, 200); flushSegmentCache(DELTA_MERGE_FIRST_SEGMENT_ID); 
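     // flushSegmentCache persists the mem table into the delta; mergeSegmentDelta
     // below then compacts it into a fresh stable, which the logical split issued
     // afterwards requires (a logical split cuts the shared stable by range).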
mergeSegmentDelta(DELTA_MERGE_FIRST_SEGMENT_ID); - FailPointHelper::enableFailPoint(FailPoints::force_segment_logical_split); - auto new_seg_id_opt = splitSegment(DELTA_MERGE_FIRST_SEGMENT_ID); + auto new_seg_id_opt = splitSegment(DELTA_MERGE_FIRST_SEGMENT_ID, Segment::SplitMode::Logical); ASSERT_TRUE(new_seg_id_opt.has_value()); + ASSERT_TRUE(areSegmentsSharingStable({DELTA_MERGE_FIRST_SEGMENT_ID, *new_seg_id_opt})); auto new_seg_id = new_seg_id_opt.value(); LOG_DEBUG(log, "beginSegmentMerge"); @@ -558,24 +488,23 @@ try LOG_DEBUG(log, "finishApplyMerge"); // logical split - FailPointHelper::enableFailPoint(FailPoints::force_segment_logical_split); - auto new_seg_id2_opt = splitSegment(DELTA_MERGE_FIRST_SEGMENT_ID); + auto new_seg_id2_opt = splitSegment(DELTA_MERGE_FIRST_SEGMENT_ID, Segment::SplitMode::Logical); ASSERT_TRUE(new_seg_id2_opt.has_value()); + ASSERT_TRUE(areSegmentsSharingStable({DELTA_MERGE_FIRST_SEGMENT_ID, *new_seg_id2_opt})); auto new_seg_id2 = new_seg_id2_opt.value(); { // further logical split on the left - FailPointHelper::enableFailPoint(FailPoints::force_segment_logical_split); - auto further_seg_id_opt = splitSegment(DELTA_MERGE_FIRST_SEGMENT_ID); + auto further_seg_id_opt = splitSegment(DELTA_MERGE_FIRST_SEGMENT_ID, Segment::SplitMode::Logical); ASSERT_TRUE(further_seg_id_opt.has_value()); + ASSERT_TRUE(areSegmentsSharingStable({DELTA_MERGE_FIRST_SEGMENT_ID, *further_seg_id_opt})); } { // further logical split on the right(it fall back to physical split cause by current // implement of getSplitPointFast) - FailPointHelper::enableFailPoint(FailPoints::try_segment_logical_split); - auto further_seg_id_opt = splitSegment(new_seg_id2); - ASSERT_TRUE(further_seg_id_opt.has_value()); + auto further_seg_id_opt = splitSegment(new_seg_id2, Segment::SplitMode::Logical); + ASSERT_FALSE(further_seg_id_opt.has_value()); } } CATCH @@ -588,6 +517,7 @@ try SegmentTestOptions options; // a smaller pack rows for logical split options.db_settings.dt_segment_stable_pack_rows = 100; + options.db_settings.dt_enable_logical_split = true; reloadWithOptions(options); } @@ -596,9 +526,9 @@ try flushSegmentCache(DELTA_MERGE_FIRST_SEGMENT_ID); mergeSegmentDelta(DELTA_MERGE_FIRST_SEGMENT_ID); - FailPointHelper::enableFailPoint(FailPoints::force_segment_logical_split); - auto new_seg_id_opt = splitSegment(DELTA_MERGE_FIRST_SEGMENT_ID); + auto new_seg_id_opt = splitSegment(DELTA_MERGE_FIRST_SEGMENT_ID, Segment::SplitMode::Logical); ASSERT_TRUE(new_seg_id_opt.has_value()); + ASSERT_TRUE(areSegmentsSharingStable({DELTA_MERGE_FIRST_SEGMENT_ID, *new_seg_id_opt})); auto new_seg_id = new_seg_id_opt.value(); std::random_device rd; @@ -631,7 +561,6 @@ try LOG_DEBUG(log, "finishApplyMerge"); // logical split - FailPointHelper::enableFailPoint(FailPoints::try_segment_logical_split); auto new_seg_id2_opt = splitSegment(DELTA_MERGE_FIRST_SEGMENT_ID); ASSERT_TRUE(new_seg_id2_opt.has_value()); new_seg_id = new_seg_id2_opt.value(); @@ -672,6 +601,573 @@ try CATCH +class SegmentEnableLogicalSplitTest : public SegmentOperationTest +{ +protected: + void SetUp() override + { + SegmentOperationTest::SetUp(); + SegmentTestOptions options; + options.db_settings.dt_segment_stable_pack_rows = 100; + options.db_settings.dt_enable_logical_split = true; + reloadWithOptions(options); + ASSERT_TRUE(dm_context->enable_logical_split); + } +}; + + +TEST_F(SegmentEnableLogicalSplitTest, AutoModeLogicalSplit) +try +{ + writeSegment(DELTA_MERGE_FIRST_SEGMENT_ID, 1000); + flushSegmentCache(DELTA_MERGE_FIRST_SEGMENT_ID); + 
mergeSegmentDelta(DELTA_MERGE_FIRST_SEGMENT_ID); + + auto new_seg_id_opt = splitSegment(DELTA_MERGE_FIRST_SEGMENT_ID, Segment::SplitMode::Auto); + ASSERT_TRUE(new_seg_id_opt.has_value()); + ASSERT_TRUE(areSegmentsSharingStable({DELTA_MERGE_FIRST_SEGMENT_ID, *new_seg_id_opt})); +} +CATCH + + +TEST_F(SegmentEnableLogicalSplitTest, AutoModePhysicalSplitWhenStableIsEmpty) +try +{ + writeSegment(DELTA_MERGE_FIRST_SEGMENT_ID, 1000); + flushSegmentCache(DELTA_MERGE_FIRST_SEGMENT_ID); + + auto new_seg_id_opt = splitSegment(DELTA_MERGE_FIRST_SEGMENT_ID, Segment::SplitMode::Auto); + ASSERT_TRUE(new_seg_id_opt.has_value()); + ASSERT_FALSE(areSegmentsSharingStable({DELTA_MERGE_FIRST_SEGMENT_ID, *new_seg_id_opt})); +} +CATCH + + +TEST_F(SegmentEnableLogicalSplitTest, AutoModePhysicalSplitWhenStablePacksAreFew) +try +{ + writeSegment(DELTA_MERGE_FIRST_SEGMENT_ID, 200); + flushSegmentCache(DELTA_MERGE_FIRST_SEGMENT_ID); + mergeSegmentDelta(DELTA_MERGE_FIRST_SEGMENT_ID); + + auto new_seg_id_opt = splitSegment(DELTA_MERGE_FIRST_SEGMENT_ID, Segment::SplitMode::Auto); + ASSERT_TRUE(new_seg_id_opt.has_value()); + ASSERT_FALSE(areSegmentsSharingStable({DELTA_MERGE_FIRST_SEGMENT_ID, *new_seg_id_opt})); +} +CATCH + + +TEST_F(SegmentEnableLogicalSplitTest, AutoModePhysicalSplitWhenDeltaIsLarger) +try +{ + writeSegment(DELTA_MERGE_FIRST_SEGMENT_ID, 1000); + flushSegmentCache(DELTA_MERGE_FIRST_SEGMENT_ID); + mergeSegmentDelta(DELTA_MERGE_FIRST_SEGMENT_ID); + + writeSegment(DELTA_MERGE_FIRST_SEGMENT_ID, 2000); + // Note: If we don't flush, then there will be logical split because mem table is not counted + flushSegmentCache(DELTA_MERGE_FIRST_SEGMENT_ID); + + auto new_seg_id_opt = splitSegment(DELTA_MERGE_FIRST_SEGMENT_ID, Segment::SplitMode::Auto); + ASSERT_TRUE(new_seg_id_opt.has_value()); + ASSERT_FALSE(areSegmentsSharingStable({DELTA_MERGE_FIRST_SEGMENT_ID, *new_seg_id_opt})); +} +CATCH + + +class SegmentSplitTest : public SegmentTestBasic +{ +}; + + +TEST_F(SegmentSplitTest, AutoModePhycialSplitByDefault) +try +{ + SegmentTestOptions options; + options.db_settings.dt_segment_stable_pack_rows = 100; + reloadWithOptions(options); + ASSERT_FALSE(dm_context->enable_logical_split); + + writeSegment(DELTA_MERGE_FIRST_SEGMENT_ID, 1000); + flushSegmentCache(DELTA_MERGE_FIRST_SEGMENT_ID); + mergeSegmentDelta(DELTA_MERGE_FIRST_SEGMENT_ID); + + auto new_seg_id_opt = splitSegment(DELTA_MERGE_FIRST_SEGMENT_ID, Segment::SplitMode::Auto); + ASSERT_TRUE(new_seg_id_opt.has_value()); + ASSERT_FALSE(areSegmentsSharingStable({DELTA_MERGE_FIRST_SEGMENT_ID, *new_seg_id_opt})); +} +CATCH + + +TEST_F(SegmentSplitTest, PhysicalSplitMode) +try +{ + SegmentTestOptions options; + options.db_settings.dt_segment_stable_pack_rows = 100; + // Even if we explicitly set enable_logical_split, we will still do physical split in SplitMode::Physical. 
+ options.db_settings.dt_enable_logical_split = true; + reloadWithOptions(options); + ASSERT_TRUE(dm_context->enable_logical_split); + + writeSegment(DELTA_MERGE_FIRST_SEGMENT_ID, 1000); + flushSegmentCache(DELTA_MERGE_FIRST_SEGMENT_ID); + mergeSegmentDelta(DELTA_MERGE_FIRST_SEGMENT_ID); + + auto new_seg_id_opt = splitSegment(DELTA_MERGE_FIRST_SEGMENT_ID, Segment::SplitMode::Physical); + ASSERT_TRUE(new_seg_id_opt.has_value()); + ASSERT_FALSE(areSegmentsSharingStable({DELTA_MERGE_FIRST_SEGMENT_ID, *new_seg_id_opt})); +} +CATCH + + +TEST_F(SegmentSplitTest, LogicalSplitWithMemTableData) +try +{ + writeSegment(DELTA_MERGE_FIRST_SEGMENT_ID, 5000, /* at */ 0); + flushSegmentCache(DELTA_MERGE_FIRST_SEGMENT_ID); + mergeSegmentDelta(DELTA_MERGE_FIRST_SEGMENT_ID); + + writeSegment(DELTA_MERGE_FIRST_SEGMENT_ID, 100, /* at */ 10); // Write data without flush + auto new_seg_id_opt = splitSegment(DELTA_MERGE_FIRST_SEGMENT_ID, Segment::SplitMode::Logical); + ASSERT_TRUE(new_seg_id_opt.has_value()); + ASSERT_EQ(segments.size(), 2); + ASSERT_TRUE(areSegmentsSharingStable({DELTA_MERGE_FIRST_SEGMENT_ID, *new_seg_id_opt})); + ASSERT_EQ(2600, getSegmentRowNumWithoutMVCC(DELTA_MERGE_FIRST_SEGMENT_ID)); + ASSERT_EQ(2500, getSegmentRowNumWithoutMVCC(*new_seg_id_opt)); + ASSERT_EQ(2500, getSegmentRowNum(DELTA_MERGE_FIRST_SEGMENT_ID)); + ASSERT_EQ(2500, getSegmentRowNum(*new_seg_id_opt)); +} +CATCH + + +TEST_F(SegmentSplitTest, PhysicalSplitWithMemTableData) +try +{ + writeSegment(DELTA_MERGE_FIRST_SEGMENT_ID, 100, /* at */ 0); + flushSegmentCache(DELTA_MERGE_FIRST_SEGMENT_ID); + mergeSegmentDelta(DELTA_MERGE_FIRST_SEGMENT_ID); + + writeSegment(DELTA_MERGE_FIRST_SEGMENT_ID, 70, /* at */ 300); // Write data without flush + auto new_seg_id_opt = splitSegment(DELTA_MERGE_FIRST_SEGMENT_ID, Segment::SplitMode::Physical); + ASSERT_TRUE(new_seg_id_opt.has_value()); + ASSERT_EQ(segments.size(), 2); + ASSERT_FALSE(areSegmentsSharingStable({DELTA_MERGE_FIRST_SEGMENT_ID, *new_seg_id_opt})); + ASSERT_EQ(50, getSegmentRowNum(DELTA_MERGE_FIRST_SEGMENT_ID)); + ASSERT_EQ(50 + 70, getSegmentRowNum(*new_seg_id_opt)); +} +CATCH + + +TEST_F(SegmentSplitTest, LogicalSplitModeDoesLogicalSplit) +try +{ + SegmentTestOptions options; + options.db_settings.dt_segment_stable_pack_rows = 100; + reloadWithOptions(options); + // Logical split will be performed if we use logical split mode, even when enable_logical_split is false. 
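+    // To summarize the matrix these tests exercise:
+    //   SplitMode::Auto     - obeys dt_enable_logical_split (and its preconditions)
+    //   SplitMode::Logical  - always logical; fails rather than falling back
+    //   SplitMode::Physical - always physical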
+ ASSERT_FALSE(dm_context->enable_logical_split); + + writeSegment(DELTA_MERGE_FIRST_SEGMENT_ID, 1000); + flushSegmentCache(DELTA_MERGE_FIRST_SEGMENT_ID); + mergeSegmentDelta(DELTA_MERGE_FIRST_SEGMENT_ID); + + auto new_seg_id_opt = splitSegment(DELTA_MERGE_FIRST_SEGMENT_ID, Segment::SplitMode::Logical); + ASSERT_TRUE(new_seg_id_opt.has_value()); + ASSERT_TRUE(areSegmentsSharingStable({DELTA_MERGE_FIRST_SEGMENT_ID, *new_seg_id_opt})); +} +CATCH + + +TEST_F(SegmentSplitTest, LogicalSplitModeDoesNotFallbackWhenNoStable) +try +{ + auto new_seg_id_opt = splitSegment(DELTA_MERGE_FIRST_SEGMENT_ID, Segment::SplitMode::Logical); + ASSERT_FALSE(new_seg_id_opt.has_value()); + + writeSegment(DELTA_MERGE_FIRST_SEGMENT_ID, 50); + new_seg_id_opt = splitSegment(DELTA_MERGE_FIRST_SEGMENT_ID, Segment::SplitMode::Logical); + ASSERT_FALSE(new_seg_id_opt.has_value()); + + flushSegmentCache(DELTA_MERGE_FIRST_SEGMENT_ID); + new_seg_id_opt = splitSegment(DELTA_MERGE_FIRST_SEGMENT_ID, Segment::SplitMode::Logical); + ASSERT_FALSE(new_seg_id_opt.has_value()); + + mergeSegmentDelta(DELTA_MERGE_FIRST_SEGMENT_ID); + new_seg_id_opt = splitSegment(DELTA_MERGE_FIRST_SEGMENT_ID, Segment::SplitMode::Logical); + ASSERT_TRUE(new_seg_id_opt.has_value()); + ASSERT_TRUE(areSegmentsSharingStable({DELTA_MERGE_FIRST_SEGMENT_ID, *new_seg_id_opt})); +} +CATCH + + +TEST_F(SegmentSplitTest, LogicalSplitModeOnePackInStable) +try +{ + SegmentTestOptions options; + options.db_settings.dt_segment_stable_pack_rows = 100; + reloadWithOptions(options); + + writeSegment(DELTA_MERGE_FIRST_SEGMENT_ID, 50); + flushSegmentCache(DELTA_MERGE_FIRST_SEGMENT_ID); + mergeSegmentDelta(DELTA_MERGE_FIRST_SEGMENT_ID); + + auto new_seg_id_opt = splitSegment(DELTA_MERGE_FIRST_SEGMENT_ID, Segment::SplitMode::Logical); + ASSERT_TRUE(new_seg_id_opt.has_value()); + ASSERT_TRUE(areSegmentsSharingStable({DELTA_MERGE_FIRST_SEGMENT_ID, *new_seg_id_opt})); + ASSERT_EQ(25, getSegmentRowNumWithoutMVCC(DELTA_MERGE_FIRST_SEGMENT_ID)); + ASSERT_EQ(25, getSegmentRowNumWithoutMVCC(*new_seg_id_opt)); +} +CATCH + + +TEST_F(SegmentSplitTest, LogicalSplitModeOnePackWithHoleInStable) +try +{ + SegmentTestOptions options; + options.db_settings.dt_segment_stable_pack_rows = 100; + reloadWithOptions(options); + + writeSegment(DELTA_MERGE_FIRST_SEGMENT_ID, 10, /* at */ 0); + writeSegment(DELTA_MERGE_FIRST_SEGMENT_ID, 10, /* at */ 90); + flushSegmentCache(DELTA_MERGE_FIRST_SEGMENT_ID); + mergeSegmentDelta(DELTA_MERGE_FIRST_SEGMENT_ID); + + auto new_seg_id_opt = splitSegment(DELTA_MERGE_FIRST_SEGMENT_ID, Segment::SplitMode::Logical); + ASSERT_TRUE(new_seg_id_opt.has_value()); + ASSERT_TRUE(areSegmentsSharingStable({DELTA_MERGE_FIRST_SEGMENT_ID, *new_seg_id_opt})); + + // Calculated Split Point + // │ + // │ new_seg + // │ ↓ ↓ + // Pack: [0~10 .... (Empty) .... ↓ 90~100] + // ↑ ↑ + // DELTA_MERGE_FIRST_SEGMENT + ASSERT_EQ(10, getSegmentRowNumWithoutMVCC(DELTA_MERGE_FIRST_SEGMENT_ID)); + ASSERT_EQ(10, getSegmentRowNumWithoutMVCC(*new_seg_id_opt)); + + // Now, let's split them again! We will still get the same split point (which is invalid). 
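+    // (Same point because getSplitPointFast only consults the stable, which both
+    // halves still share after the logical split; the midpoint it derives lands on
+    // the boundary between the two halves, so isSplitPointValid rejects it for
+    // either side.)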
+ { + auto seg_2 = splitSegment(DELTA_MERGE_FIRST_SEGMENT_ID, Segment::SplitMode::Logical); + ASSERT_FALSE(seg_2.has_value()); + } + { + auto seg_2 = splitSegment(*new_seg_id_opt, Segment::SplitMode::Logical); + ASSERT_FALSE(seg_2.has_value()); + } +} +CATCH + + +TEST_F(SegmentSplitTest, LogicalSplitModeOneRowInStable) +try +{ + writeSegment(DELTA_MERGE_FIRST_SEGMENT_ID, 1); + flushSegmentCache(DELTA_MERGE_FIRST_SEGMENT_ID); + mergeSegmentDelta(DELTA_MERGE_FIRST_SEGMENT_ID); + + auto new_seg_id_opt = splitSegment(DELTA_MERGE_FIRST_SEGMENT_ID, Segment::SplitMode::Logical); + ASSERT_TRUE(new_seg_id_opt.has_value()); + ASSERT_TRUE(areSegmentsSharingStable({DELTA_MERGE_FIRST_SEGMENT_ID, *new_seg_id_opt})); + ASSERT_EQ(0, getSegmentRowNumWithoutMVCC(DELTA_MERGE_FIRST_SEGMENT_ID)); + ASSERT_EQ(1, getSegmentRowNumWithoutMVCC(*new_seg_id_opt)); +} +CATCH + +class SegmentSplitAtTest : public SegmentTestBasic +{ +}; + + +TEST_F(SegmentSplitAtTest, AutoModeDisableLogicalSplit) +try +{ + writeSegment(DELTA_MERGE_FIRST_SEGMENT_ID, 100, /* at */ 0); + flushSegmentCache(DELTA_MERGE_FIRST_SEGMENT_ID); + mergeSegmentDelta(DELTA_MERGE_FIRST_SEGMENT_ID); + + ASSERT_FALSE(dm_context->enable_logical_split); + auto new_seg_id = splitSegmentAt(DELTA_MERGE_FIRST_SEGMENT_ID, 25, Segment::SplitMode::Auto); + ASSERT_TRUE(new_seg_id.has_value()); + ASSERT_FALSE(areSegmentsSharingStable({DELTA_MERGE_FIRST_SEGMENT_ID, *new_seg_id})); + ASSERT_EQ(25, getSegmentRowNumWithoutMVCC(DELTA_MERGE_FIRST_SEGMENT_ID)); + ASSERT_EQ(75, getSegmentRowNumWithoutMVCC(*new_seg_id)); +} +CATCH + + +TEST_F(SegmentSplitAtTest, AutoModeEnableLogicalSplit) +try +{ + SegmentTestOptions options; + options.db_settings.dt_enable_logical_split = true; + reloadWithOptions(options); + + writeSegment(DELTA_MERGE_FIRST_SEGMENT_ID, 100, /* at */ 0); + flushSegmentCache(DELTA_MERGE_FIRST_SEGMENT_ID); + mergeSegmentDelta(DELTA_MERGE_FIRST_SEGMENT_ID); + + ASSERT_TRUE(dm_context->enable_logical_split); + auto new_seg_id = splitSegmentAt(DELTA_MERGE_FIRST_SEGMENT_ID, 25, Segment::SplitMode::Auto); + ASSERT_TRUE(new_seg_id.has_value()); + ASSERT_TRUE(areSegmentsSharingStable({DELTA_MERGE_FIRST_SEGMENT_ID, *new_seg_id})); + ASSERT_EQ(25, getSegmentRowNumWithoutMVCC(DELTA_MERGE_FIRST_SEGMENT_ID)); + ASSERT_EQ(75, getSegmentRowNumWithoutMVCC(*new_seg_id)); +} +CATCH + + +TEST_F(SegmentSplitAtTest, LogicalSplitMode) +try +{ + writeSegment(DELTA_MERGE_FIRST_SEGMENT_ID, 100, /* at */ 0); + flushSegmentCache(DELTA_MERGE_FIRST_SEGMENT_ID); + mergeSegmentDelta(DELTA_MERGE_FIRST_SEGMENT_ID); + + // We will do logical split even if enable_logical_split == false when SplitMode is specified as LogicalSplit. + ASSERT_FALSE(dm_context->enable_logical_split); + auto new_seg_id = splitSegmentAt(DELTA_MERGE_FIRST_SEGMENT_ID, 25, Segment::SplitMode::Logical); + ASSERT_TRUE(new_seg_id.has_value()); + ASSERT_TRUE(areSegmentsSharingStable({DELTA_MERGE_FIRST_SEGMENT_ID, *new_seg_id})); + ASSERT_EQ(25, getSegmentRowNumWithoutMVCC(DELTA_MERGE_FIRST_SEGMENT_ID)); + ASSERT_EQ(75, getSegmentRowNumWithoutMVCC(*new_seg_id)); +} +CATCH + + +TEST_F(SegmentSplitAtTest, PhysicalSplitMode) +try +{ + SegmentTestOptions options; + options.db_settings.dt_enable_logical_split = true; + reloadWithOptions(options); + + writeSegment(DELTA_MERGE_FIRST_SEGMENT_ID, 100, /* at */ 0); + flushSegmentCache(DELTA_MERGE_FIRST_SEGMENT_ID); + mergeSegmentDelta(DELTA_MERGE_FIRST_SEGMENT_ID); + + // We will do physical split even if enable_logical_split == true when SplitMode is specified as PhysicalSplit. 
+ ASSERT_TRUE(dm_context->enable_logical_split); + auto new_seg_id = splitSegmentAt(DELTA_MERGE_FIRST_SEGMENT_ID, 25, Segment::SplitMode::Physical); + ASSERT_TRUE(new_seg_id.has_value()); + ASSERT_FALSE(areSegmentsSharingStable({DELTA_MERGE_FIRST_SEGMENT_ID, *new_seg_id})); + ASSERT_EQ(25, getSegmentRowNumWithoutMVCC(DELTA_MERGE_FIRST_SEGMENT_ID)); + ASSERT_EQ(75, getSegmentRowNumWithoutMVCC(*new_seg_id)); +} +CATCH + + +class SegmentSplitAtModeTest : public SegmentTestBasic + , public testing::WithParamInterface +{ +public: + SegmentSplitAtModeTest() + { + auto is_logical_split = GetParam(); + if (is_logical_split) + split_mode = Segment::SplitMode::Logical; + else + split_mode = Segment::SplitMode::Physical; + } + +protected: + Segment::SplitMode split_mode = Segment::SplitMode::Auto; +}; + +INSTANTIATE_TEST_CASE_P( + IsLogicalSplit, + SegmentSplitAtModeTest, + testing::Bool()); + +TEST_P(SegmentSplitAtModeTest, EmptySegment) +try +{ + auto new_seg_id = splitSegmentAt(DELTA_MERGE_FIRST_SEGMENT_ID, 100, split_mode); + ASSERT_TRUE(new_seg_id.has_value()); + ASSERT_EQ(split_mode == Segment::SplitMode::Logical, areSegmentsSharingStable({DELTA_MERGE_FIRST_SEGMENT_ID, *new_seg_id})); + ASSERT_EQ(0, getSegmentRowNumWithoutMVCC(DELTA_MERGE_FIRST_SEGMENT_ID)); + ASSERT_EQ(0, getSegmentRowNumWithoutMVCC(*new_seg_id)); +} +CATCH + + +TEST_P(SegmentSplitAtModeTest, SplitAtBoundary) +try +{ + auto new_seg_id = splitSegmentAt(DELTA_MERGE_FIRST_SEGMENT_ID, 100, split_mode); + ASSERT_TRUE(new_seg_id.has_value()); + ASSERT_EQ(split_mode == Segment::SplitMode::Logical, areSegmentsSharingStable({DELTA_MERGE_FIRST_SEGMENT_ID, *new_seg_id})); + { + auto r = splitSegmentAt(DELTA_MERGE_FIRST_SEGMENT_ID, 100, split_mode); + ASSERT_FALSE(r.has_value()); + } + { + auto r = splitSegmentAt(*new_seg_id, 100, split_mode); + ASSERT_FALSE(r.has_value()); + } +} +CATCH + + +TEST_P(SegmentSplitAtModeTest, SplitAtMemTableKey) +try +{ + writeSegment(DELTA_MERGE_FIRST_SEGMENT_ID, 100, /* at */ 0); + + auto new_seg_id = splitSegmentAt(DELTA_MERGE_FIRST_SEGMENT_ID, 30, split_mode); + ASSERT_TRUE(new_seg_id.has_value()); + ASSERT_EQ(split_mode == Segment::SplitMode::Logical, areSegmentsSharingStable({DELTA_MERGE_FIRST_SEGMENT_ID, *new_seg_id})); + ASSERT_EQ(30, getSegmentRowNumWithoutMVCC(DELTA_MERGE_FIRST_SEGMENT_ID)); + ASSERT_EQ(70, getSegmentRowNumWithoutMVCC(*new_seg_id)); +} +CATCH + + +TEST_P(SegmentSplitAtModeTest, SplitAtDeltaKey) +try +{ + writeSegment(DELTA_MERGE_FIRST_SEGMENT_ID, 100, /* at */ 0); + flushSegmentCache(DELTA_MERGE_FIRST_SEGMENT_ID); + + auto new_seg_id = splitSegmentAt(DELTA_MERGE_FIRST_SEGMENT_ID, 30, split_mode); + ASSERT_TRUE(new_seg_id.has_value()); + ASSERT_EQ(split_mode == Segment::SplitMode::Logical, areSegmentsSharingStable({DELTA_MERGE_FIRST_SEGMENT_ID, *new_seg_id})); + ASSERT_EQ(30, getSegmentRowNumWithoutMVCC(DELTA_MERGE_FIRST_SEGMENT_ID)); + ASSERT_EQ(70, getSegmentRowNumWithoutMVCC(*new_seg_id)); +} +CATCH + + +TEST_P(SegmentSplitAtModeTest, SplitAtStableKey) +try +{ + writeSegment(DELTA_MERGE_FIRST_SEGMENT_ID, 100, /* at */ 0); + flushSegmentCache(DELTA_MERGE_FIRST_SEGMENT_ID); + mergeSegmentDelta(DELTA_MERGE_FIRST_SEGMENT_ID); + + auto new_seg_id = splitSegmentAt(DELTA_MERGE_FIRST_SEGMENT_ID, 30, split_mode); + ASSERT_TRUE(new_seg_id.has_value()); + ASSERT_EQ(split_mode == Segment::SplitMode::Logical, areSegmentsSharingStable({DELTA_MERGE_FIRST_SEGMENT_ID, *new_seg_id})); + ASSERT_EQ(30, getSegmentRowNumWithoutMVCC(DELTA_MERGE_FIRST_SEGMENT_ID)); + ASSERT_EQ(70, 
getSegmentRowNumWithoutMVCC(*new_seg_id)); +} +CATCH + + +TEST_P(SegmentSplitAtModeTest, SplitAtEmptyKey) +try +{ + writeSegment(DELTA_MERGE_FIRST_SEGMENT_ID, 100, /* at */ 0); + flushSegmentCache(DELTA_MERGE_FIRST_SEGMENT_ID); + mergeSegmentDelta(DELTA_MERGE_FIRST_SEGMENT_ID); + + auto new_seg_id = splitSegmentAt(DELTA_MERGE_FIRST_SEGMENT_ID, 150, split_mode); + ASSERT_TRUE(new_seg_id.has_value()); + ASSERT_EQ(split_mode == Segment::SplitMode::Logical, areSegmentsSharingStable({DELTA_MERGE_FIRST_SEGMENT_ID, *new_seg_id})); + ASSERT_EQ(100, getSegmentRowNumWithoutMVCC(DELTA_MERGE_FIRST_SEGMENT_ID)); + ASSERT_EQ(0, getSegmentRowNumWithoutMVCC(*new_seg_id)); +} +CATCH + + +TEST_P(SegmentSplitAtModeTest, StableWithMemTable) +try +{ + writeSegment(DELTA_MERGE_FIRST_SEGMENT_ID, 100, /* at */ 0); + flushSegmentCache(DELTA_MERGE_FIRST_SEGMENT_ID); + mergeSegmentDelta(DELTA_MERGE_FIRST_SEGMENT_ID); + + writeSegment(DELTA_MERGE_FIRST_SEGMENT_ID, 60, /* at */ -30); + + auto new_seg_id = splitSegmentAt(DELTA_MERGE_FIRST_SEGMENT_ID, 10, split_mode); + ASSERT_TRUE(new_seg_id.has_value()); + ASSERT_EQ(split_mode == Segment::SplitMode::Logical, areSegmentsSharingStable({DELTA_MERGE_FIRST_SEGMENT_ID, *new_seg_id})); + ASSERT_EQ(50, getSegmentRowNumWithoutMVCC(DELTA_MERGE_FIRST_SEGMENT_ID)); + ASSERT_EQ(110, getSegmentRowNumWithoutMVCC(*new_seg_id)); + ASSERT_EQ(40, getSegmentRowNum(DELTA_MERGE_FIRST_SEGMENT_ID)); + ASSERT_EQ(90, getSegmentRowNum(*new_seg_id)); +} +CATCH + + +TEST_P(SegmentSplitAtModeTest, FlushMemTableAfterSplit) +try +{ + writeSegment(DELTA_MERGE_FIRST_SEGMENT_ID, 100, /* at */ 0); + flushSegmentCache(DELTA_MERGE_FIRST_SEGMENT_ID); + mergeSegmentDelta(DELTA_MERGE_FIRST_SEGMENT_ID); + + writeSegment(DELTA_MERGE_FIRST_SEGMENT_ID, 60, /* at */ -30); + + auto new_seg_id = splitSegmentAt(DELTA_MERGE_FIRST_SEGMENT_ID, 10, split_mode); + ASSERT_TRUE(new_seg_id.has_value()); + ASSERT_EQ(split_mode == Segment::SplitMode::Logical, areSegmentsSharingStable({DELTA_MERGE_FIRST_SEGMENT_ID, *new_seg_id})); + ASSERT_EQ(50, getSegmentRowNumWithoutMVCC(DELTA_MERGE_FIRST_SEGMENT_ID)); + ASSERT_EQ(110, getSegmentRowNumWithoutMVCC(*new_seg_id)); + ASSERT_EQ(40, getSegmentRowNum(DELTA_MERGE_FIRST_SEGMENT_ID)); + ASSERT_EQ(90, getSegmentRowNum(*new_seg_id)); + + flushSegmentCache(DELTA_MERGE_FIRST_SEGMENT_ID); + ASSERT_EQ(split_mode == Segment::SplitMode::Logical, areSegmentsSharingStable({DELTA_MERGE_FIRST_SEGMENT_ID, *new_seg_id})); + ASSERT_EQ(50, getSegmentRowNumWithoutMVCC(DELTA_MERGE_FIRST_SEGMENT_ID)); + ASSERT_EQ(110, getSegmentRowNumWithoutMVCC(*new_seg_id)); + ASSERT_EQ(40, getSegmentRowNum(DELTA_MERGE_FIRST_SEGMENT_ID)); + ASSERT_EQ(90, getSegmentRowNum(*new_seg_id)); + + // Split again at 50. 
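+        // Row accounting: new_seg keeps [10, 50), i.e. 40 stable rows plus the
+        // 20 newer delta rows over [10, 30) (counted separately before MVCC);
+        // right_id takes the 50 stable rows of [50, 100).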
+ { + auto right_id = splitSegmentAt(*new_seg_id, 50, split_mode); + ASSERT_TRUE(right_id.has_value()); + ASSERT_EQ(split_mode == Segment::SplitMode::Logical, areSegmentsSharingStable({DELTA_MERGE_FIRST_SEGMENT_ID, *right_id})); + ASSERT_EQ(50, getSegmentRowNumWithoutMVCC(DELTA_MERGE_FIRST_SEGMENT_ID)); + ASSERT_EQ(40 + 20, getSegmentRowNumWithoutMVCC(*new_seg_id)); + ASSERT_EQ(50, getSegmentRowNumWithoutMVCC(*right_id)); + } +} +CATCH + + +TEST_P(SegmentSplitAtModeTest, EmptySegmentSplitMultipleTimes) +try +{ + auto new_seg_id = splitSegmentAt(DELTA_MERGE_FIRST_SEGMENT_ID, 100, split_mode); + ASSERT_TRUE(new_seg_id.has_value()); + ASSERT_EQ(split_mode == Segment::SplitMode::Logical, areSegmentsSharingStable({DELTA_MERGE_FIRST_SEGMENT_ID, *new_seg_id})); + for (Int64 split_at = 99; split_at > -10; --split_at) + { + auto right_id = splitSegmentAt(DELTA_MERGE_FIRST_SEGMENT_ID, split_at, split_mode); + ASSERT_TRUE(right_id.has_value()); + ASSERT_EQ(split_mode == Segment::SplitMode::Logical, areSegmentsSharingStable({DELTA_MERGE_FIRST_SEGMENT_ID, *right_id})); + } +} +CATCH + + +TEST_P(SegmentSplitAtModeTest, MemTableSplitMultipleTimes) +try +{ + writeSegment(DELTA_MERGE_FIRST_SEGMENT_ID, 300, /* at */ 0); + + auto new_seg_id = splitSegmentAt(DELTA_MERGE_FIRST_SEGMENT_ID, 100, split_mode); + ASSERT_TRUE(new_seg_id.has_value()); + ASSERT_EQ(split_mode == Segment::SplitMode::Logical, areSegmentsSharingStable({DELTA_MERGE_FIRST_SEGMENT_ID, *new_seg_id})); + ASSERT_EQ(100, getSegmentRowNumWithoutMVCC(DELTA_MERGE_FIRST_SEGMENT_ID)); + ASSERT_EQ(200, getSegmentRowNumWithoutMVCC(*new_seg_id)); + ASSERT_EQ(100, getSegmentRowNum(DELTA_MERGE_FIRST_SEGMENT_ID)); + ASSERT_EQ(200, getSegmentRowNum(*new_seg_id)); + + for (Int64 split_at = 99; split_at >= 0; --split_at) + { + auto right_id = splitSegmentAt(DELTA_MERGE_FIRST_SEGMENT_ID, split_at, split_mode); + ASSERT_TRUE(right_id.has_value()); + ASSERT_EQ(split_mode == Segment::SplitMode::Logical, areSegmentsSharingStable({DELTA_MERGE_FIRST_SEGMENT_ID, *right_id})); + ASSERT_EQ(1, getSegmentRowNumWithoutMVCC(*right_id)); + ASSERT_EQ(1, getSegmentRowNum(*right_id)); + } +} +CATCH + + } // namespace tests } // namespace DM } // namespace DB diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_segment_test_basic.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_segment_test_basic.cpp index 0eee83b54c7..084a88de0f7 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_segment_test_basic.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_segment_test_basic.cpp @@ -25,6 +25,8 @@ #include #include +#include + namespace DB { namespace DM @@ -58,52 +60,33 @@ void SegmentTestBasic::reloadWithOptions(SegmentTestOptions config) segments[DELTA_MERGE_FIRST_SEGMENT_ID] = root_segment; } -PageId SegmentTestBasic::createNewSegmentWithSomeData() -{ - SegmentPtr new_segment; - std::tie(root_segment, new_segment) = root_segment->split(dmContext(), tableColumns()); - - const size_t num_rows_write_per_batch = 100; - { - // write to segment and flush - Block block = DMTestEnv::prepareSimpleWriteBlock(0, num_rows_write_per_batch, false); - new_segment->write(dmContext(), std::move(block), true); - } - { - // write to segment and don't flush - Block block = DMTestEnv::prepareSimpleWriteBlock(num_rows_write_per_batch, 2 * num_rows_write_per_batch, false); - new_segment->write(dmContext(), std::move(block), false); - } - return new_segment->segmentId(); -} - size_t SegmentTestBasic::getSegmentRowNumWithoutMVCC(PageId segment_id) { auto segment = segments[segment_id]; - auto in = 
segment->getInputStreamRaw(dmContext(), *tableColumns()); + auto in = segment->getInputStreamRaw(*dm_context, *tableColumns()); return getInputStreamNRows(in); } size_t SegmentTestBasic::getSegmentRowNum(PageId segment_id) { auto segment = segments[segment_id]; - auto in = segment->getInputStream(dmContext(), *tableColumns(), {segment->getRowKeyRange()}); + auto in = segment->getInputStream(*dm_context, *tableColumns(), {segment->getRowKeyRange()}); return getInputStreamNRows(in); } -std::optional SegmentTestBasic::splitSegment(PageId segment_id, bool check_rows) +std::optional SegmentTestBasic::splitSegment(PageId segment_id, Segment::SplitMode split_mode, bool check_rows) { - LOG_FMT_INFO(logger_op, "splitSegment, segment_id={}", segment_id); + LOG_FMT_INFO(logger_op, "splitSegment, segment_id={} split_mode={}", segment_id, magic_enum::enum_name(split_mode)); auto origin_segment = segments[segment_id]; size_t origin_segment_row_num = getSegmentRowNum(segment_id); - LOG_FMT_DEBUG(logger, "begin split, segment_id={} rows={}", segment_id, origin_segment_row_num); + LOG_FMT_DEBUG(logger, "begin split, segment_id={} split_mode={} rows={}", segment_id, magic_enum::enum_name(split_mode), origin_segment_row_num); - auto [left, right] = origin_segment->split(dmContext(), tableColumns()); + auto [left, right] = origin_segment->split(*dm_context, tableColumns(), /* use a calculated split point */ std::nullopt, split_mode); if (!left && !right) { - LOG_FMT_DEBUG(logger, "split not succeeded, segment_id={} rows={}", segment_id, origin_segment_row_num); + LOG_FMT_DEBUG(logger, "split not succeeded, segment_id={} split_mode={} rows={}", segment_id, magic_enum::enum_name(split_mode), origin_segment_row_num); return std::nullopt; } @@ -119,6 +102,53 @@ std::optional SegmentTestBasic::splitSegment(PageId segment_id, bool che EXPECT_EQ(origin_segment_row_num, left_rows + right_rows); LOG_FMT_DEBUG(logger, "split finish, left_id={} left_rows={} right_id={} right_rows={}", left->segmentId(), left_rows, right->segmentId(), right_rows); + operation_statistics[fmt::format("split{}", magic_enum::enum_name(split_mode))]++; + + return right->segmentId(); +} + +std::optional SegmentTestBasic::splitSegmentAt(PageId segment_id, Int64 split_at, Segment::SplitMode split_mode, bool check_rows) +{ + LOG_FMT_INFO(logger_op, "splitSegmentAt, segment_id={} split_at={} split_mode={}", segment_id, split_at, magic_enum::enum_name(split_mode)); + + RowKeyValue split_at_key; + if (options.is_common_handle) + { + WriteBufferFromOwnString ss; + ::DB::EncodeUInt(static_cast(TiDB::CodecFlagInt), ss); + ::DB::EncodeInt64(split_at, ss); + split_at_key = RowKeyValue{true, std::make_shared(ss.releaseStr()), split_at}; + } + else + { + split_at_key = RowKeyValue::fromHandle(split_at); + } + + auto origin_segment = segments[segment_id]; + size_t origin_segment_row_num = getSegmentRowNum(segment_id); + + LOG_FMT_DEBUG(logger, "begin splitAt, segment_id={} split_at={} split_at_key={} split_mode={} rows={}", segment_id, split_at, split_at_key.toDebugString(), magic_enum::enum_name(split_mode), origin_segment_row_num); + + auto [left, right] = origin_segment->split(*dm_context, tableColumns(), split_at_key, split_mode); + if (!left && !right) + { + LOG_FMT_DEBUG(logger, "splitAt not succeeded, segment_id={} split_at={} split_mode={} rows={}", segment_id, split_at, magic_enum::enum_name(split_mode), origin_segment_row_num); + return std::nullopt; + } + + RUNTIME_CHECK(left && right); + RUNTIME_CHECK(left->segmentId() == segment_id, segment_id, 
left->info()); + segments[left->segmentId()] = left; // The left segment is updated + segments[right->segmentId()] = right; + + auto left_rows = getSegmentRowNum(segment_id); + auto right_rows = getSegmentRowNum(right->segmentId()); + + if (check_rows) + EXPECT_EQ(origin_segment_row_num, left_rows + right_rows); + + LOG_FMT_DEBUG(logger, "splitAt finish, left_id={} left_rows={} right_id={} right_rows={}", left->segmentId(), left_rows, right->segmentId(), right_rows); + operation_statistics[fmt::format("splitAt{}", magic_enum::enum_name(split_mode))]++; return right->segmentId(); } @@ -148,7 +178,7 @@ void SegmentTestBasic::mergeSegment(const std::vector & segments_id, boo LOG_FMT_DEBUG(logger, "begin merge, segments=[{}] each_rows=[{}]", fmt::join(segments_id, ","), fmt::join(segments_rows, ",")); - SegmentPtr merged_segment = Segment::merge(dmContext(), tableColumns(), segments_to_merge); + SegmentPtr merged_segment = Segment::merge(*dm_context, tableColumns(), segments_to_merge); if (!merged_segment) { LOG_FMT_DEBUG(logger, "merge not succeeded, segments=[{}] each_rows=[{}]", fmt::join(segments_id, ","), fmt::join(segments_rows, ",")); @@ -163,6 +193,10 @@ void SegmentTestBasic::mergeSegment(const std::vector & segments_id, boo EXPECT_EQ(getSegmentRowNum(merged_segment->segmentId()), merged_rows); LOG_FMT_DEBUG(logger, "merge finish, merged_segment_id={} merge_from_segments=[{}] merged_rows={}", merged_segment->segmentId(), fmt::join(segments_id, ","), merged_rows); + if (segments_id.size() > 2) + operation_statistics["mergeMultiple"]++; + else + operation_statistics["mergeTwo"]++; } void SegmentTestBasic::mergeSegmentDelta(PageId segment_id, bool check_rows) @@ -171,12 +205,13 @@ void SegmentTestBasic::mergeSegmentDelta(PageId segment_id, bool check_rows) auto segment = segments[segment_id]; size_t segment_row_num = getSegmentRowNum(segment_id); - SegmentPtr merged_segment = segment->mergeDelta(dmContext(), tableColumns()); + SegmentPtr merged_segment = segment->mergeDelta(*dm_context, tableColumns()); segments[merged_segment->segmentId()] = merged_segment; if (check_rows) { EXPECT_EQ(getSegmentRowNum(merged_segment->segmentId()), segment_row_num); } + operation_statistics["mergeDelta"]++; } void SegmentTestBasic::flushSegmentCache(PageId segment_id) @@ -185,12 +220,17 @@ void SegmentTestBasic::flushSegmentCache(PageId segment_id) auto segment = segments[segment_id]; size_t segment_row_num = getSegmentRowNum(segment_id); - segment->flushCache(dmContext()); + segment->flushCache(*dm_context); EXPECT_EQ(getSegmentRowNum(segment_id), segment_row_num); + operation_statistics["flush"]++; } -std::pair SegmentTestBasic::getSegmentKeyRange(SegmentPtr segment) +std::pair SegmentTestBasic::getSegmentKeyRange(PageId segment_id) { + auto segment_it = segments.find(segment_id); + EXPECT_TRUE(segment_it != segments.end()); + const auto & segment = segment_it->second; + Int64 start_key, end_key; if (!options.is_common_handle) { @@ -223,7 +263,7 @@ std::pair SegmentTestBasic::getSegmentKeyRange(SegmentPtr segment) return {start_key, end_key}; } -void SegmentTestBasic::writeSegment(PageId segment_id, UInt64 write_rows) +void SegmentTestBasic::writeSegment(PageId segment_id, UInt64 write_rows, std::optional begin_key) { LOG_FMT_INFO(logger_op, "writeSegment, segment_id={} rows={}", segment_id, write_rows); @@ -235,7 +275,7 @@ void SegmentTestBasic::writeSegment(PageId segment_id, UInt64 write_rows) auto segment = segments[segment_id]; size_t segment_row_num = getSegmentRowNumWithoutMVCC(segment_id); - 
auto [start_key, end_key] = getSegmentKeyRange(segment); + auto [start_key, end_key] = getSegmentKeyRange(segment_id); LOG_FMT_DEBUG(logger, "write to segment, segment={} segment_rows={} start_key={} end_key={}", segment->info(), segment_row_num, start_key, end_key); @@ -249,21 +289,31 @@ void SegmentTestBasic::writeSegment(PageId segment_id, UInt64 write_rows) UInt64 remain_row_num = 0; if (segment_max_rows > write_rows) { - // The segment range is large enough, let's randomly pick a start key: - // Suppose we have segment range = [0, 11), which could contain at most 11 rows. - // Now we want to write 10 rows -- The write start key could be randomized in [0, 1]. - start_key = std::uniform_int_distribution{start_key, end_key - static_cast(write_rows)}(random); + if (begin_key.has_value()) + { + RUNTIME_CHECK(begin_key >= start_key, *begin_key, start_key); + RUNTIME_CHECK(begin_key < end_key, *begin_key, end_key); + start_key = *begin_key; + } + else + { + // The segment range is large enough, let's randomly pick a start key: + // Suppose we have segment range = [0, 11), which could contain at most 11 rows. + // Now we want to write 10 rows -- The write start key could be randomized in [0, 1]. + start_key = std::uniform_int_distribution{start_key, end_key - static_cast(write_rows)}(random); + } end_key = start_key + write_rows; } else { remain_row_num = write_rows - segment_max_rows; + RUNTIME_CHECK(!begin_key.has_value()); // Currently we don't support specifying start key when segment is small } { // write to segment and not flush LOG_FMT_DEBUG(logger, "write block to segment, block_range=[{}, {})", start_key, end_key); Block block = DMTestEnv::prepareSimpleWriteBlock(start_key, end_key, false, version, DMTestEnv::pk_name, EXTRA_HANDLE_COLUMN_ID, options.is_common_handle ? EXTRA_HANDLE_COLUMN_STRING_TYPE : EXTRA_HANDLE_COLUMN_INT_TYPE, options.is_common_handle); - segment->write(dmContext(), std::move(block), false); + segment->write(*dm_context, std::move(block), false); version++; } while (remain_row_num > 0) @@ -271,11 +321,12 @@ void SegmentTestBasic::writeSegment(PageId segment_id, UInt64 write_rows) UInt64 write_num = std::min(remain_row_num, segment_max_rows); LOG_FMT_DEBUG(logger, "write block to segment, block_range=[{}, {})", start_key, write_num + start_key); Block block = DMTestEnv::prepareSimpleWriteBlock(start_key, write_num + start_key, false, version, DMTestEnv::pk_name, EXTRA_HANDLE_COLUMN_ID, options.is_common_handle ? 
EXTRA_HANDLE_COLUMN_STRING_TYPE : EXTRA_HANDLE_COLUMN_INT_TYPE, options.is_common_handle); - segment->write(dmContext(), std::move(block), false); + segment->write(*dm_context, std::move(block), false); remain_row_num -= write_num; version++; } EXPECT_EQ(getSegmentRowNumWithoutMVCC(segment_id), segment_row_num + write_rows); + operation_statistics["write"]++; } void SegmentTestBasic::ingestDTFileIntoSegment(PageId segment_id, UInt64 write_rows) @@ -318,7 +369,7 @@ void SegmentTestBasic::ingestDTFileIntoSegment(PageId segment_id, UInt64 write_r auto segment = segments[segment_id]; size_t segment_row_num = getSegmentRowNumWithoutMVCC(segment_id); - auto [start_key, end_key] = getSegmentKeyRange(segment); + auto [start_key, end_key] = getSegmentKeyRange(segment_id); auto segment_max_rows = static_cast(end_key - start_key); if (segment_max_rows == 0) @@ -354,6 +405,7 @@ void SegmentTestBasic::ingestDTFileIntoSegment(PageId segment_id, UInt64 write_r version++; } EXPECT_EQ(getSegmentRowNumWithoutMVCC(segment_id), segment_row_num + write_rows); + operation_statistics["ingest"]++; } void SegmentTestBasic::writeSegmentWithDeletedPack(PageId segment_id, UInt64 write_rows) @@ -362,7 +414,7 @@ void SegmentTestBasic::writeSegmentWithDeletedPack(PageId segment_id, UInt64 wri auto segment = segments[segment_id]; size_t segment_row_num = getSegmentRowNumWithoutMVCC(segment_id); - auto [start_key, end_key] = getSegmentKeyRange(segment); + auto [start_key, end_key] = getSegmentKeyRange(segment_id); auto segment_max_rows = static_cast(end_key - start_key); if (segment_max_rows == 0) @@ -385,7 +437,7 @@ void SegmentTestBasic::writeSegmentWithDeletedPack(PageId segment_id, UInt64 wri // write to segment and not flush LOG_FMT_DEBUG(logger, "write block to segment, block_range=[{}, {})", start_key, end_key); Block block = DMTestEnv::prepareSimpleWriteBlock(start_key, end_key, false, version, DMTestEnv::pk_name, EXTRA_HANDLE_COLUMN_ID, options.is_common_handle ? EXTRA_HANDLE_COLUMN_STRING_TYPE : EXTRA_HANDLE_COLUMN_INT_TYPE, options.is_common_handle, 1, true, true); - segment->write(dmContext(), std::move(block), true); + segment->write(*dm_context, std::move(block), true); version++; } while (remain_row_num > 0) @@ -393,11 +445,12 @@ void SegmentTestBasic::writeSegmentWithDeletedPack(PageId segment_id, UInt64 wri UInt64 write_num = std::min(remain_row_num, segment_max_rows); LOG_FMT_DEBUG(logger, "write block to segment, block_range=[{}, {})", start_key, write_num + start_key); Block block = DMTestEnv::prepareSimpleWriteBlock(start_key, write_num + start_key, false, version, DMTestEnv::pk_name, EXTRA_HANDLE_COLUMN_ID, options.is_common_handle ? 
EXTRA_HANDLE_COLUMN_STRING_TYPE : EXTRA_HANDLE_COLUMN_INT_TYPE, options.is_common_handle, 1, true, true); - segment->write(dmContext(), std::move(block), true); + segment->write(*dm_context, std::move(block), true); remain_row_num -= write_num; version++; } EXPECT_EQ(getSegmentRowNumWithoutMVCC(segment_id), segment_row_num + write_rows); + operation_statistics["writeDelete"]++; } void SegmentTestBasic::deleteRangeSegment(PageId segment_id) @@ -405,86 +458,21 @@ void SegmentTestBasic::deleteRangeSegment(PageId segment_id) LOG_FMT_INFO(logger_op, "deleteRangeSegment, segment_id={}", segment_id); auto segment = segments[segment_id]; - segment->write(dmContext(), /*delete_range*/ segment->getRowKeyRange()); + segment->write(*dm_context, /*delete_range*/ segment->getRowKeyRange()); EXPECT_EQ(getSegmentRowNum(segment_id), 0); + operation_statistics["deleteRange"]++; } -void SegmentTestBasic::writeRandomSegment() -{ - if (segments.empty()) - return; - PageId random_segment_id = getRandomSegmentId(); - auto write_rows = std::uniform_int_distribution{20, 100}(random); - LOG_FMT_DEBUG(logger, "start random write, segment_id={} write_rows={} all_segments={}", random_segment_id, write_rows, segments.size()); - writeSegment(random_segment_id, write_rows); -} - -void SegmentTestBasic::writeRandomSegmentWithDeletedPack() +bool SegmentTestBasic::areSegmentsSharingStable(const std::vector & segments_id) { - if (segments.empty()) - return; - PageId random_segment_id = getRandomSegmentId(); - auto write_rows = std::uniform_int_distribution{20, 100}(random); - LOG_FMT_DEBUG(logger, "start random write delete, segment_id={} write_rows={} all_segments={}", random_segment_id, write_rows, segments.size()); - writeSegmentWithDeletedPack(random_segment_id, write_rows); -} - -void SegmentTestBasic::deleteRangeRandomSegment() -{ - if (segments.empty()) - return; - PageId random_segment_id = getRandomSegmentId(); - LOG_FMT_DEBUG(logger, "start random delete range, segment_id={} all_segments={}", random_segment_id, segments.size()); - deleteRangeSegment(random_segment_id); -} - -void SegmentTestBasic::splitRandomSegment() -{ - if (segments.empty()) - return; - PageId random_segment_id = getRandomSegmentId(); - LOG_FMT_DEBUG(logger, "start random split, segment_id={} all_segments={}", random_segment_id, segments.size()); - splitSegment(random_segment_id); -} - -void SegmentTestBasic::mergeRandomSegment() -{ - if (segments.size() < 2) - return; - auto segments_id = getRandomMergeableSegments(); - LOG_FMT_DEBUG(logger, "start random merge, segments_id=[{}] all_segments={}", fmt::join(segments_id, ","), segments.size()); - mergeSegment(segments_id); -} - -void SegmentTestBasic::mergeDeltaRandomSegment() -{ - if (segments.empty()) - return; - PageId random_segment_id = getRandomSegmentId(); - LOG_FMT_DEBUG(logger, "start random merge delta, segment_id={} all_segments={}", random_segment_id, segments.size()); - mergeSegmentDelta(random_segment_id); -} - -void SegmentTestBasic::flushCacheRandomSegment() -{ - if (segments.empty()) - return; - PageId random_segment_id = getRandomSegmentId(); - LOG_FMT_DEBUG(logger, "start random flush cache, segment_id={} all_segments={}", random_segment_id, segments.size()); - flushSegmentCache(random_segment_id); -} - -void SegmentTestBasic::randomSegmentTest(size_t operator_count) -{ - auto probabilities = std::vector{}; - std::transform(segment_operator_entries.begin(), segment_operator_entries.end(), std::back_inserter(probabilities), [](auto v) { return v.first; }); - - auto dist = 
std::discrete_distribution{probabilities.begin(), probabilities.end()}; - for (size_t i = 0; i < operator_count; i++) + RUNTIME_CHECK(segments_id.size() >= 2); + auto base_stable = segments[segments_id[0]]->getStable()->getDMFilesString(); + for (size_t i = 1; i < segments_id.size(); i++) { - auto op_idx = dist(random); - segment_operator_entries[op_idx].second(); + if (base_stable != segments[segments_id[i]]->getStable()->getDMFilesString()) + return false; } + return true; } PageId SegmentTestBasic::getRandomSegmentId() // Complexity is O(n) @@ -500,44 +488,6 @@ PageId SegmentTestBasic::getRandomSegmentId() // Complexity is O(n) return segment_id; } -std::vector SegmentTestBasic::getRandomMergeableSegments() -{ - RUNTIME_CHECK(segments.size() >= 2, segments.size()); - - // Merge 2~6 segments (at most 1/2 of all segments). - auto max_merge_segments = std::uniform_int_distribution{2, std::clamp(static_cast(segments.size()) / 2, 2, 6)}(random); - - std::vector segments_id; - segments_id.reserve(max_merge_segments); - - while (true) - { - segments_id.clear(); - segments_id.push_back(getRandomSegmentId()); - - for (int i = 1; i < max_merge_segments; i++) - { - auto last_segment_id = segments_id.back(); - RUNTIME_CHECK(segments.find(last_segment_id) != segments.end(), last_segment_id); - auto last_segment = segments[last_segment_id]; - if (last_segment->getRowKeyRange().isEndInfinite()) - break; - - auto next_segment_id = last_segment->nextSegmentId(); - RUNTIME_CHECK(segments.find(next_segment_id) != segments.end(), last_segment->info()); - auto next_segment = segments[next_segment_id]; - RUNTIME_CHECK(next_segment->segmentId() == next_segment_id, next_segment->info(), next_segment_id); - RUNTIME_CHECK(compare(last_segment->getRowKeyRange().getEnd(), next_segment->getRowKeyRange().getStart()) == 0, last_segment->info(), next_segment->info()); - segments_id.push_back(next_segment_id); - } - - if (segments_id.size() >= 2) - break; - } - - return segments_id; -} - SegmentPtr SegmentTestBasic::reload(bool is_common_handle, const ColumnDefinesPtr & pre_define_columns, DB::Settings && db_settings) { TiFlashStorageTestBasic::reload(std::move(db_settings)); @@ -550,10 +500,8 @@ SegmentPtr SegmentTestBasic::reload(bool is_common_handle, const ColumnDefinesPt return Segment::newSegment(*dm_context, table_columns, RowKeyRange::newAll(is_common_handle, 1), storage_pool->newMetaPageId(), 0); } -void SegmentTestBasic::setColumns(const ColumnDefinesPtr & columns) +void SegmentTestBasic::reloadDMContext() { - *table_columns = *columns; - dm_context = std::make_unique(*db_context, *storage_path_pool, *storage_pool, @@ -563,6 +511,24 @@ void SegmentTestBasic::setColumns(const ColumnDefinesPtr & columns) 1, db_context->getSettingsRef()); } + +void SegmentTestBasic::setColumns(const ColumnDefinesPtr & columns) +{ + *table_columns = *columns; + reloadDMContext(); +} + +void SegmentTestBasic::printFinishedOperations() +{ + LOG_FMT_INFO(logger, "======= Begin Finished Operations Statistics ======="); + LOG_FMT_INFO(logger, "Operation Kinds: {}", operation_statistics.size()); + for (auto [name, n] : operation_statistics) + { + LOG_FMT_INFO(logger, "{}: {}", name, n); + } + LOG_FMT_INFO(logger, "======= End Finished Operations Statistics ======="); +} + } // namespace tests } // namespace DM } // namespace DB diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_segment_test_basic.h b/dbms/src/Storages/DeltaMerge/tests/gtest_segment_test_basic.h index 5a07066cea9..0f79e1e6985 100644 --- 
a/dbms/src/Storages/DeltaMerge/tests/gtest_segment_test_basic.h
+++ b/dbms/src/Storages/DeltaMerge/tests/gtest_segment_test_basic.h
@@ -39,36 +39,47 @@ class SegmentTestBasic : public DB::base::TiFlashStorageTestBasic
         DB::Settings db_settings;
     };
 
+    void SetUp() override
+    {
+        TiFlashStorageTestBasic::SetUp();
+        reloadWithOptions({});
+    }
+
 public:
     void reloadWithOptions(SegmentTestOptions config);
 
-    // When `check_rows` is true, it will compare the rows num before and after the segment update.
-    // So if there is some write during the segment update, it will report false failure if `check_rows` is true.
-    std::optional splitSegment(PageId segment_id, bool check_rows = true);
+    /**
+     * When `check_rows` is true, it will compare the row count before and after the segment update.
+     * So if there are writes during the segment update, it may report a false failure when `check_rows` is true.
+     */
+    std::optional splitSegment(PageId segment_id, Segment::SplitMode split_mode = Segment::SplitMode::Auto, bool check_rows = true);
+    std::optional splitSegmentAt(PageId segment_id, Int64 split_at, Segment::SplitMode split_mode = Segment::SplitMode::Auto, bool check_rows = true);
     void mergeSegment(const std::vector & segments, bool check_rows = true);
     void mergeSegmentDelta(PageId segment_id, bool check_rows = true);
-    void flushSegmentCache(PageId segment_id);
-    void writeSegment(PageId segment_id, UInt64 write_rows = 100);
+
+    /**
+     * When begin_key is specified, new rows will be written starting from the specified key. Otherwise, new rows may be
+     * written randomly in the segment range.
+     */
+    void writeSegment(PageId segment_id, UInt64 write_rows = 100, std::optional begin_key = std::nullopt);
     void ingestDTFileIntoSegment(PageId segment_id, UInt64 write_rows = 100);
     void writeSegmentWithDeletedPack(PageId segment_id, UInt64 write_rows = 100);
     void deleteRangeSegment(PageId segment_id);
+    size_t getSegmentRowNumWithoutMVCC(PageId segment_id);
+    size_t getSegmentRowNum(PageId segment_id);
 
-    void writeRandomSegment();
-    void writeRandomSegmentWithDeletedPack();
-    void deleteRangeRandomSegment();
-    void splitRandomSegment();
-    void mergeRandomSegment();
-    void mergeDeltaRandomSegment();
-    void flushCacheRandomSegment();
+    PageId getRandomSegmentId();
 
-    void randomSegmentTest(size_t operator_count);
+    /**
+     * You must pass at least 2 segments. Checks whether all the segments passed in share the same stable.
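+     * For example, two segments produced by SplitMode::Logical are expected to share one stable,
+     * while segments produced by SplitMode::Physical own independent stables (see the SegmentSplitAtModeTest cases).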
+     */
+    bool areSegmentsSharingStable(const std::vector & segments_id);
 
-    PageId createNewSegmentWithSomeData();
-    size_t getSegmentRowNumWithoutMVCC(PageId segment_id);
-    size_t getSegmentRowNum(PageId segment_id);
-    std::pair getSegmentKeyRange(SegmentPtr segment);
+    std::pair getSegmentKeyRange(PageId segment_id);
+
+    void printFinishedOperations();
 
 protected:
     std::mt19937 random;
@@ -76,32 +87,8 @@ class SegmentTestBasic : public DB::base::TiFlashStorageTestBasic
 
     //
    std::map segments;
 
-    const std::vector>> segment_operator_entries = {
-        {1.0, [this] {
-             writeRandomSegment();
-         }},
-        {0.25, [this] {
-             deleteRangeRandomSegment();
-         }},
-        {1.0, [this] {
-             splitRandomSegment();
-         }},
-        {0.5, [this] {
-             mergeRandomSegment();
-         }},
-        {1.0, [this] {
-             mergeDeltaRandomSegment();
-         }},
-        {1.0, [this] {
-             flushCacheRandomSegment();
-         }},
-        {0.25, [this] {
-             writeRandomSegmentWithDeletedPack();
-         }}};
-
-    PageId getRandomSegmentId();
-
-    std::vector getRandomMergeableSegments();
+    //
+    std::map operation_statistics;
 
     SegmentPtr reload(bool is_common_handle, const ColumnDefinesPtr & pre_define_columns, DB::Settings && db_settings);
 
@@ -110,7 +97,11 @@ class SegmentTestBasic : public DB::base::TiFlashStorageTestBasic
 
     const ColumnDefinesPtr & tableColumns() const { return table_columns; }
 
-    DMContext & dmContext() { return *dm_context; }
+    /**
+     * Reload a new DMContext according to the latest storage status.
+     * For example, if you have changed the settings, you should grab a new DMContext.
+     */
+    void reloadDMContext();
 
 protected:
     /// all these vars live as refs in dm_context
diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_segment_test_randomized.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_segment_test_randomized.cpp
new file mode 100644
index 00000000000..4b4cf19bf82
--- /dev/null
+++ b/dbms/src/Storages/DeltaMerge/tests/gtest_segment_test_randomized.cpp
@@ -0,0 +1,261 @@
+// Copyright 2022 PingCAP, Ltd.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include
+#include
+
+#include
+
+namespace DB
+{
+namespace DM
+{
+namespace tests
+{
+
+class SegmentRandomizedTest : public SegmentTestBasic
+{
+public:
+    void run(size_t action_n, Int64 rand_min, Int64 rand_max)
+    {
+        // Hack: Before doing any operations, let's limit the segment to a smaller range, to make operations related to data more effective.
+        {
+            RUNTIME_CHECK(rand_min < rand_max, rand_min, rand_max);
+            auto id_1 = splitSegmentAt(DELTA_MERGE_FIRST_SEGMENT_ID, rand_min, Segment::SplitMode::Physical);
+            RUNTIME_CHECK(id_1.has_value());
+            auto id_2 = splitSegmentAt(*id_1, rand_max, Segment::SplitMode::Physical);
+            RUNTIME_CHECK(id_2.has_value());
+
+            outbound_left_seg = DELTA_MERGE_FIRST_SEGMENT_ID;
+            outbound_right_seg = *id_2;
+            segments.erase(outbound_left_seg);
+            segments.erase(outbound_right_seg);
+
+            const auto [seg_min, seg_max] = getSegmentKeyRange(*id_1);
+            RUNTIME_CHECK(seg_min == rand_min);
+            RUNTIME_CHECK(seg_max == rand_max);
+        }
+
+        // Workload body: Iterate n times over all possible actions.
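+        // Each action below is drawn with probability proportional to its weight via
+        // std::discrete_distribution, e.g. a weight of 1.0 is picked ten times as often as 0.1.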
+ { + auto probabilities = std::vector{}; + std::transform(actions.begin(), actions.end(), std::back_inserter(probabilities), [](auto v) { return v.first; }); + + auto dist = std::discrete_distribution{probabilities.begin(), probabilities.end()}; + for (size_t i = 0; i < action_n; i++) + { + auto op_idx = dist(random); + actions[op_idx].second(this); + } + } + + // TODO (wenxuan): Add some post checks, like, whether PageStorage has leaks. + + printFinishedOperations(); + } + +protected: + const std::vector>> actions = { + {1.0, &SegmentRandomizedTest::writeRandomSegment}, + {0.1, &SegmentRandomizedTest::deleteRangeRandomSegment}, + {1.0, &SegmentRandomizedTest::splitRandomSegment}, + {1.0, &SegmentRandomizedTest::splitAtRandomSegment}, + {0.25, &SegmentRandomizedTest::mergeRandomSegment}, + {1.0, &SegmentRandomizedTest::mergeDeltaRandomSegment}, + {1.0, &SegmentRandomizedTest::flushCacheRandomSegment}, + {0.25, &SegmentRandomizedTest::writeRandomSegmentWithDeletedPack}}; + + /** + * (-∞, rand_min). Hack: This segment is intentionally removed from the "segments" map to avoid being picked up. + */ + PageId outbound_left_seg{}; + + /** + * [rand_max, +∞). Hack: This segment is intentionally removed from the "segments" map to avoid being picked up. + */ + PageId outbound_right_seg{}; + + void writeRandomSegment() + { + if (segments.empty()) + return; + auto segment_id = getRandomSegmentId(); + auto write_rows = std::uniform_int_distribution{20, 100}(random); + LOG_FMT_DEBUG(logger, "start random write, segment_id={} write_rows={} all_segments={}", segment_id, write_rows, segments.size()); + writeSegment(segment_id, write_rows); + } + + void writeRandomSegmentWithDeletedPack() + { + if (segments.empty()) + return; + auto segment_id = getRandomSegmentId(); + auto write_rows = std::uniform_int_distribution{20, 100}(random); + LOG_FMT_DEBUG(logger, "start random write delete, segment_id={} write_rows={} all_segments={}", segment_id, write_rows, segments.size()); + writeSegmentWithDeletedPack(segment_id, write_rows); + } + + void deleteRangeRandomSegment() + { + if (segments.empty()) + return; + auto segment_id = getRandomSegmentId(); + LOG_FMT_DEBUG(logger, "start random delete range, segment_id={} all_segments={}", segment_id, segments.size()); + deleteRangeSegment(segment_id); + } + + void splitRandomSegment() + { + if (segments.empty()) + return; + auto segment_id = getRandomSegmentId(); + auto split_mode = getRandomSplitMode(); + LOG_FMT_DEBUG(logger, "start random split, segment_id={} mode={} all_segments={}", segment_id, magic_enum::enum_name(split_mode), segments.size()); + splitSegment(segment_id, split_mode); + } + + void splitAtRandomSegment() + { + if (segments.empty()) + return; + auto segment_id = getRandomSegmentId(); + auto split_mode = getRandomSplitMode(); + const auto [start, end] = getSegmentKeyRange(segment_id); + if (end - start <= 1) + return; + auto split_at = std::uniform_int_distribution{start, end - 1}(random); + LOG_FMT_DEBUG(logger, "start random split at, segment_id={} split_at={} mode={} all_segments={}", segment_id, split_at, magic_enum::enum_name(split_mode), segments.size()); + splitSegmentAt(segment_id, split_at, split_mode); + } + + void mergeRandomSegment() + { + if (segments.size() < 2) + return; + auto segments_id = getRandomMergeableSegments(); + LOG_FMT_DEBUG(logger, "start random merge, segments_id=[{}] all_segments={}", fmt::join(segments_id, ","), segments.size()); + mergeSegment(segments_id); + } + + void mergeDeltaRandomSegment() + { + if 
(segments.empty()) + return; + PageId random_segment_id = getRandomSegmentId(); + LOG_FMT_DEBUG(logger, "start random merge delta, segment_id={} all_segments={}", random_segment_id, segments.size()); + mergeSegmentDelta(random_segment_id); + } + + void flushCacheRandomSegment() + { + if (segments.empty()) + return; + PageId random_segment_id = getRandomSegmentId(); + LOG_FMT_DEBUG(logger, "start random flush cache, segment_id={} all_segments={}", random_segment_id, segments.size()); + flushSegmentCache(random_segment_id); + } + + Segment::SplitMode getRandomSplitMode() + { + int mode = std::uniform_int_distribution{1, 2}(random); + switch (mode) + { + case 1: + return Segment::SplitMode::Physical; + case 2: + return Segment::SplitMode::Logical; + default: + throw DB::Exception("Unexpected mode"); + } + } + + std::vector getRandomMergeableSegments() + { + RUNTIME_CHECK(segments.size() >= 2, segments.size()); + + // Merge 2~6 segments (at most 1/2 of all segments). + auto max_merge_segments = std::uniform_int_distribution{2, std::clamp(static_cast(segments.size()) / 2, 2, 6)}(random); + + std::vector segments_id; + segments_id.reserve(max_merge_segments); + + while (true) + { + segments_id.clear(); + segments_id.push_back(getRandomSegmentId()); + + for (int i = 1; i < max_merge_segments; i++) + { + auto last_segment_id = segments_id.back(); + RUNTIME_CHECK(segments.find(last_segment_id) != segments.end(), last_segment_id); + auto last_segment = segments[last_segment_id]; + if (last_segment->getRowKeyRange().isEndInfinite()) + break; + if (last_segment->nextSegmentId() == outbound_right_seg) + break; + + auto next_segment_id = last_segment->nextSegmentId(); + RUNTIME_CHECK(segments.find(next_segment_id) != segments.end(), last_segment->info()); + auto next_segment = segments[next_segment_id]; + RUNTIME_CHECK(next_segment->segmentId() == next_segment_id, next_segment->info(), next_segment_id); + RUNTIME_CHECK(compare(last_segment->getRowKeyRange().getEnd(), next_segment->getRowKeyRange().getStart()) == 0, last_segment->info(), next_segment->info()); + segments_id.push_back(next_segment_id); + } + + if (segments_id.size() >= 2) + break; + } + + return segments_id; + } +}; + + +TEST_F(SegmentRandomizedTest, FastCommonHandle) +try +{ + SegmentTestOptions options; + options.is_common_handle = true; + reloadWithOptions(options); + run(/* n */ 500, /* min key */ -50000, /* max key */ 50000); +} +CATCH + + +TEST_F(SegmentRandomizedTest, FastIntHandle) +try +{ + SegmentTestOptions options; + options.is_common_handle = false; + reloadWithOptions(options); + run(/* n */ 500, /* min key */ -50000, /* max key */ 50000); +} +CATCH + + +// TODO: Run it in CI as a long-running test. +TEST_F(SegmentRandomizedTest, DISABLED_ForCI) +try +{ + SegmentTestOptions options; + options.is_common_handle = true; + reloadWithOptions(options); + run(50000, /* min key */ -5000000, /* max key */ 5000000); +} +CATCH + + +} // namespace tests +} // namespace DM +} // namespace DB From 478f3050cf03151512985fb53343e1c47219913b Mon Sep 17 00:00:00 2001 From: jinhelin Date: Thu, 15 Sep 2022 22:44:59 +0800 Subject: [PATCH 12/17] fix unittests tsan warnings. 
(#5882)

close pingcap/tiflash#5881

---
 dbms/src/Server/Server.cpp                                | 4 ++++
 dbms/src/Storages/DeltaMerge/ReadThread/SegmentReader.cpp | 6 ++++++
 dbms/src/Storages/DeltaMerge/ReadThread/SegmentReader.h   | 4 ++++
 dbms/src/TestUtils/gtests_dbms_main.cpp                   | 4 ++++
 4 files changed, 18 insertions(+)

diff --git a/dbms/src/Server/Server.cpp b/dbms/src/Server/Server.cpp
index 30598a37b8f..34f63b32579 100644
--- a/dbms/src/Server/Server.cpp
+++ b/dbms/src/Server/Server.cpp
@@ -1223,6 +1223,10 @@ int Server::main(const std::vector & /*args*/)
           * table engines could use Context on destroy.
           */
         LOG_FMT_INFO(log, "Shutting down storages.");
+        // `SegmentReader` threads may hold a segment and its delta-index for read.
+        // `Context::shutdown()` will destroy `DeltaIndexManager`.
+        // So, stop threads explicitly before `Context::shutdown()`.
+        DB::DM::SegmentReaderPoolManager::instance().stop();
         global_context->shutdown();
         LOG_FMT_DEBUG(log, "Shutted down storages.");
     });
diff --git a/dbms/src/Storages/DeltaMerge/ReadThread/SegmentReader.cpp b/dbms/src/Storages/DeltaMerge/ReadThread/SegmentReader.cpp
index 216527e506f..25c18b3000c 100644
--- a/dbms/src/Storages/DeltaMerge/ReadThread/SegmentReader.cpp
+++ b/dbms/src/Storages/DeltaMerge/ReadThread/SegmentReader.cpp
@@ -237,4 +237,10 @@ bool SegmentReaderPoolManager::isSegmentReader() const
 {
     return reader_ids.find(std::this_thread::get_id()) != reader_ids.end();
 }
+
+void SegmentReaderPoolManager::stop()
+{
+    reader_pools.clear();
+    reader_ids.clear();
+}
 } // namespace DB::DM
diff --git a/dbms/src/Storages/DeltaMerge/ReadThread/SegmentReader.h b/dbms/src/Storages/DeltaMerge/ReadThread/SegmentReader.h
index fc2a26b254f..65d24865d3d 100644
--- a/dbms/src/Storages/DeltaMerge/ReadThread/SegmentReader.h
+++ b/dbms/src/Storages/DeltaMerge/ReadThread/SegmentReader.h
@@ -66,6 +66,10 @@ class SegmentReaderPoolManager
     void addTask(MergedTaskPtr && task);
     bool isSegmentReader() const;
 
+    // Explicitly release reader_pools and reader_ids.
+    // Threads need to be stopped before Context::shutdown().
+    void stop();
+
 private:
     SegmentReaderPoolManager();
     std::vector> reader_pools;
diff --git a/dbms/src/TestUtils/gtests_dbms_main.cpp b/dbms/src/TestUtils/gtests_dbms_main.cpp
index eab4340def8..3bcf1659eb2 100644
--- a/dbms/src/TestUtils/gtests_dbms_main.cpp
+++ b/dbms/src/TestUtils/gtests_dbms_main.cpp
@@ -77,6 +77,10 @@ int main(int argc, char ** argv)
 
     auto ret = RUN_ALL_TESTS();
 
+    // `SegmentReader` threads may hold a segment and its delta-index for read.
+    // `TiFlashTestEnv::shutdown()` will destroy `DeltaIndexManager`.
+    // Stop threads explicitly before `TiFlashTestEnv::shutdown()`.
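+    // stop() releases the reader pools; assuming each pool's destructor joins its threads,
+    // no reader can touch the delta-index afterwards.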
+ DB::DM::SegmentReaderPoolManager::instance().stop(); DB::tests::TiFlashTestEnv::shutdown(); return ret; From 8a5bdafd54812054f0727cc4dff613fae4b7a914 Mon Sep 17 00:00:00 2001 From: SeaRise Date: Fri, 16 Sep 2022 00:18:59 +0800 Subject: [PATCH 13/17] replace some `enumToString` by `magic_enum::enum_name` (#5901) ref pingcap/tiflash#5758 --- dbms/src/Flash/Mpp/MPPTask.cpp | 22 +++----------- dbms/src/Flash/Mpp/MPPTask.h | 1 - dbms/src/Flash/Mpp/MPPTaskManager.cpp | 3 +- dbms/src/Flash/Mpp/MPPTaskStatistics.cpp | 4 ++- dbms/src/Flash/Mpp/TaskStatus.cpp | 38 ------------------------ dbms/src/Flash/Mpp/TaskStatus.h | 2 -- dbms/src/Flash/Planner/PlanType.cpp | 27 ++--------------- 7 files changed, 12 insertions(+), 85 deletions(-) delete mode 100644 dbms/src/Flash/Mpp/TaskStatus.cpp diff --git a/dbms/src/Flash/Mpp/MPPTask.cpp b/dbms/src/Flash/Mpp/MPPTask.cpp index 121352595aa..f859c7eba1f 100644 --- a/dbms/src/Flash/Mpp/MPPTask.cpp +++ b/dbms/src/Flash/Mpp/MPPTask.cpp @@ -38,6 +38,7 @@ #include #include +#include #include namespace DB @@ -54,21 +55,6 @@ extern const char force_no_local_region_for_mpp_task[]; extern const char random_task_lifecycle_failpoint[]; } // namespace FailPoints -String abortTypeToString(AbortType abort_type) -{ - String ret; - switch (abort_type) - { - case AbortType::ONCANCELLATION: - ret = "ONCANCELLATION"; - break; - case AbortType::ONERROR: - ret = "ONERROR"; - break; - } - return ret; -} - MPPTask::MPPTask(const mpp::TaskMeta & meta_, const ContextPtr & context_) : context(context_) , meta(meta_) @@ -438,7 +424,7 @@ void MPPTask::runImpl() if (switchStatus(RUNNING, FINISHED)) LOG_INFO(log, "finish task"); else - LOG_FMT_WARNING(log, "finish task which is in {} state", taskStatusToString(status)); + LOG_FMT_WARNING(log, "finish task which is in {} state", magic_enum::enum_name(status.load())); if (status == FINISHED) { // todo when error happens, should try to update the metrics if it is available @@ -502,7 +488,7 @@ void MPPTask::handleError(const String & error_msg) void MPPTask::abort(const String & message, AbortType abort_type) { - String abort_type_string = abortTypeToString(abort_type); + auto abort_type_string = magic_enum::enum_name(abort_type); TaskStatus next_task_status; switch (abort_type) { @@ -519,7 +505,7 @@ void MPPTask::abort(const String & message, AbortType abort_type) auto previous_status = status.load(); if (previous_status == FINISHED || previous_status == CANCELLED || previous_status == FAILED) { - LOG_FMT_WARNING(log, "task already in {} state", taskStatusToString(previous_status)); + LOG_FMT_WARNING(log, "task already in {} state", magic_enum::enum_name(previous_status)); return; } else if (previous_status == INITIALIZING && switchStatus(INITIALIZING, next_task_status)) diff --git a/dbms/src/Flash/Mpp/MPPTask.h b/dbms/src/Flash/Mpp/MPPTask.h index 524d09a42aa..863e815e02b 100644 --- a/dbms/src/Flash/Mpp/MPPTask.h +++ b/dbms/src/Flash/Mpp/MPPTask.h @@ -45,7 +45,6 @@ enum class AbortType ONCANCELLATION, ONERROR, }; -String abortTypeToString(AbortType abort_type); class MPPTask : public std::enable_shared_from_this , private boost::noncopyable diff --git a/dbms/src/Flash/Mpp/MPPTaskManager.cpp b/dbms/src/Flash/Mpp/MPPTaskManager.cpp index 808255a506d..6d5c51a198b 100644 --- a/dbms/src/Flash/Mpp/MPPTaskManager.cpp +++ b/dbms/src/Flash/Mpp/MPPTaskManager.cpp @@ -18,6 +18,7 @@ #include #include +#include #include #include @@ -92,7 +93,7 @@ class MPPTaskCancelHelper void MPPTaskManager::abortMPPQuery(UInt64 query_id, const String & reason, 
AbortType abort_type) { - LOG_WARNING(log, fmt::format("Begin to abort query: {}, abort type: {}, reason: {}", query_id, abortTypeToString(abort_type), reason)); + LOG_WARNING(log, fmt::format("Begin to abort query: {}, abort type: {}, reason: {}", query_id, magic_enum::enum_name(abort_type), reason)); MPPQueryTaskSetPtr task_set; { /// abort task may take a long time, so first diff --git a/dbms/src/Flash/Mpp/MPPTaskStatistics.cpp b/dbms/src/Flash/Mpp/MPPTaskStatistics.cpp index 8191099a5dc..28cb7428eb4 100644 --- a/dbms/src/Flash/Mpp/MPPTaskStatistics.cpp +++ b/dbms/src/Flash/Mpp/MPPTaskStatistics.cpp @@ -21,6 +21,8 @@ #include #include +#include + namespace DB { MPPTaskStatistics::MPPTaskStatistics(const MPPTaskId & id_, String address_) @@ -121,7 +123,7 @@ void MPPTaskStatistics::logTracingJson() local_input_bytes, remote_input_bytes, output_bytes, - taskStatusToString(status), + magic_enum::enum_name(status), error_message, working_time, memory_peak); diff --git a/dbms/src/Flash/Mpp/TaskStatus.cpp b/dbms/src/Flash/Mpp/TaskStatus.cpp deleted file mode 100644 index c87ae2b8eb4..00000000000 --- a/dbms/src/Flash/Mpp/TaskStatus.cpp +++ /dev/null @@ -1,38 +0,0 @@ -// Copyright 2022 PingCAP, Ltd. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include -#include - -namespace DB -{ -StringRef taskStatusToString(const TaskStatus & status) -{ - switch (status) - { - case INITIALIZING: - return "INITIALIZING"; - case RUNNING: - return "RUNNING"; - case FINISHED: - return "FINISHED"; - case CANCELLED: - return "CANCELLED"; - case FAILED: - return "FAILED"; - default: - throw Exception("Unknown TaskStatus"); - } -} -} // namespace DB diff --git a/dbms/src/Flash/Mpp/TaskStatus.h b/dbms/src/Flash/Mpp/TaskStatus.h index 0997c8adc52..b745cfd7ec8 100644 --- a/dbms/src/Flash/Mpp/TaskStatus.h +++ b/dbms/src/Flash/Mpp/TaskStatus.h @@ -26,6 +26,4 @@ enum TaskStatus CANCELLED, FAILED, }; - -StringRef taskStatusToString(const TaskStatus & status); } // namespace DB diff --git a/dbms/src/Flash/Planner/PlanType.cpp b/dbms/src/Flash/Planner/PlanType.cpp index 867b4845e9b..89609acd377 100644 --- a/dbms/src/Flash/Planner/PlanType.cpp +++ b/dbms/src/Flash/Planner/PlanType.cpp @@ -12,35 +12,14 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include #include +#include + namespace DB { String PlanType::toString() const { - switch (enum_value) - { -#define M(t) \ - case t: \ - return #t; - M(Limit) - M(TopN) - M(Filter) - M(Aggregation) - M(ExchangeSender) - M(MockExchangeSender) - M(ExchangeReceiver) - M(MockExchangeReceiver) - M(Projection) - M(Window) - M(WindowSort) - M(TableScan) - M(MockTableScan) - M(Join) -#undef M - default: - throw TiFlashException("Unknown PlanType", Errors::Planner::Internal); - } + return String(magic_enum::enum_name(enum_value)); } } // namespace DB From 1e43f9fd9381283b138c504c14e216665876911b Mon Sep 17 00:00:00 2001 From: Calvin Neo Date: Fri, 16 Sep 2022 09:08:59 +0800 Subject: [PATCH 14/17] update proxy of master to raftstore-proxy-6.2 by proxy pr 171 (#5903) ref pingcap/tiflash#4982 --- contrib/tiflash-proxy | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/tiflash-proxy b/contrib/tiflash-proxy index 1ffec9a6b5c..21fa71692fc 160000 --- a/contrib/tiflash-proxy +++ b/contrib/tiflash-proxy @@ -1 +1 @@ -Subproject commit 1ffec9a6b5ce7f081151f4462ca6a6d618dde9eb +Subproject commit 21fa71692fcbc2f99acdaa9703d9a899bdea1080 From 720bfc1787117c6ab29ed6ed822eb1923499d8f6 Mon Sep 17 00:00:00 2001 From: Meng Xin Date: Fri, 16 Sep 2022 13:34:59 +0800 Subject: [PATCH 15/17] fix that the result of expression cast(Real/Decimal)AsTime is inconsistent with TiDB (#5799) close pingcap/tiflash#3779 --- dbms/src/Common/MyTime.cpp | 171 ++++++++++++++++-- dbms/src/Common/MyTime.h | 17 +- dbms/src/Common/tests/gtest_mytime.cpp | 6 +- dbms/src/Functions/FunctionsDateTime.h | 6 +- dbms/src/Functions/FunctionsTiDBConversion.h | 155 ++-------------- .../Functions/tests/gtest_tidb_conversion.cpp | 117 +++++++++--- tests/fullstack-test/expr/cast_as_time.test | 50 +++++ 7 files changed, 337 insertions(+), 185 deletions(-) diff --git a/dbms/src/Common/MyTime.cpp b/dbms/src/Common/MyTime.cpp index c527d5ff8df..4ddd313edda 100644 --- a/dbms/src/Common/MyTime.cpp +++ b/dbms/src/Common/MyTime.cpp @@ -553,6 +553,11 @@ const String & MyTimeBase::monthName() const } bool checkTimeValid(Int32 year, Int32 month, Int32 day, Int32 hour, Int32 minute, Int32 second) +{ + return month != 0 && day != 0 && checkTimeValidAllowMonthAndDayZero(year, month, day, hour, minute, second); +} + +bool checkTimeValidAllowMonthAndDayZero(Int32 year, Int32 month, Int32 day, Int32 hour, Int32 minute, Int32 second) { if (year > 9999 || month < 0 || month > 12 || day < 0 || day > 31 || hour > 23 || minute > 59 || second > 59) { @@ -561,7 +566,119 @@ bool checkTimeValid(Int32 year, Int32 month, Int32 day, Int32 hour, Int32 minute return day <= getLastDay(year, month); } -std::pair parseMyDateTimeAndJudgeIsDate(const String & str, int8_t fsp, bool need_check_time_valid, bool ignore_zero_date) +bool noNeedCheckTime(Int32, Int32, Int32, Int32, Int32, Int32) +{ + return true; +} + +// Return true if the time is invalid. +inline bool getDatetime(const Int64 & num, MyDateTime & result) +{ + UInt64 ymd = num / 1000000; + UInt64 hms = num - ymd * 1000000; + + UInt64 year = ymd / 10000; + ymd %= 10000; + UInt64 month = ymd / 100; + UInt64 day = ymd % 100; + + UInt64 hour = hms / 10000; + hms %= 10000; + UInt64 minute = hms / 100; + UInt64 second = hms % 100; + + if (toCoreTimeChecked(year, month, day, hour, minute, second, 0, result)) + { + return true; + } + return !result.isValid(true, false); +} + +// Convert a integer number to DateTime and return true if the result is NULL. 
+// If number is invalid(according to SQL_MODE), return NULL and handle the error with DAGContext. +// This function may throw exception. +inline bool numberToDateTime(Int64 number, MyDateTime & result, bool allowZeroDate) +{ + MyDateTime datetime(0); + if (number == 0) + { + if (allowZeroDate) + { + result = datetime; + return false; + } + return true; + } + + // datetime type + if (number >= 10000101000000) + { + return getDatetime(number, result); + } + + // check MMDD + if (number < 101) + { + return true; + } + + // check YYMMDD: 2000-2069 + if (number <= 69 * 10000 + 1231) + { + number = (number + 20000000) * 1000000; + return getDatetime(number, result); + } + + if (number < 70 * 10000 + 101) + { + return true; + } + + // check YYMMDD + if (number <= 991231) + { + number = (number + 19000000) * 1000000; + return getDatetime(number, result); + } + + // check hour/min/second + if (number <= 99991231) + { + number *= 1000000; + return getDatetime(number, result); + } + + // check MMDDHHMMSS + if (number < 101000000) + { + return true; + } + + // check YYMMDDhhmmss: 2000-2069 + if (number <= 69 * 10000000000 + 1231235959) + { + number += 20000000000000; + return getDatetime(number, result); + } + + // check YYYYMMDDhhmmss + if (number < 70 * 10000000000 + 101000000) + { + return true; + } + + // check YYMMDDHHMMSS + if (number <= 991231235959) + { + number += 19000000000000; + return getDatetime(number, result); + } + + return getDatetime(number, result); +} + +// isFloat is true means that the input string is float format like "1212.111" +std::pair parseMyDateTimeAndJudgeIsDate(const String & str, int8_t fsp, CheckTimeFunc checkTimeFunc, bool isFloat) { Int32 year = 0, month = 0, day = 0, hour = 0, minute = 0, second = 0, delta_hour = 0, delta_minute = 0; @@ -582,7 +699,7 @@ std::pair parseMyDateTimeAndJudgeIsDate(const String & str, int8_t return seps.size() > 5 || (seps.size() == 1 && seps[0].size() > 4); }; - if (!frac_str.empty()) + if (!frac_str.empty() && !isFloat) { if (!no_absorb(seps)) { @@ -616,6 +733,24 @@ std::pair parseMyDateTimeAndJudgeIsDate(const String & str, int8_t case 1: { size_t l = seps[0].size(); + if (isFloat) + { + MyDateTime date_time(0); + if (seps[0] == "0") + { + return {date_time.toPackedUInt(), is_date}; + } + if (numberToDateTime(std::stoll(seps[0]), date_time)) + { + return {Field(), is_date}; + } + std::tie(year, month, day, hour, minute, second) = std::tuple(date_time.year, date_time.month, date_time.day, date_time.hour, date_time.minute, date_time.second); + if (l >= 9 && l <= 14) + { + hhmmss = true; + } + break; + } switch (l) { case 14: // YYYYMMDDHHMMSS @@ -772,7 +907,7 @@ std::pair parseMyDateTimeAndJudgeIsDate(const String & str, int8_t // If str is sepereated by delimiters, the first one is year, and if the year is 2 digit, // we should adjust it. // TODO: adjust year is very complex, now we only consider the simplest way. 
- if (seps[0].size() == 2) + if (seps[0].size() <= 2 && !isFloat) { if (year == 0 && month == 0 && day == 0 && hour == 0 && minute == 0 && second == 0 && frac_str.empty()) { @@ -820,7 +955,7 @@ std::pair parseMyDateTimeAndJudgeIsDate(const String & str, int8_t } } - if ((need_check_time_valid && !checkTimeValid(year, month, day, hour, minute, second)) || (!ignore_zero_date && (month == 0 || day == 0))) + if (!checkTimeFunc(year, month, day, hour, minute, second)) { return {Field(), is_date}; } @@ -863,9 +998,14 @@ std::pair parseMyDateTimeAndJudgeIsDate(const String & str, int8_t } // TODO: support parse time from float string -Field parseMyDateTime(const String & str, int8_t fsp, bool need_check_time_valid, bool ignore_zero_date) +Field parseMyDateTime(const String & str, int8_t fsp, CheckTimeFunc checkTimeFunc) +{ + return parseMyDateTimeAndJudgeIsDate(str, fsp, checkTimeFunc).first; +} + +Field parseMyDateTimeFromFloat(const String & str, int8_t fsp, CheckTimeFunc checkTimeFunc) { - return parseMyDateTimeAndJudgeIsDate(str, fsp, need_check_time_valid, ignore_zero_date).first; + return parseMyDateTimeAndJudgeIsDate(str, fsp, checkTimeFunc, true).first; } String MyDateTime::toString(int fsp) const @@ -1097,7 +1237,7 @@ bool MyTimeBase::isValid(bool allow_zero_in_date, bool allow_invalid_date) const } } - if (year >= 9999 || month > 12) + if (year > 9999 || month > 12) { return false; } @@ -1105,18 +1245,21 @@ bool MyTimeBase::isValid(bool allow_zero_in_date, bool allow_invalid_date) const UInt8 max_day = 31; if (!allow_invalid_date) { - if (month < 1) - { - return false; - } constexpr static UInt8 max_days_in_month[12] = {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}; static auto is_leap_year = [](UInt16 _year) { return ((_year % 4 == 0) && (_year % 100 != 0)) || (_year % 400 == 0); }; - max_day = max_days_in_month[month - 1]; - if (month == 2 && is_leap_year(year)) + if (allow_zero_in_date && month == 0) { - max_day = 29; + max_day = 31; + } + else + { + max_day = max_days_in_month[month - 1]; // NOLINT + if (month == 2 && is_leap_year(year)) + { + max_day = 29; + } } } if (day > max_day) diff --git a/dbms/src/Common/MyTime.h b/dbms/src/Common/MyTime.h index 58f4a197e2c..cdfad663540 100644 --- a/dbms/src/Common/MyTime.h +++ b/dbms/src/Common/MyTime.h @@ -150,6 +150,8 @@ struct MyDate : public MyTimeBase } }; +bool numberToDateTime(Int64 number, MyDateTime & result, bool allowZeroDate = true); + struct MyDateTimeFormatter { std::vector> formatters; @@ -181,12 +183,19 @@ struct MyDateTimeParser std::vector parsers; }; +bool checkTimeValid(Int32 year, Int32 month, Int32 day, Int32 hour, Int32 minute, Int32 second); +bool checkTimeValidAllowMonthAndDayZero(Int32 year, Int32 month, Int32 day, Int32 hour, Int32 minute, Int32 second); +bool noNeedCheckTime(Int32, Int32, Int32, Int32, Int32, Int32); + +using CheckTimeFunc = std::function; + static const int8_t DefaultFsp = 6; -static const bool DefaultNeedCheckTimeValid = false; -static const bool DefaultIgnoreZeroDate = true; +static bool DefaultIsFloat = false; +static CheckTimeFunc DefaultCheckTimeFunc = noNeedCheckTime; -Field parseMyDateTime(const String & str, int8_t fsp = DefaultFsp, bool need_check_time_valid = DefaultNeedCheckTimeValid, bool ignore_zero_date = DefaultIgnoreZeroDate); -std::pair parseMyDateTimeAndJudgeIsDate(const String & str, int8_t fsp = DefaultFsp, bool need_check_time_valid = DefaultNeedCheckTimeValid, bool ignore_zero_date = DefaultIgnoreZeroDate); +Field parseMyDateTime(const String & str, int8_t fsp = 
DefaultFsp, CheckTimeFunc checkTimeFunc = DefaultCheckTimeFunc); +Field parseMyDateTimeFromFloat(const String & str, int8_t fsp = DefaultFsp, CheckTimeFunc checkTimeFunc = DefaultCheckTimeFunc); +std::pair parseMyDateTimeAndJudgeIsDate(const String & str, int8_t fsp = DefaultFsp, CheckTimeFunc checkTimeFunc = DefaultCheckTimeFunc, bool isFloat = DefaultIsFloat); void convertTimeZone(UInt64 from_time, UInt64 & to_time, const DateLUTImpl & time_zone_from, const DateLUTImpl & time_zone_to, bool throw_exception = false); diff --git a/dbms/src/Common/tests/gtest_mytime.cpp b/dbms/src/Common/tests/gtest_mytime.cpp index 6bb574ea817..10f356711c7 100644 --- a/dbms/src/Common/tests/gtest_mytime.cpp +++ b/dbms/src/Common/tests/gtest_mytime.cpp @@ -71,19 +71,19 @@ class TestMyTime : public testing::Test } } - static void checkNumberToMyDateTime(const Int64 & input, const MyDateTime & expected, bool expect_error, DAGContext * ctx) + static void checkNumberToMyDateTime(const Int64 & input, const MyDateTime & expected, bool expect_error, DAGContext *) { if (expect_error) { MyDateTime datetime(0, 0, 0, 0, 0, 0, 0); - EXPECT_TRUE(numberToDateTime(input, datetime, ctx)); + EXPECT_TRUE(numberToDateTime(input, datetime)); return; } try { MyDateTime source(0, 0, 0, 0, 0, 0, 0); - numberToDateTime(input, source, ctx); + numberToDateTime(input, source); EXPECT_EQ(source.year, expected.year) << "Original time number: " << input; EXPECT_EQ(source.month, expected.month) << "Original time number: " << input; EXPECT_EQ(source.day, expected.day) << "Original time number: " << input; diff --git a/dbms/src/Functions/FunctionsDateTime.h b/dbms/src/Functions/FunctionsDateTime.h index 8823916c608..fa33a54f811 100644 --- a/dbms/src/Functions/FunctionsDateTime.h +++ b/dbms/src/Functions/FunctionsDateTime.h @@ -899,7 +899,7 @@ struct AddSecondsImpl // TODO: need do these in vector mode in the future static inline String execute(String str, Int64 delta, const DateLUTImpl & time_zone) { - Field packed_uint_value = parseMyDateTime(str, 6, true, false); + Field packed_uint_value = parseMyDateTime(str, 6, checkTimeValid); UInt64 packed_uint = packed_uint_value.template safeGet(); UInt64 result = AddSecondsImpl::execute(packed_uint, delta, time_zone); MyDateTime myDateTime(result); @@ -983,7 +983,7 @@ struct AddDaysImpl static inline String execute(String str, Int64 delta, const DateLUTImpl & time_zone) { - auto value_and_is_date = parseMyDateTimeAndJudgeIsDate(str, 6, true, false); + auto value_and_is_date = parseMyDateTimeAndJudgeIsDate(str, 6, checkTimeValid); Field packed_uint_value = value_and_is_date.first; bool is_date = value_and_is_date.second; UInt64 packed_uint = packed_uint_value.template safeGet(); @@ -1051,7 +1051,7 @@ struct AddMonthsImpl static inline String execute(String str, Int64 delta, const DateLUTImpl & time_zone) { - auto value_and_is_date = parseMyDateTimeAndJudgeIsDate(str, 6, true, false); + auto value_and_is_date = parseMyDateTimeAndJudgeIsDate(str, 6, checkTimeValid); Field packed_uint_value = value_and_is_date.first; bool is_date = value_and_is_date.second; UInt64 packed_uint = packed_uint_value.template safeGet(); diff --git a/dbms/src/Functions/FunctionsTiDBConversion.h b/dbms/src/Functions/FunctionsTiDBConversion.h index d65359d42c6..cfb1fc9684f 100755 --- a/dbms/src/Functions/FunctionsTiDBConversion.h +++ b/dbms/src/Functions/FunctionsTiDBConversion.h @@ -1307,7 +1307,7 @@ struct TiDBConvertToTime size_t result, bool, const tipb::FieldType &, - const Context & context) + const Context &) { size_t 
size = block.getByPosition(arguments[0]).column->size(); auto col_to = ColumnUInt64::create(size, 0); @@ -1347,7 +1347,7 @@ struct TiDBConvertToTime StringRef string_ref(&(*chars)[current_offset], string_size); String string_value = string_ref.toString(); - Field packed_uint_value = parseMyDateTime(string_value, to_fsp, true); + Field packed_uint_value = parseMyDateTime(string_value, to_fsp, checkTimeValidAllowMonthAndDayZero); if (packed_uint_value.isNull()) { @@ -1431,7 +1431,7 @@ struct TiDBConvertToTime for (size_t i = 0; i < size; ++i) { MyDateTime datetime(0, 0, 0, 0, 0, 0, 0); - bool is_null = numberToDateTime(vec_from[i], datetime, context.getDAGContext()); + bool is_null = numberToDateTime(vec_from[i], datetime, false); if (is_null) { @@ -1468,34 +1468,26 @@ struct TiDBConvertToTime // Convert to string and then parse to time String value_str = toString(value); - if (value_str == "0") + Field packed_uint_value = parseMyDateTimeFromFloat(value_str, to_fsp, noNeedCheckTime); + + if (packed_uint_value.isNull()) { + // Fill NULL if cannot parse (*vec_null_map_to)[i] = 1; vec_to[i] = 0; + continue; + } + + UInt64 packed_uint = packed_uint_value.template safeGet(); + MyDateTime datetime(packed_uint); + if constexpr (std::is_same_v) + { + MyDate date(datetime.year, datetime.month, datetime.day); + vec_to[i] = date.toPackedUInt(); } else { - Field packed_uint_value = parseMyDateTime(value_str, to_fsp, true); - - if (packed_uint_value.isNull()) - { - // Fill NULL if cannot parse - (*vec_null_map_to)[i] = 1; - vec_to[i] = 0; - continue; - } - - UInt64 packed_uint = packed_uint_value.template safeGet(); - MyDateTime datetime(packed_uint); - if constexpr (std::is_same_v) - { - MyDate date(datetime.year, datetime.month, datetime.day); - vec_to[i] = date.toPackedUInt(); - } - else - { - vec_to[i] = packed_uint; - } + vec_to[i] = packed_uint; } } } @@ -1508,7 +1500,8 @@ struct TiDBConvertToTime for (size_t i = 0; i < size; i++) { String value_str = vec_from[i].toString(type.getScale()); - Field value = parseMyDateTime(value_str, to_fsp, true); + + Field value = parseMyDateTimeFromFloat(value_str, to_fsp, noNeedCheckTime); if (value.getType() == Field::Types::Null) { @@ -1661,116 +1654,6 @@ struct TiDBConvertToDuration } }; -// Return true if the time is invalid. -inline bool getDatetime(const Int64 & num, MyDateTime & result, DAGContext * ctx) -{ - UInt64 ymd = num / 1000000; - UInt64 hms = num - ymd * 1000000; - - UInt64 year = ymd / 10000; - ymd %= 10000; - UInt64 month = ymd / 100; - UInt64 day = ymd % 100; - - UInt64 hour = hms / 10000; - hms %= 10000; - UInt64 minute = hms / 100; - UInt64 second = hms % 100; - - if (toCoreTimeChecked(year, month, day, hour, minute, second, 0, result)) - { - return true; - } - if (ctx) - { - return !result.isValid(ctx->allowZeroInDate(), ctx->allowInvalidDate()); - } - else - { - return !result.isValid(false, false); - } - return false; -} - -// Convert a integer number to DateTime and return true if the result is NULL. -// If number is invalid(according to SQL_MODE), return NULL and handle the error with DAGContext. -// This function may throw exception. 
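For reference, the helper removed below dispatches purely on the magnitude of the input integer. The following is a condensed, self-contained sketch of just that range dispatch; the function name is hypothetical, and the zero value plus the final validity checks are handled separately by the full version that follows:

#include <cstdint>

// Illustrative sketch only: rewrite `number` into the full YYYYMMDDhhmmss form when its
// magnitude matches one of the accepted shapes, or return false when it can never form a date.
// E.g. 691231 -> 20691231000000 (2069-12-31), while 701231 -> 19701231000000 (1970-12-31).
inline bool normalizeDateTimeNumber(int64_t & number)
{
    if (number >= 10000101000000LL) return true;                                        // already YYYYMMDDhhmmss
    if (number < 101) return false;                                                     // too short even for MMDD
    if (number <= 691231) { number = (number + 20000000) * 1000000; return true; }      // YYMMDD, years 2000-2069
    if (number < 700101) return false;
    if (number <= 991231) { number = (number + 19000000) * 1000000; return true; }      // YYMMDD, years 1970-1999
    if (number <= 99991231) { number *= 1000000; return true; }                         // YYYYMMDD, zero time part
    if (number < 101000000) return false;                                               // too short for MMDDhhmmss
    if (number <= 691231235959LL) { number += 20000000000000LL; return true; }          // YYMMDDhhmmss, years 2000-2069
    if (number < 700101000000LL) return false;
    if (number <= 991231235959LL) { number += 19000000000000LL; return true; }          // YYMMDDhhmmss, years 1970-1999
    return true;                                                                        // left to the final validity check
}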
-inline bool numberToDateTime(Int64 number, MyDateTime & result, DAGContext * ctx) -{ - MyDateTime datetime(0); - if (number == 0) - { - result = datetime; - return true; - } - - // datetime type - if (number >= 10000101000000) - { - return getDatetime(number, result, ctx); - } - - // check MMDD - if (number < 101) - { - return true; - } - - // check YYMMDD: 2000-2069 - if (number <= 69 * 10000 + 1231) - { - number = (number + 20000000) * 1000000; - return getDatetime(number, result, ctx); - } - - if (number < 70 * 10000 + 101) - { - return true; - } - - // check YYMMDD - if (number <= 991231) - { - number = (number + 19000000) * 1000000; - return getDatetime(number, result, ctx); - } - - // check hour/min/second - if (number <= 99991231) - { - number *= 1000000; - return getDatetime(number, result, ctx); - } - - // check MMDDHHMMSS - if (number < 101000000) - { - return true; - } - - // check YYMMDDhhmmss: 2000-2069 - if (number <= 69 * 10000000000 + 1231235959) - { - number += 20000000000000; - return getDatetime(number, result, ctx); - } - - // check YYYYMMDDhhmmss - if (number < 70 * 10000000000 + 101000000) - { - return true; - } - - // check YYMMDDHHMMSS - if (number <= 991231235959) - { - number += 19000000000000; - return getDatetime(number, result, ctx); - } - - return getDatetime(number, result, ctx); -} - template class ExecutableFunctionTiDBCast : public IExecutableFunction { diff --git a/dbms/src/Functions/tests/gtest_tidb_conversion.cpp b/dbms/src/Functions/tests/gtest_tidb_conversion.cpp index 5dc06fa9e26..f4ab8316b08 100644 --- a/dbms/src/Functions/tests/gtest_tidb_conversion.cpp +++ b/dbms/src/Functions/tests/gtest_tidb_conversion.cpp @@ -102,6 +102,22 @@ class TestTidbConversion : public DB::tests::FunctionTest inner_test(false); } + template + typename std::enable_if, void>::type testNotOnlyNull(const DecimalField & input, const MyDateTime & output, int fraction) + { + auto meta = std::make_tuple(19, input.getScale()); + auto inner_test = [&](bool is_const) { + ASSERT_COLUMN_EQ( + is_const ? createDateTimeColumnConst(1, output, fraction) : createDateTimeColumn({output}, fraction), + executeFunction( + func_name, + {is_const ? createConstColumn>(meta, 1, input) : createColumn>(meta, {input}), + createCastTypeConstColumn(fmt::format("Nullable(MyDateTime({}))", fraction))})); + }; + inner_test(true); + inner_test(false); + } + template typename std::enable_if, void>::type testNotOnlyNull(const Input & input, const MyDateTime & output, int fraction) { @@ -178,6 +194,21 @@ class TestTidbConversion : public DB::tests::FunctionTest inner_test(false); } + template + typename std::enable_if, void>::type testReturnNull(const DecimalField & input, const std::tuple & meta, int fraction) + { + auto inner_test = [&](bool is_const) { + ASSERT_COLUMN_EQ( + is_const ? createDateTimeColumnConst(1, {}, fraction) : createDateTimeColumn({{}}, fraction), + executeFunction( + func_name, + {is_const ? createConstColumn>(meta, 1, input) : createColumn>(meta, {input}), + createCastTypeConstColumn(fmt::format("Nullable(MyDateTime({}))", fraction))})); + }; + inner_test(true); + inner_test(false); + } + template void testOnlyNull() { @@ -1208,42 +1239,72 @@ try testOnlyNull(); // TODO add tests after non-expected results fixed - - // mysql: null, warning. - // tiflash: null, no warning. 
- // tidb: 0000-00-00 00:00:00 - testReturnNull(0, 6); testReturnNull(12.213, 6); testReturnNull(-12.213, 6); testReturnNull(MAX_FLOAT32, 6); testReturnNull(MIN_FLOAT32, 6); - // mysql: 2000-01-11 00:00:00 - // tiflash / tidb: null, warnings - // testNotOnlyNull(111, {2000, 1, 11, 0, 0, 0, 0}, 6); + + testNotOnlyNull(0, {0, 0, 0, 0, 0, 0, 0}, 6); + testNotOnlyNull(111, {2000, 1, 11, 0, 0, 0, 0}, 6); testReturnNull(-111, 6); - // mysql: 2000-01-11 00:00:00 - // tiflash / tidb: null, warnings - // testNotOnlyNull(111.1, {2000, 1, 11, 0, 0, 0, 0}, 6); - - // mysql: null, warning. - // tiflash: null, no warning. - // tidb: 0000-00-00 00:00:00 - // testReturnNull(0, 6); + testNotOnlyNull(111.1, {2000, 1, 11, 0, 0, 0, 0}, 6); + testReturnNull(12.213, 6); testReturnNull(-12.213, 6); testReturnNull(MAX_FLOAT64, 6); testReturnNull(MIN_FLOAT64, 6); - // mysql: 2000-01-11 00:00:00 - // tiflash / tidb: null, warnings - // testNotOnlyNull(111, {2000, 1, 11, 0, 0, 0, 0}, 6); + testReturnNull(1.1, 6); + testReturnNull(48.1, 6); + testReturnNull(100.1, 6); + testReturnNull(1301.11, 6); + testReturnNull(1131.111, 6); + testReturnNull(100001111.111, 6); + testReturnNull(20121212121260.1111111, 6); + testReturnNull(20121212126012.1111111, 6); + testReturnNull(20121212241212.1111111, 6); + testNotOnlyNull(111, {2000, 1, 11, 0, 0, 0, 0}, 6); testReturnNull(-111, 6); - // mysql: 2000-01-11 00:00:00 - // tiflash / tidb: null, warnings - // testNotOnlyNull(111.1, {2000, 1, 11, 0, 0, 0, 0}, 6); + + testNotOnlyNull(0, {0, 0, 0, 0, 0, 0, 0}, 6); testNotOnlyNull(20210201, {2021, 2, 1, 0, 0, 0, 0}, 6); - // mysql: 2021-02-01 00:00:00 - // tiflash / tidb: 2021-02-01 01:00:00 - // testNotOnlyNull(20210201.1, {2021, 2, 1, 0, 0, 0, 0}, 6); + testNotOnlyNull(20210201.1, {2021, 2, 1, 0, 0, 0, 0}, 6); + testNotOnlyNull(20210000.1, {2021, 0, 0, 0, 0, 0, 0}, 6); + testNotOnlyNull(120012.1, {2012, 0, 12, 0, 0, 0, 0}, 6); + testNotOnlyNull(121200.1, {2012, 12, 00, 0, 0, 0, 0}, 6); + testNotOnlyNull(101.1, {2000, 1, 1, 0, 0, 0, 0}, 6); + testNotOnlyNull(111.1, {2000, 1, 11, 0, 0, 0, 0}, 6); + testNotOnlyNull(1122.1, {2000, 11, 22, 0, 0, 0, 0}, 6); + testNotOnlyNull(31212.111, {2003, 12, 12, 0, 0, 0, 0}, 6); + testNotOnlyNull(121212.1111, {2012, 12, 12, 0, 0, 0, 0}, 6); + testNotOnlyNull(1121212.111111, {112, 12, 12, 0, 0, 0, 0}, 6); + testNotOnlyNull(11121212.111111, {1112, 12, 12, 0, 0, 0, 0}, 6); + testNotOnlyNull(99991111.1111111, {9999, 11, 11, 0, 0, 0, 0}, 6); + testNotOnlyNull(1212121212.111111, {2000, 12, 12, 12, 12, 12, 111111}, 6); +} +CATCH + +TEST_F(TestTidbConversion, castDecimalAsReal) +try +{ + testReturnNull(DecimalField64(11, 1), std::make_tuple(19, 1), 6); + testReturnNull(DecimalField64(481, 1), std::make_tuple(19, 1), 6); + testReturnNull(DecimalField64(1001, 1), std::make_tuple(19, 1), 6); + testReturnNull(DecimalField64(130111, 2), std::make_tuple(19, 2), 6); + testReturnNull(DecimalField64(1131111, 3), std::make_tuple(19, 3), 6); + testReturnNull(DecimalField64(100001111111, 3), std::make_tuple(19, 3), 6); + testReturnNull(DecimalField64(12121212126011111, 5), std::make_tuple(19, 6), 6); + testReturnNull(DecimalField64(121212126012111111, 5), std::make_tuple(19, 4), 6); + testReturnNull(DecimalField64(12121224121211111, 5), std::make_tuple(19, 4), 6); + + testNotOnlyNull(DecimalField64(1011, 1), {2000, 1, 1, 0, 0, 0, 0}, 6); + testNotOnlyNull(DecimalField64(1111, 1), {2000, 1, 11, 0, 0, 0, 0}, 6); + testNotOnlyNull(DecimalField64(11221, 1), {2000, 11, 22, 0, 0, 0, 0}, 6); + 
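+    // (Illustrative note: the integral part of the decimal drives the date parsing. For example,
+    //  DecimalField64(11221, 1) is 1122.1, whose integral part 1122 is read as MMDD in year 2000,
+    //  giving 2000-11-22; the fractional digits do not contribute to the date.)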
testNotOnlyNull(DecimalField64(31212111, 3), {2003, 12, 12, 0, 0, 0, 0}, 6); + testNotOnlyNull(DecimalField64(30000111, 3), {2003, 0, 0, 0, 0, 0, 0}, 6); + testNotOnlyNull(DecimalField64(1212121111, 4), {2012, 12, 12, 0, 0, 0, 0}, 6); + testNotOnlyNull(DecimalField64(1121212111111, 6), {112, 12, 12, 0, 0, 0, 0}, 6); + testNotOnlyNull(DecimalField64(11121212111111, 6), {1112, 12, 12, 0, 0, 0, 0}, 6); + testNotOnlyNull(DecimalField64(99991111111111, 6), {9999, 11, 11, 0, 0, 0, 0}, 6); } CATCH @@ -1929,6 +1990,12 @@ try { ASSERT_COLUMN_EQ(expect_date_column, vector_result); } + + ASSERT_COLUMN_EQ( + createDateTimeColumn({{{2012, 0, 0, 0, 0, 0, 0}}}, 6), + executeFunction(func_name, + {createColumn>({"20120000"}), + createCastTypeConstColumn("Nullable(MyDateTime(6))")})); } CATCH diff --git a/tests/fullstack-test/expr/cast_as_time.test b/tests/fullstack-test/expr/cast_as_time.test index 30b8378bfb7..4c0d28984ae 100644 --- a/tests/fullstack-test/expr/cast_as_time.test +++ b/tests/fullstack-test/expr/cast_as_time.test @@ -70,3 +70,53 @@ mysql> set @@tidb_isolation_read_engines='tiflash';select cast(a as date) ad, ca mysql> drop table if exists test.t +mysql> create table test.t(d1 double, f float, d2 decimal(24,8)) +mysql> alter table test.t set tiflash replica 1 location labels 'rack', 'host', 'abc' + +func> wait_table test t + +mysql> insert into test.t values(0, 0, 0) +mysql> set @@tidb_isolation_read_engines='tiflash';select cast(111.1 as datetime) from test.t ++-------------------------+ +| cast(111.1 as datetime) | ++-------------------------+ +| 2000-01-11 00:00:00 | ++-------------------------+ +mysql> set @@tidb_isolation_read_engines='tiflash';select cast(1311.1 as datetime) from test.t ++--------------------------+ +| cast(1311.1 as datetime) | ++--------------------------+ +| NULL | ++--------------------------+ +mysql> set @@tidb_isolation_read_engines='tiflash';select cast(d1 as datetime), cast(d2 as datetime), cast(f as datetime) from test.t ++----------------------+----------------------+---------------------+ +| cast(d1 as datetime) | cast(d2 as datetime) | cast(f as datetime) | ++----------------------+----------------------+---------------------+ +| 0000-00-00 00:00:00 | 0000-00-00 00:00:00 | 0000-00-00 00:00:00 | ++----------------------+----------------------+---------------------+ + +mysql> insert into test.t values(111.1, 1122.1, 31212.111) +mysql> insert into test.t values(121212.1111, 1121212.111111, 11121212.111111) +mysql> insert into test.t values(120012.1111, 1121200.111111, 11120000.111111) +mysql> insert into test.t values(99991111.1111111, 101.1111111, 20121212121212.1111111) +mysql> insert into test.t values(NULL, NULL, NULL) +mysql> insert into test.t values(1.1, 48.1, 100.1) +mysql> insert into test.t values(1301.11, 1131.111, 100001111.111) +mysql> insert into test.t values(20121212121260.1111111, 20121212126012.1111111, 20121212241212.1111111) +mysql> set @@tidb_isolation_read_engines='tiflash';set tidb_enforce_mpp=1;select cast(d1 as datetime), cast(f as datetime), cast(d2 as datetime) from test.t ++----------------------+---------------------+----------------------+ +| cast(d1 as datetime) | cast(f as datetime) | cast(d2 as datetime) | ++----------------------+---------------------+----------------------+ +| 0000-00-00 00:00:00 | 0000-00-00 00:00:00 | 0000-00-00 00:00:00 | +| 2000-01-11 00:00:00 | 2000-11-22 00:00:00 | 2003-12-12 00:00:00 | +| 2012-12-12 00:00:00 | 0112-12-12 00:00:00 | 1112-12-12 00:00:00 | +| 2012-00-12 00:00:00 | 0112-12-00 00:00:00 | 
1112-00-00 00:00:00  |
+| 9999-11-11 00:00:00  | 2000-01-01 00:00:00 | 2012-12-12 12:12:12  |
+| NULL                 | NULL                | NULL                 |
+| NULL                 | NULL                | NULL                 |
+| NULL                 | NULL                | NULL                 |
+| NULL                 | NULL                | NULL                 |
++----------------------+---------------------+----------------------+
+
+mysql> drop table if exists test.t
+

From aae88b120da5fda1a2f578d8a4cbe33d6e82caff Mon Sep 17 00:00:00 2001
From: JaySon
Date: Fri, 16 Sep 2022 15:21:00 +0800
Subject: [PATCH 16/17] tests: Fix RegionBlockReaderTest helper functions (#5899)

ref pingcap/tiflash#5859
---
 dbms/src/Storages/Transaction/RowCodec.cpp    |  18 ++-
 .../Transaction/tests/RowCodecTestUtils.h     |  45 ++++---
 .../tests/gtest_region_block_reader.cpp       | 116 +++++++++++++----
 dbms/src/TestUtils/TiFlashTestBasic.cpp       |  14 +++
 dbms/src/TestUtils/TiFlashTestBasic.h         |   9 ++
 5 files changed, 145 insertions(+), 57 deletions(-)

diff --git a/dbms/src/Storages/Transaction/RowCodec.cpp b/dbms/src/Storages/Transaction/RowCodec.cpp
index ea7f6b7c2da..8fc4ead5849 100644
--- a/dbms/src/Storages/Transaction/RowCodec.cpp
+++ b/dbms/src/Storages/Transaction/RowCodec.cpp
@@ -194,10 +194,18 @@ struct RowEncoderV2
     /// Cache encoded individual columns.
     for (size_t i_col = 0, i_val = 0; i_col < table_info.columns.size(); i_col++)
     {
+        if (i_val == fields.size())
+            break;
+
         const auto & column_info = table_info.columns[i_col];
         const auto & field = fields[i_val];
         if ((table_info.pk_is_handle || table_info.is_common_handle) && column_info.hasPriKeyFlag())
+        {
+            // For common-handle or pk-is-handle tables, the field with the primary key flag
+            // is usually encoded into the key instead of the value.
             continue;
+        }
+
         if (column_info.id > std::numeric_limits::ColumnIDType>::max())
             is_big = true;
         if (!field.isNull())
@@ -213,9 +221,6 @@ struct RowEncoderV2
                 null_column_ids.emplace(column_info.id);
             }
             i_val++;
-
-            if (i_val == fields.size())
-                break;
         }
         is_big = is_big || value_length > std::numeric_limits::ValueOffsetType>::max();
@@ -378,7 +383,8 @@ bool appendRowV2ToBlockImpl(
         is_null = idx_null < null_column_ids.size();
         auto next_datum_column_id = is_null ? null_column_ids[idx_null] : not_null_column_ids[idx_not_null];
-        if (column_ids_iter->first > next_datum_column_id)
+        const auto next_column_id = column_ids_iter->first;
+        if (next_column_id > next_datum_column_id)
         {
             // The next column id to read is bigger than the column id of next datum in encoded row.
             // It means this is the datum of extra column. May happen when reading after dropping
@@ -391,7 +397,7 @@ bool appendRowV2ToBlockImpl(
             else
                 idx_not_null++;
         }
-        else if (column_ids_iter->first < next_datum_column_id)
+        else if (next_column_id < next_datum_column_id)
         {
             // The next column id to read is less than the column id of next datum in encoded row.
             // It means this is the datum of missing column.
May happen when reading after adding @@ -407,7 +413,7 @@ bool appendRowV2ToBlockImpl( { // If pk_handle_id is a valid column id, then it means the table's pk_is_handle is true // we can just ignore the pk value encoded in value part - if (unlikely(column_ids_iter->first == pk_handle_id)) + if (unlikely(next_column_id == pk_handle_id)) { column_ids_iter++; block_column_pos++; diff --git a/dbms/src/Storages/Transaction/tests/RowCodecTestUtils.h b/dbms/src/Storages/Transaction/tests/RowCodecTestUtils.h index 34e0d3d4104..0cc5bfe6bff 100644 --- a/dbms/src/Storages/Transaction/tests/RowCodecTestUtils.h +++ b/dbms/src/Storages/Transaction/tests/RowCodecTestUtils.h @@ -15,7 +15,9 @@ #pragma once #include #include +#include #include +#include namespace DB::tests { @@ -146,7 +148,7 @@ struct ColumnIDValue { static constexpr bool value_is_null = true; using ValueType = std::decay_t; - ColumnIDValue(ColumnID id_) + explicit ColumnIDValue(ColumnID id_) : id(id_) {} ColumnID id; @@ -211,46 +213,55 @@ void getTableInfoFieldsInternal(OrderedColumnInfoFields & column_info_fields, Ty } template -std::pair> getTableInfoAndFields(ColumnIDs handle_ids, bool is_common_handle, Types &&... column_value_ids) +std::pair> getTableInfoAndFields(ColumnIDs pk_col_ids, bool is_common_handle, Types &&... column_value_ids) { OrderedColumnInfoFields column_info_fields; getTableInfoFieldsInternal(column_info_fields, std::forward(column_value_ids)...); TableInfo table_info; std::vector fields; + bool pk_is_handle = pk_col_ids.size() == 1 && pk_col_ids[0] != ::DB::TiDBPkColumnID; + for (auto & column_info_field : column_info_fields) { auto & column = std::get<0>(column_info_field.second); auto & field = std::get<1>(column_info_field.second); - if (std::find(handle_ids.begin(), handle_ids.end(), column.id) != handle_ids.end()) + if (std::find(pk_col_ids.begin(), pk_col_ids.end(), column.id) != pk_col_ids.end()) { column.setPriKeyFlag(); + if (column.tp != TiDB::TypeLong && column.tp != TiDB::TypeTiny && column.tp != TiDB::TypeLongLong && column.tp != TiDB::TypeShort && column.tp != TiDB::TypeInt24) + { + pk_is_handle = false; + } } table_info.columns.emplace_back(std::move(column)); fields.emplace_back(std::move(field)); } - if (!is_common_handle) - { - if (handle_ids[0] != EXTRA_HANDLE_COLUMN_ID) - table_info.pk_is_handle = true; - } - else + + table_info.pk_is_handle = pk_is_handle; + table_info.is_common_handle = is_common_handle; + if (is_common_handle) { table_info.is_common_handle = true; - TiDB::IndexInfo index_info; - for (auto handle_id : handle_ids) + // TiFlash maintains the column name of primary key + // for common handle table + TiDB::IndexInfo pk_index_info; + pk_index_info.is_primary = true; + pk_index_info.idx_name = "PRIMARY"; + pk_index_info.is_unique = true; + for (auto pk_col_id : pk_col_ids) { TiDB::IndexColumnInfo index_column_info; for (auto & column : table_info.columns) { - if (column.id == handle_id) + if (column.id == pk_col_id) { index_column_info.name = column.name; break; } } - index_info.idx_cols.emplace_back(index_column_info); + pk_index_info.idx_cols.emplace_back(index_column_info); } - table_info.index_infos.emplace_back(index_info); + table_info.index_infos.emplace_back(pk_index_info); } return std::make_pair(std::move(table_info), std::move(fields)); @@ -272,7 +283,7 @@ inline DecodingStorageSchemaSnapshotConstPtr getDecodingStorageSchemaSnapshot(co store_columns.emplace_back(VERSION_COLUMN_ID, VERSION_COLUMN_NAME, VERSION_COLUMN_TYPE); store_columns.emplace_back(TAG_COLUMN_ID, 
TAG_COLUMN_NAME, TAG_COLUMN_TYPE); ColumnID handle_id = EXTRA_HANDLE_COLUMN_ID; - for (auto & column_info : table_info.columns) + for (const auto & column_info : table_info.columns) { if (table_info.pk_is_handle) { @@ -301,7 +312,7 @@ size_t valueStartPos(const TableInfo & table_info) inline Block decodeRowToBlock(const String & row_value, DecodingStorageSchemaSnapshotConstPtr decoding_schema) { - auto & sorted_column_id_with_pos = decoding_schema->sorted_column_id_with_pos; + const auto & sorted_column_id_with_pos = decoding_schema->sorted_column_id_with_pos; auto iter = sorted_column_id_with_pos.begin(); const size_t value_column_num = 3; // skip first three column which is EXTRA_HANDLE_COLUMN, VERSION_COLUMN, TAG_COLUMN @@ -347,4 +358,4 @@ T getValueByRowV1(const T & v) return static_cast(std::move((*block.getByPosition(0).column)[0].template safeGet())); } -} // namespace DB::tests \ No newline at end of file +} // namespace DB::tests diff --git a/dbms/src/Storages/Transaction/tests/gtest_region_block_reader.cpp b/dbms/src/Storages/Transaction/tests/gtest_region_block_reader.cpp index d08b4dd3738..dd58f166dac 100644 --- a/dbms/src/Storages/Transaction/tests/gtest_region_block_reader.cpp +++ b/dbms/src/Storages/Transaction/tests/gtest_region_block_reader.cpp @@ -12,19 +12,28 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include #include +#include +#include #include -#include - -#include "RowCodecTestUtils.h" +#include +#include +#include +#include using TableInfo = TiDB::TableInfo; namespace DB::tests { using ColumnIDs = std::vector; -class RegionBlockReaderTestFixture : public ::testing::Test +class RegionBlockReaderTest : public ::testing::Test { +public: + RegionBlockReaderTest() + : logger(Logger::get("RegionBlockReaderTest")) + {} + protected: Int64 handle_value = 100; UInt8 del_mark_value = 0; @@ -34,6 +43,8 @@ class RegionBlockReaderTestFixture : public ::testing::Test RegionDataReadInfoList data_list_read; std::unordered_map fields_map; + LoggerPtr logger; + enum RowEncodeVersion { RowV1, @@ -49,7 +60,7 @@ class RegionBlockReaderTestFixture : public ::testing::Test void TearDown() override {} - void encodeColumns(TableInfo & table_info, std::vector & fields, RowEncodeVersion row_version) + void encodeColumns(const TableInfo & table_info, const std::vector & fields, RowEncodeVersion row_version) { // for later check std::unordered_map column_name_columns_index_map; @@ -59,17 +70,24 @@ class RegionBlockReaderTestFixture : public ::testing::Test column_name_columns_index_map.emplace(table_info.columns[i].name, i); } - std::vector value_fields; - std::vector pk_fields; + std::vector value_encode_fields; + std::vector key_encode_fields; for (size_t i = 0; i < table_info.columns.size(); i++) { - if (!table_info.columns[i].hasPriKeyFlag()) - value_fields.emplace_back(fields[i]); + if (table_info.is_common_handle || table_info.pk_is_handle) + { + if (!table_info.columns[i].hasPriKeyFlag()) + value_encode_fields.emplace_back(fields[i]); + else + key_encode_fields.emplace_back(fields[i]); + } else - pk_fields.emplace_back(fields[i]); + { + value_encode_fields.emplace_back(fields[i]); + } } - // create PK + // create the RawTiDBPK section of encoded key WriteBufferFromOwnString pk_buf; if (table_info.is_common_handle) { @@ -77,7 +95,7 @@ class RegionBlockReaderTestFixture : public ::testing::Test for (size_t i = 0; i < primary_index_info.idx_cols.size(); i++) { auto idx = 
column_name_columns_index_map[primary_index_info.idx_cols[i].name]; - EncodeDatum(pk_fields[i], table_info.columns[idx].getCodecFlag(), pk_buf); + EncodeDatum(key_encode_fields[i], table_info.columns[idx].getCodecFlag(), pk_buf); } } else @@ -85,21 +103,22 @@ class RegionBlockReaderTestFixture : public ::testing::Test DB::EncodeInt64(handle_value, pk_buf); } RawTiDBPK pk{std::make_shared(pk_buf.releaseStr())}; - // create value + + // create encoded value WriteBufferFromOwnString value_buf; if (row_version == RowEncodeVersion::RowV1) { - encodeRowV1(table_info, value_fields, value_buf); + encodeRowV1(table_info, value_encode_fields, value_buf); } else if (row_version == RowEncodeVersion::RowV2) { - encodeRowV2(table_info, value_fields, value_buf); + encodeRowV2(table_info, value_encode_fields, value_buf); } else { throw Exception("Unknown row format " + std::to_string(row_version), ErrorCodes::LOGICAL_ERROR); } - auto row_value = std::make_shared(std::move(value_buf.str())); + auto row_value = std::make_shared(value_buf.releaseStr()); for (size_t i = 0; i < rows; i++) data_list_read.emplace_back(pk, del_mark_value, version_value, row_value); } @@ -112,6 +131,14 @@ class RegionBlockReaderTestFixture : public ::testing::Test for (size_t pos = 0; pos < block.columns(); pos++) { const auto & column_element = block.getByPosition(pos); + auto gen_error_log = [&]() { + return fmt::format( + " when checking column\n id={}, name={}, nrow={}\n decoded block is:\n{}\n", + column_element.column_id, + column_element.name, + row, + getColumnsContent(block.getColumnsWithTypeAndName())); + }; if (row == 0) { ASSERT_EQ(column_element.column->size(), rows); @@ -120,24 +147,24 @@ class RegionBlockReaderTestFixture : public ::testing::Test { if (decoding_schema->is_common_handle) { - ASSERT_EQ((*column_element.column)[row], Field(*std::get<0>(data_list_read[row]))); + ASSERT_FIELD_EQ((*column_element.column)[row], Field(*std::get<0>(data_list_read[row]))) << gen_error_log(); } else { - ASSERT_EQ((*column_element.column)[row], Field(handle_value)); + ASSERT_FIELD_EQ((*column_element.column)[row], Field(handle_value)) << gen_error_log(); } } else if (column_element.name == VERSION_COLUMN_NAME) { - ASSERT_EQ((*column_element.column)[row], Field(version_value)); + ASSERT_FIELD_EQ((*column_element.column)[row], Field(version_value)) << gen_error_log(); } else if (column_element.name == TAG_COLUMN_NAME) { - ASSERT_EQ((*column_element.column)[row], Field(NearestFieldType::Type(del_mark_value))); + ASSERT_FIELD_EQ((*column_element.column)[row], Field(NearestFieldType::Type(del_mark_value))) << gen_error_log(); } else { - ASSERT_EQ((*column_element.column)[row], fields_map.at(column_element.column_id)); + ASSERT_FIELD_EQ((*column_element.column)[row], fields_map.at(column_element.column_id)) << gen_error_log(); } } } @@ -154,10 +181,10 @@ class RegionBlockReaderTestFixture : public ::testing::Test return true; } - std::pair> getNormalTableInfoFields(const ColumnIDs & handle_ids, bool is_common_handle) const + std::pair> getNormalTableInfoFields(const ColumnIDs & pk_col_ids, bool is_common_handle) const { return getTableInfoAndFields( - handle_ids, + pk_col_ids, is_common_handle, ColumnIDValue(2, handle_value), ColumnIDValue(3, std::numeric_limits::max()), @@ -241,31 +268,45 @@ class RegionBlockReaderTestFixture : public ::testing::Test } }; -TEST_F(RegionBlockReaderTestFixture, PKIsNotHandle) +TEST_F(RegionBlockReaderTest, PKIsNotHandle) { auto [table_info, fields] = getNormalTableInfoFields({EXTRA_HANDLE_COLUMN_ID}, 
false); + ASSERT_EQ(table_info.is_common_handle, false); + ASSERT_EQ(table_info.pk_is_handle, false); + ASSERT_FALSE(table_info.getColumnInfo(2).hasPriKeyFlag()); + encodeColumns(table_info, fields, RowEncodeVersion::RowV2); auto decoding_schema = getDecodingStorageSchemaSnapshot(table_info); ASSERT_TRUE(decodeAndCheckColumns(decoding_schema, true)); } -TEST_F(RegionBlockReaderTestFixture, PKIsHandle) +TEST_F(RegionBlockReaderTest, PKIsHandle) { auto [table_info, fields] = getNormalTableInfoFields({2}, false); + ASSERT_EQ(table_info.is_common_handle, false); + ASSERT_EQ(table_info.pk_is_handle, true); + ASSERT_TRUE(table_info.getColumnInfo(2).hasPriKeyFlag()); + encodeColumns(table_info, fields, RowEncodeVersion::RowV2); auto decoding_schema = getDecodingStorageSchemaSnapshot(table_info); ASSERT_TRUE(decodeAndCheckColumns(decoding_schema, true)); } -TEST_F(RegionBlockReaderTestFixture, CommonHandle) +TEST_F(RegionBlockReaderTest, CommonHandle) { auto [table_info, fields] = getNormalTableInfoFields({2, 3, 4}, true); + ASSERT_EQ(table_info.is_common_handle, true); + ASSERT_EQ(table_info.pk_is_handle, false); + ASSERT_TRUE(table_info.getColumnInfo(2).hasPriKeyFlag()); + ASSERT_TRUE(table_info.getColumnInfo(3).hasPriKeyFlag()); + ASSERT_TRUE(table_info.getColumnInfo(4).hasPriKeyFlag()); + encodeColumns(table_info, fields, RowEncodeVersion::RowV2); auto decoding_schema = getDecodingStorageSchemaSnapshot(table_info); ASSERT_TRUE(decodeAndCheckColumns(decoding_schema, true)); } -TEST_F(RegionBlockReaderTestFixture, MissingColumnRowV2) +TEST_F(RegionBlockReaderTest, MissingColumnRowV2) { auto [table_info, fields] = getNormalTableInfoFields({EXTRA_HANDLE_COLUMN_ID}, false); encodeColumns(table_info, fields, RowEncodeVersion::RowV2); @@ -274,7 +315,7 @@ TEST_F(RegionBlockReaderTestFixture, MissingColumnRowV2) ASSERT_TRUE(decodeAndCheckColumns(new_decoding_schema, false)); } -TEST_F(RegionBlockReaderTestFixture, MissingColumnRowV1) +TEST_F(RegionBlockReaderTest, MissingColumnRowV1) { auto [table_info, fields] = getNormalTableInfoFields({EXTRA_HANDLE_COLUMN_ID}, false); encodeColumns(table_info, fields, RowEncodeVersion::RowV1); @@ -283,7 +324,7 @@ TEST_F(RegionBlockReaderTestFixture, MissingColumnRowV1) ASSERT_TRUE(decodeAndCheckColumns(new_decoding_schema, false)); } -TEST_F(RegionBlockReaderTestFixture, ExtraColumnRowV2) +TEST_F(RegionBlockReaderTest, ExtraColumnRowV2) { auto [table_info, fields] = getNormalTableInfoFields({EXTRA_HANDLE_COLUMN_ID}, false); encodeColumns(table_info, fields, RowEncodeVersion::RowV2); @@ -293,7 +334,7 @@ TEST_F(RegionBlockReaderTestFixture, ExtraColumnRowV2) ASSERT_TRUE(decodeAndCheckColumns(new_decoding_schema, true)); } -TEST_F(RegionBlockReaderTestFixture, ExtraColumnRowV1) +TEST_F(RegionBlockReaderTest, ExtraColumnRowV1) { auto [table_info, fields] = getNormalTableInfoFields({EXTRA_HANDLE_COLUMN_ID}, false); encodeColumns(table_info, fields, RowEncodeVersion::RowV1); @@ -303,7 +344,7 @@ TEST_F(RegionBlockReaderTestFixture, ExtraColumnRowV1) ASSERT_TRUE(decodeAndCheckColumns(new_decoding_schema, true)); } -TEST_F(RegionBlockReaderTestFixture, OverflowColumnRowV2) +TEST_F(RegionBlockReaderTest, OverflowColumnRowV2) { auto [table_info, fields] = getNormalTableInfoFields({EXTRA_HANDLE_COLUMN_ID}, false); encodeColumns(table_info, fields, RowEncodeVersion::RowV2); @@ -316,7 +357,7 @@ TEST_F(RegionBlockReaderTestFixture, OverflowColumnRowV2) ASSERT_TRUE(decodeAndCheckColumns(decoding_schema, true)); } -TEST_F(RegionBlockReaderTestFixture, OverflowColumnRowV1) 
+TEST_F(RegionBlockReaderTest, OverflowColumnRowV1) { auto [table_info, fields] = getNormalTableInfoFields({EXTRA_HANDLE_COLUMN_ID}, false); encodeColumns(table_info, fields, RowEncodeVersion::RowV1); @@ -329,17 +370,24 @@ TEST_F(RegionBlockReaderTestFixture, OverflowColumnRowV1) ASSERT_TRUE(decodeAndCheckColumns(decoding_schema, true)); } -TEST_F(RegionBlockReaderTestFixture, InvalidNULLRowV2) +TEST_F(RegionBlockReaderTest, InvalidNULLRowV2) +try { auto [table_info, fields] = getNormalTableInfoFields({EXTRA_HANDLE_COLUMN_ID}, false); + ASSERT_FALSE(table_info.getColumnInfo(11).hasNotNullFlag()); // col 11 is nullable + encodeColumns(table_info, fields, RowEncodeVersion::RowV2); + auto new_table_info = getTableInfoFieldsForInvalidNULLTest({EXTRA_HANDLE_COLUMN_ID}, false); + ASSERT_TRUE(new_table_info.getColumnInfo(11).hasNotNullFlag()); // col 11 is not null + auto new_decoding_schema = getDecodingStorageSchemaSnapshot(new_table_info); ASSERT_FALSE(decodeAndCheckColumns(new_decoding_schema, false)); ASSERT_ANY_THROW(decodeAndCheckColumns(new_decoding_schema, true)); } +CATCH -TEST_F(RegionBlockReaderTestFixture, InvalidNULLRowV1) +TEST_F(RegionBlockReaderTest, InvalidNULLRowV1) { auto [table_info, fields] = getNormalTableInfoFields({EXTRA_HANDLE_COLUMN_ID}, false); encodeColumns(table_info, fields, RowEncodeVersion::RowV1); diff --git a/dbms/src/TestUtils/TiFlashTestBasic.cpp b/dbms/src/TestUtils/TiFlashTestBasic.cpp index 18c166f453e..47dc82f983a 100644 --- a/dbms/src/TestUtils/TiFlashTestBasic.cpp +++ b/dbms/src/TestUtils/TiFlashTestBasic.cpp @@ -13,6 +13,8 @@ // limitations under the License. #include +#include +#include namespace DB::tests { @@ -27,4 +29,16 @@ ::testing::AssertionResult DataTypeCompare( else return ::testing::internal::EqFailure(lhs_expr, rhs_expr, lhs->getName(), rhs->getName(), false); } + +::testing::AssertionResult fieldCompare( + const char * lhs_expr, + const char * rhs_expr, + const Field & lhs, + const Field & rhs) +{ + if (lhs == rhs) + return ::testing::AssertionSuccess(); + return ::testing::internal::EqFailure(lhs_expr, rhs_expr, lhs.toString(), rhs.toString(), false); +} + } // namespace DB::tests diff --git a/dbms/src/TestUtils/TiFlashTestBasic.h b/dbms/src/TestUtils/TiFlashTestBasic.h index 5de50a71424..f96a679308c 100644 --- a/dbms/src/TestUtils/TiFlashTestBasic.h +++ b/dbms/src/TestUtils/TiFlashTestBasic.h @@ -87,6 +87,15 @@ ::testing::AssertionResult DataTypeCompare( #define ASSERT_DATATYPE_EQ(val1, val2) ASSERT_PRED_FORMAT2(::DB::tests::DataTypeCompare, val1, val2) #define EXPECT_DATATYPE_EQ(val1, val2) EXPECT_PRED_FORMAT2(::DB::tests::DataTypeCompare, val1, val2) +::testing::AssertionResult fieldCompare( + const char * lhs_expr, + const char * rhs_expr, + const Field & lhs, + const Field & rhs); + +#define ASSERT_FIELD_EQ(val1, val2) ASSERT_PRED_FORMAT2(::DB::tests::fieldCompare, val1, val2) +#define EXPECT_FIELD_EQ(val1, val2) EXPECT_PRED_FORMAT2(::DB::tests::fieldCompare, val1, val2) + // A simple helper for getting DataType from type name inline DataTypePtr typeFromString(const String & str) { From 892f85229ed3a0992167e5bd9c0a93125c6593a3 Mon Sep 17 00:00:00 2001 From: Wenxuan Date: Fri, 16 Sep 2022 16:47:00 +0800 Subject: [PATCH 17/17] storage: Introduce replaceData in the segment API (#5904) ref pingcap/tiflash#5237 --- .../src/Storages/DeltaMerge/DeltaMergeStore.h | 20 + .../DeltaMergeStore_InternalSegment.cpp | 42 ++ dbms/src/Storages/DeltaMerge/File/DMFile.h | 15 +- dbms/src/Storages/DeltaMerge/Segment.cpp | 59 +++ 
 dbms/src/Storages/DeltaMerge/Segment.h        |  27 +-
 .../DeltaMerge/tests/gtest_segment.cpp        | 116 +++++
 .../tests/gtest_segment_replace_data.cpp      | 490 ++++++++++++++++++
 .../tests/gtest_segment_test_basic.cpp        | 289 ++++++-----
 .../tests/gtest_segment_test_basic.h          |  19 +-
 .../tests/gtest_segment_test_randomized.cpp   |  67 ++-
 10 files changed, 1012 insertions(+), 132 deletions(-)
 create mode 100644 dbms/src/Storages/DeltaMerge/tests/gtest_segment_replace_data.cpp

diff --git a/dbms/src/Storages/DeltaMerge/DeltaMergeStore.h b/dbms/src/Storages/DeltaMerge/DeltaMergeStore.h
index d936920e422..59785233f2b 100644
--- a/dbms/src/Storages/DeltaMerge/DeltaMergeStore.h
+++ b/dbms/src/Storages/DeltaMerge/DeltaMergeStore.h
@@ -37,6 +37,8 @@ using LoggerPtr = std::shared_ptr;
 namespace DM
 {
+class DMFile;
+using DMFilePtr = std::shared_ptr;
 class Segment;
 using SegmentPtr = std::shared_ptr;
 using SegmentPair = std::pair;
@@ -495,6 +497,24 @@ class DeltaMergeStore : private boost::noncopyable
         MergeDeltaReason reason,
         SegmentSnapshotPtr segment_snap = nullptr);

+    /**
+     * Discard all data in the segment, and use the specified DMFile as the stable instead.
+     * The specified DMFile is safe to share among multiple segments.
+     *
+     * Note 1: This function will not enable GC for the new_stable_file for you, since you may want to share the same
+     * stable file among multiple segments. It is your responsibility to enable GC later.
+     *
+     * Note 2: You must ensure the specified new_stable_file has been managed by the storage pool, and has been written
+     * to the PageStorage's data. Otherwise there will be exceptions.
+     *
+     * Note 3: This API is subject to change in the future, as it relies on the knowledge that all current data
+     * in this segment is useless, which is a pretty tough requirement.
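+     *
+     * A rough usage sketch (hypothetical caller; as Note 2 says, `data_file` must already be tracked by the
+     * path pool delegator and written to PageStorage, mirroring what the replace-data tests below set up):
+     *
+     * @code
+     * auto new_seg = store->segmentDangerouslyReplaceData(dm_context, segment, data_file);
+     * if (new_seg != nullptr)      // an empty result means the segment was no longer valid
+     *     data_file->enableGC();   // only once you are done sharing it (see Note 1)
+     * @endcode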
+ */ + SegmentPtr segmentDangerouslyReplaceData( + DMContext & dm_context, + const SegmentPtr & segment, + const DMFilePtr & data_file); + // isSegmentValid should be protected by lock on `read_write_mutex` inline bool isSegmentValid(const std::shared_lock &, const SegmentPtr & segment) { diff --git a/dbms/src/Storages/DeltaMerge/DeltaMergeStore_InternalSegment.cpp b/dbms/src/Storages/DeltaMerge/DeltaMergeStore_InternalSegment.cpp index 1a0da119a7c..b16667ecd90 100644 --- a/dbms/src/Storages/DeltaMerge/DeltaMergeStore_InternalSegment.cpp +++ b/dbms/src/Storages/DeltaMerge/DeltaMergeStore_InternalSegment.cpp @@ -487,6 +487,48 @@ SegmentPtr DeltaMergeStore::segmentMergeDelta( return new_segment; } +SegmentPtr DeltaMergeStore::segmentDangerouslyReplaceData( + DMContext & dm_context, + const SegmentPtr & segment, + const DMFilePtr & data_file) +{ + LOG_FMT_INFO(log, "ReplaceData - Begin, segment={} data_file={}", segment->info(), data_file->path()); + + WriteBatches wbs(*storage_pool, dm_context.getWriteLimiter()); + + SegmentPtr new_segment; + { + std::unique_lock lock(read_write_mutex); + if (!isSegmentValid(lock, segment)) + { + LOG_FMT_DEBUG(log, "ReplaceData - Give up segment replace data because segment not valid, segment={} data_file={}", segment->simpleInfo(), data_file->path()); + return {}; + } + + auto segment_lock = segment->mustGetUpdateLock(); + new_segment = segment->dangerouslyReplaceData(segment_lock, dm_context, data_file, wbs); + + RUNTIME_CHECK(compare(segment->getRowKeyRange().getEnd(), new_segment->getRowKeyRange().getEnd()) == 0, segment->info(), new_segment->info()); + RUNTIME_CHECK(segment->segmentId() == new_segment->segmentId(), segment->info(), new_segment->info()); + + wbs.writeLogAndData(); + wbs.writeMeta(); + + segment->abandon(dm_context); + segments[segment->getRowKeyRange().getEnd()] = new_segment; + id_to_segment[segment->segmentId()] = new_segment; + + LOG_FMT_INFO(log, "ReplaceData - Finish, old_segment={} new_segment={}", segment->info(), new_segment->info()); + } + + wbs.writeRemoves(); + + if constexpr (DM_RUN_CHECK) + check(dm_context.db_context); + + return new_segment; +} + bool DeltaMergeStore::doIsSegmentValid(const SegmentPtr & segment) { if (segment->hasAbandoned()) diff --git a/dbms/src/Storages/DeltaMerge/File/DMFile.h b/dbms/src/Storages/DeltaMerge/File/DMFile.h index 48d4071d595..eb34d31feb9 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFile.h +++ b/dbms/src/Storages/DeltaMerge/File/DMFile.h @@ -268,11 +268,16 @@ class DMFile : private boost::noncopyable bool isColumnExist(ColId col_id) const { return column_stats.find(col_id) != column_stats.end(); } bool isSingleFileMode() const { return mode == Mode::SINGLE_FILE; } - String toString() const - { - return "{DMFile, packs: " + DB::toString(getPacks()) + ", rows: " + DB::toString(getRows()) + ", bytes: " + DB::toString(getBytes()) - + ", file size: " + DB::toString(getBytesOnDisk()) + "}"; - } + /* + * TODO: This function is currently unused. We could use it when: + * 1. The content is polished (e.g. including at least file ID, and use a format easy for grep). + * 2. Unify the place where we are currently printing out DMFile's `path` or `file_id`. 
+ */ + // String toString() const + // { + // return "{DMFile, packs: " + DB::toString(getPacks()) + ", rows: " + DB::toString(getRows()) + ", bytes: " + DB::toString(getBytes()) + // + ", file size: " + DB::toString(getBytesOnDisk()) + "}"; + // } DMConfigurationOpt & getConfiguration() { return configuration; } diff --git a/dbms/src/Storages/DeltaMerge/Segment.cpp b/dbms/src/Storages/DeltaMerge/Segment.cpp index ca702dc38f9..86dbec61db0 100644 --- a/dbms/src/Storages/DeltaMerge/Segment.cpp +++ b/dbms/src/Storages/DeltaMerge/Segment.cpp @@ -706,6 +706,65 @@ SegmentPtr Segment::applyMergeDelta(const Segment::Lock &, // return new_me; } +SegmentPtr Segment::dangerouslyReplaceDataForTest(DMContext & dm_context, // + const DMFilePtr & data_file) const +{ + WriteBatches wbs(dm_context.storage_pool, dm_context.getWriteLimiter()); + + auto lock = mustGetUpdateLock(); + auto new_segment = dangerouslyReplaceData(lock, dm_context, data_file, wbs); + + wbs.writeAll(); + return new_segment; +} + +SegmentPtr Segment::dangerouslyReplaceData(const Segment::Lock &, // + DMContext & dm_context, + const DMFilePtr & data_file, + WriteBatches & wbs) const +{ + LOG_FMT_DEBUG(log, "ReplaceData - Begin, data_file={}", data_file->path()); + + auto & storage_pool = dm_context.storage_pool; + auto delegate = dm_context.path_pool.getStableDiskDelegator(); + + RUNTIME_CHECK(delegate.getDTFilePath(data_file->fileId()) == data_file->parentPath()); + + // Always create a ref to the file to allow `data_file` being shared. + auto new_page_id = storage_pool.newDataPageIdForDTFile(delegate, __PRETTY_FUNCTION__); + // TODO: We could allow assigning multiple DMFiles in future. + auto ref_file = DMFile::restore( + dm_context.db_context.getFileProvider(), + data_file->fileId(), + new_page_id, + data_file->parentPath(), + DMFile::ReadMetaMode::all()); + wbs.data.putRefPage(new_page_id, data_file->pageId()); + + auto new_stable = std::make_shared(stable->getId()); + new_stable->setFiles({ref_file}, rowkey_range, &dm_context); + new_stable->saveMeta(wbs.meta); + + // Empty new delta + auto new_delta = std::make_shared(delta->getId()); + new_delta->saveMeta(wbs); + + auto new_me = std::make_shared(epoch + 1, // + rowkey_range, + segment_id, + next_segment_id, + new_delta, + new_stable); + new_me->serialize(wbs.meta); + + delta->recordRemoveColumnFilesPages(wbs); + stable->recordRemovePacksPages(wbs); + + LOG_FMT_DEBUG(log, "ReplaceData - Finish, old_me={} new_me={}", info(), new_me->info()); + + return new_me; +} + SegmentPair Segment::split(DMContext & dm_context, const ColumnDefinesPtr & schema_snap, std::optional opt_split_at, SplitMode opt_split_mode) const { WriteBatches wbs(dm_context.storage_pool, dm_context.getWriteLimiter()); diff --git a/dbms/src/Storages/DeltaMerge/Segment.h b/dbms/src/Storages/DeltaMerge/Segment.h index 318a5150068..1bda20c8bf4 100644 --- a/dbms/src/Storages/DeltaMerge/Segment.h +++ b/dbms/src/Storages/DeltaMerge/Segment.h @@ -333,6 +333,31 @@ class Segment : private boost::noncopyable WriteBatches & wbs, const StableValueSpacePtr & new_stable) const; + /** + * Only used in tests as a shortcut. + * Normally you should use `dangerouslyReplaceData`. + */ + [[nodiscard]] SegmentPtr dangerouslyReplaceDataForTest(DMContext & dm_context, const DMFilePtr & data_file) const; + + /** + * Discard all data in the current delta and stable layer, and use the specified DMFile as the stable instead. + * This API does not have a prepare & apply pair, as it should be quick enough. 
The specified DMFile is safe
+     * to share among multiple segments.
+     *
+     * Note 1: Should be protected behind the Segment update lock to ensure no new data will be appended to this
+     * segment during the function call. Otherwise that new data will be lost in the new segment.
+     *
+     * Note 2: This function will not enable GC for the new_stable_file for you, since you may want to share the same
+     * stable file among multiple segments. It is your responsibility to enable GC later.
+     *
+     * Note 3: You must ensure the specified new_stable_file has been managed by the storage pool, and has been written
+     * to the PageStorage's data. Otherwise there will be exceptions.
+     *
+     * Note 4: This API is subject to change in the future, as it relies on the knowledge that all current data
+     * in this segment is useless, which is a pretty tough requirement.
+     */
+    [[nodiscard]] SegmentPtr dangerouslyReplaceData(const Lock &, DMContext & dm_context, const DMFilePtr & data_file, WriteBatches & wbs) const;
+
     [[nodiscard]] SegmentPtr dropNextSegment(WriteBatches & wbs, const RowKeyRange & next_segment_range);

     /// Flush delta's cache packs.
@@ -468,7 +493,7 @@ class Segment : private boost::noncopyable
         bool relevant_place) const;

 private:
-    /// The version of this segment. After split / merge / merge delta, epoch got increased by 1.
+    /// The version of this segment. After split / merge / mergeDelta / dangerouslyReplaceData, the epoch is increased by 1.
     const UInt64 epoch;
     RowKeyRange rowkey_range;

diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_segment.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_segment.cpp
index 814e5443258..7c18b32a795 100644
--- a/dbms/src/Storages/DeltaMerge/tests/gtest_segment.cpp
+++ b/dbms/src/Storages/DeltaMerge/tests/gtest_segment.cpp
@@ -15,6 +15,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -46,6 +47,121 @@ bool shouldCompactStableWithTooMuchDataOutOfSegmentRange(const DMContext & conte
 }
 namespace tests
 {
+
+class SegmentFrameworkTest : public SegmentTestBasic
+{
+};
+
+TEST_F(SegmentFrameworkTest, PrepareWriteBlock)
+try
+{
+    reloadWithOptions(SegmentTestOptions{.is_common_handle = false});
+
+    auto s1_id = splitSegmentAt(DELTA_MERGE_FIRST_SEGMENT_ID, 10);
+    ASSERT_TRUE(s1_id.has_value());
+    auto s2_id = splitSegmentAt(*s1_id, 20);
+    ASSERT_TRUE(s2_id.has_value());
+
+    // s1 has range [10, 20)
+    {
+        auto [begin, end] = getSegmentKeyRange(*s1_id);
+        ASSERT_EQ(10, begin);
+        ASSERT_EQ(20, end);
+    }
+
+    {
+        // write_rows == segment_rows, start_key not specified
+        auto blocks = prepareWriteBlocksInSegmentRange(*s1_id, 10);
+        ASSERT_EQ(1, blocks.size());
+        auto handle_column = blocks[0].getByName(EXTRA_HANDLE_COLUMN_NAME).column;
+        const auto & handle_data = typeid_cast &>(*handle_column).getData();
+        ASSERT_EQ(PaddedPODArray({10, 11, 12, 13, 14, 15, 16, 17, 18, 19}), handle_data);
+    }
+    {
+        // write_rows > segment_rows, start_key not specified
+        auto blocks = prepareWriteBlocksInSegmentRange(*s1_id, 13);
+        ASSERT_EQ(2, blocks.size());
+        {
+            auto handle_column = blocks[0].getByName(EXTRA_HANDLE_COLUMN_NAME).column;
+            const auto & handle_data = typeid_cast &>(*handle_column).getData();
+            ASSERT_EQ(PaddedPODArray({10, 11, 12, 13, 14, 15, 16, 17, 18, 19}), handle_data);
+        }
+        {
+            auto handle_column = blocks[1].getByName(EXTRA_HANDLE_COLUMN_NAME).column;
+            const auto & handle_data = typeid_cast &>(*handle_column).getData();
+            ASSERT_EQ(PaddedPODArray({10, 11, 12}), handle_data);
+        }
+    }
+    {
+        // start_key specified, end_key - start_key < write_rows
+        auto
blocks = prepareWriteBlocksInSegmentRange(*s1_id, 2, /* at */ 16); + ASSERT_EQ(1, blocks.size()); + const auto & handle_column = blocks[0].getByName(EXTRA_HANDLE_COLUMN_NAME).column; + const auto & handle_data = typeid_cast &>(*handle_column).getData(); + ASSERT_EQ(PaddedPODArray({16, 17}), handle_data); + } + { + auto blocks = prepareWriteBlocksInSegmentRange(*s1_id, 4, /* at */ 16); + ASSERT_EQ(1, blocks.size()); + const auto & handle_column = blocks[0].getByName(EXTRA_HANDLE_COLUMN_NAME).column; + const auto & handle_data = typeid_cast &>(*handle_column).getData(); + ASSERT_EQ(PaddedPODArray({16, 17, 18, 19}), handle_data); + } + { + auto blocks = prepareWriteBlocksInSegmentRange(*s1_id, 5, /* at */ 16); + ASSERT_EQ(2, blocks.size()); + { + const auto & handle_column = blocks[0].getByName(EXTRA_HANDLE_COLUMN_NAME).column; + const auto & handle_data = typeid_cast &>(*handle_column).getData(); + ASSERT_EQ(PaddedPODArray({16, 17, 18, 19}), handle_data); + } + { + const auto & handle_column = blocks[1].getByName(EXTRA_HANDLE_COLUMN_NAME).column; + const auto & handle_data = typeid_cast &>(*handle_column).getData(); + ASSERT_EQ(PaddedPODArray({16}), handle_data); + } + } + { + auto blocks = prepareWriteBlocksInSegmentRange(*s1_id, 10, /* at */ 16); + ASSERT_EQ(3, blocks.size()); + { + const auto & handle_column = blocks[0].getByName(EXTRA_HANDLE_COLUMN_NAME).column; + const auto & handle_data = typeid_cast &>(*handle_column).getData(); + ASSERT_EQ(PaddedPODArray({16, 17, 18, 19}), handle_data); + } + { + const auto & handle_column = blocks[1].getByName(EXTRA_HANDLE_COLUMN_NAME).column; + const auto & handle_data = typeid_cast &>(*handle_column).getData(); + ASSERT_EQ(PaddedPODArray({16, 17, 18, 19}), handle_data); + } + { + const auto & handle_column = blocks[2].getByName(EXTRA_HANDLE_COLUMN_NAME).column; + const auto & handle_data = typeid_cast &>(*handle_column).getData(); + ASSERT_EQ(PaddedPODArray({16, 17}), handle_data); + } + } + { + // write rows < segment rows, start key not specified, should choose a random start. + auto blocks = prepareWriteBlocksInSegmentRange(*s1_id, 3); + ASSERT_EQ(1, blocks.size()); + ASSERT_EQ(3, blocks[0].rows()); + } + { + // Let's check whether the generated handles will be starting from 12, for at least once. + auto start_from_12 = 0; + for (size_t i = 0; i < 100; i++) + { + auto blocks = prepareWriteBlocksInSegmentRange(*s1_id, 3); + if (blocks[0].getByName(EXTRA_HANDLE_COLUMN_NAME).column->getInt(0) == 12) + start_from_12++; + } + ASSERT_TRUE(start_from_12 > 0); // We should hit at least 1 times in 100 iters. + ASSERT_TRUE(start_from_12 < 50); // We should not hit 50 times in 100 iters :) + } +} +CATCH + + class SegmentOperationTest : public SegmentTestBasic { protected: diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_segment_replace_data.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_segment_replace_data.cpp new file mode 100644 index 00000000000..8a09cc87594 --- /dev/null +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_segment_replace_data.cpp @@ -0,0 +1,490 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace DB +{ + +namespace DM +{ + +extern DMFilePtr writeIntoNewDMFile(DMContext & dm_context, + const ColumnDefinesPtr & schema_snap, + const BlockInputStreamPtr & input_stream, + UInt64 file_id, + const String & parent_path, + DMFileBlockOutputStream::Flags flags); + +namespace tests +{ + +class SegmentReplaceDataTest : public SegmentTestBasic + , public testing::WithParamInterface +{ +public: + SegmentReplaceDataTest() + { + replace_to_rows = GetParam(); + } + +protected: + UInt64 replace_to_rows{}; +}; + +INSTANTIATE_TEST_CASE_P( + ReplaceToNRows, + SegmentReplaceDataTest, + testing::Values(0, 37)); // Note: some tests rely on the exact value of 37. Adding arbitrary values may break test. + +class SegmentReplaceDataBasicTest : public SegmentTestBasic +{ +}; + +TEST_F(SegmentReplaceDataBasicTest, ThrowWhenDMFileNotInDelegator) +try +{ + auto delegator = storage_path_pool->getStableDiskDelegator(); + auto file_id = storage_pool->newDataPageIdForDTFile(delegator, __PRETTY_FUNCTION__); + auto input_stream = std::make_shared(Block{}); + auto dm_file = writeIntoNewDMFile( + *dm_context, + table_columns, + input_stream, + file_id, + delegator.choosePath(), + DMFileBlockOutputStream::Flags{}); + + ASSERT_THROW({ + replaceSegmentData({DELTA_MERGE_FIRST_SEGMENT_ID}, dm_file); + }, + DB::Exception); +} +CATCH + + +TEST_F(SegmentReplaceDataBasicTest, ThrowWhenDMFileNotInPS) +try +{ + auto delegator = storage_path_pool->getStableDiskDelegator(); + auto file_id = storage_pool->newDataPageIdForDTFile(delegator, __PRETTY_FUNCTION__); + auto input_stream = std::make_shared(Block{}); + auto dm_file = writeIntoNewDMFile( + *dm_context, + table_columns, + input_stream, + file_id, + delegator.choosePath(), + DMFileBlockOutputStream::Flags{}); + + delegator.addDTFile(file_id, dm_file->getBytesOnDisk(), dm_file->parentPath()); + + ASSERT_THROW({ + replaceSegmentData({DELTA_MERGE_FIRST_SEGMENT_ID}, dm_file); + }, + DB::Exception); +} +CATCH + + +TEST_P(SegmentReplaceDataTest, Basic) +try +{ + // Data in memtable should be discarded after replaceData + writeSegment(DELTA_MERGE_FIRST_SEGMENT_ID, 100); + ASSERT_EQ(100, getSegmentRowNum(DELTA_MERGE_FIRST_SEGMENT_ID)); + { + auto replace_block = prepareWriteBlock(/* from */ 0, /* to */ replace_to_rows); + replaceSegmentData({DELTA_MERGE_FIRST_SEGMENT_ID}, replace_block); + } + ASSERT_EQ(replace_to_rows, getSegmentRowNum(DELTA_MERGE_FIRST_SEGMENT_ID)); + + // Even flush will not "rescue" these memtable data. + flushSegmentCache(DELTA_MERGE_FIRST_SEGMENT_ID); + ASSERT_EQ(replace_to_rows, getSegmentRowNum(DELTA_MERGE_FIRST_SEGMENT_ID)); + + ASSERT_TRUE(storage_pool->log_storage_v3 != nullptr); + storage_pool->log_storage_v3->gc(/* not_skip */ true); + storage_pool->data_storage_v3->gc(/* not_skip */ true); + ASSERT_EQ(storage_pool->log_storage_v3->getNumberOfPages(), 0); + ASSERT_EQ(storage_pool->data_storage_v3->getNumberOfPages(), 2); // 1 DMFile, 1 Ref + PageId replaced_stable_id{}; + { + auto stable_page_ids = storage_pool->data_storage_v3->getAliveExternalPageIds(NAMESPACE_ID); + ASSERT_EQ(1, stable_page_ids.size()); + replaced_stable_id = *stable_page_ids.begin(); + } + + // Write some data and create a new stable. 
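+    // (Writing at replace_to_rows + 100 keeps the new rows clear of the range that was just replaced,
+    //  so the expected counts below are simply additive: 47 new rows plus replace_to_rows existing ones.)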
+    writeSegment(DELTA_MERGE_FIRST_SEGMENT_ID, 47, /* at */ replace_to_rows + 100);
+    ASSERT_EQ(47 + replace_to_rows, getSegmentRowNum(DELTA_MERGE_FIRST_SEGMENT_ID));
+    flushSegmentCache(DELTA_MERGE_FIRST_SEGMENT_ID);
+    ASSERT_EQ(47 + replace_to_rows, getSegmentRowNum(DELTA_MERGE_FIRST_SEGMENT_ID));
+    mergeSegmentDelta(DELTA_MERGE_FIRST_SEGMENT_ID);
+    ASSERT_EQ(47 + replace_to_rows, getSegmentRowNum(DELTA_MERGE_FIRST_SEGMENT_ID));
+
+    storage_pool->log_storage_v3->gc(/* not_skip */ true);
+    storage_pool->data_storage_v3->gc(/* not_skip */ true);
+    ASSERT_EQ(storage_pool->log_storage_v3->getNumberOfPages(), 0);
+    ASSERT_EQ(storage_pool->data_storage_v3->getNumberOfPages(), 1);
+
+    auto const stable_files = segments[DELTA_MERGE_FIRST_SEGMENT_ID]->getStable()->getDMFiles();
+    {
+        // Only the new stable DMFile is alive (and we should have a different DMFile).
+        auto stable_page_ids = storage_pool->data_storage_v3->getAliveExternalPageIds(NAMESPACE_ID);
+        ASSERT_EQ(1, stable_page_ids.size());
+        ASSERT_TRUE(stable_page_ids.count(stable_files[0]->fileId()));
+        ASSERT_FALSE(stable_page_ids.count(replaced_stable_id));
+    }
+
+    // Now let's replace data again. Everything in the current stable will be discarded.
+    {
+        auto replace_block = prepareWriteBlock(/* from */ 0, /* to */ replace_to_rows);
+        replaceSegmentData({DELTA_MERGE_FIRST_SEGMENT_ID}, replace_block);
+    }
+    ASSERT_EQ(replace_to_rows, getSegmentRowNum(DELTA_MERGE_FIRST_SEGMENT_ID));
+    {
+        storage_pool->data_storage_v3->gc(/* not_skip */ true);
+        auto stable_page_ids = storage_pool->data_storage_v3->getAliveExternalPageIds(NAMESPACE_ID);
+        ASSERT_EQ(1, stable_page_ids.size());
+        // The stable before replaceData should no longer be alive.
+        ASSERT_FALSE(stable_page_ids.count(stable_files[0]->fileId()));
+    }
+}
+CATCH
+
+TEST_P(SegmentReplaceDataTest, WriteAfterReplace)
+try
+{
+    if (replace_to_rows == 0)
+    {
+        return;
+    }
+
+    writeSegment(DELTA_MERGE_FIRST_SEGMENT_ID, 100);
+    ASSERT_EQ(100, getSegmentRowNum(DELTA_MERGE_FIRST_SEGMENT_ID));
+    {
+        auto replace_block = prepareWriteBlock(/* from */ 0, /* to */ replace_to_rows);
+        replaceSegmentData({DELTA_MERGE_FIRST_SEGMENT_ID}, replace_block);
+    }
+    ASSERT_EQ(replace_to_rows, getSegmentRowNum(DELTA_MERGE_FIRST_SEGMENT_ID));
+
+    writeSegment(DELTA_MERGE_FIRST_SEGMENT_ID, 47, /* at */ replace_to_rows - 10); // 10 rows will be overlapped
+    ASSERT_EQ(37 + replace_to_rows, getSegmentRowNum(DELTA_MERGE_FIRST_SEGMENT_ID));
+    flushSegmentCache(DELTA_MERGE_FIRST_SEGMENT_ID);
+    mergeSegmentDelta(DELTA_MERGE_FIRST_SEGMENT_ID);
+    ASSERT_EQ(37 + replace_to_rows, getSegmentRowNum(DELTA_MERGE_FIRST_SEGMENT_ID));
+    ASSERT_EQ(47 + replace_to_rows, getSegmentRowNumWithoutMVCC(DELTA_MERGE_FIRST_SEGMENT_ID));
+}
+CATCH
+
+
+/**
+ * This test verifies that the DMFile is never marked as GC-able during different segment operations.
+ * Otherwise, the DMFile would be unsafe to use in another replaceData.
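+ * (A DMFile only becomes GC-able after enableGC() is explicitly called on it; replaceData itself never
+ * calls that, which is exactly what this test pins down across flush / mergeDelta on the owning segment.)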
+ */ +TEST_F(SegmentReplaceDataBasicTest, DMFileGCIsUnchanged) +try +{ + WriteBatches ingest_wbs(dm_context->storage_pool, dm_context->getWriteLimiter()); + + auto delegator = storage_path_pool->getStableDiskDelegator(); + auto file_id = storage_pool->newDataPageIdForDTFile(delegator, __PRETTY_FUNCTION__); + auto input_stream = std::make_shared(Block{}); + auto dm_file = writeIntoNewDMFile( + *dm_context, + table_columns, + input_stream, + file_id, + delegator.choosePath(), + DMFileBlockOutputStream::Flags{}); + + ingest_wbs.data.putExternal(file_id, /* tag */ 0); + ingest_wbs.writeLogAndData(); + delegator.addDTFile(file_id, dm_file->getBytesOnDisk(), dm_file->parentPath()); + + replaceSegmentData({DELTA_MERGE_FIRST_SEGMENT_ID}, dm_file); + ASSERT_EQ(0, getSegmentRowNum(DELTA_MERGE_FIRST_SEGMENT_ID)); + + ingest_wbs.rollbackWrittenLogAndData(); + + // Note: we have not yet enabled GC for the dmfile here. + ASSERT_FALSE(dm_file->canGC()); + { + auto stable_page_ids = storage_pool->data_storage_v3->getAliveExternalPageIds(NAMESPACE_ID); + ASSERT_TRUE(stable_page_ids.count(dm_file->fileId())); + } + + writeSegment(DELTA_MERGE_FIRST_SEGMENT_ID, 47); + flushSegmentCache(DELTA_MERGE_FIRST_SEGMENT_ID); + mergeSegmentDelta(DELTA_MERGE_FIRST_SEGMENT_ID); + + // Even when the stable is replaced, the DMFile should not be marked as GCable. + ASSERT_FALSE(dm_file->canGC()); + { + storage_pool->data_storage_v3->gc(/* not_skip */ true); + auto stable_page_ids = storage_pool->data_storage_v3->getAliveExternalPageIds(NAMESPACE_ID); + ASSERT_EQ(1, stable_page_ids.size()); + ASSERT_FALSE(stable_page_ids.count(dm_file->fileId())); + } + + // TODO: May be check split and merge as well. + + dm_file->enableGC(); +} +CATCH + + +TEST_P(SegmentReplaceDataTest, MultipleSegmentsSharingDMFile) +try +{ + std::optional seg_right_id; + Block block{}; + + if (replace_to_rows == 0) + { + seg_right_id = splitSegmentAt(DELTA_MERGE_FIRST_SEGMENT_ID, 0); + // block is empty, split point doesn't matter. 
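+            // (After replaceSegmentData below, both segments will reference the same, empty stable DMFile;
+            //  areSegmentsSharingStable asserts exactly that.)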
+    }
+    else
+    {
+        seg_right_id = splitSegmentAt(DELTA_MERGE_FIRST_SEGMENT_ID, replace_to_rows - 10); /* right seg should contain 10 rows after replacing data */
+        block = prepareWriteBlock(0, replace_to_rows);
+    }
+
+    ASSERT_TRUE(seg_right_id.has_value());
+    replaceSegmentData({*seg_right_id, DELTA_MERGE_FIRST_SEGMENT_ID}, block);
+    ASSERT_TRUE(areSegmentsSharingStable({*seg_right_id, DELTA_MERGE_FIRST_SEGMENT_ID}));
+
+    UInt64 expected_left_rows, expected_right_rows;
+    if (replace_to_rows == 0)
+    {
+        expected_left_rows = 0;
+        expected_right_rows = 0;
+    }
+    else
+    {
+        expected_left_rows = replace_to_rows - 10;
+        expected_right_rows = 10;
+    }
+    ASSERT_EQ(expected_left_rows, getSegmentRowNum(DELTA_MERGE_FIRST_SEGMENT_ID));
+    ASSERT_EQ(expected_right_rows, getSegmentRowNum(*seg_right_id));
+
+    // Now let's write something and perform merge delta for the right seg
+    writeSegment(*seg_right_id, 151);
+    expected_right_rows += 151;
+    ASSERT_EQ(expected_right_rows, getSegmentRowNumWithoutMVCC(*seg_right_id));
+    flushSegmentCache(*seg_right_id);
+    mergeSegmentDelta(*seg_right_id);
+    ASSERT_EQ(expected_right_rows, getSegmentRowNumWithoutMVCC(*seg_right_id));
+    // Left is not affected
+    ASSERT_EQ(expected_left_rows, getSegmentRowNum(DELTA_MERGE_FIRST_SEGMENT_ID));
+    ASSERT_FALSE(areSegmentsSharingStable({*seg_right_id, DELTA_MERGE_FIRST_SEGMENT_ID}));
+
+    ASSERT_TRUE(storage_pool->log_storage_v3 != nullptr);
+    storage_pool->data_storage_v3->gc(/* not_skip */ true);
+    auto stable_page_ids = storage_pool->data_storage_v3->getAliveExternalPageIds(NAMESPACE_ID);
+    ASSERT_EQ(2, stable_page_ids.size());
+
+    mergeSegment({DELTA_MERGE_FIRST_SEGMENT_ID, *seg_right_id});
+    storage_pool->data_storage_v3->gc(/* not_skip */ true);
+    stable_page_ids = storage_pool->data_storage_v3->getAliveExternalPageIds(NAMESPACE_ID);
+    ASSERT_EQ(1, stable_page_ids.size());
+}
+CATCH
+
+
+TEST_F(SegmentReplaceDataBasicTest, ReplaceMultipleTimes)
+try
+{
+    for (size_t i = 0; i < 20; ++i)
+    {
+        auto rows = std::uniform_int_distribution<>(1, 100)(random);
+        auto block = prepareWriteBlock(0, rows);
+        replaceSegmentData({DELTA_MERGE_FIRST_SEGMENT_ID}, block);
+        ASSERT_EQ(rows, getSegmentRowNum(DELTA_MERGE_FIRST_SEGMENT_ID));
+
+        // Writing some rows doesn't affect the next replaceData
+        writeSegment(DELTA_MERGE_FIRST_SEGMENT_ID);
+    }
+
+    ASSERT_TRUE(storage_pool->log_storage_v3 != nullptr);
+    storage_pool->data_storage_v3->gc(/* not_skip */ true);
+    auto stable_page_ids = storage_pool->data_storage_v3->getAliveExternalPageIds(NAMESPACE_ID);
+    ASSERT_EQ(1, stable_page_ids.size());
+}
+CATCH
+
+
+TEST_P(SegmentReplaceDataTest, ReplaceSameDMFileMultipleTimes)
+try
+{
+    auto block = prepareWriteBlock(0, replace_to_rows);
+
+    WriteBatches ingest_wbs(dm_context->storage_pool, dm_context->getWriteLimiter());
+
+    auto delegator = storage_path_pool->getStableDiskDelegator();
+    auto file_id = storage_pool->newDataPageIdForDTFile(delegator, __PRETTY_FUNCTION__);
+    auto input_stream = std::make_shared(block);
+    auto dm_file = writeIntoNewDMFile(
+        *dm_context,
+        table_columns,
+        input_stream,
+        file_id,
+        delegator.choosePath(),
+        DMFileBlockOutputStream::Flags{});
+
+    ingest_wbs.data.putExternal(file_id, /* tag */ 0);
+    ingest_wbs.writeLogAndData();
+    delegator.addDTFile(file_id, dm_file->getBytesOnDisk(), dm_file->parentPath());
+
+    for (size_t i = 0; i < 20; ++i)
+    {
+        replaceSegmentData({DELTA_MERGE_FIRST_SEGMENT_ID}, block);
+        ASSERT_EQ(replace_to_rows, getSegmentRowNum(DELTA_MERGE_FIRST_SEGMENT_ID));
+        // Writing some rows doesn't affect the next replaceData
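+        // (The same DMFile is applied in every iteration; it stays alive because enableGC()
+        // is only called after the loop finishes.)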
+        writeSegment(DELTA_MERGE_FIRST_SEGMENT_ID);
+    }
+
+    dm_file->enableGC();
+    ingest_wbs.rollbackWrittenLogAndData();
+
+    ASSERT_TRUE(storage_pool->log_storage_v3 != nullptr);
+    storage_pool->data_storage_v3->gc(/* not_skip */ true);
+    auto stable_page_ids = storage_pool->data_storage_v3->getAliveExternalPageIds(NAMESPACE_ID);
+    ASSERT_EQ(1, stable_page_ids.size());
+}
+CATCH
+
+
+/**
+ * The out-of-bound data introduced by replaceData should not be seen after the merge.
+ */
+TEST_F(SegmentReplaceDataBasicTest, ReplaceOutOfBoundAndMerge)
+try
+{
+    auto seg_right_id = splitSegmentAt(DELTA_MERGE_FIRST_SEGMENT_ID, 100, Segment::SplitMode::Physical);
+    ASSERT_TRUE(seg_right_id.has_value());
+
+    writeSegment(*seg_right_id, 10);
+    ASSERT_EQ(0, getSegmentRowNumWithoutMVCC(DELTA_MERGE_FIRST_SEGMENT_ID));
+    ASSERT_EQ(10, getSegmentRowNumWithoutMVCC(*seg_right_id));
+
+    auto block = prepareWriteBlock(0, 300);
+    // Only replace the left seg, whose range is [-∞, 100), with this block.
+    replaceSegmentData({DELTA_MERGE_FIRST_SEGMENT_ID}, block);
+    ASSERT_EQ(100, getSegmentRowNumWithoutMVCC(DELTA_MERGE_FIRST_SEGMENT_ID));
+    ASSERT_EQ(10, getSegmentRowNumWithoutMVCC(*seg_right_id));
+
+    mergeSegment({DELTA_MERGE_FIRST_SEGMENT_ID, *seg_right_id});
+    ASSERT_EQ(110, getSegmentRowNumWithoutMVCC(DELTA_MERGE_FIRST_SEGMENT_ID));
+
+    ASSERT_TRUE(storage_pool->log_storage_v3 != nullptr);
+    storage_pool->log_storage_v3->gc(/* not_skip */ true);
+    storage_pool->data_storage_v3->gc(/* not_skip */ true);
+    ASSERT_EQ(storage_pool->log_storage_v3->getNumberOfPages(), 0);
+    ASSERT_EQ(storage_pool->data_storage_v3->getNumberOfPages(), 1);
+    auto stable_page_ids = storage_pool->data_storage_v3->getAliveExternalPageIds(NAMESPACE_ID);
+    ASSERT_EQ(1, stable_page_ids.size());
+}
+CATCH
+
+
+TEST_F(SegmentReplaceDataBasicTest, ReleaseExistingSharedDMFile)
+try
+{
+    writeSegment(DELTA_MERGE_FIRST_SEGMENT_ID, 500, /* at */ 0);
+    flushSegmentCache(DELTA_MERGE_FIRST_SEGMENT_ID);
+    mergeSegmentDelta(DELTA_MERGE_FIRST_SEGMENT_ID);
+
+    // Use logical split to create two segments sharing the same stable.
+    auto seg_right_id = splitSegmentAt(DELTA_MERGE_FIRST_SEGMENT_ID, 100, Segment::SplitMode::Logical);
+    ASSERT_TRUE(seg_right_id.has_value());
+    ASSERT_TRUE(areSegmentsSharingStable({DELTA_MERGE_FIRST_SEGMENT_ID, *seg_right_id}));
+
+    ASSERT_EQ(100, getSegmentRowNumWithoutMVCC(DELTA_MERGE_FIRST_SEGMENT_ID));
+    ASSERT_EQ(400, getSegmentRowNumWithoutMVCC(*seg_right_id));
+
+    auto shared_dm_files = segments[*seg_right_id]->getStable()->getDMFiles();
+
+    // As the stable is shared after the logical split, we should only have 1 alive external file.
+    ASSERT_TRUE(storage_pool->log_storage_v3 != nullptr);
+    storage_pool->data_storage_v3->gc(/* not_skip */ true);
+    auto stable_page_ids = storage_pool->data_storage_v3->getAliveExternalPageIds(NAMESPACE_ID);
+
+    // Now let's replace one segment.
+    auto block = prepareWriteBlock(0, 300);
+    replaceSegmentData({DELTA_MERGE_FIRST_SEGMENT_ID}, block);
+
+    ASSERT_EQ(100, getSegmentRowNumWithoutMVCC(DELTA_MERGE_FIRST_SEGMENT_ID)); // We should only see [0, 100)
+    ASSERT_EQ(400, getSegmentRowNumWithoutMVCC(*seg_right_id));
+
+    // The previously-shared stable should still be valid.
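+    // The right segment still references the shared DMFile, while the left segment now owns a
+    // brand new stable, so GC should see exactly 2 alive external pages below.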
+ storage_pool->data_storage_v3->gc(/* not_skip */ true); + stable_page_ids = storage_pool->data_storage_v3->getAliveExternalPageIds(NAMESPACE_ID); + ASSERT_EQ(2, stable_page_ids.size()); + ASSERT_TRUE(stable_page_ids.count(shared_dm_files[0]->fileId())); +} +CATCH + + +TEST_F(SegmentReplaceDataBasicTest, ReadSnapshotBeforeReplace) +try +{ + writeSegment(DELTA_MERGE_FIRST_SEGMENT_ID, 400); // 400 in stable + flushSegmentCache(DELTA_MERGE_FIRST_SEGMENT_ID); + mergeSegmentDelta(DELTA_MERGE_FIRST_SEGMENT_ID); + writeSegment(DELTA_MERGE_FIRST_SEGMENT_ID, 41); // 41 in memtable + + auto segment = segments[DELTA_MERGE_FIRST_SEGMENT_ID]; + auto in_stream = segment->getInputStreamRaw(*dm_context, *tableColumns()); + + // Now let's replace data. + auto block = prepareWriteBlock(0, 233); + replaceSegmentData({DELTA_MERGE_FIRST_SEGMENT_ID}, block); + + // There is a snapshot alive, so we should have 2 stables. + storage_pool->data_storage_v3->gc(/* not_skip */ true); + auto stable_page_ids = storage_pool->data_storage_v3->getAliveExternalPageIds(NAMESPACE_ID); + ASSERT_EQ(2, stable_page_ids.size()); + + // Continue the read + auto n_rows = DB::tests::getInputStreamNRows(in_stream); + ASSERT_EQ(441, n_rows); + + ASSERT_EQ(233, getSegmentRowNumWithoutMVCC(DELTA_MERGE_FIRST_SEGMENT_ID)); + + // Snapshot is dropped. + in_stream = {}; + storage_pool->data_storage_v3->gc(/* not_skip */ true); + stable_page_ids = storage_pool->data_storage_v3->getAliveExternalPageIds(NAMESPACE_ID); + ASSERT_EQ(1, stable_page_ids.size()); +} +CATCH + + +} // namespace tests +} // namespace DM +} // namespace DB diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_segment_test_basic.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_segment_test_basic.cpp index 084a88de0f7..0860059af1d 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_segment_test_basic.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_segment_test_basic.cpp @@ -62,6 +62,7 @@ void SegmentTestBasic::reloadWithOptions(SegmentTestOptions config) size_t SegmentTestBasic::getSegmentRowNumWithoutMVCC(PageId segment_id) { + RUNTIME_CHECK(segments.find(segment_id) != segments.end()); auto segment = segments[segment_id]; auto in = segment->getInputStreamRaw(*dm_context, *tableColumns()); return getInputStreamNRows(in); @@ -69,6 +70,7 @@ size_t SegmentTestBasic::getSegmentRowNumWithoutMVCC(PageId segment_id) size_t SegmentTestBasic::getSegmentRowNum(PageId segment_id) { + RUNTIME_CHECK(segments.find(segment_id) != segments.end()); auto segment = segments[segment_id]; auto in = segment->getInputStream(*dm_context, *tableColumns(), {segment->getRowKeyRange()}); return getInputStreamNRows(in); @@ -78,6 +80,7 @@ std::optional SegmentTestBasic::splitSegment(PageId segment_id, Segment: { LOG_FMT_INFO(logger_op, "splitSegment, segment_id={} split_mode={}", segment_id, magic_enum::enum_name(split_mode)); + RUNTIME_CHECK(segments.find(segment_id) != segments.end()); auto origin_segment = segments[segment_id]; size_t origin_segment_row_num = getSegmentRowNum(segment_id); @@ -124,6 +127,7 @@ std::optional SegmentTestBasic::splitSegmentAt(PageId segment_id, Int64 split_at_key = RowKeyValue::fromHandle(split_at); } + RUNTIME_CHECK(segments.find(segment_id) != segments.end()); auto origin_segment = segments[segment_id]; size_t origin_segment_row_num = getSegmentRowNum(segment_id); @@ -167,9 +171,8 @@ void SegmentTestBasic::mergeSegment(const std::vector & segments_id, boo for (const auto segment_id : segments_id) { - auto it = segments.find(segment_id); - RUNTIME_CHECK(it != 
segments.end(), segment_id); - segments_to_merge.emplace_back(it->second); + RUNTIME_CHECK(segments.find(segment_id) != segments.end()); + segments_to_merge.emplace_back(segments[segment_id]); auto rows = getSegmentRowNum(segment_id); segments_rows.emplace_back(rows); @@ -203,6 +206,7 @@ void SegmentTestBasic::mergeSegmentDelta(PageId segment_id, bool check_rows) { LOG_FMT_INFO(logger_op, "mergeSegmentDelta, segment_id={}", segment_id); + RUNTIME_CHECK(segments.find(segment_id) != segments.end()); auto segment = segments[segment_id]; size_t segment_row_num = getSegmentRowNum(segment_id); SegmentPtr merged_segment = segment->mergeDelta(*dm_context, tableColumns()); @@ -218,6 +222,7 @@ void SegmentTestBasic::flushSegmentCache(PageId segment_id) { LOG_FMT_INFO(logger_op, "flushSegmentCache, segment_id={}", segment_id); + RUNTIME_CHECK(segments.find(segment_id) != segments.end()); auto segment = segments[segment_id]; size_t segment_row_num = getSegmentRowNum(segment_id); segment->flushCache(*dm_context); @@ -227,9 +232,8 @@ void SegmentTestBasic::flushSegmentCache(PageId segment_id) std::pair SegmentTestBasic::getSegmentKeyRange(PageId segment_id) { - auto segment_it = segments.find(segment_id); - EXPECT_TRUE(segment_it != segments.end()); - const auto & segment = segment_it->second; + RUNTIME_CHECK(segments.find(segment_id) != segments.end()); + const auto & segment = segments[segment_id]; Int64 start_key, end_key; if (!options.is_common_handle) @@ -263,80 +267,125 @@ std::pair SegmentTestBasic::getSegmentKeyRange(PageId segment_id) return {start_key, end_key}; } -void SegmentTestBasic::writeSegment(PageId segment_id, UInt64 write_rows, std::optional begin_key) +Block SegmentTestBasic::prepareWriteBlock(Int64 start_key, Int64 end_key, bool is_deleted) { - LOG_FMT_INFO(logger_op, "writeSegment, segment_id={} rows={}", segment_id, write_rows); + RUNTIME_CHECK(start_key <= end_key); + if (end_key == start_key) + return Block{}; + version++; + return DMTestEnv::prepareSimpleWriteBlock( + start_key, // + end_key, + false, + version, + DMTestEnv::pk_name, + EXTRA_HANDLE_COLUMN_ID, + options.is_common_handle ? EXTRA_HANDLE_COLUMN_STRING_TYPE : EXTRA_HANDLE_COLUMN_INT_TYPE, + options.is_common_handle, + 1, + true, + is_deleted); +} - if (write_rows == 0) - return; +std::vector SegmentTestBasic::prepareWriteBlocksInSegmentRange(PageId segment_id, UInt64 total_write_rows, std::optional write_start_key, bool is_deleted) +{ + RUNTIME_CHECK(total_write_rows < std::numeric_limits::max()); - RUNTIME_CHECK(write_rows > 0); - RUNTIME_CHECK(write_rows < std::numeric_limits::max()); + RUNTIME_CHECK(segments.find(segment_id) != segments.end()); + auto [segment_start_key, segment_end_key] = getSegmentKeyRange(segment_id); + auto segment_max_rows = static_cast(segment_end_key - segment_start_key); - auto segment = segments[segment_id]; - size_t segment_row_num = getSegmentRowNumWithoutMVCC(segment_id); - auto [start_key, end_key] = getSegmentKeyRange(segment_id); + if (segment_max_rows == 0) + return {}; - LOG_FMT_DEBUG(logger, "write to segment, segment={} segment_rows={} start_key={} end_key={}", segment->info(), segment_row_num, start_key, end_key); + if (write_start_key.has_value()) + { + // When write start key is specified, the caller must know exactly the segment range. 
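+        // That is, `*write_start_key` must fall inside [segment_start_key, segment_end_key),
+        // as enforced by the checks below.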
+        RUNTIME_CHECK(*write_start_key >= segment_start_key);
+        RUNTIME_CHECK(static_cast(segment_end_key - *write_start_key) > 0);
+    }
 
-    auto segment_max_rows = static_cast(end_key - start_key);
-    if (segment_max_rows == 0)
-        return;
-    // If the length of segment key range is larger than `write_rows`, then
-    // write the new data with the same tso in one block.
-    // Otherwise create multiple block with increasing tso until the `remain_row_num`
-    // down to 0.
-    UInt64 remain_row_num = 0;
-    if (segment_max_rows > write_rows)
+    if (!write_start_key.has_value())
     {
-        if (begin_key.has_value())
+        // When the write start key is unspecified, we will:
+        // A. If the segment is large enough, randomly pick a write start key in the range.
+        // B. If the segment is small, write from the beginning.
+        if (segment_max_rows > total_write_rows)
         {
-            RUNTIME_CHECK(begin_key >= start_key, *begin_key, start_key);
-            RUNTIME_CHECK(begin_key < end_key, *begin_key, end_key);
-            start_key = *begin_key;
+            write_start_key = std::uniform_int_distribution{segment_start_key, segment_end_key - static_cast(total_write_rows)}(random);
         }
         else
         {
-            // The segment range is large enough, let's randomly pick a start key:
-            // Suppose we have segment range = [0, 11), which could contain at most 11 rows.
-            // Now we want to write 10 rows -- The write start key could be randomized in [0, 1].
-            start_key = std::uniform_int_distribution{start_key, end_key - static_cast(write_rows)}(random);
+            write_start_key = segment_start_key;
         }
-        end_key = start_key + write_rows;
-    }
-    else
-    {
-        remain_row_num = write_rows - segment_max_rows;
-        RUNTIME_CHECK(!begin_key.has_value()); // Currently we don't support specifying start key when segment is small
     }
+
+    auto max_write_rows_each_round = static_cast(segment_end_key - *write_start_key);
+    RUNTIME_CHECK(max_write_rows_each_round > 0);
+    RUNTIME_CHECK(*write_start_key >= segment_start_key);
+
+    std::vector blocks;
+
+    // If the segment key range is large enough to hold `total_write_rows`, all rows are written
+    // in one block. Otherwise, multiple blocks are created (each with an increasing tso) until
+    // `remaining_rows` drops to 0.
+    UInt64 remaining_rows = total_write_rows;
+    while (remaining_rows > 0)
     {
-        // write to segment and not flush
-        LOG_FMT_DEBUG(logger, "write block to segment, block_range=[{}, {})", start_key, end_key);
-        Block block = DMTestEnv::prepareSimpleWriteBlock(start_key, end_key, false, version, DMTestEnv::pk_name, EXTRA_HANDLE_COLUMN_ID, options.is_common_handle ? 
EXTRA_HANDLE_COLUMN_STRING_TYPE : EXTRA_HANDLE_COLUMN_INT_TYPE, options.is_common_handle); - segment->write(*dm_context, std::move(block), false); - version++; + UInt64 write_rows_this_round = std::min(remaining_rows, max_write_rows_each_round); + RUNTIME_CHECK(write_rows_this_round > 0); + Int64 write_end_key_this_round = *write_start_key + static_cast(write_rows_this_round); + RUNTIME_CHECK(write_end_key_this_round <= segment_end_key); + + Block block = prepareWriteBlock(*write_start_key, write_end_key_this_round, is_deleted); + blocks.emplace_back(block); + remaining_rows -= write_rows_this_round; + + LOG_FMT_DEBUG(logger, "Prepared block for write, block_range=[{}, {}) (rows={}), total_rows_to_write={} remain_rows={}", // + *write_start_key, + write_end_key_this_round, + write_rows_this_round, + total_write_rows, + remaining_rows); } - while (remain_row_num > 0) + + return blocks; +} + +void SegmentTestBasic::writeSegment(PageId segment_id, UInt64 write_rows, std::optional start_at) +{ + LOG_FMT_INFO(logger_op, "writeSegment, segment_id={} write_rows={}", segment_id, write_rows); + + if (write_rows == 0) + return; + + RUNTIME_CHECK(segments.find(segment_id) != segments.end()); + auto segment = segments[segment_id]; + size_t segment_row_num = getSegmentRowNumWithoutMVCC(segment_id); + auto [start_key, end_key] = getSegmentKeyRange(segment_id); + LOG_FMT_DEBUG(logger, "write to segment, segment={} segment_rows={} start_key={} end_key={}", segment->info(), segment_row_num, start_key, end_key); + + auto blocks = prepareWriteBlocksInSegmentRange(segment_id, write_rows, start_at, /* is_deleted */ false); + for (const auto & block : blocks) { - UInt64 write_num = std::min(remain_row_num, segment_max_rows); - LOG_FMT_DEBUG(logger, "write block to segment, block_range=[{}, {})", start_key, write_num + start_key); - Block block = DMTestEnv::prepareSimpleWriteBlock(start_key, write_num + start_key, false, version, DMTestEnv::pk_name, EXTRA_HANDLE_COLUMN_ID, options.is_common_handle ? 
EXTRA_HANDLE_COLUMN_STRING_TYPE : EXTRA_HANDLE_COLUMN_INT_TYPE, options.is_common_handle); - segment->write(*dm_context, std::move(block), false); - remain_row_num -= write_num; - version++; + segment->write(*dm_context, block, false); } + EXPECT_EQ(getSegmentRowNumWithoutMVCC(segment_id), segment_row_num + write_rows); operation_statistics["write"]++; } -void SegmentTestBasic::ingestDTFileIntoSegment(PageId segment_id, UInt64 write_rows) +void SegmentTestBasic::ingestDTFileIntoSegment(PageId segment_id, UInt64 write_rows, std::optional start_at) { - LOG_FMT_INFO(logger_op, "ingestDTFileIntoSegment, segment_id={} rows={}", segment_id, write_rows); + LOG_FMT_INFO(logger_op, "ingestDTFileIntoSegment, segment_id={} write_rows={}", segment_id, write_rows); if (write_rows == 0) return; - auto write_data = [&](SegmentPtr segment, const Block & block) { + RUNTIME_CHECK(segments.find(segment_id) != segments.end()); + + auto ingest_data = [&](SegmentPtr segment, const Block & block) { WriteBatches ingest_wbs(dm_context->storage_pool, dm_context->getWriteLimiter()); auto delegator = storage_path_pool->getStableDiskDelegator(); auto parent_path = delegator.choosePath(); @@ -370,85 +419,37 @@ void SegmentTestBasic::ingestDTFileIntoSegment(PageId segment_id, UInt64 write_r auto segment = segments[segment_id]; size_t segment_row_num = getSegmentRowNumWithoutMVCC(segment_id); auto [start_key, end_key] = getSegmentKeyRange(segment_id); + LOG_FMT_DEBUG(logger, "ingest to segment, segment={} segment_rows={} start_key={} end_key={}", segment->info(), segment_row_num, start_key, end_key); - auto segment_max_rows = static_cast(end_key - start_key); - if (segment_max_rows == 0) - return; - // If the length of segment key range is larger than `write_rows`, then - // write the new data with the same tso in one block. - // Otherwise create multiple block with increasing tso until the `remain_row_num` - // down to 0. - UInt64 remain_row_num = 0; - if (segment_max_rows > write_rows) - { - start_key = std::uniform_int_distribution{start_key, end_key - static_cast(write_rows)}(random); - end_key = start_key + write_rows; - } - else + auto blocks = prepareWriteBlocksInSegmentRange(segment_id, write_rows, start_at, /* is_deleted */ false); + for (const auto & block : blocks) { - remain_row_num = write_rows - segment_max_rows; - } - { - // write to segment and not flush - LOG_FMT_DEBUG(logger, "ingest block to segment, block_range=[{}, {})", start_key, end_key); - Block block = DMTestEnv::prepareSimpleWriteBlock(start_key, end_key, false, version, DMTestEnv::pk_name, EXTRA_HANDLE_COLUMN_ID, options.is_common_handle ? EXTRA_HANDLE_COLUMN_STRING_TYPE : EXTRA_HANDLE_COLUMN_INT_TYPE, options.is_common_handle); - write_data(segment, block); - version++; - } - while (remain_row_num > 0) - { - UInt64 write_num = std::min(remain_row_num, segment_max_rows); - LOG_FMT_DEBUG(logger, "ingest block to segment, block_range=[{}, {})", start_key, write_num + start_key); - Block block = DMTestEnv::prepareSimpleWriteBlock(start_key, write_num + start_key, false, version, DMTestEnv::pk_name, EXTRA_HANDLE_COLUMN_ID, options.is_common_handle ? 
EXTRA_HANDLE_COLUMN_STRING_TYPE : EXTRA_HANDLE_COLUMN_INT_TYPE, options.is_common_handle); - write_data(segment, block); - remain_row_num -= write_num; - version++; + ingest_data(segment, block); } + EXPECT_EQ(getSegmentRowNumWithoutMVCC(segment_id), segment_row_num + write_rows); operation_statistics["ingest"]++; } -void SegmentTestBasic::writeSegmentWithDeletedPack(PageId segment_id, UInt64 write_rows) +void SegmentTestBasic::writeSegmentWithDeletedPack(PageId segment_id, UInt64 write_rows, std::optional start_at) { - LOG_FMT_INFO(logger_op, "writeSegmentWithDeletedPack, segment_id={}", segment_id); + LOG_FMT_INFO(logger_op, "writeSegmentWithDeletedPack, segment_id={} write_rows={}", segment_id, write_rows); + if (write_rows == 0) + return; + + RUNTIME_CHECK(segments.find(segment_id) != segments.end()); auto segment = segments[segment_id]; size_t segment_row_num = getSegmentRowNumWithoutMVCC(segment_id); auto [start_key, end_key] = getSegmentKeyRange(segment_id); + LOG_FMT_DEBUG(logger, "write deleted pack to segment, segment={} segment_rows={} start_key={} end_key={}", segment->info(), segment_row_num, start_key, end_key); - auto segment_max_rows = static_cast(end_key - start_key); - if (segment_max_rows == 0) - return; - // If the length of segment key range is larger than `write_rows`, then - // write the new data with the same tso in one block. - // Otherwise create multiple block with increasing tso until the `remain_row_num` - // down to 0. - UInt64 remain_row_num = 0; - if (segment_max_rows > write_rows) - { - start_key = std::uniform_int_distribution{start_key, end_key - static_cast(write_rows)}(random); - end_key = start_key + write_rows; - } - else + auto blocks = prepareWriteBlocksInSegmentRange(segment_id, write_rows, start_at, /* is_deleted */ true); + for (const auto & block : blocks) { - remain_row_num = write_rows - segment_max_rows; - } - { - // write to segment and not flush - LOG_FMT_DEBUG(logger, "write block to segment, block_range=[{}, {})", start_key, end_key); - Block block = DMTestEnv::prepareSimpleWriteBlock(start_key, end_key, false, version, DMTestEnv::pk_name, EXTRA_HANDLE_COLUMN_ID, options.is_common_handle ? EXTRA_HANDLE_COLUMN_STRING_TYPE : EXTRA_HANDLE_COLUMN_INT_TYPE, options.is_common_handle, 1, true, true); - segment->write(*dm_context, std::move(block), true); - version++; - } - while (remain_row_num > 0) - { - UInt64 write_num = std::min(remain_row_num, segment_max_rows); - LOG_FMT_DEBUG(logger, "write block to segment, block_range=[{}, {})", start_key, write_num + start_key); - Block block = DMTestEnv::prepareSimpleWriteBlock(start_key, write_num + start_key, false, version, DMTestEnv::pk_name, EXTRA_HANDLE_COLUMN_ID, options.is_common_handle ? 
EXTRA_HANDLE_COLUMN_STRING_TYPE : EXTRA_HANDLE_COLUMN_INT_TYPE, options.is_common_handle, 1, true, true); - segment->write(*dm_context, std::move(block), true); - remain_row_num -= write_num; - version++; + segment->write(*dm_context, block, false); } + EXPECT_EQ(getSegmentRowNumWithoutMVCC(segment_id), segment_row_num + write_rows); operation_statistics["writeDelete"]++; } @@ -457,15 +458,63 @@ void SegmentTestBasic::deleteRangeSegment(PageId segment_id) { LOG_FMT_INFO(logger_op, "deleteRangeSegment, segment_id={}", segment_id); + RUNTIME_CHECK(segments.find(segment_id) != segments.end()); auto segment = segments[segment_id]; segment->write(*dm_context, /*delete_range*/ segment->getRowKeyRange()); EXPECT_EQ(getSegmentRowNum(segment_id), 0); - operation_statistics["deleteRange"]++; +} + +void SegmentTestBasic::replaceSegmentData(const std::vector & segments_id, const Block & block) +{ + LOG_FMT_DEBUG(logger, "replace segment data using block, segments_id={} block_rows={}", fmt::join(segments_id, ","), block.rows()); + + auto delegator = storage_path_pool->getStableDiskDelegator(); + auto parent_path = delegator.choosePath(); + auto file_provider = db_context->getFileProvider(); + + WriteBatches ingest_wbs(dm_context->storage_pool, dm_context->getWriteLimiter()); + + auto file_id = storage_pool->newDataPageIdForDTFile(delegator, __PRETTY_FUNCTION__); + auto input_stream = std::make_shared(block); + auto dm_file = writeIntoNewDMFile( + *dm_context, + table_columns, + input_stream, + file_id, + parent_path, + {}); + + ingest_wbs.data.putExternal(file_id, /* tag */ 0); + ingest_wbs.writeLogAndData(); + delegator.addDTFile(file_id, dm_file->getBytesOnDisk(), parent_path); + + replaceSegmentData(segments_id, dm_file); + + dm_file->enableGC(); + ingest_wbs.rollbackWrittenLogAndData(); +} + +void SegmentTestBasic::replaceSegmentData(const std::vector & segments_id, const DMFilePtr & file) +{ + LOG_FMT_INFO(logger_op, "replaceSegmentData, segments_id={} file_rows={} file={}", fmt::join(segments_id, ","), file->getRows(), file->path()); + + for (const auto segment_id : segments_id) + { + RUNTIME_CHECK(segments.find(segment_id) != segments.end()); + auto segment = segments[segment_id]; + auto new_segment = segment->dangerouslyReplaceDataForTest(*dm_context, file); + ASSERT_TRUE(new_segment != nullptr); + segments[new_segment->segmentId()] = new_segment; + } + operation_statistics["replaceData"]++; } bool SegmentTestBasic::areSegmentsSharingStable(const std::vector & segments_id) { RUNTIME_CHECK(segments_id.size() >= 2); + for (auto segment_id : segments_id) + RUNTIME_CHECK(segments.find(segment_id) != segments.end()); + auto base_stable = segments[segments_id[0]]->getStable()->getDMFilesString(); for (size_t i = 1; i < segments_id.size(); i++) { @@ -492,7 +541,7 @@ SegmentPtr SegmentTestBasic::reload(bool is_common_handle, const ColumnDefinesPt { TiFlashStorageTestBasic::reload(std::move(db_settings)); storage_path_pool = std::make_unique(db_context->getPathPool().withTable("test", "t1", false)); - storage_pool = std::make_unique(*db_context, /*ns_id*/ 100, *storage_path_pool, "test.t1"); + storage_pool = std::make_unique(*db_context, NAMESPACE_ID, *storage_path_pool, "test.t1"); storage_pool->restore(); ColumnDefinesPtr cols = (!pre_define_columns) ? DMTestEnv::getDefaultColumns(is_common_handle ? 
DMTestEnv::PkType::CommonHandle : DMTestEnv::PkType::HiddenTiDBRowID) : pre_define_columns;
     setColumns(cols);
diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_segment_test_basic.h b/dbms/src/Storages/DeltaMerge/tests/gtest_segment_test_basic.h
index 0f79e1e6985..51d7605684d 100644
--- a/dbms/src/Storages/DeltaMerge/tests/gtest_segment_test_basic.h
+++ b/dbms/src/Storages/DeltaMerge/tests/gtest_segment_test_basic.h
@@ -62,11 +62,20 @@ class SegmentTestBasic : public DB::base::TiFlashStorageTestBasic
      * When begin_key is specified, new rows will be written from specified key. Otherwise, new rows may be
      * written randomly in the segment range.
      */
-    void writeSegment(PageId segment_id, UInt64 write_rows = 100, std::optional begin_key = std::nullopt);
-    void ingestDTFileIntoSegment(PageId segment_id, UInt64 write_rows = 100);
-    void writeSegmentWithDeletedPack(PageId segment_id, UInt64 write_rows = 100);
+    void writeSegment(PageId segment_id, UInt64 write_rows = 100, std::optional start_at = std::nullopt);
+    void ingestDTFileIntoSegment(PageId segment_id, UInt64 write_rows = 100, std::optional start_at = std::nullopt);
+    void writeSegmentWithDeletedPack(PageId segment_id, UInt64 write_rows = 100, std::optional start_at = std::nullopt);
     void deleteRangeSegment(PageId segment_id);
 
+    /**
+     * These functions do not check the rows after replacing; callers should verify row counts themselves.
+     */
+    void replaceSegmentData(const std::vector & segments_id, const DMFilePtr & file);
+    void replaceSegmentData(const std::vector & segments_id, const Block & block);
+
+    Block prepareWriteBlock(Int64 start_key, Int64 end_key, bool is_deleted = false);
+    std::vector prepareWriteBlocksInSegmentRange(PageId segment_id, UInt64 total_write_rows, std::optional write_start_key = std::nullopt, bool is_deleted = false);
+
     size_t getSegmentRowNumWithoutMVCC(PageId segment_id);
     size_t getSegmentRowNum(PageId segment_id);
 
@@ -75,7 +84,7 @@ class SegmentTestBasic : public DB::base::TiFlashStorageTestBasic
     /**
     * You must pass at least 2 segments. Checks whether all segments passed in are sharing the same stable.
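+    * Typically, segments share the same stable right after a logical split; operations that
+    * rebuild the stable (e.g. merge delta) break the sharing.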
    */
-    bool areSegmentsSharingStable(const std::vector & segments_id);
+    [[nodiscard]] bool areSegmentsSharingStable(const std::vector & segments_id);
 
     std::pair getSegmentKeyRange(PageId segment_id);
 
@@ -104,6 +113,8 @@ class SegmentTestBasic : public DB::base::TiFlashStorageTestBasic
     void reloadDMContext();
 
 protected:
+    inline static constexpr PageId NAMESPACE_ID = 100;
+
     /// all these var lives as ref in dm_context
     std::unique_ptr storage_path_pool;
     std::unique_ptr storage_pool;
diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_segment_test_randomized.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_segment_test_randomized.cpp
index 4b4cf19bf82..90826480422 100644
--- a/dbms/src/Storages/DeltaMerge/tests/gtest_segment_test_randomized.cpp
+++ b/dbms/src/Storages/DeltaMerge/tests/gtest_segment_test_randomized.cpp
@@ -71,9 +71,10 @@ class SegmentRandomizedTest : public SegmentTestBasic
         {0.1, &SegmentRandomizedTest::deleteRangeRandomSegment},
         {1.0, &SegmentRandomizedTest::splitRandomSegment},
         {1.0, &SegmentRandomizedTest::splitAtRandomSegment},
-        {0.25, &SegmentRandomizedTest::mergeRandomSegment},
+        {0.25, &SegmentRandomizedTest::mergeRandomSegments},
         {1.0, &SegmentRandomizedTest::mergeDeltaRandomSegment},
         {1.0, &SegmentRandomizedTest::flushCacheRandomSegment},
+        {0.5, &SegmentRandomizedTest::replaceRandomSegmentsData},
         {0.25, &SegmentRandomizedTest::writeRandomSegmentWithDeletedPack}};
 
     /**
      *
@@ -139,7 +140,7 @@ class SegmentRandomizedTest : public SegmentTestBasic
         splitSegmentAt(segment_id, split_at, split_mode);
     }
 
-    void mergeRandomSegment()
+    void mergeRandomSegments()
     {
         if (segments.size() < 2)
             return;
@@ -166,6 +167,68 @@ class SegmentRandomizedTest : public SegmentTestBasic
         flushSegmentCache(random_segment_id);
     }
 
+    void replaceRandomSegmentsData()
+    {
+        if (segments.empty())
+            return;
+
+        auto segments_to_pick = std::uniform_int_distribution{1, 5}(random);
+        std::vector segments_list;
+        std::map expected_data_each_segment;
+        for (size_t i = 0; i < segments_to_pick; ++i)
+        {
+            auto id = getRandomSegmentId(); // allow duplicate
+            segments_list.emplace_back(id);
+            expected_data_each_segment[id] = 0;
+        }
+
+        auto [min_key, max_key] = getSegmentKeyRange(segments_list[0]);
+        for (size_t i = 1; i < segments_to_pick; ++i)
+        {
+            auto [new_min_key, new_max_key] = getSegmentKeyRange(segments_list[i]);
+            if (new_min_key < min_key)
+                min_key = new_min_key;
+            if (new_max_key > max_key)
+                max_key = new_max_key;
+        }
+
+        Block block{};
+        if (max_key > min_key)
+        {
+            // Now let's generate some data.
+            std::vector n_rows_collection{0, 10, 50, 1000};
+            auto block_rows = n_rows_collection[std::uniform_int_distribution{0, n_rows_collection.size() - 1}(random)];
+            if (block_rows > 0)
+            {
+                auto block_start_key = std::uniform_int_distribution{min_key, max_key - 1}(random);
+                auto block_end_key = block_start_key + static_cast(block_rows);
+                block = prepareWriteBlock(block_start_key, block_end_key);
+
+                // How much data will each segment keep after replacing data? It should be BlockRange ∩ SegmentRange.
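+                // For example, a block of [0, 60) replacing a segment of [40, 100) leaves
+                // rows [40, 60), i.e. 20 rows, visible in that segment.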
+ for (auto segment_id : segments_list) + { + auto [seg_min_key, seg_max_key] = getSegmentKeyRange(segment_id); + auto intersect_min = std::max(seg_min_key, block_start_key); + auto intersect_max = std::min(seg_max_key, block_end_key); + if (intersect_min <= intersect_max) + { + // There is an intersection + expected_data_each_segment[segment_id] = static_cast(intersect_max - intersect_min); + } + } + } + } + + LOG_FMT_DEBUG(logger, "start random replace segment data, segments_id={} block_rows={} all_segments={}", fmt::join(segments_list, ","), block.rows(), segments.size()); + replaceSegmentData({segments_list}, block); + + // Verify rows. + for (auto segment_id : segments_list) + { + EXPECT_EQ(getSegmentRowNum(segment_id), expected_data_each_segment[segment_id]); + } + } + Segment::SplitMode getRandomSplitMode() { int mode = std::uniform_int_distribution{1, 2}(random);