From da3233ab9113b65373be0ffc6aa26309b6cc327a Mon Sep 17 00:00:00 2001 From: reindexer-bot <@> Date: Sat, 30 Dec 2023 22:37:03 +0000 Subject: [PATCH] Merge branch '1576_1570_subquery_explain' into 'develop' Subquery explain - [#1576] [#1570] See merge request itv-backend/reindexer!1485 --- changelog.md | 8 +- .../cmd/reindexer_server/contrib/Dockerfile | 2 +- .../test/test_storage_compatibility.sh | 195 ++++++++++++++++++ cpp_src/core/nsselecter/explaincalc.cc | 10 + cpp_src/core/nsselecter/explaincalc.h | 38 +++- cpp_src/core/nsselecter/nsselecter.cc | 1 + cpp_src/core/nsselecter/nsselecter.h | 1 + cpp_src/core/query/query.cc | 4 +- cpp_src/core/queryresults/queryresults.h | 5 +- cpp_src/core/reindexer_impl/rx_selector.cc | 51 +++-- cpp_src/core/reindexer_impl/rx_selector.h | 9 +- .../gtests/bench/fixtures/api_tv_simple.cc | 55 ++++- cpp_src/gtests/bench/fixtures/api_tv_simple.h | 7 +- .../gtests/tests/fixtures/queries_verifier.h | 9 + .../gtests/tests/unit/string_function_test.cc | 8 + cpp_src/readme.md | 15 +- cpp_src/server/contrib/server.md | 13 ++ cpp_src/server/contrib/server.yml | 24 +++ cpp_src/server/httpserver.cc | 1 + cpp_src/vendor/prometheus/family.h | 4 +- cpp_src/vendor/prometheus/impl/check_names.cc | 10 +- iterator.go | 15 +- query.go | 12 +- test/join_test.go | 19 ++ test/queries_test.go | 184 ++++++++++++++++- test/reindexer_bench_test.go | 25 +++ 26 files changed, 669 insertions(+), 56 deletions(-) create mode 100755 cpp_src/cmd/reindexer_server/test/test_storage_compatibility.sh diff --git a/changelog.md b/changelog.md index 9147eb029..daa705c8c 100644 --- a/changelog.md +++ b/changelog.md @@ -1,6 +1,6 @@ # Version 3.21.0 (15.12.2023) ## Core -- [fea] Added [subqueries](#subqueries-nested-queries) support (`explain` for subqueries will be implemented in the next releases) +- [fea] Added [subqueries](#subqueries-nested-queries) support (`explain` for subqueries will be implemented in the next releases) - [fea] Added backtraces/minidump support 
for Windows platform - [fea] Added query crash tracker support for Windows platform - [fix] Added explicit error for aggregations in joined queries @@ -16,8 +16,8 @@ ## Go connector - [fea] Added Go API and DSL-convertor for subqueries -- [fea] Changed CJSON-to-object conversion logic for slices: now the single JSON values and fields with multiple JSON-paths will be concatenated together in the target field -- [fea] Added `WithStrictJoinHandlers`. This option allows to validate JoinHandlers usage at runtime +- [fea] Changed CJSON-to-object conversion logic for slices: now the single JSON values and fields with multiple JSON-paths will be concatenated together in the target field +- [fea] Added `WithStrictJoinHandlers`. This option allows to validate JoinHandlers usage at runtime - [fix] Fixed panic handling in the CJSON deserialization - [fix] Fixed logging in `cproto`-binding. Error messages will no longer be redirected to stdout instead of user's logger @@ -25,7 +25,7 @@ - [fea] Saved the scroll position on the sorting - [fea] Changed the Server ID range - [fea] Improved the notification about the supported browsers -- [fea] Added the default values to the config form when the default config is used +- [fea] Added the default values to the config form when the default config is used - [fix] Fixed the wrong redirect to a fake database - [fix] Fixed the column order changing on the data sorting - [fix] Fixed the horizontal scroll on the data sorting diff --git a/cpp_src/cmd/reindexer_server/contrib/Dockerfile b/cpp_src/cmd/reindexer_server/contrib/Dockerfile index 8eb80a77c..94681c300 100644 --- a/cpp_src/cmd/reindexer_server/contrib/Dockerfile +++ b/cpp_src/cmd/reindexer_server/contrib/Dockerfile @@ -3,7 +3,7 @@ FROM alpine:3.14 AS build RUN cd /tmp && apk update && \ apk add git curl autoconf automake libtool linux-headers g++ make libunwind-dev grpc-dev grpc protobuf-dev c-ares-dev && \ git clone https://github.com/gperftools/gperftools.git && \ - cd gperftools 
&& \ + cd gperftools && git checkout gperftools-2.13 && \ echo "noinst_PROGRAMS =" >> Makefile.am && \ sed -i s/_sigev_un\._tid/sigev_notify_thread_id/ src/profile-handler.cc && \ ./autogen.sh && ./configure --disable-dependency-tracking && make -j8 && make install diff --git a/cpp_src/cmd/reindexer_server/test/test_storage_compatibility.sh b/cpp_src/cmd/reindexer_server/test/test_storage_compatibility.sh new file mode 100755 index 000000000..d189d3841 --- /dev/null +++ b/cpp_src/cmd/reindexer_server/test/test_storage_compatibility.sh @@ -0,0 +1,195 @@ +#!/bin/bash +# Task: https://github.com/restream/reindexer/-/issues/1188 +set -e + +function KillAndRemoveServer { + local pid=$1 + kill $pid + wait $pid + yum remove -y 'reindexer*' > /dev/null +} + +function WaitForDB { + # wait until DB is loaded + set +e # disable "exit on error" so the script won't stop when DB's not loaded yet + is_connected=$(reindexer_tool --dsn $ADDRESS --command '\databases list'); + while [[ $is_connected != "test" ]] + do + sleep 2 + is_connected=$(reindexer_tool --dsn $ADDRESS --command '\databases list'); + done + set -e +} + +function CompareNamespacesLists { + local ns_list_actual=$1 + local ns_list_expected=$2 + local pid=$3 + + diff=$(echo ${ns_list_actual[@]} ${ns_list_expected[@]} | tr ' ' '\n' | sort | uniq -u) # compare in any order + if [ "$diff" == "" ]; then + echo "## PASS: namespaces list not changed" + else + echo "##### FAIL: namespaces list was changed" + echo "expected: $ns_list_expected" + echo "actual: $ns_list_actual" + KillAndRemoveServer $pid; + exit 1 + fi +} + +function CompareMemstats { + local actual=$1 + local expected=$2 + local pid=$3 + diff=$(echo ${actual[@]} ${expected[@]} | tr ' ' '\n' | sed 's/\(.*\),$/\1/' | sort | uniq -u) # compare in any order + if [ "$diff" == "" ]; then + echo "## PASS: memstats not changed" + else + echo "##### FAIL: memstats was changed" + echo "expected: $expected" + echo "actual: $actual" + KillAndRemoveServer $pid; + exit 1 
+ fi +} + + +RX_SERVER_CURRENT_VERSION_RPM="$(basename build/reindexer-*server*.rpm)" +VERSION_FROM_RPM=$(echo "$RX_SERVER_CURRENT_VERSION_RPM" | grep -o '.*server-..') +VERSION=$(echo ${VERSION_FROM_RPM: -2:1}) # one-digit version + +echo "## choose latest release rpm file" +if [ $VERSION == 3 ]; then + LATEST_RELEASE=$(python3 cpp_src/cmd/reindexer_server/test/get_last_rx_version.py -v 3) + namespaces_list_expected=$'purchase_options_ext_dict\nchild_account_recommendations\n#config\n#activitystats\nradio_channels\ncollections\n#namespaces\nwp_imports_tasks\nepg_genres\nrecom_media_items_personal\nrecom_epg_archive_default\n#perfstats\nrecom_epg_live_default\nmedia_view_templates\nasset_video_servers\nwp_tasks_schedule\nadmin_roles\n#clientsstats\nrecom_epg_archive_personal\nrecom_media_items_similars\nmenu_items\naccount_recommendations\nkaraoke_items\nmedia_items\nbanners\n#queriesperfstats\nrecom_media_items_default\nrecom_epg_live_personal\nservices\n#memstats\nchannels\nmedia_item_recommendations\nwp_tasks_tasks\nepg' +elif [ $VERSION == 4 ]; then + LATEST_RELEASE=$(python3 cpp_src/cmd/reindexer_server/test/get_last_rx_version.py -v 4) + # replicationstats ns added for v4 + namespaces_list_expected=$'purchase_options_ext_dict\nchild_account_recommendations\n#config\n#activitystats\n#replicationstats\nradio_channels\ncollections\n#namespaces\nwp_imports_tasks\nepg_genres\nrecom_media_items_personal\nrecom_epg_archive_default\n#perfstats\nrecom_epg_live_default\nmedia_view_templates\nasset_video_servers\nwp_tasks_schedule\nadmin_roles\n#clientsstats\nrecom_epg_archive_personal\nrecom_media_items_similars\nmenu_items\naccount_recommendations\nkaraoke_items\nmedia_items\nbanners\n#queriesperfstats\nrecom_media_items_default\nrecom_epg_live_personal\nservices\n#memstats\nchannels\nmedia_item_recommendations\nwp_tasks_tasks\nepg' +else + echo "Unknown version" + exit 1 +fi + +echo "## downloading latest release rpm file: $LATEST_RELEASE" +curl 
"http://repo.itv.restr.im/itv-api-ng/7/x86_64/$LATEST_RELEASE" --output $LATEST_RELEASE; +echo "## downloading example DB" +curl "https://git.restream.ru/MaksimKravchuk/reindexer_testdata/-/raw/master/big.zip" --output big.zip; +unzip -o big.zip # unzips into mydb_big.rxdump; + +ADDRESS="cproto://127.0.0.1:6534/" +DB_NAME="test" + +memstats_expected=$'[ +{"replication":{"data_hash":24651210926,"data_count":3}}, +{"replication":{"data_hash":6252344969,"data_count":1}}, +{"replication":{"data_hash":37734732881,"data_count":28}}, +{"replication":{"data_hash":0,"data_count":0}}, +{"replication":{"data_hash":1024095024522,"data_count":1145}}, +{"replication":{"data_hash":8373644068,"data_count":1315}}, +{"replication":{"data_hash":0,"data_count":0}}, +{"replication":{"data_hash":0,"data_count":0}}, +{"replication":{"data_hash":0,"data_count":0}}, +{"replication":{"data_hash":0,"data_count":0}}, +{"replication":{"data_hash":7404222244,"data_count":97}}, +{"replication":{"data_hash":94132837196,"data_count":4}}, +{"replication":{"data_hash":1896088071,"data_count":2}}, +{"replication":{"data_hash":0,"data_count":0}}, +{"replication":{"data_hash":-672103903,"data_count":33538}}, +{"replication":{"data_hash":0,"data_count":0}}, +{"replication":{"data_hash":6833710705,"data_count":1}}, +{"replication":{"data_hash":5858155773472,"data_count":4500}}, +{"replication":{"data_hash":-473221280268823592,"data_count":65448}}, +{"replication":{"data_hash":0,"data_count":0}}, +{"replication":{"data_hash":8288213744,"data_count":3}}, +{"replication":{"data_hash":0,"data_count":0}}, +{"replication":{"data_hash":0,"data_count":0}}, +{"replication":{"data_hash":354171024786967,"data_count":3941}}, +{"replication":{"data_hash":-6520334670,"data_count":35886}}, +{"replication":{"data_hash":112772074632,"data_count":281}}, +{"replication":{"data_hash":-12679568198538,"data_count":1623116}} +] +Returned 27 rows' + +echo "##### Forward compatibility test #####" + +DB_PATH=$(pwd)"/rx_db" + 
+echo "Database: "$DB_PATH + +echo "## installing latest release: $LATEST_RELEASE" +yum install -y $LATEST_RELEASE > /dev/null; +# run RX server with disabled logging +reindexer_server -l warning --httplog=none --rpclog=none --db $DB_PATH & +server_pid=$! +sleep 2; + +reindexer_tool --dsn $ADDRESS$DB_NAME -f mydb_big.rxdump --createdb; +sleep 1; + +namespaces_1=$(reindexer_tool --dsn $ADDRESS$DB_NAME --command '\namespaces list'); +echo $namespaces_1; +CompareNamespacesLists "${namespaces_1[@]}" "${namespaces_list_expected[@]}" $server_pid; + +memstats_1=$(reindexer_tool --dsn $ADDRESS$DB_NAME --command 'select replication.data_hash, replication.data_count from #memstats'); +CompareMemstats "${memstats_1[@]}" "${memstats_expected[@]}" $server_pid; + +KillAndRemoveServer $server_pid; + +echo "## installing current version: $RX_SERVER_CURRENT_VERSION_RPM" +yum install -y build/*.rpm > /dev/null; +reindexer_server -l0 --corelog=none --httplog=none --rpclog=none --db $DB_PATH & +server_pid=$! +sleep 2; + +WaitForDB + +namespaces_2=$(reindexer_tool --dsn $ADDRESS$DB_NAME --command '\namespaces list'); +echo $namespaces_2; +CompareNamespacesLists "${namespaces_2[@]}" "${namespaces_1[@]}" $server_pid; + +memstats_2=$(reindexer_tool --dsn $ADDRESS$DB_NAME --command 'select replication.data_hash, replication.data_count from #memstats'); +CompareMemstats "${memstats_2[@]}" "${memstats_1[@]}" $server_pid; + +KillAndRemoveServer $server_pid; +rm -rf $DB_PATH; +sleep 1; + +echo "##### Backward compatibility test #####" + +echo "## installing current version: $RX_SERVER_CURRENT_VERSION_RPM" +yum install -y build/*.rpm > /dev/null; +reindexer_server -l warning --httplog=none --rpclog=none --db $DB_PATH & +server_pid=$! 
+sleep 2; + +reindexer_tool --dsn $ADDRESS$DB_NAME -f mydb_big.rxdump --createdb; +sleep 1; + +namespaces_3=$(reindexer_tool --dsn $ADDRESS$DB_NAME --command '\namespaces list'); +echo $namespaces_3; +CompareNamespacesLists "${namespaces_3[@]}" "${namespaces_list_expected[@]}" $server_pid; + +memstats_3=$(reindexer_tool --dsn $ADDRESS$DB_NAME --command 'select replication.data_hash, replication.data_count from #memstats'); +CompareMemstats "${memstats_3[@]}" "${memstats_expected[@]}" $server_pid; + +KillAndRemoveServer $server_pid; + +echo "## installing latest release: $LATEST_RELEASE" +yum install -y $LATEST_RELEASE > /dev/null; +reindexer_server -l warning --httplog=none --rpclog=none --db $DB_PATH & +server_pid=$! +sleep 2; + +WaitForDB + +namespaces_4=$(reindexer_tool --dsn $ADDRESS$DB_NAME --command '\namespaces list'); +echo $namespaces_4; +CompareNamespacesLists "${namespaces_4[@]}" "${namespaces_3[@]}" $server_pid; + +memstats_4=$(reindexer_tool --dsn $ADDRESS$DB_NAME --command 'select replication.data_hash, replication.data_count from #memstats'); +CompareMemstats "${memstats_4[@]}" "${memstats_3[@]}" $server_pid; + +KillAndRemoveServer $server_pid; +rm -rf $DB_PATH; diff --git a/cpp_src/core/nsselecter/explaincalc.cc b/cpp_src/core/nsselecter/explaincalc.cc index 642cade9c..b76c6dbb8 100644 --- a/cpp_src/core/nsselecter/explaincalc.cc +++ b/cpp_src/core/nsselecter/explaincalc.cc @@ -202,6 +202,16 @@ std::string ExplainCalc::GetJSON() { json.Put("postprocess_us"sv, To_us(postprocess_)); json.Put("loop_us"sv, To_us(loop_)); json.Put("general_sort_us"sv, To_us(sort_)); + if (!subqueries_.empty()) { + auto subQuries = json.Array("subqueries"); + for (const auto &sq : subqueries_) { + auto s = subQuries.Object(); + s.Put("namespace", sq.NsName()); + s.Raw("explain", sq.Explain()); + std::visit(overloaded{[&](size_t k) { s.Put("keys", k); }, [&](const std::string &f) { s.Put("field", f); }}, + sq.FieldOrKeys()); + } + } } json.Put("sort_index"sv, sortIndex_); 
json.Put("sort_by_uncommitted_index"sv, sortOptimization_); diff --git a/cpp_src/core/nsselecter/explaincalc.h b/cpp_src/core/nsselecter/explaincalc.h index 837dfafde..67e6ff4ed 100644 --- a/cpp_src/core/nsselecter/explaincalc.h +++ b/cpp_src/core/nsselecter/explaincalc.h @@ -2,6 +2,7 @@ #include #include +#include #include #include "core/type_consts.h" @@ -17,6 +18,24 @@ struct ConditionInjection; typedef std::vector JoinedSelectors; typedef std::vector OnConditionInjections; +class SubQueryExplain { +public: + SubQueryExplain(const std::string& ns, std::string&& exp) : explain_{std::move(exp)}, namespace_{ns} {} + [[nodiscard]] const std::string& NsName() const& noexcept { return namespace_; } + [[nodiscard]] const auto& FieldOrKeys() const& noexcept { return fieldOrKeys_; } + [[nodiscard]] const std::string& Explain() const& noexcept { return explain_; } + void SetFieldOrKeys(std::variant&& fok) noexcept { fieldOrKeys_ = std::move(fok); } + + auto NsName() const&& = delete; + auto FieldOrKeys() const&& = delete; + auto Explain() const&& = delete; + +private: + std::string explain_; + std::string namespace_; + std::variant fieldOrKeys_{size_t(0)}; +}; + class ExplainCalc { public: typedef std::chrono::high_resolution_clock Clock; @@ -42,11 +61,12 @@ class ExplainCalc { void PutCount(int cnt) noexcept { count_ = cnt; } void PutSortIndex(std::string_view index) noexcept { sortIndex_ = index; } - void PutSelectors(const SelectIteratorContainer *qres) noexcept { selectors_ = qres; } - void PutJoinedSelectors(const JoinedSelectors *jselectors) noexcept { jselectors_ = jselectors; } + void PutSelectors(const SelectIteratorContainer* qres) noexcept { selectors_ = qres; } + void PutJoinedSelectors(const JoinedSelectors* jselectors) noexcept { jselectors_ = jselectors; } void SetPreselectTime(Duration preselectTime) noexcept { preselect_ = preselectTime; } - void PutOnConditionInjections(const OnConditionInjections *onCondInjections) noexcept { onInjections_ = 
onCondInjections; } + void PutOnConditionInjections(const OnConditionInjections* onCondInjections) noexcept { onInjections_ = onCondInjections; } void SetSortOptimization(bool enable) noexcept { sortOptimization_ = enable; } + void SetSubQueriesExplains(std::vector&& subQueriesExpl) noexcept { subqueries_ = std::move(subQueriesExpl); } void LogDump(int logLevel); std::string GetJSON(); @@ -59,14 +79,15 @@ class ExplainCalc { Duration Sort() const noexcept { return sort_; } size_t Iterations() const noexcept { return iters_; } - static int To_us(const Duration &d) noexcept; + static int To_us(const Duration& d) noexcept; bool IsEnabled() const noexcept { return enabled_; } private: Duration lap() noexcept; time_point last_point_, sort_start_point_; - Duration total_, prepare_ = Duration::zero(); + Duration total_ = Duration::zero(); + Duration prepare_ = Duration::zero(); Duration preselect_ = Duration::zero(); Duration select_ = Duration::zero(); Duration postprocess_ = Duration::zero(); @@ -74,9 +95,10 @@ class ExplainCalc { Duration sort_ = Duration::zero(); std::string_view sortIndex_; - const SelectIteratorContainer *selectors_ = nullptr; - const JoinedSelectors *jselectors_ = nullptr; - const OnConditionInjections *onInjections_ = nullptr; ///< Optional + const SelectIteratorContainer* selectors_ = nullptr; + const JoinedSelectors* jselectors_ = nullptr; + const OnConditionInjections* onInjections_ = nullptr; ///< Optional + std::vector subqueries_; int iters_ = 0; int count_ = 0; diff --git a/cpp_src/core/nsselecter/nsselecter.cc b/cpp_src/core/nsselecter/nsselecter.cc index 3a7c22bc0..adc1d491e 100644 --- a/cpp_src/core/nsselecter/nsselecter.cc +++ b/cpp_src/core/nsselecter/nsselecter.cc @@ -27,6 +27,7 @@ void NsSelecter::operator()(QueryResults &result, SelectCtx &ctx, const RdxConte auto &explain = ctx.explain; explain = ExplainCalc(ctx.query.GetExplain() || logLevel >= LogInfo); + explain.SetSubQueriesExplains(std::move(ctx.subQueriesExplains)); 
ActiveQueryScope queryScope(ctx, ns_->optimizationState_, explain, ns_->locker_.IsReadOnly(), ns_->strHolder_.get()); explain.SetPreselectTime(ctx.preResultTimeTotal); diff --git a/cpp_src/core/nsselecter/nsselecter.h b/cpp_src/core/nsselecter/nsselecter.h index 6d4c87f34..6d25e2c63 100644 --- a/cpp_src/core/nsselecter/nsselecter.h +++ b/cpp_src/core/nsselecter/nsselecter.h @@ -33,6 +33,7 @@ struct SelectCtx { const Query *parentQuery = nullptr; ExplainCalc explain; bool requiresCrashTracking = false; + std::vector subQueriesExplains; RX_ALWAYS_INLINE bool isMergeQuerySubQuery() const noexcept { return isMergeQuery == IsMergeQuery::Yes && parentQuery; } }; diff --git a/cpp_src/core/query/query.cc b/cpp_src/core/query/query.cc index c8e5fe0fa..02fabd907 100644 --- a/cpp_src/core/query/query.cc +++ b/cpp_src/core/query/query.cc @@ -293,7 +293,7 @@ void Query::deserialize(Serializer &ser, bool &hasJoinConditions) { Debug(ser.GetVarUint()); break; case QueryStrictMode: - strictMode_ = StrictMode(ser.GetVarUint()); + Strict(StrictMode(ser.GetVarUint())); break; case QueryLimit: count_ = ser.GetVarUint(); @@ -315,7 +315,7 @@ void Query::deserialize(Serializer &ser, bool &hasJoinConditions) { break; } case QueryExplain: - explain_ = true; + Explain(true); break; case QueryWithRank: withRank_ = true; diff --git a/cpp_src/core/queryresults/queryresults.h b/cpp_src/core/queryresults/queryresults.h index 4c06d9d5f..70cfce802 100644 --- a/cpp_src/core/queryresults/queryresults.h +++ b/cpp_src/core/queryresults/queryresults.h @@ -54,9 +54,10 @@ class QueryResults { void Erase(ItemRefVector::iterator begin, ItemRefVector::iterator end); size_t Count() const noexcept { return items_.size(); } size_t TotalCount() const noexcept { return totalCount; } - const std::string &GetExplainResults() const &noexcept { return explainResults; } + const std::string &GetExplainResults() const & noexcept { return explainResults; } const std::string &GetExplainResults() const && = delete; - const 
std::vector &GetAggregationResults() const &noexcept { return aggregationResults; } + std::string &&MoveExplainResults() & noexcept { return std::move(explainResults); } + const std::vector &GetAggregationResults() const & noexcept { return aggregationResults; } const std::vector &GetAggregationResults() const && = delete; void Clear(); h_vector GetNamespaces() const; diff --git a/cpp_src/core/reindexer_impl/rx_selector.cc b/cpp_src/core/reindexer_impl/rx_selector.cc index ffc87deb5..a1127b1e1 100644 --- a/cpp_src/core/reindexer_impl/rx_selector.cc +++ b/cpp_src/core/reindexer_impl/rx_selector.cc @@ -38,12 +38,16 @@ void RxSelector::DoSelect(const Query& q, QueryResults& result, NsLocker& loc } std::vector queryResultsHolder; std::optional queryCopy; + ExplainCalc::Duration preselectTimeTotal{0}; + std::vector subQueryExplains; if (!q.GetSubQueries().empty()) { if (q.GetDebugLevel() >= LogInfo || ns->config_.logLevel >= LogInfo) { logPrintf(LogInfo, "Query before subqueries substitution: %s", q.GetSQL()); } queryCopy.emplace(q); - preselectSubQueries(*queryCopy, queryResultsHolder, locks, func, ctx); + const auto preselectStartTime = ExplainCalc::Clock::now(); + subQueryExplains = preselectSubQueries(*queryCopy, queryResultsHolder, locks, func, ctx); + preselectTimeTotal += ExplainCalc::Clock::now() - preselectStartTime; } const Query& query = queryCopy ? 
*queryCopy : q; std::vector joinQueryResultsContexts; @@ -58,12 +62,11 @@ void RxSelector::DoSelect(const Query& q, QueryResults& result, NsLocker& loc } JoinedSelectors mainJoinedSelectors; - ExplainCalc::Duration preselectTimeTotal{0}; if (thereAreJoins) { const auto preselectStartTime = ExplainCalc::Clock::now(); mainJoinedSelectors = prepareJoinedSelectors(query, result, locks, func, joinQueryResultsContexts, ctx); result.joined_.resize(1 + query.GetMergeQueries().size()); - preselectTimeTotal = ExplainCalc::Clock::now() - preselectStartTime; + preselectTimeTotal += ExplainCalc::Clock::now() - preselectStartTime; } IsFTQuery isFtQuery{IsFTQuery::NotSet}; { @@ -73,6 +76,7 @@ void RxSelector::DoSelect(const Query& q, QueryResults& result, NsLocker& loc selCtx.contextCollectingMode = true; selCtx.functions = &func; selCtx.nsid = 0; + selCtx.subQueriesExplains = std::move(subQueryExplains); if (!query.GetMergeQueries().empty()) { selCtx.isMergeQuery = IsMergeQuery::Yes; if rx_unlikely (!query.sortingEntries_.empty()) { @@ -142,13 +146,16 @@ void RxSelector::DoSelect(const Query& q, QueryResults& result, NsLocker& loc std::optional mQueryCopy; if (!mq.GetSubQueries().empty()) { mQueryCopy.emplace(mq); - preselectSubQueries(*mQueryCopy, queryResultsHolder, locks, func, ctx); } const JoinedQuery& mQuery = mQueryCopy ? 
*mQueryCopy : mq; + SelectCtx mctx(mQuery, &query); + if (!mq.GetSubQueries().empty()) { + // NOLINTNEXTLINE(bugprone-unchecked-optional-access) + mctx.subQueriesExplains = preselectSubQueries(*mQueryCopy, queryResultsHolder, locks, func, ctx); + } auto mns = locks.Get(mQuery.NsName()); assertrx_throw(mns); - SelectCtx mctx(mQuery, &query); mctx.nsid = ++counter; mctx.isMergeQuery = IsMergeQuery::Yes; mctx.isFtQuery = isFtQuery; @@ -245,7 +252,7 @@ bool RxSelector::isPreResultValuesModeOptimizationAvailable(const Query& jItemQ, template bool RxSelector::selectSubQuery(const Query& subQuery, const Query& mainQuery, NsLocker& locks, SelectFunctionsHolder& func, - const RdxContext& rdxCtx) { + std::vector& explain, const RdxContext& rdxCtx) { auto ns = locks.Get(subQuery.NsName()); assertrx_throw(ns); @@ -259,12 +266,16 @@ bool RxSelector::selectSubQuery(const Query& subQuery, const Query& mainQuery, N QueryResults result; ns->Select(result, sctx, rdxCtx); locks.Delete(ns); + if (!result.GetExplainResults().empty()) { + explain.emplace_back(subQuery.NsName(), result.MoveExplainResults()); + } return sctx.matchedAtLeastOnce; } template VariantArray RxSelector::selectSubQuery(const Query& subQuery, const Query& mainQuery, NsLocker& locks, QueryResults& qr, - SelectFunctionsHolder& func, const RdxContext& rdxCtx) { + SelectFunctionsHolder& func, std::variant fieldOrKeys, + std::vector& explain, const RdxContext& rdxCtx) { NamespaceImpl::Ptr ns = locks.Get(subQuery.NsName()); assertrx_throw(ns); @@ -353,6 +364,10 @@ VariantArray RxSelector::selectSubQuery(const Query& subQuery, const Query& main } } locks.Delete(ns); + if (!qr.GetExplainResults().empty()) { + explain.emplace_back(subQuery.NsName(), std::move(qr.MoveExplainResults())); + explain.back().SetFieldOrKeys(std::move(fieldOrKeys)); + } return result; } @@ -450,8 +465,12 @@ JoinedSelectors RxSelector::prepareJoinedSelectors(const Query& q, QueryResults& } template -void RxSelector::preselectSubQueries(Query& 
mainQuery, std::vector& queryResultsHolder, NsLocker& locks, - SelectFunctionsHolder& func, const RdxContext& ctx) { +std::vector RxSelector::preselectSubQueries(Query& mainQuery, std::vector& queryResultsHolder, + NsLocker& locks, SelectFunctionsHolder& func, const RdxContext& ctx) { + std::vector explains; + if (mainQuery.GetExplain() || mainQuery.GetDebugLevel() >= LogInfo) { + explains.reserve(mainQuery.GetSubQueries().size()); + } for (size_t i = 0, s = mainQuery.Entries().Size(); i < s; ++i) { mainQuery.Entries().InvokeAppropriate( i, Skip{}, @@ -459,14 +478,16 @@ void RxSelector::preselectSubQueries(Query& mainQuery, std::vector try { const CondType cond = sqe.Condition(); if (cond == CondAny || cond == CondEmpty) { - if (selectSubQuery(mainQuery.GetSubQuery(sqe.QueryIndex()), mainQuery, locks, func, ctx) == (cond == CondAny)) { + if (selectSubQuery(mainQuery.GetSubQuery(sqe.QueryIndex()), mainQuery, locks, func, explains, ctx) == + (cond == CondAny)) { mainQuery.SetEntry(i); } else { mainQuery.SetEntry(i); } } else { QueryResults qr; - const auto values = selectSubQuery(mainQuery.GetSubQuery(sqe.QueryIndex()), mainQuery, locks, qr, func, ctx); + const auto values = selectSubQuery(mainQuery.GetSubQuery(sqe.QueryIndex()), mainQuery, locks, qr, func, + sqe.Values().size(), explains, ctx); if (QueryEntries::CheckIfSatisfyCondition(values, sqe.Condition(), sqe.Values())) { mainQuery.SetEntry(i); } else { @@ -481,15 +502,17 @@ void RxSelector::preselectSubQueries(Query& mainQuery, std::vector [&](const SubQueryFieldEntry& sqe) { try { queryResultsHolder.resize(queryResultsHolder.size() + 1); - mainQuery.SetEntry( - i, std::move(mainQuery.GetUpdatableEntry(i)).FieldName(), sqe.Condition(), - selectSubQuery(mainQuery.GetSubQuery(sqe.QueryIndex()), mainQuery, locks, queryResultsHolder.back(), func, ctx)); + mainQuery.SetEntry(i, std::move(mainQuery.GetUpdatableEntry(i)).FieldName(), + sqe.Condition(), + selectSubQuery(mainQuery.GetSubQuery(sqe.QueryIndex()), 
mainQuery, locks, + queryResultsHolder.back(), func, sqe.FieldName(), explains, ctx)); } catch (const Error& err) { throw Error(err.code(), "Error during preprocessing of subquery '" + mainQuery.GetSubQuery(sqe.QueryIndex()).GetSQL() + "': " + err.what()); } }); } + return explains; } template void RxSelector::DoSelect>( diff --git a/cpp_src/core/reindexer_impl/rx_selector.h b/cpp_src/core/reindexer_impl/rx_selector.h index d77e9c5e9..1110f4651 100644 --- a/cpp_src/core/reindexer_impl/rx_selector.h +++ b/cpp_src/core/reindexer_impl/rx_selector.h @@ -83,14 +83,15 @@ class RxSelector { static JoinedSelectors prepareJoinedSelectors(const Query &q, QueryResults &result, NsLocker &locks, SelectFunctionsHolder &func, std::vector &, const RdxContext &ctx); template - static void preselectSubQueries(Query &mainQuery, std::vector &queryResultsHolder, NsLocker &, SelectFunctionsHolder &, - const RdxContext &); + [[nodiscard]] static std::vector preselectSubQueries(Query &mainQuery, std::vector &queryResultsHolder, + NsLocker &, SelectFunctionsHolder &, const RdxContext &); template [[nodiscard]] static bool selectSubQuery(const Query &subQuery, const Query &mainQuery, NsLocker &, SelectFunctionsHolder &, - const RdxContext &); + std::vector &, const RdxContext &); template [[nodiscard]] static VariantArray selectSubQuery(const Query &subQuery, const Query &mainQuery, NsLocker &, QueryResults &, - SelectFunctionsHolder &, const RdxContext &); + SelectFunctionsHolder &, std::variant fieldOrKeys, + std::vector &, const RdxContext &); static bool isPreResultValuesModeOptimizationAvailable(const Query &jItemQ, const NamespaceImpl::Ptr &jns, const Query &mainQ); }; diff --git a/cpp_src/gtests/bench/fixtures/api_tv_simple.cc b/cpp_src/gtests/bench/fixtures/api_tv_simple.cc index 7c8cec8b9..2fe2dd32a 100644 --- a/cpp_src/gtests/bench/fixtures/api_tv_simple.cc +++ b/cpp_src/gtests/bench/fixtures/api_tv_simple.cc @@ -111,6 +111,9 @@ void ApiTvSimple::RegisterAllCases() { 
Register("FromCJSONPKOnly", &ApiTvSimple::FromCJSONPKOnly, this); Register("GetCJSON", &ApiTvSimple::GetCJSON, this); Register("ExtractField", &ApiTvSimple::ExtractField, this); + Register("SubQueryEq", &ApiTvSimple::SubQueryEq, this); + Register("SubQuerySet", &ApiTvSimple::SubQuerySet, this); + Register("SubQueryAggregate", &ApiTvSimple::SubQueryAggregate, this); // Those benches should be last, because they are recreating indexes cache Register("Query4CondRangeDropCache", &ApiTvSimple::Query4CondRangeDropCache, this)->Iterations(1000); @@ -197,12 +200,14 @@ reindexer::Error ApiTvSimple::Initialize() { err = db_->Commit(stringSelectNs_); if (!err.ok()) return err; - NamespaceDef mainNsDef{innerJoinLowSelectivityMainNs_}; + NamespaceDef mainNsDef{mainNs_}; mainNsDef.AddIndex("id", "hash", "int", IndexOpts().PK()).AddIndex("field", "hash", "int", IndexOpts()); err = db_->AddNamespace(mainNsDef); if (!err.ok()) return err; - NamespaceDef rightNsDef{innerJoinLowSelectivityRightNs_}; - rightNsDef.AddIndex("id", "hash", "int", IndexOpts().PK()).AddIndex("field", "hash", "int", IndexOpts()); + NamespaceDef rightNsDef{rightNs_}; + rightNsDef.AddIndex("id", "hash", "int", IndexOpts().PK()) + .AddIndex("field", "hash", "int", IndexOpts()) + .AddIndex("id_tree", "tree", "int", IndexOpts()); err = db_->AddNamespace(rightNsDef); if (!err.ok()) return err; @@ -227,6 +232,7 @@ reindexer::Error ApiTvSimple::Initialize() { reindexer::JsonBuilder bld2(wrSer_); bld2.Put("id", i); bld2.Put("field", i); + bld2.Put("id_tree", i); bld2.End(); err = rItem.FromJSON(wrSer_.Slice()); if (!err.ok()) return err; @@ -805,9 +811,9 @@ void ApiTvSimple::Query0CondInnerJoinUnlimit(benchmark::State& state) { void ApiTvSimple::Query0CondInnerJoinUnlimitLowSelectivity(benchmark::State& state) { AllocsTracker allocsTracker(state); for (auto _ : state) { // NOLINT(*deadcode.DeadStores) - Query q4join(innerJoinLowSelectivityRightNs_); + Query q4join(rightNs_); q4join.Where("id", CondLe, 250); - Query 
q(innerJoinLowSelectivityMainNs_); + Query q(mainNs_); q.InnerJoin("id", "id", CondEq, std::move(q4join)).ReqTotal(); QueryResults qres; @@ -816,6 +822,43 @@ void ApiTvSimple::Query0CondInnerJoinUnlimitLowSelectivity(benchmark::State& sta } } +void ApiTvSimple::SubQueryEq(benchmark::State& state) { + AllocsTracker allocsTracker(state); + for (auto _ : state) { // NOLINT(*deadcode.DeadStores) + Query q = Query(mainNs_).Where( + "id", CondEq, Query(rightNs_).Select({"field"}).Where("id", CondEq, VariantArray::Create(int(rand() % kTotalItemsMainJoinNs)))); + QueryResults qres; + auto err = db_->Select(q, qres); + if (!err.ok()) state.SkipWithError(err.what().c_str()); + } +} + +void ApiTvSimple::SubQuerySet(benchmark::State& state) { + AllocsTracker allocsTracker(state); + for (auto _ : state) { // NOLINT(*deadcode.DeadStores) + const int rangeMin = rand() % (kTotalItemsMainJoinNs - 500); + Query q = Query(mainNs_).Where( + "id", CondSet, Query(rightNs_).Select({"id"}).Where("id_tree", CondRange, VariantArray::Create(rangeMin, rangeMin + 500))); + QueryResults qres; + auto err = db_->Select(q, qres); + if (!err.ok()) state.SkipWithError(err.what().c_str()); + } +} + +void ApiTvSimple::SubQueryAggregate(benchmark::State& state) { + AllocsTracker allocsTracker(state); + for (auto _ : state) { // NOLINT(*deadcode.DeadStores) + Query q = Query(mainNs_).Where("id", CondEq, + Query(rightNs_) + .Aggregate(AggAvg, {"id"}) + .Where("id", CondLt, VariantArray::Create(int(rand() % kTotalItemsMainJoinNs))) + .Limit(500)); + QueryResults qres; + auto err = db_->Select(q, qres); + if (!err.ok()) state.SkipWithError(err.what().c_str()); + } +} + void ApiTvSimple::Query2CondInnerJoin(benchmark::State& state) { AllocsTracker allocsTracker(state); for (auto _ : state) { // NOLINT(*deadcode.DeadStores) @@ -1202,7 +1245,7 @@ void ApiTvSimple::query2CondIdSet(benchmark::State& state, const std::vector>& idsets); reindexer::Error prepareCJsonBench(); @@ -147,8 +150,8 @@ class ApiTvSimple : 
private BaseFixture { std::unordered_map>> idsets_; reindexer::WrSerializer wrSer_; std::string stringSelectNs_{"string_select_ns"}; - std::string innerJoinLowSelectivityMainNs_{"inner_join_low_selectivity_main_ns"}; - std::string innerJoinLowSelectivityRightNs_{"inner_join_low_selectivity_right_ns"}; + std::string mainNs_{"main_ns"}; + std::string rightNs_{"right_ns"}; std::string cjsonNsName_{"cjson_ns_name"}; std::unique_ptr itemForCjsonBench_; std::vector fieldsToExtract_; diff --git a/cpp_src/gtests/tests/fixtures/queries_verifier.h b/cpp_src/gtests/tests/fixtures/queries_verifier.h index 6929958fc..033a149ed 100644 --- a/cpp_src/gtests/tests/fixtures/queries_verifier.h +++ b/cpp_src/gtests/tests/fixtures/queries_verifier.h @@ -1,7 +1,16 @@ #pragma once #include + +#if defined(__GNUC__) && (__GNUC__ == 12) && defined(REINDEX_WITH_ASAN) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wmaybe-uninitialized" +#include +#pragma GCC diagnostic pop +#else // REINDEX_WITH_ASAN #include +#endif // REINDEX_WITH_ASAN + #include #include "core/nsselecter/joinedselectormock.h" #include "core/nsselecter/sortexpression.h" diff --git a/cpp_src/gtests/tests/unit/string_function_test.cc b/cpp_src/gtests/tests/unit/string_function_test.cc index 441a1e29c..62ad2cea4 100644 --- a/cpp_src/gtests/tests/unit/string_function_test.cc +++ b/cpp_src/gtests/tests/unit/string_function_test.cc @@ -1,4 +1,12 @@ +#if defined(__GNUC__) && (__GNUC__ == 12) && defined(REINDEX_WITH_ASAN) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wmaybe-uninitialized" #include +#pragma GCC diagnostic pop +#else // REINDEX_WITH_ASAN +#include +#endif // REINDEX_WITH_ASAN + #include "gtest/gtest.h" #include "reindexer_api.h" #include "tools/customlocal.h" diff --git a/cpp_src/readme.md b/cpp_src/readme.md index b496cca9e..d7649bfba 100644 --- a/cpp_src/readme.md +++ b/cpp_src/readme.md @@ -46,7 +46,7 @@ yum update yum install reindexer-server ``` -Available distros: `centos-7`, 
`fedora-38`, `fedora-39`, `redos-7` +Available distros: `centos-7`, `fedora-38`, `fedora-39`. ### Ubuntu/Debian @@ -57,7 +57,18 @@ apt update apt install reindexer-server ``` -Available distros: `debian-bookworm`, `debian-bullseye`, `ubuntu-bionic`, `ubuntu-focal`, `ubuntu-jammy` +Available distros: `debian-bookworm`, `debian-bullseye`, `ubuntu-focal`, `ubuntu-jammy` + +### Redos + +```bash +rpm --import https://repo.reindexer.io/RX-KEY.GPG +dnf config-manager --add-repo https://repo.reindexer.io/<distro>/x86_64/ +dnf update +dnf install reindexer-server +``` + +Available distros: `redos-7`. ## OSX brew diff --git a/cpp_src/server/contrib/server.md b/cpp_src/server/contrib/server.md index 856261b09..923812c17 100644 --- a/cpp_src/server/contrib/server.md +++ b/cpp_src/server/contrib/server.md @@ -2195,6 +2195,7 @@ Query execution explainings |**selectors**
*optional*|Filter selectors, used to proccess query conditions|< [selectors](#explaindef-selectors) > array| |**sort_by_uncommitted_index**
*optional*|Optimization of sort by uncompleted index has been performed|boolean| |**sort_index**
*optional*|Index, which used for sort results|string| +|**subqueries**
*optional*|Explain of subqueries preselect|< [subqueries](#explaindef-subqueries) > array| |**total_us**
*optional*|Total query execution time|integer| @@ -2231,6 +2232,7 @@ Query execution explainings |---|---|---| |**comparators**
*optional*|Count of comparators used, for this selector|integer| |**cost**
*optional*|Cost expectation of this selector|integer| +|**description**
*optional*|Description of the selector|string| |**explain_preselect**
*optional*|Preselect in joined namespace execution explainings|[ExplainDef](#explaindef)| |**explain_select**
*optional*|One of selects in joined namespace execution explainings|[ExplainDef](#explaindef)| |**field**
*optional*|Field or index name|string| @@ -2239,6 +2241,17 @@ Query execution explainings |**keys**
*optional*|Number of uniq keys, processed by this selector (may be incorrect, in case of internal query optimization/caching|integer| |**matched**
*optional*|Count of processed documents, matched this selector|integer| |**method**
*optional*|Method, used to process condition|enum (scan, index, inner_join, left_join)| +|**type**
*optional*|Type of the selector|string| + + +**subqueries** + +|Name|Description|Schema| +|---|---|---| +|**explain**
*optional*|Explain of the subquery's preselect|[ExplainDef](#explaindef)| +|**field**
*optional*|Name of field being compared with the subquery's result|string| +|**keys**
*optional*|Count of keys being compared with the subquery's result|integer| +|**namespace**
*optional*|Subquery's namespace name|string| diff --git a/cpp_src/server/contrib/server.yml b/cpp_src/server/contrib/server.yml index f4f66051a..7243300f2 100644 --- a/cpp_src/server/contrib/server.yml +++ b/cpp_src/server/contrib/server.yml @@ -3299,6 +3299,12 @@ definitions: keys: type: integer description: "Number of uniq keys, processed by this selector (may be incorrect, in case of internal query optimization/caching" + type: + type: string + description: "Type of the selector" + description: + type: string + description: "Description of the selector" explain_preselect: description: "Preselect in joined namespace execution explainings" $ref: "#/definitions/ExplainDef" @@ -3366,6 +3372,24 @@ definitions: values_count: type: integer description: resulting size of query values set + subqueries: + type: array + description: "Explain of subqueries preselect" + items: + type: object + properties: + namespace: + type: string + description: "Subquery's namespace name" + keys: + type: integer + description: "Count of keys being compared with the subquery's result" + field: + type: string + description: "Name of field being compared with the subquery's result" + explain: + description: "Explain of the subquery's preselect" + $ref: "#/definitions/ExplainDef" AggregationResDef: diff --git a/cpp_src/server/httpserver.cc b/cpp_src/server/httpserver.cc index 87cd16189..f0a2c65cd 100644 --- a/cpp_src/server/httpserver.cc +++ b/cpp_src/server/httpserver.cc @@ -22,6 +22,7 @@ #include "resources_wrapper.h" #include "statscollect/istatswatcher.h" #include "statscollect/prometheus.h" +#include "tools/alloc_ext/je_malloc_extension.h" #include "tools/alloc_ext/tc_malloc_extension.h" #include "tools/flagguard.h" #include "tools/fsops.h" diff --git a/cpp_src/vendor/prometheus/family.h b/cpp_src/vendor/prometheus/family.h index 5c59122d0..6992bc161 100644 --- a/cpp_src/vendor/prometheus/family.h +++ b/cpp_src/vendor/prometheus/family.h @@ -157,7 +157,7 @@ T& Family::Add(std::map&& 
labels, int64_t epoch, Ar auto metrics_iter = metrics_.find(hash); if (metrics_iter != metrics_.end()) { -#ifndef NDEBUG +#if !defined(NDEBUG) && defined(WITH_STDLIB_DEBUG) auto labels_iter = labels_.find(hash); assertrx(labels_iter != labels_.end()); const auto& old_labels = labels_iter->second; @@ -166,7 +166,7 @@ T& Family::Add(std::map&& labels, int64_t epoch, Ar metrics_iter->second.epoch = epoch; return *metrics_iter->second.ptr; } else { -#ifndef NDEBUG +#if !defined(NDEBUG) && defined(WITH_STDLIB_DEBUG) for (auto& label_pair : labels) { auto& label_name = label_pair.first; assertrx(CheckLabelName(label_name)); diff --git a/cpp_src/vendor/prometheus/impl/check_names.cc b/cpp_src/vendor/prometheus/impl/check_names.cc index 0aabbc88c..6a800ce31 100644 --- a/cpp_src/vendor/prometheus/impl/check_names.cc +++ b/cpp_src/vendor/prometheus/impl/check_names.cc @@ -1,15 +1,21 @@ #include "prometheus/check_names.h" -#include - #if defined(__GLIBCXX__) && __GLIBCXX__ <= 20150623 #define STD_REGEX_IS_BROKEN #endif +#if defined(__GNUC__) && (__GNUC__ == 12) && (__GNUC_MINOR__ == 2) && defined(REINDEX_WITH_ASAN) +// regex header is broken in GCC 12.2 with ASAN +#define STD_REGEX_IS_BROKEN +#endif #if defined(_MSC_VER) && _MSC_VER < 1900 #define STD_REGEX_IS_BROKEN #endif +#ifndef STD_REGEX_IS_BROKEN +#include +#endif + namespace prometheus { bool CheckMetricName(const std::string& name) { // see https://prometheus.io/docs/concepts/data_model/ diff --git a/iterator.go b/iterator.go index d70832764..9fd8d9165 100644 --- a/iterator.go +++ b/iterator.go @@ -15,11 +15,11 @@ import ( type ExplainSelector struct { // Field or index name - Field string `json:"field"` + Field string `json:"field,omitempty"` // Field type enum: indexed, non-indexed FieldType string `json:"field_type,omitempty"` // Method, used to process condition - Method string `json:"method"` + Method string `json:"method,omitempty"` // Number of uniq keys, processed by this selector (may be incorrect, in case of 
internal query optimization/caching Keys int `json:"keys"` // Count of comparators used, for this selector @@ -30,6 +30,8 @@ type ExplainSelector struct { Matched int `json:"matched"` // Count of scanned documents by this selector Items int `json:"items"` + Type string `json:"type,omitempty"` + Description string `json:"description,omitempty"` // Preselect in joined namespace execution explainings ExplainPreselect *ExplainResults `json:"explain_preselect,omitempty"` // One of selects in joined namespace execution explainings @@ -37,6 +39,13 @@ type ExplainSelector struct { Selectors []ExplainSelector `json:"selectors,omitempty"` } +type ExplainSubQuery struct { + Namespace string `json:"namespace"` + Explain ExplainResults `json:"explain"` + Keys int `json:"keys,omitempty"` + Field string `json:"field,omitempty"` +} + // ExplainResults presents query plan type ExplainResults struct { // Total query execution time @@ -61,6 +70,8 @@ type ExplainResults struct { Selectors []ExplainSelector `json:"selectors"` // Explaining attempts to inject Join queries ON-conditions into the Main Query WHERE clause OnConditionsInjections []ExplainJoinOnInjections `json:"on_conditions_injections,omitempty"` + // Explaining of subqueries' preselect + SubQueriesExplains []ExplainSubQuery `json:"subqueries,omitempty"` } // Describes the process of a single JOIN-query ON-conditions injection into the Where clause of a main query diff --git a/query.go b/query.go index c2f1be9cb..7a001df5d 100644 --- a/query.go +++ b/query.go @@ -555,20 +555,24 @@ func (q *Query) DWithin(index string, point Point, distance float64) *Query { return q } -func (q *Query) AggregateSum(field string) { +func (q *Query) AggregateSum(field string) *Query { q.ser.PutVarCUInt(queryAggregation).PutVarCUInt(AggSum).PutVarCUInt(1).PutVString(field) + return q } -func (q *Query) AggregateAvg(field string) { +func (q *Query) AggregateAvg(field string) *Query { 
q.ser.PutVarCUInt(queryAggregation).PutVarCUInt(AggAvg).PutVarCUInt(1).PutVString(field) + return q } -func (q *Query) AggregateMin(field string) { +func (q *Query) AggregateMin(field string) *Query { q.ser.PutVarCUInt(queryAggregation).PutVarCUInt(AggMin).PutVarCUInt(1).PutVString(field) + return q } -func (q *Query) AggregateMax(field string) { +func (q *Query) AggregateMax(field string) *Query { q.ser.PutVarCUInt(queryAggregation).PutVarCUInt(AggMax).PutVarCUInt(1).PutVString(field) + return q } type AggregateFacetRequest struct { diff --git a/test/join_test.go b/test/join_test.go index 46687225c..08837876c 100644 --- a/test/join_test.go +++ b/test/join_test.go @@ -471,6 +471,7 @@ type expectedExplain struct { Field string FieldType string Method string + Description string Keys int Comparators int Matched int @@ -499,6 +500,13 @@ type expectedExplainJoinOnInjections struct { Conditions []expectedExplainConditionInjection } +type expectedExplainSubQuery struct { + Namespace string + Keys int + Field string + Selectors []expectedExplain +} + func checkExplain(t *testing.T, res []reindexer.ExplainSelector, expected []expectedExplain, fieldName string) { require.Equal(t, len(expected), len(res)) for i := 0; i < len(expected); i++ { @@ -514,6 +522,7 @@ func checkExplain(t *testing.T, res []reindexer.ExplainSelector, expected []expe assert.Equalf(t, expected[i].Matched, res[i].Matched, fieldName+expected[i].Field) assert.Equalf(t, expected[i].Keys, res[i].Keys, fieldName+expected[i].Field) assert.Equalf(t, expected[i].Comparators, res[i].Comparators, fieldName+expected[i].Field) + assert.Equalf(t, expected[i].Description, res[i].Description, fieldName+expected[i].Field) if len(expected[i].Preselect) == 0 { assert.Nil(t, res[i].ExplainPreselect, fieldName+expected[i].Field) } else { @@ -561,6 +570,16 @@ func checkExplainJoinOnInjections(t *testing.T, res []reindexer.ExplainJoinOnInj } } +func checkExplainSubqueries(t *testing.T, res []reindexer.ExplainSubQuery, 
expected []expectedExplainSubQuery) { + require.Equal(t, len(expected), len(res)) + for i := 0; i < len(expected); i++ { + assert.Equal(t, expected[i].Namespace, res[i].Namespace) + assert.Equal(t, expected[i].Field, res[i].Field) + assert.Equal(t, expected[i].Keys, res[i].Keys) + checkExplain(t, res[i].Explain.Selectors, expected[i].Selectors, "") + } +} + func TestExplainJoin(t *testing.T) { nsMain := "test_explain_main" nsJoined := "test_explain_joined" diff --git a/test/queries_test.go b/test/queries_test.go index eae306113..191c84146 100644 --- a/test/queries_test.go +++ b/test/queries_test.go @@ -241,6 +241,8 @@ func init() { tnamespaces["test_items_eqaul_position"] = TestItemEqualPosition{} tnamespaces["test_items_strict"] = TestItem{} tnamespaces["test_items_strict_joined"] = TestJoinItem{} + + tnamespaces["test_items_explain"] = TestItemSimple{} } func FillTestItemsForNot() { @@ -1119,7 +1121,7 @@ func callQueriesSequence(t *testing.T, namespace string, distinct []string, sort newTestQuery(DB, namespace).Distinct(distinct).Sort(sort, desc).ReqTotal(). WhereQuery(t, newTestQuery(DB, namespace).Where("id", reindexer.EQ, mkID(rand.Int()%5000)), - reindexer.ANY, nil). + reindexer.ANY, nil). ExecAndVerify(t) newTestQuery(DB, namespace).Distinct(distinct).Sort(sort, desc).ReqTotal(). @@ -2173,3 +2175,183 @@ func TestQrIdleTimeout(t *testing.T) { } }) } + +func TestQueryExplain(t *testing.T) { + t.Parallel() + + ns := "test_items_explain" + + tx := newTestTx(DB, ns) + for i := 0; i < 5; i++ { + tx.Upsert(TestItemSimple{ID: i, Year: i, Name: randString()}) + } + tx.MustCommit() + + t.Run("Subquery explain check (WhereQuery)", func(t *testing.T) { + q := DB.Query(ns).Explain(). 
+ WhereQuery(t, DB.Query(ns).Select("id").Where("year", reindexer.EQ, 1), reindexer.GE, 0) + it := q.MustExec(t) + defer it.Close() + explainRes, err := it.GetExplainResults() + require.NoError(t, err) + require.NotNil(t, explainRes) + + printExplainRes(explainRes) + checkExplain(t, explainRes.Selectors, []expectedExplain{ + { + Field: "-scan", + Method: "scan", + Keys: 0, + Comparators: 0, + Matched: 5, + }, + { + Description: "always true", + Keys: 0, + Comparators: 0, + Matched: 0, + }, + }, "") + checkExplainSubqueries(t, explainRes.SubQueriesExplains, []expectedExplainSubQuery{ + { + Namespace: ns, + Selectors: []expectedExplain{ + { + Field: "year", + FieldType: "indexed", + Method: "index", + Keys: 1, + Comparators: 0, + Matched: 1, + }, + { + Field: "id", + FieldType: "indexed", + Method: "scan", + Keys: 0, + Comparators: 1, + Matched: 1, + }, + }, + }, + }) + }) + + t.Run("Subquery explain check (Where)", func(t *testing.T) { + q := DB.Query(ns).Explain(). + Where("id", reindexer.EQ, DB.Query(ns).Select("id").Where("year", reindexer.EQ, 3)) + it := q.MustExec(t) + defer it.Close() + explainRes, err := it.GetExplainResults() + require.NoError(t, err) + require.NotNil(t, explainRes) + + printExplainRes(explainRes) + checkExplain(t, explainRes.Selectors, []expectedExplain{ + { + Field: "id", + FieldType: "indexed", + Method: "index", + Keys: 1, + Comparators: 0, + Matched: 1, + }, + }, "") + checkExplainSubqueries(t, explainRes.SubQueriesExplains, []expectedExplainSubQuery{ + { + Namespace: ns, + Field: "id", + Selectors: []expectedExplain{ + { + Field: "year", + FieldType: "indexed", + Method: "index", + Keys: 1, + Comparators: 0, + Matched: 1, + }, + }, + }, + }) + }) + + t.Run("Subquery explain check (Where + WhereQuery)", func(t *testing.T) { + q := DB.Query(ns).Explain(). + Where("id", reindexer.SET, DB.Query(ns).Select("id").Where("year", reindexer.SET, []int{1, 2})). 
+ WhereQuery(t, DB.Query(ns).Select("id").Where("year", reindexer.EQ, 5), reindexer.LE, 10) + it := q.MustExec(t) + defer it.Close() + explainRes, err := it.GetExplainResults() + require.NoError(t, err) + require.NotNil(t, explainRes) + + printExplainRes(explainRes) + checkExplain(t, explainRes.Selectors, []expectedExplain{ + { + Field: "-scan", + Method: "scan", + Keys: 0, + Comparators: 0, + Matched: 1, + }, + { + Description: "always false", + Keys: 0, + Comparators: 0, + Matched: 0, + }, + { + Field: "id", + FieldType: "indexed", + Method: "scan", + Keys: 0, + Comparators: 1, + Matched: 0, + }, + }, "") + checkExplainSubqueries(t, explainRes.SubQueriesExplains, []expectedExplainSubQuery{ + { + Namespace: ns, + Field: "id", + Selectors: []expectedExplain{ + { + Field: "-scan", + Method: "scan", + Keys: 0, + Comparators: 0, + Matched: 5, + }, + { + Field: "year", + FieldType: "indexed", + Method: "scan", + Keys: 0, + Comparators: 1, + Matched: 2, + }, + }, + }, + { + Namespace: ns, + Selectors: []expectedExplain{ + { + Field: "year", + FieldType: "indexed", + Method: "index", + Keys: 0, + Comparators: 0, + Matched: 0, + }, + { + Field: "id", + FieldType: "indexed", + Method: "scan", + Keys: 0, + Comparators: 1, + Matched: 0, + }, + }, + }, + }) + }) +} diff --git a/test/reindexer_bench_test.go b/test/reindexer_bench_test.go index 3d5802948..63e8a174d 100644 --- a/test/reindexer_bench_test.go +++ b/test/reindexer_bench_test.go @@ -409,6 +409,31 @@ func Benchmark2CondQueryTotal(b *testing.B) { } } +func BenchmarkSubQueryEq(b *testing.B) { + for i := 0; i < b.N; i++ { + prices := priceIds[rand.Int()%len(priceIds)] + q := DBD.Query("test_items_bench").Where("price_id", reindexer.EQ, DBD.Query("test_join_items").Select("id").WhereInt32("id", reindexer.EQ, prices[rand.Int()%len(prices)])).Limit(20) + q.MustExec().FetchAll() + } +} + +func BenchmarkSubQuerySet(b *testing.B) { + for i := 0; i < b.N; i++ { + prices := priceIds[rand.Int()%len(priceIds)] + rangeMin := 
prices[rand.Int()%len(prices)] + q := DBD.Query("test_items_bench").Where("price_id", reindexer.SET, DBD.Query("test_join_items").Select("id").WhereInt32("id", reindexer.RANGE, rangeMin, rangeMin + 500)).Limit(20) + q.MustExec().FetchAll() + } +} + +func BenchmarkSubQueryAggregate(b *testing.B) { + for i := 0; i < b.N; i++ { + prices := priceIds[rand.Int()%len(priceIds)] + q := DBD.Query("test_items_bench").Where("price_id", reindexer.LT, DBD.Query("test_join_items").AggregateAvg("id").WhereInt32("id", reindexer.SET, prices...).Limit(500)).Limit(20) + q.MustExec().FetchAll() + } +} + func Benchmark2CondQueryLeftJoin(b *testing.B) { ctx := &TestJoinCtx{} for i := 0; i < b.N; i++ {