Skip to content

Commit

Permalink
Merge branch '1576_1570_subquery_explain' into 'develop'
Browse files Browse the repository at this point in the history
Subquery explain - [#1576] [#1570]

See merge request itv-backend/reindexer!1485
  • Loading branch information
reindexer-bot committed Dec 25, 2023
1 parent 54027a7 commit f8ba567
Show file tree
Hide file tree
Showing 26 changed files with 669 additions and 56 deletions.
8 changes: 4 additions & 4 deletions changelog.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Version 3.21.0 (15.12.2023)
## Core
- [fea] Added [subqueries](#subqueries-nested-queries) support (`explain` for subqueries will be implemented in the next releases)
- [fea] Added [subqueries](#subqueries-nested-queries) support (`explain` for subqueries will be implemented in the next releases)
- [fea] Added backtraces/minidump support for Windows platform
- [fea] Added query crash tracker support for Windows platform
- [fix] Added explicit error for aggregations in joined queries
Expand All @@ -16,16 +16,16 @@

## Go connector
- [fea] Added Go API and DSL-convertor for subqueries
- [fea] Changed CJSON-to-object conversion logic for slices: now the single JSON values and fields with multiple JSON-paths will be concatenated together in the target field
- [fea] Added `WithStrictJoinHandlers`. This option allows to validate JoinHandlers usage at runtime
- [fea] Changed CJSON-to-object conversion logic for slices: now the single JSON values and fields with multiple JSON-paths will be concatenated together in the target field
- [fea] Added `WithStrictJoinHandlers`. This option allows to validate JoinHandlers usage at runtime
- [fix] Fixed panic handling in the CJSON deserialization
- [fix] Fixed logging in `cproto`-binding. Error messages will no longer be redirected to stdout instead of user's logger

## Face
- [fea] Saved the scroll position on the sorting
- [fea] Changed the Server ID range
- [fea] Improved the notification about the supported browsers
- [fea] Added the default values to the config form when the default config is used
- [fea] Added the default values to the config form when the default config is used
- [fix] Fixed the wrong redirect to a fake database
- [fix] Fixed the column order changing on the data sorting
- [fix] Fixed the horizontal scroll on the data sorting
Expand Down
2 changes: 1 addition & 1 deletion cpp_src/cmd/reindexer_server/contrib/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ FROM alpine:3.14 AS build
RUN cd /tmp && apk update && \
apk add git curl autoconf automake libtool linux-headers g++ make libunwind-dev grpc-dev grpc protobuf-dev c-ares-dev && \
git clone https://github.com/gperftools/gperftools.git && \
cd gperftools && \
cd gperftools && git checkout gperftools-2.13 && \
echo "noinst_PROGRAMS =" >> Makefile.am && \
sed -i s/_sigev_un\._tid/sigev_notify_thread_id/ src/profile-handler.cc && \
./autogen.sh && ./configure --disable-dependency-tracking && make -j8 && make install
Expand Down
195 changes: 195 additions & 0 deletions cpp_src/cmd/reindexer_server/test/test_storage_compatibility.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,195 @@
#!/bin/bash
# Task: https://github.com/restream/reindexer/-/issues/1188
set -e

function KillAndRemoveServer {
	# Stops the background server process and uninstalls the reindexer packages.
	#   $1 - PID of the server process started by this script
	# The PID is deliberately left unquoted, exactly as the callers pass it.
	local server_to_stop=$1

	# Send the default signal and block until the process has really exited,
	# so the next server instance can reuse the same port and storage.
	kill $server_to_stop
	wait $server_to_stop

	# Remove every installed package matching 'reindexer*'; yum output is discarded.
	yum remove -y 'reindexer*' 1> /dev/null
}

function WaitForDB {
	# Blocks until the server at $ADDRESS reports the "test" database, i.e. until
	# `reindexer_tool --command '\databases list'` prints exactly "test".
	#
	# Polls every 2 seconds. Unlike an unbounded loop, it gives up after
	# WAIT_FOR_DB_TIMEOUT_SEC seconds (default: 600), so a server that never
	# loads the DB fails the test instead of hanging it forever.
	#
	# Returns 0 once the DB is available, 1 on timeout. "Exit on error" is
	# re-enabled before returning in both cases, so a timeout aborts a caller
	# that invokes this function as a plain command.
	local timeout_sec=${WAIT_FOR_DB_TIMEOUT_SEC:-600}
	local poll_interval_sec=2
	local waited_sec=0

	set +e # disable "exit on error" so the script won't stop when DB's not loaded yet
	is_connected=$(reindexer_tool --dsn "$ADDRESS" --command '\databases list')
	while [[ $is_connected != "test" ]]
	do
		if (( waited_sec >= timeout_sec )); then
			set -e
			echo "##### FAIL: DB was not loaded in $timeout_sec seconds"
			return 1
		fi
		sleep $poll_interval_sec
		waited_sec=$(( waited_sec + poll_interval_sec ))
		is_connected=$(reindexer_tool --dsn "$ADDRESS" --command '\databases list')
	done
	set -e
}

function CompareNamespacesLists {
	# Verifies that two whitespace-separated namespace lists hold the same names,
	# regardless of order.
	#   $1 - actual list
	#   $2 - expected list
	#   $3 - PID of the running server (stopped and uninstalled on mismatch)
	# On mismatch prints both lists, stops the server and exits the script with 1.
	local got_list=$1
	local want_list=$2
	local running_server=$3

	# Both lists are intentionally expanded unquoted: word splitting puts every
	# name on its own line, then `sort | uniq -u` keeps only the names that
	# occur once, i.e. the names present in just one of the lists.
	diff=$(printf '%s\n' $got_list $want_list | sort | uniq -u) # compare in any order
	if [[ -n $diff ]]; then
		echo "##### FAIL: namespaces list was changed"
		echo "expected: $want_list"
		echo "actual: $got_list"
		KillAndRemoveServer $running_server
		exit 1
	fi
	echo "## PASS: namespaces list not changed"
}

function CompareMemstats {
	# Verifies that two memstats dumps consist of the same whitespace-separated
	# tokens, regardless of order and of a trailing comma after a token.
	#   $1 - actual dump
	#   $2 - expected dump
	#   $3 - PID of the running server (stopped and uninstalled on mismatch)
	# On mismatch prints both dumps, stops the server and exits the script with 1.
	local got_stats=$1
	local want_stats=$2
	local running_server=$3

	# Unquoted expansion splits the dumps into one token per line; sed drops a
	# trailing ',' so the last row of a list matches a middle row of the other;
	# `sort | uniq -u` then leaves only tokens that occur exactly once.
	diff=$(printf '%s\n' $got_stats $want_stats | sed 's/\(.*\),$/\1/' | sort | uniq -u) # compare in any order
	if [[ -n $diff ]]; then
		echo "##### FAIL: memstats was changed"
		echo "expected: $want_stats"
		echo "actual: $got_stats"
		KillAndRemoveServer $running_server
		exit 1
	fi
	echo "## PASS: memstats not changed"
}


# Detect the major version of the freshly built server package.
# The rpm file name is cut right after the two characters that follow
# "server-", and the first of those two characters is taken as the version.
RX_SERVER_CURRENT_VERSION_RPM="$(basename build/reindexer-*server*.rpm)"
VERSION_FROM_RPM=$(echo "$RX_SERVER_CURRENT_VERSION_RPM" | grep -o '.*server-..')
# Plain parameter expansion: no extra `echo` subshell and no unquoted expansion
# that could be word-split or glob-expanded.
VERSION="${VERSION_FROM_RPM: -2:1}" # one-digit version

echo "## choose latest release rpm file"
# Pick the latest released rpm of the same major version and the list of
# namespaces the example DB is expected to contain for that version.
# `case` on the quoted value handles an empty/unexpected VERSION cleanly
# (an unquoted `[ $VERSION == 3 ]` is a test syntax error when VERSION is empty).
case "$VERSION" in
	3)
		LATEST_RELEASE=$(python3 cpp_src/cmd/reindexer_server/test/get_last_rx_version.py -v 3)
		namespaces_list_expected=$'purchase_options_ext_dict\nchild_account_recommendations\n#config\n#activitystats\nradio_channels\ncollections\n#namespaces\nwp_imports_tasks\nepg_genres\nrecom_media_items_personal\nrecom_epg_archive_default\n#perfstats\nrecom_epg_live_default\nmedia_view_templates\nasset_video_servers\nwp_tasks_schedule\nadmin_roles\n#clientsstats\nrecom_epg_archive_personal\nrecom_media_items_similars\nmenu_items\naccount_recommendations\nkaraoke_items\nmedia_items\nbanners\n#queriesperfstats\nrecom_media_items_default\nrecom_epg_live_personal\nservices\n#memstats\nchannels\nmedia_item_recommendations\nwp_tasks_tasks\nepg'
		;;
	4)
		LATEST_RELEASE=$(python3 cpp_src/cmd/reindexer_server/test/get_last_rx_version.py -v 4)
		# replicationstats ns added for v4
		namespaces_list_expected=$'purchase_options_ext_dict\nchild_account_recommendations\n#config\n#activitystats\n#replicationstats\nradio_channels\ncollections\n#namespaces\nwp_imports_tasks\nepg_genres\nrecom_media_items_personal\nrecom_epg_archive_default\n#perfstats\nrecom_epg_live_default\nmedia_view_templates\nasset_video_servers\nwp_tasks_schedule\nadmin_roles\n#clientsstats\nrecom_epg_archive_personal\nrecom_media_items_similars\nmenu_items\naccount_recommendations\nkaraoke_items\nmedia_items\nbanners\n#queriesperfstats\nrecom_media_items_default\nrecom_epg_live_personal\nservices\n#memstats\nchannels\nmedia_item_recommendations\nwp_tasks_tasks\nepg'
		;;
	*)
		echo "Unknown version"
		exit 1
		;;
esac

# Fetch the previous release package and the example DB dump used by both scenarios.
# `--fail` makes curl exit non-zero on an HTTP error (>= 400) instead of saving the
# server's error page as the rpm/zip, so `set -e` stops the script at the real cause.
echo "## downloading latest release rpm file: $LATEST_RELEASE"
curl --fail "http://repo.itv.restr.im/itv-api-ng/7/x86_64/$LATEST_RELEASE" --output "$LATEST_RELEASE";
echo "## downloading example DB"
curl --fail "https://git.restream.ru/MaksimKravchuk/reindexer_testdata/-/raw/master/big.zip" --output big.zip;
unzip -o big.zip # unzips into mydb_big.rxdump;

# Server endpoint and database name used by every reindexer_tool call below.
ADDRESS="cproto://127.0.0.1:6534/"
DB_NAME="test"

# Reference output of the
#   select replication.data_hash, replication.data_count from #memstats
# query for the example DB. It is compared (in any order, ignoring trailing
# commas) with the output captured right after the dump is loaded.
# NOTE(review): the trailing "Returned 27 rows" line is presumably printed by
# reindexer_tool after the result set — confirm against the tool's output.
memstats_expected=$'[
{"replication":{"data_hash":24651210926,"data_count":3}},
{"replication":{"data_hash":6252344969,"data_count":1}},
{"replication":{"data_hash":37734732881,"data_count":28}},
{"replication":{"data_hash":0,"data_count":0}},
{"replication":{"data_hash":1024095024522,"data_count":1145}},
{"replication":{"data_hash":8373644068,"data_count":1315}},
{"replication":{"data_hash":0,"data_count":0}},
{"replication":{"data_hash":0,"data_count":0}},
{"replication":{"data_hash":0,"data_count":0}},
{"replication":{"data_hash":0,"data_count":0}},
{"replication":{"data_hash":7404222244,"data_count":97}},
{"replication":{"data_hash":94132837196,"data_count":4}},
{"replication":{"data_hash":1896088071,"data_count":2}},
{"replication":{"data_hash":0,"data_count":0}},
{"replication":{"data_hash":-672103903,"data_count":33538}},
{"replication":{"data_hash":0,"data_count":0}},
{"replication":{"data_hash":6833710705,"data_count":1}},
{"replication":{"data_hash":5858155773472,"data_count":4500}},
{"replication":{"data_hash":-473221280268823592,"data_count":65448}},
{"replication":{"data_hash":0,"data_count":0}},
{"replication":{"data_hash":8288213744,"data_count":3}},
{"replication":{"data_hash":0,"data_count":0}},
{"replication":{"data_hash":0,"data_count":0}},
{"replication":{"data_hash":354171024786967,"data_count":3941}},
{"replication":{"data_hash":-6520334670,"data_count":35886}},
{"replication":{"data_hash":112772074632,"data_count":281}},
{"replication":{"data_hash":-12679568198538,"data_count":1623116}}
]
Returned 27 rows'

# Forward compatibility scenario: the storage is created by the latest RELEASED
# server, then the same storage directory is opened by the CURRENT build. The
# namespaces list and memstats must be identical before and after the switch.
echo "##### Forward compatibility test #####"

# Storage directory shared by both server versions within a scenario.
DB_PATH=$(pwd)"/rx_db"

echo "Database: "$DB_PATH

# Step 1: install the latest release and start it in the background.
echo "## installing latest release: $LATEST_RELEASE"
yum install -y $LATEST_RELEASE > /dev/null;
# run RX server with disabled logging
reindexer_server -l warning --httplog=none --rpclog=none --db $DB_PATH &
server_pid=$!
sleep 2;

# Step 2: load the example dump into a newly created database.
reindexer_tool --dsn $ADDRESS$DB_NAME -f mydb_big.rxdump --createdb;
sleep 1;

# Step 3: the freshly loaded data must match the hard-coded reference values.
namespaces_1=$(reindexer_tool --dsn $ADDRESS$DB_NAME --command '\namespaces list');
echo $namespaces_1;
CompareNamespacesLists "${namespaces_1[@]}" "${namespaces_list_expected[@]}" $server_pid;

memstats_1=$(reindexer_tool --dsn $ADDRESS$DB_NAME --command 'select replication.data_hash, replication.data_count from #memstats');
CompareMemstats "${memstats_1[@]}" "${memstats_expected[@]}" $server_pid;

KillAndRemoveServer $server_pid;

# Step 4: install the current build and start it on the SAME storage directory.
# NOTE(review): this launch uses `-l0 --corelog=none` while every other launch in
# this script uses `-l warning` — confirm the difference is intentional.
echo "## installing current version: $RX_SERVER_CURRENT_VERSION_RPM"
yum install -y build/*.rpm > /dev/null;
reindexer_server -l0 --corelog=none --httplog=none --rpclog=none --db $DB_PATH &
server_pid=$!
sleep 2;

# The existing storage has to be loaded before it can be queried.
WaitForDB

# Step 5: data read by the current build must equal what the release reported.
namespaces_2=$(reindexer_tool --dsn $ADDRESS$DB_NAME --command '\namespaces list');
echo $namespaces_2;
CompareNamespacesLists "${namespaces_2[@]}" "${namespaces_1[@]}" $server_pid;

memstats_2=$(reindexer_tool --dsn $ADDRESS$DB_NAME --command 'select replication.data_hash, replication.data_count from #memstats');
CompareMemstats "${memstats_2[@]}" "${memstats_1[@]}" $server_pid;

# Clean up: stop the server and drop the storage so the next scenario starts empty.
KillAndRemoveServer $server_pid;
rm -rf $DB_PATH;
sleep 1;

# Backward compatibility scenario: the storage is created by the CURRENT build,
# then the same storage directory is opened by the latest RELEASED server. The
# namespaces list and memstats must be identical before and after the switch.
echo "##### Backward compatibility test #####"

# Step 1: install the current build and start it in the background.
echo "## installing current version: $RX_SERVER_CURRENT_VERSION_RPM"
yum install -y build/*.rpm > /dev/null;
reindexer_server -l warning --httplog=none --rpclog=none --db $DB_PATH &
server_pid=$!
sleep 2;

# Step 2: load the example dump into a newly created database.
reindexer_tool --dsn $ADDRESS$DB_NAME -f mydb_big.rxdump --createdb;
sleep 1;

# Step 3: the freshly loaded data must match the hard-coded reference values.
namespaces_3=$(reindexer_tool --dsn $ADDRESS$DB_NAME --command '\namespaces list');
echo $namespaces_3;
CompareNamespacesLists "${namespaces_3[@]}" "${namespaces_list_expected[@]}" $server_pid;

memstats_3=$(reindexer_tool --dsn $ADDRESS$DB_NAME --command 'select replication.data_hash, replication.data_count from #memstats');
CompareMemstats "${memstats_3[@]}" "${memstats_expected[@]}" $server_pid;

KillAndRemoveServer $server_pid;

# Step 4: install the latest release and start it on the SAME storage directory.
echo "## installing latest release: $LATEST_RELEASE"
yum install -y $LATEST_RELEASE > /dev/null;
reindexer_server -l warning --httplog=none --rpclog=none --db $DB_PATH &
server_pid=$!
sleep 2;

# The existing storage has to be loaded before it can be queried.
WaitForDB

# Step 5: data read by the release must equal what the current build reported.
namespaces_4=$(reindexer_tool --dsn $ADDRESS$DB_NAME --command '\namespaces list');
echo $namespaces_4;
CompareNamespacesLists "${namespaces_4[@]}" "${namespaces_3[@]}" $server_pid;

memstats_4=$(reindexer_tool --dsn $ADDRESS$DB_NAME --command 'select replication.data_hash, replication.data_count from #memstats');
CompareMemstats "${memstats_4[@]}" "${memstats_3[@]}" $server_pid;

# Clean up: stop the server and drop the storage.
KillAndRemoveServer $server_pid;
rm -rf $DB_PATH;
10 changes: 10 additions & 0 deletions cpp_src/core/nsselecter/explaincalc.cc
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,16 @@ std::string ExplainCalc::GetJSON() {
json.Put("postprocess_us"sv, To_us(postprocess_));
json.Put("loop_us"sv, To_us(loop_));
json.Put("general_sort_us"sv, To_us(sort_));
if (!subqueries_.empty()) {
auto subQuries = json.Array("subqueries");
for (const auto &sq : subqueries_) {
auto s = subQuries.Object();
s.Put("namespace", sq.NsName());
s.Raw("explain", sq.Explain());
std::visit(overloaded{[&](size_t k) { s.Put("keys", k); }, [&](const std::string &f) { s.Put("field", f); }},
sq.FieldOrKeys());
}
}
}
json.Put("sort_index"sv, sortIndex_);
json.Put("sort_by_uncommitted_index"sv, sortOptimization_);
Expand Down
38 changes: 30 additions & 8 deletions cpp_src/core/nsselecter/explaincalc.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

#include <chrono>
#include <string_view>
#include <variant>
#include <vector>

#include "core/type_consts.h"
Expand All @@ -17,6 +18,24 @@ struct ConditionInjection;
typedef std::vector<JoinedSelector> JoinedSelectors;
typedef std::vector<JoinOnInjection> OnConditionInjections;

/// Explain data collected for a single subquery of a query.
/// Holds the subquery's namespace name, its explain text and either the name
/// of the field the subquery is bound to or a number of keys.
class SubQueryExplain {
public:
	/// @param ns  - name of the subquery's namespace (copied).
	/// @param exp - explain text of the subquery (moved into the object).
	SubQueryExplain(const std::string& ns, std::string&& exp) : explain_(std::move(exp)), namespace_(ns) {}

	/// Replaces the stored alternative: a field name (std::string) or a keys count (size_t).
	void SetFieldOrKeys(std::variant<std::string, size_t>&& fok) noexcept { fieldOrKeys_ = std::move(fok); }

	// Accessors return references into this object, so they are callable on
	// lvalues only; the rvalue-qualified overloads are deleted to reject calls
	// on temporaries, which would yield dangling references.

	/// @return name of the subquery's namespace.
	[[nodiscard]] const std::string& NsName() const& noexcept { return namespace_; }
	auto NsName() const&& = delete;

	/// @return field name (std::string) or keys count (size_t); holds size_t 0 until SetFieldOrKeys() is called.
	[[nodiscard]] const auto& FieldOrKeys() const& noexcept { return fieldOrKeys_; }
	auto FieldOrKeys() const&& = delete;

	/// @return explain text of the subquery.
	[[nodiscard]] const std::string& Explain() const& noexcept { return explain_; }
	auto Explain() const&& = delete;

private:
	std::string explain_;
	std::string namespace_;
	std::variant<std::string, size_t> fieldOrKeys_{std::in_place_type<size_t>, size_t{0}};
};

class ExplainCalc {
public:
typedef std::chrono::high_resolution_clock Clock;
Expand All @@ -42,11 +61,12 @@ class ExplainCalc {

void PutCount(int cnt) noexcept { count_ = cnt; }
void PutSortIndex(std::string_view index) noexcept { sortIndex_ = index; }
void PutSelectors(const SelectIteratorContainer *qres) noexcept { selectors_ = qres; }
void PutJoinedSelectors(const JoinedSelectors *jselectors) noexcept { jselectors_ = jselectors; }
void PutSelectors(const SelectIteratorContainer* qres) noexcept { selectors_ = qres; }
void PutJoinedSelectors(const JoinedSelectors* jselectors) noexcept { jselectors_ = jselectors; }
void SetPreselectTime(Duration preselectTime) noexcept { preselect_ = preselectTime; }
void PutOnConditionInjections(const OnConditionInjections *onCondInjections) noexcept { onInjections_ = onCondInjections; }
void PutOnConditionInjections(const OnConditionInjections* onCondInjections) noexcept { onInjections_ = onCondInjections; }
void SetSortOptimization(bool enable) noexcept { sortOptimization_ = enable; }
void SetSubQueriesExplains(std::vector<SubQueryExplain>&& subQueriesExpl) noexcept { subqueries_ = std::move(subQueriesExpl); }

void LogDump(int logLevel);
std::string GetJSON();
Expand All @@ -59,24 +79,26 @@ class ExplainCalc {
Duration Sort() const noexcept { return sort_; }

size_t Iterations() const noexcept { return iters_; }
static int To_us(const Duration &d) noexcept;
static int To_us(const Duration& d) noexcept;
bool IsEnabled() const noexcept { return enabled_; }

private:
Duration lap() noexcept;

time_point last_point_, sort_start_point_;
Duration total_, prepare_ = Duration::zero();
Duration total_ = Duration::zero();
Duration prepare_ = Duration::zero();
Duration preselect_ = Duration::zero();
Duration select_ = Duration::zero();
Duration postprocess_ = Duration::zero();
Duration loop_ = Duration::zero();
Duration sort_ = Duration::zero();

std::string_view sortIndex_;
const SelectIteratorContainer *selectors_ = nullptr;
const JoinedSelectors *jselectors_ = nullptr;
const OnConditionInjections *onInjections_ = nullptr; ///< Optional
const SelectIteratorContainer* selectors_ = nullptr;
const JoinedSelectors* jselectors_ = nullptr;
const OnConditionInjections* onInjections_ = nullptr; ///< Optional
std::vector<SubQueryExplain> subqueries_;

int iters_ = 0;
int count_ = 0;
Expand Down
1 change: 1 addition & 0 deletions cpp_src/core/nsselecter/nsselecter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ void NsSelecter::operator()(QueryResults &result, SelectCtx &ctx, const RdxConte

auto &explain = ctx.explain;
explain = ExplainCalc(ctx.query.GetExplain() || logLevel >= LogInfo);
explain.SetSubQueriesExplains(std::move(ctx.subQueriesExplains));
ActiveQueryScope queryScope(ctx, ns_->optimizationState_, explain, ns_->locker_.IsReadOnly(), ns_->strHolder_.get());

explain.SetPreselectTime(ctx.preResultTimeTotal);
Expand Down
1 change: 1 addition & 0 deletions cpp_src/core/nsselecter/nsselecter.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ struct SelectCtx {
const Query *parentQuery = nullptr;
ExplainCalc explain;
bool requiresCrashTracking = false;
std::vector<SubQueryExplain> subQueriesExplains;

RX_ALWAYS_INLINE bool isMergeQuerySubQuery() const noexcept { return isMergeQuery == IsMergeQuery::Yes && parentQuery; }
};
Expand Down
4 changes: 2 additions & 2 deletions cpp_src/core/query/query.cc
Original file line number Diff line number Diff line change
Expand Up @@ -293,7 +293,7 @@ void Query::deserialize(Serializer &ser, bool &hasJoinConditions) {
Debug(ser.GetVarUint());
break;
case QueryStrictMode:
strictMode_ = StrictMode(ser.GetVarUint());
Strict(StrictMode(ser.GetVarUint()));
break;
case QueryLimit:
count_ = ser.GetVarUint();
Expand All @@ -315,7 +315,7 @@ void Query::deserialize(Serializer &ser, bool &hasJoinConditions) {
break;
}
case QueryExplain:
explain_ = true;
Explain(true);
break;
case QueryWithRank:
withRank_ = true;
Expand Down
5 changes: 3 additions & 2 deletions cpp_src/core/queryresults/queryresults.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,10 @@ class QueryResults {
void Erase(ItemRefVector::iterator begin, ItemRefVector::iterator end);
size_t Count() const noexcept { return items_.size(); }
size_t TotalCount() const noexcept { return totalCount; }
const std::string &GetExplainResults() const &noexcept { return explainResults; }
const std::string &GetExplainResults() const & noexcept { return explainResults; }
const std::string &GetExplainResults() const && = delete;
const std::vector<AggregationResult> &GetAggregationResults() const &noexcept { return aggregationResults; }
std::string &&MoveExplainResults() & noexcept { return std::move(explainResults); }
const std::vector<AggregationResult> &GetAggregationResults() const & noexcept { return aggregationResults; }
const std::vector<AggregationResult> &GetAggregationResults() const && = delete;
void Clear();
h_vector<std::string_view, 1> GetNamespaces() const;
Expand Down
Loading

0 comments on commit f8ba567

Please sign in to comment.