Skip to content

Commit

Permalink
[fix] Fix subquery bench
Browse files Browse the repository at this point in the history
  • Loading branch information
reindexer-bot committed Dec 24, 2023
1 parent 54027a7 commit ae2a6d3
Show file tree
Hide file tree
Showing 14 changed files with 330 additions and 25 deletions.
8 changes: 4 additions & 4 deletions changelog.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Version 3.21.0 (15.12.2023)
## Core
- [fea] Added [subqueries](#subqueries-nested-queries) support (`explain` for subqueries will be implemented in the next releases)
- [fea] Added [subqueries](#subqueries-nested-queries) support (`explain` for subqueries will be implemented in the next releases)
- [fea] Added backtraces/minidump support for Windows platform
- [fea] Added query crash tracker support for Windows platform
- [fix] Added explicit error for aggregations in joined queries
Expand All @@ -16,16 +16,16 @@

## Go connector
- [fea] Added Go API and DSL-convertor for subqueries
- [fea] Changed CJSON-to-object conversion logic for slices: now the single JSON values and fields with multiple JSON-paths will be concatenated together in the target field
- [fea] Added `WithStrictJoinHandlers`. This option allows to validate JoinHandlers usage at runtime
- [fea] Changed CJSON-to-object conversion logic for slices: now the single JSON values and fields with multiple JSON-paths will be concatenated together in the target field
- [fea] Added `WithStrictJoinHandlers`. This option allows to validate JoinHandlers usage at runtime
- [fix] Fixed panic handling in the CJSON deserialization
- [fix] Fixed logging in `cproto`-binding. Error messages will no longer be redirected to stdout instead of user's logger

## Face
- [fea] Saved the scroll position on the sorting
- [fea] Changed the Server ID range
- [fea] Improved the notification about the supported browsers
- [fea] Added the default values to the config form when the default config is used
- [fea] Added the default values to the config form when the default config is used
- [fix] Fixed the wrong redirect to a fake database
- [fix] Fixed the column order changing on the data sorting
- [fix] Fixed the horizontal scroll on the data sorting
Expand Down
2 changes: 1 addition & 1 deletion cpp_src/cmd/reindexer_server/contrib/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ FROM alpine:3.14 AS build
RUN cd /tmp && apk update && \
apk add git curl autoconf automake libtool linux-headers g++ make libunwind-dev grpc-dev grpc protobuf-dev c-ares-dev && \
git clone https://github.com/gperftools/gperftools.git && \
cd gperftools && \
cd gperftools && git checkout gperftools-2.13 && \
echo "noinst_PROGRAMS =" >> Makefile.am && \
sed -i s/_sigev_un\._tid/sigev_notify_thread_id/ src/profile-handler.cc && \
./autogen.sh && ./configure --disable-dependency-tracking && make -j8 && make install
Expand Down
195 changes: 195 additions & 0 deletions cpp_src/cmd/reindexer_server/test/test_storage_compatibility.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,195 @@
#!/bin/bash
# Task: https://github.com/restream/reindexer/-/issues/1188
set -e

# Stops the reindexer server and uninstalls every installed reindexer package.
# Arguments:
#   $1 - PID of the server process (the script passes the PID of a background job
#        started from this shell, which is what makes `wait` on it possible)
function KillAndRemoveServer {
    local target_pid=$1
    kill $target_pid
    # Do not touch the packages until the process has really terminated
    wait $target_pid
    yum remove -y 'reindexer*' >/dev/null
}

# Blocks until the server at $ADDRESS reports exactly the expected database name
# in response to '\databases list'. Polls every 2 seconds, without a timeout.
# Arguments:
#   $1 - (optional) database name to wait for; defaults to "test"
# Globals:
#   ADDRESS - DSN prefix of the server being polled (read only)
# Side effects:
#   "exit on error" is switched off while polling and unconditionally switched on afterwards.
function WaitForDB {
    local expected_db=${1:-test}
    local db_list
    # disable "exit on error" so the script won't stop when DB's not loaded yet
    set +e
    db_list=$(reindexer_tool --dsn "$ADDRESS" --command '\databases list')
    while [[ "$db_list" != "$expected_db" ]]; do
        sleep 2
        db_list=$(reindexer_tool --dsn "$ADDRESS" --command '\databases list')
    done
    set -e
}

# Verifies that two namespace lists hold the same names, regardless of order.
# On success prints a PASS line. On mismatch prints both lists, stops and removes
# the server via KillAndRemoveServer and terminates the script with exit code 1.
# Arguments:
#   $1 - actual namespaces list (names separated by whitespace/newlines)
#   $2 - expected namespaces list (names separated by whitespace/newlines)
#   $3 - PID of the running server, handed to KillAndRemoveServer on failure
function CompareNamespacesLists {
    local ns_list_actual=$1
    local ns_list_expected=$2
    local pid=$3
    local actual_sorted expected_sorted

    # The unquoted expansions are intentional: word splitting collapses any whitespace,
    # after which every name is placed on its own line and sorted (compare in any order).
    # Each list is normalized separately and the results are compared as a whole, so a
    # difference in how often a name occurs is detected too (a `uniq -u` over the
    # concatenation of both lists would miss it).
    actual_sorted=$(echo $ns_list_actual | tr ' ' '\n' | sort)
    expected_sorted=$(echo $ns_list_expected | tr ' ' '\n' | sort)

    if [ "$actual_sorted" == "$expected_sorted" ]; then
        echo "## PASS: namespaces list not changed"
    else
        echo "##### FAIL: namespaces list was changed"
        echo "expected: $ns_list_expected"
        echo "actual: $ns_list_actual"
        KillAndRemoveServer $pid;
        exit 1
    fi
}

# Verifies that two memstats query outputs hold the same rows, regardless of order.
# On success prints a PASS line. On mismatch prints both outputs, stops and removes
# the server via KillAndRemoveServer and terminates the script with exit code 1.
# Arguments:
#   $1 - actual query output
#   $2 - expected query output
#   $3 - PID of the running server, handed to KillAndRemoveServer on failure
function CompareMemstats {
    local actual=$1
    local expected=$2
    local pid=$3
    local actual_sorted expected_sorted

    # The unquoted expansions are intentional: word splitting collapses any whitespace,
    # then every token goes on its own line. The sed strips a trailing comma so that a
    # row compares equal no matter whether it was the last element of the JSON array.
    # Each output is normalized and sorted separately (compare in any order) and the
    # results are compared as a whole; rows legitimately repeat here, and a `uniq -u`
    # over the concatenation of both outputs would not notice a changed repeat count.
    actual_sorted=$(echo $actual | tr ' ' '\n' | sed 's/\(.*\),$/\1/' | sort)
    expected_sorted=$(echo $expected | tr ' ' '\n' | sed 's/\(.*\),$/\1/' | sort)

    if [ "$actual_sorted" == "$expected_sorted" ]; then
        echo "## PASS: memstats not changed"
    else
        echo "##### FAIL: memstats was changed"
        echo "expected: $expected"
        echo "actual: $actual"
        KillAndRemoveServer $pid;
        exit 1
    fi
}


# File name of the server RPM produced by the current build (expected under build/)
RX_SERVER_CURRENT_VERSION_RPM="$(basename build/reindexer-*server*.rpm)"
# Keep the name up to and including "server-" plus the two characters that follow it
VERSION_FROM_RPM=$(echo "$RX_SERVER_CURRENT_VERSION_RPM" | grep -o '.*server-..')
# Take the first of those two characters (the one right after "server-")
VERSION=$(echo ${VERSION_FROM_RPM: -2:1}) # one-digit version

# Pick the latest released RPM of the same major version and the namespaces list
# that the example DB is expected to expose on that version
echo "## choose latest release rpm file"
if [ $VERSION == 3 ]; then
LATEST_RELEASE=$(python3 cpp_src/cmd/reindexer_server/test/get_last_rx_version.py -v 3)
namespaces_list_expected=$'purchase_options_ext_dict\nchild_account_recommendations\n#config\n#activitystats\nradio_channels\ncollections\n#namespaces\nwp_imports_tasks\nepg_genres\nrecom_media_items_personal\nrecom_epg_archive_default\n#perfstats\nrecom_epg_live_default\nmedia_view_templates\nasset_video_servers\nwp_tasks_schedule\nadmin_roles\n#clientsstats\nrecom_epg_archive_personal\nrecom_media_items_similars\nmenu_items\naccount_recommendations\nkaraoke_items\nmedia_items\nbanners\n#queriesperfstats\nrecom_media_items_default\nrecom_epg_live_personal\nservices\n#memstats\nchannels\nmedia_item_recommendations\nwp_tasks_tasks\nepg'
elif [ $VERSION == 4 ]; then
LATEST_RELEASE=$(python3 cpp_src/cmd/reindexer_server/test/get_last_rx_version.py -v 4)
# replicationstats ns added for v4
namespaces_list_expected=$'purchase_options_ext_dict\nchild_account_recommendations\n#config\n#activitystats\n#replicationstats\nradio_channels\ncollections\n#namespaces\nwp_imports_tasks\nepg_genres\nrecom_media_items_personal\nrecom_epg_archive_default\n#perfstats\nrecom_epg_live_default\nmedia_view_templates\nasset_video_servers\nwp_tasks_schedule\nadmin_roles\n#clientsstats\nrecom_epg_archive_personal\nrecom_media_items_similars\nmenu_items\naccount_recommendations\nkaraoke_items\nmedia_items\nbanners\n#queriesperfstats\nrecom_media_items_default\nrecom_epg_live_personal\nservices\n#memstats\nchannels\nmedia_item_recommendations\nwp_tasks_tasks\nepg'
else
# Only major versions 3 and 4 are supported by this test
echo "Unknown version"
exit 1
fi

# Fetch the released RPM chosen above; it is saved under its own file name in the current directory
echo "## downloading latest release rpm file: $LATEST_RELEASE"
curl "http://repo.itv.restr.im/itv-api-ng/7/x86_64/$LATEST_RELEASE" --output $LATEST_RELEASE;
# Fetch the dump that is loaded into the server in both compatibility scenarios
echo "## downloading example DB"
curl "https://git.restream.ru/MaksimKravchuk/reindexer_testdata/-/raw/master/big.zip" --output big.zip;
unzip -o big.zip # unzips into mydb_big.rxdump;

# DSN prefix of the locally started server and the database name appended to it
ADDRESS="cproto://127.0.0.1:6534/"
DB_NAME="test"

# Expected output of the 'select replication.data_hash, replication.data_count from #memstats'
# query for the example DB, including the trailing "Returned N rows" line of the tool.
# NOTE(review): presumably one row per user namespace - confirm against the dump.
memstats_expected=$'[
{"replication":{"data_hash":24651210926,"data_count":3}},
{"replication":{"data_hash":6252344969,"data_count":1}},
{"replication":{"data_hash":37734732881,"data_count":28}},
{"replication":{"data_hash":0,"data_count":0}},
{"replication":{"data_hash":1024095024522,"data_count":1145}},
{"replication":{"data_hash":8373644068,"data_count":1315}},
{"replication":{"data_hash":0,"data_count":0}},
{"replication":{"data_hash":0,"data_count":0}},
{"replication":{"data_hash":0,"data_count":0}},
{"replication":{"data_hash":0,"data_count":0}},
{"replication":{"data_hash":7404222244,"data_count":97}},
{"replication":{"data_hash":94132837196,"data_count":4}},
{"replication":{"data_hash":1896088071,"data_count":2}},
{"replication":{"data_hash":0,"data_count":0}},
{"replication":{"data_hash":-672103903,"data_count":33538}},
{"replication":{"data_hash":0,"data_count":0}},
{"replication":{"data_hash":6833710705,"data_count":1}},
{"replication":{"data_hash":5858155773472,"data_count":4500}},
{"replication":{"data_hash":-473221280268823592,"data_count":65448}},
{"replication":{"data_hash":0,"data_count":0}},
{"replication":{"data_hash":8288213744,"data_count":3}},
{"replication":{"data_hash":0,"data_count":0}},
{"replication":{"data_hash":0,"data_count":0}},
{"replication":{"data_hash":354171024786967,"data_count":3941}},
{"replication":{"data_hash":-6520334670,"data_count":35886}},
{"replication":{"data_hash":112772074632,"data_count":281}},
{"replication":{"data_hash":-12679568198538,"data_count":1623116}}
]
Returned 27 rows'

# Forward compatibility: the storage is created by the latest release and then
# opened by the current build; the current build must report the same data.
echo "##### Forward compatibility test #####"

# Storage directory shared by both server runs of a scenario
DB_PATH=$(pwd)"/rx_db"

echo "Database: "$DB_PATH

echo "## installing latest release: $LATEST_RELEASE"
yum install -y $LATEST_RELEASE > /dev/null;
# run RX server with disabled logging
reindexer_server -l warning --httplog=none --rpclog=none --db $DB_PATH &
server_pid=$!
# presumably gives the server time to start listening - TODO confirm 2s is enough
sleep 2;

# Create the database and fill it from the downloaded dump
reindexer_tool --dsn $ADDRESS$DB_NAME -f mydb_big.rxdump --createdb;
sleep 1;

# The freshly loaded data must match the hard-coded expectations
namespaces_1=$(reindexer_tool --dsn $ADDRESS$DB_NAME --command '\namespaces list');
echo $namespaces_1;
CompareNamespacesLists "${namespaces_1[@]}" "${namespaces_list_expected[@]}" $server_pid;

memstats_1=$(reindexer_tool --dsn $ADDRESS$DB_NAME --command 'select replication.data_hash, replication.data_count from #memstats');
CompareMemstats "${memstats_1[@]}" "${memstats_expected[@]}" $server_pid;

KillAndRemoveServer $server_pid;

# Start the current build on the storage written by the release
echo "## installing current version: $RX_SERVER_CURRENT_VERSION_RPM"
yum install -y build/*.rpm > /dev/null;
reindexer_server -l0 --corelog=none --httplog=none --rpclog=none --db $DB_PATH &
server_pid=$!
sleep 2;

WaitForDB

# This time the reference is what the release itself reported (namespaces_1 / memstats_1)
namespaces_2=$(reindexer_tool --dsn $ADDRESS$DB_NAME --command '\namespaces list');
echo $namespaces_2;
CompareNamespacesLists "${namespaces_2[@]}" "${namespaces_1[@]}" $server_pid;

memstats_2=$(reindexer_tool --dsn $ADDRESS$DB_NAME --command 'select replication.data_hash, replication.data_count from #memstats');
CompareMemstats "${memstats_2[@]}" "${memstats_1[@]}" $server_pid;

# Clean up so the next scenario starts from an empty storage directory
KillAndRemoveServer $server_pid;
rm -rf $DB_PATH;
sleep 1;

# Backward compatibility: the storage is created by the current build and then
# opened by the latest release; the release must report the same data.
# Relies on DB_PATH, ADDRESS and DB_NAME assigned earlier in the script.
echo "##### Backward compatibility test #####"

echo "## installing current version: $RX_SERVER_CURRENT_VERSION_RPM"
yum install -y build/*.rpm > /dev/null;
reindexer_server -l warning --httplog=none --rpclog=none --db $DB_PATH &
server_pid=$!
# presumably gives the server time to start listening - TODO confirm 2s is enough
sleep 2;

# Create the database and fill it from the downloaded dump
reindexer_tool --dsn $ADDRESS$DB_NAME -f mydb_big.rxdump --createdb;
sleep 1;

# The freshly loaded data must match the hard-coded expectations
namespaces_3=$(reindexer_tool --dsn $ADDRESS$DB_NAME --command '\namespaces list');
echo $namespaces_3;
CompareNamespacesLists "${namespaces_3[@]}" "${namespaces_list_expected[@]}" $server_pid;

memstats_3=$(reindexer_tool --dsn $ADDRESS$DB_NAME --command 'select replication.data_hash, replication.data_count from #memstats');
CompareMemstats "${memstats_3[@]}" "${memstats_expected[@]}" $server_pid;

KillAndRemoveServer $server_pid;

# Start the latest release on the storage written by the current build
echo "## installing latest release: $LATEST_RELEASE"
yum install -y $LATEST_RELEASE > /dev/null;
reindexer_server -l warning --httplog=none --rpclog=none --db $DB_PATH &
server_pid=$!
sleep 2;

WaitForDB

# This time the reference is what the current build itself reported (namespaces_3 / memstats_3)
namespaces_4=$(reindexer_tool --dsn $ADDRESS$DB_NAME --command '\namespaces list');
echo $namespaces_4;
CompareNamespacesLists "${namespaces_4[@]}" "${namespaces_3[@]}" $server_pid;

memstats_4=$(reindexer_tool --dsn $ADDRESS$DB_NAME --command 'select replication.data_hash, replication.data_count from #memstats');
CompareMemstats "${memstats_4[@]}" "${memstats_3[@]}" $server_pid;

# Final cleanup: stop the server, remove the packages and the storage directory
KillAndRemoveServer $server_pid;
rm -rf $DB_PATH;
4 changes: 2 additions & 2 deletions cpp_src/core/query/query.cc
Original file line number Diff line number Diff line change
Expand Up @@ -293,7 +293,7 @@ void Query::deserialize(Serializer &ser, bool &hasJoinConditions) {
Debug(ser.GetVarUint());
break;
case QueryStrictMode:
strictMode_ = StrictMode(ser.GetVarUint());
Strict(StrictMode(ser.GetVarUint()));
break;
case QueryLimit:
count_ = ser.GetVarUint();
Expand All @@ -315,7 +315,7 @@ void Query::deserialize(Serializer &ser, bool &hasJoinConditions) {
break;
}
case QueryExplain:
explain_ = true;
Explain(true);
break;
case QueryWithRank:
withRank_ = true;
Expand Down
55 changes: 49 additions & 6 deletions cpp_src/gtests/bench/fixtures/api_tv_simple.cc
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,9 @@ void ApiTvSimple::RegisterAllCases() {
Register("FromCJSONPKOnly", &ApiTvSimple::FromCJSONPKOnly, this);
Register("GetCJSON", &ApiTvSimple::GetCJSON, this);
Register("ExtractField", &ApiTvSimple::ExtractField, this);
Register("SubQueryEq", &ApiTvSimple::SubQueryEq, this);
Register("SubQuerySet", &ApiTvSimple::SubQuerySet, this);
Register("SubQueryAggregate", &ApiTvSimple::SubQueryAggregate, this);

// Those benches should be last, because they are recreating indexes cache
Register("Query4CondRangeDropCache", &ApiTvSimple::Query4CondRangeDropCache, this)->Iterations(1000);
Expand Down Expand Up @@ -197,12 +200,14 @@ reindexer::Error ApiTvSimple::Initialize() {
err = db_->Commit(stringSelectNs_);
if (!err.ok()) return err;

NamespaceDef mainNsDef{innerJoinLowSelectivityMainNs_};
NamespaceDef mainNsDef{mainNs_};
mainNsDef.AddIndex("id", "hash", "int", IndexOpts().PK()).AddIndex("field", "hash", "int", IndexOpts());
err = db_->AddNamespace(mainNsDef);
if (!err.ok()) return err;
NamespaceDef rightNsDef{innerJoinLowSelectivityRightNs_};
rightNsDef.AddIndex("id", "hash", "int", IndexOpts().PK()).AddIndex("field", "hash", "int", IndexOpts());
NamespaceDef rightNsDef{rightNs_};
rightNsDef.AddIndex("id", "hash", "int", IndexOpts().PK())
.AddIndex("field", "hash", "int", IndexOpts())
.AddIndex("id_tree", "tree", "int", IndexOpts());
err = db_->AddNamespace(rightNsDef);
if (!err.ok()) return err;

Expand All @@ -227,6 +232,7 @@ reindexer::Error ApiTvSimple::Initialize() {
reindexer::JsonBuilder bld2(wrSer_);
bld2.Put("id", i);
bld2.Put("field", i);
bld2.Put("id_tree", i);
bld2.End();
err = rItem.FromJSON(wrSer_.Slice());
if (!err.ok()) return err;
Expand Down Expand Up @@ -805,9 +811,9 @@ void ApiTvSimple::Query0CondInnerJoinUnlimit(benchmark::State& state) {
void ApiTvSimple::Query0CondInnerJoinUnlimitLowSelectivity(benchmark::State& state) {
AllocsTracker allocsTracker(state);
for (auto _ : state) { // NOLINT(*deadcode.DeadStores)
Query q4join(innerJoinLowSelectivityRightNs_);
Query q4join(rightNs_);
q4join.Where("id", CondLe, 250);
Query q(innerJoinLowSelectivityMainNs_);
Query q(mainNs_);
q.InnerJoin("id", "id", CondEq, std::move(q4join)).ReqTotal();

QueryResults qres;
Expand All @@ -816,6 +822,43 @@ void ApiTvSimple::Query0CondInnerJoinUnlimitLowSelectivity(benchmark::State& sta
}
}

// Benchmark: selects from the main namespace the items whose "id" equals the result of a
// subquery, which takes "field" of the right-namespace item with a randomly chosen "id".
// A failed select is reported to the benchmark framework via SkipWithError.
void ApiTvSimple::SubQueryEq(benchmark::State& state) {
    AllocsTracker allocsTracker(state);
    for (auto _ : state) {  // NOLINT(*deadcode.DeadStores)
        // New random key for the subquery on every iteration
        const int rightItemId = int(rand() % kTotalItemsMainJoinNs);
        Query query = Query(mainNs_).Where(
            "id", CondEq, Query(rightNs_).Select({"field"}).Where("id", CondEq, VariantArray::Create(rightItemId)));
        QueryResults results;
        auto status = db_->Select(query, results);
        if (!status.ok()) {
            state.SkipWithError(status.what().c_str());
        }
    }
}

// Benchmark: selects from the main namespace the items whose "id" is in the set produced by a
// subquery, which returns "id" of the right-namespace items whose "id_tree" lies in a random
// range of fixed width. A failed select is reported to the benchmark framework via SkipWithError.
void ApiTvSimple::SubQuerySet(benchmark::State& state) {
    // Distance between the lower and the upper bound of the subquery range
    constexpr int kRangeWidth = 500;
    AllocsTracker allocsTracker(state);
    for (auto _ : state) {  // NOLINT(*deadcode.DeadStores)
        // The lower bound is drawn so that the upper bound stays below kTotalItemsMainJoinNs
        const int lowerBound = rand() % (kTotalItemsMainJoinNs - kRangeWidth);
        Query query = Query(mainNs_).Where(
            "id", CondSet,
            Query(rightNs_).Select({"id"}).Where("id_tree", CondRange, VariantArray::Create(lowerBound, lowerBound + kRangeWidth)));
        QueryResults results;
        auto status = db_->Select(query, results);
        if (!status.ok()) {
            state.SkipWithError(status.what().c_str());
        }
    }
}

// Benchmark: selects from the main namespace the items whose "id" equals the result of an
// aggregating subquery: AggAvg over "id" of the right-namespace items with "id" below a random
// bound, with a limit of 500. A failed select is reported to the benchmark framework via
// SkipWithError.
void ApiTvSimple::SubQueryAggregate(benchmark::State& state) {
    AllocsTracker allocsTracker(state);
    for (auto _ : state) {  // NOLINT(*deadcode.DeadStores)
        // New random upper bound for the subquery on every iteration
        const int idUpperBound = int(rand() % kTotalItemsMainJoinNs);
        Query query = Query(mainNs_).Where(
            "id", CondEq,
            Query(rightNs_).Aggregate(AggAvg, {"id"}).Where("id", CondLt, VariantArray::Create(idUpperBound)).Limit(500));
        QueryResults results;
        auto status = db_->Select(query, results);
        if (!status.ok()) {
            state.SkipWithError(status.what().c_str());
        }
    }
}

void ApiTvSimple::Query2CondInnerJoin(benchmark::State& state) {
AllocsTracker allocsTracker(state);
for (auto _ : state) { // NOLINT(*deadcode.DeadStores)
Expand Down Expand Up @@ -1202,7 +1245,7 @@ void ApiTvSimple::query2CondIdSet(benchmark::State& state, const std::vector<std
AllocsTracker allocsTracker(state);
unsigned counter = 0;
for (auto _ : state) { // NOLINT(*deadcode.DeadStores)
Query q(innerJoinLowSelectivityRightNs_);
Query q(rightNs_);
q.Where("id", CondSet, idsets[counter++ % idsets.size()]).Where("field", CondGt, int(kTotalItemsMainJoinNs / 2)).Limit(20);

QueryResults qres;
Expand Down
7 changes: 5 additions & 2 deletions cpp_src/gtests/bench/fixtures/api_tv_simple.h
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,9 @@ class ApiTvSimple : private BaseFixture {
void Query4CondRangeDropCache(State& state);
void Query4CondRangeDropCacheTotal(State& state);
void Query4CondRangeDropCacheCachedTotal(State& state);
void SubQueryEq(State&);
void SubQuerySet(State&);
void SubQueryAggregate(State&);

void query2CondIdSet(State& state, const std::vector<std::vector<int>>& idsets);
reindexer::Error prepareCJsonBench();
Expand All @@ -147,8 +150,8 @@ class ApiTvSimple : private BaseFixture {
std::unordered_map<unsigned, std::vector<std::vector<int>>> idsets_;
reindexer::WrSerializer wrSer_;
std::string stringSelectNs_{"string_select_ns"};
std::string innerJoinLowSelectivityMainNs_{"inner_join_low_selectivity_main_ns"};
std::string innerJoinLowSelectivityRightNs_{"inner_join_low_selectivity_right_ns"};
std::string mainNs_{"main_ns"};
std::string rightNs_{"right_ns"};
std::string cjsonNsName_{"cjson_ns_name"};
std::unique_ptr<reindexer::Item> itemForCjsonBench_;
std::vector<std::string> fieldsToExtract_;
Expand Down
9 changes: 9 additions & 0 deletions cpp_src/gtests/tests/fixtures/queries_verifier.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,16 @@
#pragma once

#include <gtest/gtest.h>

#if defined(__GNUC__) && (__GNUC__ == 12) && defined(REINDEX_WITH_ASAN)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
#include <regex>
#pragma GCC diagnostic pop
#else // REINDEX_WITH_ASAN
#include <regex>
#endif // REINDEX_WITH_ASAN

#include <unordered_map>
#include "core/nsselecter/joinedselectormock.h"
#include "core/nsselecter/sortexpression.h"
Expand Down
Loading

0 comments on commit ae2a6d3

Please sign in to comment.