Skip to content

Commit

Permalink
Merge branch 'qa/1618_m1_bench' into 'develop'
Browse files Browse the repository at this point in the history
#1618 add m1 bench

See merge request itv-backend/reindexer!1488
  • Loading branch information
reindexer-bot committed Jan 9, 2024
1 parent 54027a7 commit 0740060
Show file tree
Hide file tree
Showing 29 changed files with 697 additions and 66 deletions.
8 changes: 4 additions & 4 deletions changelog.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Version 3.21.0 (15.12.2023)
## Core
- [fea] Added [subqueries](#subqueries-nested-queries) support (`explain` for subqueries will be implemented in the next releases)
- [fea] Added [subqueries](#subqueries-nested-queries) support (`explain` for subqueries will be implemented in the next releases)
- [fea] Added backtraces/minidump support for Windows platform
- [fea] Added query crash tracker support for Windows platform
- [fix] Added explicit error for aggregations in joined queries
Expand All @@ -16,16 +16,16 @@

## Go connector
- [fea] Added Go API and DSL-convertor for subqueries
- [fea] Changed CJSON-to-object conversion logic for slices: now the single JSON values and fields with multiple JSON-paths will be concatenated together in the target field
- [fea] Added `WithStrictJoinHandlers`. This option allows to validate JoinHandlers usage at runtime
- [fea] Changed CJSON-to-object conversion logic for slices: now the single JSON values and fields with multiple JSON-paths will be concatenated together in the target field
- [fea] Added `WithStrictJoinHandlers`. This option allows validating JoinHandlers usage at runtime
- [fix] Fixed panic handling in the CJSON deserialization
- [fix] Fixed logging in `cproto`-binding. Error messages will no longer be redirected to stdout instead of user's logger

## Face
- [fea] Saved the scroll position on the sorting
- [fea] Changed the Server ID range
- [fea] Improved the notification about the supported browsers
- [fea] Added the default values to the config form when the default config is used
- [fea] Added the default values to the config form when the default config is used
- [fix] Fixed the wrong redirect to a fake database
- [fix] Fixed the column order changing on the data sorting
- [fix] Fixed the horizontal scroll on the data sorting
Expand Down
2 changes: 1 addition & 1 deletion cpp_src/cmd/reindexer_server/contrib/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ FROM alpine:3.14 AS build
RUN cd /tmp && apk update && \
apk add git curl autoconf automake libtool linux-headers g++ make libunwind-dev grpc-dev grpc protobuf-dev c-ares-dev && \
git clone https://github.com/gperftools/gperftools.git && \
cd gperftools && \
cd gperftools && git checkout gperftools-2.13 && \
echo "noinst_PROGRAMS =" >> Makefile.am && \
sed -i s/_sigev_un\._tid/sigev_notify_thread_id/ src/profile-handler.cc && \
./autogen.sh && ./configure --disable-dependency-tracking && make -j8 && make install
Expand Down
195 changes: 195 additions & 0 deletions cpp_src/cmd/reindexer_server/test/test_storage_compatibility.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,195 @@
#!/bin/bash
# Task: https://github.com/restream/reindexer/-/issues/1188
set -e

# Stops the background server and uninstalls every installed reindexer package.
#
# Arguments:
#   $1 - PID of the reindexer_server process started by this shell.
# Returns:
#   The exit status reported by `wait` for the server process (0 on a clean shutdown).
function KillAndRemoveServer {
    local pid=$1
    local status=0

    kill "$pid"
    # Capture the status instead of letting `set -e` abort here: the package must be
    # removed even when the server exits with a non-zero code.
    wait "$pid" || status=$?
    yum remove -y 'reindexer*' > /dev/null

    # Still propagate an abnormal shutdown to the caller (and to `set -e`).
    return "$status"
}

# Waits until the server reports that the test database is loaded.
#
# Polls `reindexer_tool --command '\databases list'` against $ADDRESS every 2 seconds
# until its output is exactly the database name ($DB_NAME, falling back to "test").
#
# Arguments:
#   $1 - optional maximum number of polls (default: 300, i.e. about 10 minutes).
# Returns:
#   0 once the database is listed, 1 if it never appeared within the limit.
function WaitForDB {
    local max_attempts=${1:-300}
    local expected_db=${DB_NAME:-test}
    local attempt
    local is_connected

    for ((attempt = 1; attempt <= max_attempts; attempt++)); do
        # The tool fails while the DB is not loaded yet; `|| true` keeps that failure
        # from stopping the script under `set -e` without toggling the global option.
        is_connected=$(reindexer_tool --dsn "$ADDRESS" --command '\databases list' || true)
        if [[ $is_connected == "$expected_db" ]]; then
            return 0
        fi
        sleep 2
    done

    # Bounded wait: a server that never comes up must fail the job instead of hanging it.
    echo "##### FAIL: database '$expected_db' was not loaded after $max_attempts attempts" >&2
    return 1
}

# Checks that two namespace lists contain the same set of names, in any order.
#
# Both lists are whitespace-separated strings. Each one is split into words,
# de-duplicated and sorted, and the results are compared for equality. De-duplicating
# each side first matters: a name repeated on one side and missing on the other
# must not cancel itself out.
#
# Arguments:
#   $1 - actual namespace list
#   $2 - expected namespace list
#   $3 - PID of the running server (stopped and removed on failure)
# On mismatch the server is killed, the packages are removed and the script exits with 1.
function CompareNamespacesLists {
    local ns_list_actual=$1
    local ns_list_expected=$2
    local pid=$3
    local actual_set
    local expected_set

    actual_set=$(printf '%s\n' "$ns_list_actual" | tr -s '[:space:]' '\n' | sed '/^$/d' | sort -u)
    expected_set=$(printf '%s\n' "$ns_list_expected" | tr -s '[:space:]' '\n' | sed '/^$/d' | sort -u)

    if [ "$actual_set" == "$expected_set" ]; then
        echo "## PASS: namespaces list not changed"
    else
        echo "##### FAIL: namespaces list was changed"
        echo "expected: $ns_list_expected"
        echo "actual: $ns_list_actual"
        KillAndRemoveServer "$pid"
        exit 1
    fi
}

# Checks that two memstats dumps contain the same rows, in any order.
#
# Each dump is split on whitespace into tokens, one trailing comma per token is
# stripped (the row that comes last in a dump has no comma, and row order may differ),
# and the sorted token lists are compared for equality. Rows are compared with their
# multiplicity because identical rows legitimately repeat (e.g. empty namespaces),
# so a changed number of equal rows must be reported as a difference.
#
# Arguments:
#   $1 - actual memstats output
#   $2 - expected memstats output
#   $3 - PID of the running server (stopped and removed on failure)
# On mismatch the server is killed, the packages are removed and the script exits with 1.
function CompareMemstats {
    local actual=$1
    local expected=$2
    local pid=$3
    local actual_rows
    local expected_rows

    actual_rows=$(printf '%s\n' "$actual" | tr -s '[:space:]' '\n' | sed -e '/^$/d' -e 's/,$//' | sort)
    expected_rows=$(printf '%s\n' "$expected" | tr -s '[:space:]' '\n' | sed -e '/^$/d' -e 's/,$//' | sort)

    if [ "$actual_rows" == "$expected_rows" ]; then
        echo "## PASS: memstats not changed"
    else
        echo "##### FAIL: memstats was changed"
        echo "expected: $expected"
        echo "actual: $actual"
        KillAndRemoveServer "$pid"
        exit 1
    fi
}


# Derive the one-digit major version from the name of the freshly built server RPM:
# keep everything up to "server-" plus two more characters, then take the first of
# those two characters.
RX_SERVER_CURRENT_VERSION_RPM="$(basename build/reindexer-*server*.rpm)"
# `|| true`: a name that does not match must reach the "Unknown version" branch below
# instead of silently terminating the script through `set -e`.
VERSION_FROM_RPM=$(echo "$RX_SERVER_CURRENT_VERSION_RPM" | grep -o '.*server-..' || true)
VERSION=${VERSION_FROM_RPM: -2:1} # one-digit version

echo "## choose latest release rpm file"
case "$VERSION" in
    3|4)
        LATEST_RELEASE=$(python3 cpp_src/cmd/reindexer_server/test/get_last_rx_version.py -v "$VERSION")
        # Namespaces expected after loading the example dump (compared in any order).
        namespaces_list_expected=$'purchase_options_ext_dict\nchild_account_recommendations\n#config\n#activitystats\nradio_channels\ncollections\n#namespaces\nwp_imports_tasks\nepg_genres\nrecom_media_items_personal\nrecom_epg_archive_default\n#perfstats\nrecom_epg_live_default\nmedia_view_templates\nasset_video_servers\nwp_tasks_schedule\nadmin_roles\n#clientsstats\nrecom_epg_archive_personal\nrecom_media_items_similars\nmenu_items\naccount_recommendations\nkaraoke_items\nmedia_items\nbanners\n#queriesperfstats\nrecom_media_items_default\nrecom_epg_live_personal\nservices\n#memstats\nchannels\nmedia_item_recommendations\nwp_tasks_tasks\nepg'
        if [ "$VERSION" == 4 ]; then
            # replicationstats ns added for v4
            namespaces_list_expected+=$'\n#replicationstats'
        fi
        ;;
    *)
        echo "Unknown version"
        exit 1
        ;;
esac

# Fetch the latest released server RPM and the example database dump.
# --fail makes curl return a non-zero status on an HTTP error instead of saving the
# error page under the target name, so `set -e` stops the script at the real cause.
echo "## downloading latest release rpm file: $LATEST_RELEASE"
curl --fail "http://repo.itv.restr.im/itv-api-ng/7/x86_64/$LATEST_RELEASE" --output "$LATEST_RELEASE"
echo "## downloading example DB"
curl --fail "https://git.restream.ru/MaksimKravchuk/reindexer_testdata/-/raw/master/big.zip" --output big.zip
unzip -o big.zip # unzips into mydb_big.rxdump

# DSN prefix passed to reindexer_tool via --dsn; the database name is appended to it.
ADDRESS="cproto://127.0.0.1:6534/"
# Database that the example dump is loaded into and that WaitForDB waits for.
DB_NAME="test"

# Expected output of
#   select replication.data_hash, replication.data_count from #memstats
# after the example dump has been loaded. CompareMemstats sorts the rows before
# comparing, so the row order here is not significant. The text inside the quotes
# is compared as data, including the trailing "Returned 27 rows" line.
memstats_expected=$'[
{"replication":{"data_hash":24651210926,"data_count":3}},
{"replication":{"data_hash":6252344969,"data_count":1}},
{"replication":{"data_hash":37734732881,"data_count":28}},
{"replication":{"data_hash":0,"data_count":0}},
{"replication":{"data_hash":1024095024522,"data_count":1145}},
{"replication":{"data_hash":8373644068,"data_count":1315}},
{"replication":{"data_hash":0,"data_count":0}},
{"replication":{"data_hash":0,"data_count":0}},
{"replication":{"data_hash":0,"data_count":0}},
{"replication":{"data_hash":0,"data_count":0}},
{"replication":{"data_hash":7404222244,"data_count":97}},
{"replication":{"data_hash":94132837196,"data_count":4}},
{"replication":{"data_hash":1896088071,"data_count":2}},
{"replication":{"data_hash":0,"data_count":0}},
{"replication":{"data_hash":-672103903,"data_count":33538}},
{"replication":{"data_hash":0,"data_count":0}},
{"replication":{"data_hash":6833710705,"data_count":1}},
{"replication":{"data_hash":5858155773472,"data_count":4500}},
{"replication":{"data_hash":-473221280268823592,"data_count":65448}},
{"replication":{"data_hash":0,"data_count":0}},
{"replication":{"data_hash":8288213744,"data_count":3}},
{"replication":{"data_hash":0,"data_count":0}},
{"replication":{"data_hash":0,"data_count":0}},
{"replication":{"data_hash":354171024786967,"data_count":3941}},
{"replication":{"data_hash":-6520334670,"data_count":35886}},
{"replication":{"data_hash":112772074632,"data_count":281}},
{"replication":{"data_hash":-12679568198538,"data_count":1623116}}
]
Returned 27 rows'

# Forward compatibility: a storage created by the latest RELEASED server must be
# readable by the CURRENT build with the same namespaces and memstats.
echo "##### Forward compatibility test #####"

# Storage directory reused by both server versions; quoted everywhere below so a
# working directory containing whitespace cannot split the path (notably for rm -rf).
DB_PATH="$(pwd)/rx_db"

echo "Database: $DB_PATH"

echo "## installing latest release: $LATEST_RELEASE"
yum install -y "$LATEST_RELEASE" > /dev/null
# run RX server with disabled logging
reindexer_server -l warning --httplog=none --rpclog=none --db "$DB_PATH" &
server_pid=$!
sleep 2

# Load the example dump into a new database on the released server.
reindexer_tool --dsn "$ADDRESS$DB_NAME" -f mydb_big.rxdump --createdb
sleep 1

namespaces_1=$(reindexer_tool --dsn "$ADDRESS$DB_NAME" --command '\namespaces list')
# Left unquoted: prints the list on a single line.
echo $namespaces_1
CompareNamespacesLists "$namespaces_1" "$namespaces_list_expected" "$server_pid"

memstats_1=$(reindexer_tool --dsn "$ADDRESS$DB_NAME" --command 'select replication.data_hash, replication.data_count from #memstats')
CompareMemstats "$memstats_1" "$memstats_expected" "$server_pid"

KillAndRemoveServer "$server_pid"

# Reopen the same storage with the current build and compare against the state
# captured from the released server.
echo "## installing current version: $RX_SERVER_CURRENT_VERSION_RPM"
yum install -y build/*.rpm > /dev/null
reindexer_server -l0 --corelog=none --httplog=none --rpclog=none --db "$DB_PATH" &
server_pid=$!
sleep 2

WaitForDB

namespaces_2=$(reindexer_tool --dsn "$ADDRESS$DB_NAME" --command '\namespaces list')
echo $namespaces_2
CompareNamespacesLists "$namespaces_2" "$namespaces_1" "$server_pid"

memstats_2=$(reindexer_tool --dsn "$ADDRESS$DB_NAME" --command 'select replication.data_hash, replication.data_count from #memstats')
CompareMemstats "$memstats_2" "$memstats_1" "$server_pid"

KillAndRemoveServer "$server_pid"
# ${DB_PATH:?} aborts instead of running `rm -rf` on an empty path.
rm -rf "${DB_PATH:?}"
sleep 1

# Backward compatibility: a storage created by the CURRENT build must be readable by
# the latest RELEASED server with the same namespaces and memstats.
# DB_PATH is assigned earlier in this script; it is quoted everywhere below so a path
# containing whitespace cannot be split (notably for rm -rf).
echo "##### Backward compatibility test #####"

echo "## installing current version: $RX_SERVER_CURRENT_VERSION_RPM"
yum install -y build/*.rpm > /dev/null
reindexer_server -l warning --httplog=none --rpclog=none --db "$DB_PATH" &
server_pid=$!
sleep 2

# Load the example dump into a new database on the current build.
reindexer_tool --dsn "$ADDRESS$DB_NAME" -f mydb_big.rxdump --createdb
sleep 1

namespaces_3=$(reindexer_tool --dsn "$ADDRESS$DB_NAME" --command '\namespaces list')
# Left unquoted: prints the list on a single line.
echo $namespaces_3
CompareNamespacesLists "$namespaces_3" "$namespaces_list_expected" "$server_pid"

memstats_3=$(reindexer_tool --dsn "$ADDRESS$DB_NAME" --command 'select replication.data_hash, replication.data_count from #memstats')
CompareMemstats "$memstats_3" "$memstats_expected" "$server_pid"

KillAndRemoveServer "$server_pid"

# Reopen the same storage with the released server and compare against the state
# captured from the current build.
echo "## installing latest release: $LATEST_RELEASE"
yum install -y "$LATEST_RELEASE" > /dev/null
reindexer_server -l warning --httplog=none --rpclog=none --db "$DB_PATH" &
server_pid=$!
sleep 2

WaitForDB

namespaces_4=$(reindexer_tool --dsn "$ADDRESS$DB_NAME" --command '\namespaces list')
echo $namespaces_4
CompareNamespacesLists "$namespaces_4" "$namespaces_3" "$server_pid"

memstats_4=$(reindexer_tool --dsn "$ADDRESS$DB_NAME" --command 'select replication.data_hash, replication.data_count from #memstats')
CompareMemstats "$memstats_4" "$memstats_3" "$server_pid"

KillAndRemoveServer "$server_pid"
# ${DB_PATH:?} aborts instead of running `rm -rf` on an empty or unset path.
rm -rf "${DB_PATH:?}"
4 changes: 2 additions & 2 deletions cpp_src/core/idsetcache.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,12 +75,12 @@ T &operator<<(T &os, const IdSetCacheVal &v) {
}

struct equal_idset_cache_key {
bool operator()(const IdSetCacheKey &lhs, const IdSetCacheKey &rhs) const {
bool operator()(const IdSetCacheKey &lhs, const IdSetCacheKey &rhs) const noexcept {
return lhs.cond == rhs.cond && lhs.sort == rhs.sort && *lhs.keys == *rhs.keys;
}
};
struct hash_idset_cache_key {
size_t operator()(const IdSetCacheKey &s) const { return (s.cond << 8) ^ (s.sort << 16) ^ s.keys->Hash(); }
size_t operator()(const IdSetCacheKey &s) const noexcept { return (size_t(s.cond) << 8) ^ (size_t(s.sort) << 16) ^ s.keys->Hash(); }
};

using IdSetCacheBase = LRUCache<IdSetCacheKey, IdSetCacheVal, hash_idset_cache_key, equal_idset_cache_key>;
Expand Down
30 changes: 23 additions & 7 deletions cpp_src/core/index/indexordered.cc
Original file line number Diff line number Diff line change
Expand Up @@ -60,34 +60,41 @@ SelectKeyResults IndexOrdered<T>::SelectKey(const VariantArray &keys, CondType c
auto startIt = this->idx_map.begin();
auto endIt = this->idx_map.end();
auto key1 = *keys.begin();
std::array<typename T::iterator, 2> cacheKeyIts;
unsigned cacheKeyItsCnt = 0;

switch (condition) {
case CondLt:
endIt = this->idx_map.lower_bound(static_cast<ref_type>(key1));
cacheKeyIts[0] = endIt = this->idx_map.lower_bound(static_cast<ref_type>(key1));
cacheKeyItsCnt = 1;
break;
case CondLe:
endIt = this->idx_map.lower_bound(static_cast<ref_type>(key1));
cacheKeyIts[0] = endIt = this->idx_map.lower_bound(static_cast<ref_type>(key1));
cacheKeyItsCnt = 1;
if (endIt != this->idx_map.end() && !this->idx_map.key_comp()(static_cast<ref_type>(key1), endIt->first)) endIt++;
break;
case CondGt:
startIt = this->idx_map.upper_bound(static_cast<ref_type>(key1));
cacheKeyIts[0] = startIt = this->idx_map.upper_bound(static_cast<ref_type>(key1));
cacheKeyItsCnt = 1;
break;
case CondGe:
startIt = this->idx_map.find(static_cast<ref_type>(key1));
cacheKeyIts[0] = startIt = this->idx_map.find(static_cast<ref_type>(key1));
cacheKeyItsCnt = 1;
if (startIt == this->idx_map.end()) startIt = this->idx_map.upper_bound(static_cast<ref_type>(key1));
break;
case CondRange: {
const auto &key2 = keys[1];

startIt = this->idx_map.find(static_cast<ref_type>(key1));
cacheKeyIts[0] = startIt = this->idx_map.find(static_cast<ref_type>(key1));
if (startIt == this->idx_map.end()) startIt = this->idx_map.upper_bound(static_cast<ref_type>(key1));

endIt = this->idx_map.lower_bound(static_cast<ref_type>(key2));
cacheKeyIts[1] = endIt = this->idx_map.lower_bound(static_cast<ref_type>(key2));
if (endIt != this->idx_map.end() && !this->idx_map.key_comp()(static_cast<ref_type>(key2), endIt->first)) endIt++;

if (endIt != this->idx_map.end() && this->idx_map.key_comp()(endIt->first, static_cast<ref_type>(key1))) {
return SelectKeyResults(std::move(res));
}
cacheKeyItsCnt = 2;

} break;
case CondAny:
Expand Down Expand Up @@ -145,7 +152,16 @@ SelectKeyResults IndexOrdered<T>::SelectKey(const VariantArray &keys, CondType c
};

if (count > 1 && !opts.distinct && !opts.disableIdSetCache) {
this->tryIdsetCache(keys, condition, sortId, std::move(selector), res);
assertrx_throw(cacheKeyItsCnt == 1 || cacheKeyItsCnt == 2);
// Using btree node pointers instead of the real values from the condition
VariantArray cacheKeys;
for (unsigned i = 0; i < cacheKeyItsCnt; ++i) {
auto &cacheKeyIt = cacheKeyIts[i];
const int64_t ptrVal =
(cacheKeyIt == this->idx_map.end()) ? std::numeric_limits<int64_t>::max() : int64_t(&(*cacheKeyIt));
cacheKeys.emplace_back(ptrVal);
}
this->tryIdsetCache(cacheKeys, condition, sortId, std::move(selector), res);
} else {
size_t idsCount;
selector(res, idsCount);
Expand Down
4 changes: 3 additions & 1 deletion cpp_src/core/index/indexunordered.cc
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,8 @@ void IndexUnordered<T>::Delete(const Variant &key, IdType id, StringsHolder &str
}
}

// WARNING: 'keys' is a key for LRUCache and in some cases (for ordered indexes, for example) can contain values
// which do not correspond to the initial values from the query conditions
template <typename T>
bool IndexUnordered<T>::tryIdsetCache(const VariantArray &keys, CondType condition, SortType sortId,
const std::function<bool(SelectKeyResult &, size_t &)> &selector, SelectKeyResult &res) {
Expand All @@ -271,7 +273,7 @@ bool IndexUnordered<T>::tryIdsetCache(const VariantArray &keys, CondType conditi
cache_->Put(ckey, res.MergeIdsets(res.deferedExplicitSort, idsCount));
}
} else {
res.push_back(SingleSelectKeyResult(cached.val.ids));
res.emplace_back(std::move(cached.val.ids));
}
} else {
scanWin = selector(res, idsCount);
Expand Down
10 changes: 10 additions & 0 deletions cpp_src/core/nsselecter/explaincalc.cc
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,16 @@ std::string ExplainCalc::GetJSON() {
json.Put("postprocess_us"sv, To_us(postprocess_));
json.Put("loop_us"sv, To_us(loop_));
json.Put("general_sort_us"sv, To_us(sort_));
if (!subqueries_.empty()) {
auto subQuries = json.Array("subqueries");
for (const auto &sq : subqueries_) {
auto s = subQuries.Object();
s.Put("namespace", sq.NsName());
s.Raw("explain", sq.Explain());
std::visit(overloaded{[&](size_t k) { s.Put("keys", k); }, [&](const std::string &f) { s.Put("field", f); }},
sq.FieldOrKeys());
}
}
}
json.Put("sort_index"sv, sortIndex_);
json.Put("sort_by_uncommitted_index"sv, sortOptimization_);
Expand Down
Loading

0 comments on commit 0740060

Please sign in to comment.