diff --git a/bindings/consts.go b/bindings/consts.go index f30640c6e..8c5423746 100644 --- a/bindings/consts.go +++ b/bindings/consts.go @@ -2,7 +2,7 @@ package bindings const CInt32Max = int(^uint32(0) >> 1) -const ReindexerVersion = "v3.21.0" +const ReindexerVersion = "v3.22.0" // public go consts from type_consts.h and reindexer_ctypes.h const ( diff --git a/changelog.md b/changelog.md index 9147eb029..86e80c69d 100644 --- a/changelog.md +++ b/changelog.md @@ -1,6 +1,46 @@ +# Version 3.22.0 (XX.02.2024) +## Core +- [fea] Added `explain` results for the [subqueries](readme.md#subqueries-nested-queries) +- [fea] Added support for limit/offset in `Delete` and `Update` queries +- [fea] Optimized ordered indexes' cache logic to achieve more cache hits and more compact cache size +- [fea] Added support for `COUNT_CACHED(*)`/`CachedTotal()` aggregation in the queries with INNER JOINS. Now it's possible to cache total count results for such queries +- [fix] Fixed SQL parsing for combinations of the [subqueries](readme.md#subqueries-nested-queries) and other conditions in the main query +- [fix] Fixed [select functions](fulltext.md#using-select-functions) with '.' delimiter. Previously those functions actually expected '=' as a delimiter + +## Fulltext +- [fea] Reworked logic for the stop-words. [More details](fulltext.md#stopwords-details) +- [fea] Added config for the base ranking algorithm. 
Check `bm25_config` field in the [fulltext settings](fulltext.md#base-config-parameters) + +## Replication +- [fea] Fixed sync logic to allow runtime server ID changing + +## Reindexer server +- [fix] Fixed SQL suggests for subqueries and some kinds of the JOIN-queries + +## Docker +- [fea] Base docker image was updated to alpine 3.19 + +## Build +- [fix] Fixed dependencies and build for alpine 3.15+ +- [fix] Fixed Release build with GCC 13.2 + +## Face +- [fea] Added the subqueries field to the explain mode +- [fea] Upgraded the Webpack to 5.x +- [fea] Added the default values to the NS config during the mode changing +- [fix] Fixed the message about the outdated browser version after Chrome upgraded to v120. +- [fix] Fixed the settings panel layout on the Performance page, which was overlapped by the message about the outdated browser version +- [fix] Fixed the table columns auto resizing +- [fix] Fixed the table header width issue that appeared on the table resizing +- [fix] Fixed the table layout that crashed on scrolling +- [fix] Fixed the empty space between the last NS and the Total section on the Memory page +- [fix] Fixed the title changing on the NS page during a new NS creating +- [fix] Fixed the tooltip position in the sidebar menu +- [fix] Fixed the "+" button for the Expire after field + # Version 3.21.0 (15.12.2023) ## Core -- [fea] Added [subqueries](#subqueries-nested-queries) support (`explain` for subqueries will be implemented in the next releases) +- [fea] Added [subqueries](readme.md#subqueries-nested-queries) support (`explain` for subqueries will be implemented in the next releases) - [fea] Added backtraces/minidump support for Windows platform - [fea] Added query crash tracker support for Windows platform - [fix] Added explicit error for aggregations in joined queries @@ -16,8 +56,8 @@ ## Go connector - [fea] Added Go API and DSL-convertor for subqueries -- [fea] Changed CJSON-to-object conversion logic for slices: now the single JSON values 
and fields with multiple JSON-paths will be concatenated together in the target field -- [fea] Added `WithStrictJoinHandlers`. This option allows to validate JoinHandlers usage at runtime +- [fea] Changed CJSON-to-object conversion logic for slices: now the single JSON values and fields with multiple JSON-paths will be concatenated together in the target field +- [fea] Added `WithStrictJoinHandlers`. This option allows to validate JoinHandlers usage at runtime - [fix] Fixed panic handling in the CJSON deserialization - [fix] Fixed logging in `cproto`-binding. Error messages will no longer be redirected to stdout instead of user's logger @@ -25,7 +65,7 @@ - [fea] Saved the scroll position on the sorting - [fea] Changed the Server ID range - [fea] Improved the notification about the supported browsers -- [fea] Added the default values to the config form when the default config is used +- [fea] Added the default values to the config form when the default config is used - [fix] Fixed the wrong redirect to a fake database - [fix] Fixed the column order changing on the data sorting - [fix] Fixed the horizontal scroll on the data sorting diff --git a/cpp_src/CMakeLists.txt b/cpp_src/CMakeLists.txt index a500f4e78..d60ebefc5 100644 --- a/cpp_src/CMakeLists.txt +++ b/cpp_src/CMakeLists.txt @@ -29,7 +29,8 @@ option (ENABLE_TCMALLOC "Enable tcmalloc extensions" ON) option (ENABLE_JEMALLOC "Enable jemalloc extensions" ON) option (ENABLE_ROCKSDB "Enable rocksdb storage" ON) option (ENABLE_GRPC "Enable GRPC service" OFF) -option (ENABLE_SSE "Enable SSE instructions" ON) +option(ENABLE_SSE "Enable SSE instructions" ON) +option(ENABLE_SERVER_AS_PROCESS_IN_TEST "Enable server as process" OFF) if (NOT GRPC_PACKAGE_PROVIDER) @@ -42,7 +43,7 @@ else() option (LINK_RESOURCES "Link web resources as binary data" ON) endif() -set (REINDEXER_VERSION_DEFAULT "3.21.0") +set (REINDEXER_VERSION_DEFAULT "3.22.0") if(NOT CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE "RelWithDebInfo") @@ -731,7 +732,7 
@@ if (NOT WIN32) SET(CMAKE_INSTALL_DEFAULT_COMPONENT_NAME "server") SET(DIST_INCLUDE_FILES "tools/errors.h" "tools/serializer.h" "tools/varint.h" "tools/stringstools.h" "tools/customhash.h" "tools/assertrx.h" "tools/jsonstring.h" - "tools/verifying_updater.h" + "tools/verifying_updater.h" "tools/customlocal.h" "core/reindexer.h" "core/type_consts.h" "core/item.h" "core/payload/payloadvalue.h" "core/payload/payloadiface.h" "core/indexopts.h" "core/namespacedef.h" "core/keyvalue/variant.h" "core/keyvalue/geometry.h" "core/sortingprioritiestable.h" "core/rdxcontext.h" "core/activity_context.h" "core/type_consts_helpers.h" "core/payload/fieldsset.h" "core/payload/payloadtype.h" diff --git a/cpp_src/client/coroqueryresults.cc b/cpp_src/client/coroqueryresults.cc index ee3c1649a..4c0360cee 100644 --- a/cpp_src/client/coroqueryresults.cc +++ b/cpp_src/client/coroqueryresults.cc @@ -53,11 +53,19 @@ void CoroQueryResults::Bind(std::string_view rawResult, RPCQrId id) { PayloadType("tmp").clone()->deserialize(ser); }, ResultSerializer::AggsFlag::ClearAggregations); + + auto copyStart = rawResult.begin() + ser.Pos(); + if (const auto rawResLen = std::distance(copyStart, rawResult.end()); rx_unlikely(rawResLen > int64_t(RawResBufT::max_size()))) { + throw Error( + errLogic, + "client::QueryResults::Bind: rawResult buffer overflow. Max size if %d bytes, but %d bytes requested. 
Try to reduce " + "fetch limit (current limit is %d)", + RawResBufT::max_size(), rawResLen, fetchAmount_); + } + rawResult_.assign(copyStart, rawResult.end()); } catch (const Error &err) { status_ = err; } - - rawResult_.assign(rawResult.begin() + ser.Pos(), rawResult.end()); } void CoroQueryResults::fetchNextResults() { @@ -79,7 +87,14 @@ void CoroQueryResults::fetchNextResults() { ser.GetRawQueryParams(queryParams_, nullptr, ResultSerializer::AggsFlag::DontClearAggregations); - rawResult_.assign(rawResult.begin() + ser.Pos(), rawResult.end()); + auto copyStart = rawResult.begin() + ser.Pos(); + if (const auto rawResLen = std::distance(copyStart, rawResult.end()); rx_unlikely(rawResLen > int64_t(RawResBufT::max_size()))) { + throw Error(errLogic, + "client::QueryResults::fetchNextResults: rawResult buffer overflow. Max size if %d bytes, but %d bytes requested. Try " + "to reduce fetch limit (current limit is %d)", + RawResBufT::max_size(), rawResLen, fetchAmount_); + } + rawResult_.assign(copyStart, rawResult.end()); } h_vector CoroQueryResults::GetNamespaces() const { diff --git a/cpp_src/client/coroqueryresults.h b/cpp_src/client/coroqueryresults.h index c8b43f15c..d791ae426 100644 --- a/cpp_src/client/coroqueryresults.h +++ b/cpp_src/client/coroqueryresults.h @@ -72,6 +72,7 @@ class CoroQueryResults { friend class RPCClient; friend class CoroRPCClient; friend class RPCClientMock; + using RawResBufT = h_vector; CoroQueryResults(net::cproto::CoroClientConnection* conn, NsArray&& nsArray, int fetchFlags, int fetchAmount, seconds timeout); CoroQueryResults(net::cproto::CoroClientConnection* conn, NsArray&& nsArray, std::string_view rawResult, RPCQrId id, int fetchFlags, int fetchAmount, seconds timeout); @@ -81,7 +82,7 @@ class CoroQueryResults { net::cproto::CoroClientConnection* conn_; NsArray nsArray_; - h_vector rawResult_; + RawResBufT rawResult_; RPCQrId queryID_; int fetchOffset_; int fetchFlags_; diff --git a/cpp_src/client/itemimpl.cc 
b/cpp_src/client/itemimpl.cc index ef15c097d..39217dabd 100644 --- a/cpp_src/client/itemimpl.cc +++ b/cpp_src/client/itemimpl.cc @@ -54,7 +54,7 @@ void ItemImpl::FromCJSON(std::string_view slice) { throw Error(errParseJson, "Internal error - left unparsed data %d", rdser.Pos()); } tupleData_.assign(ser_.Slice().data(), ser_.Slice().size()); - pl.Set(0, Variant(p_string(&tupleData_))); + pl.Set(0, Variant(p_string(&tupleData_), Variant::no_hold_t{})); } Error ItemImpl::FromJSON(std::string_view slice, char **endp, bool /*pkOnly*/) { @@ -88,7 +88,7 @@ Error ItemImpl::FromJSON(std::string_view slice, char **endp, bool /*pkOnly*/) { if (err.ok()) { // Put tuple to field[0] tupleData_.assign(ser_.Slice().data(), ser_.Slice().size()); - pl.Set(0, Variant(p_string(&tupleData_))); + pl.Set(0, Variant(p_string(&tupleData_), Variant::no_hold_t{})); ser_ = WrSerializer(); } return err; @@ -102,7 +102,7 @@ Error ItemImpl::FromMsgPack(std::string_view buf, size_t &offset) { Error err = decoder.Decode(buf, pl, ser_, offset); if (err.ok()) { tupleData_.assign(ser_.Slice().data(), ser_.Slice().size()); - pl.Set(0, Variant(p_string(&tupleData_))); + pl.Set(0, Variant(p_string(&tupleData_), Variant::no_hold_t{})); } return err; } diff --git a/cpp_src/cmd/reindexer_server/contrib/Dockerfile b/cpp_src/cmd/reindexer_server/contrib/Dockerfile index 8eb80a77c..0da240cb0 100644 --- a/cpp_src/cmd/reindexer_server/contrib/Dockerfile +++ b/cpp_src/cmd/reindexer_server/contrib/Dockerfile @@ -1,12 +1,11 @@ -FROM alpine:3.14 AS build +FROM alpine:3.19 AS build RUN cd /tmp && apk update && \ - apk add git curl autoconf automake libtool linux-headers g++ make libunwind-dev grpc-dev grpc protobuf-dev c-ares-dev && \ + apk add git curl autoconf automake libtool linux-headers g++ make libunwind-dev grpc-dev protobuf-dev c-ares-dev patch && \ git clone https://github.com/gperftools/gperftools.git && \ cd gperftools && \ - echo "noinst_PROGRAMS =" >> Makefile.am && \ sed -i 
s/_sigev_un\._tid/sigev_notify_thread_id/ src/profile-handler.cc && \ - ./autogen.sh && ./configure --disable-dependency-tracking && make -j8 && make install + ./autogen.sh && ./configure --disable-dependency-tracking && make -j8 && make install ADD . /src @@ -19,17 +18,16 @@ RUN ./dependencies.sh && \ make -j8 reindexer_server reindexer_tool && \ make install -C cpp_src/cmd/reindexer_server && \ make install -C cpp_src/cmd/reindexer_tool && \ + make install -C cpp_src/server/grpc && \ cp ../cpp_src/cmd/reindexer_server/contrib/entrypoint.sh /entrypoint.sh && \ rm -rf /usr/local/lib/*.a /usr/local/include /usr/local/lib/libtcmalloc_debug* /usr/local/lib/libtcmalloc_minimal* \ - /usr/local/lib/libprofiler* /usr/local/lib/libtcmalloc.* /usr/local/share/doc /usr/local/share/man /usr/local/share/perl5 /usr/local/bin/pprof* + /usr/local/lib/libprofiler* /usr/local/lib/libtcmalloc.* /usr/local/share/doc /usr/local/share/man /usr/local/share/perl5 /usr/local/bin/pprof* -RUN cd build && make install -C cpp_src/server/grpc - -FROM alpine:3.14 +FROM alpine:3.19 COPY --from=build /usr/local /usr/local COPY --from=build /entrypoint.sh /entrypoint.sh -RUN apk update && apk add libstdc++ libunwind snappy libexecinfo leveldb c-ares libprotobuf xz-libs && rm -rf /var/cache/apk/* +RUN apk update && apk add libstdc++ libunwind snappy leveldb c-ares libprotobuf xz-libs grpc-cpp && rm -rf /var/cache/apk/* ENV RX_DATABASE /db ENV RX_CORELOG stdout diff --git a/cpp_src/cmd/reindexer_server/test/test_storage_compatibility.sh b/cpp_src/cmd/reindexer_server/test/test_storage_compatibility.sh new file mode 100755 index 000000000..d189d3841 --- /dev/null +++ b/cpp_src/cmd/reindexer_server/test/test_storage_compatibility.sh @@ -0,0 +1,195 @@ +#!/bin/bash +# Task: https://github.com/restream/reindexer/-/issues/1188 +set -e + +function KillAndRemoveServer { + local pid=$1 + kill $pid + wait $pid + yum remove -y 'reindexer*' > /dev/null +} + +function WaitForDB { + # wait until DB is loaded + 
set +e # disable "exit on error" so the script won't stop when DB's not loaded yet + is_connected=$(reindexer_tool --dsn $ADDRESS --command '\databases list'); + while [[ $is_connected != "test" ]] + do + sleep 2 + is_connected=$(reindexer_tool --dsn $ADDRESS --command '\databases list'); + done + set -e +} + +function CompareNamespacesLists { + local ns_list_actual=$1 + local ns_list_expected=$2 + local pid=$3 + + diff=$(echo ${ns_list_actual[@]} ${ns_list_expected[@]} | tr ' ' '\n' | sort | uniq -u) # compare in any order + if [ "$diff" == "" ]; then + echo "## PASS: namespaces list not changed" + else + echo "##### FAIL: namespaces list was changed" + echo "expected: $ns_list_expected" + echo "actual: $ns_list_actual" + KillAndRemoveServer $pid; + exit 1 + fi +} + +function CompareMemstats { + local actual=$1 + local expected=$2 + local pid=$3 + diff=$(echo ${actual[@]} ${expected[@]} | tr ' ' '\n' | sed 's/\(.*\),$/\1/' | sort | uniq -u) # compare in any order + if [ "$diff" == "" ]; then + echo "## PASS: memstats not changed" + else + echo "##### FAIL: memstats was changed" + echo "expected: $expected" + echo "actual: $actual" + KillAndRemoveServer $pid; + exit 1 + fi +} + + +RX_SERVER_CURRENT_VERSION_RPM="$(basename build/reindexer-*server*.rpm)" +VERSION_FROM_RPM=$(echo "$RX_SERVER_CURRENT_VERSION_RPM" | grep -o '.*server-..') +VERSION=$(echo ${VERSION_FROM_RPM: -2:1}) # one-digit version + +echo "## choose latest release rpm file" +if [ $VERSION == 3 ]; then + LATEST_RELEASE=$(python3 cpp_src/cmd/reindexer_server/test/get_last_rx_version.py -v 3) + 
namespaces_list_expected=$'purchase_options_ext_dict\nchild_account_recommendations\n#config\n#activitystats\nradio_channels\ncollections\n#namespaces\nwp_imports_tasks\nepg_genres\nrecom_media_items_personal\nrecom_epg_archive_default\n#perfstats\nrecom_epg_live_default\nmedia_view_templates\nasset_video_servers\nwp_tasks_schedule\nadmin_roles\n#clientsstats\nrecom_epg_archive_personal\nrecom_media_items_similars\nmenu_items\naccount_recommendations\nkaraoke_items\nmedia_items\nbanners\n#queriesperfstats\nrecom_media_items_default\nrecom_epg_live_personal\nservices\n#memstats\nchannels\nmedia_item_recommendations\nwp_tasks_tasks\nepg' +elif [ $VERSION == 4 ]; then + LATEST_RELEASE=$(python3 cpp_src/cmd/reindexer_server/test/get_last_rx_version.py -v 4) + # replicationstats ns added for v4 + namespaces_list_expected=$'purchase_options_ext_dict\nchild_account_recommendations\n#config\n#activitystats\n#replicationstats\nradio_channels\ncollections\n#namespaces\nwp_imports_tasks\nepg_genres\nrecom_media_items_personal\nrecom_epg_archive_default\n#perfstats\nrecom_epg_live_default\nmedia_view_templates\nasset_video_servers\nwp_tasks_schedule\nadmin_roles\n#clientsstats\nrecom_epg_archive_personal\nrecom_media_items_similars\nmenu_items\naccount_recommendations\nkaraoke_items\nmedia_items\nbanners\n#queriesperfstats\nrecom_media_items_default\nrecom_epg_live_personal\nservices\n#memstats\nchannels\nmedia_item_recommendations\nwp_tasks_tasks\nepg' +else + echo "Unknown version" + exit 1 +fi + +echo "## downloading latest release rpm file: $LATEST_RELEASE" +curl "http://repo.itv.restr.im/itv-api-ng/7/x86_64/$LATEST_RELEASE" --output $LATEST_RELEASE; +echo "## downloading example DB" +curl "https://git.restream.ru/MaksimKravchuk/reindexer_testdata/-/raw/master/big.zip" --output big.zip; +unzip -o big.zip # unzips into mydb_big.rxdump; + +ADDRESS="cproto://127.0.0.1:6534/" +DB_NAME="test" + +memstats_expected=$'[ +{"replication":{"data_hash":24651210926,"data_count":3}}, 
+{"replication":{"data_hash":6252344969,"data_count":1}}, +{"replication":{"data_hash":37734732881,"data_count":28}}, +{"replication":{"data_hash":0,"data_count":0}}, +{"replication":{"data_hash":1024095024522,"data_count":1145}}, +{"replication":{"data_hash":8373644068,"data_count":1315}}, +{"replication":{"data_hash":0,"data_count":0}}, +{"replication":{"data_hash":0,"data_count":0}}, +{"replication":{"data_hash":0,"data_count":0}}, +{"replication":{"data_hash":0,"data_count":0}}, +{"replication":{"data_hash":7404222244,"data_count":97}}, +{"replication":{"data_hash":94132837196,"data_count":4}}, +{"replication":{"data_hash":1896088071,"data_count":2}}, +{"replication":{"data_hash":0,"data_count":0}}, +{"replication":{"data_hash":-672103903,"data_count":33538}}, +{"replication":{"data_hash":0,"data_count":0}}, +{"replication":{"data_hash":6833710705,"data_count":1}}, +{"replication":{"data_hash":5858155773472,"data_count":4500}}, +{"replication":{"data_hash":-473221280268823592,"data_count":65448}}, +{"replication":{"data_hash":0,"data_count":0}}, +{"replication":{"data_hash":8288213744,"data_count":3}}, +{"replication":{"data_hash":0,"data_count":0}}, +{"replication":{"data_hash":0,"data_count":0}}, +{"replication":{"data_hash":354171024786967,"data_count":3941}}, +{"replication":{"data_hash":-6520334670,"data_count":35886}}, +{"replication":{"data_hash":112772074632,"data_count":281}}, +{"replication":{"data_hash":-12679568198538,"data_count":1623116}} +] +Returned 27 rows' + +echo "##### Forward compatibility test #####" + +DB_PATH=$(pwd)"/rx_db" + +echo "Database: "$DB_PATH + +echo "## installing latest release: $LATEST_RELEASE" +yum install -y $LATEST_RELEASE > /dev/null; +# run RX server with disabled logging +reindexer_server -l warning --httplog=none --rpclog=none --db $DB_PATH & +server_pid=$! 
+sleep 2; + +reindexer_tool --dsn $ADDRESS$DB_NAME -f mydb_big.rxdump --createdb; +sleep 1; + +namespaces_1=$(reindexer_tool --dsn $ADDRESS$DB_NAME --command '\namespaces list'); +echo $namespaces_1; +CompareNamespacesLists "${namespaces_1[@]}" "${namespaces_list_expected[@]}" $server_pid; + +memstats_1=$(reindexer_tool --dsn $ADDRESS$DB_NAME --command 'select replication.data_hash, replication.data_count from #memstats'); +CompareMemstats "${memstats_1[@]}" "${memstats_expected[@]}" $server_pid; + +KillAndRemoveServer $server_pid; + +echo "## installing current version: $RX_SERVER_CURRENT_VERSION_RPM" +yum install -y build/*.rpm > /dev/null; +reindexer_server -l0 --corelog=none --httplog=none --rpclog=none --db $DB_PATH & +server_pid=$! +sleep 2; + +WaitForDB + +namespaces_2=$(reindexer_tool --dsn $ADDRESS$DB_NAME --command '\namespaces list'); +echo $namespaces_2; +CompareNamespacesLists "${namespaces_2[@]}" "${namespaces_1[@]}" $server_pid; + +memstats_2=$(reindexer_tool --dsn $ADDRESS$DB_NAME --command 'select replication.data_hash, replication.data_count from #memstats'); +CompareMemstats "${memstats_2[@]}" "${memstats_1[@]}" $server_pid; + +KillAndRemoveServer $server_pid; +rm -rf $DB_PATH; +sleep 1; + +echo "##### Backward compatibility test #####" + +echo "## installing current version: $RX_SERVER_CURRENT_VERSION_RPM" +yum install -y build/*.rpm > /dev/null; +reindexer_server -l warning --httplog=none --rpclog=none --db $DB_PATH & +server_pid=$! 
+sleep 2; + +reindexer_tool --dsn $ADDRESS$DB_NAME -f mydb_big.rxdump --createdb; +sleep 1; + +namespaces_3=$(reindexer_tool --dsn $ADDRESS$DB_NAME --command '\namespaces list'); +echo $namespaces_3; +CompareNamespacesLists "${namespaces_3[@]}" "${namespaces_list_expected[@]}" $server_pid; + +memstats_3=$(reindexer_tool --dsn $ADDRESS$DB_NAME --command 'select replication.data_hash, replication.data_count from #memstats'); +CompareMemstats "${memstats_3[@]}" "${memstats_expected[@]}" $server_pid; + +KillAndRemoveServer $server_pid; + +echo "## installing latest release: $LATEST_RELEASE" +yum install -y $LATEST_RELEASE > /dev/null; +reindexer_server -l warning --httplog=none --rpclog=none --db $DB_PATH & +server_pid=$! +sleep 2; + +WaitForDB + +namespaces_4=$(reindexer_tool --dsn $ADDRESS$DB_NAME --command '\namespaces list'); +echo $namespaces_4; +CompareNamespacesLists "${namespaces_4[@]}" "${namespaces_3[@]}" $server_pid; + +memstats_4=$(reindexer_tool --dsn $ADDRESS$DB_NAME --command 'select replication.data_hash, replication.data_count from #memstats'); +CompareMemstats "${memstats_4[@]}" "${memstats_3[@]}" $server_pid; + +KillAndRemoveServer $server_pid; +rm -rf $DB_PATH; diff --git a/cpp_src/core/cjson/cjsondecoder.cc b/cpp_src/core/cjson/cjsondecoder.cc index 53a5162cf..7f6e84571 100644 --- a/cpp_src/core/cjson/cjsondecoder.cc +++ b/cpp_src/core/cjson/cjsondecoder.cc @@ -127,7 +127,7 @@ bool CJsonDecoder::decodeCJson(Payload &pl, Serializer &rdser, WrSerializer &wrs [[nodiscard]] Variant CJsonDecoder::cjsonValueToVariant(TagType tagType, Serializer &rdser, KeyValueType fieldType) { if (fieldType.Is() && tagType != TagType::TAG_STRING) { storage_.emplace_back(rdser.GetRawVariant(KeyValueType{tagType}).As()); - return Variant(p_string(&storage_.back()), false); + return Variant(p_string(&storage_.back()), Variant::no_hold_t{}); } else { return reindexer::cjsonValueToVariant(tagType, rdser, fieldType); } diff --git a/cpp_src/core/cjson/cjsonmodifier.cc 
b/cpp_src/core/cjson/cjsonmodifier.cc index 8409dbeeb..c8474cc54 100644 --- a/cpp_src/core/cjson/cjsonmodifier.cc +++ b/cpp_src/core/cjson/cjsonmodifier.cc @@ -1,5 +1,4 @@ #include "cjsonmodifier.h" -#include "core/keyvalue/p_string.h" #include "core/type_consts_helpers.h" #include "jsondecoder.h" #include "tagsmatcher.h" diff --git a/cpp_src/core/ft/bm25.h b/cpp_src/core/ft/bm25.h index 54f928c3d..b20e5521d 100644 --- a/cpp_src/core/ft/bm25.h +++ b/cpp_src/core/ft/bm25.h @@ -1,27 +1,80 @@ #pragma once #include -#include namespace reindexer { -const static double kKeofBm25k1 = 2.0; -const static double kKeofBm25b = 0.75; - -inline double IDF(double totalDocCount, double matchedDocCount) { - double f = log((totalDocCount - matchedDocCount + 1) / matchedDocCount) / log(1 + totalDocCount); - // saturate min to 0.2 - if (f < 0.2) f = 0.2; - return f; -} - -inline double TF(double termCountInDoc, double mostFreqWordCountInDoc, double wordsInDoc) { - (void)mostFreqWordCountInDoc; - (void)wordsInDoc; - return termCountInDoc; -} - -inline double bm25score(double termCountInDoc, double mostFreqWordCountInDoc, double wordsInDoc, double avgDocLen) { - auto termFreq = TF(termCountInDoc, mostFreqWordCountInDoc, wordsInDoc); - return termFreq * (kKeofBm25k1 + 1.0) / (termFreq + kKeofBm25k1 * (1.0 - kKeofBm25b + kKeofBm25b * wordsInDoc / avgDocLen)); -} + +class Bm25Rx { +public: + Bm25Rx(double totalDocCount, double matchedDocCount, double k1, double b) noexcept + : k1_(k1), b_(b), idf_(IDF(totalDocCount, matchedDocCount)) {} + + RX_ALWAYS_INLINE double Get(double termCountInDoc, double wordsInDoc, double avgDocLen) const noexcept { + auto termFreq = TF(termCountInDoc, wordsInDoc); + return idf_ * termFreq * (k1_ + 1.0) / (termFreq + k1_ * (1.0 - b_ + b_ * wordsInDoc / avgDocLen)); + } + RX_ALWAYS_INLINE double GetIDF() const noexcept { return idf_; } + +private: + static RX_ALWAYS_INLINE double IDF(double totalDocCount, double matchedDocCount) noexcept { + double f = 
log((totalDocCount - matchedDocCount + 1) / matchedDocCount) / log(1 + totalDocCount); + // saturate min to 0.2 + if (f < 0.2) f = 0.2; + return f; + } + static RX_ALWAYS_INLINE double TF(double termCountInDoc, double wordsInDoc) noexcept { + (void)wordsInDoc; + return termCountInDoc; + } + + const double k1_; + const double b_; + const double idf_; +}; + +class Bm25Classic { +public: + Bm25Classic(double totalDocCount, double matchedDocCount, double k1, double b) noexcept + : k1_(k1), b_(b), idf_(IDF(totalDocCount, matchedDocCount)) {} + + RX_ALWAYS_INLINE double Get(double termCountInDoc, double wordsInDoc, double avgDocLen) const { + auto termFreq = TF(termCountInDoc, wordsInDoc); + return idf_ * termFreq * (k1_ + 1.0) / (termFreq + k1_ * (1.0 - b_ + b_ * wordsInDoc / avgDocLen)); + } + RX_ALWAYS_INLINE double GetIDF() const noexcept { return idf_; } + +private: + static RX_ALWAYS_INLINE double IDF(double totalDocCount, double matchedDocCount) noexcept { + return log(totalDocCount / (matchedDocCount + 1)) + 1; + } + static RX_ALWAYS_INLINE double TF(double termCountInDoc, double wordsInDoc) noexcept { return termCountInDoc / wordsInDoc; } + + const double k1_; + const double b_; + const double idf_; +}; + +class TermCount { +public: + TermCount(double /*totalDocCount*/, double /*matchedDocCount*/, double /*k1*/, double /*b*/) noexcept {} + + RX_ALWAYS_INLINE double Get(double termCountInDoc, double /*wordsInDoc*/, double /*avgDocLen*/) const noexcept { + return termCountInDoc; + } + RX_ALWAYS_INLINE double GetIDF() const noexcept { return 0.0; } +}; + +template +class Bm25Calculator { +public: + Bm25Calculator(double totalDocCount, double matchedDocCount, double k1, double b) : bm_(totalDocCount, matchedDocCount, k1, b) {} + RX_ALWAYS_INLINE double Get(double termCountInDoc, double wordsInDoc, double avgDocLen) const { + return bm_.Get(termCountInDoc, wordsInDoc, avgDocLen); + } + RX_ALWAYS_INLINE double GetIDF() const noexcept { return bm_.GetIDF(); } + 
+private: + const BM bm_; +}; + } // namespace reindexer diff --git a/cpp_src/core/ft/config/baseftconfig.cc b/cpp_src/core/ft/config/baseftconfig.cc index 2cb4ad7f1..5df2d0198 100644 --- a/cpp_src/core/ft/config/baseftconfig.cc +++ b/cpp_src/core/ft/config/baseftconfig.cc @@ -1,16 +1,14 @@ #include "baseftconfig.h" -#include #include "core/cjson/jsonbuilder.h" #include "core/ft/stopwords/stop.h" #include "tools/errors.h" -#include "tools/jsontools.h" namespace reindexer { BaseFTConfig::BaseFTConfig() { - for (const char **p = stop_words_en; *p != nullptr; p++) stopWords.insert(*p); - for (const char **p = stop_words_ru; *p != nullptr; p++) stopWords.insert(*p); + for (const char **p = stop_words_en; *p != nullptr; p++) stopWords.insert({*p, StopWord::Type::Morpheme}); + for (const char **p = stop_words_ru; *p != nullptr; p++) stopWords.insert({*p, StopWord::Type::Morpheme}); } void BaseFTConfig::parseBase(const gason::JsonNode &root) { @@ -25,7 +23,25 @@ void BaseFTConfig::parseBase(const gason::JsonNode &root) { auto &stopWordsNode = root["stop_words"]; if (!stopWordsNode.empty()) { stopWords.clear(); - for (auto &sw : stopWordsNode) stopWords.insert(sw.As()); + for (auto &sw : stopWordsNode) { + std::string word; + StopWord::Type type = StopWord::Type::Stop; + if (sw.value.getTag() == gason::JsonTag::JSON_STRING) { + word = sw.As(); + } else if (sw.value.getTag() == gason::JsonTag::JSON_OBJECT) { + word = sw["word"].As(); + type = sw["is_morpheme"].As() ? 
StopWord::Type::Morpheme : StopWord::Type::Stop; + } + + if (std::find_if(word.begin(), word.end(), [](const auto &symbol) { return std::isspace(symbol); }) != word.end()) { + throw Error(errParams, "Stop words can't contain spaces: %s", word); + } + + auto [it, inserted] = stopWords.emplace(std::move(word), type); + if (!inserted && it->type != type) { + throw Error(errParams, "Duplicate stop-word with different morpheme attribute: %s", *it); + } + } } auto &stemmersNode = root["stemmers"]; @@ -80,7 +96,9 @@ void BaseFTConfig::getJson(JsonBuilder &jsonBuilder) const { { auto stopWordsNode = jsonBuilder.Array("stop_words"); for (const auto &sw : stopWords) { - stopWordsNode.Put(nullptr, sw); + auto wordNode = stopWordsNode.Object(nullptr); + wordNode.Put("word", sw); + wordNode.Put("is_morpheme", sw.type == StopWord::Type::Morpheme); } } { diff --git a/cpp_src/core/ft/config/baseftconfig.h b/cpp_src/core/ft/config/baseftconfig.h index f17db982f..aecd0b2c1 100644 --- a/cpp_src/core/ft/config/baseftconfig.h +++ b/cpp_src/core/ft/config/baseftconfig.h @@ -19,6 +19,12 @@ static constexpr int kMinMergeLimitValue = 0; class JsonBuilder; +struct StopWord : std::string { + enum class Type { Stop, Morpheme }; + StopWord(std::string base, Type type = Type::Stop) noexcept : std::string(std::move(base)), type(type) {} + Type type; +}; + class BaseFTConfig { public: struct Synonym { @@ -39,7 +45,8 @@ class BaseFTConfig { bool enableKbLayout = true; bool enableNumbersSearch = false; bool enableWarmupOnNsCopy = false; - fast_hash_set stopWords; + + fast_hash_set stopWords; std::vector synonyms; int logLevel = 0; std::string extraWordSymbols = "-/+"; // word contains symbols (IsAlpa | IsDigit) {IsAlpa | IsDigit | IsExtra} diff --git a/cpp_src/core/ft/config/ftfastconfig.cc b/cpp_src/core/ft/config/ftfastconfig.cc index 902463a9e..4dba75cc9 100644 --- a/cpp_src/core/ft/config/ftfastconfig.cc +++ b/cpp_src/core/ft/config/ftfastconfig.cc @@ -70,6 +70,12 @@ void 
FtFastConfig::parse(std::string_view json, const RHashMap maxStepSize = root["max_step_size"].As<>(maxStepSize, 5); maxAreasInDoc = root["max_areas_in_doc"].As(maxAreasInDoc); maxTotalAreasToCache = root["max_total_areas_to_cache"].As(maxTotalAreasToCache); + + if (!root["bm25_config"].empty()) { + auto conf = root["bm25_config"]; + bm25Config.parse(conf); + } + summationRanksByFieldsRatio = root["sum_ranks_by_fields_ratio"].As<>(summationRanksByFieldsRatio, 0.0, 1.0); FtFastFieldConfig defaultFieldCfg; @@ -148,6 +154,12 @@ std::string FtFastConfig::GetJson(const fast_hash_map& fields) jsonBuilder.Put("sum_ranks_by_fields_ratio", summationRanksByFieldsRatio); jsonBuilder.Put("max_areas_in_doc", maxAreasInDoc); jsonBuilder.Put("max_total_areas_to_cache", maxTotalAreasToCache); + + { + auto conf = jsonBuilder.Object("bm25_config"); + bm25Config.getJson(conf); + } + switch (optimization) { case Optimization::Memory: jsonBuilder.Put("optimization", "Memory"); @@ -183,4 +195,35 @@ std::string FtFastConfig::GetJson(const fast_hash_map& fields) return std::string(wrser.Slice()); } +void FtFastConfig::Bm25Config::getJson(JsonBuilder& jsonBuilder) const { + jsonBuilder.Put("bm25_k1", bm25k1); + jsonBuilder.Put("bm25_b", bm25b); + switch (bm25Type) { + case Bm25Type::classic: + jsonBuilder.Put("bm25_type", "bm25"); + break; + case Bm25Type::rx: + jsonBuilder.Put("bm25_type", "rx_bm25"); + break; + case Bm25Type::wordCount: + jsonBuilder.Put("bm25_type", "word_count"); + break; + } +} + +void FtFastConfig::Bm25Config::parse(const gason::JsonNode& node) { + bm25k1 = node["bm25_k1"].As(bm25k1, 0.0); + bm25b = node["bm25_b"].As(bm25b, 0.0, 1.0); + const std::string bm25TypeStr = toLower(node["bm25_type"].As("rx_bm25")); + if (bm25TypeStr == "rx_bm25") { + bm25Type = Bm25Type::rx; + } else if (bm25TypeStr == "bm25") { + bm25Type = Bm25Type::classic; + } else if (bm25TypeStr == "word_count") { + bm25Type = Bm25Type::wordCount; + } else { + throw Error(errParseJson, "FtFastConfig: 
unknown bm25Type value: %s", bm25TypeStr); + } +} + } // namespace reindexer diff --git a/cpp_src/core/ft/config/ftfastconfig.h b/cpp_src/core/ft/config/ftfastconfig.h index 3b011739b..56c1aa417 100644 --- a/cpp_src/core/ft/config/ftfastconfig.h +++ b/cpp_src/core/ft/config/ftfastconfig.h @@ -1,7 +1,6 @@ #pragma once #include "baseftconfig.h" -#include "estl/h_vector.h" namespace reindexer { @@ -39,9 +38,21 @@ struct FtFastConfig : public BaseFTConfig { int maxRebuildSteps = 50; int maxStepSize = 4000; + struct Bm25Config { + enum class Bm25Type { classic, rx, wordCount }; + double bm25k1 = 2.0; + double bm25b = 0.75; + Bm25Type bm25Type = Bm25Type::rx; + void getJson(JsonBuilder& jsonBuilder) const; + void parse(const gason::JsonNode& root); + }; + + Bm25Config bm25Config; + double summationRanksByFieldsRatio = 0.0; int maxAreasInDoc = 5; int maxTotalAreasToCache = -1; + RVector fieldsCfg; enum class Optimization { CPU, Memory } optimization = Optimization::Memory; bool enablePreselectBeforeFt = false; diff --git a/cpp_src/core/ft/ft_fast/selecter.cc b/cpp_src/core/ft/ft_fast/selecter.cc index 40082ae17..f86075e6b 100644 --- a/cpp_src/core/ft/ft_fast/selecter.cc +++ b/cpp_src/core/ft/ft_fast/selecter.cc @@ -1,11 +1,10 @@ #include "selecter.h" -#include -#include "core/ft/bm25.h" #include "core/ft/typos.h" #include "core/rdxcontext.h" #include "estl/defines.h" #include "sort/pdqsort.hpp" #include "tools/logger.h" +#include "core/ft/bm25.h" namespace { RX_ALWAYS_INLINE double pos2rank(int pos) { @@ -211,7 +210,19 @@ RX_NO_INLINE IDataHolder::MergeData Selecter::Process(FtDSLQuery&& dsl, // Typos for terms with low relevancy will not be processed for (auto& res : ctx.rawResults) results.emplace_back(std::move(res)); - return mergeResults(std::move(results), ctx.totalORVids, synonymsBounds, inTransaction, std::move(mergeStatuses), rdxCtx); + switch (holder_.cfg_->bm25Config.bm25Type) { + case FtFastConfig::Bm25Config::Bm25Type::rx: + return 
mergeResults(std::move(results), ctx.totalORVids, synonymsBounds, inTransaction, std::move(mergeStatuses), + rdxCtx); + case FtFastConfig::Bm25Config::Bm25Type::classic: + return mergeResults(std::move(results), ctx.totalORVids, synonymsBounds, inTransaction, std::move(mergeStatuses), + rdxCtx); + case FtFastConfig::Bm25Config::Bm25Type::wordCount: + return mergeResults(std::move(results), ctx.totalORVids, synonymsBounds, inTransaction, std::move(mergeStatuses), + rdxCtx); + } + assertrx(false); + return IDataHolder::MergeData(); } template @@ -408,14 +419,14 @@ RX_ALWAYS_INLINE void Selecter::debugMergeStep(const char* msg, int vid, #endif } template -RX_ALWAYS_INLINE void Selecter::calcFieldBoost(double idf, unsigned long long f, const IdRelType& relid, const FtDslOpts& opts, - int termProc, double& termRank, double& normBm25, bool& dontSkipCurTermRank, - h_vector& ranksInFields, int& field) { +template +RX_ALWAYS_INLINE void Selecter::calcFieldBoost(const Calculator& bm25Calc, unsigned long long f, const IdRelType& relid, + const FtDslOpts& opts, int termProc, double& termRank, double& normBm25, + bool& dontSkipCurTermRank, h_vector& ranksInFields, int& field) { assertrx(f < holder_.cfg_->fieldsCfg.size()); const auto& fldCfg = holder_.cfg_->fieldsCfg[f]; // raw bm25 - const double bm25 = idf * bm25score(relid.WordsInField(f), holder_.vdocs_[relid.Id()].mostFreqWordCount[f], - holder_.vdocs_[relid.Id()].wordsCount[f], holder_.avgWordsCount_[f]); + const double bm25 = bm25Calc.Get(relid.WordsInField(f), holder_.vdocs_[relid.Id()].wordsCount[f], holder_.avgWordsCount_[f]); // normalized bm25 const double normBm25Tmp = bound(bm25, fldCfg.bm25Weight, fldCfg.bm25Boost); @@ -551,7 +562,7 @@ void Selecter::subMergeLoop(std::vector& subMerg } template -template +template void Selecter::mergeGroupResult(std::vector& rawResults, size_t from, size_t to, FtMergeStatuses::Statuses& mergeStatuses, IDataHolder::MergeData& merged, std::vector& merged_rd, OpType op, const bool 
hasBeenAnd, @@ -564,7 +575,8 @@ void Selecter::mergeGroupResult(std::vector& rawResul // expandable) IDataHolder::MergeData subMerged; std::vector subMergedPositionData; - mergeResultsPart(rawResults, from, to, subMerged, subMergedPositionData, inTransaction, rdxCtx); + + mergeResultsPart(rawResults, from, to, subMerged, subMergedPositionData, inTransaction, rdxCtx); switch (op) { case OpOr: { @@ -655,6 +667,7 @@ void Selecter::addAreas(IDataHolder::MergeData& merged, int32_t areaInde // docRank=summ(max(subTermRank))*255/allmax // allmax=max(docRank) template +template void Selecter::mergeIteration(TextSearchResults& rawRes, index_t rawResIndex, FtMergeStatuses::Statuses& mergeStatuses, IDataHolder::MergeData& merged, std::vector& merged_rd, std::vector& idoffsets, std::vector& curExists, @@ -683,7 +696,9 @@ void Selecter::mergeIteration(TextSearchResults& rawRes, index_t rawResI // loop on subterm (word, translit, stemmmer,...) for (auto& r : rawRes) { if (!inTransaction) ThrowOnCancel(rdxCtx); - auto idf = IDF(totalDocsCount, r.vids_->size()); + Bm25Calculator bm25{double(totalDocsCount), double(r.vids_->size()), holder_.cfg_->bm25Config.bm25k1, + holder_.cfg_->bm25Config.bm25b}; + static_assert(sizeof(bm25) <= 32, "Bm25Calculator size is greater than 32 bytes"); // cycle through the documents for the given subterm for (auto&& relid : *r.vids_) { static_assert((std::is_same_v && std::is_same_v) || @@ -711,12 +726,12 @@ void Selecter::mergeIteration(TextSearchResults& rawRes, index_t rawResI } // Find field with max rank - auto [termRank, field] = calcTermRank(rawRes, idf, relid, r.proc_); + auto [termRank, field] = calcTermRank(rawRes, bm25, relid, r.proc_); if (!termRank) { continue; } if rx_unlikely (holder_.cfg_->logLevel >= LogTrace) { - logPrintf(LogInfo, "Pattern %s, idf %f, termLenBoost %f", r.pattern, idf, rawRes.term.opts.termLenBoost); + logPrintf(LogInfo, "Pattern %s, idf %f, termLenBoost %f", r.pattern, bm25.GetIDF(), rawRes.term.opts.termLenBoost); 
} if (simple) { // one term @@ -760,7 +775,9 @@ void Selecter::mergeIteration(TextSearchResults& rawRes, index_t rawResI } } template -std::pair Selecter::calcTermRank(const TextSearchResults& rawRes, double idf, const IdRelType& relid, int proc) { +template +std::pair Selecter::calcTermRank(const TextSearchResults& rawRes, Calculator bm25Calc, const IdRelType& relid, + int proc) { // Find field with max rank int field = 0; double termRank = 0.0; @@ -783,7 +800,7 @@ std::pair Selecter::calcTermRank(const TextSearchResults& r assertrx(f < rawRes.term.opts.fieldsOpts.size()); const auto fboost = rawRes.term.opts.fieldsOpts[f].boost; if (fboost) { - calcFieldBoost(idf, f, relid, rawRes.term.opts, proc, termRank, normBm25, dontSkipCurTermRank, ranksInFields, field); + calcFieldBoost(bm25Calc, f, relid, rawRes.term.opts, proc, termRank, normBm25, dontSkipCurTermRank, ranksInFields, field); } } @@ -801,7 +818,7 @@ std::pair Selecter::calcTermRank(const TextSearchResults& r } template -template +template void Selecter::mergeIterationGroup(TextSearchResults& rawRes, index_t rawResIndex, FtMergeStatuses::Statuses& mergeStatuses, IDataHolder::MergeData& merged, std::vector

& merged_rd, std::vector& idoffsets, std::vector& present, @@ -816,7 +833,8 @@ void Selecter::mergeIterationGroup(TextSearchResults& rawRes, index_t ra // loop on subterm (word, translit, stemmmer,...) for (auto& r : rawRes) { if (!inTransaction) ThrowOnCancel(rdxCtx); - double idf = IDF(totalDocsCount, r.vids_->size()); + Bm25Calculator bm25(totalDocsCount, r.vids_->size(), holder_.cfg_->bm25Config.bm25k1, holder_.cfg_->bm25Config.bm25b); + static_assert(sizeof(bm25) <= 32, "Bm25Calculator size is greater than 32 bytes"); int vid = -1; // cycle through the documents for the given subterm for (auto&& relid : *r.vids_) { @@ -835,11 +853,11 @@ void Selecter::mergeIterationGroup(TextSearchResults& rawRes, index_t ra if (!vdocs[vid].keyEntry) continue; // Find field with max rank - auto [termRank, field] = calcTermRank(rawRes, idf, relid, r.proc_); + auto [termRank, field] = calcTermRank(rawRes, bm25, relid, r.proc_); if (!termRank) continue; if rx_unlikely (holder_.cfg_->logLevel >= LogTrace) { - logPrintf(LogInfo, "Pattern %s, idf %f, termLenBoost %f", r.pattern, idf, rawRes.term.opts.termLenBoost); + logPrintf(LogInfo, "Pattern %s, idf %f, termLenBoost %f", r.pattern, bm25.GetIDF(), rawRes.term.opts.termLenBoost); } // match of 2-rd, and next terms @@ -941,7 +959,7 @@ void Selecter::mergeIterationGroup(TextSearchResults& rawRes, index_t ra } template -template +template void Selecter::mergeResultsPart(std::vector& rawResults, size_t from, size_t to, IDataHolder::MergeData& merged, std::vector& mergedPos, const bool inTransaction, const RdxContext& rdxCtx) { // Current implementation supports OpAnd only @@ -962,7 +980,7 @@ void Selecter::mergeResultsPart(std::vector& rawResul std::vector exists; bool firstTerm = true; for (size_t i = from; i < to; ++i) { - mergeIterationGroup(rawResults[i], i, mergeStatuses, merged, mergedPos, idoffsets, exists, firstTerm, inTransaction, rdxCtx); + mergeIterationGroup(rawResults[i], i, mergeStatuses, merged, mergedPos, idoffsets, 
exists, firstTerm, inTransaction, rdxCtx); firstTerm = false; // set proc=0 (exclude) for document not containing term for (auto& info : merged) { @@ -1232,6 +1250,7 @@ bool Selecter::TyposHandler::isWordFitMaxLettPerm(const std::string_view } template +template typename IDataHolder::MergeData Selecter::mergeResults(std::vector&& rawResults, size_t totalORVids, const std::vector& synonymsBounds, bool inTransaction, FtMergeStatuses::Statuses&& mergeStatuses, const RdxContext& rdxCtx) { @@ -1271,11 +1290,11 @@ typename IDataHolder::MergeData Selecter::mergeResults(std::vector(rawResults, i, k, mergeStatuses, merged, merged_rd, op, hasBeenAnd, - idoffsets, inTransaction, rdxCtx); + mergeGroupResult(rawResults, i, k, mergeStatuses, merged, merged_rd, op, hasBeenAnd, + idoffsets, inTransaction, rdxCtx); } else { - mergeGroupResult(rawResults, i, k, mergeStatuses, merged, merged_rd, op, hasBeenAnd, idoffsets, - inTransaction, rdxCtx); + mergeGroupResult(rawResults, i, k, mergeStatuses, merged, merged_rd, op, hasBeenAnd, + idoffsets, inTransaction, rdxCtx); } if (op == OpAnd) { hasBeenAnd = true; @@ -1294,7 +1313,10 @@ typename IDataHolder::MergeData Selecter::mergeResults(std::vector(rawResults[i], i, mergeStatuses, merged, merged_rd, idoffsets, exists[curExists], hasBeenAnd, inTransaction, + rdxCtx); + if (rawResults[i].term.opts.op == OpAnd && !exists[curExists].empty()) { hasBeenAnd = true; for (auto& info : merged) { diff --git a/cpp_src/core/ft/ft_fast/selecter.h b/cpp_src/core/ft/ft_fast/selecter.h index 0c67aea00..a1e67d400 100644 --- a/cpp_src/core/ft/ft_fast/selecter.h +++ b/cpp_src/core/ft/ft_fast/selecter.h @@ -1,7 +1,6 @@ #pragma once #include "core/ft/ftdsl.h" #include "core/ft/idrelset.h" -#include "core/selectfunc/ctx/ftctx.h" #include "dataholder.h" namespace reindexer { @@ -138,27 +137,29 @@ class Selecter { std::wstring foundWordUTF16_; }; + template IDataHolder::MergeData mergeResults(std::vector&& rawResults, size_t totalORVids, const std::vector& 
synonymsBounds, bool inTransaction, FtMergeStatuses::Statuses&& mergeStatuses, const RdxContext&); + template void mergeIteration(TextSearchResults& rawRes, index_t rawResIndex, FtMergeStatuses::Statuses& mergeStatuses, IDataHolder::MergeData& merged, std::vector& merged_rd, std::vector& idoffsets, std::vector& curExists, const bool hasBeenAnd, const bool inTransaction, const RdxContext&); - template + template void mergeIterationGroup(TextSearchResults& rawRes, index_t rawResIndex, FtMergeStatuses::Statuses& mergeStatuses, IDataHolder::MergeData& merged, std::vector

& merged_rd, std::vector& idoffsets, std::vector& present, const bool firstTerm, const bool inTransaction, const RdxContext& rdxCtx); - template + template void mergeGroupResult(std::vector& rawResults, size_t from, size_t to, FtMergeStatuses::Statuses& mergeStatuses, IDataHolder::MergeData& merged, std::vector& merged_rd, OpType op, const bool hasBeenAnd, std::vector& idoffsets, const bool inTransaction, const RdxContext& rdxCtx); - template + template void mergeResultsPart(std::vector& rawResults, size_t from, size_t to, IDataHolder::MergeData& merged, std::vector& mergedPos, const bool inTransaction, const RdxContext& rdxCtx); AreaHolder createAreaFromSubMerge(const IDataHolder::MergedIdRelExArea& posInfo); @@ -169,9 +170,11 @@ class Selecter { std::vector& merged_rd, FtMergeStatuses::Statuses& mergeStatuses, std::vector& idoffsets, std::vector* checkAndOpMerge, const bool hasBeenAnd); - void calcFieldBoost(double idf, unsigned long long f, const IdRelType& relid, const FtDslOpts& opts, int termProc, double& termRank, - double& normBm25, bool& dontSkipCurTermRank, h_vector& ranksInFields, int& field); - std::pair calcTermRank(const TextSearchResults& rawRes, double idf, const IdRelType& relid, int proc); + template + void calcFieldBoost(const Calculator& bm25Calc, unsigned long long f, const IdRelType& relid, const FtDslOpts& opts, int termProc, + double& termRank, double& normBm25, bool& dontSkipCurTermRank, h_vector& ranksInFields, int& field); + template + std::pair calcTermRank(const TextSearchResults& rawRes, Calculator c, const IdRelType& relid, int proc); void addNewTerm(FtMergeStatuses::Statuses& mergeStatuses, IDataHolder::MergeData& merged, std::vector& idoffsets, std::vector& curExists, const IdRelType& relid, diff --git a/cpp_src/core/ft/ftdsl.cc b/cpp_src/core/ft/ftdsl.cc index e153c2f87..98e78623e 100644 --- a/cpp_src/core/ft/ftdsl.cc +++ b/cpp_src/core/ft/ftdsl.cc @@ -1,6 +1,7 @@ #include "core/ft/ftdsl.h" #include #include +#include 
"core/ft/config/baseftconfig.h" #include "tools/customlocal.h" #include "tools/errors.h" #include "tools/stringstools.h" @@ -30,7 +31,7 @@ void FtDSLQuery::parse(std::wstring &utf16str) { bool inGroup = false; bool hasAnythingExceptNot = false; int groupCounter = 0; - int maxPatternLen = 1; + size_t maxPatternLen = 1; h_vector fieldsOpts; std::string utf8str; fieldsOpts.insert(fieldsOpts.end(), std::max(int(fields_.size()), 1), {1.0, false}); @@ -49,27 +50,26 @@ void FtDSLQuery::parse(std::wstring &utf16str) { ++it; } else { if (*it == '@') { - it++; + ++it; parseFields(utf16str, it, fieldsOpts); continue; } if (*it == '-') { fte.opts.op = OpNot; - it++; + ++it; } else if (*it == '+') { fte.opts.op = OpAnd; - it++; + ++it; } if (it != utf16str.end() && (*it == '\'' || *it == '\"')) { inGroup = !inGroup; - it++; + ++it; // closing group if (!inGroup) { int distance = 1; if (it != utf16str.end() && *it == '~') { - ++it; - if (it == utf16str.end()) { + if (++it == utf16str.end()) { throw Error(errParseDSL, "Expected digit after '~' operator in phrase, but found nothing"); } if (!std::isdigit(*it)) { @@ -96,17 +96,17 @@ void FtDSLQuery::parse(std::wstring &utf16str) { fteIt->opts.groupNum = groupCounter; } groupTermCounter = 0; - groupCounter++; + ++groupCounter; } } } if (it != utf16str.end() && *it == '=') { fte.opts.exact = true; - it++; + ++it; } if (it != utf16str.end() && *it == '*') { fte.opts.suff = true; - it++; + ++it; } } auto begIt = it; @@ -120,21 +120,21 @@ void FtDSLQuery::parse(std::wstring &utf16str) { } } auto endIt = it; - for (; it != utf16str.end(); it++) { + for (; it != utf16str.end(); ++it) { if (*it == '*') { fte.opts.pref = true; } else if (*it == '~') { fte.opts.typos = true; } else if (*it == '^') { - ++it; - if (it == utf16str.end()) { + if (++it == utf16str.end()) { throw Error(errParseDSL, "Expected digit after '^' operator in search query DSL, but found nothing"); } wchar_t *end = nullptr, *start = &*it; fte.opts.boost = wcstod(start, 
&end); - it += end - start - 1; - if (end == start) + if (end == start) { throw Error(errParseDSL, "Expected digit after '^' operator in search query DSL, but found '%c' ", char(*start)); + } + it += end - start - 1; } else { break; } @@ -143,18 +143,14 @@ void FtDSLQuery::parse(std::wstring &utf16str) { if (endIt != begIt) { fte.pattern.assign(begIt, endIt); utf16_to_utf8(fte.pattern, utf8str); - if (is_number(utf8str)) fte.opts.number = true; - if (fte.opts.op != OpNot && groupTermCounter == 0) { - // Setting up this flag before stopWords check, to prevent error on DSL with stop word + NOT - hasAnythingExceptNot = true; - } - if (stopWords_.find(utf8str) != stopWords_.end()) { + fte.opts.number = is_number(utf8str); + // Setting up this flag before stopWords check, to prevent error on DSL with stop word + NOT + hasAnythingExceptNot = hasAnythingExceptNot || (fte.opts.op != OpNot && groupTermCounter == 0); + if (auto it = stopWords_.find(utf8str); it != stopWords_.end() && it->type == StopWord::Type::Stop) { continue; } - if (int(fte.pattern.length()) > maxPatternLen) { - maxPatternLen = fte.pattern.length(); - } + maxPatternLen = (fte.pattern.length() > maxPatternLen) ? 
fte.pattern.length() : maxPatternLen; emplace_back(std::move(fte)); if (inGroup) ++groupTermCounter; } diff --git a/cpp_src/core/ft/ftdsl.h b/cpp_src/core/ft/ftdsl.h index 28e36fcae..a01dbac76 100644 --- a/cpp_src/core/ft/ftdsl.h +++ b/cpp_src/core/ft/ftdsl.h @@ -49,9 +49,11 @@ struct FtDSLVariant { int proc = 0; }; +struct StopWord; + class FtDSLQuery : public RVector { public: - FtDSLQuery(const RHashMap &fields, const fast_hash_set &stopWords, + FtDSLQuery(const RHashMap &fields, const fast_hash_set &stopWords, const std::string &extraWordSymbols) noexcept : fields_(fields), stopWords_(stopWords), extraWordSymbols_(extraWordSymbols) {} void parse(std::wstring &utf16str); @@ -64,7 +66,7 @@ class FtDSLQuery : public RVector { std::function resolver_; const RHashMap &fields_; - const fast_hash_set &stopWords_; + const fast_hash_set &stopWords_; const std::string &extraWordSymbols_; }; diff --git a/cpp_src/core/ft/stopwords/stop_ru.cc b/cpp_src/core/ft/stopwords/stop_ru.cc index 6a63c1a15..7e5472da0 100644 --- a/cpp_src/core/ft/stopwords/stop_ru.cc +++ b/cpp_src/core/ft/stopwords/stop_ru.cc @@ -268,7 +268,7 @@ const char *stop_words_ru[] = { "кроме", "куда", "кругом", - "с т", + "с", "у", "я", "та", diff --git a/cpp_src/core/idsetcache.h b/cpp_src/core/idsetcache.h index 185073c22..6acdb92b5 100644 --- a/cpp_src/core/idsetcache.h +++ b/cpp_src/core/idsetcache.h @@ -75,12 +75,12 @@ T &operator<<(T &os, const IdSetCacheVal &v) { } struct equal_idset_cache_key { - bool operator()(const IdSetCacheKey &lhs, const IdSetCacheKey &rhs) const { + bool operator()(const IdSetCacheKey &lhs, const IdSetCacheKey &rhs) const noexcept { return lhs.cond == rhs.cond && lhs.sort == rhs.sort && *lhs.keys == *rhs.keys; } }; struct hash_idset_cache_key { - size_t operator()(const IdSetCacheKey &s) const { return (s.cond << 8) ^ (s.sort << 16) ^ s.keys->Hash(); } + size_t operator()(const IdSetCacheKey &s) const noexcept { return (size_t(s.cond) << 8) ^ (size_t(s.sort) << 16) ^ 
s.keys->Hash(); } }; using IdSetCacheBase = LRUCache; diff --git a/cpp_src/core/index/indexordered.cc b/cpp_src/core/index/indexordered.cc index 643562e35..b0ac203ce 100644 --- a/cpp_src/core/index/indexordered.cc +++ b/cpp_src/core/index/indexordered.cc @@ -60,14 +60,13 @@ SelectKeyResults IndexOrdered::SelectKey(const VariantArray &keys, CondType c auto startIt = this->idx_map.begin(); auto endIt = this->idx_map.end(); auto key1 = *keys.begin(); - switch (condition) { case CondLt: endIt = this->idx_map.lower_bound(static_cast(key1)); break; case CondLe: endIt = this->idx_map.lower_bound(static_cast(key1)); - if (endIt != this->idx_map.end() && !this->idx_map.key_comp()(static_cast(key1), endIt->first)) endIt++; + if (endIt != this->idx_map.end() && !this->idx_map.key_comp()(static_cast(key1), endIt->first)) ++endIt; break; case CondGt: startIt = this->idx_map.upper_bound(static_cast(key1)); @@ -83,12 +82,11 @@ SelectKeyResults IndexOrdered::SelectKey(const VariantArray &keys, CondType c if (startIt == this->idx_map.end()) startIt = this->idx_map.upper_bound(static_cast(key1)); endIt = this->idx_map.lower_bound(static_cast(key2)); - if (endIt != this->idx_map.end() && !this->idx_map.key_comp()(static_cast(key2), endIt->first)) endIt++; + if (endIt != this->idx_map.end() && !this->idx_map.key_comp()(static_cast(key2), endIt->first)) ++endIt; if (endIt != this->idx_map.end() && this->idx_map.key_comp()(endIt->first, static_cast(key1))) { return SelectKeyResults(std::move(res)); } - } break; case CondAny: case CondEq: @@ -134,9 +132,11 @@ SelectKeyResults IndexOrdered::SelectKey(const VariantArray &keys, CondType c typename T::iterator startIt, endIt; } ctx = {&this->idx_map, sortId, startIt, endIt}; - auto selector = [&ctx](SelectKeyResult &res, size_t &idsCount) { + auto selector = [&ctx, count](SelectKeyResult &res, size_t &idsCount) { idsCount = 0; - for (auto it = ctx.startIt; it != ctx.endIt && it != ctx.i_map->end(); ++it) { + res.reserve(count); + for (auto 
it = ctx.startIt; it != ctx.endIt; ++it) { + assertrx(it != ctx.i_map->end()); // FIXME: assertrx_dbg idsCount += it->second.Unsorted().Size(); res.emplace_back(it->second, ctx.sortId); } @@ -145,7 +145,11 @@ SelectKeyResults IndexOrdered::SelectKey(const VariantArray &keys, CondType c }; if (count > 1 && !opts.distinct && !opts.disableIdSetCache) { - this->tryIdsetCache(keys, condition, sortId, std::move(selector), res); + // Using btree node pointers instead of the real values from the filter and range instead all of the contidions + // to increase cache hits count + VariantArray cacheKeys = {Variant{startIt == this->idx_map.end() ? int64_t(0) : int64_t(&(*startIt))}, + Variant{endIt == this->idx_map.end() ? int64_t(0) : int64_t(&(*endIt))}}; + this->tryIdsetCache(cacheKeys, CondRange, sortId, std::move(selector), res); } else { size_t idsCount; selector(res, idsCount); diff --git a/cpp_src/core/index/indextext/ftkeyentry.h b/cpp_src/core/index/indextext/ftkeyentry.h index 5ac1ce72f..5cf257f50 100644 --- a/cpp_src/core/index/indextext/ftkeyentry.h +++ b/cpp_src/core/index/indextext/ftkeyentry.h @@ -39,7 +39,7 @@ class FtKeyEntry { IdSetPlain& Unsorted() noexcept { return impl_->Unsorted(); } const IdSetPlain& Unsorted() const noexcept { return impl_->Unsorted(); } - IdSetRef Sorted(unsigned sortId) const { return impl_->Sorted(sortId); } + IdSetRef Sorted(unsigned sortId) const noexcept { return impl_->Sorted(sortId); } void UpdateSortedIds(const UpdateSortedContext& ctx) { impl_->UpdateSortedIds(ctx); } void SetVDocID(int vdoc_id) noexcept { impl_->SetVDocID(vdoc_id); } const int& VDocID() const { return impl_->vdoc_id_; } diff --git a/cpp_src/core/index/indexunordered.cc b/cpp_src/core/index/indexunordered.cc index 06af94780..1d24cca2b 100644 --- a/cpp_src/core/index/indexunordered.cc +++ b/cpp_src/core/index/indexunordered.cc @@ -252,6 +252,8 @@ void IndexUnordered::Delete(const Variant &key, IdType id, StringsHolder &str } } +// WARNING: 'keys' is a key for 
LRUCache and in some cases (for ordered indexes, for example) can contain values, +// which are not correspond to the initial values from queries conditions template bool IndexUnordered::tryIdsetCache(const VariantArray &keys, CondType condition, SortType sortId, const std::function &selector, SelectKeyResult &res) { @@ -271,7 +273,7 @@ bool IndexUnordered::tryIdsetCache(const VariantArray &keys, CondType conditi cache_->Put(ckey, res.MergeIdsets(res.deferedExplicitSort, idsCount)); } } else { - res.push_back(SingleSelectKeyResult(cached.val.ids)); + res.emplace_back(std::move(cached.val.ids)); } } else { scanWin = selector(res, idsCount); diff --git a/cpp_src/core/item.cc b/cpp_src/core/item.cc index dfbed2b79..9dd1b3b18 100644 --- a/cpp_src/core/item.cc +++ b/cpp_src/core/item.cc @@ -102,7 +102,7 @@ Item::FieldRef &Item::FieldRef::operator=(span arr) { } else { itemImpl_->holder_->push_back(elem); } - pl.Set(field_, pos++, Variant(p_string{&itemImpl_->holder_->back()})); + pl.Set(field_, pos++, Variant(p_string{&itemImpl_->holder_->back()}, Variant::no_hold_t{})); } } } else { diff --git a/cpp_src/core/itemimpl.cc b/cpp_src/core/itemimpl.cc index 0c4730d86..1c8abd208 100644 --- a/cpp_src/core/itemimpl.cc +++ b/cpp_src/core/itemimpl.cc @@ -74,7 +74,7 @@ void ItemImpl::ModifyField(const IndexedTagsPath &tagsPath, const VariantArray & } tupleData_ = ser_.DetachLStr(); - pl.Set(0, Variant(p_string(reinterpret_cast(tupleData_.get())))); + pl.Set(0, Variant(p_string(reinterpret_cast(tupleData_.get())), Variant::no_hold_t{})); } void ItemImpl::SetField(std::string_view jsonPath, const VariantArray &keys, const IndexExpressionEvaluator &ev) { @@ -95,7 +95,7 @@ Error ItemImpl::FromMsgPack(std::string_view buf, size_t &offset) { Error err = msgPackDecoder_->Decode(buf, pl, ser_, offset); if (err.ok()) { tupleData_ = ser_.DetachLStr(); - pl.Set(0, Variant(p_string(reinterpret_cast(tupleData_.get())))); + pl.Set(0, Variant(p_string(reinterpret_cast(tupleData_.get())), 
Variant::no_hold_t{})); } return err; } @@ -110,7 +110,7 @@ Error ItemImpl::FromProtobuf(std::string_view buf) { Error err = decoder.Decode(buf, pl, ser_); if (err.ok()) { tupleData_ = ser_.DetachLStr(); - pl.Set(0, Variant(p_string(reinterpret_cast(tupleData_.get())))); + pl.Set(0, Variant(p_string(reinterpret_cast(tupleData_.get())), Variant::no_hold_t{})); } return err; } @@ -180,7 +180,7 @@ void ItemImpl::FromCJSON(std::string_view slice, bool pkOnly, Recoder *recoder) if (!rdser.Eof()) throw Error(errParseJson, "Internal error - left unparsed data %d", rdser.Pos()); tupleData_ = ser_.DetachLStr(); - pl.Set(0, Variant(p_string(reinterpret_cast(tupleData_.get())))); + pl.Set(0, Variant(p_string(reinterpret_cast(tupleData_.get())), Variant::no_hold_t{})); } Error ItemImpl::FromJSON(std::string_view slice, char **endp, bool pkOnly) { @@ -231,7 +231,7 @@ Error ItemImpl::FromJSON(std::string_view slice, char **endp, bool pkOnly) { // Put tuple to field[0] tupleData_ = ser_.DetachLStr(); - pl.Set(0, Variant(p_string(reinterpret_cast(tupleData_.get())))); + pl.Set(0, Variant(p_string(reinterpret_cast(tupleData_.get())), Variant::no_hold_t{})); return err; } diff --git a/cpp_src/core/itemmodifier.cc b/cpp_src/core/itemmodifier.cc index b03f00862..02055a4ed 100644 --- a/cpp_src/core/itemmodifier.cc +++ b/cpp_src/core/itemmodifier.cc @@ -360,8 +360,10 @@ void ItemModifier::modifyCJSON(IdType id, FieldData &field, VariantArray &values } catch (const Error &) { ns_.krefs.resize(0); } + } else if (index.Opts().IsArray()) { + pl.Get(fieldIdx, ns_.krefs, Variant::hold_t{}); } else { - pl.Get(fieldIdx, ns_.krefs, index.Opts().IsArray()); + pl.Get(fieldIdx, ns_.krefs); } if (ns_.krefs == ns_.skrefs) continue; bool needClearCache{false}; @@ -576,7 +578,7 @@ void ItemModifier::modifyIndexValues(IdType itemId, const FieldData &field, Vari if (index.Opts().IsSparse()) { pl.GetByJsonPath(field.tagspathWithLastIndex(), ns_.skrefs, index.KeyType()); } else { - pl.Get(field.index(), 
ns_.skrefs, true); + pl.Get(field.index(), ns_.skrefs, Variant::hold_t{}); } // Required when updating index array field with several tagpaths diff --git a/cpp_src/core/keyvalue/variant.cc b/cpp_src/core/keyvalue/variant.cc index 7e68ec9d9..64877208f 100644 --- a/cpp_src/core/keyvalue/variant.cc +++ b/cpp_src/core/keyvalue/variant.cc @@ -16,24 +16,26 @@ namespace reindexer { -Variant::Variant(const PayloadValue &v) : variant_{0, 1, KeyValueType::Composite{}} { new (cast()) PayloadValue(v); } +Variant::Variant(const PayloadValue &v) noexcept : variant_{0, 1, KeyValueType::Composite{}} { new (cast()) PayloadValue(v); } -Variant::Variant(PayloadValue &&v) : variant_{0, 1, KeyValueType::Composite{}} { new (cast()) PayloadValue(std::move(v)); } +Variant::Variant(PayloadValue &&v) noexcept : variant_{0, 1, KeyValueType::Composite{}} { new (cast()) PayloadValue(std::move(v)); } Variant::Variant(const std::string &v) : variant_{0, 1, KeyValueType::String{}} { new (cast()) key_string(make_key_string(v)); } Variant::Variant(std::string &&v) : variant_{0, 1, KeyValueType::String{}} { new (cast()) key_string(make_key_string(std::move(v))); } -Variant::Variant(const key_string &v) : variant_{0, 1, KeyValueType::String{}} { new (cast()) key_string(v); } -Variant::Variant(key_string &&v) : variant_{0, 1, KeyValueType::String{}} { new (cast()) key_string(std::move(v)); } -Variant::Variant(const char *v) : Variant(p_string(v)) {} -Variant::Variant(p_string v, bool enableHold) : variant_{0, 0, KeyValueType::String{}} { - if (v.type() == p_string::tagKeyString && enableHold) { +Variant::Variant(const key_string &v) noexcept : variant_{0, 1, KeyValueType::String{}} { new (cast()) key_string(v); } +Variant::Variant(key_string &&v) noexcept : variant_{0, 1, KeyValueType::String{}} { new (cast()) key_string(std::move(v)); } +Variant::Variant(const char *v) noexcept : Variant(p_string(v), Variant::no_hold_t{}) {} +Variant::Variant(p_string v, no_hold_t) noexcept : variant_{0, 0, 
KeyValueType::String{}} { *cast() = v; } +Variant::Variant(p_string v, hold_t) : variant_{0, 0, KeyValueType::String{}} { + if (v.type() == p_string::tagKeyString) { variant_.hold = 1; new (cast()) key_string(v.getKeyString()); } else { *cast() = v; } } +Variant::Variant(p_string v) noexcept : Variant(v, no_hold_t{}) {} Variant::Variant(const VariantArray &values) : variant_{0, 1, KeyValueType::Tuple{}} { WrSerializer ser; @@ -44,7 +46,7 @@ Variant::Variant(const VariantArray &values) : variant_{0, 1, KeyValueType::Tupl new (cast()) key_string(make_key_string(ser.Slice())); } -Variant::Variant(Point p) : Variant{VariantArray{p}} {} +Variant::Variant(Point p) noexcept : Variant{VariantArray{p}} {} Variant::Variant(Uuid uuid) noexcept : uuid_() { if (uuid.data_[0] == 0 && uuid.data_[1] == 0) { @@ -469,7 +471,7 @@ int Variant::RelaxCompare(const Variant &other, const CollateOpts &collateOpts) return Uuid{*this}.Compare(*otherUuid); } else { Uuid{*this}.PutToStr(uuidStrBuf); - return -other.Compare(Variant{uuidStrBufPString, false}); + return -other.Compare(Variant{uuidStrBufPString}); } } else if constexpr (withString == WithString::Yes) { Uuid{*this}.PutToStr(uuidStrBuf); @@ -484,7 +486,7 @@ int Variant::RelaxCompare(const Variant &other, const CollateOpts &collateOpts) return uuid->Compare(Uuid{other}); } else { Uuid{other}.PutToStr(uuidStrBuf); - return Compare(Variant{uuidStrBufPString, false}); + return Compare(Variant{uuidStrBufPString}); } } else if constexpr (withString == WithString::Yes) { Uuid{other}.PutToStr(uuidStrBuf); diff --git a/cpp_src/core/keyvalue/variant.h b/cpp_src/core/keyvalue/variant.h index efdf304ed..6fe5b532a 100644 --- a/cpp_src/core/keyvalue/variant.h +++ b/cpp_src/core/keyvalue/variant.h @@ -24,21 +24,26 @@ class Variant { friend Uuid; public: + struct no_hold_t {}; + struct hold_t {}; + Variant() noexcept : variant_{0, 0, KeyValueType::Null{}, uint64_t{}} {} explicit Variant(int v) noexcept : variant_{0, 0, KeyValueType::Int{}, v} {} 
explicit Variant(bool v) noexcept : variant_{0, 0, KeyValueType::Bool{}, v} {} explicit Variant(int64_t v) noexcept : variant_{0, 0, KeyValueType::Int64{}, v} {} explicit Variant(double v) noexcept : variant_{0, 0, KeyValueType::Double{}, v} {} - explicit Variant(const char *v); - explicit Variant(p_string v, bool enableHold = true); + explicit Variant(const char *v) noexcept; + Variant(p_string v, no_hold_t) noexcept; + Variant(p_string v, hold_t); + explicit Variant(p_string v) noexcept; explicit Variant(const std::string &v); explicit Variant(std::string &&v); - explicit Variant(const key_string &v); - explicit Variant(key_string &&v); - explicit Variant(const PayloadValue &v); - explicit Variant(PayloadValue &&v); + explicit Variant(const key_string &v) noexcept; + explicit Variant(key_string &&v) noexcept; + explicit Variant(const PayloadValue &v) noexcept; + explicit Variant(PayloadValue &&v) noexcept; explicit Variant(const VariantArray &values); - explicit Variant(Point); + explicit Variant(Point) noexcept; explicit Variant(Uuid) noexcept; Variant(const Variant &other) : uuid_{other.uuid_} { if (!isUuid()) { diff --git a/cpp_src/core/namespace/namespace.cc b/cpp_src/core/namespace/namespace.cc index 01632f4f8..6379e92d5 100644 --- a/cpp_src/core/namespace/namespace.cc +++ b/cpp_src/core/namespace/namespace.cc @@ -42,8 +42,8 @@ void Namespace::CommitTransaction(Transaction& tx, QueryResults& result, const R NsContext nsCtx(ctx); nsCtx.CopiedNsRequest(); nsCopy_->CommitTransaction(tx, result, nsCtx, statCalculator); - if (nsCopy_->lastUpdateTime_) { - nsCopy_->lastUpdateTime_ -= nsCopy_->config_.optimizationTimeout * 2; + if (nsCopy_->lastUpdateTime_.load(std::memory_order_relaxed)) { + nsCopy_->lastUpdateTime_.fetch_sub(nsCopy_->config_.optimizationTimeout * 2, std::memory_order_relaxed); nsCopy_->optimizeIndexes(nsCtx); nsCopy_->warmupFtIndexes(); } diff --git a/cpp_src/core/namespace/namespace.h b/cpp_src/core/namespace/namespace.h index 
012f0856e..3cc8adba3 100644 --- a/cpp_src/core/namespace/namespace.h +++ b/cpp_src/core/namespace/namespace.h @@ -14,6 +14,9 @@ class Namespace { while (true) { try { auto ns = atomicLoadMainNs(); + if (!ns) { + throw Error(errLogic, "Ns is nullptr"); + } return (*ns.*fn)(std::forward(args)...); } catch (const Error &e) { if (e.code() != errNamespaceInvalidated) { @@ -214,7 +217,6 @@ class Namespace { void DumpIndex(std::ostream &os, std::string_view index, const RdxContext &ctx) { return nsFuncWrapper<&NamespaceImpl::DumpIndex>(os, index, ctx); } - void SetDestroyFlag() { return nsFuncWrapper<&NamespaceImpl::SetDestroyFlag>(); } protected: friend class ReindexerImpl; diff --git a/cpp_src/core/namespace/namespaceimpl.cc b/cpp_src/core/namespace/namespaceimpl.cc index 4c254163e..8e782a694 100644 --- a/cpp_src/core/namespace/namespaceimpl.cc +++ b/cpp_src/core/namespace/namespaceimpl.cc @@ -288,12 +288,6 @@ void NamespaceImpl::OnConfigUpdated(DBConfigProvider& configProvider, const RdxC if (isSystem()) return; if (serverId_ != replicationConf.serverId) { - if (itemsCount_ != 0) { - serverIdChanged_ = true; - repl_.slaveMode = true; - repl_.replicatorEnabled = false; - logPrintf(LogWarning, "Change serverId on non empty ns [%s]. 
Set read only mode.", name_); - } serverId_ = replicationConf.serverId; logPrintf(LogWarning, "[repl:%s]:%d Change serverId", name_, serverId_); replStateUpdates_.fetch_add(1, std::memory_order_release); @@ -566,7 +560,7 @@ NamespaceImpl::RollBack_updateItems NamespaceImpl::updateItems(con for (auto fieldIdx : changedFields) { auto& index = *indexes_[fieldIdx]; if ((fieldIdx == 0) || deltaFields <= 0) { - oldValue.Get(fieldIdx, skrefsDel, true); + oldValue.Get(fieldIdx, skrefsDel, Variant::hold_t{}); bool needClearCache{false}; index.Delete(skrefsDel, rowId, *strHolder_, needClearCache); if (needClearCache && index.IsOrdered()) indexesCacheCleaner.Add(index.SortId()); @@ -1500,9 +1494,12 @@ void NamespaceImpl::doDelete(IdType id) { if (index.Opts().IsSparse()) { assertrx(index.Fields().getTagsPathsLength() > 0); pl.GetByJsonPath(index.Fields().getTagsPath(0), skrefs, index.KeyType()); + } else if (index.Opts().IsArray()) { + pl.Get(field, skrefs, Variant::hold_t{}); } else { - pl.Get(field, skrefs, index.Opts().IsArray()); + pl.Get(field, skrefs); } + // Delete value from index bool needClearCache{false}; index.Delete(skrefs, id, *strHolder_, needClearCache); @@ -1848,8 +1845,10 @@ void NamespaceImpl::doUpsert(ItemImpl* ritem, IdType id, bool doUpdate) { } catch (const Error&) { krefs.resize(0); } + } else if (index.Opts().IsArray()) { + pl.Get(field, krefs, Variant::hold_t{}); } else { - pl.Get(field, krefs, index.Opts().IsArray()); + pl.Get(field, krefs); } if ((krefs.ArrayType().Is() && skrefs.ArrayType().Is()) || krefs == skrefs) continue; bool needClearCache{false}; @@ -2070,7 +2069,6 @@ void NamespaceImpl::optimizeIndexes(const NsContext& ctx) { // If optimizationState_ == OptimizationCompleted is true, then indexes are completely built. 
// In this case reset optimizationState_ and/or any idset's and sort orders builds are allowed only protected by write lock if (optimizationState_.load(std::memory_order_relaxed) == OptimizationCompleted) return; - int64_t now = std::chrono::duration_cast(std::chrono::system_clock::now().time_since_epoch()).count(); auto lastUpdateTime = lastUpdateTime_.load(std::memory_order_acquire); Locker::RLockT rlck; @@ -2078,15 +2076,20 @@ void NamespaceImpl::optimizeIndexes(const NsContext& ctx) { rlck = rLock(ctx.rdxContext); } - if (isSystem() || repl_.temporary || !indexes_.size()) { + if (isSystem() || repl_.temporary || !indexes_.size() || !lastUpdateTime || !config_.optimizationTimeout) { return; } - if (!lastUpdateTime || !config_.optimizationTimeout || now - lastUpdateTime < config_.optimizationTimeout) { + const auto optState{optimizationState_.load(std::memory_order_acquire)}; + if (optState == OptimizationCompleted || cancelCommitCnt_.load(std::memory_order_relaxed)) { + return; + } + + using namespace std::chrono; + const int64_t now = duration_cast(system_clock::now().time_since_epoch()).count(); + if (now - lastUpdateTime < config_.optimizationTimeout) { return; } - const auto optState{optimizationState_.load(std::memory_order_acquire)}; - if (optState == OptimizationCompleted || cancelCommitCnt_.load(std::memory_order_relaxed)) return; const bool forceBuildAllIndexes = optState == NotOptimized; logPrintf(LogTrace, "Namespace::optimizeIndexes(%s) enter", name_); @@ -2141,13 +2144,13 @@ void NamespaceImpl::optimizeIndexes(const NsContext& ctx) { if (cancelCommitCnt_.load(std::memory_order_relaxed)) { logPrintf(LogTrace, "Namespace::optimizeIndexes(%s) done", name_); } else { - lastUpdateTime_.store(0, std::memory_order_release); logPrintf(LogTrace, "Namespace::optimizeIndexes(%s) was cancelled by concurent update", name_); } } void NamespaceImpl::markUpdated(bool forceOptimizeAllIndexes) { using namespace std::string_view_literals; + using namespace 
std::chrono; itemsCount_.store(items_.size(), std::memory_order_relaxed); itemsCapacity_.store(items_.capacity(), std::memory_order_relaxed); if (forceOptimizeAllIndexes) { @@ -2158,20 +2161,17 @@ void NamespaceImpl::markUpdated(bool forceOptimizeAllIndexes) { } queryCountCache_->Clear(); joinCache_->Clear(); - lastUpdateTime_.store( - std::chrono::duration_cast(std::chrono::system_clock::now().time_since_epoch()).count(), - std::memory_order_release); + lastUpdateTime_.store(duration_cast(system_clock::now().time_since_epoch()).count(), std::memory_order_release); if (!nsIsLoading_) { repl_.updatedUnixNano = getTimeNow("nsec"sv); } } void NamespaceImpl::updateSelectTime() { - lastSelectTime_ = std::chrono::duration_cast(std::chrono::system_clock::now().time_since_epoch()).count(); + using namespace std::chrono; + lastSelectTime_ = duration_cast(system_clock::now().time_since_epoch()).count(); } -int64_t NamespaceImpl::getLastSelectTime() const { return lastSelectTime_; } - void NamespaceImpl::Select(QueryResults& result, SelectCtx& params, const RdxContext& ctx) { if (!params.query.IsWALQuery()) { NsSelecter selecter(this); diff --git a/cpp_src/core/namespace/namespaceimpl.h b/cpp_src/core/namespace/namespaceimpl.h index f05ab9b76..72f53cd46 100644 --- a/cpp_src/core/namespace/namespaceimpl.h +++ b/cpp_src/core/namespace/namespaceimpl.h @@ -281,12 +281,12 @@ class NamespaceImpl : public intrusive_atomic_rc_base { // NOLINT(*performance. void OnConfigUpdated(DBConfigProvider &configProvider, const RdxContext &ctx); StorageOpts GetStorageOpts(const RdxContext &); std::shared_ptr GetSchemaPtr(const RdxContext &ctx) const; - int getNsNumber() const { return schema_ ? schema_->GetProtobufNsNumber() : 0; } + int getNsNumber() const noexcept { return schema_ ? schema_->GetProtobufNsNumber() : 0; } IndexesCacheCleaner GetIndexesCacheCleaner() { return IndexesCacheCleaner{*this}; } // Separate method for the v3/v4 replication compatibility. 
// It should not be used outside of this scenario void SetTagsMatcher(TagsMatcher &&tm, const RdxContext &ctx); - void SetDestroyFlag() { dbDestroyed_ = true; } + void SetDestroyFlag() noexcept { dbDestroyed_ = true; } Error FlushStorage(const RdxContext &ctx) { const auto flushOpts = StorageFlushOpts().WithImmediateReopen(); auto lck = rLock(ctx); @@ -405,6 +405,7 @@ class NamespaceImpl : public intrusive_atomic_rc_base { // NOLINT(*performance. void getFromJoinCache(const Query &, JoinCacheRes &out) const; void getFromJoinCacheImpl(JoinCacheRes &out) const; void getIndsideFromJoinCache(JoinCacheRes &ctx) const; + int64_t lastUpdateTimeNano() const noexcept { return repl_.updatedUnixNano; } const FieldsSet &pkFields(); @@ -412,7 +413,6 @@ class NamespaceImpl : public intrusive_atomic_rc_base { // NOLINT(*performance. void warmupFtIndexes(); void updateSelectTime(); - int64_t getLastSelectTime() const; void markReadOnly() { locker_.MarkReadOnly(); } Locker::WLockT wLock(const RdxContext &ctx) const { return locker_.WLock(ctx); } Locker::RLockT rLock(const RdxContext &ctx) const { return locker_.RLock(ctx); } diff --git a/cpp_src/core/nsselecter/btreeindexiterator.h b/cpp_src/core/nsselecter/btreeindexiterator.h index a805955a8..7a0b6bc25 100644 --- a/cpp_src/core/nsselecter/btreeindexiterator.h +++ b/cpp_src/core/nsselecter/btreeindexiterator.h @@ -10,8 +10,8 @@ namespace reindexer { template class BtreeIndexIterator final : public IndexIterator { public: - explicit BtreeIndexIterator(const T& idxMap) : idxMap_(idxMap), first_(idxMap.begin()), last_(idxMap.end()) {} - BtreeIndexIterator(const T& idxMap, const typename T::iterator& first, const typename T::iterator& last) + explicit BtreeIndexIterator(const T& idxMap) noexcept : idxMap_(idxMap), first_(idxMap.begin()), last_(idxMap.end()) {} + BtreeIndexIterator(const T& idxMap, const typename T::iterator& first, const typename T::iterator& last) noexcept : idxMap_(idxMap), first_(first), last_(last) {} 
~BtreeIndexIterator() override final = default; diff --git a/cpp_src/core/nsselecter/explaincalc.cc b/cpp_src/core/nsselecter/explaincalc.cc index 642cade9c..4dfff967e 100644 --- a/cpp_src/core/nsselecter/explaincalc.cc +++ b/cpp_src/core/nsselecter/explaincalc.cc @@ -202,6 +202,16 @@ std::string ExplainCalc::GetJSON() { json.Put("postprocess_us"sv, To_us(postprocess_)); json.Put("loop_us"sv, To_us(loop_)); json.Put("general_sort_us"sv, To_us(sort_)); + if (!subqueries_.empty()) { + auto subQuries = json.Array("subqueries"); + for (const auto &sq : subqueries_) { + auto s = subQuries.Object(); + s.Put("namespace", sq.NsName()); + s.Raw("explain", sq.Explain()); + std::visit(overloaded{[&](size_t k) { s.Put("keys", k); }, [&](const std::string &f) { s.Put("field", f); }}, + sq.FieldOrKeys()); + } + } } json.Put("sort_index"sv, sortIndex_); json.Put("sort_by_uncommitted_index"sv, sortOptimization_); @@ -305,45 +315,6 @@ std::string SelectIteratorContainer::explainJSON(const_iterator begin, const_ite return name.str(); } -ExplainCalc::Duration ExplainCalc::lap() noexcept { - auto now = Clock::now(); - Duration d = now - last_point_; - last_point_ = now; - return d; -} - int ExplainCalc::To_us(const ExplainCalc::Duration &d) noexcept { return duration_cast(d).count(); } -void ExplainCalc::StartTiming() noexcept { - if (enabled_) lap(); -} - -void ExplainCalc::StopTiming() noexcept { - if (enabled_) total_ = preselect_ + prepare_ + select_ + postprocess_ + loop_; -} - -void ExplainCalc::AddPrepareTime() noexcept { - if (enabled_) prepare_ += lap(); -} - -void ExplainCalc::AddSelectTime() noexcept { - if (enabled_) select_ += lap(); -} - -void ExplainCalc::AddPostprocessTime() noexcept { - if (enabled_) postprocess_ += lap(); -} - -void ExplainCalc::AddLoopTime() noexcept { - if (enabled_) loop_ += lap(); -} - -void ExplainCalc::StartSort() noexcept { - if (enabled_) sort_start_point_ = Clock::now(); -} - -void ExplainCalc::StopSort() noexcept { - if (enabled_) sort_ = 
Clock::now() - sort_start_point_; -} - } // namespace reindexer diff --git a/cpp_src/core/nsselecter/explaincalc.h b/cpp_src/core/nsselecter/explaincalc.h index 837dfafde..ab23d8290 100644 --- a/cpp_src/core/nsselecter/explaincalc.h +++ b/cpp_src/core/nsselecter/explaincalc.h @@ -2,6 +2,7 @@ #include #include +#include #include #include "core/type_consts.h" @@ -17,6 +18,24 @@ struct ConditionInjection; typedef std::vector JoinedSelectors; typedef std::vector OnConditionInjections; +class SubQueryExplain { +public: + SubQueryExplain(const std::string& ns, std::string&& exp) : explain_{std::move(exp)}, namespace_{ns} {} + [[nodiscard]] const std::string& NsName() const& noexcept { return namespace_; } + [[nodiscard]] const auto& FieldOrKeys() const& noexcept { return fieldOrKeys_; } + [[nodiscard]] const std::string& Explain() const& noexcept { return explain_; } + void SetFieldOrKeys(std::variant&& fok) noexcept { fieldOrKeys_ = std::move(fok); } + + auto NsName() const&& = delete; + auto FieldOrKeys() const&& = delete; + auto Explain() const&& = delete; + +private: + std::string explain_; + std::string namespace_; + std::variant fieldOrKeys_{size_t(0)}; +}; + class ExplainCalc { public: typedef std::chrono::high_resolution_clock Clock; @@ -29,24 +48,40 @@ class ExplainCalc { ExplainCalc() = default; ExplainCalc(bool enable) noexcept : enabled_(enable) {} - void StartTiming() noexcept; - void StopTiming() noexcept; - - void AddPrepareTime() noexcept; - void AddSelectTime() noexcept; - void AddPostprocessTime() noexcept; - void AddLoopTime() noexcept; + void StartTiming() noexcept { + if (enabled_) lap(); + } + void StopTiming() noexcept { + if (enabled_) total_ = preselect_ + prepare_ + select_ + postprocess_ + loop_; + } + void AddPrepareTime() noexcept { + if (enabled_) prepare_ += lap(); + } + void AddSelectTime() noexcept { + if (enabled_) select_ += lap(); + } + void AddPostprocessTime() noexcept { + if (enabled_) postprocess_ += lap(); + } + void AddLoopTime() 
noexcept { + if (enabled_) loop_ += lap(); + } void AddIterations(int iters) noexcept { iters_ += iters; } - void StartSort() noexcept; - void StopSort() noexcept; + void StartSort() noexcept { + if (enabled_) sort_start_point_ = Clock::now(); + } + void StopSort() noexcept { + if (enabled_) sort_ = Clock::now() - sort_start_point_; + } void PutCount(int cnt) noexcept { count_ = cnt; } void PutSortIndex(std::string_view index) noexcept { sortIndex_ = index; } - void PutSelectors(const SelectIteratorContainer *qres) noexcept { selectors_ = qres; } - void PutJoinedSelectors(const JoinedSelectors *jselectors) noexcept { jselectors_ = jselectors; } + void PutSelectors(const SelectIteratorContainer* qres) noexcept { selectors_ = qres; } + void PutJoinedSelectors(const JoinedSelectors* jselectors) noexcept { jselectors_ = jselectors; } void SetPreselectTime(Duration preselectTime) noexcept { preselect_ = preselectTime; } - void PutOnConditionInjections(const OnConditionInjections *onCondInjections) noexcept { onInjections_ = onCondInjections; } + void PutOnConditionInjections(const OnConditionInjections* onCondInjections) noexcept { onInjections_ = onCondInjections; } void SetSortOptimization(bool enable) noexcept { sortOptimization_ = enable; } + void SetSubQueriesExplains(std::vector&& subQueriesExpl) noexcept { subqueries_ = std::move(subQueriesExpl); } void LogDump(int logLevel); std::string GetJSON(); @@ -59,14 +94,21 @@ class ExplainCalc { Duration Sort() const noexcept { return sort_; } size_t Iterations() const noexcept { return iters_; } - static int To_us(const Duration &d) noexcept; bool IsEnabled() const noexcept { return enabled_; } + static int To_us(const Duration &d) noexcept; + private: - Duration lap() noexcept; + Duration lap() noexcept { + const auto now = Clock::now(); + Duration d = now - last_point_; + last_point_ = now; + return d; + } time_point last_point_, sort_start_point_; - Duration total_, prepare_ = Duration::zero(); + Duration total_ = 
Duration::zero(); + Duration prepare_ = Duration::zero(); Duration preselect_ = Duration::zero(); Duration select_ = Duration::zero(); Duration postprocess_ = Duration::zero(); @@ -74,9 +116,10 @@ class ExplainCalc { Duration sort_ = Duration::zero(); std::string_view sortIndex_; - const SelectIteratorContainer *selectors_ = nullptr; - const JoinedSelectors *jselectors_ = nullptr; - const OnConditionInjections *onInjections_ = nullptr; ///< Optional + const SelectIteratorContainer* selectors_ = nullptr; + const JoinedSelectors* jselectors_ = nullptr; + const OnConditionInjections* onInjections_ = nullptr; ///< Optional + std::vector subqueries_; int iters_ = 0; int count_ = 0; diff --git a/cpp_src/core/nsselecter/fieldscomparator.cc b/cpp_src/core/nsselecter/fieldscomparator.cc index 2005861c8..1734a944c 100644 --- a/cpp_src/core/nsselecter/fieldscomparator.cc +++ b/cpp_src/core/nsselecter/fieldscomparator.cc @@ -41,8 +41,8 @@ class ArrayAdapter { [&](reindexer::KeyValueType::Double) noexcept { return reindexer::Variant{*reinterpret_cast(ptr_ + sizeof_ * i)}; }, - [&](reindexer::KeyValueType::String) { - return reindexer::Variant{*reinterpret_cast(ptr_ + sizeof_ * i), false}; + [&](reindexer::KeyValueType::String) noexcept { + return reindexer::Variant{*reinterpret_cast(ptr_ + sizeof_ * i)}; }, [&](reindexer::KeyValueType::Bool) noexcept { return reindexer::Variant{*reinterpret_cast(ptr_ + sizeof_ * i)}; }, [&](reindexer::KeyValueType::Int) noexcept { return reindexer::Variant{*reinterpret_cast(ptr_ + sizeof_ * i)}; }, diff --git a/cpp_src/core/nsselecter/joinedselector.h b/cpp_src/core/nsselecter/joinedselector.h index 531a1c25e..ff002974f 100644 --- a/cpp_src/core/nsselecter/joinedselector.h +++ b/cpp_src/core/nsselecter/joinedselector.h @@ -72,7 +72,8 @@ class JoinedSelector { public: JoinedSelector(JoinType joinType, NamespaceImpl::Ptr leftNs, NamespaceImpl::Ptr rightNs, JoinCacheRes &&joinRes, Query &&itemQuery, QueryResults &result, const JoinedQuery 
&joinQuery, JoinPreResult::Ptr preResult, uint32_t joinedFieldIdx, - SelectFunctionsHolder &selectFunctions, uint32_t joinedSelectorsCount, bool inTransaction, const RdxContext &rdxCtx) + SelectFunctionsHolder &selectFunctions, uint32_t joinedSelectorsCount, bool inTransaction, int64_t lastUpdateTime, + const RdxContext &rdxCtx) : joinType_(joinType), called_(0), matched_(0), @@ -88,7 +89,8 @@ class JoinedSelector { joinedSelectorsCount_(joinedSelectorsCount), rdxCtx_(rdxCtx), optimized_(false), - inTransaction_{inTransaction} { + inTransaction_{inTransaction}, + lastUpdateTime_{lastUpdateTime} { #ifndef NDEBUG for (const auto &jqe : joinQuery_.joinEntries_) { assertrx_throw(jqe.FieldsHaveBeenSet()); @@ -105,6 +107,7 @@ class JoinedSelector { JoinType Type() const noexcept { return joinType_; } void SetType(JoinType type) noexcept { joinType_ = type; } const std::string &RightNsName() const noexcept { return itemQuery_.NsName(); } + int64_t LastUpdateTime() const noexcept { return lastUpdateTime_; } const JoinedQuery &JoinQuery() const noexcept { return joinQuery_; } int Called() const noexcept { return called_; } int Matched() const noexcept { return matched_; } @@ -136,8 +139,9 @@ class JoinedSelector { SelectFunctionsHolder &selectFunctions_; uint32_t joinedSelectorsCount_; const RdxContext &rdxCtx_; - bool optimized_{false}; - bool inTransaction_{false}; + bool optimized_ = false; + bool inTransaction_ = false; + int64_t lastUpdateTime_ = 0; }; using JoinedSelectors = std::vector; diff --git a/cpp_src/core/nsselecter/nsselecter.cc b/cpp_src/core/nsselecter/nsselecter.cc index 3a7c22bc0..9f69f7b0d 100644 --- a/cpp_src/core/nsselecter/nsselecter.cc +++ b/cpp_src/core/nsselecter/nsselecter.cc @@ -27,6 +27,7 @@ void NsSelecter::operator()(QueryResults &result, SelectCtx &ctx, const RdxConte auto &explain = ctx.explain; explain = ExplainCalc(ctx.query.GetExplain() || logLevel >= LogInfo); + explain.SetSubQueriesExplains(std::move(ctx.subQueriesExplains)); 
ActiveQueryScope queryScope(ctx, ns_->optimizationState_, explain, ns_->locker_.IsReadOnly(), ns_->strHolder_.get()); explain.SetPreselectTime(ctx.preResultTimeTotal); @@ -49,16 +50,20 @@ void NsSelecter::operator()(QueryResults &result, SelectCtx &ctx, const RdxConte QueryCacheKey ckey; if (aggregationQueryRef.CalcTotal() == ModeCachedTotal || containAggCountCached) { - ckey = QueryCacheKey{ctx.query}; + ckey = QueryCacheKey{ctx.query, kCountCachedKeyMode, ctx.joinedSelectors}; auto cached = ns_->queryCountCache_->Get(ckey); if (cached.valid && cached.val.total_count >= 0) { result.totalCount += cached.val.total_count; - logPrintf(LogTrace, "[%s] using value from cache: %d", ns_->name_, result.totalCount); + if (logLevel >= LogTrace) { + logPrintf(LogInfo, "[%s] using total count value from cache: %d", ns_->name_, result.totalCount); + } } else { needPutCachedTotal = cached.valid; - logPrintf(LogTrace, "[%s] value for cache will be calculated by query", ns_->name_); needCalcTotal = true; + if (logLevel >= LogTrace) { + logPrintf(LogTrace, "[%s] total count value for cache will be calculated by query", ns_->name_); + } } } diff --git a/cpp_src/core/nsselecter/nsselecter.h b/cpp_src/core/nsselecter/nsselecter.h index 6d4c87f34..329fa1e44 100644 --- a/cpp_src/core/nsselecter/nsselecter.h +++ b/cpp_src/core/nsselecter/nsselecter.h @@ -33,6 +33,7 @@ struct SelectCtx { const Query *parentQuery = nullptr; ExplainCalc explain; bool requiresCrashTracking = false; + std::vector subQueriesExplains; RX_ALWAYS_INLINE bool isMergeQuerySubQuery() const noexcept { return isMergeQuery == IsMergeQuery::Yes && parentQuery; } }; @@ -47,7 +48,7 @@ class NsSelecter { class JoinedNsValueGetter; public: - NsSelecter(NamespaceImpl *parent) : ns_(parent) {} + NsSelecter(NamespaceImpl *parent) noexcept : ns_(parent) {} void operator()(QueryResults &result, SelectCtx &ctx, const RdxContext &); diff --git a/cpp_src/core/nsselecter/selectiterator.cc b/cpp_src/core/nsselecter/selectiterator.cc 
index 05ab657be..481085245 100644 --- a/cpp_src/core/nsselecter/selectiterator.cc +++ b/cpp_src/core/nsselecter/selectiterator.cc @@ -7,9 +7,6 @@ namespace reindexer { -SelectIterator::SelectIterator(SelectKeyResult res, bool dist, std::string n, IteratorFieldKind fKind, bool forcedFirst) - : SelectKeyResult(std::move(res)), distinct(dist), name(std::move(n)), fieldKind(fKind), forcedFirst_(forcedFirst), type_(Forward) {} - void SelectIterator::Bind(const PayloadType &type, int field) { for (Comparator &cmp : comparators_) cmp.Bind(type, field); } @@ -18,62 +15,69 @@ void SelectIterator::Start(bool reverse, int maxIterations) { const bool explicitSort = applyDeferedSort(maxIterations); isReverse_ = reverse; - lastIt_ = begin(); + const auto begIt = begin(); + lastIt_ = begIt; - for (auto it = begin(); it != end(); it++) { + for (auto it = begIt, endIt = end(); it != endIt; ++it) { if (it->isRange_) { if (isReverse_) { - auto rrBegin = it->rEnd_ - 1; + const auto rrBegin = it->rEnd_ - 1; it->rrEnd_ = it->rBegin_ - 1; it->rrBegin_ = rrBegin; - it->rrIt_ = it->rrBegin_; + it->rrIt_ = rrBegin; } else { it->rIt_ = it->rBegin_; } } else { if (it->useBtree_) { - assertrx(it->set_); + assertrx_dbg(it->set_); if (reverse) { - it->setrbegin_ = it->set_->rbegin(); + const auto setRBegin = it->set_->rbegin(); + it->ritset_ = setRBegin; + it->setrbegin_ = setRBegin; it->setrend_ = it->set_->rend(); - it->ritset_ = it->set_->rbegin(); } else { - it->setbegin_ = it->set_->begin(); + const auto setBegin = it->set_->begin(); + it->itset_ = setBegin; + it->setbegin_ = setBegin; it->setend_ = it->set_->end(); - it->itset_ = it->setbegin_; } } else { if (isReverse_) { - it->rbegin_ = it->ids_.rbegin(); + const auto idsRBegin = it->ids_.rbegin(); it->rend_ = it->ids_.rend(); - it->rit_ = it->ids_.rbegin(); + it->rit_ = idsRBegin; + it->rbegin_ = idsRBegin; } else { - it->begin_ = it->ids_.begin(); + const auto idsBegin = it->ids_.begin(); it->end_ = it->ids_.end(); - it->it_ = 
it->ids_.begin(); + it->it_ = idsBegin; + it->begin_ = idsBegin; } } } } lastVal_ = isReverse_ ? INT_MAX : INT_MIN; - type_ = isReverse_ ? Reverse : Forward; - if (size() == 1 && begin()->indexForwardIter_) { + + if (size() == 0) { + type_ = OnlyComparator; + lastVal_ = isReverse_ ? INT_MIN : INT_MAX; + } else if (size() == 1 && begIt->indexForwardIter_) { type_ = UnbuiltSortOrdersIndex; - begin()->indexForwardIter_->Start(reverse); + begIt->indexForwardIter_->Start(reverse); } else if (isUnsorted) { type_ = Unsorted; } else if (size() == 1) { if (!isReverse_) { - type_ = begin()->isRange_ ? SingleRange : (explicitSort ? SingleIdSetWithDeferedSort : SingleIdset); + type_ = begIt->isRange_ ? SingleRange : (explicitSort ? SingleIdSetWithDeferedSort : SingleIdset); } else { - type_ = begin()->isRange_ ? RevSingleRange : (explicitSort ? RevSingleIdSetWithDeferedSort : RevSingleIdset); + type_ = begIt->isRange_ ? RevSingleRange : (explicitSort ? RevSingleIdSetWithDeferedSort : RevSingleIdset); } + } else { + type_ = isReverse_ ? Reverse : Forward; } - if (size() == 0) { - type_ = OnlyComparator; - lastVal_ = isReverse_ ? 
INT_MIN : INT_MAX; - } + ClearDistinct(); } @@ -81,7 +85,7 @@ void SelectIterator::Start(bool reverse, int maxIterations) { bool SelectIterator::nextFwd(IdType minHint) noexcept { if (minHint > lastVal_) lastVal_ = minHint - 1; int minVal = INT_MAX; - for (auto it = begin(); it != end(); it++) { + for (auto it = begin(), endIt = end(); it != endIt; ++it) { if (it->useBtree_) { if (it->itset_ != it->setend_) { it->itset_ = it->set_->upper_bound(lastVal_); @@ -100,7 +104,7 @@ bool SelectIterator::nextFwd(IdType minHint) noexcept { } } else if (!it->isRange_ && it->it_ != it->end_) { - for (; it->it_ != it->end_ && *it->it_ <= lastVal_; it->it_++) { + for (; it->it_ != it->end_ && *it->it_ <= lastVal_; ++it->it_) { } if (it->it_ != it->end_ && *it->it_ < minVal) { minVal = *it->it_; @@ -117,7 +121,7 @@ bool SelectIterator::nextRev(IdType maxHint) noexcept { if (maxHint < lastVal_) lastVal_ = maxHint + 1; int maxVal = INT_MIN; - for (auto it = begin(); it != end(); it++) { + for (auto it = begin(), endIt = end(); it != endIt; ++it) { if (it->useBtree_ && it->ritset_ != it->setrend_) { for (; it->ritset_ != it->setrend_ && *it->ritset_ >= lastVal_; ++it->ritset_) { } @@ -133,7 +137,7 @@ bool SelectIterator::nextRev(IdType maxHint) noexcept { lastIt_ = it; } } else if (!it->isRange_ && !it->useBtree_ && it->rit_ != it->rend_) { - for (; it->rit_ != it->rend_ && *it->rit_ >= lastVal_; it->rit_++) { + for (; it->rit_ != it->rend_ && *it->rit_ >= lastVal_; ++it->rit_) { } if (it->rit_ != it->rend_ && *it->rit_ > maxVal) { maxVal = *it->rit_; @@ -160,7 +164,7 @@ bool SelectIterator::nextFwdSingleIdset(IdType minHint) noexcept { it->it_ = std::upper_bound(it->it_, it->end_, lastVal_); } } else { - for (; it->it_ != it->end_ && *it->it_ <= lastVal_; it->it_++) { + for (; it->it_ != it->end_ && *it->it_ <= lastVal_; ++it->it_) { } } lastVal_ = (it->it_ != it->end_) ? 
*it->it_ : INT_MAX; @@ -174,11 +178,11 @@ bool SelectIterator::nextRevSingleIdset(IdType maxHint) noexcept { auto it = begin(); if (it->useBtree_) { - for (; it->ritset_ != it->setrend_ && *it->ritset_ >= lastVal_; it->ritset_++) { + for (; it->ritset_ != it->setrend_ && *it->ritset_ >= lastVal_; ++it->ritset_) { } lastVal_ = (it->ritset_ != it->setrend_) ? *it->ritset_ : INT_MIN; } else { - for (; it->rit_ != it->rend_ && *it->rit_ >= lastVal_; it->rit_++) { + for (; it->rit_ != it->rend_ && *it->rit_ >= lastVal_; ++it->rit_) { } lastVal_ = (it->rit_ != it->rend_) ? *it->rit_ : INT_MIN; } @@ -192,41 +196,44 @@ bool SelectIterator::nextUnbuiltSortOrders() noexcept { return begin()->indexFor bool SelectIterator::nextFwdSingleRange(IdType minHint) noexcept { if (minHint > lastVal_) lastVal_ = minHint - 1; - if (lastVal_ < begin()->rBegin_) lastVal_ = begin()->rBegin_ - 1; + const auto begIt = begin(); + if (lastVal_ < begIt->rBegin_) lastVal_ = begIt->rBegin_ - 1; - lastVal_ = (lastVal_ < begin()->rEnd_) ? lastVal_ + 1 : begin()->rEnd_; - if (lastVal_ == begin()->rEnd_) lastVal_ = INT_MAX; + lastVal_ = (lastVal_ < begIt->rEnd_) ? lastVal_ + 1 : begIt->rEnd_; + if (lastVal_ == begIt->rEnd_) lastVal_ = INT_MAX; return (lastVal_ != INT_MAX); } bool SelectIterator::nextRevSingleRange(IdType maxHint) noexcept { if (maxHint < lastVal_) lastVal_ = maxHint + 1; - if (lastVal_ > begin()->rrBegin_) lastVal_ = begin()->rrBegin_ + 1; + const auto begIt = begin(); + if (lastVal_ > begIt->rrBegin_) lastVal_ = begIt->rrBegin_ + 1; - lastVal_ = (lastVal_ > begin()->rrEnd_) ? lastVal_ - 1 : begin()->rrEnd_; - if (lastVal_ == begin()->rrEnd_) lastVal_ = INT_MIN; + lastVal_ = (lastVal_ > begIt->rrEnd_) ? 
lastVal_ - 1 : begIt->rrEnd_; + if (lastVal_ == begIt->rrEnd_) lastVal_ = INT_MIN; return (lastVal_ != INT_MIN); } // Unsorted next implementation bool SelectIterator::nextUnsorted() noexcept { - if (lastIt_ == end()) { + const auto endIt = end(); + if (lastIt_ == endIt) { return false; } else if (lastIt_->it_ == lastIt_->end_) { ++lastIt_; - while (lastIt_ != end()) { + while (lastIt_ != endIt) { if (lastIt_->it_ != lastIt_->end_) { lastVal_ = *lastIt_->it_; - lastIt_->it_++; + ++lastIt_->it_; return true; } ++lastIt_; } } else { lastVal_ = *lastIt_->it_; - lastIt_->it_++; + ++lastIt_->it_; return true; } @@ -236,8 +243,9 @@ bool SelectIterator::nextUnsorted() noexcept { void SelectIterator::ExcludeLastSet(const PayloadValue &value, IdType rowId, IdType properRowId) { for (auto &comp : comparators_) comp.ExcludeDistinct(value, properRowId); if (type_ == UnbuiltSortOrdersIndex) { - if (begin()->indexForwardIter_->Value() == rowId) { - begin()->indexForwardIter_->ExcludeLastSet(); + const auto begIt = begin(); + if (begIt->indexForwardIter_->Value() == rowId) { + begIt->indexForwardIter_->ExcludeLastSet(); } } else if (!End() && lastIt_ != end() && lastVal_ == rowId) { assertrx(!lastIt_->isRange_); @@ -284,12 +292,13 @@ double SelectIterator::Cost(int expectedIterations) const noexcept { // Comparatos with non index fields must have much higher cost, than comparators with index fields result = jsonPathComparators ? 
(8 * double(expectedIterations) + jsonPathComparators + 1) : (double(expectedIterations) + 1); } + const auto sz = size(); if (distinct) { - result += size(); + result += sz; } else if (type_ != SingleIdSetWithDeferedSort && type_ != RevSingleIdSetWithDeferedSort && !deferedExplicitSort) { - result += static_cast(GetMaxIterations()) * size(); + result += static_cast(GetMaxIterations()) * sz; } else { - result += static_cast(CostWithDefferedSort(size(), GetMaxIterations(), expectedIterations)); + result += static_cast(CostWithDefferedSort(sz, GetMaxIterations(), expectedIterations)); } return isNotOperation_ ? expectedIterations + result : result; } diff --git a/cpp_src/core/nsselecter/selectiterator.h b/cpp_src/core/nsselecter/selectiterator.h index 55286e183..ef4606b1e 100644 --- a/cpp_src/core/nsselecter/selectiterator.h +++ b/cpp_src/core/nsselecter/selectiterator.h @@ -24,7 +24,13 @@ class SelectIterator : public SelectKeyResult { }; SelectIterator() = default; - SelectIterator(SelectKeyResult res, bool distinct, std::string name, IteratorFieldKind fieldKind, bool forcedFirst = false); + SelectIterator(SelectKeyResult res, bool dist, std::string n, IteratorFieldKind fKind, bool forcedFirst = false) noexcept + : SelectKeyResult(std::move(res)), + distinct(dist), + name(std::move(n)), + fieldKind(fKind), + forcedFirst_(forcedFirst), + type_(Forward) {} /// Starts iteration process: prepares /// object for further work. 
diff --git a/cpp_src/core/nsselecter/sortingcontext.cc b/cpp_src/core/nsselecter/sortingcontext.cc deleted file mode 100644 index 65376dbd9..000000000 --- a/cpp_src/core/nsselecter/sortingcontext.cc +++ /dev/null @@ -1,74 +0,0 @@ -#include "sortingcontext.h" -#include "core/index/index.h" -#include "core/query/query.h" - -namespace reindexer { - -Index *SortingContext::sortIndex() const noexcept { - if (entries.empty()) return nullptr; - return std::visit(overloaded{[](const OneOf &) noexcept -> Index * { return nullptr; }, - [](const FieldEntry &e) noexcept { return e.index; }}, - entries[0]); -} - -const Index *SortingContext::sortIndexIfOrdered() const noexcept { - if (entries.empty() || !isIndexOrdered() || !enableSortOrders) return nullptr; - return std::visit(overloaded{[](const OneOf &) noexcept -> Index * { return nullptr; }, - [](const FieldEntry &e) noexcept { return e.index; }}, - entries[0]); -} - -int SortingContext::sortId() const noexcept { - if (!enableSortOrders) return 0; - Index *sortIdx = sortIndex(); - return sortIdx ? 
sortIdx->SortId() : 0; -} - -bool SortingContext::isIndexOrdered() const noexcept { - if (entries.empty()) return false; - return std::visit(overloaded{[](const OneOf &) noexcept { return false; }, - [](const FieldEntry &e) noexcept { return e.index && e.index->IsOrdered(); }}, - entries[0]); -} - -bool SortingContext::isOptimizationEnabled() const noexcept { return (uncommitedIndex >= 0) && sortIndex(); } - -const SortingContext::Entry &SortingContext::getFirstColumnEntry() const noexcept { - assertrx(!entries.empty()); - return entries[0]; -} - -void SortingContext::resetOptimization() noexcept { - uncommitedIndex = -1; - if (!entries.empty()) { - std::visit( - overloaded{[](const OneOf &) noexcept {}, [](FieldEntry &e) noexcept { e.index = nullptr; }}, - entries[0]); - } -} - -SortingOptions::SortingOptions(const SortingContext &sortingContext) noexcept - : forcedMode{sortingContext.forcedMode}, - multiColumn{sortingContext.entries.size() > 1}, - haveExpression{!sortingContext.expressions.empty()} { - if (sortingContext.entries.empty()) { - usingGeneralAlgorithm = false; - byBtreeIndex = false; - } else { - std::visit(overloaded{[](const OneOf &) noexcept {}, - [&](const SortingContext::FieldEntry &sortEntry) noexcept { - if (sortEntry.index && sortEntry.index->IsOrdered()) { - byBtreeIndex = (sortingContext.isOptimizationEnabled() || sortingContext.enableSortOrders); - multiColumnByBtreeIndex = (byBtreeIndex && multiColumn); - } - usingGeneralAlgorithm = !byBtreeIndex; - }}, - sortingContext.entries[0]); - } -} - -bool SortingOptions::postLoopSortingRequired() const noexcept { - return multiColumn || usingGeneralAlgorithm || forcedMode || haveExpression; -} - -} // namespace reindexer diff --git a/cpp_src/core/nsselecter/sortingcontext.h b/cpp_src/core/nsselecter/sortingcontext.h index 22bccbcec..a246eb7d4 100644 --- a/cpp_src/core/nsselecter/sortingcontext.h +++ b/cpp_src/core/nsselecter/sortingcontext.h @@ -1,5 +1,6 @@ #pragma once +#include 
"core/index/index.h" #include "core/indexopts.h" #include "estl/h_vector.h" #include "sortexpression.h" @@ -27,13 +28,49 @@ struct SortingContext { }; using Entry = std::variant; - [[nodiscard]] int sortId() const noexcept; - [[nodiscard]] Index *sortIndex() const noexcept; - [[nodiscard]] const Index *sortIndexIfOrdered() const noexcept; - [[nodiscard]] bool isOptimizationEnabled() const noexcept; - [[nodiscard]] bool isIndexOrdered() const noexcept; - [[nodiscard]] const Entry &getFirstColumnEntry() const noexcept; - void resetOptimization() noexcept; + [[nodiscard]] int sortId() const noexcept { + if (!enableSortOrders) return 0; + const Index *sortIdx = sortIndex(); + return sortIdx ? int(sortIdx->SortId()) : 0; + } + [[nodiscard]] Index *sortIndex() const noexcept { + if (entries.empty()) return nullptr; + // get_if is truly noexcept, so using it instead of std::visit + if (const auto *fe = std::get_if(&entries[0]); fe) { + return fe->index; + } + return nullptr; + } + [[nodiscard]] const Index *sortIndexIfOrdered() const noexcept { + if (entries.empty() || !isIndexOrdered() || !enableSortOrders) return nullptr; + // get_if is truly noexcept, so using it instead of std::visit + if (const auto *fe = std::get_if(&entries[0]); fe) { + return fe->index; + } + return nullptr; + } + [[nodiscard]] bool isOptimizationEnabled() const noexcept { return (uncommitedIndex >= 0) && sortIndex(); } + [[nodiscard]] bool isIndexOrdered() const noexcept { + if (entries.empty()) return false; + // get_if is truly noexcept, so using it instead of std::visit + if (const auto *fe = std::get_if(&entries[0]); fe) { + return fe->index && fe->index->IsOrdered(); + } + return false; + } + [[nodiscard]] const Entry &getFirstColumnEntry() const noexcept { + assertrx(!entries.empty()); + return entries[0]; + } + void resetOptimization() noexcept { + uncommitedIndex = -1; + if (!entries.empty()) { + // get_if is truly noexcept, so using it instead of std::visit + if (auto *fe = 
std::get_if(&entries[0]); fe) { + fe->index = nullptr; + } + } + } bool enableSortOrders = false; h_vector entries; @@ -44,8 +81,27 @@ struct SortingContext { }; struct SortingOptions { - SortingOptions(const SortingContext &sortingContext) noexcept; - [[nodiscard]] bool postLoopSortingRequired() const noexcept; + SortingOptions(const SortingContext &sortingContext) noexcept + : forcedMode{sortingContext.forcedMode}, + multiColumn{sortingContext.entries.size() > 1}, + haveExpression{!sortingContext.expressions.empty()} { + if (sortingContext.entries.empty()) { + usingGeneralAlgorithm = false; + byBtreeIndex = false; + } else { + // get_if is truly noexcept, so using it instead of std::visit + if (auto *sortEntry = std::get_if(&sortingContext.entries[0]); sortEntry) { + if (sortEntry->index && sortEntry->index->IsOrdered()) { + byBtreeIndex = (sortingContext.isOptimizationEnabled() || sortingContext.enableSortOrders); + multiColumnByBtreeIndex = (byBtreeIndex && multiColumn); + } + usingGeneralAlgorithm = !byBtreeIndex; + } + } + } + [[nodiscard]] bool postLoopSortingRequired() const noexcept { + return multiColumn || usingGeneralAlgorithm || forcedMode || haveExpression; + } bool byBtreeIndex = false; bool usingGeneralAlgorithm = true; diff --git a/cpp_src/core/payload/payloadfieldvalue.h b/cpp_src/core/payload/payloadfieldvalue.h index 011b01ee8..b9dfcf94a 100644 --- a/cpp_src/core/payload/payloadfieldvalue.h +++ b/cpp_src/core/payload/payloadfieldvalue.h @@ -47,19 +47,36 @@ class PayloadFieldValue { abort(); }); } - Variant Get(bool enableHold = false) const { + Variant Get() noexcept { return Get(Variant::no_hold_t{}); } + template + Variant Get(HoldT h) const noexcept(noexcept(Variant(std::declval(), h))) { return t_.Type().EvaluateOneOf( [&](KeyValueType::Bool) noexcept { return Variant(*reinterpret_cast(p_)); }, [&](KeyValueType::Int) noexcept { return Variant(*reinterpret_cast(p_)); }, [&](KeyValueType::Int64) noexcept { return 
Variant(*reinterpret_cast(p_)); }, [&](KeyValueType::Double) noexcept { return Variant(*reinterpret_cast(p_)); }, - [&](KeyValueType::String) { return Variant(*reinterpret_cast(p_), enableHold); }, + [&](KeyValueType::String) noexcept(noexcept(Variant(std::declval(), h))) { + return Variant(*reinterpret_cast(p_), h); + }, [&](KeyValueType::Uuid) noexcept { return Variant(*reinterpret_cast(p_)); }, [](OneOf) noexcept -> Variant { assertrx(0); abort(); }); } + // Variant Get(Variant::hold_t) const noexcept { + // return t_.Type().EvaluateOneOf( + // [&](KeyValueType::Bool) noexcept { return Variant(*reinterpret_cast(p_)); }, + // [&](KeyValueType::Int) noexcept { return Variant(*reinterpret_cast(p_)); }, + // [&](KeyValueType::Int64) noexcept { return Variant(*reinterpret_cast(p_)); }, + // [&](KeyValueType::Double) noexcept { return Variant(*reinterpret_cast(p_)); }, + // [&](KeyValueType::String) noexcept { return Variant(*reinterpret_cast(p_)); }, + // [&](KeyValueType::Uuid) noexcept { return Variant(*reinterpret_cast(p_)); }, + // [](OneOf) noexcept -> Variant { + // assertrx(0); + // abort(); + // }); + // } size_t Hash() const noexcept { return t_.Type().EvaluateOneOf( [&](KeyValueType::Bool) noexcept { return std::hash()(*reinterpret_cast(p_)); }, diff --git a/cpp_src/core/payload/payloadiface.cc b/cpp_src/core/payload/payloadiface.cc index 9c6a0fa02..5e2cd5dea 100644 --- a/cpp_src/core/payload/payloadiface.cc +++ b/cpp_src/core/payload/payloadiface.cc @@ -10,8 +10,39 @@ namespace reindexer { +// Get element(s) by field index +template +void PayloadIface::Get(int field, VariantArray &keys, Variant::hold_t h) const { + get(field, keys, h); +} +template +void PayloadIface::Get(int field, VariantArray &keys) const { + get(field, keys, Variant::no_hold_t{}); +} + +// Get element by field and array index +template +Variant PayloadIface::Get(int field, int idx, Variant::hold_t h) const { + return get(field, idx, h); +} +template +Variant PayloadIface::Get(int 
field, int idx) const { + return get(field, idx, Variant::no_hold_t{}); +} + +// Get element(s) by field name template -void PayloadIface::Get(int field, VariantArray &keys, bool enableHold) const { +void PayloadIface::Get(std::string_view field, VariantArray &kvs, Variant::hold_t h) const { + get(t_.FieldByName(field), kvs, h); +} +template +void PayloadIface::Get(std::string_view field, VariantArray &kvs) const { + get(t_.FieldByName(field), kvs, Variant::no_hold_t{}); +} + +template +template +void PayloadIface::get(int field, VariantArray &keys, HoldT h) const { assertrx(field < NumFields()); keys.clear(); if (t_.Field(field).IsArray()) { @@ -20,15 +51,16 @@ void PayloadIface::Get(int field, VariantArray &keys, bool enableHold) const for (int i = 0; i < arr->len; i++) { PayloadFieldValue pv(t_.Field(field), v_->Ptr() + arr->offset + i * t_.Field(field).ElemSizeof()); - keys.push_back(pv.Get(enableHold)); + keys.push_back(pv.Get(h)); } } else { - keys.push_back(Field(field).Get(enableHold)); + keys.push_back(Field(field).Get(h)); } } template -Variant PayloadIface::Get(int field, int idx, bool enableHold) const { +template +Variant PayloadIface::get(int field, int idx, HoldT h) const { assertrx(field < NumFields()); if (t_.Field(field).IsArray()) { @@ -36,20 +68,13 @@ Variant PayloadIface::Get(int field, int idx, bool enableHold) const { assertf(idx < arr->len, "Field '%s.%s' bound exceed idx %d > len %d", Type().Name(), Type().Field(field).Name(), idx, arr->len); PayloadFieldValue pv(t_.Field(field), v_->Ptr() + arr->offset + idx * t_.Field(field).ElemSizeof()); - return pv.Get(enableHold); - + return pv.Get(h); } else { assertf(idx == 0, "Field '%s.%s' is not array, can't get idx %d", Type().Name(), Type().Field(field).Name(), idx); - return Field(field).Get(enableHold); + return Field(field).Get(h); } } -// Get element(s) by field index -template -void PayloadIface::Get(std::string_view field, VariantArray &kvs, bool enableHold) const { - 
Get(t_.FieldByName(field), kvs, enableHold); -} - template void PayloadIface::GetByJsonPath(std::string_view jsonPath, TagsMatcher &tagsMatcher, VariantArray &kvs, KeyValueType expectedType) const { diff --git a/cpp_src/core/payload/payloadiface.h b/cpp_src/core/payload/payloadiface.h index b95f6a791..d1931b3f0 100644 --- a/cpp_src/core/payload/payloadiface.h +++ b/cpp_src/core/payload/payloadiface.h @@ -27,9 +27,11 @@ class PayloadIface { void Reset() noexcept { memset(v_->Ptr(), 0, t_.TotalSize()); } // Get element(s) by field index - void Get(int field, VariantArray &, bool enableHold = false) const; + void Get(int field, VariantArray &, Variant::hold_t) const; + void Get(int field, VariantArray &) const; // Get element by field and array index - [[nodiscard]] Variant Get(int field, int idx, bool enableHold = false) const; + [[nodiscard]] Variant Get(int field, int idx, Variant::hold_t) const; + [[nodiscard]] Variant Get(int field, int idx) const; // Get array as span of typed elements template @@ -105,8 +107,9 @@ class PayloadIface { template ::value>::type * = nullptr> T CopyTo(PayloadType t, bool newFields = true); - // Get element(s) by field index - void Get(std::string_view field, VariantArray &, bool enableHold = false) const; + // Get element(s) by field name + void Get(std::string_view field, VariantArray &, Variant::hold_t) const; + void Get(std::string_view field, VariantArray &) const; // Get element(s) by json path void GetByJsonPath(std::string_view jsonPath, TagsMatcher &tagsMatcher, VariantArray &, KeyValueType expectedType) const; @@ -163,6 +166,7 @@ class PayloadIface { void GetJSON(const TagsMatcher &tm, WrSerializer &ser); private: + enum class HoldPolicy : bool { Hold, NoHold }; template ::value>::type * = nullptr> T CopyWithNewOrUpdatedFields(PayloadType t); @@ -174,6 +178,12 @@ class PayloadIface { void getByJsonPath(const P &path, VariantArray &, KeyValueType expectedType) const; template ::value>::type * = nullptr> void setArray(int 
field, const VariantArray &keys, bool append); + template + void get(int field, VariantArray &, HoldT h) const; + template + [[nodiscard]] Variant get(int field, int idx, HoldT h) const; + template + void get(std::string_view field, VariantArray &, HoldT h) const; // Array of elements types , not owning const PayloadTypeImpl &t_; diff --git a/cpp_src/core/payload/payloadvalue.h b/cpp_src/core/payload/payloadvalue.h index b94ba1198..87772037b 100644 --- a/cpp_src/core/payload/payloadvalue.h +++ b/cpp_src/core/payload/payloadvalue.h @@ -54,8 +54,8 @@ class PayloadValue { void Resize(size_t oldSize, size_t newSize); // Get data pointer uint8_t *Ptr() const noexcept { return p_ + sizeof(dataHeader); } - void SetLSN(int64_t lsn) { header()->lsn = lsn; } - int64_t GetLSN() const { return p_ ? header()->lsn : 0; } + void SetLSN(int64_t lsn) noexcept { header()->lsn = lsn; } + int64_t GetLSN() const noexcept { return p_ ? header()->lsn : 0; } bool IsFree() const noexcept { return bool(p_ == nullptr); } void Free() noexcept { release(); } size_t GetCapacity() const noexcept { return header()->cap; } diff --git a/cpp_src/core/query/query.cc b/cpp_src/core/query/query.cc index c8e5fe0fa..b4e37c837 100644 --- a/cpp_src/core/query/query.cc +++ b/cpp_src/core/query/query.cc @@ -293,7 +293,7 @@ void Query::deserialize(Serializer &ser, bool &hasJoinConditions) { Debug(ser.GetVarUint()); break; case QueryStrictMode: - strictMode_ = StrictMode(ser.GetVarUint()); + Strict(StrictMode(ser.GetVarUint())); break; case QueryLimit: count_ = ser.GetVarUint(); @@ -305,7 +305,7 @@ void Query::deserialize(Serializer &ser, bool &hasJoinConditions) { calcTotal_ = CalcTotalMode(ser.GetVarUint()); break; case QuerySelectFilter: - selectFilter_.push_back(std::string(ser.GetVString())); + selectFilter_.emplace_back(ser.GetVString()); break; case QueryEqualPosition: { const unsigned bracketPosition = ser.GetVarUint(); @@ -315,16 +315,16 @@ void Query::deserialize(Serializer &ser, bool 
&hasJoinConditions) { break; } case QueryExplain: - explain_ = true; + Explain(true); break; case QueryWithRank: withRank_ = true; break; case QuerySelectFunction: - selectFunctions_.push_back(std::string(ser.GetVString())); + selectFunctions_.emplace_back(ser.GetVString()); break; case QueryDropField: { - Drop(std::string(ser.GetVString())); + Drop(ser.GetVString()); break; } case QueryUpdateFieldV2: { @@ -416,35 +416,39 @@ void Query::Serialize(WrSerializer &ser, uint8_t mode) const { ser.PutVString(NsName()); entries_.Serialize(ser, subQueries_); - for (const auto &agg : aggregations_) { - ser.PutVarUint(QueryAggregation); - ser.PutVarUint(agg.Type()); - ser.PutVarUint(agg.Fields().size()); - for (const auto &field : agg.Fields()) { - ser.PutVString(field); - } - for (const auto &se : agg.Sorting()) { - ser.PutVarUint(QueryAggregationSort); - ser.PutVString(se.expression); - ser.PutVarUint(se.desc); - } - if (agg.Limit() != QueryEntry::kDefaultLimit) { - ser.PutVarUint(QueryAggregationLimit); - ser.PutVarUint(agg.Limit()); - } - if (agg.Offset() != QueryEntry::kDefaultOffset) { - ser.PutVarUint(QueryAggregationOffset); - ser.PutVarUint(agg.Offset()); + if (!(mode & SkipAggregations)) { + for (const auto &agg : aggregations_) { + ser.PutVarUint(QueryAggregation); + ser.PutVarUint(agg.Type()); + ser.PutVarUint(agg.Fields().size()); + for (const auto &field : agg.Fields()) { + ser.PutVString(field); + } + for (const auto &se : agg.Sorting()) { + ser.PutVarUint(QueryAggregationSort); + ser.PutVString(se.expression); + ser.PutVarUint(se.desc); + } + if (agg.Limit() != QueryEntry::kDefaultLimit) { + ser.PutVarUint(QueryAggregationLimit); + ser.PutVarUint(agg.Limit()); + } + if (agg.Offset() != QueryEntry::kDefaultOffset) { + ser.PutVarUint(QueryAggregationOffset); + ser.PutVarUint(agg.Offset()); + } } } - for (const auto &sortginEntry : sortingEntries_) { - ser.PutVarUint(QuerySortIndex); - ser.PutVString(sortginEntry.expression); - ser.PutVarUint(sortginEntry.desc); 
- int cnt = forcedSortOrder_.size(); - ser.PutVarUint(cnt); - for (auto &kv : forcedSortOrder_) ser.PutVariant(kv); + if (!(mode & SkipSortEntries)) { + for (const auto &sortingEntry : sortingEntries_) { + ser.PutVarUint(QuerySortIndex); + ser.PutVString(sortingEntry.expression); + ser.PutVarUint(sortingEntry.desc); + int cnt = forcedSortOrder_.size(); + ser.PutVarUint(cnt); + for (auto &kv : forcedSortOrder_) ser.PutVariant(kv); + } } if (mode & WithJoinEntries) { @@ -475,12 +479,14 @@ void Query::Serialize(WrSerializer &ser, uint8_t mode) const { } } - ser.PutVarUint(QueryDebugLevel); - ser.PutVarUint(debugLevel_); + if (!(mode & SkipExtraParams)) { + ser.PutVarUint(QueryDebugLevel); + ser.PutVarUint(debugLevel_); - if (strictMode_ != StrictModeNotSet) { - ser.PutVarUint(QueryStrictMode); - ser.PutVarUint(int(strictMode_)); + if (strictMode_ != StrictModeNotSet) { + ser.PutVarUint(QueryStrictMode); + ser.PutVarUint(int(strictMode_)); + } } if (!(mode & SkipLimitOffset)) { @@ -494,22 +500,24 @@ void Query::Serialize(WrSerializer &ser, uint8_t mode) const { } } - if (HasCalcTotal()) { - ser.PutVarUint(QueryReqTotal); - ser.PutVarUint(CalcTotal()); - } + if (!(mode & SkipExtraParams)) { + if (HasCalcTotal()) { + ser.PutVarUint(QueryReqTotal); + ser.PutVarUint(CalcTotal()); + } - for (const auto &sf : selectFilter_) { - ser.PutVarUint(QuerySelectFilter); - ser.PutVString(sf); - } + for (const auto &sf : selectFilter_) { + ser.PutVarUint(QuerySelectFilter); + ser.PutVString(sf); + } - if (explain_) { - ser.PutVarUint(QueryExplain); - } + if (explain_) { + ser.PutVarUint(QueryExplain); + } - if (withRank_) { - ser.PutVarUint(QueryWithRank); + if (withRank_) { + ser.PutVarUint(QueryWithRank); + } } for (const auto &field : updateFields_) { @@ -534,15 +542,17 @@ void Query::Serialize(WrSerializer &ser, uint8_t mode) const { if (!(mode & SkipJoinQueries)) { for (const auto &jq : joinQueries_) { - ser.PutVarUint(static_cast(jq.joinType)); - jq.Serialize(ser, 
WithJoinEntries); + if (!(mode & SkipLeftJoinQueries) || jq.joinType != JoinType::LeftJoin) { + ser.PutVarUint(static_cast(jq.joinType)); + jq.Serialize(ser, WithJoinEntries); + } } } if (!(mode & SkipMergeQueries)) { for (const auto &mq : mergeQueries_) { ser.PutVarUint(static_cast(mq.joinType)); - mq.Serialize(ser, mode | WithJoinEntries); + mq.Serialize(ser, (mode | WithJoinEntries) & (~SkipSortEntries)); } } } diff --git a/cpp_src/core/query/query.h b/cpp_src/core/query/query.h index bad029f06..f4d5f6c3f 100644 --- a/cpp_src/core/query/query.h +++ b/cpp_src/core/query/query.h @@ -86,13 +86,15 @@ class Query { /// @param cond - type of condition. /// @param val - value of index to be compared with. /// @return Query object ready to be executed. - template > * = nullptr> - Query &Where(Str &&field, CondType cond, Input val) & { - return Where(std::forward(field), cond, {std::forward(val)}); + template > * = nullptr, + std::enable_if_t> * = nullptr> + Query &Where(Str &&field, CondType cond, Input &&val) & { + return Where(std::forward(field), cond, VariantArray{Variant{std::forward(val)}}); } - template > * = nullptr> - [[nodiscard]] Query &&Where(Str &&field, CondType cond, Input val) && { - return std::move(Where(std::forward(field), cond, {std::move(val)})); + template > * = nullptr, + std::enable_if_t> * = nullptr> + [[nodiscard]] Query &&Where(Str &&field, CondType cond, Input &&val) && { + return std::move(Where(std::forward(field), cond, VariantArray{Variant{std::forward(val)}})); } /// Adds a condition with several values. Analog to sql Where clause. 
@@ -220,6 +222,7 @@ class Query { } else { q.checkSubQueryWithData(); if (!q.selectFilter_.empty() && !q.HasLimit() && !q.HasOffset()) { + // Transforms main query condition into subquerie's condition q.sortingEntries_.clear(); q.Where(q.selectFilter_[0], cond, std::move(values)); q.selectFilter_.clear(); @@ -249,6 +252,14 @@ class Query { [[nodiscard]] Query &&Where(Query &&q, CondType cond, std::initializer_list values) && { return std::move(Where(std::move(q), cond, VariantArray::Create(values))); } + template > * = nullptr> + [[nodiscard]] Query &Where(Query &&q, CondType cond, Input &&val) & { + return Where(std::move(q), cond, VariantArray{Variant{std::forward(val)}}); + } + template > * = nullptr> + [[nodiscard]] Query &&Where(Query &&q, CondType cond, Input &&val) && { + return std::move(Where(std::move(q), cond, VariantArray{Variant{std::forward(val)}})); + } template > * = nullptr> Query &Where(Str &&field, CondType cond, Query &&q) & { diff --git a/cpp_src/core/query/sql/sqlencoder.cc b/cpp_src/core/query/sql/sqlencoder.cc index 6a745bf1c..b90f97685 100644 --- a/cpp_src/core/query/sql/sqlencoder.cc +++ b/cpp_src/core/query/sql/sqlencoder.cc @@ -56,7 +56,7 @@ namespace reindexer { void SQLEncoder::DumpSingleJoinQuery(size_t idx, WrSerializer &ser, bool stripArgs) const { assertrx(idx < query_.GetJoinQueries().size()); const auto &jq = query_.GetJoinQueries()[idx]; - ser << ' ' << jq.joinType; + ser << jq.joinType; if (jq.Entries().Empty() && !jq.HasLimit() && jq.sortingEntries_.empty()) { ser << ' ' << jq.NsName() << " ON "; } else { @@ -82,6 +82,7 @@ void SQLEncoder::DumpSingleJoinQuery(size_t idx, WrSerializer &ser, bool stripAr void SQLEncoder::dumpJoined(WrSerializer &ser, bool stripArgs) const { for (size_t i = 0; i < query_.GetJoinQueries().size(); ++i) { if (query_.GetJoinQueries()[i].joinType == JoinType::LeftJoin) { + ser << ' '; DumpSingleJoinQuery(i, ser, stripArgs); } } @@ -95,7 +96,7 @@ void SQLEncoder::dumpMerged(WrSerializer &ser, bool 
stripArgs) const { } } -std::string escapeQuotes(std::string str) { +static std::string escapeQuotes(std::string str) { for (size_t i = 0; i < str.size(); ++i) { if (str[i] == '\'' && (i == 0 || str[i - 1] != '\\')) str.insert(i++, 1, '\\'); } diff --git a/cpp_src/core/query/sql/sqlparser.cc b/cpp_src/core/query/sql/sqlparser.cc index fc13f00b3..a17077266 100644 --- a/cpp_src/core/query/sql/sqlparser.cc +++ b/cpp_src/core/query/sql/sqlparser.cc @@ -23,13 +23,13 @@ Query SQLParser::Parse(std::string_view q) { bool SQLParser::reachedAutocompleteToken(tokenizer &parser, const token &tok) { size_t pos = parser.getPos() + tok.text().length(); - return (pos > ctx_.suggestionsPos); + return pos > ctx_.suggestionsPos; } -token SQLParser::peekSqlToken(tokenizer &parser, int tokenType, bool toLower) { +token SQLParser::peekSqlToken(tokenizer &parser, SqlTokenType tokenType, bool toLower) { token tok = parser.peek_token(toLower ? tokenizer::flags::to_lower : tokenizer::flags::no_flags); - bool eof = ((parser.getPos() + tok.text().length()) == parser.length()); - if (ctx_.autocompleteMode && !tok.text().empty() && reachedAutocompleteToken(parser, tok)) { + const bool eof = ((parser.getPos() + tok.text().length()) == parser.length()); + if (ctx_.autocompleteMode && reachedAutocompleteToken(parser, tok)) { size_t tokenLen = 0; if (ctx_.suggestionsPos >= parser.getPos()) { tokenLen = ctx_.suggestionsPos - parser.getPos() + 1; @@ -47,6 +47,10 @@ token SQLParser::peekSqlToken(tokenizer &parser, int tokenType, bool toLower) { int SQLParser::Parse(tokenizer &parser) { parser.skip_space(); + if (parser.length() == 0) { + ctx_.suggestions.emplace_back(std::string(), Start); + return 0; + } token tok = peekSqlToken(parser, Start); if (tok.text() == "explain"sv) { query_.Explain(true); @@ -57,7 +61,7 @@ int SQLParser::Parse(tokenizer &parser) { if (tok.text() == "select"sv) { query_.type_ = QuerySelect; parser.next_token(); - selectParse(parser); + selectParse(parser); } else if 
(tok.text() == "delete"sv) { query_.type_ = QueryDelete; tok = parser.next_token(); @@ -82,15 +86,16 @@ int SQLParser::Parse(tokenizer &parser) { return 0; } +template int SQLParser::selectParse(tokenizer &parser) { // Get filter token tok; bool wasSelectFilter = false; std::vector selectFilters; - while (!parser.end()) { + while (true) { auto nameWithCase = peekSqlToken(parser, SingleSelectFieldSqlToken, false); auto name = parser.next_token(); - tok = peekSqlToken(parser, SelectFieldsListSqlToken); + tok = peekSqlToken(parser, FromSqlToken); if (tok.text() == "("sv) { parser.next_token(); tok = peekSqlToken(parser, SingleSelectFieldSqlToken); @@ -165,7 +170,7 @@ int SQLParser::selectParse(tokenizer &parser) { throw Error(errParams, "Expected ')', but found %s, %s", tok.text(), parser.where()); } parser.next_token(); - tok = peekSqlToken(parser, SelectFieldsListSqlToken); + tok = peekSqlToken(parser, FromSqlToken); } else if (name.text() != "*"sv) { if (!query_.CanAddSelectFilter()) { @@ -190,18 +195,19 @@ int SQLParser::selectParse(tokenizer &parser) { } peekSqlToken(parser, FromSqlToken); - if (parser.next_token().text() != "from"sv) + if (parser.next_token().text() != "from"sv) { throw Error(errParams, "Expected 'FROM', but found '%s' in query, %s", tok.text(), parser.where()); + } peekSqlToken(parser, NamespaceSqlToken); query_.SetNsName(parser.next_token().text()); ctx_.updateLinkedNs(query_.NsName()); - while (!parser.end()) { - tok = peekSqlToken(parser, SelectConditionsStart); + do { + tok = peekSqlToken(parser, nested == Nested::Yes ? 
NestedSelectConditionsStart : SelectConditionsStart); if (tok.text() == "where"sv) { parser.next_token(); - parseWhere(parser); + parseWhere(parser); } else if (tok.text() == "limit"sv) { parser.next_token(); tok = parser.next_token(); @@ -218,40 +224,44 @@ int SQLParser::selectParse(tokenizer &parser) { parser.next_token(); parseOrderBy(parser, query_.sortingEntries_, query_.forcedSortOrder_); ctx_.updateLinkedNs(query_.NsName()); - } else if (tok.text() == "join"sv) { - parser.next_token(); - parseJoin(JoinType::LeftJoin, parser); - } else if (tok.text() == "left"sv) { - parser.next_token(); - peekSqlToken(parser, LeftSqlToken); - if (parser.next_token().text() != "join"sv) { - throw Error(errParseSQL, "Expected JOIN, but found '%s' in query, %s", tok.text(), parser.where()); - } - parseJoin(JoinType::LeftJoin, parser); - } else if (tok.text() == "inner"sv) { - parser.next_token(); - peekSqlToken(parser, InnerSqlToken); - if (parser.next_token().text() != "join") { - throw Error(errParseSQL, "Expected JOIN, but found '%s' in query, %s", tok.text(), parser.where()); + } else if constexpr (nested == Nested::No) { + if (tok.text() == "join"sv) { + parser.next_token(); + parseJoin(JoinType::LeftJoin, parser); + } else if (tok.text() == "left"sv) { + parser.next_token(); + peekSqlToken(parser, LeftSqlToken); + if (parser.next_token().text() != "join"sv) { + throw Error(errParseSQL, "Expected JOIN, but found '%s' in query, %s", tok.text(), parser.where()); + } + parseJoin(JoinType::LeftJoin, parser); + } else if (tok.text() == "inner"sv) { + parser.next_token(); + peekSqlToken(parser, InnerSqlToken); + if (parser.next_token().text() != "join") { + throw Error(errParseSQL, "Expected JOIN, but found '%s' in query, %s", tok.text(), parser.where()); + } + auto jtype = (query_.NextOp() == OpOr) ? 
JoinType::OrInnerJoin : JoinType::InnerJoin; + query_.And(); + parseJoin(jtype, parser); + } else if (tok.text() == "merge"sv) { + parser.next_token(); + parseMerge(parser); + } else if (tok.text() == "or"sv) { + parser.next_token(); + query_.Or(); + } else { + break; } - auto jtype = (query_.NextOp() == OpOr) ? JoinType::OrInnerJoin : JoinType::InnerJoin; - query_.And(); - parseJoin(jtype, parser); - } else if (tok.text() == "merge"sv) { - parser.next_token(); - parseMerge(parser); - } else if (tok.text() == "or"sv) { - parser.next_token(); - query_.Or(); } else { break; } - } + } while (!parser.end()); return 0; } template -static void MoveAppend(T &dst, T &src) { +static void moveAppend(T &dst, T &src) { if (dst.empty()) { dst = std::move(src); } else { @@ -261,16 +271,20 @@ static void MoveAppend(T &dst, T &src) { } } -int SQLParser::nestedSelectParse(SQLParser &parser, tokenizer &tok) { - try { - int res = parser.selectParse(tok); - MoveAppend(ctx_.suggestions, parser.ctx_.suggestions); - return res; - } catch (...) 
{ - MoveAppend(ctx_.suggestions, parser.ctx_.suggestions); - throw; +class SQLParser::ParserContextsAppendGuard { +public: + ParserContextsAppendGuard(SqlParsingCtx &mainCtx, SqlParsingCtx &nestedCtx) noexcept : mainCtx_{mainCtx}, nestedCtx_{nestedCtx} {} + ~ParserContextsAppendGuard() { + moveAppend(mainCtx_.suggestions, nestedCtx_.suggestions); + if (!mainCtx_.foundPossibleSuggestions && nestedCtx_.foundPossibleSuggestions) { + mainCtx_.suggestionLinkedNs = std::move(nestedCtx_.suggestionLinkedNs); + } } -} + +private: + SqlParsingCtx &mainCtx_; + SqlParsingCtx &nestedCtx_; +}; static KeyValueType detectValueType(const token &currTok) { const std::string_view val = currTok.text(); @@ -412,30 +426,8 @@ int SQLParser::deleteParse(tokenizer &parser) { query_.SetNsName(parser.next_token().text()); ctx_.updateLinkedNs(query_.NsName()); - while (!parser.end()) { - tok = peekSqlToken(parser, DeleteConditionsStart); - if (tok.text() == "where"sv) { - parser.next_token(); - parseWhere(parser); - } else if (tok.text() == "limit"sv) { - parser.next_token(); - tok = parser.next_token(); - if (tok.type != TokenNumber) - throw Error(errParseSQL, "Expected number, but found '%s' in query, %s", tok.text(), parser.where()); - query_.Limit(stoi(tok.text())); - } else if (tok.text() == "offset"sv) { - parser.next_token(); - tok = parser.next_token(); - if (tok.type != TokenNumber) - throw Error(errParseSQL, "Expected number, but found '%s' in query, %s", tok.text(), parser.where()); - query_.Offset(stoi(tok.text())); - } else if (tok.text() == "order"sv) { - parser.next_token(); - parseOrderBy(parser, query_.sortingEntries_, query_.forcedSortOrder_); - ctx_.updateLinkedNs(query_.NsName()); - } else - break; - } + parseModifyConditions(parser); + return 0; } @@ -463,8 +455,9 @@ static void addUpdateValue(const token &currTok, tokenizer &parser, UpdateEntry auto eof = [](tokenizer &parser, bool &inArray) -> bool { if (parser.end()) return true; token nextTok = parser.peek_token(); - 
bool result = - (nextTok.text() == "where"sv) || (!inArray && nextTok.text() == "]"sv) || (!inArray && nextTok.text() == ","sv); + bool result = (nextTok.text() == "where"sv) || (nextTok.text() == "order"sv) || (nextTok.text() == "limit"sv) || + (nextTok.text() == "offset"sv) || (!inArray && nextTok.text() == "]"sv) || + (!inArray && nextTok.text() == ","sv); if (nextTok.text() == "["sv && !inArray) inArray = true; if (nextTok.text() == "]"sv && inArray) inArray = false; return result; @@ -581,15 +574,39 @@ int SQLParser::updateParse(tokenizer &parser) { throw Error(errParseSQL, "Expected 'SET' or 'DROP' but found '%s' in query %s", tok.text(), parser.where()); } - tok = peekSqlToken(parser, WhereSqlToken); - if (tok.text() == "where"sv) { - parser.next_token(); - parseWhere(parser); - } + parseModifyConditions(parser); return 0; } +void SQLParser::parseModifyConditions(tokenizer &parser) { + while (!parser.end()) { + auto tok = peekSqlToken(parser, ModifyConditionsStart); + if (tok.text() == "where"sv) { + parser.next_token(); + parseWhere(parser); + } else if (tok.text() == "limit"sv) { + parser.next_token(); + tok = parser.next_token(); + if (tok.type != TokenNumber) + throw Error(errParseSQL, "Expected number, but found '%s' in query, %s", tok.text(), parser.where()); + query_.Limit(stoi(tok.text())); + } else if (tok.text() == "offset"sv) { + parser.next_token(); + tok = parser.next_token(); + if (tok.type != TokenNumber) + throw Error(errParseSQL, "Expected number, but found '%s' in query, %s", tok.text(), parser.where()); + query_.Offset(stoi(tok.text())); + } else if (tok.text() == "order"sv) { + parser.next_token(); + parseOrderBy(parser, query_.sortingEntries_, query_.forcedSortOrder_); + ctx_.updateLinkedNs(query_.NsName()); + } else { + break; + } + } +} + int SQLParser::truncateParse(tokenizer &parser) { parser.next_token(); token tok = peekSqlToken(parser, NamespaceSqlToken); @@ -599,7 +616,7 @@ int SQLParser::truncateParse(tokenizer &parser) { return 
0; } -bool isCondition(std::string_view text) noexcept { +static bool isCondition(std::string_view text) noexcept { return text == "="sv || text == "=="sv || text == "<>"sv || iequals(text, "is"sv) || text == ">"sv || text == ">="sv || text == "<"sv || text == "<="sv || iequals(text, "in"sv) || iequals(text, "range"sv) || iequals(text, "like"sv) || iequals(text, "allset"sv); } @@ -607,13 +624,16 @@ bool isCondition(std::string_view text) noexcept { Query SQLParser::parseSubQuery(tokenizer &parser) { Query subquery; SQLParser subparser(subquery); + const ParserContextsAppendGuard guard{ctx_, subparser.ctx_}; if (ctx_.autocompleteMode) { subparser.ctx_.suggestionsPos = ctx_.suggestionsPos; subparser.ctx_.autocompleteMode = true; + subparser.ctx_.foundPossibleSuggestions = ctx_.foundPossibleSuggestions; + subparser.ctx_.possibleSuggestionDetectedInThisClause = ctx_.possibleSuggestionDetectedInThisClause; } // skip select auto tok = parser.next_token(); - nestedSelectParse(subparser, parser); + subparser.selectParse(parser); tok = parser.next_token(); if (tok.text() != ")"sv) { throw Error(errParseSQL, "Expected ')', but found %s, %s", tok.text(), parser.where()); @@ -654,7 +674,8 @@ void SQLParser::parseWhereCondition(tokenizer &parser, T &&firstArg, OpType op) tok = parser.next_token(false); } else if (tok.text() == "("sv) { if constexpr (!std::is_same_v) { - if (iequals(parser.peek_token().text(), "select"sv) && !isCondition(parser.peek_second_token().text())) { + if (iequals(peekSqlToken(parser, WhereFieldValueOrSubquerySqlToken, false).text(), "select"sv) && + !isCondition(parser.peek_second_token().text())) { query_.NextOp(op).Where(std::forward(firstArg), condition, parseSubQuery(parser)); return; } @@ -682,11 +703,12 @@ void SQLParser::parseWhereCondition(tokenizer &parser, T &&firstArg, OpType op) } } +template int SQLParser::parseWhere(tokenizer &parser) { token tok; OpType nextOp = OpAnd; - tok = peekSqlToken(parser, WhereFieldSqlToken, false); + tok = 
peekSqlToken(parser, nested == Nested::Yes ? NestedWhereFieldSqlToken : WhereFieldSqlToken, false); if (iequals(tok.text(), "not"sv)) { nextOp = OpNot; @@ -696,14 +718,11 @@ int SQLParser::parseWhere(tokenizer &parser) { size_t lastBracketPosition = 0; int openBracketsCount = 0; while (!parser.end()) { - tok = peekSqlToken(parser, WhereFieldSqlToken, false); + tok = peekSqlToken(parser, nested == Nested::Yes ? NestedWhereFieldSqlToken : WhereFieldSqlToken, false); parser.next_token(false); if (tok.text() == "("sv) { - tok = peekSqlToken(parser, WhereFieldSqlToken, false); - if (iequals(tok.text(), "select"sv) && !isCondition(parser.peek_second_token().text())) { - parseWhereCondition(parser, parseSubQuery(parser), nextOp); - nextOp = OpAnd; - } else { + tok = peekSqlToken(parser, nested == Nested::Yes ? NestedWhereFieldSqlToken : WhereFieldOrSubquerySqlToken, false); + if (nested == Nested::Yes || !iequals(tok.text(), "select"sv) || isCondition(parser.peek_second_token().text())) { query_.NextOp(nextOp); query_.OpenBracket(); ++openBracketsCount; @@ -714,40 +733,42 @@ int SQLParser::parseWhere(tokenizer &parser) { } else { nextOp = OpAnd; } + continue; } - continue; - } - if (tok.type == TokenNumber) { - throw Error(errParseSQL, "Number is invalid at this location. (text = '%s' location = %s)", tok.text(), parser.where()); - } - if (tok.type == TokenString) { - throw Error(errParseSQL, "String is invalid at this location. 
(text = '%s' location = %s)", tok.text(), parser.where()); - } - - if (tok.type == TokenName) { - if (iequals(tok.text(), "join"sv)) { - parseJoin(JoinType::LeftJoin, parser); - } else if (iequals(tok.text(), "left"sv)) { - peekSqlToken(parser, LeftSqlToken); - if (parser.next_token().text() != "join"sv) { - throw Error(errParseSQL, "Expected JOIN, but found '%s' in query, %s", tok.text(), parser.where()); - } - parseJoin(JoinType::LeftJoin, parser); - } else if (iequals(tok.text(), "inner"sv)) { - peekSqlToken(parser, InnerSqlToken); - if (parser.next_token().text() != "join") { - throw Error(errParseSQL, "Expected JOIN, but found '%s' in query, %s", tok.text(), parser.where()); - } - auto jtype = nextOp == OpOr ? JoinType::OrInnerJoin : JoinType::InnerJoin; - query_.And(); - parseJoin(jtype, parser); - } else if (iequals(tok.text(), "st_dwithin"sv)) { + parseWhereCondition(parser, parseSubQuery(parser), nextOp); + nextOp = OpAnd; + } else if (tok.type == TokenName) { + if (iequals(tok.text(), "st_dwithin"sv)) { parseDWithin(parser, nextOp); nextOp = OpAnd; + } else if constexpr (nested == Nested::No) { + if (iequals(tok.text(), "join"sv)) { + parseJoin(JoinType::LeftJoin, parser); + } else if (iequals(tok.text(), "left"sv)) { + peekSqlToken(parser, LeftSqlToken); + if (parser.next_token().text() != "join"sv) { + throw Error(errParseSQL, "Expected JOIN, but found '%s' in query, %s", tok.text(), parser.where()); + } + parseJoin(JoinType::LeftJoin, parser); + } else if (iequals(tok.text(), "inner"sv)) { + peekSqlToken(parser, InnerSqlToken); + if (parser.next_token().text() != "join") { + throw Error(errParseSQL, "Expected JOIN, but found '%s' in query, %s", tok.text(), parser.where()); + } + auto jtype = nextOp == OpOr ? 
JoinType::OrInnerJoin : JoinType::InnerJoin; + query_.And(); + parseJoin(jtype, parser); + } else { + parseWhereCondition(parser, std::string{tok.text()}, nextOp); + nextOp = OpAnd; + } } else { parseWhereCondition(parser, std::string{tok.text()}, nextOp); nextOp = OpAnd; } + } else if (tok.type == TokenNumber || tok.type == TokenString) { + throw Error(errParseSQL, "%s is invalid at this location. (text = '%s' location = %s)", + tok.type == TokenNumber ? "Number" : "String", tok.text(), parser.where()); } tok = parser.peek_token(); @@ -768,7 +789,7 @@ int SQLParser::parseWhere(tokenizer &parser) { if (iequals(tok.text(), "and"sv)) { nextOp = OpAnd; parser.next_token(); - tok = peekSqlToken(parser, AndSqlToken, false); + tok = peekSqlToken(parser, nested == Nested::Yes ? NestedAndSqlToken : AndSqlToken, false); if (iequals(tok.text(), "not"sv)) { parser.next_token(); nextOp = OpNot; @@ -961,10 +982,14 @@ void SQLParser::parseDWithin(tokenizer &parser, OpType nextOp) { void SQLParser::parseJoin(JoinType type, tokenizer &parser) { JoinedQuery jquery; SQLParser jparser(jquery); + const ParserContextsAppendGuard guard{ctx_, jparser.ctx_}; if (ctx_.autocompleteMode) { jparser.ctx_.suggestionsPos = ctx_.suggestionsPos; jparser.ctx_.autocompleteMode = true; + jparser.ctx_.foundPossibleSuggestions = ctx_.foundPossibleSuggestions; + jparser.ctx_.possibleSuggestionDetectedInThisClause = ctx_.possibleSuggestionDetectedInThisClause; } + peekSqlToken(parser, NamespaceSqlToken); auto tok = parser.next_token(); if (tok.text() == "("sv) { peekSqlToken(parser, SelectSqlToken); @@ -973,7 +998,7 @@ void SQLParser::parseJoin(JoinType type, tokenizer &parser) { throw Error(errParseSQL, "Expected 'SELECT', but found %s, %s", tok.text(), parser.where()); } - nestedSelectParse(jparser, parser); + jparser.selectParse(parser); tok = parser.next_token(); if (tok.text() != ")"sv) { @@ -992,9 +1017,12 @@ void SQLParser::parseJoin(JoinType type, tokenizer &parser) { void 
SQLParser::parseMerge(tokenizer &parser) { JoinedQuery mquery; SQLParser mparser(mquery); + const ParserContextsAppendGuard guard{ctx_, mparser.ctx_}; if (ctx_.autocompleteMode) { mparser.ctx_.suggestionsPos = ctx_.suggestionsPos; mparser.ctx_.autocompleteMode = true; + mparser.ctx_.foundPossibleSuggestions = ctx_.foundPossibleSuggestions; + mparser.ctx_.possibleSuggestionDetectedInThisClause = ctx_.possibleSuggestionDetectedInThisClause; } auto tok = parser.next_token(); if (tok.text() == "("sv) { @@ -1004,7 +1032,7 @@ void SQLParser::parseMerge(tokenizer &parser) { throw Error(errParseSQL, "Expected 'SELECT', but found %s, %s", tok.text(), parser.where()); } - nestedSelectParse(mparser, parser); + mparser.selectParse(parser); tok = parser.next_token(); if (tok.text() != ")"sv) { diff --git a/cpp_src/core/query/sql/sqlparser.h b/cpp_src/core/query/sql/sqlparser.h index b7f01f4f7..6ef7ed86c 100644 --- a/cpp_src/core/query/sql/sqlparser.h +++ b/cpp_src/core/query/sql/sqlparser.h @@ -1,8 +1,10 @@ #pragma once +#include #include #include "core/keyvalue/variant.h" #include "estl/tokenizer.h" +#include "sqltokentype.h" /// @namespace reindexer /// The base namespace @@ -15,6 +17,9 @@ class UpdateEntry; using EqualPosition_t = h_vector; class SQLParser { + class ParserContextsAppendGuard; + enum class Nested : bool { Yes = true, No = false }; + public: /// Parses pure sql select query and initializes Query object data members as a result. /// @param q - sql query. 
@@ -26,10 +31,10 @@ class SQLParser { /// Sql parser context struct SqlParsingCtx { struct SuggestionData { - SuggestionData(std::string tok, int tokType) : token(std::move(tok)), tokenType(tokType) {} + SuggestionData(std::string tok, SqlTokenType tokType) : token(std::move(tok)), tokenType(tokType) {} std::string token; - int tokenType = 0; - std::vector variants; + SqlTokenType tokenType = Start; + std::unordered_set variants; }; void updateLinkedNs(const std::string &ns) { if (autocompleteMode && (!foundPossibleSuggestions || possibleSuggestionDetectedInThisClause)) { @@ -41,7 +46,7 @@ class SQLParser { bool foundPossibleSuggestions = false; bool possibleSuggestionDetectedInThisClause = false; size_t suggestionsPos = 0; - std::vector tokens; + std::vector tokens; std::vector suggestions; std::string suggestionLinkedNs; }; @@ -56,7 +61,7 @@ class SQLParser { /// @param tokenType - token type. /// @param toLower - transform to lower representation. /// @return sql token object. - token peekSqlToken(tokenizer &parser, int tokenType, bool toLower = true); + token peekSqlToken(tokenizer &parser, SqlTokenType tokenType, bool toLower = true); /// Is current token last in autocomplete mode? bool reachedAutocompleteToken(tokenizer &parser, const token &tok); @@ -64,14 +69,9 @@ class SQLParser { /// Parses filter part of sql query. /// @param parser - tokenizer object instance. /// @return always returns zero. + template int selectParse(tokenizer &parser); - /// Parses filter part of sql query and gets suggestions from nested SQLParser - /// @param parser - nested parser object instance. - /// @param tok - tokenizer object instance. - /// @return always returns zero. - int nestedSelectParse(SQLParser &parser, tokenizer &tok); - /// Parses filter part of sql delete query. /// @param parser - tokenizer object instance. /// @return always returns zero. 
@@ -88,6 +88,7 @@ class SQLParser { int truncateParse(tokenizer &parser); /// Parse where entries + template int parseWhere(tokenizer &parser); template void parseWhereCondition(tokenizer &, T &&firstArg, OpType); @@ -116,6 +117,8 @@ class SQLParser { /// Parse merge entries void parseMerge(tokenizer &parser); + void parseModifyConditions(tokenizer &parser); + Query parseSubQuery(tokenizer &); static CondType getCondType(std::string_view cond); diff --git a/cpp_src/core/query/sql/sqlsuggester.cc b/cpp_src/core/query/sql/sqlsuggester.cc index 4cb9e73da..249e05d06 100644 --- a/cpp_src/core/query/sql/sqlsuggester.cc +++ b/cpp_src/core/query/sql/sqlsuggester.cc @@ -4,7 +4,6 @@ #include "core/query/query.h" #include "sqltokentype.h" -#include #include namespace reindexer { @@ -36,17 +35,18 @@ std::vector SQLSuggester::GetSuggestions(std::string_view q, size_t for (auto &it : suggester.ctx_.suggestions) { if (!it.variants.empty()) { - return it.variants; + return {it.variants.begin(), it.variants.end()}; } } - return std::vector(); + return {}; } -std::unordered_map> sqlTokenMatchings = { +std::unordered_map> sqlTokenMatchings = { {Start, {"explain", "select", "delete", "update", "truncate"}}, {StartAfterExplain, {"select", "delete", "update"}}, - {AggregationSqlToken, {"sum", "avg", "max", "min", "facet", "count", "distinct", "rank"}}, + {AggregationSqlToken, {"sum", "avg", "max", "min", "facet", "count", "distinct", "rank", "count_cached"}}, {SelectConditionsStart, {"where", "limit", "offset", "order", "join", "left", "inner", "equal_position", "merge", "or", ";"}}, + {NestedSelectConditionsStart, {"where", "limit", "offset", "order", "equal_position"}}, {ConditionSqlToken, {">", ">=", "<", "<=", "<>", "in", "allset", "range", "is", "==", "="}}, {WhereFieldValueSqlToken, {"null", "empty", "not"}}, {WhereFieldNegateValueSqlToken, {"null", "empty"}}, @@ -65,53 +65,55 @@ std::unordered_map> sqlTokenMatchings = { {SetSqlToken, {"set"}}, {WhereSqlToken, {"where"}}, 
{AllFieldsToken, {"*"}}, - {DeleteConditionsStart, {"where", "limit", "offset", "order"}}, + {ModifyConditionsStart, {"where", "limit", "offset", "order"}}, {UpdateOptionsSqlToken, {"set", "drop"}}, {EqualPositionSqlToken, {"equal_position"}}, {ST_DWithinSqlToken, {"ST_DWithin"}}, {ST_GeomFromTextSqlToken, {"ST_GeomFromText"}}, }; -static void getMatchingTokens(int tokenType, const std::string &token, std::vector &variants) { - const std::set &suggestions = sqlTokenMatchings[tokenType]; +static void getMatchingTokens(int tokenType, const std::string &token, std::unordered_set &variants) { + const std::unordered_set &suggestions = sqlTokenMatchings[tokenType]; for (auto it = suggestions.begin(); it != suggestions.end(); ++it) { if (isBlank(token) || checkIfStartsWith(token, *it)) { - variants.push_back(*it); + variants.insert(*it); } } } -void SQLSuggester::getMatchingNamespacesNames(const std::string &token, std::vector &variants) { +void SQLSuggester::getMatchingNamespacesNames(const std::string &token, std::unordered_set &variants) { auto namespaces = enumNamespaces_(EnumNamespacesOpts().OnlyNames()); for (auto &ns : namespaces) { - if (isBlank(token) || checkIfStartsWith(token, ns.name)) variants.push_back(ns.name); + if (isBlank(token) || checkIfStartsWith(token, ns.name)) variants.insert(ns.name); } } -void SQLSuggester::getMatchingFieldsNames(const std::string &token, std::vector &variants) { +void SQLSuggester::getMatchingFieldsNames(const std::string &token, std::unordered_set &variants) { auto namespaces = enumNamespaces_(EnumNamespacesOpts().WithFilter(ctx_.suggestionLinkedNs)); - if (namespaces.empty()) return; + if (namespaces.empty() || (namespaces.size() > 1 && isBlank(token))) return; auto dotPos = token.find('.'); - for (auto &idx : namespaces[0].indexes) { - if (idx.name_ == "#pk" || idx.name_ == "-tuple") continue; - if (isBlank(token) || (dotPos != std::string::npos ? 
checkIfStartsWith(token, idx.name_) - : checkIfStartsWith(token, idx.name_))) { - if (dotPos == std::string::npos) { - variants.push_back(idx.name_); - } else { - variants.push_back(idx.name_.substr(dotPos)); + for (const auto &ns : namespaces) { + for (auto &idx : ns.indexes) { + if (idx.name_ == "#pk" || idx.name_ == "-tuple") continue; + if (isBlank(token) || (dotPos != std::string::npos ? checkIfStartsWith(token, idx.name_) + : checkIfStartsWith(token, idx.name_))) { + if (dotPos == std::string::npos) { + variants.insert(idx.name_); + } else { + variants.insert(idx.name_.substr(dotPos)); + } } } } if (getSchema_) { - auto schema = getSchema_(namespaces[0].name); - if (schema) { - auto fieldsSuggestions = schema->GetSuggestions(token); - for (auto &suggestion : fieldsSuggestions) { - if (std::find(variants.begin(), variants.end(), suggestion) == variants.end()) { - variants.emplace_back(std::move(suggestion)); + for (const auto &ns : namespaces) { + auto schema = getSchema_(ns.name); + if (schema) { + auto fieldsSuggestions = schema->GetSuggestions(token); + for (auto &suggestion : fieldsSuggestions) { + variants.insert(std::move(suggestion)); } } } @@ -124,7 +126,8 @@ void SQLSuggester::getSuggestionsForToken(SqlParsingCtx::SuggestionData &ctx) { case StartAfterExplain: case FromSqlToken: case SelectConditionsStart: - case DeleteConditionsStart: + case NestedSelectConditionsStart: + case ModifyConditionsStart: case ConditionSqlToken: case WhereFieldValueSqlToken: case WhereFieldNegateValueSqlToken: @@ -145,21 +148,22 @@ void SQLSuggester::getSuggestionsForToken(SqlParsingCtx::SuggestionData &ctx) { getMatchingTokens(AggregationSqlToken, ctx.token, ctx.variants); getMatchingFieldsNames(ctx.token, ctx.variants); break; - case SelectFieldsListSqlToken: - getMatchingTokens(FromSqlToken, ctx.token, ctx.variants); - getMatchingTokens(AggregationSqlToken, ctx.token, ctx.variants); - getMatchingFieldsNames(ctx.token, ctx.variants); - break; case NamespaceSqlToken: 
getMatchingNamespacesNames(ctx.token, ctx.variants); break; + case WhereFieldOrSubquerySqlToken: + getMatchingTokens(SelectSqlToken, ctx.token, ctx.variants); + [[fallthrough]]; case AndSqlToken: case WhereFieldSqlToken: + getMatchingTokens(JoinTypesSqlToken, ctx.token, ctx.variants); + [[fallthrough]]; + case NestedAndSqlToken: + case NestedWhereFieldSqlToken: getMatchingTokens(NotSqlToken, ctx.token, ctx.variants); getMatchingTokens(ST_DWithinSqlToken, ctx.token, ctx.variants); getMatchingFieldsNames(ctx.token, ctx.variants); getMatchingTokens(EqualPositionSqlToken, ctx.token, ctx.variants); - getMatchingTokens(JoinTypesSqlToken, ctx.token, ctx.variants); break; case GeomFieldSqlToken: getMatchingTokens(ST_GeomFromTextSqlToken, ctx.token, ctx.variants); @@ -176,13 +180,31 @@ void SQLSuggester::getSuggestionsForToken(SqlParsingCtx::SuggestionData &ctx) { getMatchingNamespacesNames(ctx.token, ctx.variants); getMatchingFieldsNames(ctx.token, ctx.variants); break; + case WhereFieldValueOrSubquerySqlToken: + getMatchingTokens(SelectSqlToken, ctx.token, ctx.variants); + getMatchingTokens(WhereFieldValueSqlToken, ctx.token, ctx.variants); + break; + case DeleteSqlToken: + case AggregationSqlToken: + case NullSqlToken: + case EmptySqlToken: + case NotSqlToken: + case OrSqlToken: + case AllFieldsToken: + case FieldSqlToken: + case JoinSqlToken: + case MergeSqlToken: + case EqualPositionSqlToken: + case JoinTypesSqlToken: + case ST_DWithinSqlToken: + case ST_GeomFromTextSqlToken: default: break; } } bool SQLSuggester::findInPossibleTokens(int type, const std::string &v) { - const std::set &values = sqlTokenMatchings[type]; + const std::unordered_set &values = sqlTokenMatchings[type]; return (values.find(v) != values.end()); } @@ -226,29 +248,6 @@ void SQLSuggester::checkForTokenSuggestions(SqlParsingCtx::SuggestionData &data) getSuggestionsForToken(data); } } break; - case SelectFieldsListSqlToken: { - if (isBlank(data.token)) { - getSuggestionsForToken(data); - break; - } 
- - if ((data.token == ",") || (data.token == "(")) break; - - bool fromKeywordReached = false; - if (ctx_.tokens.size() > 1) { - int prevTokenType = ctx_.tokens.back(); - if ((prevTokenType == SingleSelectFieldSqlToken) || (prevTokenType == SelectFieldsListSqlToken)) { - fromKeywordReached = checkIfStartsWith(data.token, "from"); - if (fromKeywordReached && data.token.length() < strlen("from")) { - getSuggestionsForToken(data); - } - } - } - - if (!fromKeywordReached && !findInPossibleFields(data.token)) { - getSuggestionsForToken(data); - } - } break; case FromSqlToken: if (isBlank(data.token) || !iequals(data.token, "from")) { getSuggestionsForToken(data); @@ -260,13 +259,15 @@ void SQLSuggester::checkForTokenSuggestions(SqlParsingCtx::SuggestionData &data) } break; case SelectConditionsStart: - case DeleteConditionsStart: + case NestedSelectConditionsStart: + case ModifyConditionsStart: if (isBlank(data.token) || !findInPossibleTokens(data.tokenType, data.token)) { getSuggestionsForToken(data); } break; case GeomFieldSqlToken: case WhereFieldSqlToken: + case NestedWhereFieldSqlToken: if (isBlank(data.token)) { getSuggestionsForToken(data); break; @@ -310,7 +311,49 @@ void SQLSuggester::checkForTokenSuggestions(SqlParsingCtx::SuggestionData &data) case OnSqlToken: data.tokenType = NamespaceSqlToken; break; - default: + case Start: + case SelectSqlToken: + case DeleteSqlToken: + case StartAfterExplain: + case SingleSelectFieldSqlToken: + case AggregationSqlToken: + case FromSqlToken: + case NamespaceSqlToken: + case SelectConditionsStart: + case NestedSelectConditionsStart: + case WhereFieldSqlToken: + case NestedWhereFieldSqlToken: + case ConditionSqlToken: + case OpSqlToken: + case WhereOpSqlToken: + case FieldNameSqlToken: + case WhereFieldValueSqlToken: + case WhereFieldNegateValueSqlToken: + case NullSqlToken: + case EmptySqlToken: + case NotSqlToken: + case AndSqlToken: + case NestedAndSqlToken: + case OrSqlToken: + case BySqlToken: + case AllFieldsToken: + 
case SortDirectionSqlToken: + case FieldSqlToken: + case LeftSqlToken: + case InnerSqlToken: + case JoinSqlToken: + case MergeSqlToken: + case JoinedFieldNameSqlToken: + case ModifyConditionsStart: + case SetSqlToken: + case UpdateOptionsSqlToken: + case EqualPositionSqlToken: + case JoinTypesSqlToken: + case ST_DWithinSqlToken: + case ST_GeomFromTextSqlToken: + case GeomFieldSqlToken: + case WhereFieldOrSubquerySqlToken: + case WhereFieldValueOrSubquerySqlToken: break; } getSuggestionsForToken(data); @@ -334,6 +377,7 @@ void SQLSuggester::checkForTokenSuggestions(SqlParsingCtx::SuggestionData &data) } break; case AndSqlToken: + case NestedAndSqlToken: if (isBlank(data.token)) { getSuggestionsForToken(data); break; @@ -380,6 +424,22 @@ void SQLSuggester::checkForTokenSuggestions(SqlParsingCtx::SuggestionData &data) break; } break; + case DeleteSqlToken: + case AggregationSqlToken: + case NullSqlToken: + case EmptySqlToken: + case NotSqlToken: + case OrSqlToken: + case AllFieldsToken: + case FieldSqlToken: + case JoinSqlToken: + case MergeSqlToken: + case EqualPositionSqlToken: + case JoinTypesSqlToken: + case ST_DWithinSqlToken: + case ST_GeomFromTextSqlToken: + case WhereFieldOrSubquerySqlToken: + case WhereFieldValueOrSubquerySqlToken: default: getSuggestionsForToken(data); break; diff --git a/cpp_src/core/query/sql/sqlsuggester.h b/cpp_src/core/query/sql/sqlsuggester.h index 5ece47455..ded27b5e6 100644 --- a/cpp_src/core/query/sql/sqlsuggester.h +++ b/cpp_src/core/query/sql/sqlsuggester.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include "core/schema.h" #include "sqlparser.h" @@ -37,8 +38,8 @@ class SQLSuggester : public SQLParser { /// Tries to find among possible namespaces. [[nodiscard]] bool findInPossibleNamespaces(const std::string &tok); /// Gets names of indexes that start with 'token'. 
- void getMatchingFieldsNames(const std::string &token, std::vector &variants); - void getMatchingNamespacesNames(const std::string &token, std::vector &variants); + void getMatchingFieldsNames(const std::string &token, std::unordered_set &variants); + void getMatchingNamespacesNames(const std::string &token, std::unordered_set &variants); EnumNamespacesF enumNamespaces_; GetSchemaF getSchema_; }; diff --git a/cpp_src/core/query/sql/sqltokentype.h b/cpp_src/core/query/sql/sqltokentype.h index 5e67dfaab..ccc7b685e 100644 --- a/cpp_src/core/query/sql/sqltokentype.h +++ b/cpp_src/core/query/sql/sqltokentype.h @@ -7,13 +7,14 @@ enum SqlTokenType { DeleteSqlToken, StartAfterExplain, SingleSelectFieldSqlToken, - SelectFieldsListSqlToken, AggregationSqlToken, FromSqlToken, NamespaceSqlToken, SelectConditionsStart, + NestedSelectConditionsStart, WhereSqlToken, WhereFieldSqlToken, + NestedWhereFieldSqlToken, ConditionSqlToken, OpSqlToken, WhereOpSqlToken, @@ -24,6 +25,7 @@ enum SqlTokenType { EmptySqlToken, NotSqlToken, AndSqlToken, + NestedAndSqlToken, OrSqlToken, BySqlToken, AllFieldsToken, @@ -35,7 +37,7 @@ enum SqlTokenType { MergeSqlToken, OnSqlToken, JoinedFieldNameSqlToken, - DeleteConditionsStart, + ModifyConditionsStart, SetSqlToken, UpdateOptionsSqlToken, EqualPositionSqlToken, @@ -43,5 +45,7 @@ enum SqlTokenType { ST_DWithinSqlToken, ST_GeomFromTextSqlToken, GeomFieldSqlToken, + WhereFieldOrSubquerySqlToken, + WhereFieldValueOrSubquerySqlToken, }; } diff --git a/cpp_src/core/querycache.h b/cpp_src/core/querycache.h index a9c0c4c14..566015220 100644 --- a/cpp_src/core/querycache.h +++ b/cpp_src/core/querycache.h @@ -17,34 +17,52 @@ struct QueryCountCacheVal { int total_count = -1; }; -struct QueryCacheKey { +constexpr uint8_t kCountCachedKeyMode = + SkipMergeQueries | SkipLimitOffset | SkipAggregations | SkipSortEntries | SkipExtraParams | SkipLeftJoinQueries; + +class QueryCacheKey { +public: + using BufT = h_vector; + QueryCacheKey() = default; 
QueryCacheKey(QueryCacheKey&& other) = default; QueryCacheKey(const QueryCacheKey& other) = default; QueryCacheKey& operator=(QueryCacheKey&& other) = default; QueryCacheKey& operator=(const QueryCacheKey& other) = delete; - QueryCacheKey(const Query& q) { + template + QueryCacheKey(const Query& q, uint8_t mode, const JoinedSelectorsT* jnss) { WrSerializer ser; - q.Serialize(ser, (SkipJoinQueries | SkipMergeQueries | SkipLimitOffset)); - buf.reserve(ser.Len()); - buf.assign(ser.Buf(), ser.Buf() + ser.Len()); + q.Serialize(ser, mode); + if (jnss) { + for (auto& jns : *jnss) { + ser.PutVString(jns.RightNsName()); + ser.PutUInt64(jns.LastUpdateTime()); + } + } + if rx_unlikely (ser.Len() > BufT::max_size()) { + throw Error(errLogic, "QueryCacheKey: buffer overflow"); + } + buf_.assign(ser.Buf(), ser.Buf() + ser.Len()); } - size_t Size() const noexcept { return sizeof(QueryCacheKey) + (buf.is_hdata() ? 0 : buf.size()); } + size_t Size() const noexcept { return sizeof(QueryCacheKey) + (buf_.is_hdata() ? 
0 : buf_.size()); } + + QueryCacheKey(WrSerializer& ser) : buf_(ser.Buf(), ser.Buf() + ser.Len()) {} + const BufT& buf() const noexcept { return buf_; } - QueryCacheKey(WrSerializer& ser) : buf(ser.Buf(), ser.Buf() + ser.Len()) {} - h_vector buf; +private: + BufT buf_; }; struct EqQueryCacheKey { bool operator()(const QueryCacheKey& lhs, const QueryCacheKey& rhs) const noexcept { - return (lhs.buf.size() == rhs.buf.size()) && (memcmp(lhs.buf.data(), rhs.buf.data(), lhs.buf.size()) == 0); + return (lhs.buf().size() == rhs.buf().size()) && (memcmp(lhs.buf().data(), rhs.buf().data(), lhs.buf().size()) == 0); } }; struct HashQueryCacheKey { size_t operator()(const QueryCacheKey& q) const noexcept { uint64_t hash[2]; - MurmurHash3_x64_128(q.buf.data(), q.buf.size(), 0, &hash); + MurmurHash3_x64_128(q.buf().data(), q.buf().size(), 0, &hash); return hash[0]; } }; diff --git a/cpp_src/core/queryresults/joinresults.cc b/cpp_src/core/queryresults/joinresults.cc index 0a467edb5..9d27d51e9 100644 --- a/cpp_src/core/queryresults/joinresults.cc +++ b/cpp_src/core/queryresults/joinresults.cc @@ -1,6 +1,7 @@ #include "joinresults.h" #include "core/cjson/tagsmatcher.h" #include "core/payload/payloadiface.h" +#include "joinresults.h" #include "queryresults.h" #include @@ -8,11 +9,6 @@ namespace reindexer { namespace joins { -JoinedFieldIterator::JoinedFieldIterator(const NamespaceResults* parent, const ItemOffsets& offsets, uint8_t joinedFieldOrder) - : joinRes_(parent), offsets_(&offsets), order_(joinedFieldOrder) { - if (offsets_->size() > 0) updateOffset(); -} - bool JoinedFieldIterator::operator==(const JoinedFieldIterator& other) const { if (joinRes_ != other.joinRes_) throw Error(errLogic, "Comparising joined fields of different namespaces!"); if (offsets_ != other.offsets_) throw Error(errLogic, "Comparising joined fields of different items!"); @@ -20,25 +16,7 @@ bool JoinedFieldIterator::operator==(const JoinedFieldIterator& other) const { return true; } -bool 
JoinedFieldIterator::operator!=(const JoinedFieldIterator& other) const { return !operator==(other); } - -JoinedFieldIterator::const_reference JoinedFieldIterator::operator[](size_t idx) const { - assertrx(currOffset_ + idx < joinRes_->items_.size()); - return joinRes_->items_[currOffset_ + idx]; -} - -JoinedFieldIterator::reference JoinedFieldIterator::operator[](size_t idx) { - assertrx(currOffset_ + idx < joinRes_->items_.size()); - return const_cast(joinRes_->items_[currOffset_ + idx]); -} - -JoinedFieldIterator& JoinedFieldIterator::operator++() { - ++order_; - updateOffset(); - return *this; -} - -void JoinedFieldIterator::updateOffset() { +void JoinedFieldIterator::updateOffset() noexcept { currField_ = -1; if (order_ == joinRes_->GetJoinedSelectorsCount()) return; @@ -66,7 +44,7 @@ QueryResults JoinedFieldIterator::ToQueryResults() const { return QueryResults(begin, end); } -int JoinedFieldIterator::ItemsCount() const { +int JoinedFieldIterator::ItemsCount() const noexcept { assertrx(order_ < joinRes_->GetJoinedSelectorsCount()); if ((currField_ != -1) && (currField_ < uint8_t(offsets_->size()))) { @@ -76,35 +54,32 @@ int JoinedFieldIterator::ItemsCount() const { return 0; } -const JoinedFieldIterator noJoinedDataIt(nullptr, {}, 0); - -ItemIterator::ItemIterator(const NamespaceResults* parent, IdType rowid) : joinRes_(parent), rowid_(rowid) {} +static const ItemOffsets kEmptyOffsets; +static const JoinedFieldIterator kNoJoinedDataIt(nullptr, kEmptyOffsets, 0); -JoinedFieldIterator ItemIterator::begin() const { +JoinedFieldIterator ItemIterator::begin() const noexcept { auto it = joinRes_->offsets_.find(rowid_); - if (it == joinRes_->offsets_.end()) return noJoinedDataIt; - if (it->second.empty()) return noJoinedDataIt; + if (it == joinRes_->offsets_.end()) return kNoJoinedDataIt; + if (it->second.empty()) return kNoJoinedDataIt; return JoinedFieldIterator(joinRes_, it->second, 0); } JoinedFieldIterator ItemIterator::at(uint8_t joinedField) const { auto it = 
joinRes_->offsets_.find(rowid_); - if (it == joinRes_->offsets_.end()) return noJoinedDataIt; - if (it->second.empty()) return noJoinedDataIt; + if (it == joinRes_->offsets_.end()) return kNoJoinedDataIt; + if (it->second.empty()) return kNoJoinedDataIt; assertrx(joinedField < joinRes_->GetJoinedSelectorsCount()); return JoinedFieldIterator(joinRes_, it->second, joinedField); } -JoinedFieldIterator ItemIterator::end() const { +JoinedFieldIterator ItemIterator::end() const noexcept { auto it = joinRes_->offsets_.find(rowid_); - if (it == joinRes_->offsets_.end()) return noJoinedDataIt; - if (it->second.empty()) return noJoinedDataIt; + if (it == joinRes_->offsets_.end()) return kNoJoinedDataIt; + if (it->second.empty()) return kNoJoinedDataIt; return JoinedFieldIterator(joinRes_, it->second, joinRes_->GetJoinedSelectorsCount()); } -int ItemIterator::getJoinedFieldsCount() const { return joinRes_->GetJoinedSelectorsCount(); } - -int ItemIterator::getJoinedItemsCount() const { +int ItemIterator::getJoinedItemsCount() const noexcept { if (joinedItemsCount_ == -1) { joinedItemsCount_ = 0; auto it = joinRes_->offsets_.find(rowid_); @@ -116,7 +91,7 @@ int ItemIterator::getJoinedItemsCount() const { return joinedItemsCount_; } -ItemIterator ItemIterator::CreateFrom(const QueryResults::Iterator& it) { +ItemIterator ItemIterator::CreateFrom(const QueryResults::Iterator& it) noexcept { static NamespaceResults empty; static ItemIterator ret(&empty, 0); auto& itemRef = it.qr_->Items()[it.idx_]; diff --git a/cpp_src/core/queryresults/joinresults.h b/cpp_src/core/queryresults/joinresults.h index dede70033..065752d43 100644 --- a/cpp_src/core/queryresults/joinresults.h +++ b/cpp_src/core/queryresults/joinresults.h @@ -74,22 +74,35 @@ class JoinedFieldIterator { using reference = ItemRef&; using const_reference = const ItemRef&; - JoinedFieldIterator(const NamespaceResults* parent, const ItemOffsets& offsets, uint8_t joinedFieldOrder); + JoinedFieldIterator(const NamespaceResults* 
parent, const ItemOffsets& offsets, uint8_t joinedFieldOrder) noexcept + : joinRes_(parent), offsets_(&offsets), order_(joinedFieldOrder) { + if (offsets_->size() > 0) updateOffset(); + } bool operator==(const JoinedFieldIterator& other) const; - bool operator!=(const JoinedFieldIterator& other) const; - - const_reference operator[](size_t idx) const; - reference operator[](size_t idx); - JoinedFieldIterator& operator++(); + bool operator!=(const JoinedFieldIterator& other) const { return !operator==(other); } + + const_reference operator[](size_t idx) const noexcept { + assertrx(currOffset_ + idx < joinRes_->items_.size()); + return joinRes_->items_[currOffset_ + idx]; + } + reference operator[](size_t idx) noexcept { + assertrx(currOffset_ + idx < joinRes_->items_.size()); + return const_cast(joinRes_->items_[currOffset_ + idx]); + } + JoinedFieldIterator& operator++() noexcept { + ++order_; + updateOffset(); + return *this; + } ItemImpl GetItem(int itemIdx, const PayloadType& pt, const TagsMatcher& tm) const; QueryResults ToQueryResults() const; - int ItemsCount() const; + int ItemsCount() const noexcept; private: - void updateOffset(); + void updateOffset() noexcept; const NamespaceResults* joinRes_ = nullptr; const ItemOffsets* offsets_ = nullptr; uint8_t order_ = 0; @@ -101,16 +114,16 @@ class JoinedFieldIterator { /// Iterates over joined fields (if there are some) of item. 
class ItemIterator { public: - ItemIterator(const NamespaceResults* parent, IdType rowid); + ItemIterator(const NamespaceResults* parent, IdType rowid) noexcept : joinRes_(parent), rowid_(rowid) {} JoinedFieldIterator at(uint8_t joinedField) const; - JoinedFieldIterator begin() const; - JoinedFieldIterator end() const; + JoinedFieldIterator begin() const noexcept; + JoinedFieldIterator end() const noexcept; - int getJoinedFieldsCount() const; - int getJoinedItemsCount() const; + int getJoinedFieldsCount() const noexcept { return joinRes_->GetJoinedSelectorsCount(); } + int getJoinedItemsCount() const noexcept; - static ItemIterator CreateFrom(const QueryResults::Iterator& it); + static ItemIterator CreateFrom(const QueryResults::Iterator& it) noexcept; private: const NamespaceResults* joinRes_; diff --git a/cpp_src/core/queryresults/queryresults.cc b/cpp_src/core/queryresults/queryresults.cc index a17df13d5..a76aba904 100644 --- a/cpp_src/core/queryresults/queryresults.cc +++ b/cpp_src/core/queryresults/queryresults.cc @@ -139,7 +139,7 @@ h_vector QueryResults::GetNamespaces() const { return ret; } -int QueryResults::GetJoinedNsCtxIndex(int nsid) const { +int QueryResults::GetJoinedNsCtxIndex(int nsid) const noexcept { int ctxIndex = joined_.size(); for (int ns = 0; ns < nsid; ++ns) { ctxIndex += joined_[ns].GetJoinedSelectorsCount(); @@ -396,16 +396,6 @@ Error QueryResults::Iterator::GetCJSON(WrSerializer &ser, bool withHdrLen) { return errOK; } -bool QueryResults::Iterator::IsRaw() const { - auto &itemRef = qr_->items_[idx_]; - return itemRef.Raw(); -} -std::string_view QueryResults::Iterator::GetRaw() const { - auto &itemRef = qr_->items_[idx_]; - assertrx(itemRef.Raw()); - return std::string_view(reinterpret_cast(itemRef.Value().Ptr()), itemRef.Value().GetCapacity()); -} - Item QueryResults::Iterator::GetItem(bool enableHold) { auto &itemRef = qr_->items_[idx_]; @@ -445,25 +435,23 @@ void QueryResults::AddItem(Item &item, bool withData, bool enableHold) { } } 
-const TagsMatcher &QueryResults::getTagsMatcher(int nsid) const { return ctxs[nsid].tagsMatcher_; } +const TagsMatcher &QueryResults::getTagsMatcher(int nsid) const noexcept { return ctxs[nsid].tagsMatcher_; } -const PayloadType &QueryResults::getPayloadType(int nsid) const { return ctxs[nsid].type_; } +const PayloadType &QueryResults::getPayloadType(int nsid) const noexcept { return ctxs[nsid].type_; } -const FieldsSet &QueryResults::getFieldsFilter(int nsid) const { return ctxs[nsid].fieldsFilter_; } +const FieldsSet &QueryResults::getFieldsFilter(int nsid) const noexcept { return ctxs[nsid].fieldsFilter_; } -TagsMatcher &QueryResults::getTagsMatcher(int nsid) { return ctxs[nsid].tagsMatcher_; } +TagsMatcher &QueryResults::getTagsMatcher(int nsid) noexcept { return ctxs[nsid].tagsMatcher_; } -PayloadType &QueryResults::getPayloadType(int nsid) { return ctxs[nsid].type_; } +PayloadType &QueryResults::getPayloadType(int nsid) noexcept { return ctxs[nsid].type_; } -std::shared_ptr QueryResults::getSchema(int nsid) const { return ctxs[nsid].schema_; } +std::shared_ptr QueryResults::getSchema(int nsid) const noexcept { return ctxs[nsid].schema_; } -int QueryResults::getNsNumber(int nsid) const { +int QueryResults::getNsNumber(int nsid) const noexcept { assertrx(ctxs[nsid].schema_); return ctxs[nsid].schema_->GetProtobufNsNumber(); } -int QueryResults::getMergedNSCount() const { return ctxs.size(); } - void QueryResults::addNSContext(const PayloadType &type, const TagsMatcher &tagsMatcher, const FieldsSet &filter, std::shared_ptr schema) { if (filter.getTagsPathsLength()) nonCacheableData = true; diff --git a/cpp_src/core/queryresults/queryresults.h b/cpp_src/core/queryresults/queryresults.h index 4c06d9d5f..10f4cf545 100644 --- a/cpp_src/core/queryresults/queryresults.h +++ b/cpp_src/core/queryresults/queryresults.h @@ -54,9 +54,10 @@ class QueryResults { void Erase(ItemRefVector::iterator begin, ItemRefVector::iterator end); size_t Count() const noexcept { return 
items_.size(); } size_t TotalCount() const noexcept { return totalCount; } - const std::string &GetExplainResults() const &noexcept { return explainResults; } + const std::string &GetExplainResults() const & noexcept { return explainResults; } const std::string &GetExplainResults() const && = delete; - const std::vector &GetAggregationResults() const &noexcept { return aggregationResults; } + std::string &&MoveExplainResults() & noexcept { return std::move(explainResults); } + const std::vector &GetAggregationResults() const & noexcept { return aggregationResults; } const std::vector &GetAggregationResults() const && = delete; void Clear(); h_vector GetNamespaces() const; @@ -75,10 +76,14 @@ class QueryResults { // use enableHold = false only if you are sure that the item will be destroyed before the queryResults Item GetItem(bool enableHold = true); joins::ItemIterator GetJoined(); - const ItemRef &GetItemRef() const { return qr_->items_[idx_]; } - int64_t GetLSN() const { return qr_->items_[idx_].Value().GetLSN(); } - bool IsRaw() const; - std::string_view GetRaw() const; + const ItemRef &GetItemRef() const noexcept { return qr_->items_[idx_]; } + int64_t GetLSN() const noexcept { return qr_->items_[idx_].Value().GetLSN(); } + bool IsRaw() const noexcept { return qr_->items_[idx_].Raw(); } + std::string_view GetRaw() const noexcept { + auto &itemRef = qr_->items_[idx_]; + assertrx(itemRef.Raw()); + return std::string_view(reinterpret_cast(itemRef.Value().Ptr()), itemRef.Value().GetCapacity()); + } Iterator &operator++() noexcept { idx_++; return *this; @@ -98,9 +103,9 @@ class QueryResults { Error err_; }; - Iterator begin() const { return Iterator{this, 0, errOK}; } - Iterator end() const { return Iterator{this, int(items_.size()), errOK}; } - Iterator operator[](int idx) const { return Iterator{this, idx, errOK}; } + Iterator begin() const noexcept { return Iterator{this, 0, errOK}; } + Iterator end() const noexcept { return Iterator{this, int(items_.size()), 
errOK}; } + Iterator operator[](int idx) const noexcept { return Iterator{this, idx, errOK}; } std::vector joined_; std::vector aggregationResults; @@ -122,17 +127,17 @@ class QueryResults { void addNSContext(const PayloadType &type, const TagsMatcher &tagsMatcher, const FieldsSet &fieldsFilter, std::shared_ptr schema); - const TagsMatcher &getTagsMatcher(int nsid) const; - const PayloadType &getPayloadType(int nsid) const; - const FieldsSet &getFieldsFilter(int nsid) const; - TagsMatcher &getTagsMatcher(int nsid); - PayloadType &getPayloadType(int nsid); - std::shared_ptr getSchema(int nsid) const; - int getNsNumber(int nsid) const; - int getMergedNSCount() const; - ItemRefVector &Items() { return items_; } + const TagsMatcher &getTagsMatcher(int nsid) const noexcept; + const PayloadType &getPayloadType(int nsid) const noexcept; + const FieldsSet &getFieldsFilter(int nsid) const noexcept; + TagsMatcher &getTagsMatcher(int nsid) noexcept; + PayloadType &getPayloadType(int nsid) noexcept; + std::shared_ptr getSchema(int nsid) const noexcept; + int getNsNumber(int nsid) const noexcept; + int getMergedNSCount() const noexcept { return ctxs.size(); } + ItemRefVector &Items() noexcept { return items_; } const ItemRefVector &Items() const { return items_; } - int GetJoinedNsCtxIndex(int nsid) const; + int GetJoinedNsCtxIndex(int nsid) const noexcept; // Add owning ns pointer // noLock has always to be 'true' (i.e. 
this method can only be called unders Namespace's lock) void AddNamespace(NamespaceImplPtr, bool noLock); diff --git a/cpp_src/core/reindexer_impl/reindexerimpl.cc b/cpp_src/core/reindexer_impl/reindexerimpl.cc index 242bb31aa..4651e6920 100644 --- a/cpp_src/core/reindexer_impl/reindexerimpl.cc +++ b/cpp_src/core/reindexer_impl/reindexerimpl.cc @@ -77,7 +77,12 @@ ReindexerImpl::ReindexerImpl(ReindexerConfig cfg) ReindexerImpl::~ReindexerImpl() { for (auto& ns : namespaces_) { - ns.second->SetDestroyFlag(); + // Add extra checks to avoid GCC 13 warnings in Release build. Actually namespaces are never null + if (ns.second) { + if (auto mainNs = ns.second->getMainNs(); mainNs) { + mainNs->SetDestroyFlag(); + } + } } dbDestroyed_ = true; diff --git a/cpp_src/core/reindexer_impl/rx_selector.cc b/cpp_src/core/reindexer_impl/rx_selector.cc index ffc87deb5..0d5302eea 100644 --- a/cpp_src/core/reindexer_impl/rx_selector.cc +++ b/cpp_src/core/reindexer_impl/rx_selector.cc @@ -38,12 +38,16 @@ void RxSelector::DoSelect(const Query& q, QueryResults& result, NsLocker& loc } std::vector queryResultsHolder; std::optional queryCopy; + ExplainCalc::Duration preselectTimeTotal{0}; + std::vector subQueryExplains; if (!q.GetSubQueries().empty()) { if (q.GetDebugLevel() >= LogInfo || ns->config_.logLevel >= LogInfo) { logPrintf(LogInfo, "Query before subqueries substitution: %s", q.GetSQL()); } queryCopy.emplace(q); - preselectSubQueries(*queryCopy, queryResultsHolder, locks, func, ctx); + const auto preselectStartTime = ExplainCalc::Clock::now(); + subQueryExplains = preselectSubQueries(*queryCopy, queryResultsHolder, locks, func, ctx); + preselectTimeTotal += ExplainCalc::Clock::now() - preselectStartTime; } const Query& query = queryCopy ? 
*queryCopy : q; std::vector joinQueryResultsContexts; @@ -58,12 +62,11 @@ void RxSelector::DoSelect(const Query& q, QueryResults& result, NsLocker& loc } JoinedSelectors mainJoinedSelectors; - ExplainCalc::Duration preselectTimeTotal{0}; if (thereAreJoins) { const auto preselectStartTime = ExplainCalc::Clock::now(); mainJoinedSelectors = prepareJoinedSelectors(query, result, locks, func, joinQueryResultsContexts, ctx); result.joined_.resize(1 + query.GetMergeQueries().size()); - preselectTimeTotal = ExplainCalc::Clock::now() - preselectStartTime; + preselectTimeTotal += ExplainCalc::Clock::now() - preselectStartTime; } IsFTQuery isFtQuery{IsFTQuery::NotSet}; { @@ -73,6 +76,7 @@ void RxSelector::DoSelect(const Query& q, QueryResults& result, NsLocker& loc selCtx.contextCollectingMode = true; selCtx.functions = &func; selCtx.nsid = 0; + selCtx.subQueriesExplains = std::move(subQueryExplains); if (!query.GetMergeQueries().empty()) { selCtx.isMergeQuery = IsMergeQuery::Yes; if rx_unlikely (!query.sortingEntries_.empty()) { @@ -142,13 +146,16 @@ void RxSelector::DoSelect(const Query& q, QueryResults& result, NsLocker& loc std::optional mQueryCopy; if (!mq.GetSubQueries().empty()) { mQueryCopy.emplace(mq); - preselectSubQueries(*mQueryCopy, queryResultsHolder, locks, func, ctx); } const JoinedQuery& mQuery = mQueryCopy ? 
*mQueryCopy : mq; + SelectCtx mctx(mQuery, &query); + if (!mq.GetSubQueries().empty()) { + // NOLINTNEXTLINE(bugprone-unchecked-optional-access) + mctx.subQueriesExplains = preselectSubQueries(*mQueryCopy, queryResultsHolder, locks, func, ctx); + } auto mns = locks.Get(mQuery.NsName()); assertrx_throw(mns); - SelectCtx mctx(mQuery, &query); mctx.nsid = ++counter; mctx.isMergeQuery = IsMergeQuery::Yes; mctx.isFtQuery = isFtQuery; @@ -245,7 +252,7 @@ bool RxSelector::isPreResultValuesModeOptimizationAvailable(const Query& jItemQ, template bool RxSelector::selectSubQuery(const Query& subQuery, const Query& mainQuery, NsLocker& locks, SelectFunctionsHolder& func, - const RdxContext& rdxCtx) { + std::vector& explain, const RdxContext& rdxCtx) { auto ns = locks.Get(subQuery.NsName()); assertrx_throw(ns); @@ -259,12 +266,16 @@ bool RxSelector::selectSubQuery(const Query& subQuery, const Query& mainQuery, N QueryResults result; ns->Select(result, sctx, rdxCtx); locks.Delete(ns); + if (!result.GetExplainResults().empty()) { + explain.emplace_back(subQuery.NsName(), result.MoveExplainResults()); + } return sctx.matchedAtLeastOnce; } template VariantArray RxSelector::selectSubQuery(const Query& subQuery, const Query& mainQuery, NsLocker& locks, QueryResults& qr, - SelectFunctionsHolder& func, const RdxContext& rdxCtx) { + SelectFunctionsHolder& func, std::variant fieldOrKeys, + std::vector& explain, const RdxContext& rdxCtx) { NamespaceImpl::Ptr ns = locks.Get(subQuery.NsName()); assertrx_throw(ns); @@ -353,6 +364,10 @@ VariantArray RxSelector::selectSubQuery(const Query& subQuery, const Query& main } } locks.Delete(ns); + if (!qr.GetExplainResults().empty()) { + explain.emplace_back(subQuery.NsName(), std::move(qr.MoveExplainResults())); + explain.back().SetFieldOrKeys(std::move(fieldOrKeys)); + } return result; } @@ -435,6 +450,7 @@ JoinedSelectors RxSelector::prepareJoinedSelectors(const Query& q, QueryResults& queryResultsContexts.emplace_back(jns->payloadType_, 
jns->tagsMatcher_, FieldsSet(jns->tagsMatcher_, jq.SelectFilters()), jns->schema_); + const auto nsUpdateTime = jns->lastUpdateTimeNano(); result.AddNamespace(jns, true); if (preResult->dataMode == JoinPreResult::ModeValues) { preResult->values.PreselectAllowed(static_cast(jns->Config().maxPreselectSize) >= preResult->values.size()); @@ -443,15 +459,19 @@ JoinedSelectors RxSelector::prepareJoinedSelectors(const Query& q, QueryResults& jns.reset(); } joinedSelectors.emplace_back(jq.joinType, ns, std::move(jns), std::move(joinRes), std::move(jItemQ), result, jq, preResult, - joinedFieldIdx, func, joinedSelectorsCount, false, rdxCtx); + joinedFieldIdx, func, joinedSelectorsCount, false, nsUpdateTime, rdxCtx); ThrowOnCancel(rdxCtx); } return joinedSelectors; } template -void RxSelector::preselectSubQueries(Query& mainQuery, std::vector& queryResultsHolder, NsLocker& locks, - SelectFunctionsHolder& func, const RdxContext& ctx) { +std::vector RxSelector::preselectSubQueries(Query& mainQuery, std::vector& queryResultsHolder, + NsLocker& locks, SelectFunctionsHolder& func, const RdxContext& ctx) { + std::vector explains; + if (mainQuery.GetExplain() || mainQuery.GetDebugLevel() >= LogInfo) { + explains.reserve(mainQuery.GetSubQueries().size()); + } for (size_t i = 0, s = mainQuery.Entries().Size(); i < s; ++i) { mainQuery.Entries().InvokeAppropriate( i, Skip{}, @@ -459,14 +479,16 @@ void RxSelector::preselectSubQueries(Query& mainQuery, std::vector try { const CondType cond = sqe.Condition(); if (cond == CondAny || cond == CondEmpty) { - if (selectSubQuery(mainQuery.GetSubQuery(sqe.QueryIndex()), mainQuery, locks, func, ctx) == (cond == CondAny)) { + if (selectSubQuery(mainQuery.GetSubQuery(sqe.QueryIndex()), mainQuery, locks, func, explains, ctx) == + (cond == CondAny)) { mainQuery.SetEntry(i); } else { mainQuery.SetEntry(i); } } else { QueryResults qr; - const auto values = selectSubQuery(mainQuery.GetSubQuery(sqe.QueryIndex()), mainQuery, locks, qr, func, ctx); + const 
auto values = selectSubQuery(mainQuery.GetSubQuery(sqe.QueryIndex()), mainQuery, locks, qr, func, + sqe.Values().size(), explains, ctx); if (QueryEntries::CheckIfSatisfyCondition(values, sqe.Condition(), sqe.Values())) { mainQuery.SetEntry(i); } else { @@ -481,15 +503,17 @@ void RxSelector::preselectSubQueries(Query& mainQuery, std::vector [&](const SubQueryFieldEntry& sqe) { try { queryResultsHolder.resize(queryResultsHolder.size() + 1); - mainQuery.SetEntry( - i, std::move(mainQuery.GetUpdatableEntry(i)).FieldName(), sqe.Condition(), - selectSubQuery(mainQuery.GetSubQuery(sqe.QueryIndex()), mainQuery, locks, queryResultsHolder.back(), func, ctx)); + mainQuery.SetEntry(i, std::move(mainQuery.GetUpdatableEntry(i)).FieldName(), + sqe.Condition(), + selectSubQuery(mainQuery.GetSubQuery(sqe.QueryIndex()), mainQuery, locks, + queryResultsHolder.back(), func, sqe.FieldName(), explains, ctx)); } catch (const Error& err) { throw Error(err.code(), "Error during preprocessing of subquery '" + mainQuery.GetSubQuery(sqe.QueryIndex()).GetSQL() + "': " + err.what()); } }); } + return explains; } template void RxSelector::DoSelect>( diff --git a/cpp_src/core/reindexer_impl/rx_selector.h b/cpp_src/core/reindexer_impl/rx_selector.h index d77e9c5e9..1110f4651 100644 --- a/cpp_src/core/reindexer_impl/rx_selector.h +++ b/cpp_src/core/reindexer_impl/rx_selector.h @@ -83,14 +83,15 @@ class RxSelector { static JoinedSelectors prepareJoinedSelectors(const Query &q, QueryResults &result, NsLocker &locks, SelectFunctionsHolder &func, std::vector &, const RdxContext &ctx); template - static void preselectSubQueries(Query &mainQuery, std::vector &queryResultsHolder, NsLocker &, SelectFunctionsHolder &, - const RdxContext &); + [[nodiscard]] static std::vector preselectSubQueries(Query &mainQuery, std::vector &queryResultsHolder, + NsLocker &, SelectFunctionsHolder &, const RdxContext &); template [[nodiscard]] static bool selectSubQuery(const Query &subQuery, const Query &mainQuery, NsLocker 
&, SelectFunctionsHolder &, - const RdxContext &); + std::vector &, const RdxContext &); template [[nodiscard]] static VariantArray selectSubQuery(const Query &subQuery, const Query &mainQuery, NsLocker &, QueryResults &, - SelectFunctionsHolder &, const RdxContext &); + SelectFunctionsHolder &, std::variant fieldOrKeys, + std::vector &, const RdxContext &); static bool isPreResultValuesModeOptimizationAvailable(const Query &jItemQ, const NamespaceImpl::Ptr &jns, const Query &mainQ); }; diff --git a/cpp_src/core/schema.h b/cpp_src/core/schema.h index aa147f1c7..309af583d 100644 --- a/cpp_src/core/schema.h +++ b/cpp_src/core/schema.h @@ -136,7 +136,7 @@ class Schema { std::string_view GetJSON() const noexcept { return originalJson_; } Error BuildProtobufSchema(TagsMatcher& tm, PayloadType& pt); Error GetProtobufSchema(WrSerializer& schema) const; - int GetProtobufNsNumber() const { return protobufNsNumber_; } + int GetProtobufNsNumber() const noexcept { return protobufNsNumber_; } const PrefixTree::PrefixTreeNode* GetRoot() const { return &paths_.root_; } static std::string AppendProtobufNumber(std::string_view j, int protobufNsNumber); diff --git a/cpp_src/core/selectfunc/functions/highlight.cc b/cpp_src/core/selectfunc/functions/highlight.cc index 45c1dadab..4e4177843 100644 --- a/cpp_src/core/selectfunc/functions/highlight.cc +++ b/cpp_src/core/selectfunc/functions/highlight.cc @@ -26,6 +26,10 @@ bool Highlight::Process(ItemRef &res, PayloadType &pl_type, const SelectFuncStru pl.GetByJsonPath(func.tagsPath, kr, KeyValueType::Undefined{}); } + if (kr.empty() || !kr[0].Type().IsSame(KeyValueType::String{})) { + throw Error(errLogic, "Unable to apply highlight function to the non-string field '%s'", func.field); + } + const std::string *data = p_string(kr[0]).getCxxstr(); auto pva = dataFtCtx->area_[it->second].GetAreas(func.fieldNo); if (!pva || pva->Empty()) return false; diff --git a/cpp_src/core/selectfunc/functions/snippet.cc 
b/cpp_src/core/selectfunc/functions/snippet.cc index 17e9b0bf4..951399101 100644 --- a/cpp_src/core/selectfunc/functions/snippet.cc +++ b/cpp_src/core/selectfunc/functions/snippet.cc @@ -271,6 +271,9 @@ bool Snippet::Process(ItemRef &res, PayloadType &pl_type, const SelectFuncStruct VariantArray kr; pl.Get(func.field, kr); + if (kr.empty() || !kr[0].Type().IsSame(KeyValueType::String{})) { + throw Error(errLogic, "Unable to apply snippet function to the non-string field '%s'", func.field); + } const std::string *data = p_string(kr[0]).getCxxstr(); auto pva = dataFtCtx->area_[it->second].GetAreas(func.fieldNo); diff --git a/cpp_src/core/selectfunc/selectfuncparser.cc b/cpp_src/core/selectfunc/selectfuncparser.cc index 1836f7441..a288bc931 100644 --- a/cpp_src/core/selectfunc/selectfuncparser.cc +++ b/cpp_src/core/selectfunc/selectfuncparser.cc @@ -12,17 +12,23 @@ SelectFuncStruct &SelectFuncParser::Parse(const std::string &query) { token tok = parser.next_token(tokenizer::flags::no_flags); - selectFuncStruct_.field = std::string(tok.text()); - auto dotPos = tok.text().find('.'); - if (dotPos == std::string_view::npos) { + if (dotPos == std::string_view::npos || (parser.peek_token(tokenizer::flags::no_flags).text() == "=")) { + selectFuncStruct_.field = std::string(tok.text()); tok = parser.next_token(tokenizer::flags::no_flags); if (tok.text() != "=") { - throw Error(errParams, "`=` is expected, but found `%s`", tok.text()); + if (tok.text() == ".") { + throw Error(errParams, "Unexpected space symbol before `.` (select function delimiter)"); + } + throw Error(errParams, "Expected `=` or `.` as a select function delimiter, but found `%s`", tok.text()); } token ftok; ParseFunction(parser, false, ftok); } else { + if (dotPos == tok.text_.size() - 1) { + throw Error(errParams, "Unexpected space symbol or token after `.` (select function delimiter): `%s`", tok.text()); + } + selectFuncStruct_.field = std::string(tok.text_.begin(), tok.text_.begin() + dotPos); token 
ftok(TokenName); ftok.text_.assign(tok.text_.begin() + dotPos + 1, tok.text_.end()); ParseFunction(parser, false, ftok); @@ -215,10 +221,12 @@ SelectFuncStruct &SelectFuncParser::ParseFunction(tokenizer &parser, bool partOf } } if (!selectFuncStruct_.isFunction) { - throw Error(errParseDSL, "%s: The closing parenthesis is required, but found `%s`", selectFuncStruct_.funcName, tok.text()); + throw Error(errParseDSL, "%s: The closing parenthesis is required, but found `%s`. Select function name: `%s`", + selectFuncStruct_.funcName, tok.text(), selectFuncStruct_.funcName); } } else { - throw Error(errParseDSL, "%s: An open parenthesis is required, but found `%s`", selectFuncStruct_.funcName, tok.text()); + throw Error(errParseDSL, "%s: An open parenthesis is required, but found `%s`. Select function name: `%s`", + selectFuncStruct_.funcName, tok.text(), selectFuncStruct_.funcName); } return selectFuncStruct_; diff --git a/cpp_src/core/selectkeyresult.h b/cpp_src/core/selectkeyresult.h index 55f83498b..8018061bd 100644 --- a/cpp_src/core/selectkeyresult.h +++ b/cpp_src/core/selectkeyresult.h @@ -21,11 +21,11 @@ class SingleSelectKeyResult { public: SingleSelectKeyResult() noexcept {} - SingleSelectKeyResult(IndexIterator::Ptr indexForwardIter) : indexForwardIter_(std::move(indexForwardIter)) { + explicit SingleSelectKeyResult(IndexIterator::Ptr &&indexForwardIter) noexcept : indexForwardIter_(std::move(indexForwardIter)) { assertrx(indexForwardIter_ != nullptr); } template - explicit SingleSelectKeyResult(const KeyEntryT &ids, SortType sortId) { + explicit SingleSelectKeyResult(const KeyEntryT &ids, SortType sortId) noexcept { if (ids.Unsorted().IsCommited()) { ids_ = ids.Sorted(sortId); } else { @@ -35,7 +35,7 @@ class SingleSelectKeyResult { useBtree_ = true; } } - explicit SingleSelectKeyResult(IdSet::Ptr ids) noexcept : tempIds_(std::move(ids)), ids_(*tempIds_) {} + explicit SingleSelectKeyResult(IdSet::Ptr &&ids) noexcept : tempIds_(std::move(ids)), 
ids_(*tempIds_) {} explicit SingleSelectKeyResult(const IdSetRef &ids) noexcept : ids_(ids) {} explicit SingleSelectKeyResult(IdType rBegin, IdType rEnd) noexcept : rBegin_(rBegin), rEnd_(rEnd), isRange_(true) {} SingleSelectKeyResult(const SingleSelectKeyResult &other) noexcept @@ -253,7 +253,7 @@ class SelectKeyResult : public h_vector { } clear(); deferedExplicitSort = false; - emplace_back(mergedIds); + emplace_back(IdSet::Ptr(mergedIds)); return mergedIds; } }; diff --git a/cpp_src/core/sortingprioritiestable.cc b/cpp_src/core/sortingprioritiestable.cc index c98ee154d..8f86f59b1 100644 --- a/cpp_src/core/sortingprioritiestable.cc +++ b/cpp_src/core/sortingprioritiestable.cc @@ -1,14 +1,15 @@ #include "sortingprioritiestable.h" #include -#include "tools/assertrx.h" #include "tools/errors.h" #include "tools/stringstools.h" -using namespace reindexer; +namespace reindexer { SortingPrioritiesTable::SortingPrioritiesTable(const std::string& sortOrderUTF8) - : sortOrder_(std::make_shared()), sortOrderCharacters_(sortOrderUTF8) { - if (sortOrderCharacters_.empty()) throw Error(errLogic, "Custom sort format string cannot be empty!"); + : sortOrder_(make_intrusive()), sortOrderCharacters_(sortOrderUTF8) { + if (sortOrderCharacters_.empty()) { + throw Error(errLogic, "Custom sort format string cannot be empty!"); + } wchar_t prevCh = 0; uint16_t priority = 0; @@ -47,7 +48,7 @@ SortingPrioritiesTable::SortingPrioritiesTable(const std::string& sortOrderUTF8) if (!ranges.empty()) { auto rangeIt = ranges.begin(); uint16_t outOfRangePriority = maxPriority; - for (size_t i = 0; i < tableSize;) { + for (size_t i = 0; i < kTableSize;) { if ((rangeIt != ranges.end()) && (rangeIt->first == i)) { i += rangeIt->second; ++rangeIt; @@ -69,11 +70,4 @@ bool SortingPrioritiesTable::checkForRangeIntersection(std::map(c) < tableSize); - uint16_t ch(static_cast(c)); - return sortOrder_->operator[](ch); -} - -const std::string& SortingPrioritiesTable::GetSortOrderCharacters() const { return 
sortOrderCharacters_; } +} // namespace reindexer diff --git a/cpp_src/core/sortingprioritiestable.h b/cpp_src/core/sortingprioritiestable.h index 54331149a..0dc957eb3 100644 --- a/cpp_src/core/sortingprioritiestable.h +++ b/cpp_src/core/sortingprioritiestable.h @@ -2,8 +2,9 @@ #include #include -#include #include +#include "estl/intrusive_ptr.h" +#include "tools/assertrx.h" #include "type_consts.h" namespace reindexer { @@ -20,12 +21,17 @@ class SortingPrioritiesTable { explicit SortingPrioritiesTable(const std::string& sortOrderUTF8); /// Returns priority of a character. - /// @param ch - character. + /// @param c - character /// @returns int priority value - int GetPriority(wchar_t ch) const; + int GetPriority(wchar_t c) const noexcept { + assertrx(sortOrder_.get() != nullptr); + // assertrx(static_cast(c) < tableSize); + uint16_t ch(static_cast(c)); + return sortOrder_->operator[](ch); + } /// @returns string of sort order characters - const std::string& GetSortOrderCharacters() const; + const std::string& GetSortOrderCharacters() const noexcept { return sortOrderCharacters_; } private: /// Checks whether ch is in existing ranges ir not. @@ -34,10 +40,11 @@ class SortingPrioritiesTable { /// @returns true, if character is in one of existing ranges already. 
bool checkForRangeIntersection(std::map& ranges, wchar_t ch); - static const uint32_t tableSize = 0x10000; - using SortOrderTable = std::array; - using SortOrderTablePtr = std::shared_ptr; + constexpr static uint32_t kTableSize = 0x10000; + using SortOrderTable = intrusive_atomic_rc_wrapper>; + using SortOrderTablePtr = intrusive_ptr; SortOrderTablePtr sortOrder_; std::string sortOrderCharacters_; }; + } // namespace reindexer diff --git a/cpp_src/core/type_consts.h b/cpp_src/core/type_consts.h index 06463ce22..4f61cc00c 100644 --- a/cpp_src/core/type_consts.h +++ b/cpp_src/core/type_consts.h @@ -79,6 +79,10 @@ typedef enum QuerySerializeMode { SkipMergeQueries = 0x02, SkipLimitOffset = 0x04, WithJoinEntries = 0x08, + SkipAggregations = 0x10, + SkipSortEntries = 0x20, + SkipExtraParams = 0x40, + SkipLeftJoinQueries = 0x80, } QuerySerializeMode; typedef enum CondType { diff --git a/cpp_src/estl/h_vector.h b/cpp_src/estl/h_vector.h index 735ce3f55..88fc55921 100644 --- a/cpp_src/estl/h_vector.h +++ b/cpp_src/estl/h_vector.h @@ -1,12 +1,14 @@ #pragma once -#include +#include #include #include #include +#include #include #include #include "debug_macros.h" +#include "estl/defines.h" #include "trivial_reverse_iterator.h" namespace reindexer { @@ -39,6 +41,9 @@ class h_vector { typedef trivial_reverse_iterator reverse_iterator; typedef unsigned size_type; typedef std::ptrdiff_t difference_type; + static_assert(std::is_trivial_v, "Expecting trivial reverse iterator"); + static_assert(std::is_trivial_v, "Expecting trivial const reverse iterator"); + h_vector() noexcept : e_{0, 0}, size_(0), is_hdata_(1) {} explicit h_vector(size_type size) : h_vector() { resize(size); } h_vector(size_type size, const T& v) : h_vector() { @@ -56,7 +61,8 @@ class h_vector { reserve(other.capacity()); const pointer p = ptr(); const_pointer op = other.ptr(); - for (size_type i = 0; i < other.size(); i++) { + const size_type osz = other.size(); + for (size_type i = 0; i < osz; i++) { new (p + i) 
T(op[i]); } size_ = other.size_; @@ -65,9 +71,10 @@ class h_vector { if (other.is_hdata()) { const pointer p = reinterpret_cast(hdata_); const pointer op = reinterpret_cast(other.hdata_); - for (size_type i = 0; i < other.size(); i++) { + const size_type osz = other.size(); + for (size_type i = 0; i < osz; i++) { new (p + i) T(std::move(op[i])); - if constexpr (!std::is_trivially_destructible::value) { + if constexpr (!std::is_trivially_destructible_v) { op[i].~T(); } } @@ -89,11 +96,13 @@ class h_vector { size_type i = mv; const pointer p = ptr(); const_pointer op = other.ptr(); - for (; i < other.size(); i++) { + const auto osz = other.size(); + for (; i < osz; i++) { new (p + i) T(op[i]); } - if constexpr (!std::is_trivially_destructible::value) { - for (; i < size(); i++) p[i].~T(); + if constexpr (!std::is_trivially_destructible_v) { + const auto old_sz = size(); + for (; i < old_sz; i++) p[i].~T(); } size_ = other.size_; } @@ -104,11 +113,12 @@ class h_vector { if (&other != this) { clear(); if (other.is_hdata()) { - for (size_type i = 0; i < other.size(); i++) { - const pointer p = ptr(); - const pointer op = other.ptr(); + const size_type osz = other.size(); + const pointer p = ptr(); + const pointer op = other.ptr(); + for (size_type i = 0; i < osz; i++) { new (p + i) T(std::move(op[i])); - if constexpr (!std::is_trivially_destructible::value) { + if constexpr (!std::is_trivially_destructible_v) { op[i].~T(); } } @@ -126,9 +136,10 @@ class h_vector { bool operator==(const h_vector& other) const noexcept(noexcept(std::declval() == std::declval())) { if (&other != this) { - if (size() != other.size()) return false; - for (size_t i = 0; i < size(); ++i) { - if (!(at(i) == other.at(i))) return false; + const size_type sz = size_; + if (sz != other.size()) return false; + for (size_t i = 0; i < sz; ++i) { + if (!(operator[](i) == other[i])) return false; } return true; } @@ -138,6 +149,8 @@ class h_vector { return !operator==(other); } + static constexpr 
size_type max_size() noexcept { return std::numeric_limits::max() >> 1; } + template void clear() noexcept { if constexpr (FreeHeapMemory) { @@ -145,7 +158,8 @@ class h_vector { is_hdata_ = 1; } else if constexpr (!std::is_trivially_destructible_v) { const pointer p = ptr(); - for (size_type i = 0; i < size_; ++i) p[i].~T(); + const size_type sz = size_; + for (size_type i = 0; i < sz; ++i) p[i].~T(); } size_ = 0; } @@ -156,26 +170,10 @@ class h_vector { const_iterator end() const noexcept { return ptr() + size_; } const_iterator cbegin() const noexcept { return ptr(); } const_iterator cend() const noexcept { return ptr() + size_; } - const_reverse_iterator rbegin() const noexcept { - const_reverse_iterator it; - it = end(); - return it; - } - const_reverse_iterator rend() const noexcept { - const_reverse_iterator it; - it = begin(); - return it; - } - reverse_iterator rbegin() noexcept { - reverse_iterator it; - it = end(); - return it; - } - reverse_iterator rend() noexcept { - reverse_iterator it; - it = begin(); - return it; - } + const_reverse_iterator rbegin() const noexcept { return end(); } + const_reverse_iterator rend() const noexcept { return begin(); } + reverse_iterator rbegin() noexcept { return end(); } + reverse_iterator rend() noexcept { return begin(); } size_type size() const noexcept { return size_; } size_type capacity() const noexcept { return is_hdata_ ? 
holdSize : e_.cap_; } bool empty() const noexcept { return size_ == 0; } @@ -188,13 +186,13 @@ class h_vector { return ptr()[pos]; } const_reference at(size_type pos) const { - if (pos >= size()) { + if rx_unlikely (pos >= size()) { throw std::logic_error("h_vector: Out of range (pos: " + std::to_string(pos) + ", size: " + std::to_string(size())); } return ptr()[pos]; } reference at(size_type pos) { - if (pos >= size()) { + if rx_unlikely (pos >= size()) { throw std::logic_error("h_vector: Out of range (pos: " + std::to_string(pos) + ", size: " + std::to_string(size())); } return ptr()[pos]; @@ -222,34 +220,49 @@ class h_vector { grow(sz); if constexpr (!reindexer::is_trivially_default_constructible::value) { const pointer p = ptr(); - for (size_type i = size_; i < sz; ++i) new (p + i) T(); + const size_type old_sz = size_; + for (size_type i = old_sz; i < sz; ++i) new (p + i) T(); } - if constexpr (!std::is_trivially_destructible::value) { + if constexpr (!std::is_trivially_destructible_v) { const pointer p = ptr(); - for (size_type i = sz; i < size_; ++i) p[i].~T(); + const size_type old_sz = size_; + for (size_type i = sz; i < old_sz; ++i) p[i].~T(); } size_ = sz; } void resize(size_type sz, const T& default_value) { grow(sz); - for (size_type i = size_; i < sz; i++) new (ptr() + i) T(default_value); - if constexpr (!std::is_trivially_destructible::value) { - for (size_type i = sz; i < size_; i++) ptr()[i].~T(); + const size_type old_sz = size_; + const pointer p = ptr(); + for (size_type i = old_sz; i < sz; ++i) { + new (p + i) T(default_value); + } + if constexpr (!std::is_trivially_destructible_v) { + for (size_type i = sz; i < old_sz; ++i) { + p[i].~T(); + } } size_ = sz; } void reserve(size_type sz) { if (sz > capacity()) { - if (sz <= holdSize) { - throw std::logic_error("Unexpected reserved size"); + if rx_unlikely (sz > max_size()) { + throw std::logic_error("h_vector: max capacity overflow (requested: " + std::to_string(sz) + + ", max_size: " + 
std::to_string(max_size()) + " )"); + } + if rx_unlikely (sz <= holdSize) { + throw std::logic_error("h_vector: unexpected reserved size"); } // NOLINTNEXTLINE(bugprone-sizeof-expression) pointer new_data = static_cast(operator new(sz * sizeof(T))); // ?? dynamic pointer oold_data = ptr(); pointer old_data = oold_data; - for (size_type i = 0; i < size_; i++) { + // Creating those explicit old_sz variable for better vectorization + for (size_type i = 0, old_sz = size_; i < old_sz; ++i) { new (new_data + i) T(std::move(*old_data)); - if (!std::is_trivially_destructible::value) old_data->~T(); + if constexpr (!std::is_trivially_destructible_v) { + old_data->~T(); + } ++old_data; } if (!is_hdata()) operator delete(oold_data); @@ -260,17 +273,19 @@ class h_vector { } void grow(size_type sz) { const auto cap = capacity(); - if (sz > cap) reserve(std::max(sz, cap * 2)); + if (sz > cap) { + reserve(std::max(sz, std::min(max_size(), cap * 2))); + } } void push_back(const T& v) { grow(size_ + 1); new (ptr() + size_) T(v); - size_++; + ++size_; } void push_back(T&& v) { grow(size_ + 1); new (ptr() + size_) T(std::move(v)); - size_++; + ++size_; } template reference emplace_back(Args&&... 
args) { @@ -282,7 +297,7 @@ class h_vector { } void pop_back() { rx_debug_check_nonempty(); - if constexpr (!std::is_trivially_destructible::value) { + if constexpr (!std::is_trivially_destructible_v) { ptr()[--size_].~T(); } else { --size_; @@ -294,10 +309,11 @@ class h_vector { push_back(v); } else { rx_debug_check_subscript(i); - grow(size_ + 1); + const size_type sz = size_; + grow(sz + 1); const pointer p = ptr(); - new (p + size_) T(std::move(p[size_ - 1])); - for (size_type j = size_ - 1; j > i; --j) { + new (p + sz) T(std::move(p[sz - 1])); + for (size_type j = sz - 1; j > i; --j) { p[j] = std::move(p[j - 1]); } p[i] = v; @@ -311,10 +327,11 @@ class h_vector { push_back(std::move(v)); } else { rx_debug_check_subscript(i); - grow(size_ + 1); + const size_type sz = size_; + grow(sz + 1); const pointer p = ptr(); - new (p + size_) T(std::move(p[size_ - 1])); - for (size_type j = size_ - 1; j > i; --j) { + new (p + sz) T(std::move(p[sz - 1])); + for (size_type j = sz - 1; j > i; --j) { p[j] = std::move(p[j - 1]); } p[i] = std::move(v); @@ -326,16 +343,17 @@ class h_vector { if (count == 0) return const_cast(pos); difference_type i = pos - begin(); rx_debug_check_subscript_le(i); - grow(size_ + count); + const int64_t sz = size_; + grow(sz + count); const pointer p = ptr(); - difference_type j = size_ + count - 1; - for (; j >= static_cast(size_) && j >= count + i; --j) { + difference_type j = sz + count - 1; + for (; j >= sz && j >= count + i; --j) { new (p + j) T(std::move(p[j - count])); } for (; j >= count + i; --j) { p[j] = std::move(p[j - count]); } - for (; j >= size_; --j) { + for (; j >= sz; --j) { new (p + j) T(v); } for (; j >= i; --j) { @@ -347,17 +365,18 @@ class h_vector { template iterator emplace(const_iterator pos, Args&&... 
args) { const size_type i = pos - begin(); - if (i == size()) { + const size_type sz = size_; + if (i == sz) { emplace_back(std::forward(args)...); } else { rx_debug_check_subscript(i); - grow(size_ + 1); + grow(sz + 1); const pointer p = ptr(); - new (p + size_) T(std::move(p[size_ - 1])); - for (size_type j = size_ - 1; j > i; --j) { + new (p + sz) T(std::move(p[sz - 1])); + for (size_type j = sz - 1; j > i; --j) { p[j] = std::move(p[j - 1]); } - p[i] = {std::forward(args)...}; + p[i] = T(std::forward(args)...); ++size_; } return begin() + i; @@ -370,7 +389,7 @@ class h_vector { auto firstPtr = p + i; std::move(firstPtr + 1, p + size_, firstPtr); --size_; - if constexpr (!std::is_trivially_destructible::value) { + if constexpr (!std::is_trivially_destructible_v) { p[size_].~T(); } return firstPtr; @@ -382,16 +401,17 @@ class h_vector { if (cnt == 0) return const_cast(pos); const difference_type i = pos - begin(); rx_debug_check_subscript_le(i); - grow(size_ + cnt); + const int64_t sz = size_; + grow(sz + cnt); const pointer p = ptr(); - difference_type j = size_ + cnt - 1; - for (; j >= static_cast(size_) && j >= cnt + i; --j) { + difference_type j = sz + cnt - 1; + for (; j >= sz && j >= cnt + i; --j) { new (p + j) T(std::move(p[j - cnt])); } for (; j >= cnt + i; --j) { p[j] = std::move(p[j - cnt]); } - for (; j >= static_cast(size_); --j) { + for (; j >= sz; --j) { new (p + j) T(*--last); } for (; j >= i; --j) { @@ -402,8 +422,20 @@ class h_vector { } template void assign(InputIt first, InputIt last) { - clear(); - insert(begin(), first, last); + static_assert(std::is_same_v::iterator_category, std::random_access_iterator_tag>, + "Expecting random access iterators here"); + rx_debug_check_valid_range(first, last); + const int64_t cnt = std::distance(first, last); + const int64_t cap = capacity(); + if (cap >= cnt && cap - (cnt >> 2) <= cnt) { + // Allow up to 25% extra memory + clear(); + } else { + clear(); + grow(cnt); + } + std::uninitialized_copy(first, 
last, begin()); + size_ = cnt; } iterator erase(const_iterator first, const_iterator last) { rx_debug_check_valid_range(first, last); @@ -416,21 +448,24 @@ class h_vector { return firstPtr; } rx_debug_check_subscript(i); + const size_type sz = size_; - std::move(firstPtr + cnt, p + size_, firstPtr); - const auto newSize = size_ - cnt; - if constexpr (!std::is_trivially_destructible::value) { - for (size_type j = newSize; j < size_; ++j) p[j].~T(); + std::move(std::make_move_iterator(firstPtr + cnt), std::make_move_iterator(p + sz), firstPtr); + const auto newSize = sz - cnt; + if constexpr (!std::is_trivially_destructible_v) { + for (size_type j = newSize; j < sz; ++j) p[j].~T(); } size_ = newSize; return firstPtr; } void shrink_to_fit() { - if (is_hdata() || size_ == capacity()) return; + const auto sz = size(); + if (is_hdata() || sz == capacity()) return; h_vector tmp; - tmp.reserve(size()); - tmp.insert(tmp.begin(), std::make_move_iterator(begin()), std::make_move_iterator(end())); + tmp.reserve(sz); + std::move(std::make_move_iterator(begin()), std::make_move_iterator(end()), tmp.begin()); + tmp.size_ = sz; *this = std::move(tmp); } size_t heap_size() const noexcept { return is_hdata() ? 
0 : capacity() * sizeof(T); } @@ -442,11 +477,13 @@ class h_vector { void destruct() noexcept { if (is_hdata()) { if constexpr (!std::is_trivially_destructible_v) { - for (size_type i = 0; i < size_; ++i) reinterpret_cast(hdata_)[i].~T(); + const size_type sz = size_; + for (size_type i = 0; i < sz; ++i) reinterpret_cast(hdata_)[i].~T(); } } else { if constexpr (!std::is_trivially_destructible_v) { - for (size_type i = 0; i < size_; ++i) e_.data_[i].~T(); + const size_type sz = size_; + for (size_type i = 0; i < sz; ++i) e_.data_[i].~T(); } operator delete(e_.data_); } diff --git a/cpp_src/estl/span.h b/cpp_src/estl/span.h index 1ad2bac9c..d16134e89 100644 --- a/cpp_src/estl/span.h +++ b/cpp_src/estl/span.h @@ -17,6 +17,8 @@ class span { typedef trivial_reverse_iterator const_reverse_iterator; typedef trivial_reverse_iterator reverse_iterator; typedef size_t size_type; + static_assert(std::is_trivial_v, "Expecting trivial reverse iterator"); + static_assert(std::is_trivial_v, "Expecting trivial const reverse iterator"); constexpr span() noexcept : data_(nullptr), size_(0) {} constexpr span(const span& other) noexcept : data_(other.data_), size_(other.size_) {} @@ -44,16 +46,8 @@ class span { constexpr span(T (&arr)[L]) noexcept : data_(arr), size_(L) {} constexpr iterator begin() const noexcept { return data_; } constexpr iterator end() const noexcept { return data_ + size_; } - /*constexpr*/ reverse_iterator rbegin() const noexcept { - reverse_iterator it; - it = end(); - return it; - } - /*constexpr*/ reverse_iterator rend() const noexcept { - reverse_iterator it; - it = begin(); - return it; - } + constexpr reverse_iterator rbegin() const noexcept { return end(); } + constexpr reverse_iterator rend() const noexcept { return begin(); } constexpr size_type size() const noexcept { return size_; } constexpr bool empty() const noexcept { return size_ == 0; } constexpr const T& operator[](size_type pos) const noexcept { return data_[pos]; } diff --git 
a/cpp_src/estl/trivial_reverse_iterator.h b/cpp_src/estl/trivial_reverse_iterator.h index 3919e6bbb..ccf1aa514 100644 --- a/cpp_src/estl/trivial_reverse_iterator.h +++ b/cpp_src/estl/trivial_reverse_iterator.h @@ -6,7 +6,7 @@ using std::iterator_traits; template class trivial_reverse_iterator { -public: +public: typedef trivial_reverse_iterator this_type; typedef Iterator iterator_type; typedef typename iterator_traits::iterator_category iterator_category; @@ -16,91 +16,83 @@ class trivial_reverse_iterator { typedef typename iterator_traits::pointer pointer; public: - // if CTOR is enabled std::is_trivial> return false; - // trivial_reverse_iterator() : current_(nullptr) {} + constexpr trivial_reverse_iterator() = default; + constexpr trivial_reverse_iterator(Iterator it) noexcept : current_(it) { + static_assert(std::is_trivial_v, "Expecting std::is_trivial_v"); + } template - trivial_reverse_iterator& operator=(const trivial_reverse_iterator& u) { + trivial_reverse_iterator& operator=(const trivial_reverse_iterator& u) noexcept { current_ = u.base(); return *this; } - Iterator base() const { return current_; } - reference operator*() const { + Iterator base() const noexcept { return current_; } + reference operator*() const noexcept { Iterator tmp = current_; return *--tmp; } - pointer operator->() const { return std::addressof(operator*()); } - trivial_reverse_iterator& operator++() { + pointer operator->() const noexcept { return std::addressof(operator*()); } + trivial_reverse_iterator& operator++() noexcept { --current_; return *this; } - trivial_reverse_iterator operator++(int) { + trivial_reverse_iterator operator++(int) noexcept { trivial_reverse_iterator tmp(*this); --current_; return tmp; } - trivial_reverse_iterator& operator--() { + trivial_reverse_iterator& operator--() noexcept { ++current_; return *this; } - trivial_reverse_iterator operator--(int) { + trivial_reverse_iterator operator--(int) noexcept { trivial_reverse_iterator tmp(*this); 
++current_; return tmp; } - trivial_reverse_iterator operator+(difference_type n) const { - Iterator ptr = current_ - n; - trivial_reverse_iterator tmp; - tmp = ptr; - return tmp; - } - trivial_reverse_iterator& operator+=(difference_type n) { + trivial_reverse_iterator operator+(difference_type n) const noexcept { return current_ - n; } + trivial_reverse_iterator& operator+=(difference_type n) noexcept { current_ -= n; return *this; } - trivial_reverse_iterator operator-(difference_type n) const { - Iterator ptr = current_ + n; - trivial_reverse_iterator tmp; - tmp = ptr; - return tmp; - } - trivial_reverse_iterator& operator-=(difference_type n) { + trivial_reverse_iterator operator-(difference_type n) const noexcept { return current_ + n; } + trivial_reverse_iterator& operator-=(difference_type n) noexcept { current_ += n; return *this; } - reference operator[](difference_type n) const { return *(*this + n); } + reference operator[](difference_type n) const noexcept { return *(*this + n); } // Assign operator overloading from const std::reverse_iterator template - trivial_reverse_iterator& operator=(const std::reverse_iterator& u) { + trivial_reverse_iterator& operator=(const std::reverse_iterator& u) noexcept { if (current_ != u.base()) current_ = u.base(); return *this; } // Assign operator overloading from non-const std::reverse_iterator template - trivial_reverse_iterator& operator=(std::reverse_iterator& u) { + trivial_reverse_iterator& operator=(std::reverse_iterator& u) noexcept { if (current_ != u.base()) current_ = u.base(); return *this; } // Assign native pointer template - trivial_reverse_iterator& operator=(Upn ptr) { + trivial_reverse_iterator& operator=(Upn ptr) noexcept { static_assert(std::is_pointer::value, "attempting assign a non-trivial pointer"); /*if (current_ != ptr)*/ current_ = ptr; return *this; } - inline bool operator!=(const this_type& rhs) const { return !EQ(current_, rhs.current_); } - inline bool operator==(const this_type& rhs) 
const { return EQ(current_, rhs.current_); } + inline bool operator!=(const this_type& rhs) const noexcept { return !EQ(current_, rhs.current_); } + inline bool operator==(const this_type& rhs) const noexcept { return EQ(current_, rhs.current_); } protected: Iterator current_; private: - inline bool EQ(Iterator lhs, Iterator rhs) const { return lhs == rhs; } + inline bool EQ(Iterator lhs, Iterator rhs) const noexcept { return lhs == rhs; } }; } // namespace reindexer diff --git a/cpp_src/gtests/bench/fixtures/api_tv_simple.cc b/cpp_src/gtests/bench/fixtures/api_tv_simple.cc index 7c8cec8b9..2fe2dd32a 100644 --- a/cpp_src/gtests/bench/fixtures/api_tv_simple.cc +++ b/cpp_src/gtests/bench/fixtures/api_tv_simple.cc @@ -111,6 +111,9 @@ void ApiTvSimple::RegisterAllCases() { Register("FromCJSONPKOnly", &ApiTvSimple::FromCJSONPKOnly, this); Register("GetCJSON", &ApiTvSimple::GetCJSON, this); Register("ExtractField", &ApiTvSimple::ExtractField, this); + Register("SubQueryEq", &ApiTvSimple::SubQueryEq, this); + Register("SubQuerySet", &ApiTvSimple::SubQuerySet, this); + Register("SubQueryAggregate", &ApiTvSimple::SubQueryAggregate, this); // Those benches should be last, because they are recreating indexes cache Register("Query4CondRangeDropCache", &ApiTvSimple::Query4CondRangeDropCache, this)->Iterations(1000); @@ -197,12 +200,14 @@ reindexer::Error ApiTvSimple::Initialize() { err = db_->Commit(stringSelectNs_); if (!err.ok()) return err; - NamespaceDef mainNsDef{innerJoinLowSelectivityMainNs_}; + NamespaceDef mainNsDef{mainNs_}; mainNsDef.AddIndex("id", "hash", "int", IndexOpts().PK()).AddIndex("field", "hash", "int", IndexOpts()); err = db_->AddNamespace(mainNsDef); if (!err.ok()) return err; - NamespaceDef rightNsDef{innerJoinLowSelectivityRightNs_}; - rightNsDef.AddIndex("id", "hash", "int", IndexOpts().PK()).AddIndex("field", "hash", "int", IndexOpts()); + NamespaceDef rightNsDef{rightNs_}; + rightNsDef.AddIndex("id", "hash", "int", IndexOpts().PK()) + 
.AddIndex("field", "hash", "int", IndexOpts()) + .AddIndex("id_tree", "tree", "int", IndexOpts()); err = db_->AddNamespace(rightNsDef); if (!err.ok()) return err; @@ -227,6 +232,7 @@ reindexer::Error ApiTvSimple::Initialize() { reindexer::JsonBuilder bld2(wrSer_); bld2.Put("id", i); bld2.Put("field", i); + bld2.Put("id_tree", i); bld2.End(); err = rItem.FromJSON(wrSer_.Slice()); if (!err.ok()) return err; @@ -805,9 +811,9 @@ void ApiTvSimple::Query0CondInnerJoinUnlimit(benchmark::State& state) { void ApiTvSimple::Query0CondInnerJoinUnlimitLowSelectivity(benchmark::State& state) { AllocsTracker allocsTracker(state); for (auto _ : state) { // NOLINT(*deadcode.DeadStores) - Query q4join(innerJoinLowSelectivityRightNs_); + Query q4join(rightNs_); q4join.Where("id", CondLe, 250); - Query q(innerJoinLowSelectivityMainNs_); + Query q(mainNs_); q.InnerJoin("id", "id", CondEq, std::move(q4join)).ReqTotal(); QueryResults qres; @@ -816,6 +822,43 @@ void ApiTvSimple::Query0CondInnerJoinUnlimitLowSelectivity(benchmark::State& sta } } +void ApiTvSimple::SubQueryEq(benchmark::State& state) { + AllocsTracker allocsTracker(state); + for (auto _ : state) { // NOLINT(*deadcode.DeadStores) + Query q = Query(mainNs_).Where( + "id", CondEq, Query(rightNs_).Select({"field"}).Where("id", CondEq, VariantArray::Create(int(rand() % kTotalItemsMainJoinNs)))); + QueryResults qres; + auto err = db_->Select(q, qres); + if (!err.ok()) state.SkipWithError(err.what().c_str()); + } +} + +void ApiTvSimple::SubQuerySet(benchmark::State& state) { + AllocsTracker allocsTracker(state); + for (auto _ : state) { // NOLINT(*deadcode.DeadStores) + const int rangeMin = rand() % (kTotalItemsMainJoinNs - 500); + Query q = Query(mainNs_).Where( + "id", CondSet, Query(rightNs_).Select({"id"}).Where("id_tree", CondRange, VariantArray::Create(rangeMin, rangeMin + 500))); + QueryResults qres; + auto err = db_->Select(q, qres); + if (!err.ok()) state.SkipWithError(err.what().c_str()); + } +} + +void 
ApiTvSimple::SubQueryAggregate(benchmark::State& state) { + AllocsTracker allocsTracker(state); + for (auto _ : state) { // NOLINT(*deadcode.DeadStores) + Query q = Query(mainNs_).Where("id", CondEq, + Query(rightNs_) + .Aggregate(AggAvg, {"id"}) + .Where("id", CondLt, VariantArray::Create(int(rand() % kTotalItemsMainJoinNs))) + .Limit(500)); + QueryResults qres; + auto err = db_->Select(q, qres); + if (!err.ok()) state.SkipWithError(err.what().c_str()); + } +} + void ApiTvSimple::Query2CondInnerJoin(benchmark::State& state) { AllocsTracker allocsTracker(state); for (auto _ : state) { // NOLINT(*deadcode.DeadStores) @@ -1202,7 +1245,7 @@ void ApiTvSimple::query2CondIdSet(benchmark::State& state, const std::vector>& idsets); reindexer::Error prepareCJsonBench(); @@ -147,8 +150,8 @@ class ApiTvSimple : private BaseFixture { std::unordered_map>> idsets_; reindexer::WrSerializer wrSer_; std::string stringSelectNs_{"string_select_ns"}; - std::string innerJoinLowSelectivityMainNs_{"inner_join_low_selectivity_main_ns"}; - std::string innerJoinLowSelectivityRightNs_{"inner_join_low_selectivity_right_ns"}; + std::string mainNs_{"main_ns"}; + std::string rightNs_{"right_ns"}; std::string cjsonNsName_{"cjson_ns_name"}; std::unique_ptr itemForCjsonBench_; std::vector fieldsToExtract_; diff --git a/cpp_src/gtests/bench/fixtures/api_tv_simple_comparators.cc b/cpp_src/gtests/bench/fixtures/api_tv_simple_comparators.cc index 30846061c..4ffafedc9 100644 --- a/cpp_src/gtests/bench/fixtures/api_tv_simple_comparators.cc +++ b/cpp_src/gtests/bench/fixtures/api_tv_simple_comparators.cc @@ -4,7 +4,7 @@ #include "core/cjson/jsonbuilder.h" #include "core/nsselecter/joinedselector.h" #include "core/reindexer.h" -// #include "gtests/tools.h" +#include "gtests/tools.h" #include "tools/string_regexp_functions.h" #include "helpers.h" @@ -51,6 +51,7 @@ void ApiTvSimpleComparators::RegisterAllCases() { Register("GetEqArrayInt", &ApiTvSimpleComparators::GetEqArrayInt, this); 
Register("GetEqString", &ApiTvSimpleComparators::GetEqString, this); Register("GetByRangeIDAndSort", &ApiTvSimpleComparators::GetByRangeIDAndSort, this); + Register("GetUuidStr", &ApiTvSimpleComparators::GetUuidStr, this); Register("Query1Cond", &ApiTvSimpleComparators::Query1Cond, this); Register("Query1CondTotal", &ApiTvSimpleComparators::Query1CondTotal, this); @@ -93,6 +94,11 @@ reindexer::Error ApiTvSimpleComparators::Initialize() { locations_ = {"mos", "ct", "dv", "sth", "vlg", "sib", "ural"}; + uuids_.reserve(1000); + for (size_t i = 0; i < 1000; ++i) { + uuids_.emplace_back(randStrUuid()); + } + for (int i = 0; i < 10; i++) packages_.emplace_back(randomNumArray(20, 10000, 10)); for (int i = 0; i < 20; i++) priceIDs_.emplace_back(randomNumArray(10, 7000, 50)); @@ -159,6 +165,7 @@ reindexer::Item ApiTvSimpleComparators::MakeItem(benchmark::State&) { item["location"] = locations_.at(random(0, locations_.size() - 1)); item["start_time"] = start_times_.at(random(0, start_times_.size() - 1)); item["end_time"] = startTime + random(1, 5) * 1000; + item["uuid_str"] = uuids_[rand() % uuids_.size()]; return item; } @@ -237,6 +244,19 @@ void ApiTvSimpleComparators::GetByRangeIDAndSort(benchmark::State& state) { } } +void ApiTvSimpleComparators::GetUuidStr(benchmark::State& state) { + const auto& uuid = uuids_[rand() % uuids_.size()]; + AllocsTracker allocsTracker(state); + for (auto _ : state) { // NOLINT(*deadcode.DeadStores) + Query q(nsdef_.name); + q.Where("uuid_str", CondEq, uuid); + QueryResults qres; + auto err = db_->Select(q, qres); + if (!err.ok()) state.SkipWithError(err.what().c_str()); + if (!qres.Count()) state.SkipWithError("Results does not contain any value"); + } +} + void ApiTvSimpleComparators::Query1Cond(benchmark::State& state) { AllocsTracker allocsTracker(state); for (auto _ : state) { // NOLINT(*deadcode.DeadStores) diff --git a/cpp_src/gtests/bench/fixtures/api_tv_simple_comparators.h b/cpp_src/gtests/bench/fixtures/api_tv_simple_comparators.h 
index 96e5167f8..91e769bce 100644 --- a/cpp_src/gtests/bench/fixtures/api_tv_simple_comparators.h +++ b/cpp_src/gtests/bench/fixtures/api_tv_simple_comparators.h @@ -19,7 +19,8 @@ class ApiTvSimpleComparators : private BaseFixture { .AddIndex("price_id", "-", "int", IndexOpts().Array()) .AddIndex("location", "-", "string", IndexOpts()) .AddIndex("end_time", "-", "int", IndexOpts()) - .AddIndex("start_time", "-", "int", IndexOpts()); + .AddIndex("start_time", "-", "int", IndexOpts()) + .AddIndex("uuid_str", "-", "string", IndexOpts()); } void RegisterAllCases(); @@ -36,6 +37,7 @@ class ApiTvSimpleComparators : private BaseFixture { void GetEqArrayInt(State& state); void GetEqString(State& state); void GetByRangeIDAndSort(State& state); + void GetUuidStr(State& state); void Query1Cond(State& state); void Query1CondTotal(State& state); @@ -61,6 +63,7 @@ class ApiTvSimpleComparators : private BaseFixture { std::vector start_times_; std::vector> packages_; std::vector> priceIDs_; + std::vector uuids_; #if !defined(REINDEX_WITH_ASAN) && !defined(REINDEX_WITH_TSAN) && !defined(RX_WITH_STDLIB_DEBUG) constexpr static unsigned kTotalItemsStringSelectNs = 100'000; #else // !defined(REINDEX_WITH_ASAN) && !defined(REINDEX_WITH_TSAN) && !defined(RX_WITH_STDLIB_DEBUG) diff --git a/cpp_src/gtests/bench/fixtures/ft_fixture.cc b/cpp_src/gtests/bench/fixtures/ft_fixture.cc index f7358ae57..649c9e264 100644 --- a/cpp_src/gtests/bench/fixtures/ft_fixture.cc +++ b/cpp_src/gtests/bench/fixtures/ft_fixture.cc @@ -368,16 +368,16 @@ void FullText::Fast3PhraseLowDiversity(State& state) { size_t cnt = 0; for (auto _ : state) { // NOLINT(*deadcode.DeadStores) Query q(lowWordsDiversityNsDef_.name); - std::string& w1 = + const std::string& w1 = words2_.at(randomGenerator_(randomEngine_, std::uniform_int_distribution::param_type{0, int(words2_.size() - 1)})); - std::string& w2 = + const std::string& w2 = words2_.at(randomGenerator_(randomEngine_, std::uniform_int_distribution::param_type{0, 
int(words2_.size() - 1)})); - std::string& w3 = + const std::string& w3 = words2_.at(randomGenerator_(randomEngine_, std::uniform_int_distribution::param_type{0, int(words2_.size() - 1)})); std::string ftQuery; ftQuery.reserve(w1.size() + w2.size() + w3.size() + 32); ftQuery.append("'").append(w1).append(" ").append(w2).append("' ").append(w3); - q.Where("search", CondEq, ftQuery); + q.Where("search", CondEq, std::move(ftQuery)); QueryResults qres; auto err = db_->Select(q, qres); @@ -394,16 +394,16 @@ void FullText::Fast3WordsLowDiversity(State& state) { size_t cnt = 0; for (auto _ : state) { // NOLINT(*deadcode.DeadStores) Query q(lowWordsDiversityNsDef_.name); - std::string& w1 = + const std::string& w1 = words2_.at(randomGenerator_(randomEngine_, std::uniform_int_distribution::param_type{0, int(words2_.size() - 1)})); - std::string& w2 = + const std::string& w2 = words2_.at(randomGenerator_(randomEngine_, std::uniform_int_distribution::param_type{0, int(words2_.size() - 1)})); - std::string& w3 = + const std::string& w3 = words2_.at(randomGenerator_(randomEngine_, std::uniform_int_distribution::param_type{0, int(words2_.size() - 1)})); std::string ftQuery; ftQuery.reserve(w1.size() + w2.size() + w3.size() + 32); ftQuery.append("+").append(w1).append(" +").append(w2).append(" +").append(w3); - q.Where("search", CondEq, ftQuery); + q.Where("search", CondEq, std::move(ftQuery)); QueryResults qres; auto err = db_->Select(q, qres); @@ -420,14 +420,14 @@ void FullText::Fast2PhraseLowDiversity(State& state) { size_t cnt = 0; for (auto _ : state) { // NOLINT(*deadcode.DeadStores) Query q(lowWordsDiversityNsDef_.name); - std::string& w1 = + const std::string& w1 = words2_.at(randomGenerator_(randomEngine_, std::uniform_int_distribution::param_type{0, int(words2_.size() - 1)})); - std::string& w2 = + const std::string& w2 = words2_.at(randomGenerator_(randomEngine_, std::uniform_int_distribution::param_type{0, int(words2_.size() - 1)})); std::string ftQuery; 
ftQuery.reserve(w1.size() + w2.size() + 32); ftQuery.append("'").append(w1).append(" ").append(w2).append("'~50"); - q.Where("search", CondEq, ftQuery); + q.Where("search", CondEq, std::move(ftQuery)); QueryResults qres; auto err = db_->Select(q, qres); @@ -444,14 +444,14 @@ void FullText::Fast2AndWordLowDiversity(State& state) { size_t cnt = 0; for (auto _ : state) { // NOLINT(*deadcode.DeadStores) Query q(lowWordsDiversityNsDef_.name); - std::string& w1 = + const std::string& w1 = words2_.at(randomGenerator_(randomEngine_, std::uniform_int_distribution::param_type{0, int(words2_.size() - 1)})); - std::string& w2 = + const std::string& w2 = words2_.at(randomGenerator_(randomEngine_, std::uniform_int_distribution::param_type{0, int(words2_.size() - 1)})); std::string ftQuery; ftQuery.reserve(w1.size() + w2.size() + 32); ftQuery.append("+").append(w1).append(" +").append(w2); - q.Where("search", CondEq, ftQuery); + q.Where("search", CondEq, std::move(ftQuery)); QueryResults qres; auto err = db_->Select(q, qres); @@ -468,16 +468,16 @@ void FullText::Fast3PhraseWithAreasLowDiversity(State& state) { size_t cnt = 0; for (auto _ : state) { // NOLINT(*deadcode.DeadStores) Query q(lowWordsDiversityNsDef_.name); - std::string& w1 = + const std::string& w1 = words2_.at(randomGenerator_(randomEngine_, std::uniform_int_distribution::param_type{0, int(words2_.size() - 1)})); - std::string& w2 = + const std::string& w2 = words2_.at(randomGenerator_(randomEngine_, std::uniform_int_distribution::param_type{0, int(words2_.size() - 1)})); - std::string& w3 = + const std::string& w3 = words2_.at(randomGenerator_(randomEngine_, std::uniform_int_distribution::param_type{0, int(words2_.size() - 1)})); std::string ftQuery; ftQuery.reserve(w1.size() + w2.size() + w3.size() + 32); ftQuery.append("'").append(w1).append(" ").append(w2).append("' ").append(w3); - q.Where("search", CondEq, ftQuery); + q.Where("search", CondEq, std::move(ftQuery)); q.AddFunction("search = highlight(!,!)"); 
QueryResults qres; auto err = db_->Select(q, qres); @@ -492,7 +492,7 @@ void FullText::Fast1WordWithAreaHighDiversity(State& state) { size_t cnt = 0; for (auto _ : state) { // NOLINT(*deadcode.DeadStores) Query q(nsdef_.name); - std::string& word = + const std::string& word = words_.at(randomGenerator_(randomEngine_, std::uniform_int_distribution::param_type{0, int(words_.size() - 1)})); q.Where("searchfast", CondEq, word); q.AddFunction("search = highlight(!,!)"); @@ -509,16 +509,16 @@ void FullText::Fast3WordsWithAreasLowDiversity(State& state) { size_t cnt = 0; for (auto _ : state) { // NOLINT(*deadcode.DeadStores) Query q(lowWordsDiversityNsDef_.name); - std::string& w1 = + const std::string& w1 = words2_.at(randomGenerator_(randomEngine_, std::uniform_int_distribution::param_type{0, int(words2_.size() - 1)})); - std::string& w2 = + const std::string& w2 = words2_.at(randomGenerator_(randomEngine_, std::uniform_int_distribution::param_type{0, int(words2_.size() - 1)})); - std::string& w3 = + const std::string& w3 = words2_.at(randomGenerator_(randomEngine_, std::uniform_int_distribution::param_type{0, int(words2_.size() - 1)})); std::string ftQuery; ftQuery.reserve(w1.size() + w2.size() + w3.size() + 32); ftQuery.append(w1).append(" ").append(w2).append(" ").append(w3); - q.Where("search", CondEq, ftQuery); + q.Where("search", CondEq, std::move(ftQuery)); q.AddFunction("search = highlight(!,!)"); QueryResults qres; auto err = db_->Select(q, qres); @@ -603,7 +603,7 @@ void FullText::Fast2WordsMatch(benchmark::State& state) { words_.at(randomGenerator_(randomEngine_, std::uniform_int_distribution::param_type{0, int(words_.size() - 1)})) + " " + words_.at(randomGenerator_(randomEngine_, std::uniform_int_distribution::param_type{0, int(words_.size() - 1)})); - q.Where("searchfast", CondEq, words); + q.Where("searchfast", CondEq, std::move(words)); QueryResults qres; auto err = db_->Select(q, qres); if (!err.ok()) state.SkipWithError(err.what().c_str()); @@ -639,7 
+639,7 @@ void FullText::Fuzzy2WordsMatch(benchmark::State& state) { words_.at(randomGenerator_(randomEngine_, std::uniform_int_distribution::param_type{0, int(words_.size() - 1)})) + " " + words_.at(randomGenerator_(randomEngine_, std::uniform_int_distribution::param_type{0, int(words_.size() - 1)})); - q.Where("searchfuzzy", CondEq, words); + q.Where("searchfuzzy", CondEq, std::move(words)); QueryResults qres; auto err = db_->Select(q, qres); @@ -656,9 +656,7 @@ void FullText::Fast1PrefixMatch(benchmark::State& state) { for (auto _ : state) { // NOLINT(*deadcode.DeadStores) TIMEMEASURE(); Query q(nsdef_.name); - - auto word = MakePrefixWord(); - q.Where("searchfast", CondEq, word); + q.Where("searchfast", CondEq, MakePrefixWord()); QueryResults qres; auto err = db_->Select(q, qres); @@ -675,9 +673,7 @@ void FullText::Fast2PrefixMatch(benchmark::State& state) { for (auto _ : state) { // NOLINT(*deadcode.DeadStores) TIMEMEASURE(); Query q(nsdef_.name); - - auto words = MakePrefixWord() + " " + MakePrefixWord(); - q.Where("searchfast", CondEq, words); + q.Where("searchfast", CondEq, MakePrefixWord().append(" ").append(MakePrefixWord())); QueryResults qres; auto err = db_->Select(q, qres); @@ -692,9 +688,7 @@ void FullText::Fuzzy1PrefixMatch(benchmark::State& state) { size_t cnt = 0; for (auto _ : state) { // NOLINT(*deadcode.DeadStores) Query q(nsdef_.name); - - auto word = MakePrefixWord(); - q.Where("searchfuzzy", CondEq, word); + q.Where("searchfuzzy", CondEq, MakePrefixWord()); QueryResults qres; auto err = db_->Select(q, qres); @@ -709,9 +703,7 @@ void FullText::Fuzzy2PrefixMatch(benchmark::State& state) { size_t cnt = 0; for (auto _ : state) { // NOLINT(*deadcode.DeadStores) Query q(nsdef_.name); - - std::string words = MakePrefixWord() + " " + MakePrefixWord(); - q.Where("searchfuzzy", CondEq, words); + q.Where("searchfuzzy", CondEq, MakePrefixWord().append(" ").append(MakePrefixWord())); QueryResults qres; auto err = db_->Select(q, qres); @@ -728,8 +720,7 @@ 
void FullText::Fast1SuffixMatch(benchmark::State& state) { for (auto _ : state) { // NOLINT(*deadcode.DeadStores) TIMEMEASURE(); Query q(nsdef_.name); - std::string word = MakeSuffixWord(); - q.Where("searchfast", CondEq, word); + q.Where("searchfast", CondEq, MakeSuffixWord()); QueryResults qres; auto err = db_->Select(q, qres); if (!err.ok()) state.SkipWithError(err.what().c_str()); @@ -745,9 +736,7 @@ void FullText::Fast2SuffixMatch(benchmark::State& state) { for (auto _ : state) { // NOLINT(*deadcode.DeadStores) TIMEMEASURE(); Query q(nsdef_.name); - - std::string words = MakeSuffixWord() + " " + MakeSuffixWord(); - q.Where("searchfast", CondEq, words); + q.Where("searchfast", CondEq, MakeSuffixWord().append(" ").append(MakeSuffixWord())); QueryResults qres; auto err = db_->Select(q, qres); @@ -762,9 +751,7 @@ void FullText::Fuzzy1SuffixMatch(benchmark::State& state) { size_t cnt = 0; for (auto _ : state) { // NOLINT(*deadcode.DeadStores) Query q(nsdef_.name); - - std::string word = MakeSuffixWord(); - q.Where("searchfuzzy", CondEq, word); + q.Where("searchfuzzy", CondEq, MakeSuffixWord()); QueryResults qres; auto err = db_->Select(q, qres); @@ -779,9 +766,7 @@ void FullText::Fuzzy2SuffixMatch(benchmark::State& state) { size_t cnt = 0; for (auto _ : state) { // NOLINT(*deadcode.DeadStores) Query q(nsdef_.name); - - std::string words = MakeSuffixWord() + " " + MakeSuffixWord(); - q.Where("searchfuzzy", CondEq, words); + q.Where("searchfuzzy", CondEq, MakeSuffixWord().append(" ").append(MakeSuffixWord())); QueryResults qres; auto err = db_->Select(q, qres); @@ -798,9 +783,7 @@ void FullText::Fast1TypoWordMatch(benchmark::State& state) { for (auto _ : state) { // NOLINT(*deadcode.DeadStores) TIMEMEASURE(); Query q(nsdef_.name); - - std::string word = MakeTypoWord(); - q.Where("searchfast", CondEq, word); + q.Where("searchfast", CondEq, MakeTypoWord()); QueryResults qres; auto err = db_->Select(q, qres); @@ -817,9 +800,7 @@ void 
FullText::Fast2TypoWordMatch(benchmark::State& state) { for (auto _ : state) { // NOLINT(*deadcode.DeadStores) TIMEMEASURE(); Query q(nsdef_.name); - - std::string words = MakeTypoWord() + " " + MakeTypoWord(); - q.Where("searchfast", CondEq, words); + q.Where("searchfast", CondEq, MakeTypoWord().append(" ").append(MakeTypoWord())); QueryResults qres; auto err = db_->Select(q, qres); @@ -834,9 +815,7 @@ void FullText::Fuzzy1TypoWordMatch(benchmark::State& state) { size_t cnt = 0; for (auto _ : state) { // NOLINT(*deadcode.DeadStores) Query q(nsdef_.name); - - std::string word = MakeTypoWord(); - q.Where("searchfuzzy", CondEq, word); + q.Where("searchfuzzy", CondEq, MakeTypoWord()); QueryResults qres; auto err = db_->Select(q, qres); @@ -851,9 +830,7 @@ void FullText::Fuzzy2TypoWordMatch(benchmark::State& state) { size_t cnt = 0; for (auto _ : state) { // NOLINT(*deadcode.DeadStores) Query q(nsdef_.name); - - std::string words = MakeTypoWord() + " " + MakeTypoWord(); - q.Where("searchfuzzy", CondEq, words); + q.Where("searchfuzzy", CondEq, MakeTypoWord().append(" ").append(MakeTypoWord())); QueryResults qres; auto err = db_->Select(q, qres); diff --git a/cpp_src/gtests/tests/API/base_tests.cc b/cpp_src/gtests/tests/API/base_tests.cc index 491319105..5bfb1f41d 100644 --- a/cpp_src/gtests/tests/API/base_tests.cc +++ b/cpp_src/gtests/tests/API/base_tests.cc @@ -1,10 +1,8 @@ -#include #include #include "reindexer_api.h" #include "tools/errors.h" #include "core/item.h" -#include "core/keyvalue/key_string.h" #include "core/keyvalue/variant.h" #include "core/queryresults/joinresults.h" #include "core/reindexer.h" @@ -14,10 +12,8 @@ #include #include -#include "debug/backtrace.h" #include "core/keyvalue/p_string.h" -#include "gason/gason.h" #include "server/loggerwrapper.h" #include "tools/serializer.h" @@ -72,7 +68,7 @@ TEST_F(ReindexerApi, RenameNamespace) { err = rt.reindexer->OpenNamespace(existingNamespace); ASSERT_TRUE(err.ok()) << err.what(); - auto testInList = 
[&](const std::string& testNamespaceName, bool inList) { + auto testInList = [&](std::string_view testNamespaceName, bool inList) { std::vector namespacesList; err = rt.reindexer->EnumNamespaces(namespacesList, reindexer::EnumNamespacesOpts()); ASSERT_TRUE(err.ok()) << err.what(); @@ -85,7 +81,7 @@ TEST_F(ReindexerApi, RenameNamespace) { } }; - auto getRowsInJSON = [&](const std::string& namespaceName, std::vector& resStrings) { + auto getRowsInJSON = [&](std::string_view namespaceName, std::vector& resStrings) { QueryResults result; auto err = rt.reindexer->Select(Query(namespaceName), result); ASSERT_TRUE(err.ok()) << err.what(); @@ -1061,7 +1057,7 @@ TEST_F(ReindexerApi, SortByUnorderedIndexes) { } TEST_F(ReindexerApi, SortByUnorderedIndexWithJoins) { - const std::string secondNamespace = "test_namespace_2"; + constexpr std::string_view secondNamespace = "test_namespace_2"; std::vector secondNamespacePKs; auto err = rt.reindexer->OpenNamespace(default_namespace, StorageOpts().Enabled(false)); @@ -1318,7 +1314,7 @@ TEST_F(ReindexerApi, DslFieldsTest) { } TEST_F(ReindexerApi, DistinctQueriesEncodingTest) { - const std::string sql = "select distinct(country), distinct(city) from clients;"; + constexpr std::string_view sql = "select distinct(country), distinct(city) from clients;"; Query q1 = Query::FromSQL(sql); EXPECT_EQ(q1.Entries().Size(), 0); @@ -1441,18 +1437,19 @@ TEST_F(ReindexerApi, ContextCancelingTest) { } TEST_F(ReindexerApi, JoinConditionsSqlParserTest) { - const std::string sql1 = "SELECT * FROM ns WHERE a > 0 AND INNER JOIN (SELECT * FROM ns2 WHERE b > 10 AND c = 1) ON ns2.id = ns.fk_id"; + constexpr std::string_view sql1 = + "SELECT * FROM ns WHERE a > 0 AND INNER JOIN (SELECT * FROM ns2 WHERE b > 10 AND c = 1) ON ns2.id = ns.fk_id"; const auto q1 = Query::FromSQL(sql1); ASSERT_EQ(q1.GetSQL(), sql1); - const std::string sql2 = - "SELECT * FROM ns WHERE a > 0 AND INNER JOIN (SELECT * FROM ns2 WHERE b > 10 AND c = 1 LIMIT 0) ON ns2.id = ns.fk_id"; + 
constexpr std::string_view sql2 = + "SELECT * FROM ns WHERE a > 0 AND INNER JOIN (SELECT * FROM ns2 WHERE b > 10 AND c = 1 LIMIT 0) ON ns2.id = ns.fk_id"; const auto q2 = Query::FromSQL(sql2); ASSERT_EQ(q2.GetSQL(), sql2); } TEST_F(ReindexerApi, UpdateWithBoolParserTest) { - const std::string sql = "UPDATE ns SET flag1 = true,flag2 = false WHERE id > 100"; + constexpr std::string_view sql = "UPDATE ns SET flag1 = true,flag2 = false WHERE id > 100"; Query query = Query::FromSQL(sql); ASSERT_EQ(query.UpdateFields().size(), 2); EXPECT_EQ(query.UpdateFields().front().Column(), "flag1"); @@ -1469,7 +1466,7 @@ TEST_F(ReindexerApi, UpdateWithBoolParserTest) { } TEST_F(ReindexerApi, EqualPositionsSqlParserTest) { - const std::string sql = + constexpr std::string_view sql = "SELECT * FROM ns WHERE (f1 = 1 AND f2 = 2 OR f3 = 3 equal_position(f1, f2) equal_position(f1, f3)) OR (f4 = 4 AND f5 > 5 " "equal_position(f4, f5))"; @@ -1500,13 +1497,17 @@ TEST_F(ReindexerApi, SchemaSuggestions) { Error err = rt.reindexer->OpenNamespace(default_namespace); ASSERT_TRUE(err.ok()) << err.what(); + err = rt.reindexer->OpenNamespace("second_ns"); + ASSERT_TRUE(err.ok()) << err.what(); + // clang-format off - const std::string jsonschema = R"xxx( + constexpr std::string_view jsonschema = R"xxx( { "required": [ "Countries", "Nest_fake", - "nested" + "nested", + "second_field" ], "properties": { "Countries": { @@ -1538,29 +1539,125 @@ TEST_F(ReindexerApi, SchemaSuggestions) { "additionalProperties": false, "type": "object" } + "second_field": { + "type": "number" + }, }, "additionalProperties": false, "type": "object" })xxx"; // clang-format on + // clang-format off + constexpr std::string_view jsonschema2 = R"xxx( + { + "required": [ + "id", + "Field", + ], + "properties": { + "id": { + "type": "number" + }, + "Field": { + "type": "number" + } + }, + "additionalProperties": false, + "type": "object" + })xxx"; + // clang-format on err = rt.reindexer->SetSchema(default_namespace, jsonschema); 
ASSERT_TRUE(err.ok()) << err.what(); - auto validateSuggestions = [this](std::string_view sql, const std::unordered_set& expected) { + err = rt.reindexer->SetSchema("second_ns", jsonschema2); + ASSERT_TRUE(err.ok()) << err.what(); + + auto validateSuggestions = [this](std::string_view sql, const std::unordered_set& expected, size_t position) { std::vector suggestions; - auto err = rt.reindexer->GetSqlSuggestions(sql, sql.size() - 1, suggestions); + auto err = rt.reindexer->GetSqlSuggestions(sql, position, suggestions); ASSERT_TRUE(err.ok()) << err.what(); - ASSERT_EQ(suggestions.size(), expected.size()) << sql; for (auto& sugg : suggestions) { - EXPECT_TRUE(expected.find(sugg) != expected.end()) << "Unexpected suggestion: " << sugg; + EXPECT_TRUE(expected.find(sugg) != expected.end()) << sql << '\n' + << std::string(position, ' ') << "^\nUnexpected suggestion: " << sugg; + } + for (auto& expSugg : expected) { + EXPECT_TRUE(std::find(suggestions.begin(), suggestions.end(), expSugg) != suggestions.end()) + << sql << '\n' + << std::string(position, ' ') << "^\nExpected but not found suggestion: " << expSugg; } }; - validateSuggestions("select * from test_namespace where ne", {"Nest_fake", "nested"}); - validateSuggestions("select * from test_namespace where nested", {}); - validateSuggestions("select * from test_namespace where nested.", {".Name", ".Naame", ".Age"}); - validateSuggestions("select * from test_namespace where nested.Na", {".Name", ".Naame"}); + struct { + std::string_view sql; + std::unordered_set expected; + size_t position = sql.empty() ? 
0 : sql.size() - 1; + } testData[]{ + {"select * from test_namespace where ne", {"Nest_fake", "nested"}}, + {"select * from test_namespace where nested", {}}, + {"select * from test_namespace where nested.", {".Name", ".Naame", ".Age"}}, + {"select * from test_namespace where nested.Na", {".Name", ".Naame"}}, + + {"", {"explain", "select", "delete", "update", "truncate"}}, + {"s", {"select"}}, + {"select", {}}, + {"select ", {"*", "avg", "min", "max", "facet", "sum", "distinct", "rank", "count", "count_cached"}}, + {"select *,", {}}, + {"select *, ", {"*", "avg", "min", "max", "facet", "sum", "distinct", "rank", "count", "count_cached"}}, + {"select *, f", {"facet", "Field"}}, + {"select f", {"facet", "Field"}}, + {"select * ", {"from"}}, + {"select * f", {"from"}}, + {"select * from ", + {"test_namespace", "second_ns", "#memstats", "#activitystats", "#config", "#queriesperfstats", "#namespaces", "#perfstats", + "#clientsstats"}}, + {"select * from te", {"test_namespace"}}, + {"select * from test_namespace ", + {"where", ";", "equal_position", "inner", "join", "left", "limit", "merge", "offset", "or", "order"}}, + {"select * from test_namespace w", {"where"}}, + {"select * from test_namespace where ", + {"second_field", "ST_DWithin", "Countries", "nested", "Nest_fake", "inner", "join", "left", "not", "equal_position"}}, + {"select * from test_namespace where s", {"second_field", "ST_DWithin"}}, + {"select * from second_ns where i", {"id", "inner"}}, + {"select * from test_namespace where (", {}}, + {"select * from test_namespace where (s", {"second_field", "ST_DWithin", "select"}}, + {"select * from test_namespace where (select m", {"max", "min"}}, + {"select * from test_namespace where (select i", {"id", "items_count", "is_subscribed", "ip"}}, + {"select * from test_namespace where (select second_field f", {"from"}}, + {"select * from test_namespace where (select id from s", {"second_ns"}}, + {"select * from test_namespace where (select Field from second_ns where 
", {"id", "ST_DWithin", "Field", "not", "equal_position"}}, + {"select * from test_namespace where C", {"Countries"}}, + {"select * from test_namespace where Countries == (", {}}, + {"select * from test_namespace where Countries == (s", {"select"}}, + {"select * from test_namespace where Countries == (select m", {"max", "min"}}, + {"select * from test_namespace where Countries == (select i", {"id", "ip", "is_subscribed", "items_count"}}, + {"select * from test_namespace where Countries == (select second_field f", {"from"}}, + {"select * from test_namespace where Countries == (select second_field from ", + {"test_namespace", "second_ns", "#memstats", "#activitystats", "#config", "#queriesperfstats", "#namespaces", "#perfstats", + "#clientsstats"}}, + {"select * from test_namespace where Countries == (select second_field from s", {"second_ns"}}, + {"select * from test_namespace where i", {"inner"}}, + {"select * from test_namespace where inner j", {"join"}}, + {"select * from test_namespace where inner join s", {"second_ns"}}, + {"select * from test_namespace where inner join (s", {"select"}}, + {"select * from test_namespace where inner join (select m", {"min", "max"}}, + {"select * from test_namespace where inner join (select i", {"id", "ip", "is_subscribed", "items_count"}}, + {"select * from test_namespace where inner join (select second_field f", {"from"}}, + {"select * from test_namespace where inner join (select second_field from s", {"second_ns"}}, + {"SELECT * FROM ns WHERE id = ( ", {"null", "empty", "not", "select"}}, + }; + + for (const auto& [sql, expected, position] : testData) { + if (sql.empty() || sql.back() == ' ') { + validateSuggestions(sql, expected, position); + } else { + for (const auto& td : testData) { + if (reindexer::checkIfStartsWith(sql, td.sql)) { + validateSuggestions(td.sql, expected, position); + } + } + } + } } TEST_F(ReindexerApi, LoggerWriteInterruptTest) { @@ -1821,7 +1918,7 @@ TEST_F(ReindexerApi, UpdateDoublesItemByPKIndex) { { 
reindexer::QueryResults qr; - const std::string sql = "UPDATE test_namespace SET v1=125, id = 3 WHERE id = 2"; + constexpr std::string_view sql = "UPDATE test_namespace SET v1=125, id = 3 WHERE id = 2"; Query query = Query::FromSQL(sql); err = rt.reindexer->Update(query, qr); ASSERT_EQ(err.code(), errLogic); diff --git a/cpp_src/gtests/tests/CMakeLists.txt b/cpp_src/gtests/tests/CMakeLists.txt index 05d34b1d9..fb82d5f18 100644 --- a/cpp_src/gtests/tests/CMakeLists.txt +++ b/cpp_src/gtests/tests/CMakeLists.txt @@ -12,29 +12,33 @@ include_directories(fixtures mocks) include_directories(${GTEST_INCLUDE_DIRS}) include_directories(${REINDEXER_SOURCE_PATH}) -find_package(Protobuf) set(GENERATED_PROTO_DIR "${PROJECT_BINARY_DIR}") -if (PROTOBUF_FOUND) - INCLUDE_DIRECTORIES(SYSTEM ${PROTOBUF_INCLUDE_DIR}) - list(APPEND REINDEXER_LIBRARIES ${PROTOBUF_LIBRARIES}) - file(GLOB PROTOBUF_SOURCE_FILES ${REINDEXER_SOURCE_PATH}/gtests/tests/proto/*.proto) - - SET(PROTOBUF_PROTOC_EXECUTABLE protoc) - - foreach (CUR_PROTO_FILE ${PROTOBUF_SOURCE_FILES}) - get_filename_component(PROTOBUF_INPUT_DIRECTORY "${CUR_PROTO_FILE}" DIRECTORY) - get_filename_component(PROTOBUF_INPUT_DIRECTORY "${PROTOBUF_INPUT_DIRECTORY}" ABSOLUTE) - get_filename_component(PROTO_FILENAME "${CUR_PROTO_FILE}" NAME) - get_filename_component(CUR_PROTO_FILE "${CUR_PROTO_FILE}" ABSOLUTE) - string(REGEX REPLACE ".proto" ".pb" CUR_FILE_OUT "${PROTO_FILENAME}") - execute_process(COMMAND ${PROTOBUF_PROTOC_EXECUTABLE} --proto_path=${PROTOBUF_INPUT_DIRECTORY} --cpp_out=${GENERATED_PROTO_DIR} ${CUR_PROTO_FILE}) - set_source_files_properties(${GENERATED_PROTO_DIR}/${CUR_FILE_OUT}.h ${GENERATED_PROTO_DIR}/${CUR_FILE_OUT}.cc PROPERTIES COMPILE_FLAGS "-Wno-all -Wno-extra -Wno-error -Wno-old-style-cast") - endforeach (CUR_PROTO_FILE) - add_definitions(-DWITH_PROTOBUF=1) -else () - message("Protobuf not found") +if (ENABLE_GRPC) + find_package(Protobuf) + if (PROTOBUF_FOUND) + INCLUDE_DIRECTORIES(SYSTEM ${PROTOBUF_INCLUDE_DIR}) + 
list(APPEND REINDEXER_LIBRARIES ${PROTOBUF_LIBRARIES}) + file(GLOB PROTOBUF_SOURCE_FILES ${REINDEXER_SOURCE_PATH}/gtests/tests/proto/*.proto) + + SET(PROTOBUF_PROTOC_EXECUTABLE protoc) + + foreach (CUR_PROTO_FILE ${PROTOBUF_SOURCE_FILES}) + get_filename_component(PROTOBUF_INPUT_DIRECTORY "${CUR_PROTO_FILE}" DIRECTORY) + get_filename_component(PROTOBUF_INPUT_DIRECTORY "${PROTOBUF_INPUT_DIRECTORY}" ABSOLUTE) + get_filename_component(PROTO_FILENAME "${CUR_PROTO_FILE}" NAME) + get_filename_component(CUR_PROTO_FILE "${CUR_PROTO_FILE}" ABSOLUTE) + string(REGEX REPLACE ".proto" ".pb" CUR_FILE_OUT "${PROTO_FILENAME}") + execute_process(COMMAND ${PROTOBUF_PROTOC_EXECUTABLE} --proto_path=${PROTOBUF_INPUT_DIRECTORY} --cpp_out=${GENERATED_PROTO_DIR} ${CUR_PROTO_FILE}) + set_source_files_properties(${GENERATED_PROTO_DIR}/${CUR_FILE_OUT}.h ${GENERATED_PROTO_DIR}/${CUR_FILE_OUT}.cc PROPERTIES COMPILE_FLAGS "-Wno-all -Wno-extra -Wno-error -Wno-old-style-cast") + endforeach (CUR_PROTO_FILE) + add_definitions(-DWITH_PROTOBUF=1) + include_directories(${REINDEXER_BINARY_PATH}/server/grpc ${GENERATED_PROTO_DIR}) + else () + message("Protobuf not found") + endif () + endif () -include_directories(${REINDEXER_BINARY_PATH}/server/grpc ${GENERATED_PROTO_DIR}) + file(GLOB_RECURSE SRCS *.cc *.h ${GENERATED_PROTO_DIR}/*.cc) file(GLOB_RECURSE FUZZING_SRCS */fuzzing/* *fuzzing/*) diff --git a/cpp_src/gtests/tests/fixtures/ft_api.cc b/cpp_src/gtests/tests/fixtures/ft_api.cc index 08e93be72..764c90118 100644 --- a/cpp_src/gtests/tests/fixtures/ft_api.cc +++ b/cpp_src/gtests/tests/fixtures/ft_api.cc @@ -157,12 +157,12 @@ void FTApi::AddInBothFields(std::string_view ns, std::string_view w1, std::strin } reindexer::QueryResults FTApi::SimpleSelect(std::string word, bool withHighlight) { - auto qr{reindexer::Query("nm1").Where("ft3", CondEq, std::move(word))}; + auto q{reindexer::Query("nm1").Where("ft3", CondEq, std::move(word)).WithRank()}; reindexer::QueryResults res; if (withHighlight) { - 
qr.AddFunction("ft3 = highlight(!,!)"); + q.AddFunction("ft3 = highlight(!,!)"); } - auto err = rt.reindexer->Select(qr, res); + auto err = rt.reindexer->Select(q, res); EXPECT_TRUE(err.ok()) << err.what(); return res; diff --git a/cpp_src/gtests/tests/fixtures/ft_api.h b/cpp_src/gtests/tests/fixtures/ft_api.h index 542630dec..4664eaea2 100644 --- a/cpp_src/gtests/tests/fixtures/ft_api.h +++ b/cpp_src/gtests/tests/fixtures/ft_api.h @@ -111,7 +111,7 @@ class FTApi : public ::testing::TestWithParam fields; - reindexer::fast_hash_set stopWords; + reindexer::fast_hash_set stopWords; std::string extraWordSymbols = "-/+"; }; int counter_ = 0; diff --git a/cpp_src/gtests/tests/fixtures/join_selects_api.h b/cpp_src/gtests/tests/fixtures/join_selects_api.h index 084e2b307..e2d881249 100644 --- a/cpp_src/gtests/tests/fixtures/join_selects_api.h +++ b/cpp_src/gtests/tests/fixtures/join_selects_api.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include @@ -112,10 +113,24 @@ class JoinSelectsApi : public ReindexerApi { { std::unique_lock lck(authorsMutex); - authorsIds.push_back(DostoevskyAuthorId); + if (std::find_if(authorsIds.begin(), authorsIds.end(), [this](int id) { return DostoevskyAuthorId == id; }) == + authorsIds.end()) { + authorsIds.push_back(DostoevskyAuthorId); + } } } + void RemoveLastAuthors(int32_t count) { + VariantArray idsToRemove; + idsToRemove.reserve(std::min(size_t(count), authorsIds.size())); + auto rend = authorsIds.rbegin() + std::min(size_t(count), authorsIds.size()); + for (auto ait = authorsIds.rbegin(); ait != rend; ++ait) { + idsToRemove.emplace_back(*ait); + } + const auto removed = Delete(Query(authors_namespace).Where(authorid, CondSet, idsToRemove)); + ASSERT_EQ(removed, count); + } + void FillBooksNamespace(int32_t since, int32_t count) { int authorIdIdx = 0; { @@ -135,7 +150,7 @@ class JoinSelectsApi : public ReindexerApi { item[authorid_fk] = authorsIds[authorIdIdx]; } - item[genreId_fk] = genresIds[rand() % genresIds.size()]; 
+ item[genreId_fk] = genres[rand() % genres.size()].id; Upsert(books_namespace, item); const auto err = Commit(books_namespace); ASSERT_TRUE(err.ok()) << err.what(); @@ -176,9 +191,17 @@ class JoinSelectsApi : public ReindexerApi { item[genreid] = id; item[genrename] = name; Upsert(genres_namespace, item); + auto found = std::find_if(genres.begin(), genres.end(), [id](const Genre& g) { return g.id == id; }); + ASSERT_EQ(found, genres.end()); + genres.push_back(Genre{id, name}); + } + void RemoveGenre(int id) { + Item item = NewItem(genres_namespace); + item[genreid] = id; + Delete(genres_namespace, item); const auto err = Commit(genres_namespace); ASSERT_TRUE(err.ok()) << err.what(); - genresIds.push_back(id); + genres.erase(std::remove_if(genres.begin(), genres.end(), [id](const Genre& g) { return g.id == id; }), genres.end()); } void FillQueryResultFromItem(Item& item, QueryResultRow& resultRow) { @@ -398,12 +421,21 @@ class JoinSelectsApi : public ReindexerApi { EXPECT_EQ(err.what(), expectedText) << sql; } { - Query q = Query::FromSQL(sql); + const Query q = Query::FromSQL(sql); auto err = rt.reindexer->Select(q, qr); EXPECT_EQ(err.code(), expectedCode) << sql; EXPECT_EQ(err.what(), expectedText) << sql; } } + void ValidateQueryThrow(std::string_view sql, ErrorCode expectedCode, std::string_view expectedRegex) { + QueryResults qr; + { + auto err = rt.reindexer->Select(sql, qr); + EXPECT_EQ(err.code(), expectedCode) << sql; + EXPECT_THAT(err.what(), testing::ContainsRegex(expectedRegex)) << sql; + } + EXPECT_THROW(const Query q = Query::FromSQL(sql), Error) << sql; + } static std::string addQuotes(const std::string& str) { std::string output; @@ -413,6 +445,14 @@ class JoinSelectsApi : public ReindexerApi { return output; } + void SetQueriesCacheHitsCount(unsigned hitsCount) { + auto q = reindexer::Query("#config") + .Set("namespaces.cache.query_count_hit_to_cache", int64_t(hitsCount)) + .Where("type", CondEq, "namespaces"); + auto updated = Update(q); + 
ASSERT_EQ(updated, 1); + } + const char* id = "id"; const char* authorid = "authorid"; const char* authorid_fk = "authorid_fk"; @@ -440,8 +480,13 @@ class JoinSelectsApi : public ReindexerApi { const std::string location_namespace = "location_namespace"; const std::string config_namespace = "#config"; + struct Genre { + int id; + std::string name; + }; + std::vector authorsIds; - std::vector genresIds; + std::vector genres; // clang-format off const std::vector locations = { diff --git a/cpp_src/gtests/tests/fixtures/queries_verifier.h b/cpp_src/gtests/tests/fixtures/queries_verifier.h index 6929958fc..033a149ed 100644 --- a/cpp_src/gtests/tests/fixtures/queries_verifier.h +++ b/cpp_src/gtests/tests/fixtures/queries_verifier.h @@ -1,7 +1,16 @@ #pragma once #include + +#if defined(__GNUC__) && (__GNUC__ == 12) && defined(REINDEX_WITH_ASAN) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wmaybe-uninitialized" +#include +#pragma GCC diagnostic pop +#else // REINDEX_WITH_ASAN #include +#endif // REINDEX_WITH_ASAN + #include #include "core/nsselecter/joinedselectormock.h" #include "core/nsselecter/sortexpression.h" diff --git a/cpp_src/gtests/tests/fixtures/reindexer_api.h b/cpp_src/gtests/tests/fixtures/reindexer_api.h index 194c32c72..f7cdc01fd 100644 --- a/cpp_src/gtests/tests/fixtures/reindexer_api.h +++ b/cpp_src/gtests/tests/fixtures/reindexer_api.h @@ -47,6 +47,9 @@ class ReindexerApi : public virtual ::testing::Test { [[nodiscard]] Error Commit(std::string_view ns) { return rt.Commit(ns); } void Upsert(std::string_view ns, Item &item) { rt.Upsert(ns, item); } + size_t Update(const Query &q) { return rt.Update(q); } + void Delete(std::string_view ns, Item &item) { rt.Delete(ns, item); } + size_t Delete(const Query &q) { return rt.Delete(q); } void PrintQueryResults(const std::string &ns, const QueryResults &res) { rt.PrintQueryResults(ns, res); } diff --git a/cpp_src/gtests/tests/fixtures/reindexertestapi.h 
b/cpp_src/gtests/tests/fixtures/reindexertestapi.h index c36c1d0df..8214d475c 100644 --- a/cpp_src/gtests/tests/fixtures/reindexertestapi.h +++ b/cpp_src/gtests/tests/fixtures/reindexertestapi.h @@ -6,6 +6,7 @@ #include #include "core/indexdef.h" #include "core/indexopts.h" +#include "core/query/query.h" #include "gtests/tests/gtest_cout.h" #include "tools/errors.h" #include "tools/stringstools.h" @@ -74,6 +75,23 @@ class ReindexerTestApi { ASSERT_TRUE(err.ok()) << err.what(); ASSERT_TRUE(item.Status().ok()) << item.Status().what(); } + size_t Update(const reindexer::Query &q) { + QueryResultsType qr; + auto err = reindexer->Update(q, qr); + EXPECT_TRUE(err.ok()) << err.what(); + return qr.Count(); + } + void Delete(std::string_view ns, ItemType &item) { + assertrx(!!item); + auto err = reindexer->Delete(ns, item); + ASSERT_TRUE(err.ok()) << err.what(); + } + size_t Delete(const reindexer::Query &q) { + QueryResultsType qr; + auto err = reindexer->Delete(q, qr); + EXPECT_TRUE(err.ok()) << err.what(); + return qr.Count(); + } reindexer::Error DumpIndex(std::ostream &os, std::string_view ns, std::string_view index) { return reindexer->DumpIndex(os, ns, index); } diff --git a/cpp_src/gtests/tests/unit/ft/ft_generic.cc b/cpp_src/gtests/tests/unit/ft/ft_generic.cc index 5b4c03b6e..63d9e9c86 100644 --- a/cpp_src/gtests/tests/unit/ft/ft_generic.cc +++ b/cpp_src/gtests/tests/unit/ft/ft_generic.cc @@ -1018,6 +1018,66 @@ TEST_P(FTGenericApi, MergeLimitConstraints) { ASSERT_TRUE(err.ok()) << err.what(); } +TEST_P(FTGenericApi, ConfigBm25Coefficients) { + reindexer::FtFastConfig cfgDef = GetDefaultConfig(); + cfgDef.maxAreasInDoc = 100; + reindexer::FtFastConfig cfg = cfgDef; + cfg.bm25Config.bm25b = 0.0; + cfg.bm25Config.bm25Type = reindexer::FtFastConfig::Bm25Config::Bm25Type::rx; + + Init(cfg); + Add("nm1"sv, "слово пусто слова пусто словами"sv, ""sv); + Add("nm1"sv, "слово пусто слово"sv, ""sv); + Add("nm1"sv, "otherword targetword"sv, ""sv); + Add("nm1"sv, "otherword 
targetword otherword targetword"sv, ""sv); + Add("nm1"sv, "otherword targetword otherword targetword targetword"sv, ""sv); + Add("nm1"sv, + "otherword targetword otherword otherword otherword targetword otherword targetword otherword targetword otherword otherword otherword otherword otherword otherword otherword otherword targetword"sv, + ""sv); + + CheckResults("targetword", + {{"otherword !targetword! otherword otherword otherword !targetword! otherword !targetword! otherword !targetword! otherword otherword otherword otherword otherword otherword otherword otherword !targetword!", ""}, + {"otherword !targetword! otherword !targetword targetword!", ""}, + {"otherword !targetword! otherword !targetword!", ""}, + {"otherword !targetword!", ""}}, + true); + + cfg = cfgDef; + cfg.bm25Config.bm25b = 0.75; + reindexer::Error err = SetFTConfig(cfg, "nm1", "ft3", {"ft1", "ft2"}); + ASSERT_TRUE(err.ok()) << err.what(); + + CheckResults("targetword", + { + {"otherword !targetword! otherword !targetword targetword!", ""}, + {"otherword !targetword! otherword !targetword!", ""}, + {"otherword !targetword! otherword otherword otherword !targetword! otherword !targetword! otherword !targetword! otherword otherword otherword otherword otherword otherword otherword otherword !targetword!", ""}, + {"otherword !targetword!", ""} + }, + true); + cfg = cfgDef; + cfg.bm25Config.bm25Type = reindexer::FtFastConfig::Bm25Config::Bm25Type::wordCount; + cfg.fieldsCfg[0].positionWeight = 0.0; + cfg.fullMatchBoost=1.0; + + err = SetFTConfig(cfg, "nm1", "ft3", {"ft1", "ft2"}); + ASSERT_TRUE(err.ok()) << err.what(); + + CheckResults("targetword", + { + {"otherword !targetword! otherword otherword otherword !targetword! otherword !targetword! otherword !targetword! otherword otherword otherword otherword otherword otherword otherword otherword !targetword!", ""}, + {"otherword !targetword! otherword !targetword targetword!", ""}, + {"otherword !targetword! 
otherword !targetword!", ""}, + {"otherword !targetword!", ""}, + + }, + true); + + CheckResults("словах",{{"!слово! пусто !слово!",""},{"!слово! пусто !слова! пусто !словами!",""}},true); + +} + + TEST_P(FTGenericApi, ConfigFtProc) { reindexer::FtFastConfig cfgDef = GetDefaultConfig(); cfgDef.synonyms = {{{"тестов"}, {"задача"}}}; @@ -1311,6 +1371,87 @@ TEST_P(FTGenericApi, ExplainWithFtPreselect) { } } +TEST_P(FTGenericApi, StopWordsWithMorphemes) { + reindexer::FtFastConfig cfg = GetDefaultConfig(); + + Init(cfg); + Add("Шахматы из слоновой кости"sv); + Add("Мат в эфире "sv); + Add("Известняк"sv); + Add("Известия"sv); + Add("Изверг"sv); + + Add("Подобрал подосиновики, положил в лубочек"sv); + Add("Подопытный кролик"sv); + Add("Шла Саша по шоссе"sv); + + Add("Зайка серенький под елочкой скакал"sv); + Add("За Альянс! (с)"sv); + Add("Заноза в пальце"sv); + + Add("На западном фронте без перемен"sv); + Add("Наливные яблочки"sv); + Add("Нарком СССР"sv); + + CheckResults("*из*", {{"!Известняк!", ""}, {"!Известия!", ""}, {"!Изверг!", ""}}, false); + CheckResults("из", {}, false); + + CheckResults("*под*", {{"!Подобрал подосиновики!, положил в лубочек", ""}, {"!Подопытный! кролик", ""}}, false); + CheckResults("под", {}, false); + + CheckResults( + "*за*", {{"!Зайка! серенький под елочкой скакал", ""}, {"!Заноза! в пальце", ""}, {"На !западном! фронте без перемен", ""}}, false); + CheckResults("за", {}, false); + + CheckResults("*на*", + { + {"!Наливные! яблочки", ""}, + {"!Нарком! СССР", ""}, + }, + false); + CheckResults("на", {}, false); + + cfg.stopWords.clear(); + + cfg.stopWords.insert({"на"}); + cfg.stopWords.insert({"мат", reindexer::StopWord::Type::Morpheme}); + + SetFTConfig(cfg); + + CheckResults("*из*", {{"Шахматы !из! слоновой кости", ""}, {"!Известняк!", ""}, {"!Известия!", ""}, {"!Изверг!", ""}}, false); + CheckResults("из", {{"Шахматы !из! 
слоновой кости", ""}}, false); + + CheckResults( + "*под*", + {{"!Подобрал подосиновики!, положил в лубочек", ""}, {"!Подопытный! кролик", ""}, {"Зайка серенький !под! елочкой скакал", ""}}, + false); + CheckResults("под", {{"Зайка серенький !под! елочкой скакал", ""}}, false); + + CheckResults("*по*", + {{"Шла Саша !по! шоссе", ""}, + {"!Подобрал подосиновики, положил! в лубочек", ""}, + {"!Подопытный! кролик", ""}, + {"Зайка серенький !под! елочкой скакал", ""}}, + false); + CheckResults("по~", {{"Шла Саша !по! шоссе", ""}, {"Зайка серенький !под! елочкой скакал", ""}}, false); + CheckResults("по", {{"Шла Саша !по! шоссе", ""}}, false); + + CheckResults("*мат*", {{"!Шахматы! из слоновой кости", ""}}, false); + CheckResults("мат", {}, false); + + CheckResults("*за*", + {{"!Зайка! серенький под елочкой скакал", ""}, + {"!Заноза! в пальце", ""}, + {"!За! Альянс! (с)", ""}, + {"На !западном! фронте без перемен", ""}}, + false); + CheckResults("за", {{"!За! Альянс! (с)", ""}}, false); + + CheckResults("*на*", {}, false); + CheckResults("на~", {}, false); + CheckResults("на", {}, false); +} + INSTANTIATE_TEST_SUITE_P(, FTGenericApi, ::testing::Values(reindexer::FtFastConfig::Optimization::Memory, reindexer::FtFastConfig::Optimization::CPU), [](const auto& info) { diff --git a/cpp_src/gtests/tests/unit/join_test.cc b/cpp_src/gtests/tests/unit/join_test.cc index 6fb37ea94..b62716ffc 100644 --- a/cpp_src/gtests/tests/unit/join_test.cc +++ b/cpp_src/gtests/tests/unit/join_test.cc @@ -608,7 +608,7 @@ TEST_F(JoinSelectsApi, TestNestedJoinsError) { for (auto& firstJoin : joinTypes) { for (auto& secondJoin : joinTypes) { auto sql = fmt::sprintf(sqlPattern, firstJoin, secondJoin); - ValidateQueryError(sql, errParams, "JOINs nested into the other JOINs are not supported"); + ValidateQueryThrow(sql, errParseSQL, "Expected ')', but found .*, line: 1 column: .*"); } } } @@ -620,7 +620,7 @@ TEST_F(JoinSelectsApi, TestNestedMergesInJoinsError) { auto joinTypes = {"inner join", 
"join", "left join"}; for (auto& join : joinTypes) { auto sql = fmt::sprintf(sqlPattern, join); - ValidateQueryError(sql, errParams, "MERGEs nested into the JOINs are not supported"); + ValidateQueryThrow(sql, errParseSQL, "Expected ')', but found merge, line: 1 column: .*"); } } @@ -631,6 +631,112 @@ TEST_F(JoinSelectsApi, TestNestedMergesInMergesError) { ValidateQueryError(sql, errParams, "MERGEs nested into the MERGEs are not supported"); } +TEST_F(JoinSelectsApi, CountCachedWithDifferentJoinConditions) { + // Test checks if cached total values is changing after inner join's condition change + + const std::vector kBaseQueries = { + Query(books_namespace).InnerJoin(authorid_fk, authorid, CondEq, Query(authors_namespace)).Limit(10), + Query(books_namespace).InnerJoin(authorid_fk, authorid, CondEq, Query(authors_namespace).Where(authorid, CondLe, 100)).Limit(10), + Query(books_namespace).InnerJoin(authorid_fk, authorid, CondEq, Query(authors_namespace).Where(authorid, CondGe, 200)).Limit(10), + Query(books_namespace).InnerJoin(authorid_fk, authorid, CondEq, Query(authors_namespace).Where(authorid, CondLe, 400)).Limit(10), + Query(books_namespace).InnerJoin(authorid_fk, authorid, CondEq, Query(authors_namespace).Where(authorid, CondGe, 400)).Limit(10)}; + + SetQueriesCacheHitsCount(1); + for (auto& bq : kBaseQueries) { + const Query cachedTotalNoCondQ = Query(bq).CachedTotal(); + const Query totalCountNoCondQ = Query(bq).ReqTotal(); + QueryResults qrRegular; + auto err = rt.reindexer->Select(totalCountNoCondQ, qrRegular); + ASSERT_TRUE(err.ok()) << err.what() << "; " << totalCountNoCondQ.GetSQL(); + // Run all the queries with CountCached twice to check main and cached values + for (int i = 0; i < 2; ++i) { + QueryResults qrCached; + err = rt.reindexer->Select(cachedTotalNoCondQ, qrCached); + ASSERT_TRUE(err.ok()) << err.what() << "; i = " << i << "; " << cachedTotalNoCondQ.GetSQL(); + EXPECT_EQ(qrCached.TotalCount(), qrRegular.TotalCount()) << " i = " << i << "; " 
<< bq.GetSQL(); + } + } +} + +TEST_F(JoinSelectsApi, CountCachedWithJoinNsUpdates) { + const Genre kLastGenre = *genres.rbegin(); + const std::vector kBaseQueries = { + Query(books_namespace) + .InnerJoin(authorid_fk, authorid, CondEq, Query(authors_namespace).Where(authorid, CondGe, 100)) + .OrInnerJoin(genreId_fk, genreid, CondEq, + Query(genres_namespace) + .Where(genrename, CondSet, + {Variant{"non fiction"}, Variant{"poetry"}, Variant{"documentary"}, Variant{kLastGenre.name}})) + .Limit(10), + Query(books_namespace) + .InnerJoin(authorid_fk, authorid, CondEq, Query(authors_namespace).Where(authorid, CondGe, 100)) + .InnerJoin(genreId_fk, genreid, CondEq, + Query(genres_namespace) + .Where(genrename, CondSet, + {Variant{"non fiction"}, Variant{"poetry"}, Variant{"documentary"}, Variant{kLastGenre.name}})) + .Limit(10), + Query(books_namespace) + .InnerJoin(authorid_fk, authorid, CondEq, Query(authors_namespace).Where(authorid, CondGe, 100)) + .OpenBracket() + .InnerJoin(genreId_fk, genreid, CondEq, + Query(genres_namespace).Where(genrename, CondSet, {Variant{"non fiction"}, Variant{kLastGenre.name}})) + .OrInnerJoin( + genreId_fk, genreid, CondEq, + Query(genres_namespace).Where(genrename, CondSet, {Variant{"poetry"}, Variant{"documentary"}, Variant{kLastGenre.name}})) + .CloseBracket() + .Limit(10), + Query(books_namespace) + .InnerJoin(authorid_fk, authorid, CondEq, Query(authors_namespace).Where(authorid, CondGe, 100)) + .OpenBracket() + .InnerJoin(genreId_fk, genreid, CondEq, + Query(genres_namespace).Where(genrename, CondSet, {Variant{"non fiction"}, Variant{kLastGenre.name}})) + .InnerJoin(genreId_fk, genreid, CondEq, Query(genres_namespace)) + .CloseBracket() + .Limit(10), + Query(books_namespace) + .InnerJoin(authorid_fk, authorid, CondEq, Query(authors_namespace).Where(authorid, CondGe, 100)) + .OpenBracket() + .InnerJoin(genreId_fk, genreid, CondEq, + Query(genres_namespace).Where(genrename, CondSet, {Variant{"non fiction"}, 
Variant{kLastGenre.name}})) + .OrInnerJoin(genreId_fk, genreid, CondEq, Query(genres_namespace)) + .CloseBracket() + .Limit(10)}; + + SetQueriesCacheHitsCount(1); + for (auto& bq : kBaseQueries) { + const Query cachedTotalNoCondQ = Query(bq).CachedTotal(); + const Query totalCountNoCondQ = Query(bq).ReqTotal(); + auto checkQuery = [&](std::string_view step) { + // With Initial data + QueryResults qrRegular; + auto err = rt.reindexer->Select(totalCountNoCondQ, qrRegular); + ASSERT_TRUE(err.ok()) << err.what() << "; step: " << step << "; " << totalCountNoCondQ.GetSQL(); + // Run all the queries with CountCached twice to check main and cached values + for (int i = 0; i < 2; ++i) { + QueryResults qrCached; + err = rt.reindexer->Select(cachedTotalNoCondQ, qrCached); + ASSERT_TRUE(err.ok()) << err.what() << "; step: " << step << "; i = " << i << "; " << cachedTotalNoCondQ.GetSQL(); + EXPECT_EQ(qrCached.TotalCount(), qrRegular.TotalCount()) << "step: " << step << "; i = " << i << "; " << bq.GetSQL(); + } + }; + + // Check query and create cache with initial data + checkQuery("initial data"); + + // Update data on the first joined namespace + RemoveLastAuthors(250); + checkQuery("first ns update (remove)"); + FillAuthorsNamespace(250); + checkQuery("first ns update (add)"); + + // Update data on the second joined namespace + RemoveGenre(kLastGenre.id); + checkQuery("second ns update (remove)"); + AddGenre(kLastGenre.id, kLastGenre.name); + checkQuery("second ns update (insert)"); + } +} + TEST_F(JoinOnConditionsApi, TestGeneralConditions) { const std::string sqlTemplate = R"(select * from books_namespace inner join books_namespace on (books_namespace.authorid_fk = books_namespace.authorid_fk and books_namespace.pages %s books_namespace.pages);)"; diff --git a/cpp_src/gtests/tests/unit/queries_test.cc b/cpp_src/gtests/tests/unit/queries_test.cc index 75118509d..a09710bce 100644 --- a/cpp_src/gtests/tests/unit/queries_test.cc +++ b/cpp_src/gtests/tests/unit/queries_test.cc 
@@ -222,8 +222,8 @@ TEST_F(QueriesApi, SqlParseGenerate) { .Or() .Where("age", CondSet, {"1", "2", "3", "4"}) .Limit(10000000)}, - {"SELECT * FROM test_namespace WHERE INNER JOIN join_ns ON test_namespace.id = join_ns.id ORDER BY 'year + join_ns.year * (5 - " - "rand())'", + {"SELECT * FROM test_namespace WHERE INNER JOIN join_ns ON test_namespace.id = join_ns.id " + "ORDER BY 'year + join_ns.year * (5 - rand())'", Query{"test_namespace"}.InnerJoin("id", "id", CondEq, Query{"join_ns"}).Sort("year + join_ns.year * (5 - rand())", false)}, {"SELECT * FROM "s + geomNs + " WHERE ST_DWithin(" + kFieldNamePointNonIndex + ", ST_GeomFromText('POINT(1.25 -7.25)'), 0.5)", Query{geomNs}.DWithin(kFieldNamePointNonIndex, reindexer::Point{1.25, -7.25}, 0.5)}, @@ -242,6 +242,74 @@ TEST_F(QueriesApi, SqlParseGenerate) { Query{"main_ns"}.Where("id", CondGt, Query{"second_ns"}.Aggregate(AggAvg, {"id"}).Where("id", CondLt, 10))}, {"SELECT * FROM main_ns WHERE id > (SELECT COUNT(*) FROM second_ns WHERE id < 10 LIMIT 0)", Query{"main_ns"}.Where("id", CondGt, Query{"second_ns"}.Where("id", CondLt, 10).ReqTotal())}, + {"SELECT * FROM main_ns WHERE (SELECT * FROM second_ns WHERE id < 10 LIMIT 0) IS NOT NULL AND value IN (5,4,1)", + Query{"main_ns"} + .Where(Query{"second_ns"}.Where("id", CondLt, 10), CondAny, {}) + .Where("value", CondSet, {Variant{5}, Variant{4}, Variant{1}})}, + {"SELECT * FROM main_ns WHERE ((SELECT * FROM second_ns WHERE id < 10 LIMIT 0) IS NOT NULL) AND value IN (5,4,1)", + Query{"main_ns"} + .OpenBracket() + .Where(Query{"second_ns"}.Where("id", CondLt, 10), CondAny, {}) + .CloseBracket() + .Where("value", CondSet, {Variant{5}, Variant{4}, Variant{1}})}, + {"SELECT * FROM main_ns WHERE id IN (SELECT id FROM second_ns WHERE id < 999) AND value >= 1000", + Query{"main_ns"}.Where("id", CondSet, Query{"second_ns"}.Select({"id"}).Where("id", CondLt, 999)).Where("value", CondGe, 1000)}, + {"SELECT * FROM main_ns WHERE (id IN (SELECT id FROM second_ns WHERE id < 999)) AND 
value >= 1000", + Query{"main_ns"} + .OpenBracket() + .Where("id", CondSet, Query{"second_ns"}.Select({"id"}).Where("id", CondLt, 999)) + .CloseBracket() + .Where("value", CondGe, 1000)}, + {"SELECT * FROM main_ns " + "WHERE (SELECT id FROM second_ns WHERE id < 999 AND xxx IS NULL ORDER BY 'value' DESC LIMIT 10) = 0 " + "ORDER BY 'tree'", + Query{"main_ns"} + .Where(Query{"second_ns"} + .Select({"id"}) + .Where("id", CondLt, 999) + .Where("xxx", CondEmpty, VariantArray{}) + .Limit(10) + .Sort("value", true), + CondEq, 0) + .Sort("tree", false)}, + {"SELECT * FROM main_ns " + "WHERE ((SELECT id FROM second_ns WHERE id < 999 AND xxx IS NULL ORDER BY 'value' DESC LIMIT 10) = 0) " + "ORDER BY 'tree'", + Query{"main_ns"} + .OpenBracket() + .Where(Query{"second_ns"} + .Select({"id"}) + .Where("id", CondLt, 999) + .Where("xxx", CondEmpty, VariantArray{}) + .Limit(10) + .Sort("value", true), + CondEq, 0) + .CloseBracket() + .Sort("tree", false)}, + {"SELECT * FROM main_ns " + "WHERE INNER JOIN (SELECT * FROM second_ns WHERE NOT val = 10) ON main_ns.id = second_ns.uid " + "AND id IN (SELECT id FROM third_ns WHERE id < 999) " + "AND INNER JOIN (SELECT * FROM fourth_ns WHERE val IS NOT NULL OFFSET 2 LIMIT 1) ON main_ns.uid = fourth_ns.id", + Query{"main_ns"} + .InnerJoin("id", "uid", CondEq, Query("second_ns").Not().Where("val", CondEq, 10)) + .Where("id", CondSet, Query{"third_ns"}.Select({"id"}).Where("id", CondLt, 999)) + .InnerJoin("uid", "id", CondEq, Query("fourth_ns").Where("val", CondAny, VariantArray{}).Limit(1).Offset(2))}, + {"SELECT * FROM main_ns " + "WHERE INNER JOIN (SELECT * FROM second_ns WHERE NOT val = 10 OFFSET 2 LIMIT 1) ON main_ns.id = second_ns.uid " + "AND id IN (SELECT id FROM third_ns WHERE id < 999) " + "LEFT JOIN (SELECT * FROM fourth_ns WHERE val IS NOT NULL) ON main_ns.uid = fourth_ns.id", + Query{"main_ns"} + .InnerJoin("id", "uid", CondEq, Query("second_ns").Not().Where("val", CondEq, 10).Limit(1).Offset(2)) + .Where("id", CondSet, 
Query{"third_ns"}.Select({"id"}).Where("id", CondLt, 999)) + .LeftJoin("uid", "id", CondEq, Query("fourth_ns").Where("val", CondAny, VariantArray{}))}, + {"SELECT * FROM main_ns " + "WHERE id IN (SELECT id FROM third_ns WHERE id < 999 OFFSET 7 LIMIT 5) " + "LEFT JOIN (SELECT * FROM second_ns WHERE NOT val = 10 OFFSET 2 LIMIT 1) ON main_ns.id = second_ns.uid " + "LEFT JOIN (SELECT * FROM fourth_ns WHERE val IS NOT NULL) ON main_ns.uid = fourth_ns.id", + Query{"main_ns"} + .LeftJoin("id", "uid", CondEq, Query("second_ns").Not().Where("val", CondEq, 10).Limit(1).Offset(2)) + .Where("id", CondSet, Query{"third_ns"}.Select({"id"}).Where("id", CondLt, 999).Limit(5).Offset(7)) + .LeftJoin("uid", "id", CondEq, Query("fourth_ns").Where("val", CondAny, VariantArray{}))}, }; for (const auto& [sql, expected, direction] : cases) { diff --git a/cpp_src/gtests/tests/unit/replication_master_master_test.cc b/cpp_src/gtests/tests/unit/replication_master_master_test.cc index d3d5c81ef..b868c3a9c 100644 --- a/cpp_src/gtests/tests/unit/replication_master_master_test.cc +++ b/cpp_src/gtests/tests/unit/replication_master_master_test.cc @@ -120,16 +120,33 @@ class TestNamespace1 { Query qr = Query(nsName_).Sort("id", false); BaseApi::QueryResultsType res(node.Get()->api.reindexer.get()); auto err = node.Get()->api.reindexer->Select(qr, res); - EXPECT_TRUE(err.ok()) << err.what(); + ASSERT_TRUE(err.ok()) << err.what(); for (auto it : res) { WrSerializer ser; - auto err = it.GetJSON(ser, false); - EXPECT_TRUE(err.ok()) << err.what(); + err = it.GetJSON(ser, false); + ASSERT_TRUE(err.ok()) << err.what(); gason::JsonParser parser; auto root = parser.Parse(ser.Slice()); ids.push_back(root["id"].As()); } } + void GetDataWithStrings(ServerControl& node, std::map& ids) { + Query qr = Query(nsName_).Sort("id", false); + BaseApi::QueryResultsType res(node.Get()->api.reindexer.get()); + auto err = node.Get()->api.reindexer->Select(qr, res); + ASSERT_TRUE(err.ok()) << err.what(); + for (auto it : 
res) { + WrSerializer ser; + err = it.GetJSON(ser, false); + ASSERT_TRUE(err.ok()) << err.what(); + gason::JsonParser parser; + auto root = parser.Parse(ser.Slice()); + int id = root["id"].As(); + std::string s = root["data"].As(); + ids[id] = s; + } + } + const std::string nsName_; }; @@ -1105,3 +1122,175 @@ TEST_F(ReplicationSlaveSlaveApi, WriteIntoSlaveNsAfterReconfiguration) { validateItemsCount(nodes[1], kNs2, 3 * n); for (auto& node : nodes) node.Stop(); } + +struct DataStore { + void Add(int64_t id, const std::string& s) { + std::unique_lock l(mtx); + data[id] = s; + } + bool Check(const std::map& r) { + std::unique_lock l(mtx); + return data == r; + } + int64_t Size() { + std::unique_lock l(mtx); + return data.size(); + } + +private: + std::mutex mtx; + std::map data; +}; + +class ServerIdChange : public ReplicationSlaveSlaveApi, public ::testing::WithParamInterface { +protected: + void SetUp() { fs::RmDirAll(kBaseTestsetDbPathServerIdChange); } + + void TearDown() {} + +public: + void AddFun(ServerControl& master, DataStore& dataStore, int fromId, unsigned int dn) { + for (unsigned int i = 0; i < dn; i++) { + reindexer::client::Item item = master.Get()->api.NewItem("ns1"); + int64_t id = fromId + i; + std::string ss = reindexer::randStringAlph(10); + dataStore.Add(id, ss); + auto err = item.FromJSON("{\"id\":" + std::to_string(id) + ",\"data\":\"" + ss + "\"" + "}"); + ASSERT_TRUE(err.ok()) << err.what(); + master.Get()->api.Upsert("ns1", item); + } + }; + + void ChangeServerId(bool isMaster, ServerControl& node, int newServerId, int port) { + if (isMaster) { + ReplicationConfigTest config("master"); + config.serverId_ = newServerId; + node.Get()->MakeMaster(config); + } else { + std::string masterDsn = "cproto://127.0.0.1:" + std::to_string(port) + "/db"; + ReplicationConfigTest config("slave", false, true, newServerId, masterDsn); + node.Get()->MakeSlave(0, config); + } + } + +protected: + const std::string kBaseTestsetDbPathServerIdChange = 
fs::JoinPath(fs::GetTempDir(), "rx_test/ServerIdChange"); +}; + +TEST_P(ServerIdChange, UpdateServerId) { + const int port = 10100; + const std::string kBaseDbPath(fs::JoinPath(kBaseTestsetDbPathServerIdChange, "UpdateServerId")); + const std::string kDbPathMaster(kBaseDbPath + "/test_"); + std::vector nodes; + DataStore dataStore; + + /* + m + / \ + 1 2 + | + 3 + */ + + std::vector slaveConfiguration = {-1, port, port, port + 1}; + for (size_t i = 0; i < slaveConfiguration.size(); i++) { + nodes.emplace_back().InitServer(i, port + i, port + 1000 + i, kDbPathMaster + std::to_string(i), "db", true); + ChangeServerId(i == 0, nodes.back(), 0, slaveConfiguration[i]); + } + + ServerControl& master = nodes[0]; + TestNamespace1 ns(master); + + const int startId = 0; + const int n2 = 20000; + const int dn = 10; + + AddFun(master, dataStore, startId, n2); + + for (size_t i = 1; i < nodes.size(); i++) { + WaitSync(nodes[0], nodes[i], "ns1"); + } + for (auto& n : nodes) { + n.Get()->SetWALSize(GetParam(), "ns1"); + } + + auto changeConfig = [this, &nodes, &slaveConfiguration, &ns, &dataStore](bool isMaster, int configurationIndex, int newServerId, + int from) { + std::atomic_bool stopInsertThread = false; + std::mutex m; + std::condition_variable cv; + bool startChange = false; + + auto AddThreadFun = [this, &startChange, &m, &stopInsertThread, &nodes, &dataStore, &cv]() { + bool isFirst = true; + while (!stopInsertThread) { + int64_t fromId = rand() % 1'000'000; + AddFun(nodes[0], dataStore, fromId, 10); + if (isFirst) { + { + std::unique_lock lk(m); + startChange = true; + } + cv.notify_all(); + } + std::this_thread::sleep_for(std::chrono::microseconds(10)); + isFirst = false; + } + }; + + std::unique_lock lk(m); + std::thread insertThread(AddThreadFun); + cv.wait(lk, [&startChange] { return startChange; }); + lk.unlock(); + + ChangeServerId(isMaster, nodes[configurationIndex], newServerId, slaveConfiguration[configurationIndex]); + AddFun(nodes[0], dataStore, from, dn); + 
+ std::this_thread::sleep_for(std::chrono::milliseconds(100)); + stopInsertThread = true; + insertThread.join(); + + for (size_t i = 1; i < nodes.size(); i++) { + WaitSync(nodes[0], nodes[i], "ns1"); + } + + std::vector> results; + + Query qr = Query("ns1").Sort("id", true); + + for (size_t i = 0; i < nodes.size(); i++) { + results.emplace_back(); + ns.GetDataWithStrings(nodes[i], results.back()); + ASSERT_EQ(results.back().size(), dataStore.Size()) << " nodeIndex=" << i; + } + + for (size_t i = 1; i < results.size(); ++i) { + ASSERT_TRUE(dataStore.Check(results[i])); + } + }; + + std::unordered_set usedId; + for (int i = 0; i < 10; i++) { + int sId = 0; + while (true) { + sId = rand() % 100 + 300; + if (usedId.find(sId) == usedId.end()) { + usedId.insert(sId); + break; + } + } + + bool isMaster = rand() % 2; + int configurationIndex = 0; + if (!isMaster) { + configurationIndex = rand() % 3 + 1; + } + changeConfig(isMaster, configurationIndex, sId, startId + n2 + dn * (i + 1)); + } + + for (auto& node : nodes) { + node.Stop(); + } +} + +INSTANTIATE_TEST_SUITE_P(WalSize, ServerIdChange, ::testing::Values(1, 4000000)); \ No newline at end of file diff --git a/cpp_src/gtests/tests/unit/string_function_test.cc b/cpp_src/gtests/tests/unit/string_function_test.cc index 441a1e29c..62ad2cea4 100644 --- a/cpp_src/gtests/tests/unit/string_function_test.cc +++ b/cpp_src/gtests/tests/unit/string_function_test.cc @@ -1,4 +1,12 @@ +#if defined(__GNUC__) && (__GNUC__ == 12) && defined(REINDEX_WITH_ASAN) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wmaybe-uninitialized" #include +#pragma GCC diagnostic pop +#else // REINDEX_WITH_ASAN +#include +#endif // REINDEX_WITH_ASAN + #include "gtest/gtest.h" #include "reindexer_api.h" #include "tools/customlocal.h" diff --git a/cpp_src/gtests/tests/unit/tolal_lru_cache.cc b/cpp_src/gtests/tests/unit/tolal_lru_cache.cc index 64a1dd036..f3e7890f7 100644 --- a/cpp_src/gtests/tests/unit/tolal_lru_cache.cc +++ 
b/cpp_src/gtests/tests/unit/tolal_lru_cache.cc @@ -16,42 +16,101 @@ using reindexer::QueryCountCache; using reindexer::QueryCacheKey; using reindexer::QueryCountCacheVal; using reindexer::EqQueryCacheKey; +using reindexer::kCountCachedKeyMode; + +struct CacheJoinedSelectorMock { + std::string_view RightNsName() const noexcept { return rightNsName; } + int64_t LastUpdateTime() const noexcept { return lastUpdateTime; } + + std::string rightNsName; + int64_t lastUpdateTime; +}; +using CacheJoinedSelectorsMock = std::vector; TEST(LruCache, SimpleTest) { - const int nsCount = 10; - const int iterCount = 1000; + constexpr int kNsCount = 10; + constexpr int kSingleJoinNsCount = 5; + constexpr int kDoubleJoinNsCount = 5; + constexpr int kIterCount = 3000; - typedef std::pair QueryCachePair; + struct QueryCacheData { + const CacheJoinedSelectorsMock* JoinedSelectorsPtr() const noexcept { return joinedSelectors.size() ? &joinedSelectors : nullptr; } - std::vector qs; + Query q; + CacheJoinedSelectorsMock joinedSelectors = {}; + bool cached = false; + int64_t expectedTotal = -1; + }; + std::vector qs; PRINTF("preparing queries for caching ...\n"); - for (auto i = 0; i < nsCount; i++) { - auto idx = std::to_string(i); - qs.emplace_back(Query("namespace" + idx), false); + int i = 0; + for (int j = 0; j < kNsCount; ++j, ++i) { + qs.emplace_back(QueryCacheData{.q = Query(fmt::sprintf("namespace_%d", i))}); + } + for (int j = 0; j < kSingleJoinNsCount; ++j, ++i) { + const std::string kJoinedNsName = fmt::sprintf("joined_namespace_%d", j); + qs.emplace_back(QueryCacheData{ + .q = Query(fmt::sprintf("namespace_%d", i)) + .InnerJoin(fmt::sprintf("joined_field_%d", j), fmt::sprintf("main_field_%d", j % 2), CondEq, Query(kJoinedNsName)), + .joinedSelectors = {CacheJoinedSelectorMock{kJoinedNsName, 123}}}); + } + for (int j = 0; j < kDoubleJoinNsCount; ++j, ++i) { + const std::string kJoinedNsName1 = fmt::sprintf("second_joined_namespace_%d", j); + const std::string kJoinedNsName2 = 
fmt::sprintf("third_joined_namespace_%d", j); + constexpr int64_t kUpdateTime1 = 123; + constexpr int64_t kUpdateTime2 = 321; + if (j % 3 == 0) { + qs.emplace_back(QueryCacheData{ + .q = Query(fmt::sprintf("namespace_%d", i)) + .InnerJoin(fmt::sprintf("joined_field_%d", j), fmt::sprintf("main_field_%d", j % 2), CondEq, Query(kJoinedNsName1)) + .OrInnerJoin(fmt::sprintf("joined_field_%d", j), fmt::sprintf("main_field_%d", j % 2), CondEq, + Query(kJoinedNsName2)), + .joinedSelectors = {CacheJoinedSelectorMock{kJoinedNsName1, kUpdateTime1}, + CacheJoinedSelectorMock{kJoinedNsName2, kUpdateTime2}}}); + } else { + qs.emplace_back(QueryCacheData{ + .q = + Query(fmt::sprintf("namespace_%d", i)) + .InnerJoin(fmt::sprintf("joined_field_%d", j), fmt::sprintf("main_field_%d", j % 2), CondEq, Query(kJoinedNsName1)) + .InnerJoin(fmt::sprintf("joined_field_%d", j), fmt::sprintf("main_field_%d", j % 2), CondEq, Query(kJoinedNsName2)), + .joinedSelectors = {CacheJoinedSelectorMock{kJoinedNsName1, kUpdateTime1}, + CacheJoinedSelectorMock{kJoinedNsName2, kUpdateTime2}}}); + } } QueryCountCache cache(reindexer::kDefaultCacheSizeLimit, reindexer::kDefaultHitCountToCache); - auto keyComparator = EqQueryCacheKey(); - PRINTF("checking query cache ...\n"); - for (auto i = 0; i < iterCount; i++) { + PRINTF("checking query cache...\n"); + for (auto i = 0; i < kIterCount; i++) { auto idx = rand() % qs.size(); - auto const& qce = qs.at(idx); - QueryCacheKey ckey{qce.first}; + auto& qce = qs.at(idx); + QueryCacheKey ckey{qce.q, kCountCachedKeyMode, qce.JoinedSelectorsPtr()}; auto cached = cache.Get(ckey); - bool exist = qce.second; + bool exist = qce.cached; if (cached.valid) { - ASSERT_TRUE(exist) << "query missing in query cache!\n"; - QueryCacheKey k(qs[idx].first); - ASSERT_TRUE(keyComparator(k, ckey)) << "queries are not EQUAL!\n"; + ASSERT_TRUE(exist) << "query missing in query cache"; + ASSERT_EQ(cached.val.total_count, qce.expectedTotal) << "cached data are not valid"; } else { - 
size_t total = static_cast(rand() % 1000); + size_t total = static_cast(rand() % 10000); cache.Put(ckey, QueryCountCacheVal{total}); - qs[idx].second = true; + qce.cached = true; + qce.expectedTotal = total; } } + PRINTF("checking query update time change...\n"); + auto& qce = qs.back(); + if (!qce.cached) { + QueryCacheKey ckey{qce.q, kCountCachedKeyMode, qce.JoinedSelectorsPtr()}; + auto cached = cache.Get(ckey); + ASSERT_FALSE(cached.valid) << "query missing in query cache"; + cache.Put(ckey, QueryCountCacheVal{static_cast(rand() % 10000)}); + } + qce.joinedSelectors.back().lastUpdateTime += 100; + QueryCacheKey ckey{qce.q, kCountCachedKeyMode, qce.JoinedSelectorsPtr()}; + auto cached = cache.Get(ckey); + ASSERT_FALSE(cached.valid) << "update time change did not affected the key"; } TEST(LruCache, StressTest) { @@ -87,11 +146,13 @@ TEST(LruCache, StressTest) { for (auto i = 0; i < iterCount; i++) { auto idx = rand() % qs.size(); auto const& qce = qs.at(idx); - QueryCacheKey ckey{qce}; + QueryCacheKey ckey{qce, kCountCachedKeyMode, static_cast(nullptr)}; auto cached = cache.Get(ckey); if (cached.valid) { - ASSERT_TRUE(EqQueryCacheKey()(qs[idx], ckey)) << "queries are not EQUAL!\n"; + ASSERT_TRUE(EqQueryCacheKey()( + QueryCacheKey{qs[idx], kCountCachedKeyMode, static_cast(nullptr)}, ckey)) + << "queries are not EQUAL!\n"; } else { size_t total = static_cast(rand() % 1000); cache.Put(ckey, QueryCountCacheVal{total}); diff --git a/cpp_src/readme.md b/cpp_src/readme.md index b496cca9e..dba0b23d3 100644 --- a/cpp_src/readme.md +++ b/cpp_src/readme.md @@ -46,18 +46,29 @@ yum update yum install reindexer-server ``` -Available distros: `centos-7`, `fedora-38`, `fedora-39`, `redos-7` +Available distros: `centos-7`, `fedora-38`, `fedora-39`. 
### Ubuntu/Debian ```bash -curl https://repo.reindexer.io/RX-KEY.GPG | apt-key add +wget https://repo.reindexer.io/RX-KEY.GPG -O /etc/apt/trusted.gpg.d/reindexer.asc echo "deb https://repo.reindexer.io/ /" >> /etc/apt/sources.list apt update apt install reindexer-server ``` -Available distros: `debian-bookworm`, `debian-bullseye`, `ubuntu-bionic`, `ubuntu-focal`, `ubuntu-jammy` +Available distros: `debian-bookworm`, `debian-bullseye`, `ubuntu-focal`, `ubuntu-jammy` + +### Redos + +```bash +rpm --import https://repo.reindexer.io/RX-KEY.GPG +dnf config-manager --add-repo https://repo.reindexer.io//x86_64/ +dnf update +dnf install reindexer-server +``` + +Available distros: `redos-7`. ## OSX brew @@ -110,7 +121,7 @@ service start reindexer ## HTTP REST API The simplest way to use reindexer with any program language - is using REST API. The -[complete REST API documentation is here](server/contrib/server.md). +[complete REST API documentation is here](server/contrib/server.md). [Or explore interactive version of Reindexer's swagger documentation](https://editor.swagger.io/?url=https://raw.githubusercontent.com/Restream/reindexer/master/cpp_src/server/contrib/server.yml) ## GRPC API diff --git a/cpp_src/replicator/replicator.cc b/cpp_src/replicator/replicator.cc index 1c8e80200..3d94564a8 100644 --- a/cpp_src/replicator/replicator.cc +++ b/cpp_src/replicator/replicator.cc @@ -1047,7 +1047,7 @@ void Replicator::onWALUpdateImpl(LSNPair LSNs, std::string_view nsName, const WA if (slaveNs && !LSNs.upstreamLSN_.isEmpty()) { auto replState = slaveNs->GetReplState(dummyCtx_); if (!replState.lastUpstreamLSN.isEmpty()) { - if (replState.lastUpstreamLSN >= LSNs.upstreamLSN_) { + if (replState.lastUpstreamLSN.Counter() >= LSNs.upstreamLSN_.Counter()) { logPrintf(LogTrace, "[repl:%s:%s]:%d OnWALUpdate old record state = %d upstreamLSN = %s replState.lastUpstreamLSN=%d wrec.type = %d", nsName, slave_->storagePath_, config_.serverId, state_.load(std::memory_order_acquire), 
LSNs.upstreamLSN_, diff --git a/cpp_src/replicator/walselecter.cc b/cpp_src/replicator/walselecter.cc index 4d6cd1c48..a0198263c 100644 --- a/cpp_src/replicator/walselecter.cc +++ b/cpp_src/replicator/walselecter.cc @@ -44,11 +44,9 @@ void WALSelecter::operator()(QueryResults &result, SelectCtx ¶ms) { auto &lsnEntry = q.Entries().Get(lsnIdx); if (lsnEntry.Values().size() == 1 && lsnEntry.Condition() == CondGt) { lsn_t fromLSN = lsn_t(std::min(lsnEntry.Values()[0].As(), std::numeric_limits::max() - 1)); - if (fromLSN.Server() != ns_->serverId_) - throw Error(errOutdatedWAL, "Query to WAL with incorrect LSN %ld, LSN counter %ld", int64_t(fromLSN), ns_->wal_.LSNCounter()); if (ns_->wal_.LSNCounter() != (fromLSN.Counter() + 1) && ns_->wal_.is_outdated(fromLSN.Counter() + 1) && count) - throw Error(errOutdatedWAL, "Query to WAL with outdated LSN %ld, LSN counter %ld walSize = %d count = %d", int64_t(fromLSN), - ns_->wal_.LSNCounter(), ns_->wal_.size(), count); + throw Error(errOutdatedWAL, "Query to WAL with outdated LSN %ld, LSN counter %ld walSize = %d count = %d", + int64_t(fromLSN.Counter()), ns_->wal_.LSNCounter(), ns_->wal_.size(), count); const auto walEnd = ns_->wal_.end(); for (auto it = ns_->wal_.upper_bound(fromLSN.Counter()); count && it != walEnd; ++it) { diff --git a/cpp_src/server/CMakeLists.txt b/cpp_src/server/CMakeLists.txt index 9cf7e2b4e..f605f12ef 100644 --- a/cpp_src/server/CMakeLists.txt +++ b/cpp_src/server/CMakeLists.txt @@ -4,7 +4,7 @@ cmake_minimum_required(VERSION 3.0) project(reindexer_server_library) set (SWAGGER_VERSION "2.x") -set (GH_FACE_VERSION "3.21.0") +set (GH_FACE_VERSION "3.22.0") set (GH_FACE_TAG "v${GH_FACE_VERSION}") set (TARGET reindexer_server_library) set (SERVER_LIB_DIR ${PROJECT_BINARY_DIR} PARENT_SCOPE) diff --git a/cpp_src/server/contrib/CMakeLists.txt b/cpp_src/server/contrib/CMakeLists.txt index a162877a8..039475da5 100644 --- a/cpp_src/server/contrib/CMakeLists.txt +++ b/cpp_src/server/contrib/CMakeLists.txt @@ -40,6 
+40,6 @@ if(python3) WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/server/contrib COMMENT "Generate query.json.h" ) - add_custom_target(query_json DEPENDS ${QUERY_SCHEMA}) + add_custom_target(query_json ALL DEPENDS ${QUERY_SCHEMA}) endif() endif () diff --git a/cpp_src/server/contrib/server.md b/cpp_src/server/contrib/server.md index 856261b09..277069e6a 100644 --- a/cpp_src/server/contrib/server.md +++ b/cpp_src/server/contrib/server.md @@ -77,6 +77,7 @@ * [FulltextConfig](#fulltextconfig) * [FulltextFieldConfig](#fulltextfieldconfig) * [FulltextSynonym](#fulltextsynonym) + * [StopWordObject](#stopwordobject) * [Index](#index) * [IndexCacheMemStats](#indexcachememstats) * [IndexMemStat](#indexmemstat) @@ -132,7 +133,7 @@ Reindexer is fast. ### Version information -*Version* : 3.21.0 +*Version* : 3.22.0 ### License information @@ -2195,6 +2196,7 @@ Query execution explainings |**selectors**
*optional*|Filter selectors, used to process query conditions|< [selectors](#explaindef-selectors) > array| |**sort_by_uncommitted_index**
*optional*|Optimization of sort by uncompleted index has been performed|boolean| |**sort_index**
*optional*|Index, which used for sort results|string| +|**subqueries**
*optional*|Explain of subqueries preselect|< [subqueries](#explaindef-subqueries) > array| |**total_us**
*optional*|Total query execution time|integer| @@ -2231,6 +2233,7 @@ Query execution explainings |---|---|---| |**comparators**
*optional*|Count of comparators used, for this selector|integer| |**cost**
*optional*|Cost expectation of this selector|integer| +|**description**
*optional*|Description of the selector|string| |**explain_preselect**
*optional*|Preselect in joined namespace execution explainings|[ExplainDef](#explaindef)| |**explain_select**
*optional*|One of selects in joined namespace execution explainings|[ExplainDef](#explaindef)| |**field**
*optional*|Field or index name|string| @@ -2239,6 +2242,17 @@ Query execution explainings |**keys**
*optional*|Number of uniq keys, processed by this selector (may be incorrect, in case of internal query optimization/caching|integer| |**matched**
*optional*|Count of processed documents, matched this selector|integer| |**method**
*optional*|Method, used to process condition|enum (scan, index, inner_join, left_join)| +|**type**
*optional*|Type of the selector|string| + + +**subqueries** + +|Name|Description|Schema| +|---|---|---| +|**explain**
*optional*|Explain of the subquery's preselect|[ExplainDef](#explaindef)| +|**field**
*optional*|Name of field being compared with the subquery's result|string| +|**keys**
*optional*|Count of keys being compared with the subquery's result|integer| +|**namespace**
*optional*|Subquery's namespace name|string| @@ -2295,7 +2309,7 @@ Fulltext Index configuration |**position_boost**
*optional*|Boost of search query term position
**Default** : `1.0`
**Minimum value** : `0`
**Maximum value** : `10`|number (float)| |**position_weight**
*optional*|Weight of search query term position in final rank. 0: term position will not change final rank. 1: term position will affect to final rank in 0 - 100% range
**Default** : `0.1`
**Minimum value** : `0`
**Maximum value** : `1`|number (float)| |**stemmers**
*optional*|List of stemmers to use|< string > array| -|**stop_words**
*optional*|List of stop words. Words from this list will be ignored in documents and queries|< string > array| +|**stop_words**
*optional*|List of objects of stop words. Words from this list will be ignored when building indexes. |< [StopWordObject](#stopwordobject) > array| |**sum_ranks_by_fields_ratio**
*optional*|Ratio to summation of ranks of match one term in several fields. For example, if value of this ratio is K, request is '@+f1,+f2,+f3 word', ranks of match in fields are R1, R2, R3 and R2 < R1 < R3, final rank will be R = R2 + K*R1 + K*K*R3
**Default** : `0.0`
**Minimum value** : `0`
**Maximum value** : `1`|number (float)| |**synonyms**
*optional*|List of synonyms for replacement|< [FulltextSynonym](#fulltextsynonym) > array| |**term_len_boost**
*optional*|Boost of search query term length
**Default** : `1.0`
**Minimum value** : `0`
**Maximum value** : `10`|number (float)| @@ -2356,6 +2370,14 @@ Fulltext synonym definition +### StopWordObject +Stop word object definition + +|Name|Description|Schema| +|---|---|---| +|**word**
*optional*|Stop word|string| +|**is_morpheme**
*optional*|If the value is true, the word can be included in search results in queries such as 'word*', 'word~' etc.|boolean| + ### Index |Name|Description|Schema| @@ -2659,7 +2680,7 @@ List of meta info of the specified namespace |**index_idset_hits_to_cache**
*optional*|Default 'hits to cache' for index IdSets caches. This value determines how many requests required to put results into cache. For example with value of 2: first request will be executed without caching, second request will generate cache entry and put results into the cache and third request will get cached results. This value may be automatically increased if cache is invalidation too fast|integer| |**joins_preselect_cache_size**
*optional*|Max size of the index IdSets cache in bytes for each namespace. This cache will be enabled only if 'join_cache_mode' property is not 'off'. It stores resulting IDs, serialized JOINed queries and any other 'preselect' information for the JOIN queries (when target namespace is right namespace of the JOIN)|integer| |**joins_preselect_hit_to_cache**
*optional*|Default 'hits to cache' for joins preselect cache of the current namespace. This value determines how many requests required to put results into cache. For example with value of 2: first request will be executed without caching, second request will generate cache entry and put results into the cache and third request will get cached results. This value may be automatically increased if cache is invalidation too fast|integer| -|**query_count_cache_size**
*optional*|Max size of the cache for COUNT_CACHED() aggreagetion in bytes for each namespace. This cache stores resulting COUNTs and serialized queries for the COUNT_CACHED() aggregations|integer| +|**query_count_cache_size**
*optional*|Max size of the cache for COUNT_CACHED() aggregation in bytes for each namespace. This cache stores resulting COUNTs and serialized queries for the COUNT_CACHED() aggregations|integer| |**query_count_hit_to_cache**
*optional*|Default 'hits to cache' for COUNT_CACHED() aggregation of the current namespace. This value determines how many requests required to put results into cache. For example with value of 2: first request will be executed without caching, second request will generate cache entry and put results into the cache and third request will get cached results. This value may be automatically increased if cache is invalidation too fast|integer| diff --git a/cpp_src/server/contrib/server.yml b/cpp_src/server/contrib/server.yml index f4f66051a..6b97caeec 100644 --- a/cpp_src/server/contrib/server.yml +++ b/cpp_src/server/contrib/server.yml @@ -5,7 +5,7 @@ info: Reindexer's goal is to provide fast search with complex queries. The Reindexer is compact and fast. It has not heavy dependencies. Complete reindexer docker image with all libraries and web interface size is just 15MB. Reindexer is fast. - version: "3.21.0" + version: "3.22.0" title: "Reindexer REST API" license: name: "Apache 2.0" @@ -2779,6 +2779,17 @@ definitions: description: "Descent or ascent sorting direction" type: boolean + FtStopWordObject: + type: object + properties: + word: + description: "Stop word" + type: string + is_morpheme: + type: boolean + description: "If the value is true, the word can be included in search results in queries such as 'word*', 'word~' etc." + default: false + FulltextConfig: type: object description: "Fulltext Index configuration" @@ -2817,9 +2828,9 @@ definitions: description: "List of symbols, which will be threated as word part, all other symbols will be thrated as wors separators" stop_words: type: array - description: "List of stop words. Words from this list will be ignored in documents and queries" + description: "List of objects of stop words. 
Words from this list will be ignored when building indexes" items: - type: string + $ref: "#/definitions/FtStopWordObject" stemmers: type: array default: ["en","ru"] @@ -2987,6 +2998,31 @@ definitions: description: "Max total number of highlighted areas in ft result, when result still remains cacheable. '-1' means unlimited" default: -1 maximum: 1000000000 + bm25_config: + type: object + description: "Config for document ranking function" + properties: + bm25_k1: + type: number + format: "float" + description: "Coefficient k1 in the formula for calculating bm25. Сoefficient that sets the saturation threshold for the frequency of the term. The higher the coefficient, the higher the threshold and the lower the saturation rate." + default: 2.0 + minimum: 0.0 + bm25_b: + type: number + format: "float" + description: "Coefficient b in the formula for calculating bm25. If b is bigger, the effects of the length of the document compared to the average length are more amplified." + default: 0.75 + minimum: 0.0 + maximum: 1.0 + bm25_type: + type: string + description: "Formula for calculating document relevance (rx_bm25, bm25, word_count)" + enum: + - "rx_bm25" + - "bm25" + - "word_count" + default: "rx_bm25" fields: type: array description: "Configuration for certian field if it differ from whole index configuration" @@ -3299,6 +3335,12 @@ definitions: keys: type: integer description: "Number of uniq keys, processed by this selector (may be incorrect, in case of internal query optimization/caching" + type: + type: string + description: "Type of the selector" + description: + type: string + description: "Description of the selector" explain_preselect: description: "Preselect in joined namespace execution explainings" $ref: "#/definitions/ExplainDef" @@ -3366,6 +3408,24 @@ definitions: values_count: type: integer description: resulting size of query values set + subqueries: + type: array + description: "Explain of subqueries preselect" + items: + type: object + properties: + 
namespace: + type: string + description: "Subquery's namespace name" + keys: + type: integer + description: "Count of keys being compared with the subquery's result" + field: + type: string + description: "Name of field being compared with the subquery's result" + explain: + description: "Explain of the subquery's preselect" + $ref: "#/definitions/ExplainDef" AggregationResDef: @@ -4023,7 +4083,7 @@ definitions: type: integer default: 134217728 minimun: 0 - description: "Max size of the cache for COUNT_CACHED() aggreagetion in bytes for each namespace. This cache stores resulting COUNTs and serialized queries for the COUNT_CACHED() aggregations" + description: "Max size of the cache for COUNT_CACHED() aggregation in bytes for each namespace. This cache stores resulting COUNTs and serialized queries for the COUNT_CACHED() aggregations" query_count_hit_to_cache: type: integer default: 2 diff --git a/cpp_src/server/httpserver.cc b/cpp_src/server/httpserver.cc index 87cd16189..f0a2c65cd 100644 --- a/cpp_src/server/httpserver.cc +++ b/cpp_src/server/httpserver.cc @@ -22,6 +22,7 @@ #include "resources_wrapper.h" #include "statscollect/istatswatcher.h" #include "statscollect/prometheus.h" +#include "tools/alloc_ext/je_malloc_extension.h" #include "tools/alloc_ext/tc_malloc_extension.h" #include "tools/flagguard.h" #include "tools/fsops.h" diff --git a/cpp_src/tools/json2kv.cc b/cpp_src/tools/json2kv.cc index c693796a8..e7a7f1a15 100644 --- a/cpp_src/tools/json2kv.cc +++ b/cpp_src/tools/json2kv.cc @@ -32,7 +32,9 @@ Variant jsonValue2Variant(const gason::JsonValue &v, KeyValueType t, std::string -> Variant { throw Error(errLogic, "Error parsing json field '%s' - got number, expected %s", fieldName, t.Name()); }); case gason::JSON_STRING: return t.EvaluateOneOf( - [&](OneOf) { return Variant(p_string(json_string_ftr{v.sval.ptr})); }, + [&](OneOf) { + return Variant(p_string(json_string_ftr{v.sval.ptr}), Variant::no_hold_t{}); + }, [&](KeyValueType::Uuid) { return 
Variant{Uuid{v.toString()}}; }, [&](OneOf) -> Variant { @@ -59,7 +61,7 @@ Variant jsonValue2Variant(const gason::JsonValue &v, KeyValueType t, std::string [](KeyValueType::Double) noexcept { return Variant(0.0); }, [](KeyValueType::Bool) noexcept { return Variant(false); }, [](KeyValueType::Int) noexcept { return Variant(0); }, [](KeyValueType::Int64) noexcept { return Variant(static_cast(0)); }, - [](KeyValueType::String) { return Variant(p_string(static_cast(nullptr))); }, + [](KeyValueType::String) { return Variant(static_cast(nullptr)); }, [](KeyValueType::Uuid) noexcept { return Variant{Uuid{}}; }, [&](OneOf) -> Variant { throw Error(errLogic, "Error parsing json field '%s' - got null, expected %s", fieldName, t.Name()); diff --git a/cpp_src/tools/serializer.h b/cpp_src/tools/serializer.h index f370263ce..91d57bb3e 100644 --- a/cpp_src/tools/serializer.h +++ b/cpp_src/tools/serializer.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include "core/cjson/ctag.h" #include "core/keyvalue/uuid.h" diff --git a/cpp_src/tools/stringstools.cc b/cpp_src/tools/stringstools.cc index dbf99ecf1..9c8f72b45 100644 --- a/cpp_src/tools/stringstools.cc +++ b/cpp_src/tools/stringstools.cc @@ -9,7 +9,6 @@ #include "estl/fast_hash_map.h" #include "itoa/itoa.h" #include "tools/assertrx.h" -#include "tools/customlocal.h" #include "tools/randomgenerator.h" #include "tools/stringstools.h" #include "utf8cpp/utf8.h" @@ -236,24 +235,6 @@ std::pair calcUtf8BeforeDelims(const char *str, int pos, size_t return std::make_pair(str + pos - ptr, charCounter); } -void check_for_replacement(wchar_t &ch) { - if (ch == 0x451) { // 'ё' - ch = 0x435; // 'е' - } -} - -void check_for_replacement(uint32_t &ch) { - if (ch == 0x451) { // 'ё' - ch = 0x435; // 'е' - } -} - -bool is_number(std::string_view str) { - uint16_t i = 0; - while ((i < str.length() && IsDigit(str[i]))) i++; - return (i && i == str.length()); -} - void split(std::string_view str, std::string &buf, std::vector &words, const 
std::string &extraWordSymbols) { // assuming that the 'ToLower' function and the 'check for replacement' function should not change the character size in bytes buf.resize(str.length()); @@ -415,7 +396,7 @@ template bool checkIfEndsWith(std::string_view pattern, std: template bool checkIfEndsWith(std::string_view pattern, std::string_view src) noexcept; template <> -int collateCompare(std::string_view lhs, std::string_view rhs, const SortingPrioritiesTable &) { +int collateCompare(std::string_view lhs, std::string_view rhs, const SortingPrioritiesTable &) noexcept { auto itl = lhs.begin(); auto itr = rhs.begin(); @@ -437,11 +418,11 @@ int collateCompare(std::string_view lhs, std::string_view rhs, con } template <> -int collateCompare(std::string_view lhs, std::string_view rhs, const SortingPrioritiesTable &) { +int collateCompare(std::string_view lhs, std::string_view rhs, const SortingPrioritiesTable &) noexcept { auto itl = lhs.data(); auto itr = rhs.data(); - for (; itl != lhs.data() + lhs.size() && itr != rhs.size() + rhs.data();) { + for (auto lhsEnd = lhs.data() + lhs.size(), rhsEnd = rhs.size() + rhs.data(); itl != lhsEnd && itr != rhsEnd;) { auto chl = ToLower(utf8::unchecked::next(itl)); auto chr = ToLower(utf8::unchecked::next(itr)); @@ -458,7 +439,7 @@ int collateCompare(std::string_view lhs, std::string_view rhs, cons } template <> -int collateCompare(std::string_view lhs, std::string_view rhs, const SortingPrioritiesTable &) { +int collateCompare(std::string_view lhs, std::string_view rhs, const SortingPrioritiesTable &) noexcept { char *posl = nullptr; char *posr = nullptr; @@ -478,7 +459,7 @@ int collateCompare(std::string_view lhs, std::string_view rhs, c } template <> -int collateCompare(std::string_view lhs, std::string_view rhs, const SortingPrioritiesTable &sortOrderTable) { +int collateCompare(std::string_view lhs, std::string_view rhs, const SortingPrioritiesTable &sortOrderTable) noexcept { auto itl = lhs.data(); auto itr = rhs.data(); @@ 
-502,7 +483,7 @@ int collateCompare(std::string_view lhs, std::string_view rhs, co } template <> -int collateCompare(std::string_view lhs, std::string_view rhs, const SortingPrioritiesTable &) { +int collateCompare(std::string_view lhs, std::string_view rhs, const SortingPrioritiesTable &) noexcept { size_t l1 = lhs.size(); size_t l2 = rhs.size(); int res = memcmp(lhs.data(), rhs.data(), std::min(l1, l2)); @@ -711,7 +692,7 @@ Error getBytePosInMultilineString(std::string_view str, const size_t line, const } if ((currLine == line) && (charPos == currCharPos)) { bytePos = it - str.begin() - 1; - return errOK; + return Error(); } return Error(errNotValid, "Wrong cursor position: line=%d, pos=%d", line, charPos); } diff --git a/cpp_src/tools/stringstools.h b/cpp_src/tools/stringstools.h index 97c451577..b162ee94a 100644 --- a/cpp_src/tools/stringstools.h +++ b/cpp_src/tools/stringstools.h @@ -10,6 +10,7 @@ #include "core/indexopts.h" #include "core/type_consts.h" #include "tools/customhash.h" +#include "tools/customlocal.h" #include "tools/errors.h" namespace reindexer { @@ -104,18 +105,18 @@ template [[nodiscard]] Pos wordToByteAndCharPos(std::string_view str, int wordPosition, const std::string& extraWordSymbols); template -[[nodiscard]] int collateCompare(std::string_view lhs, std::string_view rhs, const SortingPrioritiesTable& sortOrderTable); +[[nodiscard]] int collateCompare(std::string_view lhs, std::string_view rhs, const SortingPrioritiesTable& sortOrderTable) noexcept; template <> -[[nodiscard]] int collateCompare(std::string_view lhs, std::string_view rhs, const SortingPrioritiesTable&); +[[nodiscard]] int collateCompare(std::string_view lhs, std::string_view rhs, const SortingPrioritiesTable&) noexcept; template <> -[[nodiscard]] int collateCompare(std::string_view lhs, std::string_view rhs, const SortingPrioritiesTable&); +[[nodiscard]] int collateCompare(std::string_view lhs, std::string_view rhs, const SortingPrioritiesTable&) noexcept; template <> 
-[[nodiscard]] int collateCompare(std::string_view lhs, std::string_view rhs, const SortingPrioritiesTable&); +[[nodiscard]] int collateCompare(std::string_view lhs, std::string_view rhs, const SortingPrioritiesTable&) noexcept; template <> -[[nodiscard]] int collateCompare(std::string_view lhs, std::string_view rhs, const SortingPrioritiesTable&); +[[nodiscard]] int collateCompare(std::string_view lhs, std::string_view rhs, const SortingPrioritiesTable&) noexcept; template <> -[[nodiscard]] int collateCompare(std::string_view lhs, std::string_view rhs, const SortingPrioritiesTable&); -[[nodiscard]] inline int collateCompare(std::string_view lhs, std::string_view rhs, const CollateOpts& collateOpts) { +[[nodiscard]] int collateCompare(std::string_view lhs, std::string_view rhs, const SortingPrioritiesTable&) noexcept; +[[nodiscard]] inline int collateCompare(std::string_view lhs, std::string_view rhs, const CollateOpts& collateOpts) noexcept { switch (collateOpts.mode) { case CollateASCII: return collateCompare(lhs, rhs, collateOpts.sortOrderTable); @@ -136,9 +137,18 @@ std::string utf16_to_utf8(const std::wstring& src); std::wstring& utf8_to_utf16(std::string_view src, std::wstring& dst); std::string& utf16_to_utf8(const std::wstring& src, std::string& dst); -void check_for_replacement(wchar_t& ch); -void check_for_replacement(uint32_t& ch); -bool is_number(std::string_view str); +inline void check_for_replacement(wchar_t& ch) noexcept { + ch = (ch == 0x451) ? 0x435 : ch; // 'ё' -> 'е' +} +inline void check_for_replacement(uint32_t& ch) noexcept { + ch = (ch == 0x451) ? 
0x435 : ch; // 'ё' -> 'е' +} +inline bool is_number(std::string_view str) noexcept { + uint16_t i = 0; + for (; (i < str.length() && IsDigit(str[i])); ++i) + ; + return (i && i == str.length()); +} int fast_strftime(char* buf, const tm* tm); std::string urldecode2(std::string_view str); diff --git a/cpp_src/vendor/cpp-btree/btree.h b/cpp_src/vendor/cpp-btree/btree.h index 38f2cad9c..49a80f0b7 100644 --- a/cpp_src/vendor/cpp-btree/btree.h +++ b/cpp_src/vendor/cpp-btree/btree.h @@ -719,10 +719,10 @@ struct btree_iterator { typedef btree_iterator const_iterator; typedef btree_iterator self_type; - btree_iterator() : node(NULL), position(-1) {} - btree_iterator(Node *n, int p) : node(n), position(p) {} - btree_iterator(const iterator &x) : node(x.node), position(x.position) {} - btree_iterator &operator=(const iterator &x) { + btree_iterator() noexcept : node(NULL), position(-1) {} + btree_iterator(Node *n, int p) noexcept : node(n), position(p) {} + btree_iterator(const iterator &x) noexcept : node(x.node), position(x.position) {} + btree_iterator &operator=(const iterator &x) noexcept { if (reinterpret_cast(this) != &x) { node = x.node; position = x.position; @@ -731,45 +731,45 @@ struct btree_iterator { } // Increment/decrement the iterator. 
- void increment() { + void increment() noexcept { if (node->leaf() && ++position < node->count()) { return; } increment_slow(); } - void increment_by(int count); - void increment_slow(); + void increment_by(int count) noexcept; + void increment_slow() noexcept; - void decrement() { + void decrement() noexcept { if (node->leaf() && --position >= 0) { return; } decrement_slow(); } - void decrement_slow(); + void decrement_slow() noexcept; - bool operator==(const const_iterator &x) const { return node == x.node && position == x.position; } - bool operator!=(const const_iterator &x) const { return node != x.node || position != x.position; } + bool operator==(const const_iterator &x) const noexcept { return node == x.node && position == x.position; } + bool operator!=(const const_iterator &x) const noexcept { return node != x.node || position != x.position; } // Accessors for the key/value the iterator is pointing at. - const key_type &key() const { return node->key(position); } - reference operator*() const { return node->value(position); } - pointer operator->() const { return &node->value(position); } + const key_type &key() const noexcept { return node->key(position); } + reference operator*() const noexcept { return node->value(position); } + pointer operator->() const noexcept { return &node->value(position); } - self_type &operator++() { + self_type &operator++() noexcept { increment(); return *this; } - self_type &operator--() { + self_type &operator--() noexcept { decrement(); return *this; } - self_type operator++(int) { + self_type operator++(int) noexcept { self_type tmp = *this; ++*this; return tmp; } - self_type operator--(int) { + self_type operator--(int) noexcept { self_type tmp = *this; --*this; return tmp; @@ -1534,7 +1534,7 @@ void btree_node

::swap(btree_node *x) { //// // btree_iterator methods template -void btree_iterator::increment_slow() { +void btree_iterator::increment_slow() noexcept { if (node->leaf()) { assertrx(position >= node->count()); self_type save(*this); @@ -1557,7 +1557,7 @@ void btree_iterator::increment_slow() { } template -void btree_iterator::increment_by(int count) { +void btree_iterator::increment_by(int count) noexcept { while (count > 0) { if (node->leaf()) { int rest = node->count() - position; @@ -1574,7 +1574,7 @@ void btree_iterator::increment_by(int count) { } template -void btree_iterator::decrement_slow() { +void btree_iterator::decrement_slow() noexcept { if (node->leaf()) { assertrx(position <= -1); self_type save(*this); diff --git a/cpp_src/vendor/cpp-btree/btree_container.h b/cpp_src/vendor/cpp-btree/btree_container.h index 94469d201..759db216e 100644 --- a/cpp_src/vendor/cpp-btree/btree_container.h +++ b/cpp_src/vendor/cpp-btree/btree_container.h @@ -53,14 +53,14 @@ class btree_container { btree_container(const self_type &x) : tree_(x.tree_) {} // Iterator routines. 
- iterator begin() { return tree_.begin(); } - const_iterator begin() const { return tree_.begin(); } - iterator end() { return tree_.end(); } - const_iterator end() const { return tree_.end(); } - reverse_iterator rbegin() { return tree_.rbegin(); } - const_reverse_iterator rbegin() const { return tree_.rbegin(); } - reverse_iterator rend() { return tree_.rend(); } - const_reverse_iterator rend() const { return tree_.rend(); } + iterator begin() noexcept(noexcept(std::declval().begin())) { return tree_.begin(); } + const_iterator begin() const noexcept(noexcept(std::declval().begin())) { return tree_.begin(); } + iterator end() noexcept(noexcept(std::declval().end())) { return tree_.end(); } + const_iterator end() const noexcept(noexcept(std::declval().end())) { return tree_.end(); } + reverse_iterator rbegin() noexcept(noexcept(std::declval().rbegin())) { return tree_.rbegin(); } + const_reverse_iterator rbegin() const noexcept(noexcept(std::declval().rbegin())) { return tree_.rbegin(); } + reverse_iterator rend() noexcept(noexcept(std::declval().rend())) { return tree_.rend(); } + const_reverse_iterator rend() const noexcept(noexcept(std::declval().rend())) { return tree_.rend(); } // Lookup routines. iterator lower_bound(const key_type &key) { return tree_.lower_bound(key); } @@ -102,18 +102,18 @@ class btree_container { void verify() const { tree_.verify(); } // Size routines. 
- size_type size() const { return tree_.size(); } - size_type max_size() const { return tree_.max_size(); } - bool empty() const { return tree_.empty(); } - size_type height() const { return tree_.height(); } - size_type internal_nodes() const { return tree_.internal_nodes(); } - size_type leaf_nodes() const { return tree_.leaf_nodes(); } - size_type nodes() const { return tree_.nodes(); } - size_type bytes_used() const { return tree_.bytes_used(); } - static double average_bytes_per_value() { return Tree::average_bytes_per_value(); } - double fullness() const { return tree_.fullness(); } - double overhead() const { return tree_.overhead(); } - const key_compare &key_comp() const { return tree_.key_comp(); } + size_type size() const noexcept(noexcept(std::declval().size())) { return tree_.size(); } + size_type max_size() const noexcept(noexcept(std::declval().max_size())) { return tree_.max_size(); } + bool empty() const noexcept(noexcept(std::declval().empty())) { return tree_.empty(); } + size_type height() const noexcept(noexcept(std::declval().height())) { return tree_.height(); } + size_type internal_nodes() const noexcept(noexcept(std::declval().internal_nodes())) { return tree_.internal_nodes(); } + size_type leaf_nodes() const noexcept(noexcept(std::declval().leaf_nodes())) { return tree_.leaf_nodes(); } + size_type nodes() const noexcept(noexcept(std::declval().nodes())) { return tree_.nodes(); } + size_type bytes_used() const noexcept(noexcept(std::declval().bytes_used())) { return tree_.bytes_used(); } + static double average_bytes_per_value() noexcept(noexcept(Tree::average_bytes_per_value())) { return Tree::average_bytes_per_value(); } + double fullness() const noexcept(noexcept(std::declval().fullness())) { return tree_.fullness(); } + double overhead() const noexcept(noexcept(std::declval().overhead())) { return tree_.overhead(); } + const key_compare &key_comp() const noexcept(noexcept(std::declval().key_comp())) { return tree_.key_comp(); } bool 
operator==(const self_type &x) const { if (size() != x.size()) { @@ -315,4 +315,4 @@ class btree_multi_container : public btree_container { } // namespace btree -#endif // UTIL_BTREE_BTREE_CONTAINER_H__ +#endif // UTIL_BTREE_BTREE_CONTAINER_H__ diff --git a/cpp_src/vendor/prometheus/family.h b/cpp_src/vendor/prometheus/family.h index 5c59122d0..6992bc161 100644 --- a/cpp_src/vendor/prometheus/family.h +++ b/cpp_src/vendor/prometheus/family.h @@ -157,7 +157,7 @@ T& Family::Add(std::map&& labels, int64_t epoch, Ar auto metrics_iter = metrics_.find(hash); if (metrics_iter != metrics_.end()) { -#ifndef NDEBUG +#if !defined(NDEBUG) && defined(WITH_STDLIB_DEBUG) auto labels_iter = labels_.find(hash); assertrx(labels_iter != labels_.end()); const auto& old_labels = labels_iter->second; @@ -166,7 +166,7 @@ T& Family::Add(std::map&& labels, int64_t epoch, Ar metrics_iter->second.epoch = epoch; return *metrics_iter->second.ptr; } else { -#ifndef NDEBUG +#if !defined(NDEBUG) && defined(WITH_STDLIB_DEBUG) for (auto& label_pair : labels) { auto& label_name = label_pair.first; assertrx(CheckLabelName(label_name)); diff --git a/cpp_src/vendor/prometheus/impl/check_names.cc b/cpp_src/vendor/prometheus/impl/check_names.cc index 0aabbc88c..6a800ce31 100644 --- a/cpp_src/vendor/prometheus/impl/check_names.cc +++ b/cpp_src/vendor/prometheus/impl/check_names.cc @@ -1,15 +1,21 @@ #include "prometheus/check_names.h" -#include - #if defined(__GLIBCXX__) && __GLIBCXX__ <= 20150623 #define STD_REGEX_IS_BROKEN #endif +#if defined(__GNUC__) && (__GNUC__ == 12) && (__GNUC_MINOR__ == 2) && defined(REINDEX_WITH_ASAN) +// regex header is broken in GCC 12.2 with ASAN +#define STD_REGEX_IS_BROKEN +#endif #if defined(_MSC_VER) && _MSC_VER < 1900 #define STD_REGEX_IS_BROKEN #endif +#ifndef STD_REGEX_IS_BROKEN +#include +#endif + namespace prometheus { bool CheckMetricName(const std::string& name) { // see https://prometheus.io/docs/concepts/data_model/ diff --git a/cpp_src/vendor/spdlog/details/os.h 
b/cpp_src/vendor/spdlog/details/os.h index cf8501181..a3f05d782 100644 --- a/cpp_src/vendor/spdlog/details/os.h +++ b/cpp_src/vendor/spdlog/details/os.h @@ -247,8 +247,9 @@ inline size_t filesize(FILE *f) #else // unix int fd = fileno(f); - //64 bits(but not in osx or cygwin, where fstat64 is deprecated) -#if !defined(__FreeBSD__) && !defined(__APPLE__) && (defined(__x86_64__) || defined(__ppc64__)) && !defined(__CYGWIN__) + // 64 bits(but not in osx, linux/musl or cygwin, where fstat64 is deprecated) +#if ((defined(__linux__) && defined(__GLIBC__)) || defined(__sun) || defined(_AIX)) && \ + (defined(__LP64__) || defined(_LP64)) struct stat64 st ; if (fstat64(fd, &st) == 0) return static_cast(st.st_size); diff --git a/dependencies.sh b/dependencies.sh index a6427bd3b..10e3d35f7 100755 --- a/dependencies.sh +++ b/dependencies.sh @@ -37,7 +37,7 @@ almalinux9_rpms="gcc-c++ make snappy-devel leveldb-devel gperftools-devel findut fedora_debs=" gcc-c++ make snappy-devel leveldb-devel gperftools-devel findutils curl tar unzip rpm-build rpmdevtools git" centos7_debs="centos-release-scl devtoolset-9-gcc devtoolset-9-gcc-c++ make snappy-devel leveldb-devel gperftools-devel findutils curl tar unzip rpm-build rpmdevtools git" debian_debs="build-essential g++ libunwind-dev libgoogle-perftools-dev libsnappy-dev libleveldb-dev make curl unzip git" -alpine_apks="g++ snappy-dev leveldb-dev libexecinfo-dev make curl cmake unzip git" +alpine_apks="g++ snappy-dev leveldb-dev libunwind-dev make curl cmake unzip git" arch_pkgs="gcc snappy leveldb make curl cmake unzip git" redos_rpms="gcc gcc-c++ make snappy-devel leveldb-devel gperftools-devel findutils curl tar unzip git cmake rpm-build python-srpm-macros" @@ -237,7 +237,7 @@ install_alpine() { if [ $? -eq 0 ]; then success_msg "Package '$pkg' was installed successfully." else - error_msg "Could not install '$pkg' package. Try 'apt-get update && apt-get install $pkg'" && return 1 + error_msg "Could not install '$pkg' package. 
Try 'apk update && apk add $pkg'" && return 1 fi fi done diff --git a/describer.go b/describer.go index b356b3cba..051dca394 100644 --- a/describer.go +++ b/describer.go @@ -417,7 +417,7 @@ type NamespaceCacheConfig struct { // This value may be automatically increased if cache is invalidation too fast // Default value is 2. Min value is 0 JoinHitsToCache uint32 `json:"joins_preselect_hit_to_cache"` - // Max size of the cache for COUNT_CACHED() aggreagetion in bytes for each namespace + // Max size of the cache for COUNT_CACHED() aggregation in bytes for each namespace // This cache stores resulting COUNTs and serialized queries for the COUNT_CACHED() aggregations // Default value is 134217728 (128 MB). Min value is 0 QueryCountCacheSize uint64 `json:"query_count_cache_size"` diff --git a/dsl/dsl.go b/dsl/dsl.go index e110dde15..bf4945939 100644 --- a/dsl/dsl.go +++ b/dsl/dsl.go @@ -80,13 +80,13 @@ type Sort struct { } type Filter struct { - Op string `json:"op,omitempty"` - Field string `json:"field,omitempty"` - Joined *JoinQuery `json:"join_query,omitempty"` - SubQ *SubQuery `json:"subquery,omitempty"` - Cond string `json:"cond,omitempty"` - Value interface{} `json:"value,omitempty"` - Filters []Filter `json:"filters,omitempty"` + Op string `json:"Op,omitempty"` + Field string `json:"Field,omitempty"` + Joined *JoinQuery `json:"Join_Query,omitempty"` + SubQ *SubQuery `json:"Subquery,omitempty"` + Cond string `json:"Cond,omitempty"` + Value interface{} `json:"Value,omitempty"` + Filters []Filter `json:"Filters,omitempty"` } type JoinOnCondition struct { diff --git a/ftfastconfig.go b/ftfastconfig.go index 2eafbea2d..95bf77c09 100644 --- a/ftfastconfig.go +++ b/ftfastconfig.go @@ -41,7 +41,6 @@ type FtTyposDetailedConfig struct { MaxExtraLetters int `json:"max_extra_letters"` } - type FtBaseRanking struct { // Relevancy of full word match // Values range: [0,500] @@ -81,6 +80,21 @@ type FtBaseRanking struct { Synonyms int `json:"synonyms_proc"` } +type StopWord 
struct { + Word string `json:"word"` + IsMorpheme bool `json:"is_morpheme"` +} + +type Bm25ConfigType struct { + // Coefficient k1 in the formula for calculating bm25 + Bm25k1 float64 `json:"bm25_k1"` + // Coefficient b in the formula for calculating bm25 + Bm25b float64 `json:"bm25_b"` + // Formula for calculating document relevance (rx, classic, word_count) + Bm25Type string `json:"bm25_type"` + +} + // FtFastConfig configurarion of FullText search index type FtFastConfig struct { // boost of bm25 ranking. default value 1. @@ -139,8 +153,10 @@ type FtFastConfig struct { EnableTranslit bool `json:"enable_translit"` // Enable wrong keyboard layout variants processing EnableKbLayout bool `json:"enable_kb_layout"` - // List of stop words. Words from this list will be ignored in documents and queries - StopWords []string `json:"stop_words"` + // List of objects of stop words. Words from this list will be ignored when building indexes + // but can be included in search results in queries such as 'word*', 'word~' etc. if for the stop-word attribute is_morpheme is true. 
+ // The list item can be either a reindexer.StopWord, or string + StopWords []interface{} `json:"stop_words"` // List of synonyms for replacement Synonyms []struct { // List source tokens in query, which will be replaced with alternatives @@ -170,6 +186,8 @@ type FtFastConfig struct { EnablePreselectBeforeFt bool `json:"enable_preselect_before_ft"` // Config for subterm rank multiplier FtBaseRankingConfig *FtBaseRanking `json:"base_ranking,omitempty"` + // Config for document ranking + Bm25Config *Bm25ConfigType `json:"bm25_config,omitempty"` } func DefaultFtFastConfig() FtFastConfig { @@ -201,8 +219,8 @@ func DefaultFtFastConfig() FtFastConfig { MaxTotalAreasToCache: -1, Optimization: "Memory", EnablePreselectBeforeFt: false, - FtBaseRankingConfig: &FtBaseRanking{FullMatch: 100, PrefixMin: 50, SuffixMin:10, Typo:85, TypoPenalty: 15, StemmerPenalty: 15, Kblayout: 90, Translit:90, Synonyms:95}, - + FtBaseRankingConfig: &FtBaseRanking{FullMatch: 100, PrefixMin: 50, SuffixMin: 10, Typo: 85, TypoPenalty: 15, StemmerPenalty: 15, Kblayout: 90, Translit: 90, Synonyms: 95}, + Bm25Config: &Bm25ConfigType{Bm25k1: 2.0, Bm25b: 0.75, Bm25Type: "rx_bm25"}, } } diff --git a/ftfuzzyconfig.go b/ftfuzzyconfig.go index 570970a54..223faed4b 100644 --- a/ftfuzzyconfig.go +++ b/ftfuzzyconfig.go @@ -39,15 +39,15 @@ type FtFuzzyConfig struct { EnableTranslit bool `json:"enable_translit"` // Enable wrong keyboard layout variants processing EnableKbLayout bool `json:"enable_kb_layout"` - // List of stop words. Words from this list will be ignored in documents and queries - StopWords []string `json:"stop_words"` + // List of objects of stop words. Words from this list will be ignored when building indexes + // but can be included in search results in queries such as 'word*', 'word~' etc. 
if for the stop-word attribute is_morpheme is true + StopWords []interface{} `json:"stop_words"` // Log level of full text search engine LogLevel int `json:"log_level"` // Extra symbols, which will be threated as parts of word to addition to letters and digits ExtraWordSymbols string `json:"extra_word_symbols"` // Config for subterm rank multiplier FtBaseRankingConfig *FtBaseRanking `json:"base_ranking,omitempty"` - } func DefaultFtFuzzyConfig() FtFuzzyConfig { @@ -69,6 +69,6 @@ func DefaultFtFuzzyConfig() FtFuzzyConfig { EnableKbLayout: true, LogLevel: 0, ExtraWordSymbols: "/-+", - FtBaseRankingConfig: &FtBaseRanking{FullMatch: 100, PrefixMin: 50, SuffixMin:10, Typo:85, TypoPenalty: 15, StemmerPenalty: 15, Kblayout: 90, Translit:90, Synonyms:95}, + FtBaseRankingConfig: &FtBaseRanking{FullMatch: 100, PrefixMin: 50, SuffixMin: 10, Typo: 85, TypoPenalty: 15, StemmerPenalty: 15, Kblayout: 90, Translit: 90, Synonyms: 95}, } } diff --git a/fulltext.md b/fulltext.md index 672ab7e52..a3d4173d5 100644 --- a/fulltext.md +++ b/fulltext.md @@ -6,11 +6,11 @@ Reindexer has builtin full text search engine. This document describes usage of - [Define full text index fields](#define-full-text-index-fields) - [Query to full text index](#query-to-full-text-index) - [Text query format](#text-query-format) - - [Patterns](#patterns) - - [Field selection](#field-selection) - - [Binary operators](#binary-operators) - - [Escape character](#escape-character) - - [Phrase search](#phrase-search) + - [Patterns](#patterns) + - [Field selection](#field-selection) + - [Binary operators](#binary-operators) + - [Escape character](#escape-character) + - [Phrase search](#phrase-search) - [Examples of text queris](#examples-of-text-queris) - [Natural language processing](#natural-language-processing) - [Merging queries results](#merging-queries-results) @@ -24,9 +24,10 @@ Reindexer has builtin full text search engine. 
This document describes usage of - [Performance and memory usage](#performance-and-memory-usage) - [Configuration](#configuration) - [Base config parameters](#base-config-parameters) + - [Stopwords details](#stopwords-details) - [Detailed typos config](#detailed-typos-config) - [Base ranking config](#base-ranking-config) - - [Limitations and know issues](#limitations-and-know-issues) + - [Limitations and know issues](#limitations-and-know-issues) ## LIKE @@ -34,17 +35,17 @@ Reindexer has builtin full text search engine. This document describes usage of For simple search in text can be used operator `LIKE`. It search strings which match a pattern. In the pattern `_` means any char and `%` means any sequence of chars. ``` - In Go: - query := db.Query("items"). - Where("field", reindexer.LIKE, "pattern") + In Go: + query := db.Query("items"). + Where("field", reindexer.LIKE, "pattern") - In SQL: - SELECT * FROM items WHERE fields LIKE 'pattern' + In SQL: + SELECT * FROM items WHERE fields LIKE 'pattern' ``` ``` - 'me_t' corresponds to 'meet', 'meat', 'melt' and so on - '%tion' corresponds to 'tion', 'condition', 'creation' and so on + 'me_t' corresponds to 'meet', 'meat', 'melt' and so on + '%tion' corresponds to 'tion', 'condition', 'creation' and so on ``` @@ -54,8 +55,8 @@ Full text search is performed in fields marked with `text` tag: ```go type Item struct { - ID int64 `reindex:"id,,pk"` - Description string `reindex:"description,text"` + ID int64 `reindex:"id,,pk"` + Description string `reindex:"description,text"` } ``` @@ -63,10 +64,10 @@ Full text search is also available for multiple fields of composite index marked ```go type Item struct { - ID int64 `reindex:"id,,pk"` - Name string `reindex:"name,-"` - Description string `reindex:"description,-"` - _ struct{} `reindex:"name+description=text_search,text,composite` + ID int64 `reindex:"id,,pk"` + Name string `reindex:"name,-"` + Description string `reindex:"description,-"` + _ struct{} 
`reindex:"name+description=text_search,text,composite` } ``` In this example full text index will include fields `name` and `description`,`text_search` is short alias of composite index name for using in Queries. @@ -78,22 +79,22 @@ Full text index is case insensitive. The source text is tokenized to set of word Queries to full text index are constructed by usual query interface ```go - query := db.Query ("items"). - Match ("name+description","text query","") + query := db.Query ("items"). + Match ("name+description","text query","") ``` Or equivalent query using name alias: ```go - query := db.Query ("items"). - Match ("text_search","text query","") + query := db.Query ("items"). + Match ("text_search","text query","") ``` Queries to full text index can be combined with conditions on another fields. e.g: ```go - query := db.Query ("items"). - Match ("description","text query"). - WhereInt("year",reindexer.GT,2010) + query := db.Query ("items"). + Match ("description","text query"). + WhereInt("year",reindexer.GT,2010) ``` Each result of query contains rank of match. Rank is integer from 0 to 255. 0 - lowest relevancy, 255 - best relevancy. The query Iterator has method `Rank()`, which returns rank of current result @@ -166,33 +167,37 @@ There are built in stemmers support in full text search. It enables natural lang It is possible to merge multiple queries results and sort final result by relevancy. ```go - query := db.Query ("items"). - Match ("description","text query1") - q2 := db.Query ("another_items"). - Match ("description","text query2") - query.Merge (q2) + query := db.Query ("items"). + Match ("description","text query1") + q2 := db.Query ("another_items"). 
+ Match ("description","text query2") + query.Merge (q2) iterator = query.Exec () - // Check the error - if err := iterator.Error(); err != nil { - panic(err) - } - // Iterate over results - for iterator.Next() { - // Get the next document and cast it to a pointer - switch elem := iterator.Object().(type) { - case Item: - fmt.Printf ("%v,rank=%d\n",*elem,iterator.Rank()) - case AnotherItem: - fmt.Printf ("%v,rank=%d\n",*elem,iterator.Rank()) - } - } + // Check the error + if err := iterator.Error(); err != nil { + panic(err) + } + // Iterate over results + for iterator.Next() { + // Get the next document and cast it to a pointer + switch elem := iterator.Object().(type) { + case Item: + fmt.Printf ("%v,rank=%d\n",*elem,iterator.Rank()) + case AnotherItem: + fmt.Printf ("%v,rank=%d\n",*elem,iterator.Rank()) + } + } ``` ## Using select functions It is possible to use select functions to process result data. -For now you can use snippet, snippet_n and highlight. Those functions does not work for composite fulltext indexes. +For now you can use snippet, snippet_n and highlight. For composite indexes the result of the function will be written in to corresponding subfields. You can not put [,)\0] symbols in functions params. If the value contains special characters, it must be enclosed in single quotes. +Notice: although text indexes may be created over numeric fields, select functions can not be applied to any non-string field. + +For all the functions there are two types of supported syntax with the same behavior: `field.func_name(...)` and `field = func_name(...)`. + ### Highlight This functions just highlights text area that was found. It has two arguments - @@ -319,58 +324,98 @@ Several parameters of full text search engine can be configured from application ```go ... ftconfig := reindexer.DefaultFtFastConfig() - // Setup configuration - ftconfig.LogLevel = reindexer.TRACE - // Setup another parameters - // ... 
- // Create index definition - indexDef := reindexer.IndexDef { - Name: "description", - JSONPaths: []string{"description"}, - IndexType: "text", - FieldType: "string", - Config: ftconfig, - } - // Add index with configuration - return db.AddIndex ("items",indexDef) + // Setup configuration + ftconfig.LogLevel = reindexer.TRACE + // Setup another parameters + // ... + // Create index definition + indexDef := reindexer.IndexDef { + Name: "description", + JSONPaths: []string{"description"}, + IndexType: "text", + FieldType: "string", + Config: ftconfig, + } + // Add index with configuration + return db.AddIndex ("items",indexDef) ``` ### Base config parameters -| | Parameter name | Type | Description | Default value | -|---|:----------------------------:|:--------:|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|:-------------:| -| | Bm25Boost | float | Boost of bm25 ranking | 1 | -| | Bm25Weight | float | Weight of bm25 rank in final rank 0: bm25 will not change final rank. 1: bm25 will affect to fin l rank in 0 - 100% range. | 0.1 | -| | DistanceBoost | float | Boost of search query term distance in found document. | 1 | -| | DistanceWeight | float | Weight of search query terms distance in found document in final rank 0: distance will not change final rank. 1: distance will affect to final rank in 0 - 100% range. | 0.5 | -| | TermLenBoost | float | Boost of search query term length | 1 | -| | TermLenWeight | float | Weight of search query term length in final rank. 0: term length will not change final rank. 1: term length will affect to final rank in 0 - 100% range | 0.3 | -| | PositionBoost | float | Boost of search query term position | 1.0 | -| | PositionWeight | float | Weight of search query term position in final rank. 
0: term position will not change final rank. 1: term position will affect to final rank in 0 - 100% range | 0.1 | -| | FullMatchBoost | float | Boost of full match of search phrase with doc | 1.1 | -| | PartialMatchDecrease | int | Decrease of relevancy in case of partial match by value: partial_match_decrease * (non matched symbols) / (matched symbols) | 15 | -| | MinRelevancy | float | Minimum rank of found documents. 0: all found documents will be returned 1: only documents with relevancy >= 100% will be returned | 0.05 | -| | MaxTypos | int | Maximum possible typos in word. 0: typos are disabled, words with typos will not match. N: words with N possible typos will match. Check [typos handling](#typos-handling-details) section for detailed description. | 2 | -| | MaxTyposInWord | int | Deprecated, use MaxTypos instead of this. Cannot be used with MaxTypos. Maximum possible typos in word. 0: typos is disabled, words with typos will not match. N: words with N possible typos will match. It is not recommended to set more than 1 possible typo -It will seriously increase RAM usage, and decrease search speed | - | -| | MaxTypoLen | int | Maximum word length for building and matching variants with typos. | 15 | -| | FtTyposDetailedConfig | struct | Config for more precise typos algorithm tuning | | -| | MaxRebuildSteps | int | Maximum steps without full rebuild of ft - more steps faster commit slower select - optimal about 15. | 50 | -| | MaxStepSize | int | Maximum unique words to step | 4000 | -| | MergeLimit | int | Maximum documents count which will be processed in merge query results. Increasing this value may refine ranking of queries with high frequency words, but will decrease search speed | 20000 | -| | Stemmers | []string | List of stemmers to use | "en","ru" | -| | EnableTranslit | bool | Enable russian translit variants processing. e.g. 
term "luntik" will match word "лунтик" | true | -| | EnableKbLayout | bool | Enable wrong keyboard layout variants processing. e.g. term "keynbr" will match word "лунтик" | true | -| | StopWords | []string | List of stop words. Words from this list will be ignored in documents and queries | | -| | SumRanksByFieldsRatio | float | Ratio of summation of ranks of match one term in several fields | 0.0 | -| | LogLevel | int | Log level of full text search engine | 0 | -| | FieldsCfg | []struct | Configs for certain fields. Overlaps parameters from main config. Contains parameters: FieldName, Bm25Boost, Bm25Weight, TermLenBoost, TermLenWeight, PositionBoost, PositionWeight. | empty | -| | EnableWarmupOnNsCopy | bool | Enable automatic index warmup after transaction, which has performed namespace copy | false | -| | ExtraWordSymbols | string | Extra symbols, which will be threated as parts of word to addition to letters and digits | "+/-" | -| | MaxAreasInDoc | int | Max number of highlighted areas for each field in each document (for snippet() and highlight()). '-1' means unlimited | 5 | -| | MaxTotalAreasToCache | int | Max total number of highlighted areas in ft result, when result still remains cacheable. '-1' means unlimited | -1 | -| | Optimization | string | Optimize the index by 'memory' or by 'cpu' | "memory" | -| | FtBaseRanking | struct | Relevance of the word in different forms | | +| | Parameter name | Type | Description | Default value | +|---|:---------------------:|:--------:|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|:-------------:| +| | Bm25Boost | float | Boost of bm25 ranking | 1 | +| | Bm25Weight | float | Weight of bm25 rank in final rank 0: bm25 will not change final rank. 
1: bm25 will affect to fin l rank in 0 - 100% range. | 0.1 | +| | DistanceBoost | float | Boost of search query term distance in found document. | 1 | +| | DistanceWeight | float | Weight of search query terms distance in found document in final rank 0: distance will not change final rank. 1: distance will affect to final rank in 0 - 100% range. | 0.5 | +| | TermLenBoost | float | Boost of search query term length | 1 | +| | TermLenWeight | float | Weight of search query term length in final rank. 0: term length will not change final rank. 1: term length will affect to final rank in 0 - 100% range | 0.3 | +| | PositionBoost | float | Boost of search query term position | 1.0 | +| | PositionWeight | float | Weight of search query term position in final rank. 0: term position will not change final rank. 1: term position will affect to final rank in 0 - 100% range | 0.1 | +| | FullMatchBoost | float | Boost of full match of search phrase with doc | 1.1 | +| | PartialMatchDecrease | int | Decrease of relevancy in case of partial match by value: partial_match_decrease * (non matched symbols) / (matched symbols) | 15 | +| | MinRelevancy | float | Minimum rank of found documents. 0: all found documents will be returned 1: only documents with relevancy >= 100% will be returned | 0.05 | +| | MaxTypos | int | Maximum possible typos in word. 0: typos are disabled, words with typos will not match. N: words with N possible typos will match. Check [typos handling](#typos-handling-details) section for detailed description. | 2 | +| | MaxTyposInWord | int | Deprecated, use MaxTypos instead of this. Cannot be used with MaxTypos. Maximum possible typos in word. 0: typos is disabled, words with typos will not match. N: words with N possible typos will match. It is not recommended to set more than 1 possible typo -It will seriously increase RAM usage, and decrease search speed | - | +| | MaxTypoLen | int | Maximum word length for building and matching variants with typos. 
| 15 | +| | FtTyposDetailedConfig | struct | Config for more precise typos algorithm tuning | | +| | MaxRebuildSteps | int | Maximum steps without full rebuild of ft - more steps faster commit slower select - optimal about 15. | 50 | +| | MaxStepSize | int | Maximum unique words to step | 4000 | +| | MergeLimit | int | Maximum documents count which will be processed in merge query results. Increasing this value may refine ranking of queries with high frequency words, but will decrease search speed | 20000 | +| | Stemmers | []string | List of stemmers to use | "en","ru" | +| | EnableTranslit | bool | Enable russian translit variants processing. e.g. term "luntik" will match word "лунтик" | true | +| | EnableKbLayout | bool | Enable wrong keyboard layout variants processing. e.g. term "keynbr" will match word "лунтик" | true | +| | StopWords | []struct | List of objects of stopwords. Words from this list will be ignored when building indexes, but may be used in fulltext queries (such as 'word*', 'word~' etc) and produce non-empty search results. [More...](#stopwords-details) | | +| | SumRanksByFieldsRatio | float | Ratio of summation of ranks of match one term in several fields | 0.0 | +| | LogLevel | int | Log level of full text search engine | 0 | +| | FieldsCfg | []struct | Configs for certain fields. Overlaps parameters from main config. Contains parameters: FieldName, Bm25Boost, Bm25Weight, TermLenBoost, TermLenWeight, PositionBoost, PositionWeight. | empty | +| | EnableWarmupOnNsCopy | bool | Enable automatic index warmup after transaction, which has performed namespace copy | false | +| | ExtraWordSymbols | string | Extra symbols, which will be threated as parts of word to addition to letters and digits | "+/-" | +| | MaxAreasInDoc | int | Max number of highlighted areas for each field in each document (for snippet() and highlight()). 
'-1' means unlimited | 5 | +| | MaxTotalAreasToCache | int | Max total number of highlighted areas in ft result, when result still remains cacheable. '-1' means unlimited | -1 | +| | Optimization | string | Optimize the index by 'memory' or by 'cpu' | "memory" | +| | FtBaseRanking | struct | Relevance of the word in different forms | | +| | Bm25Config | struct | Document ranking function parameters | | +| | Bm25Config.Bm25k1 | float | Coefficient k1 in the formula for calculating bm25 (used only for rx_bm25, bm25). Сoefficient that sets the saturation threshold for the frequency of the term. The higher the coefficient, the higher the threshold and the lower the saturation rate. | 2.0 | +| | Bm25Config.Bm25b | float | Coefficient b in the formula for calculating bm25 (used only for rx_bm25, bm25). If b is bigger, the effects of the length of the document compared to the average length are more amplified. | 0.75 | +| | Bm25Config.Bm25Type | string | Formula for calculating document relevance (rx_bm25, bm25, word_count) | "rx_bm25" | + + +### Stopwords details +The list item can be either a string or a structure containing a string (the stopword) and a bool attribute (`is_morpheme`) indicating whether the stopword can be part of a word that can be shown in query-results. +If the stopword is set as a string, then the `is_morpheme` attribute is `false` by default and following entries are equivalent: +```json +"StopWords":[ + { + "word": "some_word", + "is_morpheme": false + }, + ///... +] +``` +, +```json +"StopWords":[ + "some_word", + ///... +] +``` + +#### Example: +If the list of stopwords looks like this: +```json +"StopWords":[ + { + "word": "under", + "is_morpheme": true + }, + ///... +] +``` +and there are pair of documents containing this word: `{"...under the roof ..."}, {"... to understand and forgive..."}`. Then for the query 'under*' we will get as a result only document `{"... 
to understand and forgive..."}` and for the query 'under' we will get nothing as a result. + +If the "StopWords" section is not specified in the config, then the [default](./cpp_src/core/ft/stopwords/stop_en.cc) stopwords list will be used, and if it is explicitly specified empty, it means that there are no stopwords. ### Detailed typos config diff --git a/iterator.go b/iterator.go index d70832764..9fd8d9165 100644 --- a/iterator.go +++ b/iterator.go @@ -15,11 +15,11 @@ import ( type ExplainSelector struct { // Field or index name - Field string `json:"field"` + Field string `json:"field,omitempty"` // Field type enum: indexed, non-indexed FieldType string `json:"field_type,omitempty"` // Method, used to process condition - Method string `json:"method"` + Method string `json:"method,omitempty"` // Number of uniq keys, processed by this selector (may be incorrect, in case of internal query optimization/caching Keys int `json:"keys"` // Count of comparators used, for this selector @@ -30,6 +30,8 @@ type ExplainSelector struct { Matched int `json:"matched"` // Count of scanned documents by this selector Items int `json:"items"` + Type string `json:"type,omitempty"` + Description string `json:"description,omitempty"` // Preselect in joined namespace execution explainings ExplainPreselect *ExplainResults `json:"explain_preselect,omitempty"` // One of selects in joined namespace execution explainings @@ -37,6 +39,13 @@ type ExplainSelector struct { Selectors []ExplainSelector `json:"selectors,omitempty"` } +type ExplainSubQuery struct { + Namespace string `json:"namespace"` + Explain ExplainResults `json:"explain"` + Keys int `json:"keys,omitempty"` + Field string `json:"field,omitempty"` +} + // ExplainResults presents query plan type ExplainResults struct { // Total query execution time @@ -61,6 +70,8 @@ type ExplainResults struct { Selectors []ExplainSelector `json:"selectors"` // Explaining attempts to inject Join queries ON-conditions into the Main Query WHERE 
clause OnConditionsInjections []ExplainJoinOnInjections `json:"on_conditions_injections,omitempty"` + // Explaining of subqueries' preselect + SubQueriesExplains []ExplainSubQuery `json:"subqueries,omitempty"` } // Describes the process of a single JOIN-query ON-conditions injection into the Where clause of a main query diff --git a/query.go b/query.go index c2f1be9cb..7a001df5d 100644 --- a/query.go +++ b/query.go @@ -555,20 +555,24 @@ func (q *Query) DWithin(index string, point Point, distance float64) *Query { return q } -func (q *Query) AggregateSum(field string) { +func (q *Query) AggregateSum(field string) *Query { q.ser.PutVarCUInt(queryAggregation).PutVarCUInt(AggSum).PutVarCUInt(1).PutVString(field) + return q } -func (q *Query) AggregateAvg(field string) { +func (q *Query) AggregateAvg(field string) *Query { q.ser.PutVarCUInt(queryAggregation).PutVarCUInt(AggAvg).PutVarCUInt(1).PutVString(field) + return q } -func (q *Query) AggregateMin(field string) { +func (q *Query) AggregateMin(field string) *Query { q.ser.PutVarCUInt(queryAggregation).PutVarCUInt(AggMin).PutVarCUInt(1).PutVString(field) + return q } -func (q *Query) AggregateMax(field string) { +func (q *Query) AggregateMax(field string) *Query { q.ser.PutVarCUInt(queryAggregation).PutVarCUInt(AggMax).PutVarCUInt(1).PutVString(field) + return q } type AggregateFacetRequest struct { diff --git a/readme.md b/readme.md index 354ae605e..38d512333 100644 --- a/readme.md +++ b/readme.md @@ -51,6 +51,7 @@ Storages are compatible between those versions, hovewer, replication configs are - [Transactions commit strategies](#transactions-commit-strategies) - [Implementation notes](#implementation-notes) - [Join](#join) + - [Anti-join](#anti-join) - [Joinable interface](#joinable-interface) - [Subqueries (nested queries)](#subqueries-nested-queries) - [Complex Primary Keys and Composite Indexes](#complex-primary-keys-and-composite-indexes) @@ -829,7 +830,7 @@ query := db.Query("items_with_join").Join( "actors" 
).On("actors_ids", reindexer.SET, "id") -query.Exec () +it := query.Exec() ``` In this example, Reindexer uses reflection under the hood to create Actor slice and copy Actor struct. @@ -882,6 +883,30 @@ query3 := db.Query("items_with_join"). Note that usually `Or` operator implements short-circuiting for `Where` conditions: if the previous condition is true the next one is not evaluated. But in case of `InnerJoin` it works differently: in `query1` (from the example above) both `InnerJoin` conditions are evaluated despite the result of `WhereInt`. `Limit(0)` as part of `InnerJoin` (`query3` from the example above) does not join any data - it works like a filter only to verify conditions. +#### Anti-join + +Reindexer does not support `ANTI JOIN` SQL construction, however, it supports logical operations with JOINs. In fact `NOT (INNER JOIN ...)` is totally equivalent to the `ANTI JOIN`: +```go +query := db.Query("items_with_join"). + Not(). + OpenBracket(). // Brackets are essential here for NOT to work + InnerJoin( + db.Query("actors"). + WhereBool("is_visible", reindexer.EQ, true), + "actors"). + On("id", reindexer.EQ, "id"). + CloseBracket() +``` +```SQL +SELECT * FROM items_with_join +WHERE + NOT ( + INNER JOIN ( + SELECT * FROM actors WHERE is_visible = true + ) ON items_with_join.id = actors.id + ) +``` + #### Joinable interface To avoid using reflection, `Item` can implement `Joinable` interface. If that implemented, Reindexer uses this instead of the slow reflection-based implementation. This increases overall performance by 10-20%, and reduces the amount of allocations.
diff --git a/test/config_test.go b/test/config_test.go index 6857911b1..b9af73149 100644 --- a/test/config_test.go +++ b/test/config_test.go @@ -10,6 +10,18 @@ import ( "github.com/restream/reindexer/v3" ) +type FtConfCheck struct { + ID int `reindex:"id,,pk"` +} + +const ( + ftCfgNsName = "ft_cfg_check" +) + +func init() { + tnamespaces[ftCfgNsName] = FtConfCheck{} +} + func TestSetDefaultQueryDebug(t *testing.T) { t.Run("set debug level to exist ns config", func(t *testing.T) { ns := "ns_with_config" @@ -89,3 +101,111 @@ func TestSetDefaultQueryDebug(t *testing.T) { assert.True(t, found) }) } + +func TestFtConfigCompatibility(t *testing.T) { + config := reindexer.DefaultFtFastConfig() + + addFtIndex := func(indexName string) reindexer.IndexDescription { + err := DB.AddIndex(ftCfgNsName, reindexer.IndexDef{ + Name: indexName, + JSONPaths: []string{indexName}, + Config: config, + IndexType: "text", + FieldType: "string", + }) + assert.NoError(t, err) + + item, err := DBD.Query(reindexer.NamespacesNamespaceName).Where("name", reindexer.EQ, ftCfgNsName).Exec().FetchOne() + assert.NoError(t, err) + + indexes := item.(*reindexer.NamespaceDescription).Indexes + index := indexes[len(indexes)-1] + return index + } + + checkStopWordsFtConfig := func(index reindexer.IndexDescription) { + conf := index.Config.(map[string]interface{}) + cfgStopWords := conf["stop_words"].([]interface{}) + assert.Equal(t, len(cfgStopWords), len(config.StopWords)) + + for idx, wordI := range config.StopWords { + switch wordI.(type) { + case string: + assert.Equal(t, wordI, cfgStopWords[idx]) + case reindexer.StopWord: + word := wordI.(reindexer.StopWord) + assert.Equal(t, word.Word, cfgStopWords[idx].(map[string]interface{})["word"]) + assert.Equal(t, word.IsMorpheme, cfgStopWords[idx].(map[string]interface{})["is_morpheme"]) + } + } + } + + t.Run("check string stop_words config with index create", func(t *testing.T) { + stopWordsStrs := append(make([]interface{}, 0), "под", "на", "из") + 
config.StopWords = stopWordsStrs + index := addFtIndex("idxStopWordsStrs") + checkStopWordsFtConfig(index) + }) + + t.Run("check object stop_words config with index create", func(t *testing.T) { + stopWordsObjs := append(make([]interface{}, 0), + reindexer.StopWord{ + Word: "пред", + IsMorpheme: true, + }, reindexer.StopWord{ + Word: "над", + IsMorpheme: true, + }, reindexer.StopWord{ + Word: "за", + IsMorpheme: false, + }) + config.StopWords = stopWordsObjs + index := addFtIndex("idxStopWordsObjs") + checkStopWordsFtConfig(index) + }) + + t.Run("check mixed stop_words config with index create", func(t *testing.T) { + stopWordsMix := append(make([]interface{}, 0), + "под", + reindexer.StopWord{ + Word: "пред", + IsMorpheme: true, + }, + reindexer.StopWord{ + Word: "за", + IsMorpheme: false, + }, + "на", + reindexer.StopWord{ + Word: "над", + IsMorpheme: true, + }, + "из") + config.StopWords = stopWordsMix + index := addFtIndex("idxStopWordsMix") + checkStopWordsFtConfig(index) + }) + + checkBm25FtConfig := func(index reindexer.IndexDescription, expectedBm25k1 float64, + expectedBm25b float64, expectedBm25Type string) { + conf := index.Config.(map[string]interface{}) + rankFunConf := conf["bm25_config"].(map[string]interface{}) + cfgBm25k1 := rankFunConf["bm25_k1"] + cfgBm25b := rankFunConf["bm25_b"] + cfgBm25Type := rankFunConf["bm25_type"] + assert.Equal(t, expectedBm25k1, cfgBm25k1) + assert.Equal(t, expectedBm25b, cfgBm25b) + assert.Equal(t, expectedBm25Type, cfgBm25Type) + } + + t.Run("check bm25_k1, bm25_b, bm25_type configs with index create", func(t *testing.T) { + expectedBm25k1 := 1.53 + expectedBm25b := 0.52 + expectedBm25Type := "bm25" + config.Bm25Config.Bm25k1 = expectedBm25k1 + config.Bm25Config.Bm25b = expectedBm25b + config.Bm25Config.Bm25Type = expectedBm25Type + index := addFtIndex("idxBm25") + checkBm25FtConfig(index, expectedBm25k1, expectedBm25b, expectedBm25Type) + }) +} diff --git a/test/ft/fx.go b/test/ft/fx.go index f11e505d4..1ed7da775 
100644 --- a/test/ft/fx.go +++ b/test/ft/fx.go @@ -37,7 +37,7 @@ func createReindexDbInstance(rx *reindexer.Reindexer, namespace string, indexTyp if indexType == "fuzzytext" { // Disable non exact searchers, disable stop word dictionat cfg := reindexer.DefaultFtFuzzyConfig() - cfg.StopWords = []string{} + cfg.StopWords = make([]interface{}, 0) cfg.Stemmers = []string{} cfg.EnableKbLayout = false cfg.EnableTranslit = false @@ -47,7 +47,7 @@ func createReindexDbInstance(rx *reindexer.Reindexer, namespace string, indexTyp config = cfg } else { cfg := reindexer.DefaultFtFastConfig() - cfg.StopWords = []string{} + cfg.StopWords = make([]interface{}, 0) cfg.Stemmers = []string{} cfg.EnableKbLayout = false cfg.EnableTranslit = false diff --git a/test/join_test.go b/test/join_test.go index 46687225c..08837876c 100644 --- a/test/join_test.go +++ b/test/join_test.go @@ -471,6 +471,7 @@ type expectedExplain struct { Field string FieldType string Method string + Description string Keys int Comparators int Matched int @@ -499,6 +500,13 @@ type expectedExplainJoinOnInjections struct { Conditions []expectedExplainConditionInjection } +type expectedExplainSubQuery struct { + Namespace string + Keys int + Field string + Selectors []expectedExplain +} + func checkExplain(t *testing.T, res []reindexer.ExplainSelector, expected []expectedExplain, fieldName string) { require.Equal(t, len(expected), len(res)) for i := 0; i < len(expected); i++ { @@ -514,6 +522,7 @@ func checkExplain(t *testing.T, res []reindexer.ExplainSelector, expected []expe assert.Equalf(t, expected[i].Matched, res[i].Matched, fieldName+expected[i].Field) assert.Equalf(t, expected[i].Keys, res[i].Keys, fieldName+expected[i].Field) assert.Equalf(t, expected[i].Comparators, res[i].Comparators, fieldName+expected[i].Field) + assert.Equalf(t, expected[i].Description, res[i].Description, fieldName+expected[i].Field) if len(expected[i].Preselect) == 0 { assert.Nil(t, res[i].ExplainPreselect, fieldName+expected[i].Field) } 
else { @@ -561,6 +570,16 @@ func checkExplainJoinOnInjections(t *testing.T, res []reindexer.ExplainJoinOnInj } } +func checkExplainSubqueries(t *testing.T, res []reindexer.ExplainSubQuery, expected []expectedExplainSubQuery) { + require.Equal(t, len(expected), len(res)) + for i := 0; i < len(expected); i++ { + assert.Equal(t, expected[i].Namespace, res[i].Namespace) + assert.Equal(t, expected[i].Field, res[i].Field) + assert.Equal(t, expected[i].Keys, res[i].Keys) + checkExplain(t, res[i].Explain.Selectors, expected[i].Selectors, "") + } +} + func TestExplainJoin(t *testing.T) { nsMain := "test_explain_main" nsJoined := "test_explain_joined" diff --git a/test/queries_test.go b/test/queries_test.go index eae306113..191c84146 100644 --- a/test/queries_test.go +++ b/test/queries_test.go @@ -241,6 +241,8 @@ func init() { tnamespaces["test_items_eqaul_position"] = TestItemEqualPosition{} tnamespaces["test_items_strict"] = TestItem{} tnamespaces["test_items_strict_joined"] = TestJoinItem{} + + tnamespaces["test_items_explain"] = TestItemSimple{} } func FillTestItemsForNot() { @@ -1119,7 +1121,7 @@ func callQueriesSequence(t *testing.T, namespace string, distinct []string, sort newTestQuery(DB, namespace).Distinct(distinct).Sort(sort, desc).ReqTotal(). WhereQuery(t, newTestQuery(DB, namespace).Where("id", reindexer.EQ, mkID(rand.Int()%5000)), - reindexer.ANY, nil). + reindexer.ANY, nil). ExecAndVerify(t) newTestQuery(DB, namespace).Distinct(distinct).Sort(sort, desc).ReqTotal(). @@ -2173,3 +2175,183 @@ func TestQrIdleTimeout(t *testing.T) { } }) } + +func TestQueryExplain(t *testing.T) { + t.Parallel() + + ns := "test_items_explain" + + tx := newTestTx(DB, ns) + for i := 0; i < 5; i++ { + tx.Upsert(TestItemSimple{ID: i, Year: i, Name: randString()}) + } + tx.MustCommit() + + t.Run("Subquery explain check (WhereQuery)", func(t *testing.T) { + q := DB.Query(ns).Explain(). 
+ WhereQuery(t, DB.Query(ns).Select("id").Where("year", reindexer.EQ, 1), reindexer.GE, 0) + it := q.MustExec(t) + defer it.Close() + explainRes, err := it.GetExplainResults() + require.NoError(t, err) + require.NotNil(t, explainRes) + + printExplainRes(explainRes) + checkExplain(t, explainRes.Selectors, []expectedExplain{ + { + Field: "-scan", + Method: "scan", + Keys: 0, + Comparators: 0, + Matched: 5, + }, + { + Description: "always true", + Keys: 0, + Comparators: 0, + Matched: 0, + }, + }, "") + checkExplainSubqueries(t, explainRes.SubQueriesExplains, []expectedExplainSubQuery{ + { + Namespace: ns, + Selectors: []expectedExplain{ + { + Field: "year", + FieldType: "indexed", + Method: "index", + Keys: 1, + Comparators: 0, + Matched: 1, + }, + { + Field: "id", + FieldType: "indexed", + Method: "scan", + Keys: 0, + Comparators: 1, + Matched: 1, + }, + }, + }, + }) + }) + + t.Run("Subquery explain check (Where)", func(t *testing.T) { + q := DB.Query(ns).Explain(). + Where("id", reindexer.EQ, DB.Query(ns).Select("id").Where("year", reindexer.EQ, 3)) + it := q.MustExec(t) + defer it.Close() + explainRes, err := it.GetExplainResults() + require.NoError(t, err) + require.NotNil(t, explainRes) + + printExplainRes(explainRes) + checkExplain(t, explainRes.Selectors, []expectedExplain{ + { + Field: "id", + FieldType: "indexed", + Method: "index", + Keys: 1, + Comparators: 0, + Matched: 1, + }, + }, "") + checkExplainSubqueries(t, explainRes.SubQueriesExplains, []expectedExplainSubQuery{ + { + Namespace: ns, + Field: "id", + Selectors: []expectedExplain{ + { + Field: "year", + FieldType: "indexed", + Method: "index", + Keys: 1, + Comparators: 0, + Matched: 1, + }, + }, + }, + }) + }) + + t.Run("Subquery explain check (Where + WhereQuery)", func(t *testing.T) { + q := DB.Query(ns).Explain(). + Where("id", reindexer.SET, DB.Query(ns).Select("id").Where("year", reindexer.SET, []int{1, 2})). 
+ WhereQuery(t, DB.Query(ns).Select("id").Where("year", reindexer.EQ, 5), reindexer.LE, 10) + it := q.MustExec(t) + defer it.Close() + explainRes, err := it.GetExplainResults() + require.NoError(t, err) + require.NotNil(t, explainRes) + + printExplainRes(explainRes) + checkExplain(t, explainRes.Selectors, []expectedExplain{ + { + Field: "-scan", + Method: "scan", + Keys: 0, + Comparators: 0, + Matched: 1, + }, + { + Description: "always false", + Keys: 0, + Comparators: 0, + Matched: 0, + }, + { + Field: "id", + FieldType: "indexed", + Method: "scan", + Keys: 0, + Comparators: 1, + Matched: 0, + }, + }, "") + checkExplainSubqueries(t, explainRes.SubQueriesExplains, []expectedExplainSubQuery{ + { + Namespace: ns, + Field: "id", + Selectors: []expectedExplain{ + { + Field: "-scan", + Method: "scan", + Keys: 0, + Comparators: 0, + Matched: 5, + }, + { + Field: "year", + FieldType: "indexed", + Method: "scan", + Keys: 0, + Comparators: 1, + Matched: 2, + }, + }, + }, + { + Namespace: ns, + Selectors: []expectedExplain{ + { + Field: "year", + FieldType: "indexed", + Method: "index", + Keys: 0, + Comparators: 0, + Matched: 0, + }, + { + Field: "id", + FieldType: "indexed", + Method: "scan", + Keys: 0, + Comparators: 1, + Matched: 0, + }, + }, + }, + }) + }) +} diff --git a/test/reindexer_bench_test.go b/test/reindexer_bench_test.go index 3d5802948..63e8a174d 100644 --- a/test/reindexer_bench_test.go +++ b/test/reindexer_bench_test.go @@ -409,6 +409,31 @@ func Benchmark2CondQueryTotal(b *testing.B) { } } +func BenchmarkSubQueryEq(b *testing.B) { + for i := 0; i < b.N; i++ { + prices := priceIds[rand.Int()%len(priceIds)] + q := DBD.Query("test_items_bench").Where("price_id", reindexer.EQ, DBD.Query("test_join_items").Select("id").WhereInt32("id", reindexer.EQ, prices[rand.Int()%len(prices)])).Limit(20) + q.MustExec().FetchAll() + } +} + +func BenchmarkSubQuerySet(b *testing.B) { + for i := 0; i < b.N; i++ { + prices := priceIds[rand.Int()%len(priceIds)] + rangeMin := 
prices[rand.Int()%len(prices)] + q := DBD.Query("test_items_bench").Where("price_id", reindexer.SET, DBD.Query("test_join_items").Select("id").WhereInt32("id", reindexer.RANGE, rangeMin, rangeMin + 500)).Limit(20) + q.MustExec().FetchAll() + } +} + +func BenchmarkSubQueryAggregate(b *testing.B) { + for i := 0; i < b.N; i++ { + prices := priceIds[rand.Int()%len(priceIds)] + q := DBD.Query("test_items_bench").Where("price_id", reindexer.LT, DBD.Query("test_join_items").AggregateAvg("id").WhereInt32("id", reindexer.SET, prices...).Limit(500)).Limit(20) + q.MustExec().FetchAll() + } +} + func Benchmark2CondQueryLeftJoin(b *testing.B) { ctx := &TestJoinCtx{} for i := 0; i < b.N; i++ { diff --git a/test/select_function_test.go b/test/select_function_test.go index aa8a5caa8..d49c846ff 100644 --- a/test/select_function_test.go +++ b/test/select_function_test.go @@ -1,54 +1,84 @@ package reindexer import ( + "fmt" "testing" "github.com/restream/reindexer/v3" - "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) type TestSelectTextItem struct { - ID int `reindex:"id,,pk"` - Name string `reindex:"name,text"` + ID int `reindex:"id,,pk"` + Name string `reindex:"name,text"` + _ struct{} `reindex:"id+name=comp_idx,text,composite"` } +const testSelectFuncNs = "test_select_func" + func init() { - tnamespaces["test_select_text_item"] = TestSelectTextItem{} + tnamespaces[testSelectFuncNs] = TestSelectTextItem{} } -func FillTestSelectTextItemsTx(count int, tx *txTest) { - for i := 0; i < count; i++ { - if err := tx.Upsert(&TestSelectTextItem{ +func FillTestSelectTextItems(names []string) { + tx := newTestTx(DB, testSelectFuncNs) + for i := 0; i < len(names); i++ { + item := TestSelectTextItem{ ID: mkID(i), - Name: randLangString(), - }); err != nil { + Name: names[i], + } + if err := tx.Upsert(&item); err != nil { panic(err) } } -} -func FillTestSelectTextItems(count int) { - tx := newTestTx(DB, "test_select_text_item") - FillTestSelectTextItemsTx(count, tx) 
tx.MustCommit() } -func TestSelectFunction(t *testing.T) { - FillTestSelectTextItems(50) - CheckSelectItemsQueries(t) +func checkSelectFunc(t *testing.T, qt *queryTest, expected string) { + res_slice, err := qt.MustExec(t).FetchAll() + require.NoError(t, err) + require.Len(t, res_slice, 1) + res := res_slice[0].(*TestSelectTextItem) + require.EqualValues(t, expected, res.Name) } -func CheckSelectItemsQueries(t *testing.T) { +func TestSelectFunctions(t *testing.T) { + t.Parallel() + + const ns = testSelectFuncNs + words := []string{"some wordrx", "w(here rx fin)d", "somerxhere"} + FillTestSelectTextItems(words) - first := randLangString() + delimiters := []string{".", "=", " = "} - q1 := DB.Query("test_select_text_item").Where("name", reindexer.EQ, first).Functions("name.snippet(,,3,3)") + t.Run("check select_function highlight", func(t *testing.T) { + for _, delim := range delimiters { + q := DB.Query(ns).Where("name", reindexer.EQ, "rx"). + Functions(fmt.Sprintf("name%shighlight(<,>)", delim)) + checkSelectFunc(t, q, "w(here fin)d") + } + }) - res, _, err := q1.MustExec(t).FetchAllWithRank() - assert.NoError(t, err) + t.Run("check select_function snippet", func(t *testing.T) { + for _, delim := range delimiters { + q := DB.Query(ns).Where("name", reindexer.EQ, "rx"). + Functions(fmt.Sprintf("name%ssnippet(<,>,2,3,'!','#')", delim)) + checkSelectFunc(t, q, "!e fi#") + } + }) - for _, item := range res { - _, ok := item.(*TestSelectTextItem) - assert.True(t, ok, "Unknown type after merge ") - } + t.Run("check select_function snippet_n", func(t *testing.T) { + for _, delim := range delimiters { + q := DB.Query(ns).Where("name", reindexer.EQ, "rx"). 
+ Functions(fmt.Sprintf("name%ssnippet_n('<','>',10,2,pre_delim='[',post_delim=']',left_bound='(',right_bound=')',with_area=1)", delim)) + checkSelectFunc(t, q, "[[2,11]here f]") + } + }) + t.Run("check can't select_function snippet with composite nonstring idx field", func(t *testing.T) { + q := DB.Query(ns).Where("comp_idx", reindexer.EQ, "rx").Functions("comp_idx=snippet(<,>,3,3,'!','!')") + result, err := q.Exec(t).FetchAll() + require.ErrorContains(t, err, "Unable to apply snippet function to the non-string field 'id'") + require.Nil(t, result) + }) }