From 1c30dbf9b068ff2df4f8c747cc046b6433cd48cc Mon Sep 17 00:00:00 2001 From: reindexer-bot <@> Date: Wed, 31 Jan 2024 22:34:09 +0000 Subject: [PATCH] [upd] Update face version to 3.22.0 --- changelog.md | 8 +- cpp_src/CMakeLists.txt | 2 +- cpp_src/client/coroqueryresults.cc | 21 +- cpp_src/client/coroqueryresults.h | 3 +- cpp_src/client/itemimpl.cc | 6 +- .../cmd/reindexer_server/contrib/Dockerfile | 16 +- .../test/test_storage_compatibility.sh | 195 +++++++++++++ cpp_src/core/cjson/cjsondecoder.cc | 2 +- cpp_src/core/cjson/cjsonmodifier.cc | 1 - cpp_src/core/ft/config/baseftconfig.cc | 30 +- cpp_src/core/ft/config/baseftconfig.h | 9 +- cpp_src/core/ft/ftdsl.cc | 44 ++- cpp_src/core/ft/ftdsl.h | 6 +- cpp_src/core/ft/stopwords/stop_ru.cc | 2 +- cpp_src/core/idsetcache.h | 4 +- cpp_src/core/index/indexordered.cc | 18 +- cpp_src/core/index/indextext/ftkeyentry.h | 2 +- cpp_src/core/index/indexunordered.cc | 4 +- cpp_src/core/item.cc | 2 +- cpp_src/core/itemimpl.cc | 10 +- cpp_src/core/itemmodifier.cc | 6 +- cpp_src/core/keyvalue/variant.cc | 22 +- cpp_src/core/keyvalue/variant.h | 19 +- cpp_src/core/namespace/namespaceimpl.cc | 11 +- cpp_src/core/namespace/namespaceimpl.h | 4 +- cpp_src/core/nsselecter/btreeindexiterator.h | 4 +- cpp_src/core/nsselecter/explaincalc.cc | 49 +--- cpp_src/core/nsselecter/explaincalc.h | 79 ++++-- cpp_src/core/nsselecter/fieldscomparator.cc | 4 +- cpp_src/core/nsselecter/nsselecter.cc | 1 + cpp_src/core/nsselecter/nsselecter.h | 3 +- cpp_src/core/nsselecter/selectiterator.cc | 103 +++---- cpp_src/core/nsselecter/selectiterator.h | 8 +- cpp_src/core/nsselecter/sortingcontext.cc | 74 ----- cpp_src/core/nsselecter/sortingcontext.h | 74 ++++- cpp_src/core/payload/payloadfieldvalue.h | 21 +- cpp_src/core/payload/payloadiface.cc | 51 +++- cpp_src/core/payload/payloadiface.h | 18 +- cpp_src/core/payload/payloadvalue.h | 4 +- cpp_src/core/query/query.cc | 10 +- cpp_src/core/query/query.h | 23 +- cpp_src/core/query/sql/sqlencoder.cc | 5 +- 
cpp_src/core/query/sql/sqlparser.cc | 264 ++++++++++-------- cpp_src/core/query/sql/sqlparser.h | 25 +- cpp_src/core/query/sql/sqlsuggester.cc | 180 ++++++++---- cpp_src/core/query/sql/sqlsuggester.h | 5 +- cpp_src/core/query/sql/sqltokentype.h | 8 +- cpp_src/core/querycache.h | 8 +- cpp_src/core/queryresults/joinresults.cc | 55 +--- cpp_src/core/queryresults/joinresults.h | 41 ++- cpp_src/core/queryresults/queryresults.cc | 28 +- cpp_src/core/queryresults/queryresults.h | 43 +-- cpp_src/core/reindexer_impl/rx_selector.cc | 51 +++- cpp_src/core/reindexer_impl/rx_selector.h | 9 +- cpp_src/core/schema.h | 2 +- .../core/selectfunc/functions/highlight.cc | 4 + cpp_src/core/selectfunc/functions/snippet.cc | 3 + cpp_src/core/selectfunc/selectfuncparser.cc | 20 +- cpp_src/core/selectkeyresult.h | 8 +- cpp_src/core/sortingprioritiestable.cc | 20 +- cpp_src/core/sortingprioritiestable.h | 21 +- cpp_src/estl/h_vector.h | 201 +++++++------ cpp_src/estl/span.h | 14 +- cpp_src/estl/trivial_reverse_iterator.h | 56 ++-- .../gtests/bench/fixtures/api_tv_simple.cc | 55 +++- cpp_src/gtests/bench/fixtures/api_tv_simple.h | 7 +- .../fixtures/api_tv_simple_comparators.cc | 22 +- .../fixtures/api_tv_simple_comparators.h | 5 +- cpp_src/gtests/bench/fixtures/ft_fixture.cc | 97 +++---- cpp_src/gtests/tests/API/base_tests.cc | 145 ++++++++-- cpp_src/gtests/tests/fixtures/ft_api.h | 2 +- .../gtests/tests/fixtures/join_selects_api.h | 12 +- .../gtests/tests/fixtures/queries_verifier.h | 9 + cpp_src/gtests/tests/unit/ft/ft_generic.cc | 81 ++++++ cpp_src/gtests/tests/unit/join_test.cc | 4 +- cpp_src/gtests/tests/unit/queries_test.cc | 72 ++++- .../gtests/tests/unit/string_function_test.cc | 8 + cpp_src/readme.md | 19 +- cpp_src/server/CMakeLists.txt | 2 +- cpp_src/server/contrib/CMakeLists.txt | 2 +- cpp_src/server/contrib/server.md | 23 +- cpp_src/server/contrib/server.yml | 39 ++- cpp_src/server/httpserver.cc | 1 + cpp_src/tools/json2kv.cc | 6 +- cpp_src/tools/serializer.h | 1 + 
cpp_src/tools/stringstools.cc | 33 +-- cpp_src/tools/stringstools.h | 30 +- cpp_src/vendor/cpp-btree/btree.h | 42 +-- cpp_src/vendor/cpp-btree/btree_container.h | 42 +-- cpp_src/vendor/prometheus/family.h | 4 +- cpp_src/vendor/prometheus/impl/check_names.cc | 10 +- cpp_src/vendor/spdlog/details/os.h | 5 +- dependencies.sh | 4 +- dsl/dsl.go | 14 +- ftfastconfig.go | 15 +- ftfuzzyconfig.go | 8 +- fulltext.md | 163 +++++++---- iterator.go | 15 +- query.go | 12 +- test/config_test.go | 87 ++++++ test/ft/fx.go | 4 +- test/join_test.go | 19 ++ test/queries_test.go | 184 +++++++++++- test/reindexer_bench_test.go | 25 ++ test/select_function_test.go | 80 ++++-- 105 files changed, 2358 insertions(+), 1094 deletions(-) create mode 100755 cpp_src/cmd/reindexer_server/test/test_storage_compatibility.sh delete mode 100644 cpp_src/core/nsselecter/sortingcontext.cc diff --git a/changelog.md b/changelog.md index 9147eb029..daa705c8c 100644 --- a/changelog.md +++ b/changelog.md @@ -1,6 +1,6 @@ # Version 3.21.0 (15.12.2023) ## Core -- [fea] Added [subqueries](#subqueries-nested-queries) support (`explain` for subqueries will be implemented in the next releases) +- [fea] Added [subqueries](#subqueries-nested-queries) support (`explain` for subqueries will be implemented in the next releases) - [fea] Added backtraces/minidump support for Windows platform - [fea] Added query crash tracker support for Windows platform - [fix] Added explicit error for aggregations in joined queries @@ -16,8 +16,8 @@ ## Go connector - [fea] Added Go API and DSL-convertor for subqueries -- [fea] Changed CJSON-to-object conversion logic for slices: now the single JSON values and fields with multiple JSON-paths will be concatenated together in the target field -- [fea] Added `WithStrictJoinHandlers`. 
This option allows to validate JoinHandlers usage at runtime +- [fea] Changed CJSON-to-object conversion logic for slices: now the single JSON values and fields with multiple JSON-paths will be concatenated together in the target field +- [fea] Added `WithStrictJoinHandlers`. This option allows to validate JoinHandlers usage at runtime - [fix] Fixed panic handling in the CJSON deserialization - [fix] Fixed logging in `cproto`-binding. Error messages will no longer be redirected to stdout instead of user's logger @@ -25,7 +25,7 @@ - [fea] Saved the scroll position on the sorting - [fea] Changed the Server ID range - [fea] Improved the notification about the supported browsers -- [fea] Added the default values to the config form when the default config is used +- [fea] Added the default values to the config form when the default config is used - [fix] Fixed the wrong redirect to a fake database - [fix] Fixed the column order changing on the data sorting - [fix] Fixed the horizontal scroll on the data sorting diff --git a/cpp_src/CMakeLists.txt b/cpp_src/CMakeLists.txt index a500f4e78..60c69cbb0 100644 --- a/cpp_src/CMakeLists.txt +++ b/cpp_src/CMakeLists.txt @@ -731,7 +731,7 @@ if (NOT WIN32) SET(CMAKE_INSTALL_DEFAULT_COMPONENT_NAME "server") SET(DIST_INCLUDE_FILES "tools/errors.h" "tools/serializer.h" "tools/varint.h" "tools/stringstools.h" "tools/customhash.h" "tools/assertrx.h" "tools/jsonstring.h" - "tools/verifying_updater.h" + "tools/verifying_updater.h" "tools/customlocal.h" "core/reindexer.h" "core/type_consts.h" "core/item.h" "core/payload/payloadvalue.h" "core/payload/payloadiface.h" "core/indexopts.h" "core/namespacedef.h" "core/keyvalue/variant.h" "core/keyvalue/geometry.h" "core/sortingprioritiestable.h" "core/rdxcontext.h" "core/activity_context.h" "core/type_consts_helpers.h" "core/payload/fieldsset.h" "core/payload/payloadtype.h" diff --git a/cpp_src/client/coroqueryresults.cc b/cpp_src/client/coroqueryresults.cc index ee3c1649a..4c0360cee 100644 --- 
a/cpp_src/client/coroqueryresults.cc +++ b/cpp_src/client/coroqueryresults.cc @@ -53,11 +53,19 @@ void CoroQueryResults::Bind(std::string_view rawResult, RPCQrId id) { PayloadType("tmp").clone()->deserialize(ser); }, ResultSerializer::AggsFlag::ClearAggregations); + + auto copyStart = rawResult.begin() + ser.Pos(); + if (const auto rawResLen = std::distance(copyStart, rawResult.end()); rx_unlikely(rawResLen > int64_t(RawResBufT::max_size()))) { + throw Error( + errLogic, + "client::QueryResults::Bind: rawResult buffer overflow. Max size if %d bytes, but %d bytes requested. Try to reduce " + "fetch limit (current limit is %d)", + RawResBufT::max_size(), rawResLen, fetchAmount_); + } + rawResult_.assign(copyStart, rawResult.end()); } catch (const Error &err) { status_ = err; } - - rawResult_.assign(rawResult.begin() + ser.Pos(), rawResult.end()); } void CoroQueryResults::fetchNextResults() { @@ -79,7 +87,14 @@ void CoroQueryResults::fetchNextResults() { ser.GetRawQueryParams(queryParams_, nullptr, ResultSerializer::AggsFlag::DontClearAggregations); - rawResult_.assign(rawResult.begin() + ser.Pos(), rawResult.end()); + auto copyStart = rawResult.begin() + ser.Pos(); + if (const auto rawResLen = std::distance(copyStart, rawResult.end()); rx_unlikely(rawResLen > int64_t(RawResBufT::max_size()))) { + throw Error(errLogic, + "client::QueryResults::fetchNextResults: rawResult buffer overflow. Max size if %d bytes, but %d bytes requested. 
Try " + "to reduce fetch limit (current limit is %d)", + RawResBufT::max_size(), rawResLen, fetchAmount_); + } + rawResult_.assign(copyStart, rawResult.end()); } h_vector CoroQueryResults::GetNamespaces() const { diff --git a/cpp_src/client/coroqueryresults.h b/cpp_src/client/coroqueryresults.h index c8b43f15c..d791ae426 100644 --- a/cpp_src/client/coroqueryresults.h +++ b/cpp_src/client/coroqueryresults.h @@ -72,6 +72,7 @@ class CoroQueryResults { friend class RPCClient; friend class CoroRPCClient; friend class RPCClientMock; + using RawResBufT = h_vector; CoroQueryResults(net::cproto::CoroClientConnection* conn, NsArray&& nsArray, int fetchFlags, int fetchAmount, seconds timeout); CoroQueryResults(net::cproto::CoroClientConnection* conn, NsArray&& nsArray, std::string_view rawResult, RPCQrId id, int fetchFlags, int fetchAmount, seconds timeout); @@ -81,7 +82,7 @@ class CoroQueryResults { net::cproto::CoroClientConnection* conn_; NsArray nsArray_; - h_vector rawResult_; + RawResBufT rawResult_; RPCQrId queryID_; int fetchOffset_; int fetchFlags_; diff --git a/cpp_src/client/itemimpl.cc b/cpp_src/client/itemimpl.cc index ef15c097d..39217dabd 100644 --- a/cpp_src/client/itemimpl.cc +++ b/cpp_src/client/itemimpl.cc @@ -54,7 +54,7 @@ void ItemImpl::FromCJSON(std::string_view slice) { throw Error(errParseJson, "Internal error - left unparsed data %d", rdser.Pos()); } tupleData_.assign(ser_.Slice().data(), ser_.Slice().size()); - pl.Set(0, Variant(p_string(&tupleData_))); + pl.Set(0, Variant(p_string(&tupleData_), Variant::no_hold_t{})); } Error ItemImpl::FromJSON(std::string_view slice, char **endp, bool /*pkOnly*/) { @@ -88,7 +88,7 @@ Error ItemImpl::FromJSON(std::string_view slice, char **endp, bool /*pkOnly*/) { if (err.ok()) { // Put tuple to field[0] tupleData_.assign(ser_.Slice().data(), ser_.Slice().size()); - pl.Set(0, Variant(p_string(&tupleData_))); + pl.Set(0, Variant(p_string(&tupleData_), Variant::no_hold_t{})); ser_ = WrSerializer(); } return err; @@ 
-102,7 +102,7 @@ Error ItemImpl::FromMsgPack(std::string_view buf, size_t &offset) { Error err = decoder.Decode(buf, pl, ser_, offset); if (err.ok()) { tupleData_.assign(ser_.Slice().data(), ser_.Slice().size()); - pl.Set(0, Variant(p_string(&tupleData_))); + pl.Set(0, Variant(p_string(&tupleData_), Variant::no_hold_t{})); } return err; } diff --git a/cpp_src/cmd/reindexer_server/contrib/Dockerfile b/cpp_src/cmd/reindexer_server/contrib/Dockerfile index 8eb80a77c..0da240cb0 100644 --- a/cpp_src/cmd/reindexer_server/contrib/Dockerfile +++ b/cpp_src/cmd/reindexer_server/contrib/Dockerfile @@ -1,12 +1,11 @@ -FROM alpine:3.14 AS build +FROM alpine:3.19 AS build RUN cd /tmp && apk update && \ - apk add git curl autoconf automake libtool linux-headers g++ make libunwind-dev grpc-dev grpc protobuf-dev c-ares-dev && \ + apk add git curl autoconf automake libtool linux-headers g++ make libunwind-dev grpc-dev protobuf-dev c-ares-dev patch && \ git clone https://github.com/gperftools/gperftools.git && \ cd gperftools && \ - echo "noinst_PROGRAMS =" >> Makefile.am && \ sed -i s/_sigev_un\._tid/sigev_notify_thread_id/ src/profile-handler.cc && \ - ./autogen.sh && ./configure --disable-dependency-tracking && make -j8 && make install + ./autogen.sh && ./configure --disable-dependency-tracking && make -j8 && make install ADD . 
/src @@ -19,17 +18,16 @@ RUN ./dependencies.sh && \ make -j8 reindexer_server reindexer_tool && \ make install -C cpp_src/cmd/reindexer_server && \ make install -C cpp_src/cmd/reindexer_tool && \ + make install -C cpp_src/server/grpc && \ cp ../cpp_src/cmd/reindexer_server/contrib/entrypoint.sh /entrypoint.sh && \ rm -rf /usr/local/lib/*.a /usr/local/include /usr/local/lib/libtcmalloc_debug* /usr/local/lib/libtcmalloc_minimal* \ - /usr/local/lib/libprofiler* /usr/local/lib/libtcmalloc.* /usr/local/share/doc /usr/local/share/man /usr/local/share/perl5 /usr/local/bin/pprof* + /usr/local/lib/libprofiler* /usr/local/lib/libtcmalloc.* /usr/local/share/doc /usr/local/share/man /usr/local/share/perl5 /usr/local/bin/pprof* -RUN cd build && make install -C cpp_src/server/grpc - -FROM alpine:3.14 +FROM alpine:3.19 COPY --from=build /usr/local /usr/local COPY --from=build /entrypoint.sh /entrypoint.sh -RUN apk update && apk add libstdc++ libunwind snappy libexecinfo leveldb c-ares libprotobuf xz-libs && rm -rf /var/cache/apk/* +RUN apk update && apk add libstdc++ libunwind snappy leveldb c-ares libprotobuf xz-libs grpc-cpp && rm -rf /var/cache/apk/* ENV RX_DATABASE /db ENV RX_CORELOG stdout diff --git a/cpp_src/cmd/reindexer_server/test/test_storage_compatibility.sh b/cpp_src/cmd/reindexer_server/test/test_storage_compatibility.sh new file mode 100755 index 000000000..d189d3841 --- /dev/null +++ b/cpp_src/cmd/reindexer_server/test/test_storage_compatibility.sh @@ -0,0 +1,195 @@ +#!/bin/bash +# Task: https://github.com/restream/reindexer/-/issues/1188 +set -e + +function KillAndRemoveServer { + local pid=$1 + kill $pid + wait $pid + yum remove -y 'reindexer*' > /dev/null +} + +function WaitForDB { + # wait until DB is loaded + set +e # disable "exit on error" so the script won't stop when DB's not loaded yet + is_connected=$(reindexer_tool --dsn $ADDRESS --command '\databases list'); + while [[ $is_connected != "test" ]] + do + sleep 2 + is_connected=$(reindexer_tool --dsn 
$ADDRESS --command '\databases list'); + done + set -e +} + +function CompareNamespacesLists { + local ns_list_actual=$1 + local ns_list_expected=$2 + local pid=$3 + + diff=$(echo ${ns_list_actual[@]} ${ns_list_expected[@]} | tr ' ' '\n' | sort | uniq -u) # compare in any order + if [ "$diff" == "" ]; then + echo "## PASS: namespaces list not changed" + else + echo "##### FAIL: namespaces list was changed" + echo "expected: $ns_list_expected" + echo "actual: $ns_list_actual" + KillAndRemoveServer $pid; + exit 1 + fi +} + +function CompareMemstats { + local actual=$1 + local expected=$2 + local pid=$3 + diff=$(echo ${actual[@]} ${expected[@]} | tr ' ' '\n' | sed 's/\(.*\),$/\1/' | sort | uniq -u) # compare in any order + if [ "$diff" == "" ]; then + echo "## PASS: memstats not changed" + else + echo "##### FAIL: memstats was changed" + echo "expected: $expected" + echo "actual: $actual" + KillAndRemoveServer $pid; + exit 1 + fi +} + + +RX_SERVER_CURRENT_VERSION_RPM="$(basename build/reindexer-*server*.rpm)" +VERSION_FROM_RPM=$(echo "$RX_SERVER_CURRENT_VERSION_RPM" | grep -o '.*server-..') +VERSION=$(echo ${VERSION_FROM_RPM: -2:1}) # one-digit version + +echo "## choose latest release rpm file" +if [ $VERSION == 3 ]; then + LATEST_RELEASE=$(python3 cpp_src/cmd/reindexer_server/test/get_last_rx_version.py -v 3) + namespaces_list_expected=$'purchase_options_ext_dict\nchild_account_recommendations\n#config\n#activitystats\nradio_channels\ncollections\n#namespaces\nwp_imports_tasks\nepg_genres\nrecom_media_items_personal\nrecom_epg_archive_default\n#perfstats\nrecom_epg_live_default\nmedia_view_templates\nasset_video_servers\nwp_tasks_schedule\nadmin_roles\n#clientsstats\nrecom_epg_archive_personal\nrecom_media_items_similars\nmenu_items\naccount_recommendations\nkaraoke_items\nmedia_items\nbanners\n#queriesperfstats\nrecom_media_items_default\nrecom_epg_live_personal\nservices\n#memstats\nchannels\nmedia_item_recommendations\nwp_tasks_tasks\nepg' +elif [ $VERSION == 4 
]; then + LATEST_RELEASE=$(python3 cpp_src/cmd/reindexer_server/test/get_last_rx_version.py -v 4) + # replicationstats ns added for v4 + namespaces_list_expected=$'purchase_options_ext_dict\nchild_account_recommendations\n#config\n#activitystats\n#replicationstats\nradio_channels\ncollections\n#namespaces\nwp_imports_tasks\nepg_genres\nrecom_media_items_personal\nrecom_epg_archive_default\n#perfstats\nrecom_epg_live_default\nmedia_view_templates\nasset_video_servers\nwp_tasks_schedule\nadmin_roles\n#clientsstats\nrecom_epg_archive_personal\nrecom_media_items_similars\nmenu_items\naccount_recommendations\nkaraoke_items\nmedia_items\nbanners\n#queriesperfstats\nrecom_media_items_default\nrecom_epg_live_personal\nservices\n#memstats\nchannels\nmedia_item_recommendations\nwp_tasks_tasks\nepg' +else + echo "Unknown version" + exit 1 +fi + +echo "## downloading latest release rpm file: $LATEST_RELEASE" +curl "http://repo.itv.restr.im/itv-api-ng/7/x86_64/$LATEST_RELEASE" --output $LATEST_RELEASE; +echo "## downloading example DB" +curl "https://git.restream.ru/MaksimKravchuk/reindexer_testdata/-/raw/master/big.zip" --output big.zip; +unzip -o big.zip # unzips into mydb_big.rxdump; + +ADDRESS="cproto://127.0.0.1:6534/" +DB_NAME="test" + +memstats_expected=$'[ +{"replication":{"data_hash":24651210926,"data_count":3}}, +{"replication":{"data_hash":6252344969,"data_count":1}}, +{"replication":{"data_hash":37734732881,"data_count":28}}, +{"replication":{"data_hash":0,"data_count":0}}, +{"replication":{"data_hash":1024095024522,"data_count":1145}}, +{"replication":{"data_hash":8373644068,"data_count":1315}}, +{"replication":{"data_hash":0,"data_count":0}}, +{"replication":{"data_hash":0,"data_count":0}}, +{"replication":{"data_hash":0,"data_count":0}}, +{"replication":{"data_hash":0,"data_count":0}}, +{"replication":{"data_hash":7404222244,"data_count":97}}, +{"replication":{"data_hash":94132837196,"data_count":4}}, +{"replication":{"data_hash":1896088071,"data_count":2}}, 
+{"replication":{"data_hash":0,"data_count":0}}, +{"replication":{"data_hash":-672103903,"data_count":33538}}, +{"replication":{"data_hash":0,"data_count":0}}, +{"replication":{"data_hash":6833710705,"data_count":1}}, +{"replication":{"data_hash":5858155773472,"data_count":4500}}, +{"replication":{"data_hash":-473221280268823592,"data_count":65448}}, +{"replication":{"data_hash":0,"data_count":0}}, +{"replication":{"data_hash":8288213744,"data_count":3}}, +{"replication":{"data_hash":0,"data_count":0}}, +{"replication":{"data_hash":0,"data_count":0}}, +{"replication":{"data_hash":354171024786967,"data_count":3941}}, +{"replication":{"data_hash":-6520334670,"data_count":35886}}, +{"replication":{"data_hash":112772074632,"data_count":281}}, +{"replication":{"data_hash":-12679568198538,"data_count":1623116}} +] +Returned 27 rows' + +echo "##### Forward compatibility test #####" + +DB_PATH=$(pwd)"/rx_db" + +echo "Database: "$DB_PATH + +echo "## installing latest release: $LATEST_RELEASE" +yum install -y $LATEST_RELEASE > /dev/null; +# run RX server with disabled logging +reindexer_server -l warning --httplog=none --rpclog=none --db $DB_PATH & +server_pid=$! +sleep 2; + +reindexer_tool --dsn $ADDRESS$DB_NAME -f mydb_big.rxdump --createdb; +sleep 1; + +namespaces_1=$(reindexer_tool --dsn $ADDRESS$DB_NAME --command '\namespaces list'); +echo $namespaces_1; +CompareNamespacesLists "${namespaces_1[@]}" "${namespaces_list_expected[@]}" $server_pid; + +memstats_1=$(reindexer_tool --dsn $ADDRESS$DB_NAME --command 'select replication.data_hash, replication.data_count from #memstats'); +CompareMemstats "${memstats_1[@]}" "${memstats_expected[@]}" $server_pid; + +KillAndRemoveServer $server_pid; + +echo "## installing current version: $RX_SERVER_CURRENT_VERSION_RPM" +yum install -y build/*.rpm > /dev/null; +reindexer_server -l0 --corelog=none --httplog=none --rpclog=none --db $DB_PATH & +server_pid=$! 
+sleep 2; + +WaitForDB + +namespaces_2=$(reindexer_tool --dsn $ADDRESS$DB_NAME --command '\namespaces list'); +echo $namespaces_2; +CompareNamespacesLists "${namespaces_2[@]}" "${namespaces_1[@]}" $server_pid; + +memstats_2=$(reindexer_tool --dsn $ADDRESS$DB_NAME --command 'select replication.data_hash, replication.data_count from #memstats'); +CompareMemstats "${memstats_2[@]}" "${memstats_1[@]}" $server_pid; + +KillAndRemoveServer $server_pid; +rm -rf $DB_PATH; +sleep 1; + +echo "##### Backward compatibility test #####" + +echo "## installing current version: $RX_SERVER_CURRENT_VERSION_RPM" +yum install -y build/*.rpm > /dev/null; +reindexer_server -l warning --httplog=none --rpclog=none --db $DB_PATH & +server_pid=$! +sleep 2; + +reindexer_tool --dsn $ADDRESS$DB_NAME -f mydb_big.rxdump --createdb; +sleep 1; + +namespaces_3=$(reindexer_tool --dsn $ADDRESS$DB_NAME --command '\namespaces list'); +echo $namespaces_3; +CompareNamespacesLists "${namespaces_3[@]}" "${namespaces_list_expected[@]}" $server_pid; + +memstats_3=$(reindexer_tool --dsn $ADDRESS$DB_NAME --command 'select replication.data_hash, replication.data_count from #memstats'); +CompareMemstats "${memstats_3[@]}" "${memstats_expected[@]}" $server_pid; + +KillAndRemoveServer $server_pid; + +echo "## installing latest release: $LATEST_RELEASE" +yum install -y $LATEST_RELEASE > /dev/null; +reindexer_server -l warning --httplog=none --rpclog=none --db $DB_PATH & +server_pid=$! 
+sleep 2; + +WaitForDB + +namespaces_4=$(reindexer_tool --dsn $ADDRESS$DB_NAME --command '\namespaces list'); +echo $namespaces_4; +CompareNamespacesLists "${namespaces_4[@]}" "${namespaces_3[@]}" $server_pid; + +memstats_4=$(reindexer_tool --dsn $ADDRESS$DB_NAME --command 'select replication.data_hash, replication.data_count from #memstats'); +CompareMemstats "${memstats_4[@]}" "${memstats_3[@]}" $server_pid; + +KillAndRemoveServer $server_pid; +rm -rf $DB_PATH; diff --git a/cpp_src/core/cjson/cjsondecoder.cc b/cpp_src/core/cjson/cjsondecoder.cc index 53a5162cf..7f6e84571 100644 --- a/cpp_src/core/cjson/cjsondecoder.cc +++ b/cpp_src/core/cjson/cjsondecoder.cc @@ -127,7 +127,7 @@ bool CJsonDecoder::decodeCJson(Payload &pl, Serializer &rdser, WrSerializer &wrs [[nodiscard]] Variant CJsonDecoder::cjsonValueToVariant(TagType tagType, Serializer &rdser, KeyValueType fieldType) { if (fieldType.Is() && tagType != TagType::TAG_STRING) { storage_.emplace_back(rdser.GetRawVariant(KeyValueType{tagType}).As()); - return Variant(p_string(&storage_.back()), false); + return Variant(p_string(&storage_.back()), Variant::no_hold_t{}); } else { return reindexer::cjsonValueToVariant(tagType, rdser, fieldType); } diff --git a/cpp_src/core/cjson/cjsonmodifier.cc b/cpp_src/core/cjson/cjsonmodifier.cc index 8409dbeeb..c8474cc54 100644 --- a/cpp_src/core/cjson/cjsonmodifier.cc +++ b/cpp_src/core/cjson/cjsonmodifier.cc @@ -1,5 +1,4 @@ #include "cjsonmodifier.h" -#include "core/keyvalue/p_string.h" #include "core/type_consts_helpers.h" #include "jsondecoder.h" #include "tagsmatcher.h" diff --git a/cpp_src/core/ft/config/baseftconfig.cc b/cpp_src/core/ft/config/baseftconfig.cc index 2cb4ad7f1..5df2d0198 100644 --- a/cpp_src/core/ft/config/baseftconfig.cc +++ b/cpp_src/core/ft/config/baseftconfig.cc @@ -1,16 +1,14 @@ #include "baseftconfig.h" -#include #include "core/cjson/jsonbuilder.h" #include "core/ft/stopwords/stop.h" #include "tools/errors.h" -#include "tools/jsontools.h" namespace 
reindexer { BaseFTConfig::BaseFTConfig() { - for (const char **p = stop_words_en; *p != nullptr; p++) stopWords.insert(*p); - for (const char **p = stop_words_ru; *p != nullptr; p++) stopWords.insert(*p); + for (const char **p = stop_words_en; *p != nullptr; p++) stopWords.insert({*p, StopWord::Type::Morpheme}); + for (const char **p = stop_words_ru; *p != nullptr; p++) stopWords.insert({*p, StopWord::Type::Morpheme}); } void BaseFTConfig::parseBase(const gason::JsonNode &root) { @@ -25,7 +23,25 @@ void BaseFTConfig::parseBase(const gason::JsonNode &root) { auto &stopWordsNode = root["stop_words"]; if (!stopWordsNode.empty()) { stopWords.clear(); - for (auto &sw : stopWordsNode) stopWords.insert(sw.As()); + for (auto &sw : stopWordsNode) { + std::string word; + StopWord::Type type = StopWord::Type::Stop; + if (sw.value.getTag() == gason::JsonTag::JSON_STRING) { + word = sw.As(); + } else if (sw.value.getTag() == gason::JsonTag::JSON_OBJECT) { + word = sw["word"].As(); + type = sw["is_morpheme"].As() ? 
StopWord::Type::Morpheme : StopWord::Type::Stop; + } + + if (std::find_if(word.begin(), word.end(), [](const auto &symbol) { return std::isspace(symbol); }) != word.end()) { + throw Error(errParams, "Stop words can't contain spaces: %s", word); + } + + auto [it, inserted] = stopWords.emplace(std::move(word), type); + if (!inserted && it->type != type) { + throw Error(errParams, "Duplicate stop-word with different morpheme attribute: %s", *it); + } + } } auto &stemmersNode = root["stemmers"]; @@ -80,7 +96,9 @@ void BaseFTConfig::getJson(JsonBuilder &jsonBuilder) const { { auto stopWordsNode = jsonBuilder.Array("stop_words"); for (const auto &sw : stopWords) { - stopWordsNode.Put(nullptr, sw); + auto wordNode = stopWordsNode.Object(nullptr); + wordNode.Put("word", sw); + wordNode.Put("is_morpheme", sw.type == StopWord::Type::Morpheme); } } { diff --git a/cpp_src/core/ft/config/baseftconfig.h b/cpp_src/core/ft/config/baseftconfig.h index f17db982f..aecd0b2c1 100644 --- a/cpp_src/core/ft/config/baseftconfig.h +++ b/cpp_src/core/ft/config/baseftconfig.h @@ -19,6 +19,12 @@ static constexpr int kMinMergeLimitValue = 0; class JsonBuilder; +struct StopWord : std::string { + enum class Type { Stop, Morpheme }; + StopWord(std::string base, Type type = Type::Stop) noexcept : std::string(std::move(base)), type(type) {} + Type type; +}; + class BaseFTConfig { public: struct Synonym { @@ -39,7 +45,8 @@ class BaseFTConfig { bool enableKbLayout = true; bool enableNumbersSearch = false; bool enableWarmupOnNsCopy = false; - fast_hash_set stopWords; + + fast_hash_set stopWords; std::vector synonyms; int logLevel = 0; std::string extraWordSymbols = "-/+"; // word contains symbols (IsAlpa | IsDigit) {IsAlpa | IsDigit | IsExtra} diff --git a/cpp_src/core/ft/ftdsl.cc b/cpp_src/core/ft/ftdsl.cc index e153c2f87..98e78623e 100644 --- a/cpp_src/core/ft/ftdsl.cc +++ b/cpp_src/core/ft/ftdsl.cc @@ -1,6 +1,7 @@ #include "core/ft/ftdsl.h" #include #include +#include "core/ft/config/baseftconfig.h" 
#include "tools/customlocal.h" #include "tools/errors.h" #include "tools/stringstools.h" @@ -30,7 +31,7 @@ void FtDSLQuery::parse(std::wstring &utf16str) { bool inGroup = false; bool hasAnythingExceptNot = false; int groupCounter = 0; - int maxPatternLen = 1; + size_t maxPatternLen = 1; h_vector fieldsOpts; std::string utf8str; fieldsOpts.insert(fieldsOpts.end(), std::max(int(fields_.size()), 1), {1.0, false}); @@ -49,27 +50,26 @@ void FtDSLQuery::parse(std::wstring &utf16str) { ++it; } else { if (*it == '@') { - it++; + ++it; parseFields(utf16str, it, fieldsOpts); continue; } if (*it == '-') { fte.opts.op = OpNot; - it++; + ++it; } else if (*it == '+') { fte.opts.op = OpAnd; - it++; + ++it; } if (it != utf16str.end() && (*it == '\'' || *it == '\"')) { inGroup = !inGroup; - it++; + ++it; // closing group if (!inGroup) { int distance = 1; if (it != utf16str.end() && *it == '~') { - ++it; - if (it == utf16str.end()) { + if (++it == utf16str.end()) { throw Error(errParseDSL, "Expected digit after '~' operator in phrase, but found nothing"); } if (!std::isdigit(*it)) { @@ -96,17 +96,17 @@ void FtDSLQuery::parse(std::wstring &utf16str) { fteIt->opts.groupNum = groupCounter; } groupTermCounter = 0; - groupCounter++; + ++groupCounter; } } } if (it != utf16str.end() && *it == '=') { fte.opts.exact = true; - it++; + ++it; } if (it != utf16str.end() && *it == '*') { fte.opts.suff = true; - it++; + ++it; } } auto begIt = it; @@ -120,21 +120,21 @@ void FtDSLQuery::parse(std::wstring &utf16str) { } } auto endIt = it; - for (; it != utf16str.end(); it++) { + for (; it != utf16str.end(); ++it) { if (*it == '*') { fte.opts.pref = true; } else if (*it == '~') { fte.opts.typos = true; } else if (*it == '^') { - ++it; - if (it == utf16str.end()) { + if (++it == utf16str.end()) { throw Error(errParseDSL, "Expected digit after '^' operator in search query DSL, but found nothing"); } wchar_t *end = nullptr, *start = &*it; fte.opts.boost = wcstod(start, &end); - it += end - start - 1; - 
if (end == start) + if (end == start) { throw Error(errParseDSL, "Expected digit after '^' operator in search query DSL, but found '%c' ", char(*start)); + } + it += end - start - 1; } else { break; } @@ -143,18 +143,14 @@ void FtDSLQuery::parse(std::wstring &utf16str) { if (endIt != begIt) { fte.pattern.assign(begIt, endIt); utf16_to_utf8(fte.pattern, utf8str); - if (is_number(utf8str)) fte.opts.number = true; - if (fte.opts.op != OpNot && groupTermCounter == 0) { - // Setting up this flag before stopWords check, to prevent error on DSL with stop word + NOT - hasAnythingExceptNot = true; - } - if (stopWords_.find(utf8str) != stopWords_.end()) { + fte.opts.number = is_number(utf8str); + // Setting up this flag before stopWords check, to prevent error on DSL with stop word + NOT + hasAnythingExceptNot = hasAnythingExceptNot || (fte.opts.op != OpNot && groupTermCounter == 0); + if (auto it = stopWords_.find(utf8str); it != stopWords_.end() && it->type == StopWord::Type::Stop) { continue; } - if (int(fte.pattern.length()) > maxPatternLen) { - maxPatternLen = fte.pattern.length(); - } + maxPatternLen = (fte.pattern.length() > maxPatternLen) ? 
fte.pattern.length() : maxPatternLen; emplace_back(std::move(fte)); if (inGroup) ++groupTermCounter; } diff --git a/cpp_src/core/ft/ftdsl.h b/cpp_src/core/ft/ftdsl.h index 28e36fcae..a01dbac76 100644 --- a/cpp_src/core/ft/ftdsl.h +++ b/cpp_src/core/ft/ftdsl.h @@ -49,9 +49,11 @@ struct FtDSLVariant { int proc = 0; }; +struct StopWord; + class FtDSLQuery : public RVector { public: - FtDSLQuery(const RHashMap &fields, const fast_hash_set &stopWords, + FtDSLQuery(const RHashMap &fields, const fast_hash_set &stopWords, const std::string &extraWordSymbols) noexcept : fields_(fields), stopWords_(stopWords), extraWordSymbols_(extraWordSymbols) {} void parse(std::wstring &utf16str); @@ -64,7 +66,7 @@ class FtDSLQuery : public RVector { std::function resolver_; const RHashMap &fields_; - const fast_hash_set &stopWords_; + const fast_hash_set &stopWords_; const std::string &extraWordSymbols_; }; diff --git a/cpp_src/core/ft/stopwords/stop_ru.cc b/cpp_src/core/ft/stopwords/stop_ru.cc index 6a63c1a15..7e5472da0 100644 --- a/cpp_src/core/ft/stopwords/stop_ru.cc +++ b/cpp_src/core/ft/stopwords/stop_ru.cc @@ -268,7 +268,7 @@ const char *stop_words_ru[] = { "кроме", "куда", "кругом", - "с т", + "с", "у", "я", "та", diff --git a/cpp_src/core/idsetcache.h b/cpp_src/core/idsetcache.h index 185073c22..6acdb92b5 100644 --- a/cpp_src/core/idsetcache.h +++ b/cpp_src/core/idsetcache.h @@ -75,12 +75,12 @@ T &operator<<(T &os, const IdSetCacheVal &v) { } struct equal_idset_cache_key { - bool operator()(const IdSetCacheKey &lhs, const IdSetCacheKey &rhs) const { + bool operator()(const IdSetCacheKey &lhs, const IdSetCacheKey &rhs) const noexcept { return lhs.cond == rhs.cond && lhs.sort == rhs.sort && *lhs.keys == *rhs.keys; } }; struct hash_idset_cache_key { - size_t operator()(const IdSetCacheKey &s) const { return (s.cond << 8) ^ (s.sort << 16) ^ s.keys->Hash(); } + size_t operator()(const IdSetCacheKey &s) const noexcept { return (size_t(s.cond) << 8) ^ (size_t(s.sort) << 16) ^ 
s.keys->Hash(); } }; using IdSetCacheBase = LRUCache; diff --git a/cpp_src/core/index/indexordered.cc b/cpp_src/core/index/indexordered.cc index 643562e35..b0ac203ce 100644 --- a/cpp_src/core/index/indexordered.cc +++ b/cpp_src/core/index/indexordered.cc @@ -60,14 +60,13 @@ SelectKeyResults IndexOrdered::SelectKey(const VariantArray &keys, CondType c auto startIt = this->idx_map.begin(); auto endIt = this->idx_map.end(); auto key1 = *keys.begin(); - switch (condition) { case CondLt: endIt = this->idx_map.lower_bound(static_cast(key1)); break; case CondLe: endIt = this->idx_map.lower_bound(static_cast(key1)); - if (endIt != this->idx_map.end() && !this->idx_map.key_comp()(static_cast(key1), endIt->first)) endIt++; + if (endIt != this->idx_map.end() && !this->idx_map.key_comp()(static_cast(key1), endIt->first)) ++endIt; break; case CondGt: startIt = this->idx_map.upper_bound(static_cast(key1)); @@ -83,12 +82,11 @@ SelectKeyResults IndexOrdered::SelectKey(const VariantArray &keys, CondType c if (startIt == this->idx_map.end()) startIt = this->idx_map.upper_bound(static_cast(key1)); endIt = this->idx_map.lower_bound(static_cast(key2)); - if (endIt != this->idx_map.end() && !this->idx_map.key_comp()(static_cast(key2), endIt->first)) endIt++; + if (endIt != this->idx_map.end() && !this->idx_map.key_comp()(static_cast(key2), endIt->first)) ++endIt; if (endIt != this->idx_map.end() && this->idx_map.key_comp()(endIt->first, static_cast(key1))) { return SelectKeyResults(std::move(res)); } - } break; case CondAny: case CondEq: @@ -134,9 +132,11 @@ SelectKeyResults IndexOrdered::SelectKey(const VariantArray &keys, CondType c typename T::iterator startIt, endIt; } ctx = {&this->idx_map, sortId, startIt, endIt}; - auto selector = [&ctx](SelectKeyResult &res, size_t &idsCount) { + auto selector = [&ctx, count](SelectKeyResult &res, size_t &idsCount) { idsCount = 0; - for (auto it = ctx.startIt; it != ctx.endIt && it != ctx.i_map->end(); ++it) { + res.reserve(count); + for (auto 
it = ctx.startIt; it != ctx.endIt; ++it) { + assertrx(it != ctx.i_map->end()); // FIXME: assertrx_dbg idsCount += it->second.Unsorted().Size(); res.emplace_back(it->second, ctx.sortId); } @@ -145,7 +145,11 @@ SelectKeyResults IndexOrdered::SelectKey(const VariantArray &keys, CondType c }; if (count > 1 && !opts.distinct && !opts.disableIdSetCache) { - this->tryIdsetCache(keys, condition, sortId, std::move(selector), res); + // Using btree node pointers instead of the real values from the filter and range instead all of the contidions + // to increase cache hits count + VariantArray cacheKeys = {Variant{startIt == this->idx_map.end() ? int64_t(0) : int64_t(&(*startIt))}, + Variant{endIt == this->idx_map.end() ? int64_t(0) : int64_t(&(*endIt))}}; + this->tryIdsetCache(cacheKeys, CondRange, sortId, std::move(selector), res); } else { size_t idsCount; selector(res, idsCount); diff --git a/cpp_src/core/index/indextext/ftkeyentry.h b/cpp_src/core/index/indextext/ftkeyentry.h index 5ac1ce72f..5cf257f50 100644 --- a/cpp_src/core/index/indextext/ftkeyentry.h +++ b/cpp_src/core/index/indextext/ftkeyentry.h @@ -39,7 +39,7 @@ class FtKeyEntry { IdSetPlain& Unsorted() noexcept { return impl_->Unsorted(); } const IdSetPlain& Unsorted() const noexcept { return impl_->Unsorted(); } - IdSetRef Sorted(unsigned sortId) const { return impl_->Sorted(sortId); } + IdSetRef Sorted(unsigned sortId) const noexcept { return impl_->Sorted(sortId); } void UpdateSortedIds(const UpdateSortedContext& ctx) { impl_->UpdateSortedIds(ctx); } void SetVDocID(int vdoc_id) noexcept { impl_->SetVDocID(vdoc_id); } const int& VDocID() const { return impl_->vdoc_id_; } diff --git a/cpp_src/core/index/indexunordered.cc b/cpp_src/core/index/indexunordered.cc index 06af94780..1d24cca2b 100644 --- a/cpp_src/core/index/indexunordered.cc +++ b/cpp_src/core/index/indexunordered.cc @@ -252,6 +252,8 @@ void IndexUnordered::Delete(const Variant &key, IdType id, StringsHolder &str } } +// WARNING: 'keys' is a key for 
LRUCache and in some cases (for ordered indexes, for example) can contain values, +// which are not correspond to the initial values from queries conditions template bool IndexUnordered::tryIdsetCache(const VariantArray &keys, CondType condition, SortType sortId, const std::function &selector, SelectKeyResult &res) { @@ -271,7 +273,7 @@ bool IndexUnordered::tryIdsetCache(const VariantArray &keys, CondType conditi cache_->Put(ckey, res.MergeIdsets(res.deferedExplicitSort, idsCount)); } } else { - res.push_back(SingleSelectKeyResult(cached.val.ids)); + res.emplace_back(std::move(cached.val.ids)); } } else { scanWin = selector(res, idsCount); diff --git a/cpp_src/core/item.cc b/cpp_src/core/item.cc index dfbed2b79..9dd1b3b18 100644 --- a/cpp_src/core/item.cc +++ b/cpp_src/core/item.cc @@ -102,7 +102,7 @@ Item::FieldRef &Item::FieldRef::operator=(span arr) { } else { itemImpl_->holder_->push_back(elem); } - pl.Set(field_, pos++, Variant(p_string{&itemImpl_->holder_->back()})); + pl.Set(field_, pos++, Variant(p_string{&itemImpl_->holder_->back()}, Variant::no_hold_t{})); } } } else { diff --git a/cpp_src/core/itemimpl.cc b/cpp_src/core/itemimpl.cc index 0c4730d86..1c8abd208 100644 --- a/cpp_src/core/itemimpl.cc +++ b/cpp_src/core/itemimpl.cc @@ -74,7 +74,7 @@ void ItemImpl::ModifyField(const IndexedTagsPath &tagsPath, const VariantArray & } tupleData_ = ser_.DetachLStr(); - pl.Set(0, Variant(p_string(reinterpret_cast(tupleData_.get())))); + pl.Set(0, Variant(p_string(reinterpret_cast(tupleData_.get())), Variant::no_hold_t{})); } void ItemImpl::SetField(std::string_view jsonPath, const VariantArray &keys, const IndexExpressionEvaluator &ev) { @@ -95,7 +95,7 @@ Error ItemImpl::FromMsgPack(std::string_view buf, size_t &offset) { Error err = msgPackDecoder_->Decode(buf, pl, ser_, offset); if (err.ok()) { tupleData_ = ser_.DetachLStr(); - pl.Set(0, Variant(p_string(reinterpret_cast(tupleData_.get())))); + pl.Set(0, Variant(p_string(reinterpret_cast(tupleData_.get())), 
Variant::no_hold_t{})); } return err; } @@ -110,7 +110,7 @@ Error ItemImpl::FromProtobuf(std::string_view buf) { Error err = decoder.Decode(buf, pl, ser_); if (err.ok()) { tupleData_ = ser_.DetachLStr(); - pl.Set(0, Variant(p_string(reinterpret_cast(tupleData_.get())))); + pl.Set(0, Variant(p_string(reinterpret_cast(tupleData_.get())), Variant::no_hold_t{})); } return err; } @@ -180,7 +180,7 @@ void ItemImpl::FromCJSON(std::string_view slice, bool pkOnly, Recoder *recoder) if (!rdser.Eof()) throw Error(errParseJson, "Internal error - left unparsed data %d", rdser.Pos()); tupleData_ = ser_.DetachLStr(); - pl.Set(0, Variant(p_string(reinterpret_cast(tupleData_.get())))); + pl.Set(0, Variant(p_string(reinterpret_cast(tupleData_.get())), Variant::no_hold_t{})); } Error ItemImpl::FromJSON(std::string_view slice, char **endp, bool pkOnly) { @@ -231,7 +231,7 @@ Error ItemImpl::FromJSON(std::string_view slice, char **endp, bool pkOnly) { // Put tuple to field[0] tupleData_ = ser_.DetachLStr(); - pl.Set(0, Variant(p_string(reinterpret_cast(tupleData_.get())))); + pl.Set(0, Variant(p_string(reinterpret_cast(tupleData_.get())), Variant::no_hold_t{})); return err; } diff --git a/cpp_src/core/itemmodifier.cc b/cpp_src/core/itemmodifier.cc index b03f00862..02055a4ed 100644 --- a/cpp_src/core/itemmodifier.cc +++ b/cpp_src/core/itemmodifier.cc @@ -360,8 +360,10 @@ void ItemModifier::modifyCJSON(IdType id, FieldData &field, VariantArray &values } catch (const Error &) { ns_.krefs.resize(0); } + } else if (index.Opts().IsArray()) { + pl.Get(fieldIdx, ns_.krefs, Variant::hold_t{}); } else { - pl.Get(fieldIdx, ns_.krefs, index.Opts().IsArray()); + pl.Get(fieldIdx, ns_.krefs); } if (ns_.krefs == ns_.skrefs) continue; bool needClearCache{false}; @@ -576,7 +578,7 @@ void ItemModifier::modifyIndexValues(IdType itemId, const FieldData &field, Vari if (index.Opts().IsSparse()) { pl.GetByJsonPath(field.tagspathWithLastIndex(), ns_.skrefs, index.KeyType()); } else { - pl.Get(field.index(), 
ns_.skrefs, true); + pl.Get(field.index(), ns_.skrefs, Variant::hold_t{}); } // Required when updating index array field with several tagpaths diff --git a/cpp_src/core/keyvalue/variant.cc b/cpp_src/core/keyvalue/variant.cc index 7e68ec9d9..64877208f 100644 --- a/cpp_src/core/keyvalue/variant.cc +++ b/cpp_src/core/keyvalue/variant.cc @@ -16,24 +16,26 @@ namespace reindexer { -Variant::Variant(const PayloadValue &v) : variant_{0, 1, KeyValueType::Composite{}} { new (cast()) PayloadValue(v); } +Variant::Variant(const PayloadValue &v) noexcept : variant_{0, 1, KeyValueType::Composite{}} { new (cast()) PayloadValue(v); } -Variant::Variant(PayloadValue &&v) : variant_{0, 1, KeyValueType::Composite{}} { new (cast()) PayloadValue(std::move(v)); } +Variant::Variant(PayloadValue &&v) noexcept : variant_{0, 1, KeyValueType::Composite{}} { new (cast()) PayloadValue(std::move(v)); } Variant::Variant(const std::string &v) : variant_{0, 1, KeyValueType::String{}} { new (cast()) key_string(make_key_string(v)); } Variant::Variant(std::string &&v) : variant_{0, 1, KeyValueType::String{}} { new (cast()) key_string(make_key_string(std::move(v))); } -Variant::Variant(const key_string &v) : variant_{0, 1, KeyValueType::String{}} { new (cast()) key_string(v); } -Variant::Variant(key_string &&v) : variant_{0, 1, KeyValueType::String{}} { new (cast()) key_string(std::move(v)); } -Variant::Variant(const char *v) : Variant(p_string(v)) {} -Variant::Variant(p_string v, bool enableHold) : variant_{0, 0, KeyValueType::String{}} { - if (v.type() == p_string::tagKeyString && enableHold) { +Variant::Variant(const key_string &v) noexcept : variant_{0, 1, KeyValueType::String{}} { new (cast()) key_string(v); } +Variant::Variant(key_string &&v) noexcept : variant_{0, 1, KeyValueType::String{}} { new (cast()) key_string(std::move(v)); } +Variant::Variant(const char *v) noexcept : Variant(p_string(v), Variant::no_hold_t{}) {} +Variant::Variant(p_string v, no_hold_t) noexcept : variant_{0, 0, 
KeyValueType::String{}} { *cast() = v; } +Variant::Variant(p_string v, hold_t) : variant_{0, 0, KeyValueType::String{}} { + if (v.type() == p_string::tagKeyString) { variant_.hold = 1; new (cast()) key_string(v.getKeyString()); } else { *cast() = v; } } +Variant::Variant(p_string v) noexcept : Variant(v, no_hold_t{}) {} Variant::Variant(const VariantArray &values) : variant_{0, 1, KeyValueType::Tuple{}} { WrSerializer ser; @@ -44,7 +46,7 @@ Variant::Variant(const VariantArray &values) : variant_{0, 1, KeyValueType::Tupl new (cast()) key_string(make_key_string(ser.Slice())); } -Variant::Variant(Point p) : Variant{VariantArray{p}} {} +Variant::Variant(Point p) noexcept : Variant{VariantArray{p}} {} Variant::Variant(Uuid uuid) noexcept : uuid_() { if (uuid.data_[0] == 0 && uuid.data_[1] == 0) { @@ -469,7 +471,7 @@ int Variant::RelaxCompare(const Variant &other, const CollateOpts &collateOpts) return Uuid{*this}.Compare(*otherUuid); } else { Uuid{*this}.PutToStr(uuidStrBuf); - return -other.Compare(Variant{uuidStrBufPString, false}); + return -other.Compare(Variant{uuidStrBufPString}); } } else if constexpr (withString == WithString::Yes) { Uuid{*this}.PutToStr(uuidStrBuf); @@ -484,7 +486,7 @@ int Variant::RelaxCompare(const Variant &other, const CollateOpts &collateOpts) return uuid->Compare(Uuid{other}); } else { Uuid{other}.PutToStr(uuidStrBuf); - return Compare(Variant{uuidStrBufPString, false}); + return Compare(Variant{uuidStrBufPString}); } } else if constexpr (withString == WithString::Yes) { Uuid{other}.PutToStr(uuidStrBuf); diff --git a/cpp_src/core/keyvalue/variant.h b/cpp_src/core/keyvalue/variant.h index efdf304ed..6fe5b532a 100644 --- a/cpp_src/core/keyvalue/variant.h +++ b/cpp_src/core/keyvalue/variant.h @@ -24,21 +24,26 @@ class Variant { friend Uuid; public: + struct no_hold_t {}; + struct hold_t {}; + Variant() noexcept : variant_{0, 0, KeyValueType::Null{}, uint64_t{}} {} explicit Variant(int v) noexcept : variant_{0, 0, KeyValueType::Int{}, v} {} 
explicit Variant(bool v) noexcept : variant_{0, 0, KeyValueType::Bool{}, v} {} explicit Variant(int64_t v) noexcept : variant_{0, 0, KeyValueType::Int64{}, v} {} explicit Variant(double v) noexcept : variant_{0, 0, KeyValueType::Double{}, v} {} - explicit Variant(const char *v); - explicit Variant(p_string v, bool enableHold = true); + explicit Variant(const char *v) noexcept; + Variant(p_string v, no_hold_t) noexcept; + Variant(p_string v, hold_t); + explicit Variant(p_string v) noexcept; explicit Variant(const std::string &v); explicit Variant(std::string &&v); - explicit Variant(const key_string &v); - explicit Variant(key_string &&v); - explicit Variant(const PayloadValue &v); - explicit Variant(PayloadValue &&v); + explicit Variant(const key_string &v) noexcept; + explicit Variant(key_string &&v) noexcept; + explicit Variant(const PayloadValue &v) noexcept; + explicit Variant(PayloadValue &&v) noexcept; explicit Variant(const VariantArray &values); - explicit Variant(Point); + explicit Variant(Point) noexcept; explicit Variant(Uuid) noexcept; Variant(const Variant &other) : uuid_{other.uuid_} { if (!isUuid()) { diff --git a/cpp_src/core/namespace/namespaceimpl.cc b/cpp_src/core/namespace/namespaceimpl.cc index 4c254163e..62d5be100 100644 --- a/cpp_src/core/namespace/namespaceimpl.cc +++ b/cpp_src/core/namespace/namespaceimpl.cc @@ -566,7 +566,7 @@ NamespaceImpl::RollBack_updateItems NamespaceImpl::updateItems(con for (auto fieldIdx : changedFields) { auto& index = *indexes_[fieldIdx]; if ((fieldIdx == 0) || deltaFields <= 0) { - oldValue.Get(fieldIdx, skrefsDel, true); + oldValue.Get(fieldIdx, skrefsDel, Variant::hold_t{}); bool needClearCache{false}; index.Delete(skrefsDel, rowId, *strHolder_, needClearCache); if (needClearCache && index.IsOrdered()) indexesCacheCleaner.Add(index.SortId()); @@ -1500,9 +1500,12 @@ void NamespaceImpl::doDelete(IdType id) { if (index.Opts().IsSparse()) { assertrx(index.Fields().getTagsPathsLength() > 0); 
pl.GetByJsonPath(index.Fields().getTagsPath(0), skrefs, index.KeyType()); + } else if (index.Opts().IsArray()) { + pl.Get(field, skrefs, Variant::hold_t{}); } else { - pl.Get(field, skrefs, index.Opts().IsArray()); + pl.Get(field, skrefs); } + // Delete value from index bool needClearCache{false}; index.Delete(skrefs, id, *strHolder_, needClearCache); @@ -1848,8 +1851,10 @@ void NamespaceImpl::doUpsert(ItemImpl* ritem, IdType id, bool doUpdate) { } catch (const Error&) { krefs.resize(0); } + } else if (index.Opts().IsArray()) { + pl.Get(field, krefs, Variant::hold_t{}); } else { - pl.Get(field, krefs, index.Opts().IsArray()); + pl.Get(field, krefs); } if ((krefs.ArrayType().Is() && skrefs.ArrayType().Is()) || krefs == skrefs) continue; bool needClearCache{false}; diff --git a/cpp_src/core/namespace/namespaceimpl.h b/cpp_src/core/namespace/namespaceimpl.h index f05ab9b76..a3db5e963 100644 --- a/cpp_src/core/namespace/namespaceimpl.h +++ b/cpp_src/core/namespace/namespaceimpl.h @@ -281,12 +281,12 @@ class NamespaceImpl : public intrusive_atomic_rc_base { // NOLINT(*performance. void OnConfigUpdated(DBConfigProvider &configProvider, const RdxContext &ctx); StorageOpts GetStorageOpts(const RdxContext &); std::shared_ptr GetSchemaPtr(const RdxContext &ctx) const; - int getNsNumber() const { return schema_ ? schema_->GetProtobufNsNumber() : 0; } + int getNsNumber() const noexcept { return schema_ ? schema_->GetProtobufNsNumber() : 0; } IndexesCacheCleaner GetIndexesCacheCleaner() { return IndexesCacheCleaner{*this}; } // Separate method for the v3/v4 replication compatibility. 
// It should not be used outside of this scenario void SetTagsMatcher(TagsMatcher &&tm, const RdxContext &ctx); - void SetDestroyFlag() { dbDestroyed_ = true; } + void SetDestroyFlag() noexcept { dbDestroyed_ = true; } Error FlushStorage(const RdxContext &ctx) { const auto flushOpts = StorageFlushOpts().WithImmediateReopen(); auto lck = rLock(ctx); diff --git a/cpp_src/core/nsselecter/btreeindexiterator.h b/cpp_src/core/nsselecter/btreeindexiterator.h index a805955a8..7a0b6bc25 100644 --- a/cpp_src/core/nsselecter/btreeindexiterator.h +++ b/cpp_src/core/nsselecter/btreeindexiterator.h @@ -10,8 +10,8 @@ namespace reindexer { template class BtreeIndexIterator final : public IndexIterator { public: - explicit BtreeIndexIterator(const T& idxMap) : idxMap_(idxMap), first_(idxMap.begin()), last_(idxMap.end()) {} - BtreeIndexIterator(const T& idxMap, const typename T::iterator& first, const typename T::iterator& last) + explicit BtreeIndexIterator(const T& idxMap) noexcept : idxMap_(idxMap), first_(idxMap.begin()), last_(idxMap.end()) {} + BtreeIndexIterator(const T& idxMap, const typename T::iterator& first, const typename T::iterator& last) noexcept : idxMap_(idxMap), first_(first), last_(last) {} ~BtreeIndexIterator() override final = default; diff --git a/cpp_src/core/nsselecter/explaincalc.cc b/cpp_src/core/nsselecter/explaincalc.cc index 642cade9c..4dfff967e 100644 --- a/cpp_src/core/nsselecter/explaincalc.cc +++ b/cpp_src/core/nsselecter/explaincalc.cc @@ -202,6 +202,16 @@ std::string ExplainCalc::GetJSON() { json.Put("postprocess_us"sv, To_us(postprocess_)); json.Put("loop_us"sv, To_us(loop_)); json.Put("general_sort_us"sv, To_us(sort_)); + if (!subqueries_.empty()) { + auto subQuries = json.Array("subqueries"); + for (const auto &sq : subqueries_) { + auto s = subQuries.Object(); + s.Put("namespace", sq.NsName()); + s.Raw("explain", sq.Explain()); + std::visit(overloaded{[&](size_t k) { s.Put("keys", k); }, [&](const std::string &f) { s.Put("field", f); }}, + 
sq.FieldOrKeys()); + } + } } json.Put("sort_index"sv, sortIndex_); json.Put("sort_by_uncommitted_index"sv, sortOptimization_); @@ -305,45 +315,6 @@ std::string SelectIteratorContainer::explainJSON(const_iterator begin, const_ite return name.str(); } -ExplainCalc::Duration ExplainCalc::lap() noexcept { - auto now = Clock::now(); - Duration d = now - last_point_; - last_point_ = now; - return d; -} - int ExplainCalc::To_us(const ExplainCalc::Duration &d) noexcept { return duration_cast(d).count(); } -void ExplainCalc::StartTiming() noexcept { - if (enabled_) lap(); -} - -void ExplainCalc::StopTiming() noexcept { - if (enabled_) total_ = preselect_ + prepare_ + select_ + postprocess_ + loop_; -} - -void ExplainCalc::AddPrepareTime() noexcept { - if (enabled_) prepare_ += lap(); -} - -void ExplainCalc::AddSelectTime() noexcept { - if (enabled_) select_ += lap(); -} - -void ExplainCalc::AddPostprocessTime() noexcept { - if (enabled_) postprocess_ += lap(); -} - -void ExplainCalc::AddLoopTime() noexcept { - if (enabled_) loop_ += lap(); -} - -void ExplainCalc::StartSort() noexcept { - if (enabled_) sort_start_point_ = Clock::now(); -} - -void ExplainCalc::StopSort() noexcept { - if (enabled_) sort_ = Clock::now() - sort_start_point_; -} - } // namespace reindexer diff --git a/cpp_src/core/nsselecter/explaincalc.h b/cpp_src/core/nsselecter/explaincalc.h index 837dfafde..ab23d8290 100644 --- a/cpp_src/core/nsselecter/explaincalc.h +++ b/cpp_src/core/nsselecter/explaincalc.h @@ -2,6 +2,7 @@ #include #include +#include #include #include "core/type_consts.h" @@ -17,6 +18,24 @@ struct ConditionInjection; typedef std::vector JoinedSelectors; typedef std::vector OnConditionInjections; +class SubQueryExplain { +public: + SubQueryExplain(const std::string& ns, std::string&& exp) : explain_{std::move(exp)}, namespace_{ns} {} + [[nodiscard]] const std::string& NsName() const& noexcept { return namespace_; } + [[nodiscard]] const auto& FieldOrKeys() const& noexcept { return 
fieldOrKeys_; } + [[nodiscard]] const std::string& Explain() const& noexcept { return explain_; } + void SetFieldOrKeys(std::variant&& fok) noexcept { fieldOrKeys_ = std::move(fok); } + + auto NsName() const&& = delete; + auto FieldOrKeys() const&& = delete; + auto Explain() const&& = delete; + +private: + std::string explain_; + std::string namespace_; + std::variant fieldOrKeys_{size_t(0)}; +}; + class ExplainCalc { public: typedef std::chrono::high_resolution_clock Clock; @@ -29,24 +48,40 @@ class ExplainCalc { ExplainCalc() = default; ExplainCalc(bool enable) noexcept : enabled_(enable) {} - void StartTiming() noexcept; - void StopTiming() noexcept; - - void AddPrepareTime() noexcept; - void AddSelectTime() noexcept; - void AddPostprocessTime() noexcept; - void AddLoopTime() noexcept; + void StartTiming() noexcept { + if (enabled_) lap(); + } + void StopTiming() noexcept { + if (enabled_) total_ = preselect_ + prepare_ + select_ + postprocess_ + loop_; + } + void AddPrepareTime() noexcept { + if (enabled_) prepare_ += lap(); + } + void AddSelectTime() noexcept { + if (enabled_) select_ += lap(); + } + void AddPostprocessTime() noexcept { + if (enabled_) postprocess_ += lap(); + } + void AddLoopTime() noexcept { + if (enabled_) loop_ += lap(); + } void AddIterations(int iters) noexcept { iters_ += iters; } - void StartSort() noexcept; - void StopSort() noexcept; + void StartSort() noexcept { + if (enabled_) sort_start_point_ = Clock::now(); + } + void StopSort() noexcept { + if (enabled_) sort_ = Clock::now() - sort_start_point_; + } void PutCount(int cnt) noexcept { count_ = cnt; } void PutSortIndex(std::string_view index) noexcept { sortIndex_ = index; } - void PutSelectors(const SelectIteratorContainer *qres) noexcept { selectors_ = qres; } - void PutJoinedSelectors(const JoinedSelectors *jselectors) noexcept { jselectors_ = jselectors; } + void PutSelectors(const SelectIteratorContainer* qres) noexcept { selectors_ = qres; } + void PutJoinedSelectors(const 
JoinedSelectors* jselectors) noexcept { jselectors_ = jselectors; } void SetPreselectTime(Duration preselectTime) noexcept { preselect_ = preselectTime; } - void PutOnConditionInjections(const OnConditionInjections *onCondInjections) noexcept { onInjections_ = onCondInjections; } + void PutOnConditionInjections(const OnConditionInjections* onCondInjections) noexcept { onInjections_ = onCondInjections; } void SetSortOptimization(bool enable) noexcept { sortOptimization_ = enable; } + void SetSubQueriesExplains(std::vector&& subQueriesExpl) noexcept { subqueries_ = std::move(subQueriesExpl); } void LogDump(int logLevel); std::string GetJSON(); @@ -59,14 +94,21 @@ class ExplainCalc { Duration Sort() const noexcept { return sort_; } size_t Iterations() const noexcept { return iters_; } - static int To_us(const Duration &d) noexcept; bool IsEnabled() const noexcept { return enabled_; } + static int To_us(const Duration &d) noexcept; + private: - Duration lap() noexcept; + Duration lap() noexcept { + const auto now = Clock::now(); + Duration d = now - last_point_; + last_point_ = now; + return d; + } time_point last_point_, sort_start_point_; - Duration total_, prepare_ = Duration::zero(); + Duration total_ = Duration::zero(); + Duration prepare_ = Duration::zero(); Duration preselect_ = Duration::zero(); Duration select_ = Duration::zero(); Duration postprocess_ = Duration::zero(); @@ -74,9 +116,10 @@ class ExplainCalc { Duration sort_ = Duration::zero(); std::string_view sortIndex_; - const SelectIteratorContainer *selectors_ = nullptr; - const JoinedSelectors *jselectors_ = nullptr; - const OnConditionInjections *onInjections_ = nullptr; ///< Optional + const SelectIteratorContainer* selectors_ = nullptr; + const JoinedSelectors* jselectors_ = nullptr; + const OnConditionInjections* onInjections_ = nullptr; ///< Optional + std::vector subqueries_; int iters_ = 0; int count_ = 0; diff --git a/cpp_src/core/nsselecter/fieldscomparator.cc 
b/cpp_src/core/nsselecter/fieldscomparator.cc index 2005861c8..1734a944c 100644 --- a/cpp_src/core/nsselecter/fieldscomparator.cc +++ b/cpp_src/core/nsselecter/fieldscomparator.cc @@ -41,8 +41,8 @@ class ArrayAdapter { [&](reindexer::KeyValueType::Double) noexcept { return reindexer::Variant{*reinterpret_cast(ptr_ + sizeof_ * i)}; }, - [&](reindexer::KeyValueType::String) { - return reindexer::Variant{*reinterpret_cast(ptr_ + sizeof_ * i), false}; + [&](reindexer::KeyValueType::String) noexcept { + return reindexer::Variant{*reinterpret_cast(ptr_ + sizeof_ * i)}; }, [&](reindexer::KeyValueType::Bool) noexcept { return reindexer::Variant{*reinterpret_cast(ptr_ + sizeof_ * i)}; }, [&](reindexer::KeyValueType::Int) noexcept { return reindexer::Variant{*reinterpret_cast(ptr_ + sizeof_ * i)}; }, diff --git a/cpp_src/core/nsselecter/nsselecter.cc b/cpp_src/core/nsselecter/nsselecter.cc index 3a7c22bc0..adc1d491e 100644 --- a/cpp_src/core/nsselecter/nsselecter.cc +++ b/cpp_src/core/nsselecter/nsselecter.cc @@ -27,6 +27,7 @@ void NsSelecter::operator()(QueryResults &result, SelectCtx &ctx, const RdxConte auto &explain = ctx.explain; explain = ExplainCalc(ctx.query.GetExplain() || logLevel >= LogInfo); + explain.SetSubQueriesExplains(std::move(ctx.subQueriesExplains)); ActiveQueryScope queryScope(ctx, ns_->optimizationState_, explain, ns_->locker_.IsReadOnly(), ns_->strHolder_.get()); explain.SetPreselectTime(ctx.preResultTimeTotal); diff --git a/cpp_src/core/nsselecter/nsselecter.h b/cpp_src/core/nsselecter/nsselecter.h index 6d4c87f34..329fa1e44 100644 --- a/cpp_src/core/nsselecter/nsselecter.h +++ b/cpp_src/core/nsselecter/nsselecter.h @@ -33,6 +33,7 @@ struct SelectCtx { const Query *parentQuery = nullptr; ExplainCalc explain; bool requiresCrashTracking = false; + std::vector subQueriesExplains; RX_ALWAYS_INLINE bool isMergeQuerySubQuery() const noexcept { return isMergeQuery == IsMergeQuery::Yes && parentQuery; } }; @@ -47,7 +48,7 @@ class NsSelecter { class 
JoinedNsValueGetter; public: - NsSelecter(NamespaceImpl *parent) : ns_(parent) {} + NsSelecter(NamespaceImpl *parent) noexcept : ns_(parent) {} void operator()(QueryResults &result, SelectCtx &ctx, const RdxContext &); diff --git a/cpp_src/core/nsselecter/selectiterator.cc b/cpp_src/core/nsselecter/selectiterator.cc index 05ab657be..481085245 100644 --- a/cpp_src/core/nsselecter/selectiterator.cc +++ b/cpp_src/core/nsselecter/selectiterator.cc @@ -7,9 +7,6 @@ namespace reindexer { -SelectIterator::SelectIterator(SelectKeyResult res, bool dist, std::string n, IteratorFieldKind fKind, bool forcedFirst) - : SelectKeyResult(std::move(res)), distinct(dist), name(std::move(n)), fieldKind(fKind), forcedFirst_(forcedFirst), type_(Forward) {} - void SelectIterator::Bind(const PayloadType &type, int field) { for (Comparator &cmp : comparators_) cmp.Bind(type, field); } @@ -18,62 +15,69 @@ void SelectIterator::Start(bool reverse, int maxIterations) { const bool explicitSort = applyDeferedSort(maxIterations); isReverse_ = reverse; - lastIt_ = begin(); + const auto begIt = begin(); + lastIt_ = begIt; - for (auto it = begin(); it != end(); it++) { + for (auto it = begIt, endIt = end(); it != endIt; ++it) { if (it->isRange_) { if (isReverse_) { - auto rrBegin = it->rEnd_ - 1; + const auto rrBegin = it->rEnd_ - 1; it->rrEnd_ = it->rBegin_ - 1; it->rrBegin_ = rrBegin; - it->rrIt_ = it->rrBegin_; + it->rrIt_ = rrBegin; } else { it->rIt_ = it->rBegin_; } } else { if (it->useBtree_) { - assertrx(it->set_); + assertrx_dbg(it->set_); if (reverse) { - it->setrbegin_ = it->set_->rbegin(); + const auto setRBegin = it->set_->rbegin(); + it->ritset_ = setRBegin; + it->setrbegin_ = setRBegin; it->setrend_ = it->set_->rend(); - it->ritset_ = it->set_->rbegin(); } else { - it->setbegin_ = it->set_->begin(); + const auto setBegin = it->set_->begin(); + it->itset_ = setBegin; + it->setbegin_ = setBegin; it->setend_ = it->set_->end(); - it->itset_ = it->setbegin_; } } else { if (isReverse_) { - 
it->rbegin_ = it->ids_.rbegin(); + const auto idsRBegin = it->ids_.rbegin(); it->rend_ = it->ids_.rend(); - it->rit_ = it->ids_.rbegin(); + it->rit_ = idsRBegin; + it->rbegin_ = idsRBegin; } else { - it->begin_ = it->ids_.begin(); + const auto idsBegin = it->ids_.begin(); it->end_ = it->ids_.end(); - it->it_ = it->ids_.begin(); + it->it_ = idsBegin; + it->begin_ = idsBegin; } } } } lastVal_ = isReverse_ ? INT_MAX : INT_MIN; - type_ = isReverse_ ? Reverse : Forward; - if (size() == 1 && begin()->indexForwardIter_) { + + if (size() == 0) { + type_ = OnlyComparator; + lastVal_ = isReverse_ ? INT_MIN : INT_MAX; + } else if (size() == 1 && begIt->indexForwardIter_) { type_ = UnbuiltSortOrdersIndex; - begin()->indexForwardIter_->Start(reverse); + begIt->indexForwardIter_->Start(reverse); } else if (isUnsorted) { type_ = Unsorted; } else if (size() == 1) { if (!isReverse_) { - type_ = begin()->isRange_ ? SingleRange : (explicitSort ? SingleIdSetWithDeferedSort : SingleIdset); + type_ = begIt->isRange_ ? SingleRange : (explicitSort ? SingleIdSetWithDeferedSort : SingleIdset); } else { - type_ = begin()->isRange_ ? RevSingleRange : (explicitSort ? RevSingleIdSetWithDeferedSort : RevSingleIdset); + type_ = begIt->isRange_ ? RevSingleRange : (explicitSort ? RevSingleIdSetWithDeferedSort : RevSingleIdset); } + } else { + type_ = isReverse_ ? Reverse : Forward; } - if (size() == 0) { - type_ = OnlyComparator; - lastVal_ = isReverse_ ? 
INT_MIN : INT_MAX; - } + ClearDistinct(); } @@ -81,7 +85,7 @@ void SelectIterator::Start(bool reverse, int maxIterations) { bool SelectIterator::nextFwd(IdType minHint) noexcept { if (minHint > lastVal_) lastVal_ = minHint - 1; int minVal = INT_MAX; - for (auto it = begin(); it != end(); it++) { + for (auto it = begin(), endIt = end(); it != endIt; ++it) { if (it->useBtree_) { if (it->itset_ != it->setend_) { it->itset_ = it->set_->upper_bound(lastVal_); @@ -100,7 +104,7 @@ bool SelectIterator::nextFwd(IdType minHint) noexcept { } } else if (!it->isRange_ && it->it_ != it->end_) { - for (; it->it_ != it->end_ && *it->it_ <= lastVal_; it->it_++) { + for (; it->it_ != it->end_ && *it->it_ <= lastVal_; ++it->it_) { } if (it->it_ != it->end_ && *it->it_ < minVal) { minVal = *it->it_; @@ -117,7 +121,7 @@ bool SelectIterator::nextRev(IdType maxHint) noexcept { if (maxHint < lastVal_) lastVal_ = maxHint + 1; int maxVal = INT_MIN; - for (auto it = begin(); it != end(); it++) { + for (auto it = begin(), endIt = end(); it != endIt; ++it) { if (it->useBtree_ && it->ritset_ != it->setrend_) { for (; it->ritset_ != it->setrend_ && *it->ritset_ >= lastVal_; ++it->ritset_) { } @@ -133,7 +137,7 @@ bool SelectIterator::nextRev(IdType maxHint) noexcept { lastIt_ = it; } } else if (!it->isRange_ && !it->useBtree_ && it->rit_ != it->rend_) { - for (; it->rit_ != it->rend_ && *it->rit_ >= lastVal_; it->rit_++) { + for (; it->rit_ != it->rend_ && *it->rit_ >= lastVal_; ++it->rit_) { } if (it->rit_ != it->rend_ && *it->rit_ > maxVal) { maxVal = *it->rit_; @@ -160,7 +164,7 @@ bool SelectIterator::nextFwdSingleIdset(IdType minHint) noexcept { it->it_ = std::upper_bound(it->it_, it->end_, lastVal_); } } else { - for (; it->it_ != it->end_ && *it->it_ <= lastVal_; it->it_++) { + for (; it->it_ != it->end_ && *it->it_ <= lastVal_; ++it->it_) { } } lastVal_ = (it->it_ != it->end_) ? 
*it->it_ : INT_MAX; @@ -174,11 +178,11 @@ bool SelectIterator::nextRevSingleIdset(IdType maxHint) noexcept { auto it = begin(); if (it->useBtree_) { - for (; it->ritset_ != it->setrend_ && *it->ritset_ >= lastVal_; it->ritset_++) { + for (; it->ritset_ != it->setrend_ && *it->ritset_ >= lastVal_; ++it->ritset_) { } lastVal_ = (it->ritset_ != it->setrend_) ? *it->ritset_ : INT_MIN; } else { - for (; it->rit_ != it->rend_ && *it->rit_ >= lastVal_; it->rit_++) { + for (; it->rit_ != it->rend_ && *it->rit_ >= lastVal_; ++it->rit_) { } lastVal_ = (it->rit_ != it->rend_) ? *it->rit_ : INT_MIN; } @@ -192,41 +196,44 @@ bool SelectIterator::nextUnbuiltSortOrders() noexcept { return begin()->indexFor bool SelectIterator::nextFwdSingleRange(IdType minHint) noexcept { if (minHint > lastVal_) lastVal_ = minHint - 1; - if (lastVal_ < begin()->rBegin_) lastVal_ = begin()->rBegin_ - 1; + const auto begIt = begin(); + if (lastVal_ < begIt->rBegin_) lastVal_ = begIt->rBegin_ - 1; - lastVal_ = (lastVal_ < begin()->rEnd_) ? lastVal_ + 1 : begin()->rEnd_; - if (lastVal_ == begin()->rEnd_) lastVal_ = INT_MAX; + lastVal_ = (lastVal_ < begIt->rEnd_) ? lastVal_ + 1 : begIt->rEnd_; + if (lastVal_ == begIt->rEnd_) lastVal_ = INT_MAX; return (lastVal_ != INT_MAX); } bool SelectIterator::nextRevSingleRange(IdType maxHint) noexcept { if (maxHint < lastVal_) lastVal_ = maxHint + 1; - if (lastVal_ > begin()->rrBegin_) lastVal_ = begin()->rrBegin_ + 1; + const auto begIt = begin(); + if (lastVal_ > begIt->rrBegin_) lastVal_ = begIt->rrBegin_ + 1; - lastVal_ = (lastVal_ > begin()->rrEnd_) ? lastVal_ - 1 : begin()->rrEnd_; - if (lastVal_ == begin()->rrEnd_) lastVal_ = INT_MIN; + lastVal_ = (lastVal_ > begIt->rrEnd_) ? 
lastVal_ - 1 : begIt->rrEnd_; + if (lastVal_ == begIt->rrEnd_) lastVal_ = INT_MIN; return (lastVal_ != INT_MIN); } // Unsorted next implementation bool SelectIterator::nextUnsorted() noexcept { - if (lastIt_ == end()) { + const auto endIt = end(); + if (lastIt_ == endIt) { return false; } else if (lastIt_->it_ == lastIt_->end_) { ++lastIt_; - while (lastIt_ != end()) { + while (lastIt_ != endIt) { if (lastIt_->it_ != lastIt_->end_) { lastVal_ = *lastIt_->it_; - lastIt_->it_++; + ++lastIt_->it_; return true; } ++lastIt_; } } else { lastVal_ = *lastIt_->it_; - lastIt_->it_++; + ++lastIt_->it_; return true; } @@ -236,8 +243,9 @@ bool SelectIterator::nextUnsorted() noexcept { void SelectIterator::ExcludeLastSet(const PayloadValue &value, IdType rowId, IdType properRowId) { for (auto &comp : comparators_) comp.ExcludeDistinct(value, properRowId); if (type_ == UnbuiltSortOrdersIndex) { - if (begin()->indexForwardIter_->Value() == rowId) { - begin()->indexForwardIter_->ExcludeLastSet(); + const auto begIt = begin(); + if (begIt->indexForwardIter_->Value() == rowId) { + begIt->indexForwardIter_->ExcludeLastSet(); } } else if (!End() && lastIt_ != end() && lastVal_ == rowId) { assertrx(!lastIt_->isRange_); @@ -284,12 +292,13 @@ double SelectIterator::Cost(int expectedIterations) const noexcept { // Comparatos with non index fields must have much higher cost, than comparators with index fields result = jsonPathComparators ? 
(8 * double(expectedIterations) + jsonPathComparators + 1) : (double(expectedIterations) + 1); } + const auto sz = size(); if (distinct) { - result += size(); + result += sz; } else if (type_ != SingleIdSetWithDeferedSort && type_ != RevSingleIdSetWithDeferedSort && !deferedExplicitSort) { - result += static_cast(GetMaxIterations()) * size(); + result += static_cast(GetMaxIterations()) * sz; } else { - result += static_cast(CostWithDefferedSort(size(), GetMaxIterations(), expectedIterations)); + result += static_cast(CostWithDefferedSort(sz, GetMaxIterations(), expectedIterations)); } return isNotOperation_ ? expectedIterations + result : result; } diff --git a/cpp_src/core/nsselecter/selectiterator.h b/cpp_src/core/nsselecter/selectiterator.h index 55286e183..ef4606b1e 100644 --- a/cpp_src/core/nsselecter/selectiterator.h +++ b/cpp_src/core/nsselecter/selectiterator.h @@ -24,7 +24,13 @@ class SelectIterator : public SelectKeyResult { }; SelectIterator() = default; - SelectIterator(SelectKeyResult res, bool distinct, std::string name, IteratorFieldKind fieldKind, bool forcedFirst = false); + SelectIterator(SelectKeyResult res, bool dist, std::string n, IteratorFieldKind fKind, bool forcedFirst = false) noexcept + : SelectKeyResult(std::move(res)), + distinct(dist), + name(std::move(n)), + fieldKind(fKind), + forcedFirst_(forcedFirst), + type_(Forward) {} /// Starts iteration process: prepares /// object for further work. 
diff --git a/cpp_src/core/nsselecter/sortingcontext.cc b/cpp_src/core/nsselecter/sortingcontext.cc deleted file mode 100644 index 65376dbd9..000000000 --- a/cpp_src/core/nsselecter/sortingcontext.cc +++ /dev/null @@ -1,74 +0,0 @@ -#include "sortingcontext.h" -#include "core/index/index.h" -#include "core/query/query.h" - -namespace reindexer { - -Index *SortingContext::sortIndex() const noexcept { - if (entries.empty()) return nullptr; - return std::visit(overloaded{[](const OneOf &) noexcept -> Index * { return nullptr; }, - [](const FieldEntry &e) noexcept { return e.index; }}, - entries[0]); -} - -const Index *SortingContext::sortIndexIfOrdered() const noexcept { - if (entries.empty() || !isIndexOrdered() || !enableSortOrders) return nullptr; - return std::visit(overloaded{[](const OneOf &) noexcept -> Index * { return nullptr; }, - [](const FieldEntry &e) noexcept { return e.index; }}, - entries[0]); -} - -int SortingContext::sortId() const noexcept { - if (!enableSortOrders) return 0; - Index *sortIdx = sortIndex(); - return sortIdx ? 
sortIdx->SortId() : 0; -} - -bool SortingContext::isIndexOrdered() const noexcept { - if (entries.empty()) return false; - return std::visit(overloaded{[](const OneOf &) noexcept { return false; }, - [](const FieldEntry &e) noexcept { return e.index && e.index->IsOrdered(); }}, - entries[0]); -} - -bool SortingContext::isOptimizationEnabled() const noexcept { return (uncommitedIndex >= 0) && sortIndex(); } - -const SortingContext::Entry &SortingContext::getFirstColumnEntry() const noexcept { - assertrx(!entries.empty()); - return entries[0]; -} - -void SortingContext::resetOptimization() noexcept { - uncommitedIndex = -1; - if (!entries.empty()) { - std::visit( - overloaded{[](const OneOf &) noexcept {}, [](FieldEntry &e) noexcept { e.index = nullptr; }}, - entries[0]); - } -} - -SortingOptions::SortingOptions(const SortingContext &sortingContext) noexcept - : forcedMode{sortingContext.forcedMode}, - multiColumn{sortingContext.entries.size() > 1}, - haveExpression{!sortingContext.expressions.empty()} { - if (sortingContext.entries.empty()) { - usingGeneralAlgorithm = false; - byBtreeIndex = false; - } else { - std::visit(overloaded{[](const OneOf &) noexcept {}, - [&](const SortingContext::FieldEntry &sortEntry) noexcept { - if (sortEntry.index && sortEntry.index->IsOrdered()) { - byBtreeIndex = (sortingContext.isOptimizationEnabled() || sortingContext.enableSortOrders); - multiColumnByBtreeIndex = (byBtreeIndex && multiColumn); - } - usingGeneralAlgorithm = !byBtreeIndex; - }}, - sortingContext.entries[0]); - } -} - -bool SortingOptions::postLoopSortingRequired() const noexcept { - return multiColumn || usingGeneralAlgorithm || forcedMode || haveExpression; -} - -} // namespace reindexer diff --git a/cpp_src/core/nsselecter/sortingcontext.h b/cpp_src/core/nsselecter/sortingcontext.h index 22bccbcec..a246eb7d4 100644 --- a/cpp_src/core/nsselecter/sortingcontext.h +++ b/cpp_src/core/nsselecter/sortingcontext.h @@ -1,5 +1,6 @@ #pragma once +#include 
"core/index/index.h" #include "core/indexopts.h" #include "estl/h_vector.h" #include "sortexpression.h" @@ -27,13 +28,49 @@ struct SortingContext { }; using Entry = std::variant; - [[nodiscard]] int sortId() const noexcept; - [[nodiscard]] Index *sortIndex() const noexcept; - [[nodiscard]] const Index *sortIndexIfOrdered() const noexcept; - [[nodiscard]] bool isOptimizationEnabled() const noexcept; - [[nodiscard]] bool isIndexOrdered() const noexcept; - [[nodiscard]] const Entry &getFirstColumnEntry() const noexcept; - void resetOptimization() noexcept; + [[nodiscard]] int sortId() const noexcept { + if (!enableSortOrders) return 0; + const Index *sortIdx = sortIndex(); + return sortIdx ? int(sortIdx->SortId()) : 0; + } + [[nodiscard]] Index *sortIndex() const noexcept { + if (entries.empty()) return nullptr; + // get_if is truly noexcept, so using it instead of std::visit + if (const auto *fe = std::get_if(&entries[0]); fe) { + return fe->index; + } + return nullptr; + } + [[nodiscard]] const Index *sortIndexIfOrdered() const noexcept { + if (entries.empty() || !isIndexOrdered() || !enableSortOrders) return nullptr; + // get_if is truly noexcept, so using it instead of std::visit + if (const auto *fe = std::get_if(&entries[0]); fe) { + return fe->index; + } + return nullptr; + } + [[nodiscard]] bool isOptimizationEnabled() const noexcept { return (uncommitedIndex >= 0) && sortIndex(); } + [[nodiscard]] bool isIndexOrdered() const noexcept { + if (entries.empty()) return false; + // get_if is truly noexcept, so using it instead of std::visit + if (const auto *fe = std::get_if(&entries[0]); fe) { + return fe->index && fe->index->IsOrdered(); + } + return false; + } + [[nodiscard]] const Entry &getFirstColumnEntry() const noexcept { + assertrx(!entries.empty()); + return entries[0]; + } + void resetOptimization() noexcept { + uncommitedIndex = -1; + if (!entries.empty()) { + // get_if is truly noexcept, so using it instead of std::visit + if (auto *fe = 
std::get_if(&entries[0]); fe) { + fe->index = nullptr; + } + } + } bool enableSortOrders = false; h_vector entries; @@ -44,8 +81,27 @@ struct SortingContext { }; struct SortingOptions { - SortingOptions(const SortingContext &sortingContext) noexcept; - [[nodiscard]] bool postLoopSortingRequired() const noexcept; + SortingOptions(const SortingContext &sortingContext) noexcept + : forcedMode{sortingContext.forcedMode}, + multiColumn{sortingContext.entries.size() > 1}, + haveExpression{!sortingContext.expressions.empty()} { + if (sortingContext.entries.empty()) { + usingGeneralAlgorithm = false; + byBtreeIndex = false; + } else { + // get_if is truly noexcept, so using it instead of std::visit + if (auto *sortEntry = std::get_if(&sortingContext.entries[0]); sortEntry) { + if (sortEntry->index && sortEntry->index->IsOrdered()) { + byBtreeIndex = (sortingContext.isOptimizationEnabled() || sortingContext.enableSortOrders); + multiColumnByBtreeIndex = (byBtreeIndex && multiColumn); + } + usingGeneralAlgorithm = !byBtreeIndex; + } + } + } + [[nodiscard]] bool postLoopSortingRequired() const noexcept { + return multiColumn || usingGeneralAlgorithm || forcedMode || haveExpression; + } bool byBtreeIndex = false; bool usingGeneralAlgorithm = true; diff --git a/cpp_src/core/payload/payloadfieldvalue.h b/cpp_src/core/payload/payloadfieldvalue.h index 011b01ee8..b9dfcf94a 100644 --- a/cpp_src/core/payload/payloadfieldvalue.h +++ b/cpp_src/core/payload/payloadfieldvalue.h @@ -47,19 +47,36 @@ class PayloadFieldValue { abort(); }); } - Variant Get(bool enableHold = false) const { + Variant Get() noexcept { return Get(Variant::no_hold_t{}); } + template + Variant Get(HoldT h) const noexcept(noexcept(Variant(std::declval(), h))) { return t_.Type().EvaluateOneOf( [&](KeyValueType::Bool) noexcept { return Variant(*reinterpret_cast(p_)); }, [&](KeyValueType::Int) noexcept { return Variant(*reinterpret_cast(p_)); }, [&](KeyValueType::Int64) noexcept { return 
Variant(*reinterpret_cast(p_)); }, [&](KeyValueType::Double) noexcept { return Variant(*reinterpret_cast(p_)); }, - [&](KeyValueType::String) { return Variant(*reinterpret_cast(p_), enableHold); }, + [&](KeyValueType::String) noexcept(noexcept(Variant(std::declval(), h))) { + return Variant(*reinterpret_cast(p_), h); + }, [&](KeyValueType::Uuid) noexcept { return Variant(*reinterpret_cast(p_)); }, [](OneOf) noexcept -> Variant { assertrx(0); abort(); }); } + // Variant Get(Variant::hold_t) const noexcept { + // return t_.Type().EvaluateOneOf( + // [&](KeyValueType::Bool) noexcept { return Variant(*reinterpret_cast(p_)); }, + // [&](KeyValueType::Int) noexcept { return Variant(*reinterpret_cast(p_)); }, + // [&](KeyValueType::Int64) noexcept { return Variant(*reinterpret_cast(p_)); }, + // [&](KeyValueType::Double) noexcept { return Variant(*reinterpret_cast(p_)); }, + // [&](KeyValueType::String) noexcept { return Variant(*reinterpret_cast(p_)); }, + // [&](KeyValueType::Uuid) noexcept { return Variant(*reinterpret_cast(p_)); }, + // [](OneOf) noexcept -> Variant { + // assertrx(0); + // abort(); + // }); + // } size_t Hash() const noexcept { return t_.Type().EvaluateOneOf( [&](KeyValueType::Bool) noexcept { return std::hash()(*reinterpret_cast(p_)); }, diff --git a/cpp_src/core/payload/payloadiface.cc b/cpp_src/core/payload/payloadiface.cc index 9c6a0fa02..5e2cd5dea 100644 --- a/cpp_src/core/payload/payloadiface.cc +++ b/cpp_src/core/payload/payloadiface.cc @@ -10,8 +10,39 @@ namespace reindexer { +// Get element(s) by field index +template +void PayloadIface::Get(int field, VariantArray &keys, Variant::hold_t h) const { + get(field, keys, h); +} +template +void PayloadIface::Get(int field, VariantArray &keys) const { + get(field, keys, Variant::no_hold_t{}); +} + +// Get element by field and array index +template +Variant PayloadIface::Get(int field, int idx, Variant::hold_t h) const { + return get(field, idx, h); +} +template +Variant PayloadIface::Get(int 
field, int idx) const { + return get(field, idx, Variant::no_hold_t{}); +} + +// Get element(s) by field name template -void PayloadIface::Get(int field, VariantArray &keys, bool enableHold) const { +void PayloadIface::Get(std::string_view field, VariantArray &kvs, Variant::hold_t h) const { + get(t_.FieldByName(field), kvs, h); +} +template +void PayloadIface::Get(std::string_view field, VariantArray &kvs) const { + get(t_.FieldByName(field), kvs, Variant::no_hold_t{}); +} + +template +template +void PayloadIface::get(int field, VariantArray &keys, HoldT h) const { assertrx(field < NumFields()); keys.clear(); if (t_.Field(field).IsArray()) { @@ -20,15 +51,16 @@ void PayloadIface::Get(int field, VariantArray &keys, bool enableHold) const for (int i = 0; i < arr->len; i++) { PayloadFieldValue pv(t_.Field(field), v_->Ptr() + arr->offset + i * t_.Field(field).ElemSizeof()); - keys.push_back(pv.Get(enableHold)); + keys.push_back(pv.Get(h)); } } else { - keys.push_back(Field(field).Get(enableHold)); + keys.push_back(Field(field).Get(h)); } } template -Variant PayloadIface::Get(int field, int idx, bool enableHold) const { +template +Variant PayloadIface::get(int field, int idx, HoldT h) const { assertrx(field < NumFields()); if (t_.Field(field).IsArray()) { @@ -36,20 +68,13 @@ Variant PayloadIface::Get(int field, int idx, bool enableHold) const { assertf(idx < arr->len, "Field '%s.%s' bound exceed idx %d > len %d", Type().Name(), Type().Field(field).Name(), idx, arr->len); PayloadFieldValue pv(t_.Field(field), v_->Ptr() + arr->offset + idx * t_.Field(field).ElemSizeof()); - return pv.Get(enableHold); - + return pv.Get(h); } else { assertf(idx == 0, "Field '%s.%s' is not array, can't get idx %d", Type().Name(), Type().Field(field).Name(), idx); - return Field(field).Get(enableHold); + return Field(field).Get(h); } } -// Get element(s) by field index -template -void PayloadIface::Get(std::string_view field, VariantArray &kvs, bool enableHold) const { - 
Get(t_.FieldByName(field), kvs, enableHold); -} - template void PayloadIface::GetByJsonPath(std::string_view jsonPath, TagsMatcher &tagsMatcher, VariantArray &kvs, KeyValueType expectedType) const { diff --git a/cpp_src/core/payload/payloadiface.h b/cpp_src/core/payload/payloadiface.h index b95f6a791..d1931b3f0 100644 --- a/cpp_src/core/payload/payloadiface.h +++ b/cpp_src/core/payload/payloadiface.h @@ -27,9 +27,11 @@ class PayloadIface { void Reset() noexcept { memset(v_->Ptr(), 0, t_.TotalSize()); } // Get element(s) by field index - void Get(int field, VariantArray &, bool enableHold = false) const; + void Get(int field, VariantArray &, Variant::hold_t) const; + void Get(int field, VariantArray &) const; // Get element by field and array index - [[nodiscard]] Variant Get(int field, int idx, bool enableHold = false) const; + [[nodiscard]] Variant Get(int field, int idx, Variant::hold_t) const; + [[nodiscard]] Variant Get(int field, int idx) const; // Get array as span of typed elements template @@ -105,8 +107,9 @@ class PayloadIface { template ::value>::type * = nullptr> T CopyTo(PayloadType t, bool newFields = true); - // Get element(s) by field index - void Get(std::string_view field, VariantArray &, bool enableHold = false) const; + // Get element(s) by field name + void Get(std::string_view field, VariantArray &, Variant::hold_t) const; + void Get(std::string_view field, VariantArray &) const; // Get element(s) by json path void GetByJsonPath(std::string_view jsonPath, TagsMatcher &tagsMatcher, VariantArray &, KeyValueType expectedType) const; @@ -163,6 +166,7 @@ class PayloadIface { void GetJSON(const TagsMatcher &tm, WrSerializer &ser); private: + enum class HoldPolicy : bool { Hold, NoHold }; template ::value>::type * = nullptr> T CopyWithNewOrUpdatedFields(PayloadType t); @@ -174,6 +178,12 @@ class PayloadIface { void getByJsonPath(const P &path, VariantArray &, KeyValueType expectedType) const; template ::value>::type * = nullptr> void setArray(int 
field, const VariantArray &keys, bool append); + template + void get(int field, VariantArray &, HoldT h) const; + template + [[nodiscard]] Variant get(int field, int idx, HoldT h) const; + template + void get(std::string_view field, VariantArray &, HoldT h) const; // Array of elements types , not owning const PayloadTypeImpl &t_; diff --git a/cpp_src/core/payload/payloadvalue.h b/cpp_src/core/payload/payloadvalue.h index b94ba1198..87772037b 100644 --- a/cpp_src/core/payload/payloadvalue.h +++ b/cpp_src/core/payload/payloadvalue.h @@ -54,8 +54,8 @@ class PayloadValue { void Resize(size_t oldSize, size_t newSize); // Get data pointer uint8_t *Ptr() const noexcept { return p_ + sizeof(dataHeader); } - void SetLSN(int64_t lsn) { header()->lsn = lsn; } - int64_t GetLSN() const { return p_ ? header()->lsn : 0; } + void SetLSN(int64_t lsn) noexcept { header()->lsn = lsn; } + int64_t GetLSN() const noexcept { return p_ ? header()->lsn : 0; } bool IsFree() const noexcept { return bool(p_ == nullptr); } void Free() noexcept { release(); } size_t GetCapacity() const noexcept { return header()->cap; } diff --git a/cpp_src/core/query/query.cc b/cpp_src/core/query/query.cc index c8e5fe0fa..22d8f68b8 100644 --- a/cpp_src/core/query/query.cc +++ b/cpp_src/core/query/query.cc @@ -293,7 +293,7 @@ void Query::deserialize(Serializer &ser, bool &hasJoinConditions) { Debug(ser.GetVarUint()); break; case QueryStrictMode: - strictMode_ = StrictMode(ser.GetVarUint()); + Strict(StrictMode(ser.GetVarUint())); break; case QueryLimit: count_ = ser.GetVarUint(); @@ -305,7 +305,7 @@ void Query::deserialize(Serializer &ser, bool &hasJoinConditions) { calcTotal_ = CalcTotalMode(ser.GetVarUint()); break; case QuerySelectFilter: - selectFilter_.push_back(std::string(ser.GetVString())); + selectFilter_.emplace_back(ser.GetVString()); break; case QueryEqualPosition: { const unsigned bracketPosition = ser.GetVarUint(); @@ -315,16 +315,16 @@ void Query::deserialize(Serializer &ser, bool 
&hasJoinConditions) { break; } case QueryExplain: - explain_ = true; + Explain(true); break; case QueryWithRank: withRank_ = true; break; case QuerySelectFunction: - selectFunctions_.push_back(std::string(ser.GetVString())); + selectFunctions_.emplace_back(ser.GetVString()); break; case QueryDropField: { - Drop(std::string(ser.GetVString())); + Drop(ser.GetVString()); break; } case QueryUpdateFieldV2: { diff --git a/cpp_src/core/query/query.h b/cpp_src/core/query/query.h index bad029f06..f4d5f6c3f 100644 --- a/cpp_src/core/query/query.h +++ b/cpp_src/core/query/query.h @@ -86,13 +86,15 @@ class Query { /// @param cond - type of condition. /// @param val - value of index to be compared with. /// @return Query object ready to be executed. - template > * = nullptr> - Query &Where(Str &&field, CondType cond, Input val) & { - return Where(std::forward(field), cond, {std::forward(val)}); + template > * = nullptr, + std::enable_if_t> * = nullptr> + Query &Where(Str &&field, CondType cond, Input &&val) & { + return Where(std::forward(field), cond, VariantArray{Variant{std::forward(val)}}); } - template > * = nullptr> - [[nodiscard]] Query &&Where(Str &&field, CondType cond, Input val) && { - return std::move(Where(std::forward(field), cond, {std::move(val)})); + template > * = nullptr, + std::enable_if_t> * = nullptr> + [[nodiscard]] Query &&Where(Str &&field, CondType cond, Input &&val) && { + return std::move(Where(std::forward(field), cond, VariantArray{Variant{std::forward(val)}})); } /// Adds a condition with several values. Analog to sql Where clause. 
@@ -220,6 +222,7 @@ class Query { } else { q.checkSubQueryWithData(); if (!q.selectFilter_.empty() && !q.HasLimit() && !q.HasOffset()) { + // Transforms main query condition into subquerie's condition q.sortingEntries_.clear(); q.Where(q.selectFilter_[0], cond, std::move(values)); q.selectFilter_.clear(); @@ -249,6 +252,14 @@ class Query { [[nodiscard]] Query &&Where(Query &&q, CondType cond, std::initializer_list values) && { return std::move(Where(std::move(q), cond, VariantArray::Create(values))); } + template > * = nullptr> + [[nodiscard]] Query &Where(Query &&q, CondType cond, Input &&val) & { + return Where(std::move(q), cond, VariantArray{Variant{std::forward(val)}}); + } + template > * = nullptr> + [[nodiscard]] Query &&Where(Query &&q, CondType cond, Input &&val) && { + return std::move(Where(std::move(q), cond, VariantArray{Variant{std::forward(val)}})); + } template > * = nullptr> Query &Where(Str &&field, CondType cond, Query &&q) & { diff --git a/cpp_src/core/query/sql/sqlencoder.cc b/cpp_src/core/query/sql/sqlencoder.cc index 6a745bf1c..b90f97685 100644 --- a/cpp_src/core/query/sql/sqlencoder.cc +++ b/cpp_src/core/query/sql/sqlencoder.cc @@ -56,7 +56,7 @@ namespace reindexer { void SQLEncoder::DumpSingleJoinQuery(size_t idx, WrSerializer &ser, bool stripArgs) const { assertrx(idx < query_.GetJoinQueries().size()); const auto &jq = query_.GetJoinQueries()[idx]; - ser << ' ' << jq.joinType; + ser << jq.joinType; if (jq.Entries().Empty() && !jq.HasLimit() && jq.sortingEntries_.empty()) { ser << ' ' << jq.NsName() << " ON "; } else { @@ -82,6 +82,7 @@ void SQLEncoder::DumpSingleJoinQuery(size_t idx, WrSerializer &ser, bool stripAr void SQLEncoder::dumpJoined(WrSerializer &ser, bool stripArgs) const { for (size_t i = 0; i < query_.GetJoinQueries().size(); ++i) { if (query_.GetJoinQueries()[i].joinType == JoinType::LeftJoin) { + ser << ' '; DumpSingleJoinQuery(i, ser, stripArgs); } } @@ -95,7 +96,7 @@ void SQLEncoder::dumpMerged(WrSerializer &ser, bool 
stripArgs) const { } } -std::string escapeQuotes(std::string str) { +static std::string escapeQuotes(std::string str) { for (size_t i = 0; i < str.size(); ++i) { if (str[i] == '\'' && (i == 0 || str[i - 1] != '\\')) str.insert(i++, 1, '\\'); } diff --git a/cpp_src/core/query/sql/sqlparser.cc b/cpp_src/core/query/sql/sqlparser.cc index fc13f00b3..a17077266 100644 --- a/cpp_src/core/query/sql/sqlparser.cc +++ b/cpp_src/core/query/sql/sqlparser.cc @@ -23,13 +23,13 @@ Query SQLParser::Parse(std::string_view q) { bool SQLParser::reachedAutocompleteToken(tokenizer &parser, const token &tok) { size_t pos = parser.getPos() + tok.text().length(); - return (pos > ctx_.suggestionsPos); + return pos > ctx_.suggestionsPos; } -token SQLParser::peekSqlToken(tokenizer &parser, int tokenType, bool toLower) { +token SQLParser::peekSqlToken(tokenizer &parser, SqlTokenType tokenType, bool toLower) { token tok = parser.peek_token(toLower ? tokenizer::flags::to_lower : tokenizer::flags::no_flags); - bool eof = ((parser.getPos() + tok.text().length()) == parser.length()); - if (ctx_.autocompleteMode && !tok.text().empty() && reachedAutocompleteToken(parser, tok)) { + const bool eof = ((parser.getPos() + tok.text().length()) == parser.length()); + if (ctx_.autocompleteMode && reachedAutocompleteToken(parser, tok)) { size_t tokenLen = 0; if (ctx_.suggestionsPos >= parser.getPos()) { tokenLen = ctx_.suggestionsPos - parser.getPos() + 1; @@ -47,6 +47,10 @@ token SQLParser::peekSqlToken(tokenizer &parser, int tokenType, bool toLower) { int SQLParser::Parse(tokenizer &parser) { parser.skip_space(); + if (parser.length() == 0) { + ctx_.suggestions.emplace_back(std::string(), Start); + return 0; + } token tok = peekSqlToken(parser, Start); if (tok.text() == "explain"sv) { query_.Explain(true); @@ -57,7 +61,7 @@ int SQLParser::Parse(tokenizer &parser) { if (tok.text() == "select"sv) { query_.type_ = QuerySelect; parser.next_token(); - selectParse(parser); + selectParse(parser); } else if 
(tok.text() == "delete"sv) { query_.type_ = QueryDelete; tok = parser.next_token(); @@ -82,15 +86,16 @@ int SQLParser::Parse(tokenizer &parser) { return 0; } +template int SQLParser::selectParse(tokenizer &parser) { // Get filter token tok; bool wasSelectFilter = false; std::vector selectFilters; - while (!parser.end()) { + while (true) { auto nameWithCase = peekSqlToken(parser, SingleSelectFieldSqlToken, false); auto name = parser.next_token(); - tok = peekSqlToken(parser, SelectFieldsListSqlToken); + tok = peekSqlToken(parser, FromSqlToken); if (tok.text() == "("sv) { parser.next_token(); tok = peekSqlToken(parser, SingleSelectFieldSqlToken); @@ -165,7 +170,7 @@ int SQLParser::selectParse(tokenizer &parser) { throw Error(errParams, "Expected ')', but found %s, %s", tok.text(), parser.where()); } parser.next_token(); - tok = peekSqlToken(parser, SelectFieldsListSqlToken); + tok = peekSqlToken(parser, FromSqlToken); } else if (name.text() != "*"sv) { if (!query_.CanAddSelectFilter()) { @@ -190,18 +195,19 @@ int SQLParser::selectParse(tokenizer &parser) { } peekSqlToken(parser, FromSqlToken); - if (parser.next_token().text() != "from"sv) + if (parser.next_token().text() != "from"sv) { throw Error(errParams, "Expected 'FROM', but found '%s' in query, %s", tok.text(), parser.where()); + } peekSqlToken(parser, NamespaceSqlToken); query_.SetNsName(parser.next_token().text()); ctx_.updateLinkedNs(query_.NsName()); - while (!parser.end()) { - tok = peekSqlToken(parser, SelectConditionsStart); + do { + tok = peekSqlToken(parser, nested == Nested::Yes ? 
NestedSelectConditionsStart : SelectConditionsStart); if (tok.text() == "where"sv) { parser.next_token(); - parseWhere(parser); + parseWhere(parser); } else if (tok.text() == "limit"sv) { parser.next_token(); tok = parser.next_token(); @@ -218,40 +224,44 @@ int SQLParser::selectParse(tokenizer &parser) { parser.next_token(); parseOrderBy(parser, query_.sortingEntries_, query_.forcedSortOrder_); ctx_.updateLinkedNs(query_.NsName()); - } else if (tok.text() == "join"sv) { - parser.next_token(); - parseJoin(JoinType::LeftJoin, parser); - } else if (tok.text() == "left"sv) { - parser.next_token(); - peekSqlToken(parser, LeftSqlToken); - if (parser.next_token().text() != "join"sv) { - throw Error(errParseSQL, "Expected JOIN, but found '%s' in query, %s", tok.text(), parser.where()); - } - parseJoin(JoinType::LeftJoin, parser); - } else if (tok.text() == "inner"sv) { - parser.next_token(); - peekSqlToken(parser, InnerSqlToken); - if (parser.next_token().text() != "join") { - throw Error(errParseSQL, "Expected JOIN, but found '%s' in query, %s", tok.text(), parser.where()); + } else if constexpr (nested == Nested::No) { + if (tok.text() == "join"sv) { + parser.next_token(); + parseJoin(JoinType::LeftJoin, parser); + } else if (tok.text() == "left"sv) { + parser.next_token(); + peekSqlToken(parser, LeftSqlToken); + if (parser.next_token().text() != "join"sv) { + throw Error(errParseSQL, "Expected JOIN, but found '%s' in query, %s", tok.text(), parser.where()); + } + parseJoin(JoinType::LeftJoin, parser); + } else if (tok.text() == "inner"sv) { + parser.next_token(); + peekSqlToken(parser, InnerSqlToken); + if (parser.next_token().text() != "join") { + throw Error(errParseSQL, "Expected JOIN, but found '%s' in query, %s", tok.text(), parser.where()); + } + auto jtype = (query_.NextOp() == OpOr) ? 
JoinType::OrInnerJoin : JoinType::InnerJoin; + query_.And(); + parseJoin(jtype, parser); + } else if (tok.text() == "merge"sv) { + parser.next_token(); + parseMerge(parser); + } else if (tok.text() == "or"sv) { + parser.next_token(); + query_.Or(); + } else { + break; } - auto jtype = (query_.NextOp() == OpOr) ? JoinType::OrInnerJoin : JoinType::InnerJoin; - query_.And(); - parseJoin(jtype, parser); - } else if (tok.text() == "merge"sv) { - parser.next_token(); - parseMerge(parser); - } else if (tok.text() == "or"sv) { - parser.next_token(); - query_.Or(); } else { break; } - } + } while (!parser.end()); return 0; } template -static void MoveAppend(T &dst, T &src) { +static void moveAppend(T &dst, T &src) { if (dst.empty()) { dst = std::move(src); } else { @@ -261,16 +271,20 @@ static void MoveAppend(T &dst, T &src) { } } -int SQLParser::nestedSelectParse(SQLParser &parser, tokenizer &tok) { - try { - int res = parser.selectParse(tok); - MoveAppend(ctx_.suggestions, parser.ctx_.suggestions); - return res; - } catch (...) 
{ - MoveAppend(ctx_.suggestions, parser.ctx_.suggestions); - throw; +class SQLParser::ParserContextsAppendGuard { +public: + ParserContextsAppendGuard(SqlParsingCtx &mainCtx, SqlParsingCtx &nestedCtx) noexcept : mainCtx_{mainCtx}, nestedCtx_{nestedCtx} {} + ~ParserContextsAppendGuard() { + moveAppend(mainCtx_.suggestions, nestedCtx_.suggestions); + if (!mainCtx_.foundPossibleSuggestions && nestedCtx_.foundPossibleSuggestions) { + mainCtx_.suggestionLinkedNs = std::move(nestedCtx_.suggestionLinkedNs); + } } -} + +private: + SqlParsingCtx &mainCtx_; + SqlParsingCtx &nestedCtx_; +}; static KeyValueType detectValueType(const token &currTok) { const std::string_view val = currTok.text(); @@ -412,30 +426,8 @@ int SQLParser::deleteParse(tokenizer &parser) { query_.SetNsName(parser.next_token().text()); ctx_.updateLinkedNs(query_.NsName()); - while (!parser.end()) { - tok = peekSqlToken(parser, DeleteConditionsStart); - if (tok.text() == "where"sv) { - parser.next_token(); - parseWhere(parser); - } else if (tok.text() == "limit"sv) { - parser.next_token(); - tok = parser.next_token(); - if (tok.type != TokenNumber) - throw Error(errParseSQL, "Expected number, but found '%s' in query, %s", tok.text(), parser.where()); - query_.Limit(stoi(tok.text())); - } else if (tok.text() == "offset"sv) { - parser.next_token(); - tok = parser.next_token(); - if (tok.type != TokenNumber) - throw Error(errParseSQL, "Expected number, but found '%s' in query, %s", tok.text(), parser.where()); - query_.Offset(stoi(tok.text())); - } else if (tok.text() == "order"sv) { - parser.next_token(); - parseOrderBy(parser, query_.sortingEntries_, query_.forcedSortOrder_); - ctx_.updateLinkedNs(query_.NsName()); - } else - break; - } + parseModifyConditions(parser); + return 0; } @@ -463,8 +455,9 @@ static void addUpdateValue(const token &currTok, tokenizer &parser, UpdateEntry auto eof = [](tokenizer &parser, bool &inArray) -> bool { if (parser.end()) return true; token nextTok = parser.peek_token(); - 
bool result = - (nextTok.text() == "where"sv) || (!inArray && nextTok.text() == "]"sv) || (!inArray && nextTok.text() == ","sv); + bool result = (nextTok.text() == "where"sv) || (nextTok.text() == "order"sv) || (nextTok.text() == "limit"sv) || + (nextTok.text() == "offset"sv) || (!inArray && nextTok.text() == "]"sv) || + (!inArray && nextTok.text() == ","sv); if (nextTok.text() == "["sv && !inArray) inArray = true; if (nextTok.text() == "]"sv && inArray) inArray = false; return result; @@ -581,15 +574,39 @@ int SQLParser::updateParse(tokenizer &parser) { throw Error(errParseSQL, "Expected 'SET' or 'DROP' but found '%s' in query %s", tok.text(), parser.where()); } - tok = peekSqlToken(parser, WhereSqlToken); - if (tok.text() == "where"sv) { - parser.next_token(); - parseWhere(parser); - } + parseModifyConditions(parser); return 0; } +void SQLParser::parseModifyConditions(tokenizer &parser) { + while (!parser.end()) { + auto tok = peekSqlToken(parser, ModifyConditionsStart); + if (tok.text() == "where"sv) { + parser.next_token(); + parseWhere(parser); + } else if (tok.text() == "limit"sv) { + parser.next_token(); + tok = parser.next_token(); + if (tok.type != TokenNumber) + throw Error(errParseSQL, "Expected number, but found '%s' in query, %s", tok.text(), parser.where()); + query_.Limit(stoi(tok.text())); + } else if (tok.text() == "offset"sv) { + parser.next_token(); + tok = parser.next_token(); + if (tok.type != TokenNumber) + throw Error(errParseSQL, "Expected number, but found '%s' in query, %s", tok.text(), parser.where()); + query_.Offset(stoi(tok.text())); + } else if (tok.text() == "order"sv) { + parser.next_token(); + parseOrderBy(parser, query_.sortingEntries_, query_.forcedSortOrder_); + ctx_.updateLinkedNs(query_.NsName()); + } else { + break; + } + } +} + int SQLParser::truncateParse(tokenizer &parser) { parser.next_token(); token tok = peekSqlToken(parser, NamespaceSqlToken); @@ -599,7 +616,7 @@ int SQLParser::truncateParse(tokenizer &parser) { return 
0; } -bool isCondition(std::string_view text) noexcept { +static bool isCondition(std::string_view text) noexcept { return text == "="sv || text == "=="sv || text == "<>"sv || iequals(text, "is"sv) || text == ">"sv || text == ">="sv || text == "<"sv || text == "<="sv || iequals(text, "in"sv) || iequals(text, "range"sv) || iequals(text, "like"sv) || iequals(text, "allset"sv); } @@ -607,13 +624,16 @@ bool isCondition(std::string_view text) noexcept { Query SQLParser::parseSubQuery(tokenizer &parser) { Query subquery; SQLParser subparser(subquery); + const ParserContextsAppendGuard guard{ctx_, subparser.ctx_}; if (ctx_.autocompleteMode) { subparser.ctx_.suggestionsPos = ctx_.suggestionsPos; subparser.ctx_.autocompleteMode = true; + subparser.ctx_.foundPossibleSuggestions = ctx_.foundPossibleSuggestions; + subparser.ctx_.possibleSuggestionDetectedInThisClause = ctx_.possibleSuggestionDetectedInThisClause; } // skip select auto tok = parser.next_token(); - nestedSelectParse(subparser, parser); + subparser.selectParse(parser); tok = parser.next_token(); if (tok.text() != ")"sv) { throw Error(errParseSQL, "Expected ')', but found %s, %s", tok.text(), parser.where()); @@ -654,7 +674,8 @@ void SQLParser::parseWhereCondition(tokenizer &parser, T &&firstArg, OpType op) tok = parser.next_token(false); } else if (tok.text() == "("sv) { if constexpr (!std::is_same_v) { - if (iequals(parser.peek_token().text(), "select"sv) && !isCondition(parser.peek_second_token().text())) { + if (iequals(peekSqlToken(parser, WhereFieldValueOrSubquerySqlToken, false).text(), "select"sv) && + !isCondition(parser.peek_second_token().text())) { query_.NextOp(op).Where(std::forward(firstArg), condition, parseSubQuery(parser)); return; } @@ -682,11 +703,12 @@ void SQLParser::parseWhereCondition(tokenizer &parser, T &&firstArg, OpType op) } } +template int SQLParser::parseWhere(tokenizer &parser) { token tok; OpType nextOp = OpAnd; - tok = peekSqlToken(parser, WhereFieldSqlToken, false); + tok = 
peekSqlToken(parser, nested == Nested::Yes ? NestedWhereFieldSqlToken : WhereFieldSqlToken, false); if (iequals(tok.text(), "not"sv)) { nextOp = OpNot; @@ -696,14 +718,11 @@ int SQLParser::parseWhere(tokenizer &parser) { size_t lastBracketPosition = 0; int openBracketsCount = 0; while (!parser.end()) { - tok = peekSqlToken(parser, WhereFieldSqlToken, false); + tok = peekSqlToken(parser, nested == Nested::Yes ? NestedWhereFieldSqlToken : WhereFieldSqlToken, false); parser.next_token(false); if (tok.text() == "("sv) { - tok = peekSqlToken(parser, WhereFieldSqlToken, false); - if (iequals(tok.text(), "select"sv) && !isCondition(parser.peek_second_token().text())) { - parseWhereCondition(parser, parseSubQuery(parser), nextOp); - nextOp = OpAnd; - } else { + tok = peekSqlToken(parser, nested == Nested::Yes ? NestedWhereFieldSqlToken : WhereFieldOrSubquerySqlToken, false); + if (nested == Nested::Yes || !iequals(tok.text(), "select"sv) || isCondition(parser.peek_second_token().text())) { query_.NextOp(nextOp); query_.OpenBracket(); ++openBracketsCount; @@ -714,40 +733,42 @@ int SQLParser::parseWhere(tokenizer &parser) { } else { nextOp = OpAnd; } + continue; } - continue; - } - if (tok.type == TokenNumber) { - throw Error(errParseSQL, "Number is invalid at this location. (text = '%s' location = %s)", tok.text(), parser.where()); - } - if (tok.type == TokenString) { - throw Error(errParseSQL, "String is invalid at this location. 
(text = '%s' location = %s)", tok.text(), parser.where()); - } - - if (tok.type == TokenName) { - if (iequals(tok.text(), "join"sv)) { - parseJoin(JoinType::LeftJoin, parser); - } else if (iequals(tok.text(), "left"sv)) { - peekSqlToken(parser, LeftSqlToken); - if (parser.next_token().text() != "join"sv) { - throw Error(errParseSQL, "Expected JOIN, but found '%s' in query, %s", tok.text(), parser.where()); - } - parseJoin(JoinType::LeftJoin, parser); - } else if (iequals(tok.text(), "inner"sv)) { - peekSqlToken(parser, InnerSqlToken); - if (parser.next_token().text() != "join") { - throw Error(errParseSQL, "Expected JOIN, but found '%s' in query, %s", tok.text(), parser.where()); - } - auto jtype = nextOp == OpOr ? JoinType::OrInnerJoin : JoinType::InnerJoin; - query_.And(); - parseJoin(jtype, parser); - } else if (iequals(tok.text(), "st_dwithin"sv)) { + parseWhereCondition(parser, parseSubQuery(parser), nextOp); + nextOp = OpAnd; + } else if (tok.type == TokenName) { + if (iequals(tok.text(), "st_dwithin"sv)) { parseDWithin(parser, nextOp); nextOp = OpAnd; + } else if constexpr (nested == Nested::No) { + if (iequals(tok.text(), "join"sv)) { + parseJoin(JoinType::LeftJoin, parser); + } else if (iequals(tok.text(), "left"sv)) { + peekSqlToken(parser, LeftSqlToken); + if (parser.next_token().text() != "join"sv) { + throw Error(errParseSQL, "Expected JOIN, but found '%s' in query, %s", tok.text(), parser.where()); + } + parseJoin(JoinType::LeftJoin, parser); + } else if (iequals(tok.text(), "inner"sv)) { + peekSqlToken(parser, InnerSqlToken); + if (parser.next_token().text() != "join") { + throw Error(errParseSQL, "Expected JOIN, but found '%s' in query, %s", tok.text(), parser.where()); + } + auto jtype = nextOp == OpOr ? 
JoinType::OrInnerJoin : JoinType::InnerJoin; + query_.And(); + parseJoin(jtype, parser); + } else { + parseWhereCondition(parser, std::string{tok.text()}, nextOp); + nextOp = OpAnd; + } } else { parseWhereCondition(parser, std::string{tok.text()}, nextOp); nextOp = OpAnd; } + } else if (tok.type == TokenNumber || tok.type == TokenString) { + throw Error(errParseSQL, "%s is invalid at this location. (text = '%s' location = %s)", + tok.type == TokenNumber ? "Number" : "String", tok.text(), parser.where()); } tok = parser.peek_token(); @@ -768,7 +789,7 @@ int SQLParser::parseWhere(tokenizer &parser) { if (iequals(tok.text(), "and"sv)) { nextOp = OpAnd; parser.next_token(); - tok = peekSqlToken(parser, AndSqlToken, false); + tok = peekSqlToken(parser, nested == Nested::Yes ? NestedAndSqlToken : AndSqlToken, false); if (iequals(tok.text(), "not"sv)) { parser.next_token(); nextOp = OpNot; @@ -961,10 +982,14 @@ void SQLParser::parseDWithin(tokenizer &parser, OpType nextOp) { void SQLParser::parseJoin(JoinType type, tokenizer &parser) { JoinedQuery jquery; SQLParser jparser(jquery); + const ParserContextsAppendGuard guard{ctx_, jparser.ctx_}; if (ctx_.autocompleteMode) { jparser.ctx_.suggestionsPos = ctx_.suggestionsPos; jparser.ctx_.autocompleteMode = true; + jparser.ctx_.foundPossibleSuggestions = ctx_.foundPossibleSuggestions; + jparser.ctx_.possibleSuggestionDetectedInThisClause = ctx_.possibleSuggestionDetectedInThisClause; } + peekSqlToken(parser, NamespaceSqlToken); auto tok = parser.next_token(); if (tok.text() == "("sv) { peekSqlToken(parser, SelectSqlToken); @@ -973,7 +998,7 @@ void SQLParser::parseJoin(JoinType type, tokenizer &parser) { throw Error(errParseSQL, "Expected 'SELECT', but found %s, %s", tok.text(), parser.where()); } - nestedSelectParse(jparser, parser); + jparser.selectParse(parser); tok = parser.next_token(); if (tok.text() != ")"sv) { @@ -992,9 +1017,12 @@ void SQLParser::parseJoin(JoinType type, tokenizer &parser) { void 
SQLParser::parseMerge(tokenizer &parser) { JoinedQuery mquery; SQLParser mparser(mquery); + const ParserContextsAppendGuard guard{ctx_, mparser.ctx_}; if (ctx_.autocompleteMode) { mparser.ctx_.suggestionsPos = ctx_.suggestionsPos; mparser.ctx_.autocompleteMode = true; + mparser.ctx_.foundPossibleSuggestions = ctx_.foundPossibleSuggestions; + mparser.ctx_.possibleSuggestionDetectedInThisClause = ctx_.possibleSuggestionDetectedInThisClause; } auto tok = parser.next_token(); if (tok.text() == "("sv) { @@ -1004,7 +1032,7 @@ void SQLParser::parseMerge(tokenizer &parser) { throw Error(errParseSQL, "Expected 'SELECT', but found %s, %s", tok.text(), parser.where()); } - nestedSelectParse(mparser, parser); + mparser.selectParse(parser); tok = parser.next_token(); if (tok.text() != ")"sv) { diff --git a/cpp_src/core/query/sql/sqlparser.h b/cpp_src/core/query/sql/sqlparser.h index b7f01f4f7..6ef7ed86c 100644 --- a/cpp_src/core/query/sql/sqlparser.h +++ b/cpp_src/core/query/sql/sqlparser.h @@ -1,8 +1,10 @@ #pragma once +#include #include #include "core/keyvalue/variant.h" #include "estl/tokenizer.h" +#include "sqltokentype.h" /// @namespace reindexer /// The base namespace @@ -15,6 +17,9 @@ class UpdateEntry; using EqualPosition_t = h_vector; class SQLParser { + class ParserContextsAppendGuard; + enum class Nested : bool { Yes = true, No = false }; + public: /// Parses pure sql select query and initializes Query object data members as a result. /// @param q - sql query. 
@@ -26,10 +31,10 @@ class SQLParser { /// Sql parser context struct SqlParsingCtx { struct SuggestionData { - SuggestionData(std::string tok, int tokType) : token(std::move(tok)), tokenType(tokType) {} + SuggestionData(std::string tok, SqlTokenType tokType) : token(std::move(tok)), tokenType(tokType) {} std::string token; - int tokenType = 0; - std::vector variants; + SqlTokenType tokenType = Start; + std::unordered_set variants; }; void updateLinkedNs(const std::string &ns) { if (autocompleteMode && (!foundPossibleSuggestions || possibleSuggestionDetectedInThisClause)) { @@ -41,7 +46,7 @@ class SQLParser { bool foundPossibleSuggestions = false; bool possibleSuggestionDetectedInThisClause = false; size_t suggestionsPos = 0; - std::vector tokens; + std::vector tokens; std::vector suggestions; std::string suggestionLinkedNs; }; @@ -56,7 +61,7 @@ class SQLParser { /// @param tokenType - token type. /// @param toLower - transform to lower representation. /// @return sql token object. - token peekSqlToken(tokenizer &parser, int tokenType, bool toLower = true); + token peekSqlToken(tokenizer &parser, SqlTokenType tokenType, bool toLower = true); /// Is current token last in autocomplete mode? bool reachedAutocompleteToken(tokenizer &parser, const token &tok); @@ -64,14 +69,9 @@ class SQLParser { /// Parses filter part of sql query. /// @param parser - tokenizer object instance. /// @return always returns zero. + template int selectParse(tokenizer &parser); - /// Parses filter part of sql query and gets suggestions from nested SQLParser - /// @param parser - nested parser object instance. - /// @param tok - tokenizer object instance. - /// @return always returns zero. - int nestedSelectParse(SQLParser &parser, tokenizer &tok); - /// Parses filter part of sql delete query. /// @param parser - tokenizer object instance. /// @return always returns zero. 
@@ -88,6 +88,7 @@ class SQLParser { int truncateParse(tokenizer &parser); /// Parse where entries + template int parseWhere(tokenizer &parser); template void parseWhereCondition(tokenizer &, T &&firstArg, OpType); @@ -116,6 +117,8 @@ class SQLParser { /// Parse merge entries void parseMerge(tokenizer &parser); + void parseModifyConditions(tokenizer &parser); + Query parseSubQuery(tokenizer &); static CondType getCondType(std::string_view cond); diff --git a/cpp_src/core/query/sql/sqlsuggester.cc b/cpp_src/core/query/sql/sqlsuggester.cc index 4cb9e73da..249e05d06 100644 --- a/cpp_src/core/query/sql/sqlsuggester.cc +++ b/cpp_src/core/query/sql/sqlsuggester.cc @@ -4,7 +4,6 @@ #include "core/query/query.h" #include "sqltokentype.h" -#include #include namespace reindexer { @@ -36,17 +35,18 @@ std::vector SQLSuggester::GetSuggestions(std::string_view q, size_t for (auto &it : suggester.ctx_.suggestions) { if (!it.variants.empty()) { - return it.variants; + return {it.variants.begin(), it.variants.end()}; } } - return std::vector(); + return {}; } -std::unordered_map> sqlTokenMatchings = { +std::unordered_map> sqlTokenMatchings = { {Start, {"explain", "select", "delete", "update", "truncate"}}, {StartAfterExplain, {"select", "delete", "update"}}, - {AggregationSqlToken, {"sum", "avg", "max", "min", "facet", "count", "distinct", "rank"}}, + {AggregationSqlToken, {"sum", "avg", "max", "min", "facet", "count", "distinct", "rank", "count_cached"}}, {SelectConditionsStart, {"where", "limit", "offset", "order", "join", "left", "inner", "equal_position", "merge", "or", ";"}}, + {NestedSelectConditionsStart, {"where", "limit", "offset", "order", "equal_position"}}, {ConditionSqlToken, {">", ">=", "<", "<=", "<>", "in", "allset", "range", "is", "==", "="}}, {WhereFieldValueSqlToken, {"null", "empty", "not"}}, {WhereFieldNegateValueSqlToken, {"null", "empty"}}, @@ -65,53 +65,55 @@ std::unordered_map> sqlTokenMatchings = { {SetSqlToken, {"set"}}, {WhereSqlToken, {"where"}}, 
{AllFieldsToken, {"*"}}, - {DeleteConditionsStart, {"where", "limit", "offset", "order"}}, + {ModifyConditionsStart, {"where", "limit", "offset", "order"}}, {UpdateOptionsSqlToken, {"set", "drop"}}, {EqualPositionSqlToken, {"equal_position"}}, {ST_DWithinSqlToken, {"ST_DWithin"}}, {ST_GeomFromTextSqlToken, {"ST_GeomFromText"}}, }; -static void getMatchingTokens(int tokenType, const std::string &token, std::vector &variants) { - const std::set &suggestions = sqlTokenMatchings[tokenType]; +static void getMatchingTokens(int tokenType, const std::string &token, std::unordered_set &variants) { + const std::unordered_set &suggestions = sqlTokenMatchings[tokenType]; for (auto it = suggestions.begin(); it != suggestions.end(); ++it) { if (isBlank(token) || checkIfStartsWith(token, *it)) { - variants.push_back(*it); + variants.insert(*it); } } } -void SQLSuggester::getMatchingNamespacesNames(const std::string &token, std::vector &variants) { +void SQLSuggester::getMatchingNamespacesNames(const std::string &token, std::unordered_set &variants) { auto namespaces = enumNamespaces_(EnumNamespacesOpts().OnlyNames()); for (auto &ns : namespaces) { - if (isBlank(token) || checkIfStartsWith(token, ns.name)) variants.push_back(ns.name); + if (isBlank(token) || checkIfStartsWith(token, ns.name)) variants.insert(ns.name); } } -void SQLSuggester::getMatchingFieldsNames(const std::string &token, std::vector &variants) { +void SQLSuggester::getMatchingFieldsNames(const std::string &token, std::unordered_set &variants) { auto namespaces = enumNamespaces_(EnumNamespacesOpts().WithFilter(ctx_.suggestionLinkedNs)); - if (namespaces.empty()) return; + if (namespaces.empty() || (namespaces.size() > 1 && isBlank(token))) return; auto dotPos = token.find('.'); - for (auto &idx : namespaces[0].indexes) { - if (idx.name_ == "#pk" || idx.name_ == "-tuple") continue; - if (isBlank(token) || (dotPos != std::string::npos ? 
checkIfStartsWith(token, idx.name_) - : checkIfStartsWith(token, idx.name_))) { - if (dotPos == std::string::npos) { - variants.push_back(idx.name_); - } else { - variants.push_back(idx.name_.substr(dotPos)); + for (const auto &ns : namespaces) { + for (auto &idx : ns.indexes) { + if (idx.name_ == "#pk" || idx.name_ == "-tuple") continue; + if (isBlank(token) || (dotPos != std::string::npos ? checkIfStartsWith(token, idx.name_) + : checkIfStartsWith(token, idx.name_))) { + if (dotPos == std::string::npos) { + variants.insert(idx.name_); + } else { + variants.insert(idx.name_.substr(dotPos)); + } } } } if (getSchema_) { - auto schema = getSchema_(namespaces[0].name); - if (schema) { - auto fieldsSuggestions = schema->GetSuggestions(token); - for (auto &suggestion : fieldsSuggestions) { - if (std::find(variants.begin(), variants.end(), suggestion) == variants.end()) { - variants.emplace_back(std::move(suggestion)); + for (const auto &ns : namespaces) { + auto schema = getSchema_(ns.name); + if (schema) { + auto fieldsSuggestions = schema->GetSuggestions(token); + for (auto &suggestion : fieldsSuggestions) { + variants.insert(std::move(suggestion)); } } } @@ -124,7 +126,8 @@ void SQLSuggester::getSuggestionsForToken(SqlParsingCtx::SuggestionData &ctx) { case StartAfterExplain: case FromSqlToken: case SelectConditionsStart: - case DeleteConditionsStart: + case NestedSelectConditionsStart: + case ModifyConditionsStart: case ConditionSqlToken: case WhereFieldValueSqlToken: case WhereFieldNegateValueSqlToken: @@ -145,21 +148,22 @@ void SQLSuggester::getSuggestionsForToken(SqlParsingCtx::SuggestionData &ctx) { getMatchingTokens(AggregationSqlToken, ctx.token, ctx.variants); getMatchingFieldsNames(ctx.token, ctx.variants); break; - case SelectFieldsListSqlToken: - getMatchingTokens(FromSqlToken, ctx.token, ctx.variants); - getMatchingTokens(AggregationSqlToken, ctx.token, ctx.variants); - getMatchingFieldsNames(ctx.token, ctx.variants); - break; case NamespaceSqlToken: 
getMatchingNamespacesNames(ctx.token, ctx.variants); break; + case WhereFieldOrSubquerySqlToken: + getMatchingTokens(SelectSqlToken, ctx.token, ctx.variants); + [[fallthrough]]; case AndSqlToken: case WhereFieldSqlToken: + getMatchingTokens(JoinTypesSqlToken, ctx.token, ctx.variants); + [[fallthrough]]; + case NestedAndSqlToken: + case NestedWhereFieldSqlToken: getMatchingTokens(NotSqlToken, ctx.token, ctx.variants); getMatchingTokens(ST_DWithinSqlToken, ctx.token, ctx.variants); getMatchingFieldsNames(ctx.token, ctx.variants); getMatchingTokens(EqualPositionSqlToken, ctx.token, ctx.variants); - getMatchingTokens(JoinTypesSqlToken, ctx.token, ctx.variants); break; case GeomFieldSqlToken: getMatchingTokens(ST_GeomFromTextSqlToken, ctx.token, ctx.variants); @@ -176,13 +180,31 @@ void SQLSuggester::getSuggestionsForToken(SqlParsingCtx::SuggestionData &ctx) { getMatchingNamespacesNames(ctx.token, ctx.variants); getMatchingFieldsNames(ctx.token, ctx.variants); break; + case WhereFieldValueOrSubquerySqlToken: + getMatchingTokens(SelectSqlToken, ctx.token, ctx.variants); + getMatchingTokens(WhereFieldValueSqlToken, ctx.token, ctx.variants); + break; + case DeleteSqlToken: + case AggregationSqlToken: + case NullSqlToken: + case EmptySqlToken: + case NotSqlToken: + case OrSqlToken: + case AllFieldsToken: + case FieldSqlToken: + case JoinSqlToken: + case MergeSqlToken: + case EqualPositionSqlToken: + case JoinTypesSqlToken: + case ST_DWithinSqlToken: + case ST_GeomFromTextSqlToken: default: break; } } bool SQLSuggester::findInPossibleTokens(int type, const std::string &v) { - const std::set &values = sqlTokenMatchings[type]; + const std::unordered_set &values = sqlTokenMatchings[type]; return (values.find(v) != values.end()); } @@ -226,29 +248,6 @@ void SQLSuggester::checkForTokenSuggestions(SqlParsingCtx::SuggestionData &data) getSuggestionsForToken(data); } } break; - case SelectFieldsListSqlToken: { - if (isBlank(data.token)) { - getSuggestionsForToken(data); - break; - } 
- - if ((data.token == ",") || (data.token == "(")) break; - - bool fromKeywordReached = false; - if (ctx_.tokens.size() > 1) { - int prevTokenType = ctx_.tokens.back(); - if ((prevTokenType == SingleSelectFieldSqlToken) || (prevTokenType == SelectFieldsListSqlToken)) { - fromKeywordReached = checkIfStartsWith(data.token, "from"); - if (fromKeywordReached && data.token.length() < strlen("from")) { - getSuggestionsForToken(data); - } - } - } - - if (!fromKeywordReached && !findInPossibleFields(data.token)) { - getSuggestionsForToken(data); - } - } break; case FromSqlToken: if (isBlank(data.token) || !iequals(data.token, "from")) { getSuggestionsForToken(data); @@ -260,13 +259,15 @@ void SQLSuggester::checkForTokenSuggestions(SqlParsingCtx::SuggestionData &data) } break; case SelectConditionsStart: - case DeleteConditionsStart: + case NestedSelectConditionsStart: + case ModifyConditionsStart: if (isBlank(data.token) || !findInPossibleTokens(data.tokenType, data.token)) { getSuggestionsForToken(data); } break; case GeomFieldSqlToken: case WhereFieldSqlToken: + case NestedWhereFieldSqlToken: if (isBlank(data.token)) { getSuggestionsForToken(data); break; @@ -310,7 +311,49 @@ void SQLSuggester::checkForTokenSuggestions(SqlParsingCtx::SuggestionData &data) case OnSqlToken: data.tokenType = NamespaceSqlToken; break; - default: + case Start: + case SelectSqlToken: + case DeleteSqlToken: + case StartAfterExplain: + case SingleSelectFieldSqlToken: + case AggregationSqlToken: + case FromSqlToken: + case NamespaceSqlToken: + case SelectConditionsStart: + case NestedSelectConditionsStart: + case WhereFieldSqlToken: + case NestedWhereFieldSqlToken: + case ConditionSqlToken: + case OpSqlToken: + case WhereOpSqlToken: + case FieldNameSqlToken: + case WhereFieldValueSqlToken: + case WhereFieldNegateValueSqlToken: + case NullSqlToken: + case EmptySqlToken: + case NotSqlToken: + case AndSqlToken: + case NestedAndSqlToken: + case OrSqlToken: + case BySqlToken: + case AllFieldsToken: + 
case SortDirectionSqlToken: + case FieldSqlToken: + case LeftSqlToken: + case InnerSqlToken: + case JoinSqlToken: + case MergeSqlToken: + case JoinedFieldNameSqlToken: + case ModifyConditionsStart: + case SetSqlToken: + case UpdateOptionsSqlToken: + case EqualPositionSqlToken: + case JoinTypesSqlToken: + case ST_DWithinSqlToken: + case ST_GeomFromTextSqlToken: + case GeomFieldSqlToken: + case WhereFieldOrSubquerySqlToken: + case WhereFieldValueOrSubquerySqlToken: break; } getSuggestionsForToken(data); @@ -334,6 +377,7 @@ void SQLSuggester::checkForTokenSuggestions(SqlParsingCtx::SuggestionData &data) } break; case AndSqlToken: + case NestedAndSqlToken: if (isBlank(data.token)) { getSuggestionsForToken(data); break; @@ -380,6 +424,22 @@ void SQLSuggester::checkForTokenSuggestions(SqlParsingCtx::SuggestionData &data) break; } break; + case DeleteSqlToken: + case AggregationSqlToken: + case NullSqlToken: + case EmptySqlToken: + case NotSqlToken: + case OrSqlToken: + case AllFieldsToken: + case FieldSqlToken: + case JoinSqlToken: + case MergeSqlToken: + case EqualPositionSqlToken: + case JoinTypesSqlToken: + case ST_DWithinSqlToken: + case ST_GeomFromTextSqlToken: + case WhereFieldOrSubquerySqlToken: + case WhereFieldValueOrSubquerySqlToken: default: getSuggestionsForToken(data); break; diff --git a/cpp_src/core/query/sql/sqlsuggester.h b/cpp_src/core/query/sql/sqlsuggester.h index 5ece47455..ded27b5e6 100644 --- a/cpp_src/core/query/sql/sqlsuggester.h +++ b/cpp_src/core/query/sql/sqlsuggester.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include "core/schema.h" #include "sqlparser.h" @@ -37,8 +38,8 @@ class SQLSuggester : public SQLParser { /// Tries to find among possible namespaces. [[nodiscard]] bool findInPossibleNamespaces(const std::string &tok); /// Gets names of indexes that start with 'token'. 
- void getMatchingFieldsNames(const std::string &token, std::vector &variants); - void getMatchingNamespacesNames(const std::string &token, std::vector &variants); + void getMatchingFieldsNames(const std::string &token, std::unordered_set &variants); + void getMatchingNamespacesNames(const std::string &token, std::unordered_set &variants); EnumNamespacesF enumNamespaces_; GetSchemaF getSchema_; }; diff --git a/cpp_src/core/query/sql/sqltokentype.h b/cpp_src/core/query/sql/sqltokentype.h index 5e67dfaab..ccc7b685e 100644 --- a/cpp_src/core/query/sql/sqltokentype.h +++ b/cpp_src/core/query/sql/sqltokentype.h @@ -7,13 +7,14 @@ enum SqlTokenType { DeleteSqlToken, StartAfterExplain, SingleSelectFieldSqlToken, - SelectFieldsListSqlToken, AggregationSqlToken, FromSqlToken, NamespaceSqlToken, SelectConditionsStart, + NestedSelectConditionsStart, WhereSqlToken, WhereFieldSqlToken, + NestedWhereFieldSqlToken, ConditionSqlToken, OpSqlToken, WhereOpSqlToken, @@ -24,6 +25,7 @@ enum SqlTokenType { EmptySqlToken, NotSqlToken, AndSqlToken, + NestedAndSqlToken, OrSqlToken, BySqlToken, AllFieldsToken, @@ -35,7 +37,7 @@ enum SqlTokenType { MergeSqlToken, OnSqlToken, JoinedFieldNameSqlToken, - DeleteConditionsStart, + ModifyConditionsStart, SetSqlToken, UpdateOptionsSqlToken, EqualPositionSqlToken, @@ -43,5 +45,7 @@ enum SqlTokenType { ST_DWithinSqlToken, ST_GeomFromTextSqlToken, GeomFieldSqlToken, + WhereFieldOrSubquerySqlToken, + WhereFieldValueOrSubquerySqlToken, }; } diff --git a/cpp_src/core/querycache.h b/cpp_src/core/querycache.h index a9c0c4c14..2c50e4546 100644 --- a/cpp_src/core/querycache.h +++ b/cpp_src/core/querycache.h @@ -18,6 +18,8 @@ struct QueryCountCacheVal { }; struct QueryCacheKey { + using BufT = h_vector; + QueryCacheKey() = default; QueryCacheKey(QueryCacheKey&& other) = default; QueryCacheKey(const QueryCacheKey& other) = default; @@ -26,13 +28,15 @@ struct QueryCacheKey { QueryCacheKey(const Query& q) { WrSerializer ser; q.Serialize(ser, (SkipJoinQueries | 
SkipMergeQueries | SkipLimitOffset)); - buf.reserve(ser.Len()); + if rx_unlikely (ser.Len() > BufT::max_size()) { + throw Error(errLogic, "QueryCacheKey: buffer overflow"); + } buf.assign(ser.Buf(), ser.Buf() + ser.Len()); } size_t Size() const noexcept { return sizeof(QueryCacheKey) + (buf.is_hdata() ? 0 : buf.size()); } QueryCacheKey(WrSerializer& ser) : buf(ser.Buf(), ser.Buf() + ser.Len()) {} - h_vector buf; + BufT buf; }; struct EqQueryCacheKey { diff --git a/cpp_src/core/queryresults/joinresults.cc b/cpp_src/core/queryresults/joinresults.cc index 0a467edb5..9d27d51e9 100644 --- a/cpp_src/core/queryresults/joinresults.cc +++ b/cpp_src/core/queryresults/joinresults.cc @@ -1,6 +1,7 @@ #include "joinresults.h" #include "core/cjson/tagsmatcher.h" #include "core/payload/payloadiface.h" +#include "joinresults.h" #include "queryresults.h" #include @@ -8,11 +9,6 @@ namespace reindexer { namespace joins { -JoinedFieldIterator::JoinedFieldIterator(const NamespaceResults* parent, const ItemOffsets& offsets, uint8_t joinedFieldOrder) - : joinRes_(parent), offsets_(&offsets), order_(joinedFieldOrder) { - if (offsets_->size() > 0) updateOffset(); -} - bool JoinedFieldIterator::operator==(const JoinedFieldIterator& other) const { if (joinRes_ != other.joinRes_) throw Error(errLogic, "Comparising joined fields of different namespaces!"); if (offsets_ != other.offsets_) throw Error(errLogic, "Comparising joined fields of different items!"); @@ -20,25 +16,7 @@ bool JoinedFieldIterator::operator==(const JoinedFieldIterator& other) const { return true; } -bool JoinedFieldIterator::operator!=(const JoinedFieldIterator& other) const { return !operator==(other); } - -JoinedFieldIterator::const_reference JoinedFieldIterator::operator[](size_t idx) const { - assertrx(currOffset_ + idx < joinRes_->items_.size()); - return joinRes_->items_[currOffset_ + idx]; -} - -JoinedFieldIterator::reference JoinedFieldIterator::operator[](size_t idx) { - assertrx(currOffset_ + idx < 
joinRes_->items_.size()); - return const_cast(joinRes_->items_[currOffset_ + idx]); -} - -JoinedFieldIterator& JoinedFieldIterator::operator++() { - ++order_; - updateOffset(); - return *this; -} - -void JoinedFieldIterator::updateOffset() { +void JoinedFieldIterator::updateOffset() noexcept { currField_ = -1; if (order_ == joinRes_->GetJoinedSelectorsCount()) return; @@ -66,7 +44,7 @@ QueryResults JoinedFieldIterator::ToQueryResults() const { return QueryResults(begin, end); } -int JoinedFieldIterator::ItemsCount() const { +int JoinedFieldIterator::ItemsCount() const noexcept { assertrx(order_ < joinRes_->GetJoinedSelectorsCount()); if ((currField_ != -1) && (currField_ < uint8_t(offsets_->size()))) { @@ -76,35 +54,32 @@ int JoinedFieldIterator::ItemsCount() const { return 0; } -const JoinedFieldIterator noJoinedDataIt(nullptr, {}, 0); - -ItemIterator::ItemIterator(const NamespaceResults* parent, IdType rowid) : joinRes_(parent), rowid_(rowid) {} +static const ItemOffsets kEmptyOffsets; +static const JoinedFieldIterator kNoJoinedDataIt(nullptr, kEmptyOffsets, 0); -JoinedFieldIterator ItemIterator::begin() const { +JoinedFieldIterator ItemIterator::begin() const noexcept { auto it = joinRes_->offsets_.find(rowid_); - if (it == joinRes_->offsets_.end()) return noJoinedDataIt; - if (it->second.empty()) return noJoinedDataIt; + if (it == joinRes_->offsets_.end()) return kNoJoinedDataIt; + if (it->second.empty()) return kNoJoinedDataIt; return JoinedFieldIterator(joinRes_, it->second, 0); } JoinedFieldIterator ItemIterator::at(uint8_t joinedField) const { auto it = joinRes_->offsets_.find(rowid_); - if (it == joinRes_->offsets_.end()) return noJoinedDataIt; - if (it->second.empty()) return noJoinedDataIt; + if (it == joinRes_->offsets_.end()) return kNoJoinedDataIt; + if (it->second.empty()) return kNoJoinedDataIt; assertrx(joinedField < joinRes_->GetJoinedSelectorsCount()); return JoinedFieldIterator(joinRes_, it->second, joinedField); } -JoinedFieldIterator 
ItemIterator::end() const { +JoinedFieldIterator ItemIterator::end() const noexcept { auto it = joinRes_->offsets_.find(rowid_); - if (it == joinRes_->offsets_.end()) return noJoinedDataIt; - if (it->second.empty()) return noJoinedDataIt; + if (it == joinRes_->offsets_.end()) return kNoJoinedDataIt; + if (it->second.empty()) return kNoJoinedDataIt; return JoinedFieldIterator(joinRes_, it->second, joinRes_->GetJoinedSelectorsCount()); } -int ItemIterator::getJoinedFieldsCount() const { return joinRes_->GetJoinedSelectorsCount(); } - -int ItemIterator::getJoinedItemsCount() const { +int ItemIterator::getJoinedItemsCount() const noexcept { if (joinedItemsCount_ == -1) { joinedItemsCount_ = 0; auto it = joinRes_->offsets_.find(rowid_); @@ -116,7 +91,7 @@ int ItemIterator::getJoinedItemsCount() const { return joinedItemsCount_; } -ItemIterator ItemIterator::CreateFrom(const QueryResults::Iterator& it) { +ItemIterator ItemIterator::CreateFrom(const QueryResults::Iterator& it) noexcept { static NamespaceResults empty; static ItemIterator ret(&empty, 0); auto& itemRef = it.qr_->Items()[it.idx_]; diff --git a/cpp_src/core/queryresults/joinresults.h b/cpp_src/core/queryresults/joinresults.h index dede70033..065752d43 100644 --- a/cpp_src/core/queryresults/joinresults.h +++ b/cpp_src/core/queryresults/joinresults.h @@ -74,22 +74,35 @@ class JoinedFieldIterator { using reference = ItemRef&; using const_reference = const ItemRef&; - JoinedFieldIterator(const NamespaceResults* parent, const ItemOffsets& offsets, uint8_t joinedFieldOrder); + JoinedFieldIterator(const NamespaceResults* parent, const ItemOffsets& offsets, uint8_t joinedFieldOrder) noexcept + : joinRes_(parent), offsets_(&offsets), order_(joinedFieldOrder) { + if (offsets_->size() > 0) updateOffset(); + } bool operator==(const JoinedFieldIterator& other) const; - bool operator!=(const JoinedFieldIterator& other) const; - - const_reference operator[](size_t idx) const; - reference operator[](size_t idx); - 
JoinedFieldIterator& operator++(); + bool operator!=(const JoinedFieldIterator& other) const { return !operator==(other); } + + const_reference operator[](size_t idx) const noexcept { + assertrx(currOffset_ + idx < joinRes_->items_.size()); + return joinRes_->items_[currOffset_ + idx]; + } + reference operator[](size_t idx) noexcept { + assertrx(currOffset_ + idx < joinRes_->items_.size()); + return const_cast(joinRes_->items_[currOffset_ + idx]); + } + JoinedFieldIterator& operator++() noexcept { + ++order_; + updateOffset(); + return *this; + } ItemImpl GetItem(int itemIdx, const PayloadType& pt, const TagsMatcher& tm) const; QueryResults ToQueryResults() const; - int ItemsCount() const; + int ItemsCount() const noexcept; private: - void updateOffset(); + void updateOffset() noexcept; const NamespaceResults* joinRes_ = nullptr; const ItemOffsets* offsets_ = nullptr; uint8_t order_ = 0; @@ -101,16 +114,16 @@ class JoinedFieldIterator { /// Iterates over joined fields (if there are some) of item. 
class ItemIterator { public: - ItemIterator(const NamespaceResults* parent, IdType rowid); + ItemIterator(const NamespaceResults* parent, IdType rowid) noexcept : joinRes_(parent), rowid_(rowid) {} JoinedFieldIterator at(uint8_t joinedField) const; - JoinedFieldIterator begin() const; - JoinedFieldIterator end() const; + JoinedFieldIterator begin() const noexcept; + JoinedFieldIterator end() const noexcept; - int getJoinedFieldsCount() const; - int getJoinedItemsCount() const; + int getJoinedFieldsCount() const noexcept { return joinRes_->GetJoinedSelectorsCount(); } + int getJoinedItemsCount() const noexcept; - static ItemIterator CreateFrom(const QueryResults::Iterator& it); + static ItemIterator CreateFrom(const QueryResults::Iterator& it) noexcept; private: const NamespaceResults* joinRes_; diff --git a/cpp_src/core/queryresults/queryresults.cc b/cpp_src/core/queryresults/queryresults.cc index a17df13d5..a76aba904 100644 --- a/cpp_src/core/queryresults/queryresults.cc +++ b/cpp_src/core/queryresults/queryresults.cc @@ -139,7 +139,7 @@ h_vector QueryResults::GetNamespaces() const { return ret; } -int QueryResults::GetJoinedNsCtxIndex(int nsid) const { +int QueryResults::GetJoinedNsCtxIndex(int nsid) const noexcept { int ctxIndex = joined_.size(); for (int ns = 0; ns < nsid; ++ns) { ctxIndex += joined_[ns].GetJoinedSelectorsCount(); @@ -396,16 +396,6 @@ Error QueryResults::Iterator::GetCJSON(WrSerializer &ser, bool withHdrLen) { return errOK; } -bool QueryResults::Iterator::IsRaw() const { - auto &itemRef = qr_->items_[idx_]; - return itemRef.Raw(); -} -std::string_view QueryResults::Iterator::GetRaw() const { - auto &itemRef = qr_->items_[idx_]; - assertrx(itemRef.Raw()); - return std::string_view(reinterpret_cast(itemRef.Value().Ptr()), itemRef.Value().GetCapacity()); -} - Item QueryResults::Iterator::GetItem(bool enableHold) { auto &itemRef = qr_->items_[idx_]; @@ -445,25 +435,23 @@ void QueryResults::AddItem(Item &item, bool withData, bool enableHold) { } } 
-const TagsMatcher &QueryResults::getTagsMatcher(int nsid) const { return ctxs[nsid].tagsMatcher_; } +const TagsMatcher &QueryResults::getTagsMatcher(int nsid) const noexcept { return ctxs[nsid].tagsMatcher_; } -const PayloadType &QueryResults::getPayloadType(int nsid) const { return ctxs[nsid].type_; } +const PayloadType &QueryResults::getPayloadType(int nsid) const noexcept { return ctxs[nsid].type_; } -const FieldsSet &QueryResults::getFieldsFilter(int nsid) const { return ctxs[nsid].fieldsFilter_; } +const FieldsSet &QueryResults::getFieldsFilter(int nsid) const noexcept { return ctxs[nsid].fieldsFilter_; } -TagsMatcher &QueryResults::getTagsMatcher(int nsid) { return ctxs[nsid].tagsMatcher_; } +TagsMatcher &QueryResults::getTagsMatcher(int nsid) noexcept { return ctxs[nsid].tagsMatcher_; } -PayloadType &QueryResults::getPayloadType(int nsid) { return ctxs[nsid].type_; } +PayloadType &QueryResults::getPayloadType(int nsid) noexcept { return ctxs[nsid].type_; } -std::shared_ptr QueryResults::getSchema(int nsid) const { return ctxs[nsid].schema_; } +std::shared_ptr QueryResults::getSchema(int nsid) const noexcept { return ctxs[nsid].schema_; } -int QueryResults::getNsNumber(int nsid) const { +int QueryResults::getNsNumber(int nsid) const noexcept { assertrx(ctxs[nsid].schema_); return ctxs[nsid].schema_->GetProtobufNsNumber(); } -int QueryResults::getMergedNSCount() const { return ctxs.size(); } - void QueryResults::addNSContext(const PayloadType &type, const TagsMatcher &tagsMatcher, const FieldsSet &filter, std::shared_ptr schema) { if (filter.getTagsPathsLength()) nonCacheableData = true; diff --git a/cpp_src/core/queryresults/queryresults.h b/cpp_src/core/queryresults/queryresults.h index 4c06d9d5f..10f4cf545 100644 --- a/cpp_src/core/queryresults/queryresults.h +++ b/cpp_src/core/queryresults/queryresults.h @@ -54,9 +54,10 @@ class QueryResults { void Erase(ItemRefVector::iterator begin, ItemRefVector::iterator end); size_t Count() const noexcept { return 
items_.size(); } size_t TotalCount() const noexcept { return totalCount; } - const std::string &GetExplainResults() const &noexcept { return explainResults; } + const std::string &GetExplainResults() const & noexcept { return explainResults; } const std::string &GetExplainResults() const && = delete; - const std::vector &GetAggregationResults() const &noexcept { return aggregationResults; } + std::string &&MoveExplainResults() & noexcept { return std::move(explainResults); } + const std::vector &GetAggregationResults() const & noexcept { return aggregationResults; } const std::vector &GetAggregationResults() const && = delete; void Clear(); h_vector GetNamespaces() const; @@ -75,10 +76,14 @@ class QueryResults { // use enableHold = false only if you are sure that the item will be destroyed before the queryResults Item GetItem(bool enableHold = true); joins::ItemIterator GetJoined(); - const ItemRef &GetItemRef() const { return qr_->items_[idx_]; } - int64_t GetLSN() const { return qr_->items_[idx_].Value().GetLSN(); } - bool IsRaw() const; - std::string_view GetRaw() const; + const ItemRef &GetItemRef() const noexcept { return qr_->items_[idx_]; } + int64_t GetLSN() const noexcept { return qr_->items_[idx_].Value().GetLSN(); } + bool IsRaw() const noexcept { return qr_->items_[idx_].Raw(); } + std::string_view GetRaw() const noexcept { + auto &itemRef = qr_->items_[idx_]; + assertrx(itemRef.Raw()); + return std::string_view(reinterpret_cast(itemRef.Value().Ptr()), itemRef.Value().GetCapacity()); + } Iterator &operator++() noexcept { idx_++; return *this; @@ -98,9 +103,9 @@ class QueryResults { Error err_; }; - Iterator begin() const { return Iterator{this, 0, errOK}; } - Iterator end() const { return Iterator{this, int(items_.size()), errOK}; } - Iterator operator[](int idx) const { return Iterator{this, idx, errOK}; } + Iterator begin() const noexcept { return Iterator{this, 0, errOK}; } + Iterator end() const noexcept { return Iterator{this, int(items_.size()), 
errOK}; } + Iterator operator[](int idx) const noexcept { return Iterator{this, idx, errOK}; } std::vector joined_; std::vector aggregationResults; @@ -122,17 +127,17 @@ class QueryResults { void addNSContext(const PayloadType &type, const TagsMatcher &tagsMatcher, const FieldsSet &fieldsFilter, std::shared_ptr schema); - const TagsMatcher &getTagsMatcher(int nsid) const; - const PayloadType &getPayloadType(int nsid) const; - const FieldsSet &getFieldsFilter(int nsid) const; - TagsMatcher &getTagsMatcher(int nsid); - PayloadType &getPayloadType(int nsid); - std::shared_ptr getSchema(int nsid) const; - int getNsNumber(int nsid) const; - int getMergedNSCount() const; - ItemRefVector &Items() { return items_; } + const TagsMatcher &getTagsMatcher(int nsid) const noexcept; + const PayloadType &getPayloadType(int nsid) const noexcept; + const FieldsSet &getFieldsFilter(int nsid) const noexcept; + TagsMatcher &getTagsMatcher(int nsid) noexcept; + PayloadType &getPayloadType(int nsid) noexcept; + std::shared_ptr getSchema(int nsid) const noexcept; + int getNsNumber(int nsid) const noexcept; + int getMergedNSCount() const noexcept { return ctxs.size(); } + ItemRefVector &Items() noexcept { return items_; } const ItemRefVector &Items() const { return items_; } - int GetJoinedNsCtxIndex(int nsid) const; + int GetJoinedNsCtxIndex(int nsid) const noexcept; // Add owning ns pointer // noLock has always to be 'true' (i.e. 
this method can only be called unders Namespace's lock) void AddNamespace(NamespaceImplPtr, bool noLock); diff --git a/cpp_src/core/reindexer_impl/rx_selector.cc b/cpp_src/core/reindexer_impl/rx_selector.cc index ffc87deb5..a1127b1e1 100644 --- a/cpp_src/core/reindexer_impl/rx_selector.cc +++ b/cpp_src/core/reindexer_impl/rx_selector.cc @@ -38,12 +38,16 @@ void RxSelector::DoSelect(const Query& q, QueryResults& result, NsLocker& loc } std::vector queryResultsHolder; std::optional queryCopy; + ExplainCalc::Duration preselectTimeTotal{0}; + std::vector subQueryExplains; if (!q.GetSubQueries().empty()) { if (q.GetDebugLevel() >= LogInfo || ns->config_.logLevel >= LogInfo) { logPrintf(LogInfo, "Query before subqueries substitution: %s", q.GetSQL()); } queryCopy.emplace(q); - preselectSubQueries(*queryCopy, queryResultsHolder, locks, func, ctx); + const auto preselectStartTime = ExplainCalc::Clock::now(); + subQueryExplains = preselectSubQueries(*queryCopy, queryResultsHolder, locks, func, ctx); + preselectTimeTotal += ExplainCalc::Clock::now() - preselectStartTime; } const Query& query = queryCopy ? 
*queryCopy : q; std::vector joinQueryResultsContexts; @@ -58,12 +62,11 @@ void RxSelector::DoSelect(const Query& q, QueryResults& result, NsLocker& loc } JoinedSelectors mainJoinedSelectors; - ExplainCalc::Duration preselectTimeTotal{0}; if (thereAreJoins) { const auto preselectStartTime = ExplainCalc::Clock::now(); mainJoinedSelectors = prepareJoinedSelectors(query, result, locks, func, joinQueryResultsContexts, ctx); result.joined_.resize(1 + query.GetMergeQueries().size()); - preselectTimeTotal = ExplainCalc::Clock::now() - preselectStartTime; + preselectTimeTotal += ExplainCalc::Clock::now() - preselectStartTime; } IsFTQuery isFtQuery{IsFTQuery::NotSet}; { @@ -73,6 +76,7 @@ void RxSelector::DoSelect(const Query& q, QueryResults& result, NsLocker& loc selCtx.contextCollectingMode = true; selCtx.functions = &func; selCtx.nsid = 0; + selCtx.subQueriesExplains = std::move(subQueryExplains); if (!query.GetMergeQueries().empty()) { selCtx.isMergeQuery = IsMergeQuery::Yes; if rx_unlikely (!query.sortingEntries_.empty()) { @@ -142,13 +146,16 @@ void RxSelector::DoSelect(const Query& q, QueryResults& result, NsLocker& loc std::optional mQueryCopy; if (!mq.GetSubQueries().empty()) { mQueryCopy.emplace(mq); - preselectSubQueries(*mQueryCopy, queryResultsHolder, locks, func, ctx); } const JoinedQuery& mQuery = mQueryCopy ? 
*mQueryCopy : mq; + SelectCtx mctx(mQuery, &query); + if (!mq.GetSubQueries().empty()) { + // NOLINTNEXTLINE(bugprone-unchecked-optional-access) + mctx.subQueriesExplains = preselectSubQueries(*mQueryCopy, queryResultsHolder, locks, func, ctx); + } auto mns = locks.Get(mQuery.NsName()); assertrx_throw(mns); - SelectCtx mctx(mQuery, &query); mctx.nsid = ++counter; mctx.isMergeQuery = IsMergeQuery::Yes; mctx.isFtQuery = isFtQuery; @@ -245,7 +252,7 @@ bool RxSelector::isPreResultValuesModeOptimizationAvailable(const Query& jItemQ, template bool RxSelector::selectSubQuery(const Query& subQuery, const Query& mainQuery, NsLocker& locks, SelectFunctionsHolder& func, - const RdxContext& rdxCtx) { + std::vector& explain, const RdxContext& rdxCtx) { auto ns = locks.Get(subQuery.NsName()); assertrx_throw(ns); @@ -259,12 +266,16 @@ bool RxSelector::selectSubQuery(const Query& subQuery, const Query& mainQuery, N QueryResults result; ns->Select(result, sctx, rdxCtx); locks.Delete(ns); + if (!result.GetExplainResults().empty()) { + explain.emplace_back(subQuery.NsName(), result.MoveExplainResults()); + } return sctx.matchedAtLeastOnce; } template VariantArray RxSelector::selectSubQuery(const Query& subQuery, const Query& mainQuery, NsLocker& locks, QueryResults& qr, - SelectFunctionsHolder& func, const RdxContext& rdxCtx) { + SelectFunctionsHolder& func, std::variant fieldOrKeys, + std::vector& explain, const RdxContext& rdxCtx) { NamespaceImpl::Ptr ns = locks.Get(subQuery.NsName()); assertrx_throw(ns); @@ -353,6 +364,10 @@ VariantArray RxSelector::selectSubQuery(const Query& subQuery, const Query& main } } locks.Delete(ns); + if (!qr.GetExplainResults().empty()) { + explain.emplace_back(subQuery.NsName(), std::move(qr.MoveExplainResults())); + explain.back().SetFieldOrKeys(std::move(fieldOrKeys)); + } return result; } @@ -450,8 +465,12 @@ JoinedSelectors RxSelector::prepareJoinedSelectors(const Query& q, QueryResults& } template -void RxSelector::preselectSubQueries(Query& 
mainQuery, std::vector& queryResultsHolder, NsLocker& locks, - SelectFunctionsHolder& func, const RdxContext& ctx) { +std::vector RxSelector::preselectSubQueries(Query& mainQuery, std::vector& queryResultsHolder, + NsLocker& locks, SelectFunctionsHolder& func, const RdxContext& ctx) { + std::vector explains; + if (mainQuery.GetExplain() || mainQuery.GetDebugLevel() >= LogInfo) { + explains.reserve(mainQuery.GetSubQueries().size()); + } for (size_t i = 0, s = mainQuery.Entries().Size(); i < s; ++i) { mainQuery.Entries().InvokeAppropriate( i, Skip{}, @@ -459,14 +478,16 @@ void RxSelector::preselectSubQueries(Query& mainQuery, std::vector try { const CondType cond = sqe.Condition(); if (cond == CondAny || cond == CondEmpty) { - if (selectSubQuery(mainQuery.GetSubQuery(sqe.QueryIndex()), mainQuery, locks, func, ctx) == (cond == CondAny)) { + if (selectSubQuery(mainQuery.GetSubQuery(sqe.QueryIndex()), mainQuery, locks, func, explains, ctx) == + (cond == CondAny)) { mainQuery.SetEntry(i); } else { mainQuery.SetEntry(i); } } else { QueryResults qr; - const auto values = selectSubQuery(mainQuery.GetSubQuery(sqe.QueryIndex()), mainQuery, locks, qr, func, ctx); + const auto values = selectSubQuery(mainQuery.GetSubQuery(sqe.QueryIndex()), mainQuery, locks, qr, func, + sqe.Values().size(), explains, ctx); if (QueryEntries::CheckIfSatisfyCondition(values, sqe.Condition(), sqe.Values())) { mainQuery.SetEntry(i); } else { @@ -481,15 +502,17 @@ void RxSelector::preselectSubQueries(Query& mainQuery, std::vector [&](const SubQueryFieldEntry& sqe) { try { queryResultsHolder.resize(queryResultsHolder.size() + 1); - mainQuery.SetEntry( - i, std::move(mainQuery.GetUpdatableEntry(i)).FieldName(), sqe.Condition(), - selectSubQuery(mainQuery.GetSubQuery(sqe.QueryIndex()), mainQuery, locks, queryResultsHolder.back(), func, ctx)); + mainQuery.SetEntry(i, std::move(mainQuery.GetUpdatableEntry(i)).FieldName(), + sqe.Condition(), + selectSubQuery(mainQuery.GetSubQuery(sqe.QueryIndex()), 
mainQuery, locks, + queryResultsHolder.back(), func, sqe.FieldName(), explains, ctx)); } catch (const Error& err) { throw Error(err.code(), "Error during preprocessing of subquery '" + mainQuery.GetSubQuery(sqe.QueryIndex()).GetSQL() + "': " + err.what()); } }); } + return explains; } template void RxSelector::DoSelect>( diff --git a/cpp_src/core/reindexer_impl/rx_selector.h b/cpp_src/core/reindexer_impl/rx_selector.h index d77e9c5e9..1110f4651 100644 --- a/cpp_src/core/reindexer_impl/rx_selector.h +++ b/cpp_src/core/reindexer_impl/rx_selector.h @@ -83,14 +83,15 @@ class RxSelector { static JoinedSelectors prepareJoinedSelectors(const Query &q, QueryResults &result, NsLocker &locks, SelectFunctionsHolder &func, std::vector &, const RdxContext &ctx); template - static void preselectSubQueries(Query &mainQuery, std::vector &queryResultsHolder, NsLocker &, SelectFunctionsHolder &, - const RdxContext &); + [[nodiscard]] static std::vector preselectSubQueries(Query &mainQuery, std::vector &queryResultsHolder, + NsLocker &, SelectFunctionsHolder &, const RdxContext &); template [[nodiscard]] static bool selectSubQuery(const Query &subQuery, const Query &mainQuery, NsLocker &, SelectFunctionsHolder &, - const RdxContext &); + std::vector &, const RdxContext &); template [[nodiscard]] static VariantArray selectSubQuery(const Query &subQuery, const Query &mainQuery, NsLocker &, QueryResults &, - SelectFunctionsHolder &, const RdxContext &); + SelectFunctionsHolder &, std::variant fieldOrKeys, + std::vector &, const RdxContext &); static bool isPreResultValuesModeOptimizationAvailable(const Query &jItemQ, const NamespaceImpl::Ptr &jns, const Query &mainQ); }; diff --git a/cpp_src/core/schema.h b/cpp_src/core/schema.h index aa147f1c7..309af583d 100644 --- a/cpp_src/core/schema.h +++ b/cpp_src/core/schema.h @@ -136,7 +136,7 @@ class Schema { std::string_view GetJSON() const noexcept { return originalJson_; } Error BuildProtobufSchema(TagsMatcher& tm, PayloadType& pt); Error 
GetProtobufSchema(WrSerializer& schema) const; - int GetProtobufNsNumber() const { return protobufNsNumber_; } + int GetProtobufNsNumber() const noexcept { return protobufNsNumber_; } const PrefixTree::PrefixTreeNode* GetRoot() const { return &paths_.root_; } static std::string AppendProtobufNumber(std::string_view j, int protobufNsNumber); diff --git a/cpp_src/core/selectfunc/functions/highlight.cc b/cpp_src/core/selectfunc/functions/highlight.cc index 45c1dadab..4e4177843 100644 --- a/cpp_src/core/selectfunc/functions/highlight.cc +++ b/cpp_src/core/selectfunc/functions/highlight.cc @@ -26,6 +26,10 @@ bool Highlight::Process(ItemRef &res, PayloadType &pl_type, const SelectFuncStru pl.GetByJsonPath(func.tagsPath, kr, KeyValueType::Undefined{}); } + if (kr.empty() || !kr[0].Type().IsSame(KeyValueType::String{})) { + throw Error(errLogic, "Unable to apply highlight function to the non-string field '%s'", func.field); + } + const std::string *data = p_string(kr[0]).getCxxstr(); auto pva = dataFtCtx->area_[it->second].GetAreas(func.fieldNo); if (!pva || pva->Empty()) return false; diff --git a/cpp_src/core/selectfunc/functions/snippet.cc b/cpp_src/core/selectfunc/functions/snippet.cc index 17e9b0bf4..951399101 100644 --- a/cpp_src/core/selectfunc/functions/snippet.cc +++ b/cpp_src/core/selectfunc/functions/snippet.cc @@ -271,6 +271,9 @@ bool Snippet::Process(ItemRef &res, PayloadType &pl_type, const SelectFuncStruct VariantArray kr; pl.Get(func.field, kr); + if (kr.empty() || !kr[0].Type().IsSame(KeyValueType::String{})) { + throw Error(errLogic, "Unable to apply snippet function to the non-string field '%s'", func.field); + } const std::string *data = p_string(kr[0]).getCxxstr(); auto pva = dataFtCtx->area_[it->second].GetAreas(func.fieldNo); diff --git a/cpp_src/core/selectfunc/selectfuncparser.cc b/cpp_src/core/selectfunc/selectfuncparser.cc index 1836f7441..a288bc931 100644 --- a/cpp_src/core/selectfunc/selectfuncparser.cc +++ 
b/cpp_src/core/selectfunc/selectfuncparser.cc @@ -12,17 +12,23 @@ SelectFuncStruct &SelectFuncParser::Parse(const std::string &query) { token tok = parser.next_token(tokenizer::flags::no_flags); - selectFuncStruct_.field = std::string(tok.text()); - auto dotPos = tok.text().find('.'); - if (dotPos == std::string_view::npos) { + if (dotPos == std::string_view::npos || (parser.peek_token(tokenizer::flags::no_flags).text() == "=")) { + selectFuncStruct_.field = std::string(tok.text()); tok = parser.next_token(tokenizer::flags::no_flags); if (tok.text() != "=") { - throw Error(errParams, "`=` is expected, but found `%s`", tok.text()); + if (tok.text() == ".") { + throw Error(errParams, "Unexpected space symbol before `.` (select function delimiter)"); + } + throw Error(errParams, "Expected `=` or `.` as a select function delimiter, but found `%s`", tok.text()); } token ftok; ParseFunction(parser, false, ftok); } else { + if (dotPos == tok.text_.size() - 1) { + throw Error(errParams, "Unexpected space symbol or token after `.` (select function delimiter): `%s`", tok.text()); + } + selectFuncStruct_.field = std::string(tok.text_.begin(), tok.text_.begin() + dotPos); token ftok(TokenName); ftok.text_.assign(tok.text_.begin() + dotPos + 1, tok.text_.end()); ParseFunction(parser, false, ftok); @@ -215,10 +221,12 @@ SelectFuncStruct &SelectFuncParser::ParseFunction(tokenizer &parser, bool partOf } } if (!selectFuncStruct_.isFunction) { - throw Error(errParseDSL, "%s: The closing parenthesis is required, but found `%s`", selectFuncStruct_.funcName, tok.text()); + throw Error(errParseDSL, "%s: The closing parenthesis is required, but found `%s`. Select function name: `%s`", + selectFuncStruct_.funcName, tok.text(), selectFuncStruct_.funcName); } } else { - throw Error(errParseDSL, "%s: An open parenthesis is required, but found `%s`", selectFuncStruct_.funcName, tok.text()); + throw Error(errParseDSL, "%s: An open parenthesis is required, but found `%s`. 
Select function name: `%s`", + selectFuncStruct_.funcName, tok.text(), selectFuncStruct_.funcName); } return selectFuncStruct_; diff --git a/cpp_src/core/selectkeyresult.h b/cpp_src/core/selectkeyresult.h index 55f83498b..8018061bd 100644 --- a/cpp_src/core/selectkeyresult.h +++ b/cpp_src/core/selectkeyresult.h @@ -21,11 +21,11 @@ class SingleSelectKeyResult { public: SingleSelectKeyResult() noexcept {} - SingleSelectKeyResult(IndexIterator::Ptr indexForwardIter) : indexForwardIter_(std::move(indexForwardIter)) { + explicit SingleSelectKeyResult(IndexIterator::Ptr &&indexForwardIter) noexcept : indexForwardIter_(std::move(indexForwardIter)) { assertrx(indexForwardIter_ != nullptr); } template - explicit SingleSelectKeyResult(const KeyEntryT &ids, SortType sortId) { + explicit SingleSelectKeyResult(const KeyEntryT &ids, SortType sortId) noexcept { if (ids.Unsorted().IsCommited()) { ids_ = ids.Sorted(sortId); } else { @@ -35,7 +35,7 @@ class SingleSelectKeyResult { useBtree_ = true; } } - explicit SingleSelectKeyResult(IdSet::Ptr ids) noexcept : tempIds_(std::move(ids)), ids_(*tempIds_) {} + explicit SingleSelectKeyResult(IdSet::Ptr &&ids) noexcept : tempIds_(std::move(ids)), ids_(*tempIds_) {} explicit SingleSelectKeyResult(const IdSetRef &ids) noexcept : ids_(ids) {} explicit SingleSelectKeyResult(IdType rBegin, IdType rEnd) noexcept : rBegin_(rBegin), rEnd_(rEnd), isRange_(true) {} SingleSelectKeyResult(const SingleSelectKeyResult &other) noexcept @@ -253,7 +253,7 @@ class SelectKeyResult : public h_vector { } clear(); deferedExplicitSort = false; - emplace_back(mergedIds); + emplace_back(IdSet::Ptr(mergedIds)); return mergedIds; } }; diff --git a/cpp_src/core/sortingprioritiestable.cc b/cpp_src/core/sortingprioritiestable.cc index c98ee154d..8f86f59b1 100644 --- a/cpp_src/core/sortingprioritiestable.cc +++ b/cpp_src/core/sortingprioritiestable.cc @@ -1,14 +1,15 @@ #include "sortingprioritiestable.h" #include -#include "tools/assertrx.h" #include "tools/errors.h" 
#include "tools/stringstools.h" -using namespace reindexer; +namespace reindexer { SortingPrioritiesTable::SortingPrioritiesTable(const std::string& sortOrderUTF8) - : sortOrder_(std::make_shared()), sortOrderCharacters_(sortOrderUTF8) { - if (sortOrderCharacters_.empty()) throw Error(errLogic, "Custom sort format string cannot be empty!"); + : sortOrder_(make_intrusive()), sortOrderCharacters_(sortOrderUTF8) { + if (sortOrderCharacters_.empty()) { + throw Error(errLogic, "Custom sort format string cannot be empty!"); + } wchar_t prevCh = 0; uint16_t priority = 0; @@ -47,7 +48,7 @@ SortingPrioritiesTable::SortingPrioritiesTable(const std::string& sortOrderUTF8) if (!ranges.empty()) { auto rangeIt = ranges.begin(); uint16_t outOfRangePriority = maxPriority; - for (size_t i = 0; i < tableSize;) { + for (size_t i = 0; i < kTableSize;) { if ((rangeIt != ranges.end()) && (rangeIt->first == i)) { i += rangeIt->second; ++rangeIt; @@ -69,11 +70,4 @@ bool SortingPrioritiesTable::checkForRangeIntersection(std::map(c) < tableSize); - uint16_t ch(static_cast(c)); - return sortOrder_->operator[](ch); -} - -const std::string& SortingPrioritiesTable::GetSortOrderCharacters() const { return sortOrderCharacters_; } +} // namespace reindexer diff --git a/cpp_src/core/sortingprioritiestable.h b/cpp_src/core/sortingprioritiestable.h index 54331149a..0dc957eb3 100644 --- a/cpp_src/core/sortingprioritiestable.h +++ b/cpp_src/core/sortingprioritiestable.h @@ -2,8 +2,9 @@ #include #include -#include #include +#include "estl/intrusive_ptr.h" +#include "tools/assertrx.h" #include "type_consts.h" namespace reindexer { @@ -20,12 +21,17 @@ class SortingPrioritiesTable { explicit SortingPrioritiesTable(const std::string& sortOrderUTF8); /// Returns priority of a character. - /// @param ch - character. 
+ /// @param c - character /// @returns int priority value - int GetPriority(wchar_t ch) const; + int GetPriority(wchar_t c) const noexcept { + assertrx(sortOrder_.get() != nullptr); + // assertrx(static_cast(c) < tableSize); + uint16_t ch(static_cast(c)); + return sortOrder_->operator[](ch); + } /// @returns string of sort order characters - const std::string& GetSortOrderCharacters() const; + const std::string& GetSortOrderCharacters() const noexcept { return sortOrderCharacters_; } private: /// Checks whether ch is in existing ranges ir not. @@ -34,10 +40,11 @@ class SortingPrioritiesTable { /// @returns true, if character is in one of existing ranges already. bool checkForRangeIntersection(std::map& ranges, wchar_t ch); - static const uint32_t tableSize = 0x10000; - using SortOrderTable = std::array; - using SortOrderTablePtr = std::shared_ptr; + constexpr static uint32_t kTableSize = 0x10000; + using SortOrderTable = intrusive_atomic_rc_wrapper>; + using SortOrderTablePtr = intrusive_ptr; SortOrderTablePtr sortOrder_; std::string sortOrderCharacters_; }; + } // namespace reindexer diff --git a/cpp_src/estl/h_vector.h b/cpp_src/estl/h_vector.h index 735ce3f55..88fc55921 100644 --- a/cpp_src/estl/h_vector.h +++ b/cpp_src/estl/h_vector.h @@ -1,12 +1,14 @@ #pragma once -#include +#include #include #include #include +#include #include #include #include "debug_macros.h" +#include "estl/defines.h" #include "trivial_reverse_iterator.h" namespace reindexer { @@ -39,6 +41,9 @@ class h_vector { typedef trivial_reverse_iterator reverse_iterator; typedef unsigned size_type; typedef std::ptrdiff_t difference_type; + static_assert(std::is_trivial_v, "Expecting trivial reverse iterator"); + static_assert(std::is_trivial_v, "Expecting trivial const reverse iterator"); + h_vector() noexcept : e_{0, 0}, size_(0), is_hdata_(1) {} explicit h_vector(size_type size) : h_vector() { resize(size); } h_vector(size_type size, const T& v) : h_vector() { @@ -56,7 +61,8 @@ class h_vector { 
reserve(other.capacity()); const pointer p = ptr(); const_pointer op = other.ptr(); - for (size_type i = 0; i < other.size(); i++) { + const size_type osz = other.size(); + for (size_type i = 0; i < osz; i++) { new (p + i) T(op[i]); } size_ = other.size_; @@ -65,9 +71,10 @@ class h_vector { if (other.is_hdata()) { const pointer p = reinterpret_cast(hdata_); const pointer op = reinterpret_cast(other.hdata_); - for (size_type i = 0; i < other.size(); i++) { + const size_type osz = other.size(); + for (size_type i = 0; i < osz; i++) { new (p + i) T(std::move(op[i])); - if constexpr (!std::is_trivially_destructible::value) { + if constexpr (!std::is_trivially_destructible_v) { op[i].~T(); } } @@ -89,11 +96,13 @@ class h_vector { size_type i = mv; const pointer p = ptr(); const_pointer op = other.ptr(); - for (; i < other.size(); i++) { + const auto osz = other.size(); + for (; i < osz; i++) { new (p + i) T(op[i]); } - if constexpr (!std::is_trivially_destructible::value) { - for (; i < size(); i++) p[i].~T(); + if constexpr (!std::is_trivially_destructible_v) { + const auto old_sz = size(); + for (; i < old_sz; i++) p[i].~T(); } size_ = other.size_; } @@ -104,11 +113,12 @@ class h_vector { if (&other != this) { clear(); if (other.is_hdata()) { - for (size_type i = 0; i < other.size(); i++) { - const pointer p = ptr(); - const pointer op = other.ptr(); + const size_type osz = other.size(); + const pointer p = ptr(); + const pointer op = other.ptr(); + for (size_type i = 0; i < osz; i++) { new (p + i) T(std::move(op[i])); - if constexpr (!std::is_trivially_destructible::value) { + if constexpr (!std::is_trivially_destructible_v) { op[i].~T(); } } @@ -126,9 +136,10 @@ class h_vector { bool operator==(const h_vector& other) const noexcept(noexcept(std::declval() == std::declval())) { if (&other != this) { - if (size() != other.size()) return false; - for (size_t i = 0; i < size(); ++i) { - if (!(at(i) == other.at(i))) return false; + const size_type sz = size_; + if (sz != 
other.size()) return false; + for (size_t i = 0; i < sz; ++i) { + if (!(operator[](i) == other[i])) return false; } return true; } @@ -138,6 +149,8 @@ class h_vector { return !operator==(other); } + static constexpr size_type max_size() noexcept { return std::numeric_limits::max() >> 1; } + template void clear() noexcept { if constexpr (FreeHeapMemory) { @@ -145,7 +158,8 @@ class h_vector { is_hdata_ = 1; } else if constexpr (!std::is_trivially_destructible_v) { const pointer p = ptr(); - for (size_type i = 0; i < size_; ++i) p[i].~T(); + const size_type sz = size_; + for (size_type i = 0; i < sz; ++i) p[i].~T(); } size_ = 0; } @@ -156,26 +170,10 @@ class h_vector { const_iterator end() const noexcept { return ptr() + size_; } const_iterator cbegin() const noexcept { return ptr(); } const_iterator cend() const noexcept { return ptr() + size_; } - const_reverse_iterator rbegin() const noexcept { - const_reverse_iterator it; - it = end(); - return it; - } - const_reverse_iterator rend() const noexcept { - const_reverse_iterator it; - it = begin(); - return it; - } - reverse_iterator rbegin() noexcept { - reverse_iterator it; - it = end(); - return it; - } - reverse_iterator rend() noexcept { - reverse_iterator it; - it = begin(); - return it; - } + const_reverse_iterator rbegin() const noexcept { return end(); } + const_reverse_iterator rend() const noexcept { return begin(); } + reverse_iterator rbegin() noexcept { return end(); } + reverse_iterator rend() noexcept { return begin(); } size_type size() const noexcept { return size_; } size_type capacity() const noexcept { return is_hdata_ ? 
holdSize : e_.cap_; } bool empty() const noexcept { return size_ == 0; } @@ -188,13 +186,13 @@ class h_vector { return ptr()[pos]; } const_reference at(size_type pos) const { - if (pos >= size()) { + if rx_unlikely (pos >= size()) { throw std::logic_error("h_vector: Out of range (pos: " + std::to_string(pos) + ", size: " + std::to_string(size())); } return ptr()[pos]; } reference at(size_type pos) { - if (pos >= size()) { + if rx_unlikely (pos >= size()) { throw std::logic_error("h_vector: Out of range (pos: " + std::to_string(pos) + ", size: " + std::to_string(size())); } return ptr()[pos]; @@ -222,34 +220,49 @@ class h_vector { grow(sz); if constexpr (!reindexer::is_trivially_default_constructible::value) { const pointer p = ptr(); - for (size_type i = size_; i < sz; ++i) new (p + i) T(); + const size_type old_sz = size_; + for (size_type i = old_sz; i < sz; ++i) new (p + i) T(); } - if constexpr (!std::is_trivially_destructible::value) { + if constexpr (!std::is_trivially_destructible_v) { const pointer p = ptr(); - for (size_type i = sz; i < size_; ++i) p[i].~T(); + const size_type old_sz = size_; + for (size_type i = sz; i < old_sz; ++i) p[i].~T(); } size_ = sz; } void resize(size_type sz, const T& default_value) { grow(sz); - for (size_type i = size_; i < sz; i++) new (ptr() + i) T(default_value); - if constexpr (!std::is_trivially_destructible::value) { - for (size_type i = sz; i < size_; i++) ptr()[i].~T(); + const size_type old_sz = size_; + const pointer p = ptr(); + for (size_type i = old_sz; i < sz; ++i) { + new (p + i) T(default_value); + } + if constexpr (!std::is_trivially_destructible_v) { + for (size_type i = sz; i < old_sz; ++i) { + p[i].~T(); + } } size_ = sz; } void reserve(size_type sz) { if (sz > capacity()) { - if (sz <= holdSize) { - throw std::logic_error("Unexpected reserved size"); + if rx_unlikely (sz > max_size()) { + throw std::logic_error("h_vector: max capacity overflow (requested: " + std::to_string(sz) + + ", max_size: " + 
std::to_string(max_size()) + " )"); + } + if rx_unlikely (sz <= holdSize) { + throw std::logic_error("h_vector: unexpected reserved size"); } // NOLINTNEXTLINE(bugprone-sizeof-expression) pointer new_data = static_cast(operator new(sz * sizeof(T))); // ?? dynamic pointer oold_data = ptr(); pointer old_data = oold_data; - for (size_type i = 0; i < size_; i++) { + // Creating those explicit old_sz variable for better vectorization + for (size_type i = 0, old_sz = size_; i < old_sz; ++i) { new (new_data + i) T(std::move(*old_data)); - if (!std::is_trivially_destructible::value) old_data->~T(); + if constexpr (!std::is_trivially_destructible_v) { + old_data->~T(); + } ++old_data; } if (!is_hdata()) operator delete(oold_data); @@ -260,17 +273,19 @@ class h_vector { } void grow(size_type sz) { const auto cap = capacity(); - if (sz > cap) reserve(std::max(sz, cap * 2)); + if (sz > cap) { + reserve(std::max(sz, std::min(max_size(), cap * 2))); + } } void push_back(const T& v) { grow(size_ + 1); new (ptr() + size_) T(v); - size_++; + ++size_; } void push_back(T&& v) { grow(size_ + 1); new (ptr() + size_) T(std::move(v)); - size_++; + ++size_; } template reference emplace_back(Args&&... 
args) { @@ -282,7 +297,7 @@ class h_vector { } void pop_back() { rx_debug_check_nonempty(); - if constexpr (!std::is_trivially_destructible::value) { + if constexpr (!std::is_trivially_destructible_v) { ptr()[--size_].~T(); } else { --size_; @@ -294,10 +309,11 @@ class h_vector { push_back(v); } else { rx_debug_check_subscript(i); - grow(size_ + 1); + const size_type sz = size_; + grow(sz + 1); const pointer p = ptr(); - new (p + size_) T(std::move(p[size_ - 1])); - for (size_type j = size_ - 1; j > i; --j) { + new (p + sz) T(std::move(p[sz - 1])); + for (size_type j = sz - 1; j > i; --j) { p[j] = std::move(p[j - 1]); } p[i] = v; @@ -311,10 +327,11 @@ class h_vector { push_back(std::move(v)); } else { rx_debug_check_subscript(i); - grow(size_ + 1); + const size_type sz = size_; + grow(sz + 1); const pointer p = ptr(); - new (p + size_) T(std::move(p[size_ - 1])); - for (size_type j = size_ - 1; j > i; --j) { + new (p + sz) T(std::move(p[sz - 1])); + for (size_type j = sz - 1; j > i; --j) { p[j] = std::move(p[j - 1]); } p[i] = std::move(v); @@ -326,16 +343,17 @@ class h_vector { if (count == 0) return const_cast(pos); difference_type i = pos - begin(); rx_debug_check_subscript_le(i); - grow(size_ + count); + const int64_t sz = size_; + grow(sz + count); const pointer p = ptr(); - difference_type j = size_ + count - 1; - for (; j >= static_cast(size_) && j >= count + i; --j) { + difference_type j = sz + count - 1; + for (; j >= sz && j >= count + i; --j) { new (p + j) T(std::move(p[j - count])); } for (; j >= count + i; --j) { p[j] = std::move(p[j - count]); } - for (; j >= size_; --j) { + for (; j >= sz; --j) { new (p + j) T(v); } for (; j >= i; --j) { @@ -347,17 +365,18 @@ class h_vector { template iterator emplace(const_iterator pos, Args&&... 
args) { const size_type i = pos - begin(); - if (i == size()) { + const size_type sz = size_; + if (i == sz) { emplace_back(std::forward(args)...); } else { rx_debug_check_subscript(i); - grow(size_ + 1); + grow(sz + 1); const pointer p = ptr(); - new (p + size_) T(std::move(p[size_ - 1])); - for (size_type j = size_ - 1; j > i; --j) { + new (p + sz) T(std::move(p[sz - 1])); + for (size_type j = sz - 1; j > i; --j) { p[j] = std::move(p[j - 1]); } - p[i] = {std::forward(args)...}; + p[i] = T(std::forward(args)...); ++size_; } return begin() + i; @@ -370,7 +389,7 @@ class h_vector { auto firstPtr = p + i; std::move(firstPtr + 1, p + size_, firstPtr); --size_; - if constexpr (!std::is_trivially_destructible::value) { + if constexpr (!std::is_trivially_destructible_v) { p[size_].~T(); } return firstPtr; @@ -382,16 +401,17 @@ class h_vector { if (cnt == 0) return const_cast(pos); const difference_type i = pos - begin(); rx_debug_check_subscript_le(i); - grow(size_ + cnt); + const int64_t sz = size_; + grow(sz + cnt); const pointer p = ptr(); - difference_type j = size_ + cnt - 1; - for (; j >= static_cast(size_) && j >= cnt + i; --j) { + difference_type j = sz + cnt - 1; + for (; j >= sz && j >= cnt + i; --j) { new (p + j) T(std::move(p[j - cnt])); } for (; j >= cnt + i; --j) { p[j] = std::move(p[j - cnt]); } - for (; j >= static_cast(size_); --j) { + for (; j >= sz; --j) { new (p + j) T(*--last); } for (; j >= i; --j) { @@ -402,8 +422,20 @@ class h_vector { } template void assign(InputIt first, InputIt last) { - clear(); - insert(begin(), first, last); + static_assert(std::is_same_v::iterator_category, std::random_access_iterator_tag>, + "Expecting random access iterators here"); + rx_debug_check_valid_range(first, last); + const int64_t cnt = std::distance(first, last); + const int64_t cap = capacity(); + if (cap >= cnt && cap - (cnt >> 2) <= cnt) { + // Allow up to 25% extra memory + clear(); + } else { + clear(); + grow(cnt); + } + std::uninitialized_copy(first, 
last, begin()); + size_ = cnt; } iterator erase(const_iterator first, const_iterator last) { rx_debug_check_valid_range(first, last); @@ -416,21 +448,24 @@ class h_vector { return firstPtr; } rx_debug_check_subscript(i); + const size_type sz = size_; - std::move(firstPtr + cnt, p + size_, firstPtr); - const auto newSize = size_ - cnt; - if constexpr (!std::is_trivially_destructible::value) { - for (size_type j = newSize; j < size_; ++j) p[j].~T(); + std::move(std::make_move_iterator(firstPtr + cnt), std::make_move_iterator(p + sz), firstPtr); + const auto newSize = sz - cnt; + if constexpr (!std::is_trivially_destructible_v) { + for (size_type j = newSize; j < sz; ++j) p[j].~T(); } size_ = newSize; return firstPtr; } void shrink_to_fit() { - if (is_hdata() || size_ == capacity()) return; + const auto sz = size(); + if (is_hdata() || sz == capacity()) return; h_vector tmp; - tmp.reserve(size()); - tmp.insert(tmp.begin(), std::make_move_iterator(begin()), std::make_move_iterator(end())); + tmp.reserve(sz); + std::move(std::make_move_iterator(begin()), std::make_move_iterator(end()), tmp.begin()); + tmp.size_ = sz; *this = std::move(tmp); } size_t heap_size() const noexcept { return is_hdata() ? 
0 : capacity() * sizeof(T); } @@ -442,11 +477,13 @@ class h_vector { void destruct() noexcept { if (is_hdata()) { if constexpr (!std::is_trivially_destructible_v) { - for (size_type i = 0; i < size_; ++i) reinterpret_cast(hdata_)[i].~T(); + const size_type sz = size_; + for (size_type i = 0; i < sz; ++i) reinterpret_cast(hdata_)[i].~T(); } } else { if constexpr (!std::is_trivially_destructible_v) { - for (size_type i = 0; i < size_; ++i) e_.data_[i].~T(); + const size_type sz = size_; + for (size_type i = 0; i < sz; ++i) e_.data_[i].~T(); } operator delete(e_.data_); } diff --git a/cpp_src/estl/span.h b/cpp_src/estl/span.h index 1ad2bac9c..d16134e89 100644 --- a/cpp_src/estl/span.h +++ b/cpp_src/estl/span.h @@ -17,6 +17,8 @@ class span { typedef trivial_reverse_iterator const_reverse_iterator; typedef trivial_reverse_iterator reverse_iterator; typedef size_t size_type; + static_assert(std::is_trivial_v, "Expecting trivial reverse iterator"); + static_assert(std::is_trivial_v, "Expecting trivial const reverse iterator"); constexpr span() noexcept : data_(nullptr), size_(0) {} constexpr span(const span& other) noexcept : data_(other.data_), size_(other.size_) {} @@ -44,16 +46,8 @@ class span { constexpr span(T (&arr)[L]) noexcept : data_(arr), size_(L) {} constexpr iterator begin() const noexcept { return data_; } constexpr iterator end() const noexcept { return data_ + size_; } - /*constexpr*/ reverse_iterator rbegin() const noexcept { - reverse_iterator it; - it = end(); - return it; - } - /*constexpr*/ reverse_iterator rend() const noexcept { - reverse_iterator it; - it = begin(); - return it; - } + constexpr reverse_iterator rbegin() const noexcept { return end(); } + constexpr reverse_iterator rend() const noexcept { return begin(); } constexpr size_type size() const noexcept { return size_; } constexpr bool empty() const noexcept { return size_ == 0; } constexpr const T& operator[](size_type pos) const noexcept { return data_[pos]; } diff --git 
a/cpp_src/estl/trivial_reverse_iterator.h b/cpp_src/estl/trivial_reverse_iterator.h index 3919e6bbb..ccf1aa514 100644 --- a/cpp_src/estl/trivial_reverse_iterator.h +++ b/cpp_src/estl/trivial_reverse_iterator.h @@ -6,7 +6,7 @@ using std::iterator_traits; template class trivial_reverse_iterator { -public: +public: typedef trivial_reverse_iterator this_type; typedef Iterator iterator_type; typedef typename iterator_traits::iterator_category iterator_category; @@ -16,91 +16,83 @@ class trivial_reverse_iterator { typedef typename iterator_traits::pointer pointer; public: - // if CTOR is enabled std::is_trivial> return false; - // trivial_reverse_iterator() : current_(nullptr) {} + constexpr trivial_reverse_iterator() = default; + constexpr trivial_reverse_iterator(Iterator it) noexcept : current_(it) { + static_assert(std::is_trivial_v, "Expecting std::is_trivial_v"); + } template - trivial_reverse_iterator& operator=(const trivial_reverse_iterator& u) { + trivial_reverse_iterator& operator=(const trivial_reverse_iterator& u) noexcept { current_ = u.base(); return *this; } - Iterator base() const { return current_; } - reference operator*() const { + Iterator base() const noexcept { return current_; } + reference operator*() const noexcept { Iterator tmp = current_; return *--tmp; } - pointer operator->() const { return std::addressof(operator*()); } - trivial_reverse_iterator& operator++() { + pointer operator->() const noexcept { return std::addressof(operator*()); } + trivial_reverse_iterator& operator++() noexcept { --current_; return *this; } - trivial_reverse_iterator operator++(int) { + trivial_reverse_iterator operator++(int) noexcept { trivial_reverse_iterator tmp(*this); --current_; return tmp; } - trivial_reverse_iterator& operator--() { + trivial_reverse_iterator& operator--() noexcept { ++current_; return *this; } - trivial_reverse_iterator operator--(int) { + trivial_reverse_iterator operator--(int) noexcept { trivial_reverse_iterator tmp(*this); 
++current_; return tmp; } - trivial_reverse_iterator operator+(difference_type n) const { - Iterator ptr = current_ - n; - trivial_reverse_iterator tmp; - tmp = ptr; - return tmp; - } - trivial_reverse_iterator& operator+=(difference_type n) { + trivial_reverse_iterator operator+(difference_type n) const noexcept { return current_ - n; } + trivial_reverse_iterator& operator+=(difference_type n) noexcept { current_ -= n; return *this; } - trivial_reverse_iterator operator-(difference_type n) const { - Iterator ptr = current_ + n; - trivial_reverse_iterator tmp; - tmp = ptr; - return tmp; - } - trivial_reverse_iterator& operator-=(difference_type n) { + trivial_reverse_iterator operator-(difference_type n) const noexcept { return current_ + n; } + trivial_reverse_iterator& operator-=(difference_type n) noexcept { current_ += n; return *this; } - reference operator[](difference_type n) const { return *(*this + n); } + reference operator[](difference_type n) const noexcept { return *(*this + n); } // Assign operator overloading from const std::reverse_iterator template - trivial_reverse_iterator& operator=(const std::reverse_iterator& u) { + trivial_reverse_iterator& operator=(const std::reverse_iterator& u) noexcept { if (current_ != u.base()) current_ = u.base(); return *this; } // Assign operator overloading from non-const std::reverse_iterator template - trivial_reverse_iterator& operator=(std::reverse_iterator& u) { + trivial_reverse_iterator& operator=(std::reverse_iterator& u) noexcept { if (current_ != u.base()) current_ = u.base(); return *this; } // Assign native pointer template - trivial_reverse_iterator& operator=(Upn ptr) { + trivial_reverse_iterator& operator=(Upn ptr) noexcept { static_assert(std::is_pointer::value, "attempting assign a non-trivial pointer"); /*if (current_ != ptr)*/ current_ = ptr; return *this; } - inline bool operator!=(const this_type& rhs) const { return !EQ(current_, rhs.current_); } - inline bool operator==(const this_type& rhs) 
const { return EQ(current_, rhs.current_); } + inline bool operator!=(const this_type& rhs) const noexcept { return !EQ(current_, rhs.current_); } + inline bool operator==(const this_type& rhs) const noexcept { return EQ(current_, rhs.current_); } protected: Iterator current_; private: - inline bool EQ(Iterator lhs, Iterator rhs) const { return lhs == rhs; } + inline bool EQ(Iterator lhs, Iterator rhs) const noexcept { return lhs == rhs; } }; } // namespace reindexer diff --git a/cpp_src/gtests/bench/fixtures/api_tv_simple.cc b/cpp_src/gtests/bench/fixtures/api_tv_simple.cc index 7c8cec8b9..2fe2dd32a 100644 --- a/cpp_src/gtests/bench/fixtures/api_tv_simple.cc +++ b/cpp_src/gtests/bench/fixtures/api_tv_simple.cc @@ -111,6 +111,9 @@ void ApiTvSimple::RegisterAllCases() { Register("FromCJSONPKOnly", &ApiTvSimple::FromCJSONPKOnly, this); Register("GetCJSON", &ApiTvSimple::GetCJSON, this); Register("ExtractField", &ApiTvSimple::ExtractField, this); + Register("SubQueryEq", &ApiTvSimple::SubQueryEq, this); + Register("SubQuerySet", &ApiTvSimple::SubQuerySet, this); + Register("SubQueryAggregate", &ApiTvSimple::SubQueryAggregate, this); // Those benches should be last, because they are recreating indexes cache Register("Query4CondRangeDropCache", &ApiTvSimple::Query4CondRangeDropCache, this)->Iterations(1000); @@ -197,12 +200,14 @@ reindexer::Error ApiTvSimple::Initialize() { err = db_->Commit(stringSelectNs_); if (!err.ok()) return err; - NamespaceDef mainNsDef{innerJoinLowSelectivityMainNs_}; + NamespaceDef mainNsDef{mainNs_}; mainNsDef.AddIndex("id", "hash", "int", IndexOpts().PK()).AddIndex("field", "hash", "int", IndexOpts()); err = db_->AddNamespace(mainNsDef); if (!err.ok()) return err; - NamespaceDef rightNsDef{innerJoinLowSelectivityRightNs_}; - rightNsDef.AddIndex("id", "hash", "int", IndexOpts().PK()).AddIndex("field", "hash", "int", IndexOpts()); + NamespaceDef rightNsDef{rightNs_}; + rightNsDef.AddIndex("id", "hash", "int", IndexOpts().PK()) + 
.AddIndex("field", "hash", "int", IndexOpts()) + .AddIndex("id_tree", "tree", "int", IndexOpts()); err = db_->AddNamespace(rightNsDef); if (!err.ok()) return err; @@ -227,6 +232,7 @@ reindexer::Error ApiTvSimple::Initialize() { reindexer::JsonBuilder bld2(wrSer_); bld2.Put("id", i); bld2.Put("field", i); + bld2.Put("id_tree", i); bld2.End(); err = rItem.FromJSON(wrSer_.Slice()); if (!err.ok()) return err; @@ -805,9 +811,9 @@ void ApiTvSimple::Query0CondInnerJoinUnlimit(benchmark::State& state) { void ApiTvSimple::Query0CondInnerJoinUnlimitLowSelectivity(benchmark::State& state) { AllocsTracker allocsTracker(state); for (auto _ : state) { // NOLINT(*deadcode.DeadStores) - Query q4join(innerJoinLowSelectivityRightNs_); + Query q4join(rightNs_); q4join.Where("id", CondLe, 250); - Query q(innerJoinLowSelectivityMainNs_); + Query q(mainNs_); q.InnerJoin("id", "id", CondEq, std::move(q4join)).ReqTotal(); QueryResults qres; @@ -816,6 +822,43 @@ void ApiTvSimple::Query0CondInnerJoinUnlimitLowSelectivity(benchmark::State& sta } } +void ApiTvSimple::SubQueryEq(benchmark::State& state) { + AllocsTracker allocsTracker(state); + for (auto _ : state) { // NOLINT(*deadcode.DeadStores) + Query q = Query(mainNs_).Where( + "id", CondEq, Query(rightNs_).Select({"field"}).Where("id", CondEq, VariantArray::Create(int(rand() % kTotalItemsMainJoinNs)))); + QueryResults qres; + auto err = db_->Select(q, qres); + if (!err.ok()) state.SkipWithError(err.what().c_str()); + } +} + +void ApiTvSimple::SubQuerySet(benchmark::State& state) { + AllocsTracker allocsTracker(state); + for (auto _ : state) { // NOLINT(*deadcode.DeadStores) + const int rangeMin = rand() % (kTotalItemsMainJoinNs - 500); + Query q = Query(mainNs_).Where( + "id", CondSet, Query(rightNs_).Select({"id"}).Where("id_tree", CondRange, VariantArray::Create(rangeMin, rangeMin + 500))); + QueryResults qres; + auto err = db_->Select(q, qres); + if (!err.ok()) state.SkipWithError(err.what().c_str()); + } +} + +void 
ApiTvSimple::SubQueryAggregate(benchmark::State& state) { + AllocsTracker allocsTracker(state); + for (auto _ : state) { // NOLINT(*deadcode.DeadStores) + Query q = Query(mainNs_).Where("id", CondEq, + Query(rightNs_) + .Aggregate(AggAvg, {"id"}) + .Where("id", CondLt, VariantArray::Create(int(rand() % kTotalItemsMainJoinNs))) + .Limit(500)); + QueryResults qres; + auto err = db_->Select(q, qres); + if (!err.ok()) state.SkipWithError(err.what().c_str()); + } +} + void ApiTvSimple::Query2CondInnerJoin(benchmark::State& state) { AllocsTracker allocsTracker(state); for (auto _ : state) { // NOLINT(*deadcode.DeadStores) @@ -1202,7 +1245,7 @@ void ApiTvSimple::query2CondIdSet(benchmark::State& state, const std::vector>& idsets); reindexer::Error prepareCJsonBench(); @@ -147,8 +150,8 @@ class ApiTvSimple : private BaseFixture { std::unordered_map>> idsets_; reindexer::WrSerializer wrSer_; std::string stringSelectNs_{"string_select_ns"}; - std::string innerJoinLowSelectivityMainNs_{"inner_join_low_selectivity_main_ns"}; - std::string innerJoinLowSelectivityRightNs_{"inner_join_low_selectivity_right_ns"}; + std::string mainNs_{"main_ns"}; + std::string rightNs_{"right_ns"}; std::string cjsonNsName_{"cjson_ns_name"}; std::unique_ptr itemForCjsonBench_; std::vector fieldsToExtract_; diff --git a/cpp_src/gtests/bench/fixtures/api_tv_simple_comparators.cc b/cpp_src/gtests/bench/fixtures/api_tv_simple_comparators.cc index 30846061c..4ffafedc9 100644 --- a/cpp_src/gtests/bench/fixtures/api_tv_simple_comparators.cc +++ b/cpp_src/gtests/bench/fixtures/api_tv_simple_comparators.cc @@ -4,7 +4,7 @@ #include "core/cjson/jsonbuilder.h" #include "core/nsselecter/joinedselector.h" #include "core/reindexer.h" -// #include "gtests/tools.h" +#include "gtests/tools.h" #include "tools/string_regexp_functions.h" #include "helpers.h" @@ -51,6 +51,7 @@ void ApiTvSimpleComparators::RegisterAllCases() { Register("GetEqArrayInt", &ApiTvSimpleComparators::GetEqArrayInt, this); 
Register("GetEqString", &ApiTvSimpleComparators::GetEqString, this); Register("GetByRangeIDAndSort", &ApiTvSimpleComparators::GetByRangeIDAndSort, this); + Register("GetUuidStr", &ApiTvSimpleComparators::GetUuidStr, this); Register("Query1Cond", &ApiTvSimpleComparators::Query1Cond, this); Register("Query1CondTotal", &ApiTvSimpleComparators::Query1CondTotal, this); @@ -93,6 +94,11 @@ reindexer::Error ApiTvSimpleComparators::Initialize() { locations_ = {"mos", "ct", "dv", "sth", "vlg", "sib", "ural"}; + uuids_.reserve(1000); + for (size_t i = 0; i < 1000; ++i) { + uuids_.emplace_back(randStrUuid()); + } + for (int i = 0; i < 10; i++) packages_.emplace_back(randomNumArray(20, 10000, 10)); for (int i = 0; i < 20; i++) priceIDs_.emplace_back(randomNumArray(10, 7000, 50)); @@ -159,6 +165,7 @@ reindexer::Item ApiTvSimpleComparators::MakeItem(benchmark::State&) { item["location"] = locations_.at(random(0, locations_.size() - 1)); item["start_time"] = start_times_.at(random(0, start_times_.size() - 1)); item["end_time"] = startTime + random(1, 5) * 1000; + item["uuid_str"] = uuids_[rand() % uuids_.size()]; return item; } @@ -237,6 +244,19 @@ void ApiTvSimpleComparators::GetByRangeIDAndSort(benchmark::State& state) { } } +void ApiTvSimpleComparators::GetUuidStr(benchmark::State& state) { + const auto& uuid = uuids_[rand() % uuids_.size()]; + AllocsTracker allocsTracker(state); + for (auto _ : state) { // NOLINT(*deadcode.DeadStores) + Query q(nsdef_.name); + q.Where("uuid_str", CondEq, uuid); + QueryResults qres; + auto err = db_->Select(q, qres); + if (!err.ok()) state.SkipWithError(err.what().c_str()); + if (!qres.Count()) state.SkipWithError("Results does not contain any value"); + } +} + void ApiTvSimpleComparators::Query1Cond(benchmark::State& state) { AllocsTracker allocsTracker(state); for (auto _ : state) { // NOLINT(*deadcode.DeadStores) diff --git a/cpp_src/gtests/bench/fixtures/api_tv_simple_comparators.h b/cpp_src/gtests/bench/fixtures/api_tv_simple_comparators.h 
index 96e5167f8..91e769bce 100644 --- a/cpp_src/gtests/bench/fixtures/api_tv_simple_comparators.h +++ b/cpp_src/gtests/bench/fixtures/api_tv_simple_comparators.h @@ -19,7 +19,8 @@ class ApiTvSimpleComparators : private BaseFixture { .AddIndex("price_id", "-", "int", IndexOpts().Array()) .AddIndex("location", "-", "string", IndexOpts()) .AddIndex("end_time", "-", "int", IndexOpts()) - .AddIndex("start_time", "-", "int", IndexOpts()); + .AddIndex("start_time", "-", "int", IndexOpts()) + .AddIndex("uuid_str", "-", "string", IndexOpts()); } void RegisterAllCases(); @@ -36,6 +37,7 @@ class ApiTvSimpleComparators : private BaseFixture { void GetEqArrayInt(State& state); void GetEqString(State& state); void GetByRangeIDAndSort(State& state); + void GetUuidStr(State& state); void Query1Cond(State& state); void Query1CondTotal(State& state); @@ -61,6 +63,7 @@ class ApiTvSimpleComparators : private BaseFixture { std::vector start_times_; std::vector> packages_; std::vector> priceIDs_; + std::vector uuids_; #if !defined(REINDEX_WITH_ASAN) && !defined(REINDEX_WITH_TSAN) && !defined(RX_WITH_STDLIB_DEBUG) constexpr static unsigned kTotalItemsStringSelectNs = 100'000; #else // !defined(REINDEX_WITH_ASAN) && !defined(REINDEX_WITH_TSAN) && !defined(RX_WITH_STDLIB_DEBUG) diff --git a/cpp_src/gtests/bench/fixtures/ft_fixture.cc b/cpp_src/gtests/bench/fixtures/ft_fixture.cc index f7358ae57..649c9e264 100644 --- a/cpp_src/gtests/bench/fixtures/ft_fixture.cc +++ b/cpp_src/gtests/bench/fixtures/ft_fixture.cc @@ -368,16 +368,16 @@ void FullText::Fast3PhraseLowDiversity(State& state) { size_t cnt = 0; for (auto _ : state) { // NOLINT(*deadcode.DeadStores) Query q(lowWordsDiversityNsDef_.name); - std::string& w1 = + const std::string& w1 = words2_.at(randomGenerator_(randomEngine_, std::uniform_int_distribution::param_type{0, int(words2_.size() - 1)})); - std::string& w2 = + const std::string& w2 = words2_.at(randomGenerator_(randomEngine_, std::uniform_int_distribution::param_type{0, 
int(words2_.size() - 1)})); - std::string& w3 = + const std::string& w3 = words2_.at(randomGenerator_(randomEngine_, std::uniform_int_distribution::param_type{0, int(words2_.size() - 1)})); std::string ftQuery; ftQuery.reserve(w1.size() + w2.size() + w3.size() + 32); ftQuery.append("'").append(w1).append(" ").append(w2).append("' ").append(w3); - q.Where("search", CondEq, ftQuery); + q.Where("search", CondEq, std::move(ftQuery)); QueryResults qres; auto err = db_->Select(q, qres); @@ -394,16 +394,16 @@ void FullText::Fast3WordsLowDiversity(State& state) { size_t cnt = 0; for (auto _ : state) { // NOLINT(*deadcode.DeadStores) Query q(lowWordsDiversityNsDef_.name); - std::string& w1 = + const std::string& w1 = words2_.at(randomGenerator_(randomEngine_, std::uniform_int_distribution::param_type{0, int(words2_.size() - 1)})); - std::string& w2 = + const std::string& w2 = words2_.at(randomGenerator_(randomEngine_, std::uniform_int_distribution::param_type{0, int(words2_.size() - 1)})); - std::string& w3 = + const std::string& w3 = words2_.at(randomGenerator_(randomEngine_, std::uniform_int_distribution::param_type{0, int(words2_.size() - 1)})); std::string ftQuery; ftQuery.reserve(w1.size() + w2.size() + w3.size() + 32); ftQuery.append("+").append(w1).append(" +").append(w2).append(" +").append(w3); - q.Where("search", CondEq, ftQuery); + q.Where("search", CondEq, std::move(ftQuery)); QueryResults qres; auto err = db_->Select(q, qres); @@ -420,14 +420,14 @@ void FullText::Fast2PhraseLowDiversity(State& state) { size_t cnt = 0; for (auto _ : state) { // NOLINT(*deadcode.DeadStores) Query q(lowWordsDiversityNsDef_.name); - std::string& w1 = + const std::string& w1 = words2_.at(randomGenerator_(randomEngine_, std::uniform_int_distribution::param_type{0, int(words2_.size() - 1)})); - std::string& w2 = + const std::string& w2 = words2_.at(randomGenerator_(randomEngine_, std::uniform_int_distribution::param_type{0, int(words2_.size() - 1)})); std::string ftQuery; 
ftQuery.reserve(w1.size() + w2.size() + 32); ftQuery.append("'").append(w1).append(" ").append(w2).append("'~50"); - q.Where("search", CondEq, ftQuery); + q.Where("search", CondEq, std::move(ftQuery)); QueryResults qres; auto err = db_->Select(q, qres); @@ -444,14 +444,14 @@ void FullText::Fast2AndWordLowDiversity(State& state) { size_t cnt = 0; for (auto _ : state) { // NOLINT(*deadcode.DeadStores) Query q(lowWordsDiversityNsDef_.name); - std::string& w1 = + const std::string& w1 = words2_.at(randomGenerator_(randomEngine_, std::uniform_int_distribution::param_type{0, int(words2_.size() - 1)})); - std::string& w2 = + const std::string& w2 = words2_.at(randomGenerator_(randomEngine_, std::uniform_int_distribution::param_type{0, int(words2_.size() - 1)})); std::string ftQuery; ftQuery.reserve(w1.size() + w2.size() + 32); ftQuery.append("+").append(w1).append(" +").append(w2); - q.Where("search", CondEq, ftQuery); + q.Where("search", CondEq, std::move(ftQuery)); QueryResults qres; auto err = db_->Select(q, qres); @@ -468,16 +468,16 @@ void FullText::Fast3PhraseWithAreasLowDiversity(State& state) { size_t cnt = 0; for (auto _ : state) { // NOLINT(*deadcode.DeadStores) Query q(lowWordsDiversityNsDef_.name); - std::string& w1 = + const std::string& w1 = words2_.at(randomGenerator_(randomEngine_, std::uniform_int_distribution::param_type{0, int(words2_.size() - 1)})); - std::string& w2 = + const std::string& w2 = words2_.at(randomGenerator_(randomEngine_, std::uniform_int_distribution::param_type{0, int(words2_.size() - 1)})); - std::string& w3 = + const std::string& w3 = words2_.at(randomGenerator_(randomEngine_, std::uniform_int_distribution::param_type{0, int(words2_.size() - 1)})); std::string ftQuery; ftQuery.reserve(w1.size() + w2.size() + w3.size() + 32); ftQuery.append("'").append(w1).append(" ").append(w2).append("' ").append(w3); - q.Where("search", CondEq, ftQuery); + q.Where("search", CondEq, std::move(ftQuery)); q.AddFunction("search = highlight(!,!)"); 
QueryResults qres; auto err = db_->Select(q, qres); @@ -492,7 +492,7 @@ void FullText::Fast1WordWithAreaHighDiversity(State& state) { size_t cnt = 0; for (auto _ : state) { // NOLINT(*deadcode.DeadStores) Query q(nsdef_.name); - std::string& word = + const std::string& word = words_.at(randomGenerator_(randomEngine_, std::uniform_int_distribution::param_type{0, int(words_.size() - 1)})); q.Where("searchfast", CondEq, word); q.AddFunction("search = highlight(!,!)"); @@ -509,16 +509,16 @@ void FullText::Fast3WordsWithAreasLowDiversity(State& state) { size_t cnt = 0; for (auto _ : state) { // NOLINT(*deadcode.DeadStores) Query q(lowWordsDiversityNsDef_.name); - std::string& w1 = + const std::string& w1 = words2_.at(randomGenerator_(randomEngine_, std::uniform_int_distribution::param_type{0, int(words2_.size() - 1)})); - std::string& w2 = + const std::string& w2 = words2_.at(randomGenerator_(randomEngine_, std::uniform_int_distribution::param_type{0, int(words2_.size() - 1)})); - std::string& w3 = + const std::string& w3 = words2_.at(randomGenerator_(randomEngine_, std::uniform_int_distribution::param_type{0, int(words2_.size() - 1)})); std::string ftQuery; ftQuery.reserve(w1.size() + w2.size() + w3.size() + 32); ftQuery.append(w1).append(" ").append(w2).append(" ").append(w3); - q.Where("search", CondEq, ftQuery); + q.Where("search", CondEq, std::move(ftQuery)); q.AddFunction("search = highlight(!,!)"); QueryResults qres; auto err = db_->Select(q, qres); @@ -603,7 +603,7 @@ void FullText::Fast2WordsMatch(benchmark::State& state) { words_.at(randomGenerator_(randomEngine_, std::uniform_int_distribution::param_type{0, int(words_.size() - 1)})) + " " + words_.at(randomGenerator_(randomEngine_, std::uniform_int_distribution::param_type{0, int(words_.size() - 1)})); - q.Where("searchfast", CondEq, words); + q.Where("searchfast", CondEq, std::move(words)); QueryResults qres; auto err = db_->Select(q, qres); if (!err.ok()) state.SkipWithError(err.what().c_str()); @@ -639,7 
+639,7 @@ void FullText::Fuzzy2WordsMatch(benchmark::State& state) { words_.at(randomGenerator_(randomEngine_, std::uniform_int_distribution::param_type{0, int(words_.size() - 1)})) + " " + words_.at(randomGenerator_(randomEngine_, std::uniform_int_distribution::param_type{0, int(words_.size() - 1)})); - q.Where("searchfuzzy", CondEq, words); + q.Where("searchfuzzy", CondEq, std::move(words)); QueryResults qres; auto err = db_->Select(q, qres); @@ -656,9 +656,7 @@ void FullText::Fast1PrefixMatch(benchmark::State& state) { for (auto _ : state) { // NOLINT(*deadcode.DeadStores) TIMEMEASURE(); Query q(nsdef_.name); - - auto word = MakePrefixWord(); - q.Where("searchfast", CondEq, word); + q.Where("searchfast", CondEq, MakePrefixWord()); QueryResults qres; auto err = db_->Select(q, qres); @@ -675,9 +673,7 @@ void FullText::Fast2PrefixMatch(benchmark::State& state) { for (auto _ : state) { // NOLINT(*deadcode.DeadStores) TIMEMEASURE(); Query q(nsdef_.name); - - auto words = MakePrefixWord() + " " + MakePrefixWord(); - q.Where("searchfast", CondEq, words); + q.Where("searchfast", CondEq, MakePrefixWord().append(" ").append(MakePrefixWord())); QueryResults qres; auto err = db_->Select(q, qres); @@ -692,9 +688,7 @@ void FullText::Fuzzy1PrefixMatch(benchmark::State& state) { size_t cnt = 0; for (auto _ : state) { // NOLINT(*deadcode.DeadStores) Query q(nsdef_.name); - - auto word = MakePrefixWord(); - q.Where("searchfuzzy", CondEq, word); + q.Where("searchfuzzy", CondEq, MakePrefixWord()); QueryResults qres; auto err = db_->Select(q, qres); @@ -709,9 +703,7 @@ void FullText::Fuzzy2PrefixMatch(benchmark::State& state) { size_t cnt = 0; for (auto _ : state) { // NOLINT(*deadcode.DeadStores) Query q(nsdef_.name); - - std::string words = MakePrefixWord() + " " + MakePrefixWord(); - q.Where("searchfuzzy", CondEq, words); + q.Where("searchfuzzy", CondEq, MakePrefixWord().append(" ").append(MakePrefixWord())); QueryResults qres; auto err = db_->Select(q, qres); @@ -728,8 +720,7 @@ 
void FullText::Fast1SuffixMatch(benchmark::State& state) { for (auto _ : state) { // NOLINT(*deadcode.DeadStores) TIMEMEASURE(); Query q(nsdef_.name); - std::string word = MakeSuffixWord(); - q.Where("searchfast", CondEq, word); + q.Where("searchfast", CondEq, MakeSuffixWord()); QueryResults qres; auto err = db_->Select(q, qres); if (!err.ok()) state.SkipWithError(err.what().c_str()); @@ -745,9 +736,7 @@ void FullText::Fast2SuffixMatch(benchmark::State& state) { for (auto _ : state) { // NOLINT(*deadcode.DeadStores) TIMEMEASURE(); Query q(nsdef_.name); - - std::string words = MakeSuffixWord() + " " + MakeSuffixWord(); - q.Where("searchfast", CondEq, words); + q.Where("searchfast", CondEq, MakeSuffixWord().append(" ").append(MakeSuffixWord())); QueryResults qres; auto err = db_->Select(q, qres); @@ -762,9 +751,7 @@ void FullText::Fuzzy1SuffixMatch(benchmark::State& state) { size_t cnt = 0; for (auto _ : state) { // NOLINT(*deadcode.DeadStores) Query q(nsdef_.name); - - std::string word = MakeSuffixWord(); - q.Where("searchfuzzy", CondEq, word); + q.Where("searchfuzzy", CondEq, MakeSuffixWord()); QueryResults qres; auto err = db_->Select(q, qres); @@ -779,9 +766,7 @@ void FullText::Fuzzy2SuffixMatch(benchmark::State& state) { size_t cnt = 0; for (auto _ : state) { // NOLINT(*deadcode.DeadStores) Query q(nsdef_.name); - - std::string words = MakeSuffixWord() + " " + MakeSuffixWord(); - q.Where("searchfuzzy", CondEq, words); + q.Where("searchfuzzy", CondEq, MakeSuffixWord().append(" ").append(MakeSuffixWord())); QueryResults qres; auto err = db_->Select(q, qres); @@ -798,9 +783,7 @@ void FullText::Fast1TypoWordMatch(benchmark::State& state) { for (auto _ : state) { // NOLINT(*deadcode.DeadStores) TIMEMEASURE(); Query q(nsdef_.name); - - std::string word = MakeTypoWord(); - q.Where("searchfast", CondEq, word); + q.Where("searchfast", CondEq, MakeTypoWord()); QueryResults qres; auto err = db_->Select(q, qres); @@ -817,9 +800,7 @@ void 
FullText::Fast2TypoWordMatch(benchmark::State& state) { for (auto _ : state) { // NOLINT(*deadcode.DeadStores) TIMEMEASURE(); Query q(nsdef_.name); - - std::string words = MakeTypoWord() + " " + MakeTypoWord(); - q.Where("searchfast", CondEq, words); + q.Where("searchfast", CondEq, MakeTypoWord().append(" ").append(MakeTypoWord())); QueryResults qres; auto err = db_->Select(q, qres); @@ -834,9 +815,7 @@ void FullText::Fuzzy1TypoWordMatch(benchmark::State& state) { size_t cnt = 0; for (auto _ : state) { // NOLINT(*deadcode.DeadStores) Query q(nsdef_.name); - - std::string word = MakeTypoWord(); - q.Where("searchfuzzy", CondEq, word); + q.Where("searchfuzzy", CondEq, MakeTypoWord()); QueryResults qres; auto err = db_->Select(q, qres); @@ -851,9 +830,7 @@ void FullText::Fuzzy2TypoWordMatch(benchmark::State& state) { size_t cnt = 0; for (auto _ : state) { // NOLINT(*deadcode.DeadStores) Query q(nsdef_.name); - - std::string words = MakeTypoWord() + " " + MakeTypoWord(); - q.Where("searchfuzzy", CondEq, words); + q.Where("searchfuzzy", CondEq, MakeTypoWord().append(" ").append(MakeTypoWord())); QueryResults qres; auto err = db_->Select(q, qres); diff --git a/cpp_src/gtests/tests/API/base_tests.cc b/cpp_src/gtests/tests/API/base_tests.cc index 491319105..5bfb1f41d 100644 --- a/cpp_src/gtests/tests/API/base_tests.cc +++ b/cpp_src/gtests/tests/API/base_tests.cc @@ -1,10 +1,8 @@ -#include #include #include "reindexer_api.h" #include "tools/errors.h" #include "core/item.h" -#include "core/keyvalue/key_string.h" #include "core/keyvalue/variant.h" #include "core/queryresults/joinresults.h" #include "core/reindexer.h" @@ -14,10 +12,8 @@ #include #include -#include "debug/backtrace.h" #include "core/keyvalue/p_string.h" -#include "gason/gason.h" #include "server/loggerwrapper.h" #include "tools/serializer.h" @@ -72,7 +68,7 @@ TEST_F(ReindexerApi, RenameNamespace) { err = rt.reindexer->OpenNamespace(existingNamespace); ASSERT_TRUE(err.ok()) << err.what(); - auto testInList = 
[&](const std::string& testNamespaceName, bool inList) { + auto testInList = [&](std::string_view testNamespaceName, bool inList) { std::vector namespacesList; err = rt.reindexer->EnumNamespaces(namespacesList, reindexer::EnumNamespacesOpts()); ASSERT_TRUE(err.ok()) << err.what(); @@ -85,7 +81,7 @@ TEST_F(ReindexerApi, RenameNamespace) { } }; - auto getRowsInJSON = [&](const std::string& namespaceName, std::vector& resStrings) { + auto getRowsInJSON = [&](std::string_view namespaceName, std::vector& resStrings) { QueryResults result; auto err = rt.reindexer->Select(Query(namespaceName), result); ASSERT_TRUE(err.ok()) << err.what(); @@ -1061,7 +1057,7 @@ TEST_F(ReindexerApi, SortByUnorderedIndexes) { } TEST_F(ReindexerApi, SortByUnorderedIndexWithJoins) { - const std::string secondNamespace = "test_namespace_2"; + constexpr std::string_view secondNamespace = "test_namespace_2"; std::vector secondNamespacePKs; auto err = rt.reindexer->OpenNamespace(default_namespace, StorageOpts().Enabled(false)); @@ -1318,7 +1314,7 @@ TEST_F(ReindexerApi, DslFieldsTest) { } TEST_F(ReindexerApi, DistinctQueriesEncodingTest) { - const std::string sql = "select distinct(country), distinct(city) from clients;"; + constexpr std::string_view sql = "select distinct(country), distinct(city) from clients;"; Query q1 = Query::FromSQL(sql); EXPECT_EQ(q1.Entries().Size(), 0); @@ -1441,18 +1437,19 @@ TEST_F(ReindexerApi, ContextCancelingTest) { } TEST_F(ReindexerApi, JoinConditionsSqlParserTest) { - const std::string sql1 = "SELECT * FROM ns WHERE a > 0 AND INNER JOIN (SELECT * FROM ns2 WHERE b > 10 AND c = 1) ON ns2.id = ns.fk_id"; + constexpr std::string_view sql1 = + "SELECT * FROM ns WHERE a > 0 AND INNER JOIN (SELECT * FROM ns2 WHERE b > 10 AND c = 1) ON ns2.id = ns.fk_id"; const auto q1 = Query::FromSQL(sql1); ASSERT_EQ(q1.GetSQL(), sql1); - const std::string sql2 = - "SELECT * FROM ns WHERE a > 0 AND INNER JOIN (SELECT * FROM ns2 WHERE b > 10 AND c = 1 LIMIT 0) ON ns2.id = ns.fk_id"; + 
constexpr std::string_view sql2 = + "SELECT * FROM ns WHERE a > 0 AND INNER JOIN (SELECT * FROM ns2 WHERE b > 10 AND c = 1 LIMIT 0) ON ns2.id = ns.fk_id"; const auto q2 = Query::FromSQL(sql2); ASSERT_EQ(q2.GetSQL(), sql2); } TEST_F(ReindexerApi, UpdateWithBoolParserTest) { - const std::string sql = "UPDATE ns SET flag1 = true,flag2 = false WHERE id > 100"; + constexpr std::string_view sql = "UPDATE ns SET flag1 = true,flag2 = false WHERE id > 100"; Query query = Query::FromSQL(sql); ASSERT_EQ(query.UpdateFields().size(), 2); EXPECT_EQ(query.UpdateFields().front().Column(), "flag1"); @@ -1469,7 +1466,7 @@ TEST_F(ReindexerApi, UpdateWithBoolParserTest) { } TEST_F(ReindexerApi, EqualPositionsSqlParserTest) { - const std::string sql = + constexpr std::string_view sql = "SELECT * FROM ns WHERE (f1 = 1 AND f2 = 2 OR f3 = 3 equal_position(f1, f2) equal_position(f1, f3)) OR (f4 = 4 AND f5 > 5 " "equal_position(f4, f5))"; @@ -1500,13 +1497,17 @@ TEST_F(ReindexerApi, SchemaSuggestions) { Error err = rt.reindexer->OpenNamespace(default_namespace); ASSERT_TRUE(err.ok()) << err.what(); + err = rt.reindexer->OpenNamespace("second_ns"); + ASSERT_TRUE(err.ok()) << err.what(); + // clang-format off - const std::string jsonschema = R"xxx( + constexpr std::string_view jsonschema = R"xxx( { "required": [ "Countries", "Nest_fake", - "nested" + "nested", + "second_field" ], "properties": { "Countries": { @@ -1538,29 +1539,125 @@ TEST_F(ReindexerApi, SchemaSuggestions) { "additionalProperties": false, "type": "object" } + "second_field": { + "type": "number" + }, }, "additionalProperties": false, "type": "object" })xxx"; // clang-format on + // clang-format off + constexpr std::string_view jsonschema2 = R"xxx( + { + "required": [ + "id", + "Field", + ], + "properties": { + "id": { + "type": "number" + }, + "Field": { + "type": "number" + } + }, + "additionalProperties": false, + "type": "object" + })xxx"; + // clang-format on err = rt.reindexer->SetSchema(default_namespace, jsonschema); 
ASSERT_TRUE(err.ok()) << err.what(); - auto validateSuggestions = [this](std::string_view sql, const std::unordered_set& expected) { + err = rt.reindexer->SetSchema("second_ns", jsonschema2); + ASSERT_TRUE(err.ok()) << err.what(); + + auto validateSuggestions = [this](std::string_view sql, const std::unordered_set& expected, size_t position) { std::vector suggestions; - auto err = rt.reindexer->GetSqlSuggestions(sql, sql.size() - 1, suggestions); + auto err = rt.reindexer->GetSqlSuggestions(sql, position, suggestions); ASSERT_TRUE(err.ok()) << err.what(); - ASSERT_EQ(suggestions.size(), expected.size()) << sql; for (auto& sugg : suggestions) { - EXPECT_TRUE(expected.find(sugg) != expected.end()) << "Unexpected suggestion: " << sugg; + EXPECT_TRUE(expected.find(sugg) != expected.end()) << sql << '\n' + << std::string(position, ' ') << "^\nUnexpected suggestion: " << sugg; + } + for (auto& expSugg : expected) { + EXPECT_TRUE(std::find(suggestions.begin(), suggestions.end(), expSugg) != suggestions.end()) + << sql << '\n' + << std::string(position, ' ') << "^\nExpected but not found suggestion: " << expSugg; } }; - validateSuggestions("select * from test_namespace where ne", {"Nest_fake", "nested"}); - validateSuggestions("select * from test_namespace where nested", {}); - validateSuggestions("select * from test_namespace where nested.", {".Name", ".Naame", ".Age"}); - validateSuggestions("select * from test_namespace where nested.Na", {".Name", ".Naame"}); + struct { + std::string_view sql; + std::unordered_set expected; + size_t position = sql.empty() ? 
0 : sql.size() - 1; + } testData[]{ + {"select * from test_namespace where ne", {"Nest_fake", "nested"}}, + {"select * from test_namespace where nested", {}}, + {"select * from test_namespace where nested.", {".Name", ".Naame", ".Age"}}, + {"select * from test_namespace where nested.Na", {".Name", ".Naame"}}, + + {"", {"explain", "select", "delete", "update", "truncate"}}, + {"s", {"select"}}, + {"select", {}}, + {"select ", {"*", "avg", "min", "max", "facet", "sum", "distinct", "rank", "count", "count_cached"}}, + {"select *,", {}}, + {"select *, ", {"*", "avg", "min", "max", "facet", "sum", "distinct", "rank", "count", "count_cached"}}, + {"select *, f", {"facet", "Field"}}, + {"select f", {"facet", "Field"}}, + {"select * ", {"from"}}, + {"select * f", {"from"}}, + {"select * from ", + {"test_namespace", "second_ns", "#memstats", "#activitystats", "#config", "#queriesperfstats", "#namespaces", "#perfstats", + "#clientsstats"}}, + {"select * from te", {"test_namespace"}}, + {"select * from test_namespace ", + {"where", ";", "equal_position", "inner", "join", "left", "limit", "merge", "offset", "or", "order"}}, + {"select * from test_namespace w", {"where"}}, + {"select * from test_namespace where ", + {"second_field", "ST_DWithin", "Countries", "nested", "Nest_fake", "inner", "join", "left", "not", "equal_position"}}, + {"select * from test_namespace where s", {"second_field", "ST_DWithin"}}, + {"select * from second_ns where i", {"id", "inner"}}, + {"select * from test_namespace where (", {}}, + {"select * from test_namespace where (s", {"second_field", "ST_DWithin", "select"}}, + {"select * from test_namespace where (select m", {"max", "min"}}, + {"select * from test_namespace where (select i", {"id", "items_count", "is_subscribed", "ip"}}, + {"select * from test_namespace where (select second_field f", {"from"}}, + {"select * from test_namespace where (select id from s", {"second_ns"}}, + {"select * from test_namespace where (select Field from second_ns where 
", {"id", "ST_DWithin", "Field", "not", "equal_position"}}, + {"select * from test_namespace where C", {"Countries"}}, + {"select * from test_namespace where Countries == (", {}}, + {"select * from test_namespace where Countries == (s", {"select"}}, + {"select * from test_namespace where Countries == (select m", {"max", "min"}}, + {"select * from test_namespace where Countries == (select i", {"id", "ip", "is_subscribed", "items_count"}}, + {"select * from test_namespace where Countries == (select second_field f", {"from"}}, + {"select * from test_namespace where Countries == (select second_field from ", + {"test_namespace", "second_ns", "#memstats", "#activitystats", "#config", "#queriesperfstats", "#namespaces", "#perfstats", + "#clientsstats"}}, + {"select * from test_namespace where Countries == (select second_field from s", {"second_ns"}}, + {"select * from test_namespace where i", {"inner"}}, + {"select * from test_namespace where inner j", {"join"}}, + {"select * from test_namespace where inner join s", {"second_ns"}}, + {"select * from test_namespace where inner join (s", {"select"}}, + {"select * from test_namespace where inner join (select m", {"min", "max"}}, + {"select * from test_namespace where inner join (select i", {"id", "ip", "is_subscribed", "items_count"}}, + {"select * from test_namespace where inner join (select second_field f", {"from"}}, + {"select * from test_namespace where inner join (select second_field from s", {"second_ns"}}, + {"SELECT * FROM ns WHERE id = ( ", {"null", "empty", "not", "select"}}, + }; + + for (const auto& [sql, expected, position] : testData) { + if (sql.empty() || sql.back() == ' ') { + validateSuggestions(sql, expected, position); + } else { + for (const auto& td : testData) { + if (reindexer::checkIfStartsWith(sql, td.sql)) { + validateSuggestions(td.sql, expected, position); + } + } + } + } } TEST_F(ReindexerApi, LoggerWriteInterruptTest) { @@ -1821,7 +1918,7 @@ TEST_F(ReindexerApi, UpdateDoublesItemByPKIndex) { { 
reindexer::QueryResults qr; - const std::string sql = "UPDATE test_namespace SET v1=125, id = 3 WHERE id = 2"; + constexpr std::string_view sql = "UPDATE test_namespace SET v1=125, id = 3 WHERE id = 2"; Query query = Query::FromSQL(sql); err = rt.reindexer->Update(query, qr); ASSERT_EQ(err.code(), errLogic); diff --git a/cpp_src/gtests/tests/fixtures/ft_api.h b/cpp_src/gtests/tests/fixtures/ft_api.h index 542630dec..4664eaea2 100644 --- a/cpp_src/gtests/tests/fixtures/ft_api.h +++ b/cpp_src/gtests/tests/fixtures/ft_api.h @@ -111,7 +111,7 @@ class FTApi : public ::testing::TestWithParam fields; - reindexer::fast_hash_set stopWords; + reindexer::fast_hash_set stopWords; std::string extraWordSymbols = "-/+"; }; int counter_ = 0; diff --git a/cpp_src/gtests/tests/fixtures/join_selects_api.h b/cpp_src/gtests/tests/fixtures/join_selects_api.h index 084e2b307..f39ef4e87 100644 --- a/cpp_src/gtests/tests/fixtures/join_selects_api.h +++ b/cpp_src/gtests/tests/fixtures/join_selects_api.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include @@ -398,12 +399,21 @@ class JoinSelectsApi : public ReindexerApi { EXPECT_EQ(err.what(), expectedText) << sql; } { - Query q = Query::FromSQL(sql); + const Query q = Query::FromSQL(sql); auto err = rt.reindexer->Select(q, qr); EXPECT_EQ(err.code(), expectedCode) << sql; EXPECT_EQ(err.what(), expectedText) << sql; } } + void ValidateQueryThrow(std::string_view sql, ErrorCode expectedCode, std::string_view expectedRegex) { + QueryResults qr; + { + auto err = rt.reindexer->Select(sql, qr); + EXPECT_EQ(err.code(), expectedCode) << sql; + EXPECT_THAT(err.what(), testing::ContainsRegex(expectedRegex)) << sql; + } + EXPECT_THROW(const Query q = Query::FromSQL(sql), Error) << sql; + } static std::string addQuotes(const std::string& str) { std::string output; diff --git a/cpp_src/gtests/tests/fixtures/queries_verifier.h b/cpp_src/gtests/tests/fixtures/queries_verifier.h index 6929958fc..033a149ed 100644 --- 
a/cpp_src/gtests/tests/fixtures/queries_verifier.h +++ b/cpp_src/gtests/tests/fixtures/queries_verifier.h @@ -1,7 +1,16 @@ #pragma once #include + +#if defined(__GNUC__) && (__GNUC__ == 12) && defined(REINDEX_WITH_ASAN) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wmaybe-uninitialized" +#include +#pragma GCC diagnostic pop +#else // REINDEX_WITH_ASAN #include +#endif // REINDEX_WITH_ASAN + #include #include "core/nsselecter/joinedselectormock.h" #include "core/nsselecter/sortexpression.h" diff --git a/cpp_src/gtests/tests/unit/ft/ft_generic.cc b/cpp_src/gtests/tests/unit/ft/ft_generic.cc index 5b4c03b6e..d3a0233b5 100644 --- a/cpp_src/gtests/tests/unit/ft/ft_generic.cc +++ b/cpp_src/gtests/tests/unit/ft/ft_generic.cc @@ -1311,6 +1311,87 @@ TEST_P(FTGenericApi, ExplainWithFtPreselect) { } } +TEST_P(FTGenericApi, StopWordsWithMorphemes) { + reindexer::FtFastConfig cfg = GetDefaultConfig(); + + Init(cfg); + Add("Шахматы из слоновой кости"sv); + Add("Мат в эфире "sv); + Add("Известняк"sv); + Add("Известия"sv); + Add("Изверг"sv); + + Add("Подобрал подосиновики, положил в лубочек"sv); + Add("Подопытный кролик"sv); + Add("Шла Саша по шоссе"sv); + + Add("Зайка серенький под елочкой скакал"sv); + Add("За Альянс! (с)"sv); + Add("Заноза в пальце"sv); + + Add("На западном фронте без перемен"sv); + Add("Наливные яблочки"sv); + Add("Нарком СССР"sv); + + CheckResults("*из*", {{"!Известняк!", ""}, {"!Известия!", ""}, {"!Изверг!", ""}}, false); + CheckResults("из", {}, false); + + CheckResults("*под*", {{"!Подобрал подосиновики!, положил в лубочек", ""}, {"!Подопытный! кролик", ""}}, false); + CheckResults("под", {}, false); + + CheckResults( + "*за*", {{"!Зайка! серенький под елочкой скакал", ""}, {"!Заноза! в пальце", ""}, {"На !западном! фронте без перемен", ""}}, false); + CheckResults("за", {}, false); + + CheckResults("*на*", + { + {"!Наливные! яблочки", ""}, + {"!Нарком! 
СССР", ""}, + }, + false); + CheckResults("на", {}, false); + + cfg.stopWords.clear(); + + cfg.stopWords.insert({"на"}); + cfg.stopWords.insert({"мат", reindexer::StopWord::Type::Morpheme}); + + SetFTConfig(cfg); + + CheckResults("*из*", {{"Шахматы !из! слоновой кости", ""}, {"!Известняк!", ""}, {"!Известия!", ""}, {"!Изверг!", ""}}, false); + CheckResults("из", {{"Шахматы !из! слоновой кости", ""}}, false); + + CheckResults( + "*под*", + {{"!Подобрал подосиновики!, положил в лубочек", ""}, {"!Подопытный! кролик", ""}, {"Зайка серенький !под! елочкой скакал", ""}}, + false); + CheckResults("под", {{"Зайка серенький !под! елочкой скакал", ""}}, false); + + CheckResults("*по*", + {{"Шла Саша !по! шоссе", ""}, + {"!Подобрал подосиновики, положил! в лубочек", ""}, + {"!Подопытный! кролик", ""}, + {"Зайка серенький !под! елочкой скакал", ""}}, + false); + CheckResults("по~", {{"Шла Саша !по! шоссе", ""}, {"Зайка серенький !под! елочкой скакал", ""}}, false); + CheckResults("по", {{"Шла Саша !по! шоссе", ""}}, false); + + CheckResults("*мат*", {{"!Шахматы! из слоновой кости", ""}}, false); + CheckResults("мат", {}, false); + + CheckResults("*за*", + {{"!Зайка! серенький под елочкой скакал", ""}, + {"!Заноза! в пальце", ""}, + {"!За! Альянс! (с)", ""}, + {"На !западном! фронте без перемен", ""}}, + false); + CheckResults("за", {{"!За! Альянс! 
(с)", ""}}, false); + + CheckResults("*на*", {}, false); + CheckResults("на~", {}, false); + CheckResults("на", {}, false); +} + INSTANTIATE_TEST_SUITE_P(, FTGenericApi, ::testing::Values(reindexer::FtFastConfig::Optimization::Memory, reindexer::FtFastConfig::Optimization::CPU), [](const auto& info) { diff --git a/cpp_src/gtests/tests/unit/join_test.cc b/cpp_src/gtests/tests/unit/join_test.cc index 6fb37ea94..c36fd00a5 100644 --- a/cpp_src/gtests/tests/unit/join_test.cc +++ b/cpp_src/gtests/tests/unit/join_test.cc @@ -608,7 +608,7 @@ TEST_F(JoinSelectsApi, TestNestedJoinsError) { for (auto& firstJoin : joinTypes) { for (auto& secondJoin : joinTypes) { auto sql = fmt::sprintf(sqlPattern, firstJoin, secondJoin); - ValidateQueryError(sql, errParams, "JOINs nested into the other JOINs are not supported"); + ValidateQueryThrow(sql, errParseSQL, "Expected ')', but found .*, line: 1 column: .*"); } } } @@ -620,7 +620,7 @@ TEST_F(JoinSelectsApi, TestNestedMergesInJoinsError) { auto joinTypes = {"inner join", "join", "left join"}; for (auto& join : joinTypes) { auto sql = fmt::sprintf(sqlPattern, join); - ValidateQueryError(sql, errParams, "MERGEs nested into the JOINs are not supported"); + ValidateQueryThrow(sql, errParseSQL, "Expected ')', but found merge, line: 1 column: .*"); } } diff --git a/cpp_src/gtests/tests/unit/queries_test.cc b/cpp_src/gtests/tests/unit/queries_test.cc index 75118509d..a09710bce 100644 --- a/cpp_src/gtests/tests/unit/queries_test.cc +++ b/cpp_src/gtests/tests/unit/queries_test.cc @@ -222,8 +222,8 @@ TEST_F(QueriesApi, SqlParseGenerate) { .Or() .Where("age", CondSet, {"1", "2", "3", "4"}) .Limit(10000000)}, - {"SELECT * FROM test_namespace WHERE INNER JOIN join_ns ON test_namespace.id = join_ns.id ORDER BY 'year + join_ns.year * (5 - " - "rand())'", + {"SELECT * FROM test_namespace WHERE INNER JOIN join_ns ON test_namespace.id = join_ns.id " + "ORDER BY 'year + join_ns.year * (5 - rand())'", Query{"test_namespace"}.InnerJoin("id", "id", CondEq, 
Query{"join_ns"}).Sort("year + join_ns.year * (5 - rand())", false)}, {"SELECT * FROM "s + geomNs + " WHERE ST_DWithin(" + kFieldNamePointNonIndex + ", ST_GeomFromText('POINT(1.25 -7.25)'), 0.5)", Query{geomNs}.DWithin(kFieldNamePointNonIndex, reindexer::Point{1.25, -7.25}, 0.5)}, @@ -242,6 +242,74 @@ TEST_F(QueriesApi, SqlParseGenerate) { Query{"main_ns"}.Where("id", CondGt, Query{"second_ns"}.Aggregate(AggAvg, {"id"}).Where("id", CondLt, 10))}, {"SELECT * FROM main_ns WHERE id > (SELECT COUNT(*) FROM second_ns WHERE id < 10 LIMIT 0)", Query{"main_ns"}.Where("id", CondGt, Query{"second_ns"}.Where("id", CondLt, 10).ReqTotal())}, + {"SELECT * FROM main_ns WHERE (SELECT * FROM second_ns WHERE id < 10 LIMIT 0) IS NOT NULL AND value IN (5,4,1)", + Query{"main_ns"} + .Where(Query{"second_ns"}.Where("id", CondLt, 10), CondAny, {}) + .Where("value", CondSet, {Variant{5}, Variant{4}, Variant{1}})}, + {"SELECT * FROM main_ns WHERE ((SELECT * FROM second_ns WHERE id < 10 LIMIT 0) IS NOT NULL) AND value IN (5,4,1)", + Query{"main_ns"} + .OpenBracket() + .Where(Query{"second_ns"}.Where("id", CondLt, 10), CondAny, {}) + .CloseBracket() + .Where("value", CondSet, {Variant{5}, Variant{4}, Variant{1}})}, + {"SELECT * FROM main_ns WHERE id IN (SELECT id FROM second_ns WHERE id < 999) AND value >= 1000", + Query{"main_ns"}.Where("id", CondSet, Query{"second_ns"}.Select({"id"}).Where("id", CondLt, 999)).Where("value", CondGe, 1000)}, + {"SELECT * FROM main_ns WHERE (id IN (SELECT id FROM second_ns WHERE id < 999)) AND value >= 1000", + Query{"main_ns"} + .OpenBracket() + .Where("id", CondSet, Query{"second_ns"}.Select({"id"}).Where("id", CondLt, 999)) + .CloseBracket() + .Where("value", CondGe, 1000)}, + {"SELECT * FROM main_ns " + "WHERE (SELECT id FROM second_ns WHERE id < 999 AND xxx IS NULL ORDER BY 'value' DESC LIMIT 10) = 0 " + "ORDER BY 'tree'", + Query{"main_ns"} + .Where(Query{"second_ns"} + .Select({"id"}) + .Where("id", CondLt, 999) + .Where("xxx", CondEmpty, 
VariantArray{}) + .Limit(10) + .Sort("value", true), + CondEq, 0) + .Sort("tree", false)}, + {"SELECT * FROM main_ns " + "WHERE ((SELECT id FROM second_ns WHERE id < 999 AND xxx IS NULL ORDER BY 'value' DESC LIMIT 10) = 0) " + "ORDER BY 'tree'", + Query{"main_ns"} + .OpenBracket() + .Where(Query{"second_ns"} + .Select({"id"}) + .Where("id", CondLt, 999) + .Where("xxx", CondEmpty, VariantArray{}) + .Limit(10) + .Sort("value", true), + CondEq, 0) + .CloseBracket() + .Sort("tree", false)}, + {"SELECT * FROM main_ns " + "WHERE INNER JOIN (SELECT * FROM second_ns WHERE NOT val = 10) ON main_ns.id = second_ns.uid " + "AND id IN (SELECT id FROM third_ns WHERE id < 999) " + "AND INNER JOIN (SELECT * FROM fourth_ns WHERE val IS NOT NULL OFFSET 2 LIMIT 1) ON main_ns.uid = fourth_ns.id", + Query{"main_ns"} + .InnerJoin("id", "uid", CondEq, Query("second_ns").Not().Where("val", CondEq, 10)) + .Where("id", CondSet, Query{"third_ns"}.Select({"id"}).Where("id", CondLt, 999)) + .InnerJoin("uid", "id", CondEq, Query("fourth_ns").Where("val", CondAny, VariantArray{}).Limit(1).Offset(2))}, + {"SELECT * FROM main_ns " + "WHERE INNER JOIN (SELECT * FROM second_ns WHERE NOT val = 10 OFFSET 2 LIMIT 1) ON main_ns.id = second_ns.uid " + "AND id IN (SELECT id FROM third_ns WHERE id < 999) " + "LEFT JOIN (SELECT * FROM fourth_ns WHERE val IS NOT NULL) ON main_ns.uid = fourth_ns.id", + Query{"main_ns"} + .InnerJoin("id", "uid", CondEq, Query("second_ns").Not().Where("val", CondEq, 10).Limit(1).Offset(2)) + .Where("id", CondSet, Query{"third_ns"}.Select({"id"}).Where("id", CondLt, 999)) + .LeftJoin("uid", "id", CondEq, Query("fourth_ns").Where("val", CondAny, VariantArray{}))}, + {"SELECT * FROM main_ns " + "WHERE id IN (SELECT id FROM third_ns WHERE id < 999 OFFSET 7 LIMIT 5) " + "LEFT JOIN (SELECT * FROM second_ns WHERE NOT val = 10 OFFSET 2 LIMIT 1) ON main_ns.id = second_ns.uid " + "LEFT JOIN (SELECT * FROM fourth_ns WHERE val IS NOT NULL) ON main_ns.uid = fourth_ns.id", + Query{"main_ns"} 
+ .LeftJoin("id", "uid", CondEq, Query("second_ns").Not().Where("val", CondEq, 10).Limit(1).Offset(2)) + .Where("id", CondSet, Query{"third_ns"}.Select({"id"}).Where("id", CondLt, 999).Limit(5).Offset(7)) + .LeftJoin("uid", "id", CondEq, Query("fourth_ns").Where("val", CondAny, VariantArray{}))}, }; for (const auto& [sql, expected, direction] : cases) { diff --git a/cpp_src/gtests/tests/unit/string_function_test.cc b/cpp_src/gtests/tests/unit/string_function_test.cc index 441a1e29c..62ad2cea4 100644 --- a/cpp_src/gtests/tests/unit/string_function_test.cc +++ b/cpp_src/gtests/tests/unit/string_function_test.cc @@ -1,4 +1,12 @@ +#if defined(__GNUC__) && (__GNUC__ == 12) && defined(REINDEX_WITH_ASAN) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wmaybe-uninitialized" #include +#pragma GCC diagnostic pop +#else // REINDEX_WITH_ASAN +#include +#endif // REINDEX_WITH_ASAN + #include "gtest/gtest.h" #include "reindexer_api.h" #include "tools/customlocal.h" diff --git a/cpp_src/readme.md b/cpp_src/readme.md index b496cca9e..dba0b23d3 100644 --- a/cpp_src/readme.md +++ b/cpp_src/readme.md @@ -46,18 +46,29 @@ yum update yum install reindexer-server ``` -Available distros: `centos-7`, `fedora-38`, `fedora-39`, `redos-7` +Available distros: `centos-7`, `fedora-38`, `fedora-39`. ### Ubuntu/Debian ```bash -curl https://repo.reindexer.io/RX-KEY.GPG | apt-key add +wget https://repo.reindexer.io/RX-KEY.GPG -O /etc/apt/trusted.gpg.d/reindexer.asc echo "deb https://repo.reindexer.io/ /" >> /etc/apt/sources.list apt update apt install reindexer-server ``` -Available distros: `debian-bookworm`, `debian-bullseye`, `ubuntu-bionic`, `ubuntu-focal`, `ubuntu-jammy` +Available distros: `debian-bookworm`, `debian-bullseye`, `ubuntu-focal`, `ubuntu-jammy` + +### Redos + +```bash +rpm --import https://repo.reindexer.io/RX-KEY.GPG +dnf config-manager --add-repo https://repo.reindexer.io//x86_64/ +dnf update +dnf install reindexer-server +``` + +Available distros: `redos-7`. 
## OSX brew @@ -110,7 +121,7 @@ service start reindexer ## HTTP REST API The simplest way to use reindexer with any program language - is using REST API. The -[complete REST API documentation is here](server/contrib/server.md). +[complete REST API documentation is here](server/contrib/server.md). [Or explore interactive version of Reindexer's swagger documentation](https://editor.swagger.io/?url=https://raw.githubusercontent.com/Restream/reindexer/master/cpp_src/server/contrib/server.yml) ## GRPC API diff --git a/cpp_src/server/CMakeLists.txt b/cpp_src/server/CMakeLists.txt index 9cf7e2b4e..f605f12ef 100644 --- a/cpp_src/server/CMakeLists.txt +++ b/cpp_src/server/CMakeLists.txt @@ -4,7 +4,7 @@ cmake_minimum_required(VERSION 3.0) project(reindexer_server_library) set (SWAGGER_VERSION "2.x") -set (GH_FACE_VERSION "3.21.0") +set (GH_FACE_VERSION "3.22.0") set (GH_FACE_TAG "v${GH_FACE_VERSION}") set (TARGET reindexer_server_library) set (SERVER_LIB_DIR ${PROJECT_BINARY_DIR} PARENT_SCOPE) diff --git a/cpp_src/server/contrib/CMakeLists.txt b/cpp_src/server/contrib/CMakeLists.txt index a162877a8..039475da5 100644 --- a/cpp_src/server/contrib/CMakeLists.txt +++ b/cpp_src/server/contrib/CMakeLists.txt @@ -40,6 +40,6 @@ if(python3) WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/server/contrib COMMENT "Generate query.json.h" ) - add_custom_target(query_json DEPENDS ${QUERY_SCHEMA}) + add_custom_target(query_json ALL DEPENDS ${QUERY_SCHEMA}) endif() endif () diff --git a/cpp_src/server/contrib/server.md b/cpp_src/server/contrib/server.md index 856261b09..a80689532 100644 --- a/cpp_src/server/contrib/server.md +++ b/cpp_src/server/contrib/server.md @@ -77,6 +77,7 @@ * [FulltextConfig](#fulltextconfig) * [FulltextFieldConfig](#fulltextfieldconfig) * [FulltextSynonym](#fulltextsynonym) + * [StopWordObject](#stopwordobject) * [Index](#index) * [IndexCacheMemStats](#indexcachememstats) * [IndexMemStat](#indexmemstat) @@ -2195,6 +2196,7 @@ Query execution explainings |**selectors**
*optional*|Filter selectors, used to process query conditions|< [selectors](#explaindef-selectors) > array| |**sort_by_uncommitted_index**
*optional*|Optimization of sort by uncompleted index has been performed|boolean| |**sort_index**
*optional*|Index, which is used to sort results|string| +|**subqueries**
*optional*|Explain of subqueries preselect|< [subqueries](#explaindef-subqueries) > array| |**total_us**
*optional*|Total query execution time|integer| @@ -2231,6 +2233,7 @@ Query execution explainings |---|---|---| |**comparators**
*optional*|Count of comparators used, for this selector|integer| |**cost**
*optional*|Cost expectation of this selector|integer| +|**description**
*optional*|Description of the selector|string| |**explain_preselect**
*optional*|Preselect in joined namespace execution explainings|[ExplainDef](#explaindef)| |**explain_select**
*optional*|One of selects in joined namespace execution explainings|[ExplainDef](#explaindef)| |**field**
*optional*|Field or index name|string| @@ -2239,6 +2242,17 @@ Query execution explainings |**keys**
*optional*|Number of unique keys, processed by this selector (may be incorrect, in case of internal query optimization/caching)|integer| |**matched**
*optional*|Count of processed documents, matched by this selector|integer| |**method**
*optional*|Method, used to process condition|enum (scan, index, inner_join, left_join)| +|**type**
*optional*|Type of the selector|string| + + +**subqueries** + +|Name|Description|Schema| +|---|---|---| +|**explain**
*optional*|Explain of the subquery's preselect|[ExplainDef](#explaindef)| +|**field**
*optional*|Name of field being compared with the subquery's result|string| +|**keys**
*optional*|Count of keys being compared with the subquery's result|integer| +|**namespace**
*optional*|Subquery's namespace name|string| @@ -2295,7 +2309,7 @@ Fulltext Index configuration |**position_boost**
*optional*|Boost of search query term position
**Default** : `1.0`
**Minimum value** : `0`
**Maximum value** : `10`|number (float)| |**position_weight**
*optional*|Weight of search query term position in final rank. 0: term position will not change final rank. 1: term position will affect final rank in 0 - 100% range
**Default** : `0.1`
**Minimum value** : `0`
**Maximum value** : `1`|number (float)| |**stemmers**
*optional*|List of stemmers to use|< string > array| -|**stop_words**
*optional*|List of stop words. Words from this list will be ignored in documents and queries|< string > array| +|**stop_words**
*optional*|List of stop word objects. Words from this list will be ignored when building indexes.|< [StopWordObject](#stopwordobject) > array| |**sum_ranks_by_fields_ratio**
*optional*|Ratio for summation of ranks of one term's matches in several fields. For example, if value of this ratio is K, request is '@+f1,+f2,+f3 word', ranks of match in fields are R1, R2, R3 and R2 < R1 < R3, final rank will be R = R2 + K*R1 + K*K*R3
**Default** : `0.0`
**Minimum value** : `0`
**Maximum value** : `1`|number (float)| |**synonyms**
*optional*|List of synonyms for replacement|< [FulltextSynonym](#fulltextsynonym) > array| |**term_len_boost**
*optional*|Boost of search query term length
**Default** : `1.0`
**Minimum value** : `0`
**Maximum value** : `10`|number (float)| @@ -2356,6 +2370,13 @@ Fulltext synonym definition +### StopWordObject +Stop word object definition +|Name|Description|Schema| +|---|---|---| +|**word**
*optional*|Stop word|string| +|**is_morpheme**
*optional*|If the value is true, the word can be included in search results in queries such as 'word*', 'word~' etc.|boolean| + ### Index |Name|Description|Schema| diff --git a/cpp_src/server/contrib/server.yml b/cpp_src/server/contrib/server.yml index f4f66051a..146540990 100644 --- a/cpp_src/server/contrib/server.yml +++ b/cpp_src/server/contrib/server.yml @@ -2779,6 +2779,17 @@ definitions: description: "Descent or ascent sorting direction" type: boolean + FtStopWordObject: + type: object + properties: + word: + description: "Stop word" + type: string + is_morpheme: + type: boolean + description: "If the value is true, the word can be included in search results in queries such as 'word*', 'word~' etc." + default: false + FulltextConfig: type: object description: "Fulltext Index configuration" @@ -2817,9 +2828,9 @@ definitions: description: "List of symbols, which will be threated as word part, all other symbols will be thrated as wors separators" stop_words: type: array - description: "List of stop words. Words from this list will be ignored in documents and queries" + description: "List of objects of stop words. 
Words from this list will be ignored when building indexes" items: - type: string + $ref: "#/definitions/FtStopWordObject" stemmers: type: array default: ["en","ru"] @@ -3299,6 +3310,12 @@ definitions: keys: type: integer description: "Number of uniq keys, processed by this selector (may be incorrect, in case of internal query optimization/caching" + type: + type: string + description: "Type of the selector" + description: + type: string + description: "Description of the selector" explain_preselect: description: "Preselect in joined namespace execution explainings" $ref: "#/definitions/ExplainDef" @@ -3366,6 +3383,24 @@ definitions: values_count: type: integer description: resulting size of query values set + subqueries: + type: array + description: "Explain of subqueries preselect" + items: + type: object + properties: + namespace: + type: string + description: "Subquery's namespace name" + keys: + type: integer + description: "Count of keys being compared with the subquery's result" + field: + type: string + description: "Name of field being compared with the subquery's result" + explain: + description: "Explain of the subquery's preselect" + $ref: "#/definitions/ExplainDef" AggregationResDef: diff --git a/cpp_src/server/httpserver.cc b/cpp_src/server/httpserver.cc index 87cd16189..f0a2c65cd 100644 --- a/cpp_src/server/httpserver.cc +++ b/cpp_src/server/httpserver.cc @@ -22,6 +22,7 @@ #include "resources_wrapper.h" #include "statscollect/istatswatcher.h" #include "statscollect/prometheus.h" +#include "tools/alloc_ext/je_malloc_extension.h" #include "tools/alloc_ext/tc_malloc_extension.h" #include "tools/flagguard.h" #include "tools/fsops.h" diff --git a/cpp_src/tools/json2kv.cc b/cpp_src/tools/json2kv.cc index c693796a8..e7a7f1a15 100644 --- a/cpp_src/tools/json2kv.cc +++ b/cpp_src/tools/json2kv.cc @@ -32,7 +32,9 @@ Variant jsonValue2Variant(const gason::JsonValue &v, KeyValueType t, std::string -> Variant { throw Error(errLogic, "Error parsing json field '%s' - 
got number, expected %s", fieldName, t.Name()); }); case gason::JSON_STRING: return t.EvaluateOneOf( - [&](OneOf) { return Variant(p_string(json_string_ftr{v.sval.ptr})); }, + [&](OneOf) { + return Variant(p_string(json_string_ftr{v.sval.ptr}), Variant::no_hold_t{}); + }, [&](KeyValueType::Uuid) { return Variant{Uuid{v.toString()}}; }, [&](OneOf) -> Variant { @@ -59,7 +61,7 @@ Variant jsonValue2Variant(const gason::JsonValue &v, KeyValueType t, std::string [](KeyValueType::Double) noexcept { return Variant(0.0); }, [](KeyValueType::Bool) noexcept { return Variant(false); }, [](KeyValueType::Int) noexcept { return Variant(0); }, [](KeyValueType::Int64) noexcept { return Variant(static_cast(0)); }, - [](KeyValueType::String) { return Variant(p_string(static_cast(nullptr))); }, + [](KeyValueType::String) { return Variant(static_cast(nullptr)); }, [](KeyValueType::Uuid) noexcept { return Variant{Uuid{}}; }, [&](OneOf) -> Variant { throw Error(errLogic, "Error parsing json field '%s' - got null, expected %s", fieldName, t.Name()); diff --git a/cpp_src/tools/serializer.h b/cpp_src/tools/serializer.h index f370263ce..91d57bb3e 100644 --- a/cpp_src/tools/serializer.h +++ b/cpp_src/tools/serializer.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include "core/cjson/ctag.h" #include "core/keyvalue/uuid.h" diff --git a/cpp_src/tools/stringstools.cc b/cpp_src/tools/stringstools.cc index dbf99ecf1..9c8f72b45 100644 --- a/cpp_src/tools/stringstools.cc +++ b/cpp_src/tools/stringstools.cc @@ -9,7 +9,6 @@ #include "estl/fast_hash_map.h" #include "itoa/itoa.h" #include "tools/assertrx.h" -#include "tools/customlocal.h" #include "tools/randomgenerator.h" #include "tools/stringstools.h" #include "utf8cpp/utf8.h" @@ -236,24 +235,6 @@ std::pair calcUtf8BeforeDelims(const char *str, int pos, size_t return std::make_pair(str + pos - ptr, charCounter); } -void check_for_replacement(wchar_t &ch) { - if (ch == 0x451) { // 'ё' - ch = 0x435; // 'е' - } -} - -void 
check_for_replacement(uint32_t &ch) { - if (ch == 0x451) { // 'ё' - ch = 0x435; // 'е' - } -} - -bool is_number(std::string_view str) { - uint16_t i = 0; - while ((i < str.length() && IsDigit(str[i]))) i++; - return (i && i == str.length()); -} - void split(std::string_view str, std::string &buf, std::vector &words, const std::string &extraWordSymbols) { // assuming that the 'ToLower' function and the 'check for replacement' function should not change the character size in bytes buf.resize(str.length()); @@ -415,7 +396,7 @@ template bool checkIfEndsWith(std::string_view pattern, std: template bool checkIfEndsWith(std::string_view pattern, std::string_view src) noexcept; template <> -int collateCompare(std::string_view lhs, std::string_view rhs, const SortingPrioritiesTable &) { +int collateCompare(std::string_view lhs, std::string_view rhs, const SortingPrioritiesTable &) noexcept { auto itl = lhs.begin(); auto itr = rhs.begin(); @@ -437,11 +418,11 @@ int collateCompare(std::string_view lhs, std::string_view rhs, con } template <> -int collateCompare(std::string_view lhs, std::string_view rhs, const SortingPrioritiesTable &) { +int collateCompare(std::string_view lhs, std::string_view rhs, const SortingPrioritiesTable &) noexcept { auto itl = lhs.data(); auto itr = rhs.data(); - for (; itl != lhs.data() + lhs.size() && itr != rhs.size() + rhs.data();) { + for (auto lhsEnd = lhs.data() + lhs.size(), rhsEnd = rhs.size() + rhs.data(); itl != lhsEnd && itr != rhsEnd;) { auto chl = ToLower(utf8::unchecked::next(itl)); auto chr = ToLower(utf8::unchecked::next(itr)); @@ -458,7 +439,7 @@ int collateCompare(std::string_view lhs, std::string_view rhs, cons } template <> -int collateCompare(std::string_view lhs, std::string_view rhs, const SortingPrioritiesTable &) { +int collateCompare(std::string_view lhs, std::string_view rhs, const SortingPrioritiesTable &) noexcept { char *posl = nullptr; char *posr = nullptr; @@ -478,7 +459,7 @@ int collateCompare(std::string_view lhs, 
std::string_view rhs, c } template <> -int collateCompare(std::string_view lhs, std::string_view rhs, const SortingPrioritiesTable &sortOrderTable) { +int collateCompare(std::string_view lhs, std::string_view rhs, const SortingPrioritiesTable &sortOrderTable) noexcept { auto itl = lhs.data(); auto itr = rhs.data(); @@ -502,7 +483,7 @@ int collateCompare(std::string_view lhs, std::string_view rhs, co } template <> -int collateCompare(std::string_view lhs, std::string_view rhs, const SortingPrioritiesTable &) { +int collateCompare(std::string_view lhs, std::string_view rhs, const SortingPrioritiesTable &) noexcept { size_t l1 = lhs.size(); size_t l2 = rhs.size(); int res = memcmp(lhs.data(), rhs.data(), std::min(l1, l2)); @@ -711,7 +692,7 @@ Error getBytePosInMultilineString(std::string_view str, const size_t line, const } if ((currLine == line) && (charPos == currCharPos)) { bytePos = it - str.begin() - 1; - return errOK; + return Error(); } return Error(errNotValid, "Wrong cursor position: line=%d, pos=%d", line, charPos); } diff --git a/cpp_src/tools/stringstools.h b/cpp_src/tools/stringstools.h index 97c451577..b162ee94a 100644 --- a/cpp_src/tools/stringstools.h +++ b/cpp_src/tools/stringstools.h @@ -10,6 +10,7 @@ #include "core/indexopts.h" #include "core/type_consts.h" #include "tools/customhash.h" +#include "tools/customlocal.h" #include "tools/errors.h" namespace reindexer { @@ -104,18 +105,18 @@ template [[nodiscard]] Pos wordToByteAndCharPos(std::string_view str, int wordPosition, const std::string& extraWordSymbols); template -[[nodiscard]] int collateCompare(std::string_view lhs, std::string_view rhs, const SortingPrioritiesTable& sortOrderTable); +[[nodiscard]] int collateCompare(std::string_view lhs, std::string_view rhs, const SortingPrioritiesTable& sortOrderTable) noexcept; template <> -[[nodiscard]] int collateCompare(std::string_view lhs, std::string_view rhs, const SortingPrioritiesTable&); +[[nodiscard]] int collateCompare(std::string_view lhs, 
std::string_view rhs, const SortingPrioritiesTable&) noexcept; template <> -[[nodiscard]] int collateCompare(std::string_view lhs, std::string_view rhs, const SortingPrioritiesTable&); +[[nodiscard]] int collateCompare(std::string_view lhs, std::string_view rhs, const SortingPrioritiesTable&) noexcept; template <> -[[nodiscard]] int collateCompare(std::string_view lhs, std::string_view rhs, const SortingPrioritiesTable&); +[[nodiscard]] int collateCompare(std::string_view lhs, std::string_view rhs, const SortingPrioritiesTable&) noexcept; template <> -[[nodiscard]] int collateCompare(std::string_view lhs, std::string_view rhs, const SortingPrioritiesTable&); +[[nodiscard]] int collateCompare(std::string_view lhs, std::string_view rhs, const SortingPrioritiesTable&) noexcept; template <> -[[nodiscard]] int collateCompare(std::string_view lhs, std::string_view rhs, const SortingPrioritiesTable&); -[[nodiscard]] inline int collateCompare(std::string_view lhs, std::string_view rhs, const CollateOpts& collateOpts) { +[[nodiscard]] int collateCompare(std::string_view lhs, std::string_view rhs, const SortingPrioritiesTable&) noexcept; +[[nodiscard]] inline int collateCompare(std::string_view lhs, std::string_view rhs, const CollateOpts& collateOpts) noexcept { switch (collateOpts.mode) { case CollateASCII: return collateCompare(lhs, rhs, collateOpts.sortOrderTable); @@ -136,9 +137,18 @@ std::string utf16_to_utf8(const std::wstring& src); std::wstring& utf8_to_utf16(std::string_view src, std::wstring& dst); std::string& utf16_to_utf8(const std::wstring& src, std::string& dst); -void check_for_replacement(wchar_t& ch); -void check_for_replacement(uint32_t& ch); -bool is_number(std::string_view str); +inline void check_for_replacement(wchar_t& ch) noexcept { + ch = (ch == 0x451) ? 0x435 : ch; // 'ё' -> 'е' +} +inline void check_for_replacement(uint32_t& ch) noexcept { + ch = (ch == 0x451) ? 
0x435 : ch; // 'ё' -> 'е' +} +inline bool is_number(std::string_view str) noexcept { + uint16_t i = 0; + for (; (i < str.length() && IsDigit(str[i])); ++i) + ; + return (i && i == str.length()); +} int fast_strftime(char* buf, const tm* tm); std::string urldecode2(std::string_view str); diff --git a/cpp_src/vendor/cpp-btree/btree.h b/cpp_src/vendor/cpp-btree/btree.h index 38f2cad9c..49a80f0b7 100644 --- a/cpp_src/vendor/cpp-btree/btree.h +++ b/cpp_src/vendor/cpp-btree/btree.h @@ -719,10 +719,10 @@ struct btree_iterator { typedef btree_iterator const_iterator; typedef btree_iterator self_type; - btree_iterator() : node(NULL), position(-1) {} - btree_iterator(Node *n, int p) : node(n), position(p) {} - btree_iterator(const iterator &x) : node(x.node), position(x.position) {} - btree_iterator &operator=(const iterator &x) { + btree_iterator() noexcept : node(NULL), position(-1) {} + btree_iterator(Node *n, int p) noexcept : node(n), position(p) {} + btree_iterator(const iterator &x) noexcept : node(x.node), position(x.position) {} + btree_iterator &operator=(const iterator &x) noexcept { if (reinterpret_cast(this) != &x) { node = x.node; position = x.position; @@ -731,45 +731,45 @@ struct btree_iterator { } // Increment/decrement the iterator. 
- void increment() { + void increment() noexcept { if (node->leaf() && ++position < node->count()) { return; } increment_slow(); } - void increment_by(int count); - void increment_slow(); + void increment_by(int count) noexcept; + void increment_slow() noexcept; - void decrement() { + void decrement() noexcept { if (node->leaf() && --position >= 0) { return; } decrement_slow(); } - void decrement_slow(); + void decrement_slow() noexcept; - bool operator==(const const_iterator &x) const { return node == x.node && position == x.position; } - bool operator!=(const const_iterator &x) const { return node != x.node || position != x.position; } + bool operator==(const const_iterator &x) const noexcept { return node == x.node && position == x.position; } + bool operator!=(const const_iterator &x) const noexcept { return node != x.node || position != x.position; } // Accessors for the key/value the iterator is pointing at. - const key_type &key() const { return node->key(position); } - reference operator*() const { return node->value(position); } - pointer operator->() const { return &node->value(position); } + const key_type &key() const noexcept { return node->key(position); } + reference operator*() const noexcept { return node->value(position); } + pointer operator->() const noexcept { return &node->value(position); } - self_type &operator++() { + self_type &operator++() noexcept { increment(); return *this; } - self_type &operator--() { + self_type &operator--() noexcept { decrement(); return *this; } - self_type operator++(int) { + self_type operator++(int) noexcept { self_type tmp = *this; ++*this; return tmp; } - self_type operator--(int) { + self_type operator--(int) noexcept { self_type tmp = *this; --*this; return tmp; @@ -1534,7 +1534,7 @@ void btree_node

::swap(btree_node *x) { //// // btree_iterator methods template -void btree_iterator::increment_slow() { +void btree_iterator::increment_slow() noexcept { if (node->leaf()) { assertrx(position >= node->count()); self_type save(*this); @@ -1557,7 +1557,7 @@ void btree_iterator::increment_slow() { } template -void btree_iterator::increment_by(int count) { +void btree_iterator::increment_by(int count) noexcept { while (count > 0) { if (node->leaf()) { int rest = node->count() - position; @@ -1574,7 +1574,7 @@ void btree_iterator::increment_by(int count) { } template -void btree_iterator::decrement_slow() { +void btree_iterator::decrement_slow() noexcept { if (node->leaf()) { assertrx(position <= -1); self_type save(*this); diff --git a/cpp_src/vendor/cpp-btree/btree_container.h b/cpp_src/vendor/cpp-btree/btree_container.h index 94469d201..759db216e 100644 --- a/cpp_src/vendor/cpp-btree/btree_container.h +++ b/cpp_src/vendor/cpp-btree/btree_container.h @@ -53,14 +53,14 @@ class btree_container { btree_container(const self_type &x) : tree_(x.tree_) {} // Iterator routines. 
- iterator begin() { return tree_.begin(); } - const_iterator begin() const { return tree_.begin(); } - iterator end() { return tree_.end(); } - const_iterator end() const { return tree_.end(); } - reverse_iterator rbegin() { return tree_.rbegin(); } - const_reverse_iterator rbegin() const { return tree_.rbegin(); } - reverse_iterator rend() { return tree_.rend(); } - const_reverse_iterator rend() const { return tree_.rend(); } + iterator begin() noexcept(noexcept(std::declval().begin())) { return tree_.begin(); } + const_iterator begin() const noexcept(noexcept(std::declval().begin())) { return tree_.begin(); } + iterator end() noexcept(noexcept(std::declval().end())) { return tree_.end(); } + const_iterator end() const noexcept(noexcept(std::declval().end())) { return tree_.end(); } + reverse_iterator rbegin() noexcept(noexcept(std::declval().rbegin())) { return tree_.rbegin(); } + const_reverse_iterator rbegin() const noexcept(noexcept(std::declval().rbegin())) { return tree_.rbegin(); } + reverse_iterator rend() noexcept(noexcept(std::declval().rend())) { return tree_.rend(); } + const_reverse_iterator rend() const noexcept(noexcept(std::declval().rend())) { return tree_.rend(); } // Lookup routines. iterator lower_bound(const key_type &key) { return tree_.lower_bound(key); } @@ -102,18 +102,18 @@ class btree_container { void verify() const { tree_.verify(); } // Size routines. 
- size_type size() const { return tree_.size(); } - size_type max_size() const { return tree_.max_size(); } - bool empty() const { return tree_.empty(); } - size_type height() const { return tree_.height(); } - size_type internal_nodes() const { return tree_.internal_nodes(); } - size_type leaf_nodes() const { return tree_.leaf_nodes(); } - size_type nodes() const { return tree_.nodes(); } - size_type bytes_used() const { return tree_.bytes_used(); } - static double average_bytes_per_value() { return Tree::average_bytes_per_value(); } - double fullness() const { return tree_.fullness(); } - double overhead() const { return tree_.overhead(); } - const key_compare &key_comp() const { return tree_.key_comp(); } + size_type size() const noexcept(noexcept(std::declval().size())) { return tree_.size(); } + size_type max_size() const noexcept(noexcept(std::declval().max_size())) { return tree_.max_size(); } + bool empty() const noexcept(noexcept(std::declval().empty())) { return tree_.empty(); } + size_type height() const noexcept(noexcept(std::declval().height())) { return tree_.height(); } + size_type internal_nodes() const noexcept(noexcept(std::declval().internal_nodes())) { return tree_.internal_nodes(); } + size_type leaf_nodes() const noexcept(noexcept(std::declval().leaf_nodes())) { return tree_.leaf_nodes(); } + size_type nodes() const noexcept(noexcept(std::declval().nodes())) { return tree_.nodes(); } + size_type bytes_used() const noexcept(noexcept(std::declval().bytes_used())) { return tree_.bytes_used(); } + static double average_bytes_per_value() noexcept(noexcept(Tree::average_bytes_per_value())) { return Tree::average_bytes_per_value(); } + double fullness() const noexcept(noexcept(std::declval().fullness())) { return tree_.fullness(); } + double overhead() const noexcept(noexcept(std::declval().overhead())) { return tree_.overhead(); } + const key_compare &key_comp() const noexcept(noexcept(std::declval().key_comp())) { return tree_.key_comp(); } bool 
operator==(const self_type &x) const { if (size() != x.size()) { @@ -315,4 +315,4 @@ class btree_multi_container : public btree_container { } // namespace btree -#endif // UTIL_BTREE_BTREE_CONTAINER_H__ +#endif // UTIL_BTREE_BTREE_CONTAINER_H__ diff --git a/cpp_src/vendor/prometheus/family.h b/cpp_src/vendor/prometheus/family.h index 5c59122d0..6992bc161 100644 --- a/cpp_src/vendor/prometheus/family.h +++ b/cpp_src/vendor/prometheus/family.h @@ -157,7 +157,7 @@ T& Family::Add(std::map&& labels, int64_t epoch, Ar auto metrics_iter = metrics_.find(hash); if (metrics_iter != metrics_.end()) { -#ifndef NDEBUG +#if !defined(NDEBUG) && defined(WITH_STDLIB_DEBUG) auto labels_iter = labels_.find(hash); assertrx(labels_iter != labels_.end()); const auto& old_labels = labels_iter->second; @@ -166,7 +166,7 @@ T& Family::Add(std::map&& labels, int64_t epoch, Ar metrics_iter->second.epoch = epoch; return *metrics_iter->second.ptr; } else { -#ifndef NDEBUG +#if !defined(NDEBUG) && defined(WITH_STDLIB_DEBUG) for (auto& label_pair : labels) { auto& label_name = label_pair.first; assertrx(CheckLabelName(label_name)); diff --git a/cpp_src/vendor/prometheus/impl/check_names.cc b/cpp_src/vendor/prometheus/impl/check_names.cc index 0aabbc88c..6a800ce31 100644 --- a/cpp_src/vendor/prometheus/impl/check_names.cc +++ b/cpp_src/vendor/prometheus/impl/check_names.cc @@ -1,15 +1,21 @@ #include "prometheus/check_names.h" -#include - #if defined(__GLIBCXX__) && __GLIBCXX__ <= 20150623 #define STD_REGEX_IS_BROKEN #endif +#if defined(__GNUC__) && (__GNUC__ == 12) && (__GNUC_MINOR__ == 2) && defined(REINDEX_WITH_ASAN) +// regex header is broken in GCC 12.2 with ASAN +#define STD_REGEX_IS_BROKEN +#endif #if defined(_MSC_VER) && _MSC_VER < 1900 #define STD_REGEX_IS_BROKEN #endif +#ifndef STD_REGEX_IS_BROKEN +#include +#endif + namespace prometheus { bool CheckMetricName(const std::string& name) { // see https://prometheus.io/docs/concepts/data_model/ diff --git a/cpp_src/vendor/spdlog/details/os.h 
b/cpp_src/vendor/spdlog/details/os.h index cf8501181..a3f05d782 100644 --- a/cpp_src/vendor/spdlog/details/os.h +++ b/cpp_src/vendor/spdlog/details/os.h @@ -247,8 +247,9 @@ inline size_t filesize(FILE *f) #else // unix int fd = fileno(f); - //64 bits(but not in osx or cygwin, where fstat64 is deprecated) -#if !defined(__FreeBSD__) && !defined(__APPLE__) && (defined(__x86_64__) || defined(__ppc64__)) && !defined(__CYGWIN__) + // 64 bits(but not in osx, linux/musl or cygwin, where fstat64 is deprecated) +#if ((defined(__linux__) && defined(__GLIBC__)) || defined(__sun) || defined(_AIX)) && \ + (defined(__LP64__) || defined(_LP64)) struct stat64 st ; if (fstat64(fd, &st) == 0) return static_cast(st.st_size); diff --git a/dependencies.sh b/dependencies.sh index a6427bd3b..10e3d35f7 100755 --- a/dependencies.sh +++ b/dependencies.sh @@ -37,7 +37,7 @@ almalinux9_rpms="gcc-c++ make snappy-devel leveldb-devel gperftools-devel findut fedora_debs=" gcc-c++ make snappy-devel leveldb-devel gperftools-devel findutils curl tar unzip rpm-build rpmdevtools git" centos7_debs="centos-release-scl devtoolset-9-gcc devtoolset-9-gcc-c++ make snappy-devel leveldb-devel gperftools-devel findutils curl tar unzip rpm-build rpmdevtools git" debian_debs="build-essential g++ libunwind-dev libgoogle-perftools-dev libsnappy-dev libleveldb-dev make curl unzip git" -alpine_apks="g++ snappy-dev leveldb-dev libexecinfo-dev make curl cmake unzip git" +alpine_apks="g++ snappy-dev leveldb-dev libunwind-dev make curl cmake unzip git" arch_pkgs="gcc snappy leveldb make curl cmake unzip git" redos_rpms="gcc gcc-c++ make snappy-devel leveldb-devel gperftools-devel findutils curl tar unzip git cmake rpm-build python-srpm-macros" @@ -237,7 +237,7 @@ install_alpine() { if [ $? -eq 0 ]; then success_msg "Package '$pkg' was installed successfully." else - error_msg "Could not install '$pkg' package. Try 'apt-get update && apt-get install $pkg'" && return 1 + error_msg "Could not install '$pkg' package. 
Try 'apk update && apk add $pkg'" && return 1 fi fi done diff --git a/dsl/dsl.go b/dsl/dsl.go index e110dde15..bf4945939 100644 --- a/dsl/dsl.go +++ b/dsl/dsl.go @@ -80,13 +80,13 @@ type Sort struct { } type Filter struct { - Op string `json:"op,omitempty"` - Field string `json:"field,omitempty"` - Joined *JoinQuery `json:"join_query,omitempty"` - SubQ *SubQuery `json:"subquery,omitempty"` - Cond string `json:"cond,omitempty"` - Value interface{} `json:"value,omitempty"` - Filters []Filter `json:"filters,omitempty"` + Op string `json:"Op,omitempty"` + Field string `json:"Field,omitempty"` + Joined *JoinQuery `json:"Join_Query,omitempty"` + SubQ *SubQuery `json:"Subquery,omitempty"` + Cond string `json:"Cond,omitempty"` + Value interface{} `json:"Value,omitempty"` + Filters []Filter `json:"Filters,omitempty"` } type JoinOnCondition struct { diff --git a/ftfastconfig.go b/ftfastconfig.go index 2eafbea2d..d55dfccb5 100644 --- a/ftfastconfig.go +++ b/ftfastconfig.go @@ -41,7 +41,6 @@ type FtTyposDetailedConfig struct { MaxExtraLetters int `json:"max_extra_letters"` } - type FtBaseRanking struct { // Relevancy of full word match // Values range: [0,500] @@ -81,6 +80,11 @@ type FtBaseRanking struct { Synonyms int `json:"synonyms_proc"` } +type StopWord struct { + Word string `json:"word"` + IsMorpheme bool `json:"is_morpheme"` +} + // FtFastConfig configurarion of FullText search index type FtFastConfig struct { // boost of bm25 ranking. default value 1. @@ -139,8 +143,10 @@ type FtFastConfig struct { EnableTranslit bool `json:"enable_translit"` // Enable wrong keyboard layout variants processing EnableKbLayout bool `json:"enable_kb_layout"` - // List of stop words. Words from this list will be ignored in documents and queries - StopWords []string `json:"stop_words"` + // List of objects of stop words. Words from this list will be ignored when building indexes + // but can be included in search results in queries such as 'word*', 'word~' etc. 
if for the stop-word attribute is_morpheme is true. + // The list item can be either a reindexer.StopWord, or string + StopWords []interface{} `json:"stop_words"` // List of synonyms for replacement Synonyms []struct { // List source tokens in query, which will be replaced with alternatives @@ -201,8 +207,7 @@ func DefaultFtFastConfig() FtFastConfig { MaxTotalAreasToCache: -1, Optimization: "Memory", EnablePreselectBeforeFt: false, - FtBaseRankingConfig: &FtBaseRanking{FullMatch: 100, PrefixMin: 50, SuffixMin:10, Typo:85, TypoPenalty: 15, StemmerPenalty: 15, Kblayout: 90, Translit:90, Synonyms:95}, - + FtBaseRankingConfig: &FtBaseRanking{FullMatch: 100, PrefixMin: 50, SuffixMin: 10, Typo: 85, TypoPenalty: 15, StemmerPenalty: 15, Kblayout: 90, Translit: 90, Synonyms: 95}, } } diff --git a/ftfuzzyconfig.go b/ftfuzzyconfig.go index 570970a54..223faed4b 100644 --- a/ftfuzzyconfig.go +++ b/ftfuzzyconfig.go @@ -39,15 +39,15 @@ type FtFuzzyConfig struct { EnableTranslit bool `json:"enable_translit"` // Enable wrong keyboard layout variants processing EnableKbLayout bool `json:"enable_kb_layout"` - // List of stop words. Words from this list will be ignored in documents and queries - StopWords []string `json:"stop_words"` + // List of objects of stop words. Words from this list will be ignored when building indexes + // but can be included in search results in queries such as 'word*', 'word~' etc. 
if for the stop-word attribute is_morpheme is true + StopWords []interface{} `json:"stop_words"` // Log level of full text search engine LogLevel int `json:"log_level"` // Extra symbols, which will be threated as parts of word to addition to letters and digits ExtraWordSymbols string `json:"extra_word_symbols"` // Config for subterm rank multiplier FtBaseRankingConfig *FtBaseRanking `json:"base_ranking,omitempty"` - } func DefaultFtFuzzyConfig() FtFuzzyConfig { @@ -69,6 +69,6 @@ func DefaultFtFuzzyConfig() FtFuzzyConfig { EnableKbLayout: true, LogLevel: 0, ExtraWordSymbols: "/-+", - FtBaseRankingConfig: &FtBaseRanking{FullMatch: 100, PrefixMin: 50, SuffixMin:10, Typo:85, TypoPenalty: 15, StemmerPenalty: 15, Kblayout: 90, Translit:90, Synonyms:95}, + FtBaseRankingConfig: &FtBaseRanking{FullMatch: 100, PrefixMin: 50, SuffixMin: 10, Typo: 85, TypoPenalty: 15, StemmerPenalty: 15, Kblayout: 90, Translit: 90, Synonyms: 95}, } } diff --git a/fulltext.md b/fulltext.md index 672ab7e52..7e081734c 100644 --- a/fulltext.md +++ b/fulltext.md @@ -6,11 +6,11 @@ Reindexer has builtin full text search engine. This document describes usage of - [Define full text index fields](#define-full-text-index-fields) - [Query to full text index](#query-to-full-text-index) - [Text query format](#text-query-format) - - [Patterns](#patterns) - - [Field selection](#field-selection) - - [Binary operators](#binary-operators) - - [Escape character](#escape-character) - - [Phrase search](#phrase-search) + - [Patterns](#patterns) + - [Field selection](#field-selection) + - [Binary operators](#binary-operators) + - [Escape character](#escape-character) + - [Phrase search](#phrase-search) - [Examples of text queris](#examples-of-text-queris) - [Natural language processing](#natural-language-processing) - [Merging queries results](#merging-queries-results) @@ -24,9 +24,10 @@ Reindexer has builtin full text search engine. 
This document describes usage of - [Performance and memory usage](#performance-and-memory-usage) - [Configuration](#configuration) - [Base config parameters](#base-config-parameters) + - [Stopwords details](#stopwords-details) - [Detailed typos config](#detailed-typos-config) - [Base ranking config](#base-ranking-config) - - [Limitations and know issues](#limitations-and-know-issues) + - [Limitations and know issues](#limitations-and-know-issues) ## LIKE @@ -34,17 +35,17 @@ Reindexer has builtin full text search engine. This document describes usage of For simple search in text can be used operator `LIKE`. It search strings which match a pattern. In the pattern `_` means any char and `%` means any sequence of chars. ``` - In Go: - query := db.Query("items"). - Where("field", reindexer.LIKE, "pattern") + In Go: + query := db.Query("items"). + Where("field", reindexer.LIKE, "pattern") - In SQL: - SELECT * FROM items WHERE fields LIKE 'pattern' + In SQL: + SELECT * FROM items WHERE fields LIKE 'pattern' ``` ``` - 'me_t' corresponds to 'meet', 'meat', 'melt' and so on - '%tion' corresponds to 'tion', 'condition', 'creation' and so on + 'me_t' corresponds to 'meet', 'meat', 'melt' and so on + '%tion' corresponds to 'tion', 'condition', 'creation' and so on ``` @@ -54,8 +55,8 @@ Full text search is performed in fields marked with `text` tag: ```go type Item struct { - ID int64 `reindex:"id,,pk"` - Description string `reindex:"description,text"` + ID int64 `reindex:"id,,pk"` + Description string `reindex:"description,text"` } ``` @@ -63,10 +64,10 @@ Full text search is also available for multiple fields of composite index marked ```go type Item struct { - ID int64 `reindex:"id,,pk"` - Name string `reindex:"name,-"` - Description string `reindex:"description,-"` - _ struct{} `reindex:"name+description=text_search,text,composite` + ID int64 `reindex:"id,,pk"` + Name string `reindex:"name,-"` + Description string `reindex:"description,-"` + _ struct{} 
`reindex:"name+description=text_search,text,composite` } ``` In this example full text index will include fields `name` and `description`,`text_search` is short alias of composite index name for using in Queries. @@ -78,22 +79,22 @@ Full text index is case insensitive. The source text is tokenized to set of word Queries to full text index are constructed by usual query interface ```go - query := db.Query ("items"). - Match ("name+description","text query","") + query := db.Query ("items"). + Match ("name+description","text query","") ``` Or equivalent query using name alias: ```go - query := db.Query ("items"). - Match ("text_search","text query","") + query := db.Query ("items"). + Match ("text_search","text query","") ``` Queries to full text index can be combined with conditions on another fields. e.g: ```go - query := db.Query ("items"). - Match ("description","text query"). - WhereInt("year",reindexer.GT,2010) + query := db.Query ("items"). + Match ("description","text query"). + WhereInt("year",reindexer.GT,2010) ``` Each result of query contains rank of match. Rank is integer from 0 to 255. 0 - lowest relevancy, 255 - best relevancy. The query Iterator has method `Rank()`, which returns rank of current result @@ -166,33 +167,37 @@ There are built in stemmers support in full text search. It enables natural lang It is possible to merge multiple queries results and sort final result by relevancy. ```go - query := db.Query ("items"). - Match ("description","text query1") - q2 := db.Query ("another_items"). - Match ("description","text query2") - query.Merge (q2) + query := db.Query ("items"). + Match ("description","text query1") + q2 := db.Query ("another_items"). 
+ Match ("description","text query2") + query.Merge (q2) iterator = query.Exec () - // Check the error - if err := iterator.Error(); err != nil { - panic(err) - } - // Iterate over results - for iterator.Next() { - // Get the next document and cast it to a pointer - switch elem := iterator.Object().(type) { - case Item: - fmt.Printf ("%v,rank=%d\n",*elem,iterator.Rank()) - case AnotherItem: - fmt.Printf ("%v,rank=%d\n",*elem,iterator.Rank()) - } - } + // Check the error + if err := iterator.Error(); err != nil { + panic(err) + } + // Iterate over results + for iterator.Next() { + // Get the next document and cast it to a pointer + switch elem := iterator.Object().(type) { + case Item: + fmt.Printf ("%v,rank=%d\n",*elem,iterator.Rank()) + case AnotherItem: + fmt.Printf ("%v,rank=%d\n",*elem,iterator.Rank()) + } + } ``` ## Using select functions It is possible to use select functions to process result data. -For now you can use snippet, snippet_n and highlight. Those functions does not work for composite fulltext indexes. +For now you can use snippet, snippet_n and highlight. For composite indexes the result of the function will be written in to corresponding subfields. You can not put [,)\0] symbols in functions params. If the value contains special characters, it must be enclosed in single quotes. +Notice: although text indexes may be created over numeric fields, select functions can not be applied to any non-string field. + +For all the functions there are two types of supported syntax with the same behavior: `field.func_name(...)` and `field = func_name(...)`. + ### Highlight This functions just highlights text area that was found. It has two arguments - @@ -319,20 +324,20 @@ Several parameters of full text search engine can be configured from application ```go ... ftconfig := reindexer.DefaultFtFastConfig() - // Setup configuration - ftconfig.LogLevel = reindexer.TRACE - // Setup another parameters - // ... 
- // Create index definition - indexDef := reindexer.IndexDef { - Name: "description", - JSONPaths: []string{"description"}, - IndexType: "text", - FieldType: "string", - Config: ftconfig, - } - // Add index with configuration - return db.AddIndex ("items",indexDef) + // Setup configuration + ftconfig.LogLevel = reindexer.TRACE + // Setup another parameters + // ... + // Create index definition + indexDef := reindexer.IndexDef { + Name: "description", + JSONPaths: []string{"description"}, + IndexType: "text", + FieldType: "string", + Config: ftconfig, + } + // Add index with configuration + return db.AddIndex ("items",indexDef) ``` @@ -361,7 +366,7 @@ Several parameters of full text search engine can be configured from application | | Stemmers | []string | List of stemmers to use | "en","ru" | | | EnableTranslit | bool | Enable russian translit variants processing. e.g. term "luntik" will match word "лунтик" | true | | | EnableKbLayout | bool | Enable wrong keyboard layout variants processing. e.g. term "keynbr" will match word "лунтик" | true | -| | StopWords | []string | List of stop words. Words from this list will be ignored in documents and queries | | +| | StopWords | []struct | List of objects of stopwords. Words from this list will be ignored when building indexes, but may be used in fulltext queries (such as 'word*', 'word~' etc) and produce non-empty search results. [More...](#stopwords-details) | | | | SumRanksByFieldsRatio | float | Ratio of summation of ranks of match one term in several fields | 0.0 | | | LogLevel | int | Log level of full text search engine | 0 | | | FieldsCfg | []struct | Configs for certain fields. Overlaps parameters from main config. Contains parameters: FieldName, Bm25Boost, Bm25Weight, TermLenBoost, TermLenWeight, PositionBoost, PositionWeight. 
| empty | @@ -372,6 +377,42 @@ Several parameters of full text search engine can be configured from application | | Optimization | string | Optimize the index by 'memory' or by 'cpu' | "memory" | | | FtBaseRanking | struct | Relevance of the word in different forms | | + +### Stopwords details +The list item can be either a string or a structure containing a string (the stopword) and a bool attribute (`is_morpheme`) indicating whether the stopword can be part of a word that can be shown in query-results. +If the stopword is set as a string, then the `is_morpheme` attribute is `false` by default and the following entries are equivalent: +```json +"StopWords":[ + { + "word": "some_word", + "is_morpheme": false + }, + ///... +] +``` +, +```json +"StopWords":[ + "some_word", + ///... +] +``` + +#### Example: +If the list of stopwords looks like this: +```json +"StopWords":[ + { + "word": "under", + "is_morpheme": true + }, + ///... +] +``` +and there is a pair of documents containing this word: `{"...under the roof ..."}, {"... to understand and forgive..."}`. Then for the query 'under*' we will get as a result only the document `{"... to understand and forgive..."}` and for the query 'under' we will get nothing as a result. + +If the "StopWords" section is not specified in the config, then the [default](./cpp_src/core/ft/stopwords/stop_en.cc) stopwords list will be used, and if it is explicitly specified empty, it means that there are no stopwords. + ### Detailed typos config FtTyposDetailedConfig: config for more precise typos algorithm tuning. 
diff --git a/iterator.go b/iterator.go index d70832764..9fd8d9165 100644 --- a/iterator.go +++ b/iterator.go @@ -15,11 +15,11 @@ import ( type ExplainSelector struct { // Field or index name - Field string `json:"field"` + Field string `json:"field,omitempty"` // Field type enum: indexed, non-indexed FieldType string `json:"field_type,omitempty"` // Method, used to process condition - Method string `json:"method"` + Method string `json:"method,omitempty"` // Number of uniq keys, processed by this selector (may be incorrect, in case of internal query optimization/caching Keys int `json:"keys"` // Count of comparators used, for this selector @@ -30,6 +30,8 @@ type ExplainSelector struct { Matched int `json:"matched"` // Count of scanned documents by this selector Items int `json:"items"` + Type string `json:"type,omitempty"` + Description string `json:"description,omitempty"` // Preselect in joined namespace execution explainings ExplainPreselect *ExplainResults `json:"explain_preselect,omitempty"` // One of selects in joined namespace execution explainings @@ -37,6 +39,13 @@ type ExplainSelector struct { Selectors []ExplainSelector `json:"selectors,omitempty"` } +type ExplainSubQuery struct { + Namespace string `json:"namespace"` + Explain ExplainResults `json:"explain"` + Keys int `json:"keys,omitempty"` + Field string `json:"field,omitempty"` +} + // ExplainResults presents query plan type ExplainResults struct { // Total query execution time @@ -61,6 +70,8 @@ type ExplainResults struct { Selectors []ExplainSelector `json:"selectors"` // Explaining attempts to inject Join queries ON-conditions into the Main Query WHERE clause OnConditionsInjections []ExplainJoinOnInjections `json:"on_conditions_injections,omitempty"` + // Explaining of subqueries' preselect + SubQueriesExplains []ExplainSubQuery `json:"subqueries,omitempty"` } // Describes the process of a single JOIN-query ON-conditions injection into the Where clause of a main query diff --git a/query.go 
b/query.go index c2f1be9cb..7a001df5d 100644 --- a/query.go +++ b/query.go @@ -555,20 +555,24 @@ func (q *Query) DWithin(index string, point Point, distance float64) *Query { return q } -func (q *Query) AggregateSum(field string) { +func (q *Query) AggregateSum(field string) *Query { q.ser.PutVarCUInt(queryAggregation).PutVarCUInt(AggSum).PutVarCUInt(1).PutVString(field) + return q } -func (q *Query) AggregateAvg(field string) { +func (q *Query) AggregateAvg(field string) *Query { q.ser.PutVarCUInt(queryAggregation).PutVarCUInt(AggAvg).PutVarCUInt(1).PutVString(field) + return q } -func (q *Query) AggregateMin(field string) { +func (q *Query) AggregateMin(field string) *Query { q.ser.PutVarCUInt(queryAggregation).PutVarCUInt(AggMin).PutVarCUInt(1).PutVString(field) + return q } -func (q *Query) AggregateMax(field string) { +func (q *Query) AggregateMax(field string) *Query { q.ser.PutVarCUInt(queryAggregation).PutVarCUInt(AggMax).PutVarCUInt(1).PutVString(field) + return q } type AggregateFacetRequest struct { diff --git a/test/config_test.go b/test/config_test.go index 6857911b1..8a965e8c6 100644 --- a/test/config_test.go +++ b/test/config_test.go @@ -10,6 +10,18 @@ import ( "github.com/restream/reindexer/v3" ) +type FtConfCheck struct { + ID int `reindex:"id,,pk"` +} + +const ( + ftCfgNsName = "ft_cfg_check" +) + +func init() { + tnamespaces[ftCfgNsName] = FtConfCheck{} +} + func TestSetDefaultQueryDebug(t *testing.T) { t.Run("set debug level to exist ns config", func(t *testing.T) { ns := "ns_with_config" @@ -89,3 +101,78 @@ func TestSetDefaultQueryDebug(t *testing.T) { assert.True(t, found) }) } + +func TestFtConfigCompablitity(t *testing.T) { + config := reindexer.DefaultFtFastConfig() + + stopWordsStrs := append(make([]interface{}, 0), "под", "на", "из") + stopWordsObjs := append(make([]interface{}, 0), + reindexer.StopWord{ + Word: "пред", + IsMorpheme: true, + }, reindexer.StopWord{ + Word: "над", + IsMorpheme: true, + }, reindexer.StopWord{ + Word: "за", + 
IsMorpheme: false, + }) + + stopWordsMix := append(make([]interface{}, 0), + "под", + reindexer.StopWord{ + Word: "пред", + IsMorpheme: true, + }, + reindexer.StopWord{ + Word: "за", + IsMorpheme: false, + }, + "на", + reindexer.StopWord{ + Word: "над", + IsMorpheme: true, + }, + "из") + + checkFtConfigAfterAddIndex := func(index string) { + err := DB.AddIndex(ftCfgNsName, reindexer.IndexDef{ + Name: index, + JSONPaths: []string{index}, + Config: config, + IndexType: "text", + FieldType: "string", + }) + assert.NoError(t, err) + + item, err := DBD.Query(reindexer.NamespacesNamespaceName).Where("name", reindexer.EQ, ftCfgNsName).Exec().FetchOne() + assert.NoError(t, err) + + indexes := item.(*reindexer.NamespaceDescription).Indexes + ftConf := indexes[len(indexes)-1].Config.(map[string]interface{}) + + actual := ftConf["stop_words"].([]interface{}) + assert.Equal(t, len(actual), len(config.StopWords)) + + for idx, wordI := range config.StopWords { + switch wordI.(type) { + case string: + assert.Equal(t, wordI, actual[idx]) + case reindexer.StopWord: + word := wordI.(reindexer.StopWord) + assert.Equal(t, word.Word, actual[idx].(map[string]interface{})["word"]) + assert.Equal(t, word.IsMorpheme, actual[idx].(map[string]interface{})["is_morpheme"]) + } + } + } + + config.StopWords = stopWordsStrs + checkFtConfigAfterAddIndex("textStrs") + + config.StopWords = stopWordsObjs + checkFtConfigAfterAddIndex("textObjs") + + config.StopWords = stopWordsMix + checkFtConfigAfterAddIndex("textMix") + +} diff --git a/test/ft/fx.go b/test/ft/fx.go index f11e505d4..1ed7da775 100644 --- a/test/ft/fx.go +++ b/test/ft/fx.go @@ -37,7 +37,7 @@ func createReindexDbInstance(rx *reindexer.Reindexer, namespace string, indexTyp if indexType == "fuzzytext" { // Disable non exact searchers, disable stop word dictionat cfg := reindexer.DefaultFtFuzzyConfig() - cfg.StopWords = []string{} + cfg.StopWords = make([]interface{}, 0) cfg.Stemmers = []string{} cfg.EnableKbLayout = false 
cfg.EnableTranslit = false @@ -47,7 +47,7 @@ func createReindexDbInstance(rx *reindexer.Reindexer, namespace string, indexTyp config = cfg } else { cfg := reindexer.DefaultFtFastConfig() - cfg.StopWords = []string{} + cfg.StopWords = make([]interface{}, 0) cfg.Stemmers = []string{} cfg.EnableKbLayout = false cfg.EnableTranslit = false diff --git a/test/join_test.go b/test/join_test.go index 46687225c..08837876c 100644 --- a/test/join_test.go +++ b/test/join_test.go @@ -471,6 +471,7 @@ type expectedExplain struct { Field string FieldType string Method string + Description string Keys int Comparators int Matched int @@ -499,6 +500,13 @@ type expectedExplainJoinOnInjections struct { Conditions []expectedExplainConditionInjection } +type expectedExplainSubQuery struct { + Namespace string + Keys int + Field string + Selectors []expectedExplain +} + func checkExplain(t *testing.T, res []reindexer.ExplainSelector, expected []expectedExplain, fieldName string) { require.Equal(t, len(expected), len(res)) for i := 0; i < len(expected); i++ { @@ -514,6 +522,7 @@ func checkExplain(t *testing.T, res []reindexer.ExplainSelector, expected []expe assert.Equalf(t, expected[i].Matched, res[i].Matched, fieldName+expected[i].Field) assert.Equalf(t, expected[i].Keys, res[i].Keys, fieldName+expected[i].Field) assert.Equalf(t, expected[i].Comparators, res[i].Comparators, fieldName+expected[i].Field) + assert.Equalf(t, expected[i].Description, res[i].Description, fieldName+expected[i].Field) if len(expected[i].Preselect) == 0 { assert.Nil(t, res[i].ExplainPreselect, fieldName+expected[i].Field) } else { @@ -561,6 +570,16 @@ func checkExplainJoinOnInjections(t *testing.T, res []reindexer.ExplainJoinOnInj } } +func checkExplainSubqueries(t *testing.T, res []reindexer.ExplainSubQuery, expected []expectedExplainSubQuery) { + require.Equal(t, len(expected), len(res)) + for i := 0; i < len(expected); i++ { + assert.Equal(t, expected[i].Namespace, res[i].Namespace) + assert.Equal(t, 
expected[i].Field, res[i].Field) + assert.Equal(t, expected[i].Keys, res[i].Keys) + checkExplain(t, res[i].Explain.Selectors, expected[i].Selectors, "") + } +} + func TestExplainJoin(t *testing.T) { nsMain := "test_explain_main" nsJoined := "test_explain_joined" diff --git a/test/queries_test.go b/test/queries_test.go index eae306113..191c84146 100644 --- a/test/queries_test.go +++ b/test/queries_test.go @@ -241,6 +241,8 @@ func init() { tnamespaces["test_items_eqaul_position"] = TestItemEqualPosition{} tnamespaces["test_items_strict"] = TestItem{} tnamespaces["test_items_strict_joined"] = TestJoinItem{} + + tnamespaces["test_items_explain"] = TestItemSimple{} } func FillTestItemsForNot() { @@ -1119,7 +1121,7 @@ func callQueriesSequence(t *testing.T, namespace string, distinct []string, sort newTestQuery(DB, namespace).Distinct(distinct).Sort(sort, desc).ReqTotal(). WhereQuery(t, newTestQuery(DB, namespace).Where("id", reindexer.EQ, mkID(rand.Int()%5000)), - reindexer.ANY, nil). + reindexer.ANY, nil). ExecAndVerify(t) newTestQuery(DB, namespace).Distinct(distinct).Sort(sort, desc).ReqTotal(). @@ -2173,3 +2175,183 @@ func TestQrIdleTimeout(t *testing.T) { } }) } + +func TestQueryExplain(t *testing.T) { + t.Parallel() + + ns := "test_items_explain" + + tx := newTestTx(DB, ns) + for i := 0; i < 5; i++ { + tx.Upsert(TestItemSimple{ID: i, Year: i, Name: randString()}) + } + tx.MustCommit() + + t.Run("Subquery explain check (WhereQuery)", func(t *testing.T) { + q := DB.Query(ns).Explain(). 
+ WhereQuery(t, DB.Query(ns).Select("id").Where("year", reindexer.EQ, 1), reindexer.GE, 0) + it := q.MustExec(t) + defer it.Close() + explainRes, err := it.GetExplainResults() + require.NoError(t, err) + require.NotNil(t, explainRes) + + printExplainRes(explainRes) + checkExplain(t, explainRes.Selectors, []expectedExplain{ + { + Field: "-scan", + Method: "scan", + Keys: 0, + Comparators: 0, + Matched: 5, + }, + { + Description: "always true", + Keys: 0, + Comparators: 0, + Matched: 0, + }, + }, "") + checkExplainSubqueries(t, explainRes.SubQueriesExplains, []expectedExplainSubQuery{ + { + Namespace: ns, + Selectors: []expectedExplain{ + { + Field: "year", + FieldType: "indexed", + Method: "index", + Keys: 1, + Comparators: 0, + Matched: 1, + }, + { + Field: "id", + FieldType: "indexed", + Method: "scan", + Keys: 0, + Comparators: 1, + Matched: 1, + }, + }, + }, + }) + }) + + t.Run("Subquery explain check (Where)", func(t *testing.T) { + q := DB.Query(ns).Explain(). + Where("id", reindexer.EQ, DB.Query(ns).Select("id").Where("year", reindexer.EQ, 3)) + it := q.MustExec(t) + defer it.Close() + explainRes, err := it.GetExplainResults() + require.NoError(t, err) + require.NotNil(t, explainRes) + + printExplainRes(explainRes) + checkExplain(t, explainRes.Selectors, []expectedExplain{ + { + Field: "id", + FieldType: "indexed", + Method: "index", + Keys: 1, + Comparators: 0, + Matched: 1, + }, + }, "") + checkExplainSubqueries(t, explainRes.SubQueriesExplains, []expectedExplainSubQuery{ + { + Namespace: ns, + Field: "id", + Selectors: []expectedExplain{ + { + Field: "year", + FieldType: "indexed", + Method: "index", + Keys: 1, + Comparators: 0, + Matched: 1, + }, + }, + }, + }) + }) + + t.Run("Subquery explain check (Where + WhereQuery)", func(t *testing.T) { + q := DB.Query(ns).Explain(). + Where("id", reindexer.SET, DB.Query(ns).Select("id").Where("year", reindexer.SET, []int{1, 2})). 
+ WhereQuery(t, DB.Query(ns).Select("id").Where("year", reindexer.EQ, 5), reindexer.LE, 10) + it := q.MustExec(t) + defer it.Close() + explainRes, err := it.GetExplainResults() + require.NoError(t, err) + require.NotNil(t, explainRes) + + printExplainRes(explainRes) + checkExplain(t, explainRes.Selectors, []expectedExplain{ + { + Field: "-scan", + Method: "scan", + Keys: 0, + Comparators: 0, + Matched: 1, + }, + { + Description: "always false", + Keys: 0, + Comparators: 0, + Matched: 0, + }, + { + Field: "id", + FieldType: "indexed", + Method: "scan", + Keys: 0, + Comparators: 1, + Matched: 0, + }, + }, "") + checkExplainSubqueries(t, explainRes.SubQueriesExplains, []expectedExplainSubQuery{ + { + Namespace: ns, + Field: "id", + Selectors: []expectedExplain{ + { + Field: "-scan", + Method: "scan", + Keys: 0, + Comparators: 0, + Matched: 5, + }, + { + Field: "year", + FieldType: "indexed", + Method: "scan", + Keys: 0, + Comparators: 1, + Matched: 2, + }, + }, + }, + { + Namespace: ns, + Selectors: []expectedExplain{ + { + Field: "year", + FieldType: "indexed", + Method: "index", + Keys: 0, + Comparators: 0, + Matched: 0, + }, + { + Field: "id", + FieldType: "indexed", + Method: "scan", + Keys: 0, + Comparators: 1, + Matched: 0, + }, + }, + }, + }) + }) +} diff --git a/test/reindexer_bench_test.go b/test/reindexer_bench_test.go index 3d5802948..63e8a174d 100644 --- a/test/reindexer_bench_test.go +++ b/test/reindexer_bench_test.go @@ -409,6 +409,31 @@ func Benchmark2CondQueryTotal(b *testing.B) { } } +func BenchmarkSubQueryEq(b *testing.B) { + for i := 0; i < b.N; i++ { + prices := priceIds[rand.Int()%len(priceIds)] + q := DBD.Query("test_items_bench").Where("price_id", reindexer.EQ, DBD.Query("test_join_items").Select("id").WhereInt32("id", reindexer.EQ, prices[rand.Int()%len(prices)])).Limit(20) + q.MustExec().FetchAll() + } +} + +func BenchmarkSubQuerySet(b *testing.B) { + for i := 0; i < b.N; i++ { + prices := priceIds[rand.Int()%len(priceIds)] + rangeMin := 
prices[rand.Int()%len(prices)] + q := DBD.Query("test_items_bench").Where("price_id", reindexer.SET, DBD.Query("test_join_items").Select("id").WhereInt32("id", reindexer.RANGE, rangeMin, rangeMin + 500)).Limit(20) + q.MustExec().FetchAll() + } +} + +func BenchmarkSubQueryAggregate(b *testing.B) { + for i := 0; i < b.N; i++ { + prices := priceIds[rand.Int()%len(priceIds)] + q := DBD.Query("test_items_bench").Where("price_id", reindexer.LT, DBD.Query("test_join_items").AggregateAvg("id").WhereInt32("id", reindexer.SET, prices...).Limit(500)).Limit(20) + q.MustExec().FetchAll() + } +} + func Benchmark2CondQueryLeftJoin(b *testing.B) { ctx := &TestJoinCtx{} for i := 0; i < b.N; i++ { diff --git a/test/select_function_test.go b/test/select_function_test.go index aa8a5caa8..d49c846ff 100644 --- a/test/select_function_test.go +++ b/test/select_function_test.go @@ -1,54 +1,84 @@ package reindexer import ( + "fmt" "testing" "github.com/restream/reindexer/v3" - "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) type TestSelectTextItem struct { - ID int `reindex:"id,,pk"` - Name string `reindex:"name,text"` + ID int `reindex:"id,,pk"` + Name string `reindex:"name,text"` + _ struct{} `reindex:"id+name=comp_idx,text,composite"` } +const testSelectFuncNs = "test_select_func" + func init() { - tnamespaces["test_select_text_item"] = TestSelectTextItem{} + tnamespaces[testSelectFuncNs] = TestSelectTextItem{} } -func FillTestSelectTextItemsTx(count int, tx *txTest) { - for i := 0; i < count; i++ { - if err := tx.Upsert(&TestSelectTextItem{ +func FillTestSelectTextItems(names []string) { + tx := newTestTx(DB, testSelectFuncNs) + for i := 0; i < len(names); i++ { + item := TestSelectTextItem{ ID: mkID(i), - Name: randLangString(), - }); err != nil { + Name: names[i], + } + if err := tx.Upsert(&item); err != nil { panic(err) } } -} -func FillTestSelectTextItems(count int) { - tx := newTestTx(DB, "test_select_text_item") - FillTestSelectTextItemsTx(count, tx) 
tx.MustCommit() } -func TestSelectFunction(t *testing.T) { - FillTestSelectTextItems(50) - CheckSelectItemsQueries(t) +func checkSelectFunc(t *testing.T, qt *queryTest, expected string) { + res_slice, err := qt.MustExec(t).FetchAll() + require.NoError(t, err) + require.Len(t, res_slice, 1) + res := res_slice[0].(*TestSelectTextItem) + require.EqualValues(t, expected, res.Name) } -func CheckSelectItemsQueries(t *testing.T) { +func TestSelectFunctions(t *testing.T) { + t.Parallel() + + const ns = testSelectFuncNs + words := []string{"some wordrx", "w(here rx fin)d", "somerxhere"} + FillTestSelectTextItems(words) - first := randLangString() + delimiters := []string{".", "=", " = "} - q1 := DB.Query("test_select_text_item").Where("name", reindexer.EQ, first).Functions("name.snippet(,,3,3)") + t.Run("check select_function highlight", func(t *testing.T) { + for _, delim := range delimiters { + q := DB.Query(ns).Where("name", reindexer.EQ, "rx"). + Functions(fmt.Sprintf("name%shighlight(<,>)", delim)) + checkSelectFunc(t, q, "w(here fin)d") + } + }) - res, _, err := q1.MustExec(t).FetchAllWithRank() - assert.NoError(t, err) + t.Run("check select_function snippet", func(t *testing.T) { + for _, delim := range delimiters { + q := DB.Query(ns).Where("name", reindexer.EQ, "rx"). + Functions(fmt.Sprintf("name%ssnippet(<,>,2,3,'!','#')", delim)) + checkSelectFunc(t, q, "!e fi#") + } + }) - for _, item := range res { - _, ok := item.(*TestSelectTextItem) - assert.True(t, ok, "Unknown type after merge ") - } + t.Run("check select_function snippet_n", func(t *testing.T) { + for _, delim := range delimiters { + q := DB.Query(ns).Where("name", reindexer.EQ, "rx"). 
+ Functions(fmt.Sprintf("name%ssnippet_n('<','>',10,2,pre_delim='[',post_delim=']',left_bound='(',right_bound=')',with_area=1)", delim)) + checkSelectFunc(t, q, "[[2,11]here f]") + } + }) + t.Run("check can't select_function snippet with composite nonstring idx field", func(t *testing.T) { + q := DB.Query(ns).Where("comp_idx", reindexer.EQ, "rx").Functions("comp_idx=snippet(<,>,3,3,'!','!')") + result, err := q.Exec(t).FetchAll() + require.ErrorContains(t, err, "Unable to apply snippet function to the non-string field 'id'") + require.Nil(t, result) + }) }