From d0afc09496603123343c7e3c2c1ff170fe04bdd5 Mon Sep 17 00:00:00 2001 From: Alexandr Shelepin <57798122+graveart@users.noreply.github.com> Date: Fri, 13 Oct 2023 12:34:22 +0300 Subject: [PATCH] Update to version v4.12.0 --- bindings/builtin/builtin.go | 21 +- bindings/builtinserver/config/config.go | 16 +- bindings/consts.go | 2 +- bindings/cproto/cproto.go | 30 +- bindings/cproto/cproto_test.go | 2 +- changelog.md | 44 +- cjson/creflect.go | 50 +- cjson/decoder.go | 6 +- cjson/encoder.go | 418 +++- cpp_src/CMakeLists.txt | 20 +- cpp_src/client/coroqueryresults.cc | 35 + cpp_src/client/coroqueryresults.h | 3 + cpp_src/client/cororeindexer.cc | 9 +- cpp_src/client/cororeindexer.h | 8 + cpp_src/client/itemimplbase.cc | 8 +- cpp_src/client/reindexer.cc | 16 + cpp_src/client/reindexer.h | 13 + cpp_src/client/reindexerimpl.cc | 58 + cpp_src/client/reindexerimpl.h | 13 +- cpp_src/client/resultserializer.cc | 7 +- cpp_src/client/rpcclient.cc | 17 +- cpp_src/client/rpcclient.h | 5 + cpp_src/cluster/config.cc | 8 +- .../replication/clusterdatareplicator.cc | 2 +- .../cluster/replication/replicationthread.cc | 135 +- .../cluster/replication/replicationthread.h | 1 + .../cluster/sharding/locatorserviceadapter.cc | 11 + .../cluster/sharding/locatorserviceadapter.h | 41 + cpp_src/cluster/sharding/sharding.cc | 29 +- cpp_src/cluster/sharding/sharding.h | 12 +- .../sharding/shardingcontrolrequest.cc | 66 + .../cluster/sharding/shardingcontrolrequest.h | 89 + cpp_src/cluster/updaterecord.cc | 143 ++ cpp_src/cluster/updaterecord.h | 41 +- .../reindexer_server/contrib/Dockerfile.deb | 2 +- cpp_src/cmd/reindexer_server/main.cc | 9 + .../cmd/reindexer_tool/commandsexecutor.cc | 11 +- cpp_src/cmd/reindexer_tool/reindexer_tool.cc | 20 +- cpp_src/core/activity_context.cc | 40 +- cpp_src/core/activity_context.h | 22 +- cpp_src/core/cbinding/reindexer_c.cc | 4 +- cpp_src/core/cjson/baseencoder.cc | 38 +- cpp_src/core/cjson/baseencoder.h | 7 +- cpp_src/core/cjson/cjsonbuilder.cc | 38 +- 
cpp_src/core/cjson/cjsonbuilder.h | 22 +- cpp_src/core/cjson/cjsondecoder.cc | 17 +- cpp_src/core/cjson/cjsondecoder.h | 11 +- cpp_src/core/cjson/cjsonmodifier.cc | 242 +- cpp_src/core/cjson/cjsonmodifier.h | 12 +- cpp_src/core/cjson/cjsontools.cc | 13 +- cpp_src/core/cjson/cjsontools.h | 15 +- cpp_src/core/cjson/csvbuilder.cc | 211 ++ cpp_src/core/cjson/csvbuilder.h | 127 ++ cpp_src/core/cjson/fieldextractor.h | 157 +- cpp_src/core/cjson/jschemachecker.cc | 6 +- cpp_src/core/cjson/jsonbuilder.cc | 19 +- cpp_src/core/cjson/jsonbuilder.h | 24 +- cpp_src/core/cjson/jsondecoder.cc | 51 +- cpp_src/core/cjson/jsondecoder.h | 5 +- cpp_src/core/cjson/msgpackbuilder.cc | 12 +- cpp_src/core/cjson/msgpackbuilder.h | 16 +- cpp_src/core/cjson/msgpackdecoder.cc | 45 +- cpp_src/core/cjson/msgpackdecoder.h | 5 +- cpp_src/core/cjson/objtype.h | 4 + cpp_src/core/cjson/protobufbuilder.cc | 2 +- cpp_src/core/cjson/protobufbuilder.h | 12 +- cpp_src/core/cjson/protobufdecoder.cc | 27 +- cpp_src/core/cjson/protobufdecoder.h | 6 +- cpp_src/core/cjson/protobufschemabuilder.cc | 2 +- cpp_src/core/cjson/tagsmatcher.h | 12 +- cpp_src/core/cjson/tagsmatcherimpl.h | 8 +- cpp_src/core/cjson/uuid_recoders.h | 4 +- cpp_src/core/clusterproxy.cc | 56 + cpp_src/core/clusterproxy.h | 21 +- cpp_src/core/comparator.cc | 37 +- cpp_src/core/comparatorimpl.h | 37 +- cpp_src/core/compositearraycomparator.cc | 15 +- cpp_src/core/dbconfig.cc | 63 +- cpp_src/core/dbconfig.h | 64 +- cpp_src/core/defnsconfigs.h | 9 +- cpp_src/core/expressiontree.h | 2 +- cpp_src/core/expressiontree.md | 31 +- cpp_src/core/ft/config/baseftconfig.cc | 32 +- cpp_src/core/ft/config/baseftconfig.h | 22 + cpp_src/core/ft/config/ftfastconfig.cc | 16 +- cpp_src/core/ft/filters/itokenfilter.h | 6 +- cpp_src/core/ft/filters/kblayout.cc | 4 +- cpp_src/core/ft/filters/kblayout.h | 2 +- cpp_src/core/ft/filters/synonyms.cc | 25 +- cpp_src/core/ft/filters/synonyms.h | 8 +- cpp_src/core/ft/filters/translit.cc | 4 +- 
cpp_src/core/ft/filters/translit.h | 2 +- cpp_src/core/ft/ft_fast/dataholder.cc | 24 +- cpp_src/core/ft/ft_fast/dataholder.h | 8 +- cpp_src/core/ft/ft_fast/dataprocessor.cc | 126 +- cpp_src/core/ft/ft_fast/dataprocessor.h | 3 +- cpp_src/core/ft/ft_fast/indextexttypes.h | 2 + cpp_src/core/ft/ft_fast/selecter.cc | 130 +- cpp_src/core/ft/ft_fast/selecter.h | 8 +- cpp_src/core/ft/ft_fuzzy/baseseacher.cc | 4 +- .../core/ft/ft_fuzzy/prefilter/prefilter.h | 1 + cpp_src/core/ft/ftdsl.cc | 16 +- cpp_src/core/ft/ftsetcashe.h | 9 +- cpp_src/core/ft/idrelset.h | 18 +- cpp_src/core/ft/numtotext.cc | 60 +- cpp_src/core/idsetcache.h | 13 +- cpp_src/core/index/ft_preselect.h | 3 +- cpp_src/core/index/index.cc | 4 +- cpp_src/core/index/index.h | 4 +- cpp_src/core/index/indexiterator.h | 8 +- cpp_src/core/index/indexordered.cc | 5 +- cpp_src/core/index/indexstore.cc | 41 +- cpp_src/core/index/indexstore.h | 1 + cpp_src/core/index/indextext/fastindextext.cc | 133 +- cpp_src/core/index/indextext/fastindextext.h | 17 +- cpp_src/core/index/indextext/fieldsgetter.h | 15 +- .../core/index/indextext/fuzzyindextext.cc | 20 +- cpp_src/core/index/indextext/fuzzyindextext.h | 8 +- cpp_src/core/index/indextext/indextext.cc | 74 +- cpp_src/core/index/indextext/indextext.h | 22 +- cpp_src/core/index/indexunordered.cc | 9 +- cpp_src/core/index/indexunordered.h | 2 +- cpp_src/core/index/payload_map.h | 18 +- cpp_src/core/index/uuid_index.h | 1 - cpp_src/core/indexdef.cc | 22 +- cpp_src/core/item.cc | 37 +- cpp_src/core/item.h | 11 +- cpp_src/core/itemimpl.cc | 20 +- cpp_src/core/itemmodifier.cc | 239 +- cpp_src/core/itemmodifier.h | 9 +- cpp_src/core/joincache.h | 17 +- cpp_src/core/keyvalue/geometry.cc | 9 + cpp_src/core/keyvalue/geometry.h | 71 +- cpp_src/core/keyvalue/uuid.cc | 276 ++- cpp_src/core/keyvalue/variant.cc | 38 +- cpp_src/core/keyvalue/variant.h | 21 +- cpp_src/core/lrucache.cc | 42 +- cpp_src/core/lrucache.h | 27 +- cpp_src/core/namespace/asyncstorage.cc | 10 +- 
cpp_src/core/namespace/asyncstorage.h | 81 +- cpp_src/core/namespace/bgnamespacedeleter.h | 32 + cpp_src/core/namespace/namespace.cc | 27 +- cpp_src/core/namespace/namespace.h | 66 +- cpp_src/core/namespace/namespaceimpl.cc | 269 ++- cpp_src/core/namespace/namespaceimpl.h | 84 +- .../namespace/snapshot/snapshothandler.cc | 4 +- cpp_src/core/nsselecter/aggregator.cc | 4 +- cpp_src/core/nsselecter/aggregator.h | 8 +- cpp_src/core/nsselecter/btreeindexiterator.h | 10 +- .../core/nsselecter/btreeindexiteratorimpl.h | 90 +- cpp_src/core/nsselecter/explaincalc.cc | 230 +- cpp_src/core/nsselecter/explaincalc.h | 98 +- cpp_src/core/nsselecter/fieldscomparator.h | 9 +- cpp_src/core/nsselecter/joinedselector.cc | 4 +- cpp_src/core/nsselecter/joinedselector.h | 14 +- cpp_src/core/nsselecter/nsselecter.cc | 662 ++++-- cpp_src/core/nsselecter/nsselecter.h | 19 +- cpp_src/core/nsselecter/qresexplainholder.h | 49 + cpp_src/core/nsselecter/querypreprocessor.cc | 459 +++- cpp_src/core/nsselecter/querypreprocessor.h | 41 +- cpp_src/core/nsselecter/selectiterator.cc | 44 +- cpp_src/core/nsselecter/selectiterator.h | 36 +- .../nsselecter/selectiteratorcontainer.cc | 67 +- .../core/nsselecter/selectiteratorcontainer.h | 2 +- cpp_src/core/nsselecter/substitutionhelpers.h | 77 +- cpp_src/core/parallelexecutor.h | 10 +- cpp_src/core/payload/fieldsset.cc | 8 +- cpp_src/core/payload/fieldsset.h | 86 +- cpp_src/core/payload/payloadfieldvalue.cc | 78 +- cpp_src/core/payload/payloadfieldvalue.h | 84 +- cpp_src/core/payload/payloadiface.cc | 67 +- cpp_src/core/payload/payloadiface.h | 42 +- cpp_src/core/proxycallback.h | 28 + cpp_src/core/query/dsl/dslencoder.cc | 5 +- cpp_src/core/query/dsl/dslencoder.h | 3 +- cpp_src/core/query/dsl/dslparser.cc | 169 +- cpp_src/core/query/expressionevaluator.cc | 307 ++- cpp_src/core/query/expressionevaluator.h | 24 +- cpp_src/core/query/query.cc | 41 +- cpp_src/core/query/query.h | 91 +- cpp_src/core/query/queryentry.cc | 68 +- 
cpp_src/core/query/queryentry.h | 61 +- cpp_src/core/query/sql/sqlencoder.cc | 34 +- cpp_src/core/query/sql/sqlencoder.h | 5 - cpp_src/core/query/sql/sqlparser.cc | 50 +- cpp_src/core/query/sql/sqlparser.h | 2 +- cpp_src/core/query/sql/sqlsuggester.cc | 3 +- cpp_src/core/querycache.h | 23 +- .../core/queryresults/additionaldatasource.h | 10 + cpp_src/core/queryresults/aggregationresult.h | 19 +- .../core/queryresults/localqueryresults.cc | 123 +- cpp_src/core/queryresults/localqueryresults.h | 31 +- cpp_src/core/queryresults/queryresults.cc | 16 +- cpp_src/core/queryresults/queryresults.h | 4 + cpp_src/core/querystat.h | 52 +- cpp_src/core/rdxcontext.cc | 15 +- cpp_src/core/rdxcontext.h | 66 +- cpp_src/core/reindexer.cc | 12 +- cpp_src/core/reindexer.h | 56 +- cpp_src/core/reindexerimpl.cc | 630 ++++-- cpp_src/core/reindexerimpl.h | 109 +- cpp_src/core/schema.cc | 38 +- cpp_src/core/schema.h | 3 + cpp_src/core/selectfunc/ctx/ftctx.cc | 3 - cpp_src/core/selectfunc/ctx/ftctx.h | 10 +- cpp_src/core/selectfunc/functionexecutor.cc | 8 +- cpp_src/core/selectfunc/functionexecutor.h | 2 +- .../core/selectfunc/functions/highlight.cc | 2 +- cpp_src/core/selectfunc/functions/snippet.cc | 2 +- cpp_src/core/selectfunc/selectfuncparser.cc | 29 +- cpp_src/core/selectfunc/selectfuncparser.h | 11 +- cpp_src/core/selectkeyresult.h | 20 +- cpp_src/core/shardingproxy.cc | 811 +++++-- cpp_src/core/shardingproxy.h | 228 +- cpp_src/core/sorting/sortexpression.cc | 13 +- cpp_src/core/transaction/transaction.cc | 2 +- cpp_src/core/transaction/transaction.h | 4 +- cpp_src/core/transaction/transactionimpl.cc | 10 +- cpp_src/core/transaction/transactionimpl.h | 9 +- cpp_src/core/type_consts.h | 1 + cpp_src/core/type_consts_helpers.cc | 32 +- cpp_src/core/type_consts_helpers.h | 32 +- cpp_src/debug/allocdebug.h | 2 +- cpp_src/debug/backtrace.cc | 4 +- cpp_src/debug/backtrace.h | 2 +- cpp_src/debug/terminate_handler.cpp | 2 +- cpp_src/estl/contexted_locks.h | 2 +- cpp_src/estl/debug_macros.h | 
19 +- cpp_src/estl/defines.h | 35 + cpp_src/estl/h_vector.h | 39 +- cpp_src/estl/intrusive_ptr.h | 39 +- cpp_src/estl/multihash_map.h | 15 +- cpp_src/estl/mutex.cc | 16 - cpp_src/estl/mutex.h | 24 +- cpp_src/estl/shared_mutex.h | 31 +- cpp_src/estl/smart_lock.h | 6 +- cpp_src/estl/tokenizer.cc | 51 +- cpp_src/estl/tokenizer.h | 68 +- .../gtests/bench/fixtures/api_tv_simple.cc | 10 +- cpp_src/gtests/bench/fixtures/api_tv_simple.h | 6 +- .../fixtures/api_tv_simple_comparators.cc | 511 +++++ .../fixtures/api_tv_simple_comparators.h | 71 + cpp_src/gtests/bench/fixtures/geometry.cc | 2 +- cpp_src/gtests/bench/reindexer_bench.cc | 9 + cpp_src/gtests/tests/API/base_tests.cc | 146 +- .../fixtures/fuzzing/random_generator.cc | 21 + .../tests/fixtures/fuzzing/random_generator.h | 2 + .../gtests/tests/fixtures/join_selects_api.h | 16 + .../gtests/tests/fixtures/json_parsing_test.h | 9 - .../tests/fixtures/msgpack_cproto_api.h | 2 +- cpp_src/gtests/tests/fixtures/ns_api.h | 63 + cpp_src/gtests/tests/fixtures/queries_api.cc | 444 ++++ cpp_src/gtests/tests/fixtures/queries_api.h | 298 ++- .../gtests/tests/fixtures/queries_verifier.h | 292 ++- .../gtests/tests/fixtures/reindexertestapi.h | 39 +- .../tests/fixtures/replication_load_api.h | 8 +- .../tests/fixtures/selector_plan_test.h | 2 + cpp_src/gtests/tests/fixtures/servercontrol.h | 3 + cpp_src/gtests/tests/fixtures/sharding_api.h | 14 +- cpp_src/gtests/tests/fixtures/ttl_index_api.h | 7 +- cpp_src/gtests/tests/fuzzing/fuzzing.cc | 12 +- .../gtests/tests/unit/composite_indexes_api.h | 10 +- .../tests/unit/composite_indexes_test.cc | 78 +- .../gtests/tests/unit/csv2jsonconverter.cc | 84 + cpp_src/gtests/tests/unit/csv2jsonconverter.h | 11 + cpp_src/gtests/tests/unit/ft/ft_generic.cc | 332 ++- cpp_src/gtests/tests/unit/grpcclient_test.cc | 42 +- cpp_src/gtests/tests/unit/join_test.cc | 37 +- .../gtests/tests/unit/json_parsing_test.cc | 22 +- .../gtests/tests/unit/msgpack_cproto_tests.cc | 6 +- 
cpp_src/gtests/tests/unit/namespace_test.cc | 470 +++- cpp_src/gtests/tests/unit/queries_test.cc | 245 ++- .../unit/query_aggregate_strict_mode_test.h | 2 +- cpp_src/gtests/tests/unit/rtree_test.cc | 6 +- .../gtests/tests/unit/selector_plan_test.cc | 85 +- .../gtests/tests/unit/sharding_base_test.cc | 781 ++++++- .../gtests/tests/unit/sharding_extras_test.cc | 4 +- cpp_src/gtests/tests/unit/sort_expr_test.cc | 8 +- .../gtests/tests/unit/string_function_test.cc | 33 +- .../gtests/tests/unit/transactions_tests.cc | 4 +- cpp_src/gtests/tests/unit/ttl_index_api.cc | 8 +- .../gtests/tests/unit/value_by_json_path.cc | 47 +- cpp_src/net/connectinstatscollector.cc | 2 - cpp_src/net/connectinstatscollector.h | 4 +- cpp_src/net/connection.cc | 14 +- cpp_src/net/cproto/cproto.cc | 6 +- cpp_src/net/cproto/cproto.h | 4 +- cpp_src/net/cproto/dispatcher.h | 12 +- cpp_src/net/cproto/serverconnection.h | 6 +- cpp_src/net/ev/ev.h | 6 +- cpp_src/net/http/router.cc | 20 +- cpp_src/net/http/router.h | 29 +- cpp_src/net/http/serverconnection.cc | 10 +- cpp_src/net/http/serverconnection.h | 2 +- cpp_src/net/socket.cc | 20 + cpp_src/net/socket.h | 5 +- cpp_src/readme.md | 4 +- cpp_src/server/CMakeLists.txt | 2 +- cpp_src/server/config.cc | 8 +- cpp_src/server/contrib/server.md | 56 +- cpp_src/server/contrib/server.yml | 142 +- cpp_src/server/dbmanager.cc | 11 +- cpp_src/server/dbmanager.h | 14 +- cpp_src/server/httpserver.cc | 103 +- cpp_src/server/httpserver.h | 8 +- cpp_src/server/pprof/pprof.cc | 2 +- cpp_src/server/rpcserver.cc | 84 +- cpp_src/server/rpcserver.h | 2 + cpp_src/server/server.cc | 8 +- cpp_src/server/server.h | 8 +- cpp_src/server/serverimpl.cc | 26 +- cpp_src/server/serverimpl.h | 10 +- cpp_src/server/statscollect/istatswatcher.h | 26 + cpp_src/server/statscollect/statscollector.cc | 102 +- cpp_src/server/statscollect/statscollector.h | 24 +- cpp_src/tools/assertrx.h | 7 +- cpp_src/tools/compiletimemap.h | 45 + cpp_src/tools/cpucheck.cc | 79 + cpp_src/tools/cpucheck.h | 
8 + cpp_src/tools/customhash.cc | 40 +- cpp_src/tools/customhash.h | 28 +- cpp_src/tools/customlocal.cc | 358 +-- cpp_src/tools/customlocal.h | 9 +- cpp_src/tools/fsops.h | 4 +- cpp_src/tools/json2kv.cc | 8 +- cpp_src/tools/json2kv.h | 2 +- cpp_src/tools/jsontools.cc | 18 +- cpp_src/tools/jsontools.h | 20 +- cpp_src/tools/logger.cc | 61 +- cpp_src/tools/logger.h | 4 +- cpp_src/tools/logginglongqueries.cc | 169 +- cpp_src/tools/logginglongqueries.h | 142 +- cpp_src/tools/md5crypt.cc | 6 +- cpp_src/tools/random.cc | 4 +- cpp_src/tools/randomgenerator.h | 33 +- cpp_src/tools/serializer.cc | 5 +- cpp_src/tools/serializer.h | 4 +- cpp_src/tools/stringstools.cc | 199 +- cpp_src/tools/stringstools.h | 86 +- cpp_src/tools/timetools.cc | 16 +- cpp_src/vendor/cpp-btree/btree_test.h | 6 +- cpp_src/vendor/gason/gason.cc | 20 +- cpp_src/vendor/gason/gason.h | 5 +- .../vendor/picohttpparser/picohttpparser.c | 8 +- cpp_src/vendor/sort/pdqsort.hpp | 1 + cpp_src/vendor/sparse-map/sparse_hash.h | 3 +- cpp_src/wal/walrecord.cc | 4 +- cpp_src/wal/walrecord.h | 8 +- dependencies.sh | 62 +- describer.go | 2 +- dsl/dsl.go | 218 +- ftfastconfig.go | 44 + ftfuzzyconfig.go | 4 + fulltext.md | 85 +- iterator.go | 46 + query.go | 40 +- readme.md | 74 +- reflect.go | 2 +- reindexer.go | 5 +- reindexer_impl.go | 173 +- samples/cpp/builtin_sample.cc | 35 +- samples/cpp/cproto_sample.cc | 37 +- sharding.md | 192 ++ test/composite_indexes_test.go | 714 +++--- test/dsl_test.go | 1944 +++++++++++++++++ test/eq_and_set_test.go | 63 +- test/helpers/server.go | 20 +- test/huge_items_test.go | 192 ++ test/join_test.go | 114 + test/queries_test.go | 100 +- test/query_test.go | 20 +- test/reindexer_test.go | 7 +- test/tx_test.go | 47 + test/uuid_test.go | 198 +- tx.go | 53 +- 375 files changed, 17199 insertions(+), 5096 deletions(-) create mode 100644 cpp_src/cluster/sharding/locatorserviceadapter.cc create mode 100644 cpp_src/cluster/sharding/locatorserviceadapter.h create mode 100644 
cpp_src/cluster/sharding/shardingcontrolrequest.cc create mode 100644 cpp_src/cluster/sharding/shardingcontrolrequest.h create mode 100644 cpp_src/core/cjson/csvbuilder.cc create mode 100644 cpp_src/core/cjson/csvbuilder.h create mode 100644 cpp_src/core/keyvalue/geometry.cc create mode 100644 cpp_src/core/namespace/bgnamespacedeleter.h create mode 100644 cpp_src/core/nsselecter/qresexplainholder.h create mode 100644 cpp_src/core/proxycallback.h create mode 100644 cpp_src/estl/defines.h delete mode 100644 cpp_src/estl/mutex.cc create mode 100644 cpp_src/gtests/bench/fixtures/api_tv_simple_comparators.cc create mode 100644 cpp_src/gtests/bench/fixtures/api_tv_simple_comparators.h delete mode 100644 cpp_src/gtests/tests/fixtures/json_parsing_test.h create mode 100644 cpp_src/gtests/tests/fixtures/queries_api.cc create mode 100644 cpp_src/gtests/tests/unit/csv2jsonconverter.cc create mode 100644 cpp_src/gtests/tests/unit/csv2jsonconverter.h create mode 100644 cpp_src/tools/compiletimemap.h create mode 100644 cpp_src/tools/cpucheck.cc create mode 100644 cpp_src/tools/cpucheck.h create mode 100644 test/dsl_test.go diff --git a/bindings/builtin/builtin.go b/bindings/builtin/builtin.go index 8034347f0..40b683af5 100644 --- a/bindings/builtin/builtin.go +++ b/bindings/builtin/builtin.go @@ -31,8 +31,13 @@ type Logger interface { Printf(level int, fmt string, msg ...interface{}) } -var logger Logger +// Separate mutexes for logger object itself and for reindexer_enable_logger call: +// logMtx provides safe access to the logger +// logEnableMtx provides atomic logic for (enable + set) and (disable + reset) procedures var logMtx sync.RWMutex +var logEnableMtx sync.Mutex +var logger Logger + var enableDebug bool var bufPool sync.Pool @@ -605,18 +610,24 @@ func CGoLogger(level int, msg string) { } } -func (binding *Builtin) EnableLogger(log bindings.Logger) { +func (binding *Builtin) setLogger(log bindings.Logger) { logMtx.Lock() defer logMtx.Unlock() logger = log +} + +func 
(binding *Builtin) EnableLogger(log bindings.Logger) { + logEnableMtx.Lock() + defer logEnableMtx.Unlock() + binding.setLogger(log) C.reindexer_enable_go_logger() } func (binding *Builtin) DisableLogger() { - logMtx.Lock() - defer logMtx.Unlock() + logEnableMtx.Lock() + defer logEnableMtx.Unlock() C.reindexer_disable_go_logger() - logger = nil + binding.setLogger(nil) } func (binding *Builtin) ReopenLogFiles() error { diff --git a/bindings/builtinserver/config/config.go b/bindings/builtinserver/config/config.go index a9613ed2d..538217a16 100644 --- a/bindings/builtinserver/config/config.go +++ b/bindings/builtinserver/config/config.go @@ -14,11 +14,10 @@ type StorageConf struct { } type NetConf struct { - HTTPAddr string `yaml:"httpaddr"` - RPCAddr string `yaml:"rpcaddr"` - WebRoot string `yaml:"webroot"` - Security bool `yaml:"security"` - RAFTCluster bool `yaml:"enable_cluster"` + HTTPAddr string `yaml:"httpaddr"` + RPCAddr string `yaml:"rpcaddr"` + WebRoot string `yaml:"webroot"` + Security bool `yaml:"security"` } type LoggerConf struct { @@ -71,10 +70,9 @@ func DefaultServerConfig() *ServerConfig { Autorepair: false, }, Net: NetConf{ - HTTPAddr: "0.0.0.0:9088", - RPCAddr: "0.0.0.0:6534", - Security: false, - RAFTCluster: false, + HTTPAddr: "0.0.0.0:9088", + RPCAddr: "0.0.0.0:6534", + Security: false, }, Logger: LoggerConf{ ServerLog: "stdout", diff --git a/bindings/consts.go b/bindings/consts.go index c87a2e441..8802e6c81 100644 --- a/bindings/consts.go +++ b/bindings/consts.go @@ -2,7 +2,7 @@ package bindings const CInt32Max = int(^uint32(0) >> 1) -const ReindexerVersion = "v4.11.0" +const ReindexerVersion = "v4.12.0" // public go consts from type_consts.h and reindexer_ctypes.h const ( diff --git a/bindings/cproto/cproto.go b/bindings/cproto/cproto.go index 10db52eca..5c66ac399 100644 --- a/bindings/cproto/cproto.go +++ b/bindings/cproto/cproto.go @@ -19,8 +19,9 @@ import ( const ( defConnPoolSize = 8 - defConnPoolLBAlgorithm = bindings.LBRoundRobin - 
pingerTimeoutSec = 60 + defConnPoolLBAlgorithm = bindings.LBPowerOfTwoChoices + pingerTimeoutSec = uint32(60) + pingResponseTimeoutSec = uint32(20) defAppName = "Go-connector" opRd = 0 @@ -841,7 +842,11 @@ func (binding *NetCProto) rpcCallNoResults(ctx context.Context, op int, cmd int, func (binding *NetCProto) pinger() { timeout := time.Second ticker := time.NewTicker(timeout) - var ticksCount uint16 + var ticksCount uint32 + pingTimeoutSec := pingResponseTimeoutSec + if uint32(binding.timeouts.RequestTimeout/time.Second) > pingTimeoutSec { + pingTimeoutSec = uint32(binding.timeouts.RequestTimeout / time.Second) + } for now := range ticker.C { ticksCount++ select { @@ -852,15 +857,28 @@ func (binding *NetCProto) pinger() { if ticksCount == pingerTimeoutSec { ticksCount = 0 conns := binding.getAllConns() + var wg sync.WaitGroup + cmpl := func(buf bindings.RawBuffer, err error) { + wg.Done() + if buf != nil { + buf.Free() + } + } for _, conn := range conns { if conn.hasError() { continue } - if conn.lastReadTime().Add(timeout).Before(now) { - buf, _ := conn.rpcCall(context.TODO(), cmdPing, uint32(binding.timeouts.RequestTimeout/time.Second)) - buf.Free() + if !conn.lastReadTime().Add(timeout).Before(now) { + continue + } + seqs := conn.getSeqs() + if cap(seqs)-len(seqs) > 0 { + continue } + wg.Add(1) + conn.rpcCallAsync(context.TODO(), cmdPing, pingTimeoutSec, cmpl) } + wg.Wait() } } } diff --git a/bindings/cproto/cproto_test.go b/bindings/cproto/cproto_test.go index 1bba3aa4c..357b35cf4 100644 --- a/bindings/cproto/cproto_test.go +++ b/bindings/cproto/cproto_test.go @@ -77,7 +77,7 @@ func TestCprotoPool(t *testing.T) { u, err := url.Parse(dsn) require.NoError(t, err) c := new(NetCProto) - err = c.Init([]url.URL{*u}) + err = c.Init([]url.URL{*u}, reindexer.WithConnPoolLoadBalancing(bindings.LBRoundRobin)) require.NoError(t, err) conns := make(map[connection]bool) diff --git a/changelog.md b/changelog.md index 039878381..1047b8f73 100644 --- a/changelog.md +++ 
b/changelog.md @@ -1,3 +1,45 @@ +# Version 4.12.0 *beta* (13.10.2023) +## Sharding +- [fea] Added [commands](sharding.md#runtime-sharding-configuration) for the runtime sharding configuration (on the empty namespaces) + +## Go connector +- [fix] Fixed potential deadlock in builtin binding, when `SetLogger` method is called multiple times + +## Ported +- [fea/fix] Ported all the fixes from [v3.17.0](https://github.com/Restream/reindexer/releases/tag/v3.17.0), [v3.18.0](https://github.com/Restream/reindexer/releases/tag/v3.18.0) and [v3.19.0](https://github.com/Restream/reindexer/releases/tag/v3.19.0) + +## Face +- [fea] Improved the drop-down section behavior on the Query builder page +- [fea] Added a link to the online documentation +- [fea] Added new proc settings to the Index config +- [fea] Changed the scale window icon for textareas +- [fea] Added the background color to the Close icon in the search history on the Namespace page +- [fea] Improved the buttons' behavior on the Query builder page +- [fea] Added the database name size limit +- [fea] Added the ability to use spaces for JSON paths +- [fea] Changed the numeric values position in the Grid +- [fix] Fixed the columns' settings resetting after the Perfstats page reloading +- [fix] Removed the double requests on the Perfstats page +- [fix] Fixed the JSON Paths tooltip description +- [fix] Fixed the pie chart position in Safari +- [fix] Fixed the popup window size for the long text +- [fix] Fixed the bottom padding on the statistics legend window +- [fix] Fixed the modal window to inform about disabled memory statistics +- [fix] Fixed the filter removal +- [fix] Fixed the filter result page when the filter is removed +- [fix] Fixed the redirect to the wrong page after all items were removed +- [fix] Fixed the Statistics chart for undefined replication.wal_size +- [fix] Fixed the column set for the namespace items during the namespace switching +- [fix] Fixed the JSON paths view for values including spaces +- [fix] 
Changed the value format for width on the integer for sqlquery +- [fix] Fixed the bug related to the query history on the Namespace Items list +- [fix] Fixed the column titles in the table settings menu on the Performance page +- [fix] Added the validation of the negative values for the index settings +- [fix] Fixed the SQL query result table +- [fix] Fixed the aggregation panel +- [fix] Fixed the items sorting +- [fix] Fixed the last column settings + # Version 4.11.0 *beta* (09.06.2023) ## Server - [fix] Fixed HTTP-transactions timeout handling @@ -1669,5 +1711,3 @@ Storages for v3 and v4 are compatible in both ways. - [ref] EnableStorage method was deprecated - [fix] Query builder did not reset opOR after InnerJoin -## Misc - diff --git a/cjson/creflect.go b/cjson/creflect.go index c703e9ddd..aa047769a 100644 --- a/cjson/creflect.go +++ b/cjson/creflect.go @@ -3,7 +3,6 @@ package cjson import ( "fmt" "reflect" - "strings" "unsafe" "github.com/restream/reindexer/v4/bindings" @@ -114,17 +113,44 @@ func (pl *payloadIface) ptr(field, idx, typ int) unsafe.Pointer { const hexChars = "0123456789abcdef" func createUuid(v [2]uint64) string { - var b strings.Builder - b.Grow(36) - for i, j := 0, 0; i < 36; i++ { - switch i { - case 8, 13, 18, 23: b.WriteByte('-') - default: - b.WriteByte(hexChars[(v[j / 16] >> ((15 - j % 16) * 4)) & 0xF]) - j++ - } - } - return b.String() + buf := make([]byte, 36) + buf[0] = hexChars[(v[0] >> 60) & 0xF]; + buf[1] = hexChars[(v[0] >> 56) & 0xF]; + buf[2] = hexChars[(v[0] >> 52) & 0xF]; + buf[3] = hexChars[(v[0] >> 48) & 0xF]; + buf[4] = hexChars[(v[0] >> 44) & 0xF]; + buf[5] = hexChars[(v[0] >> 40) & 0xF]; + buf[6] = hexChars[(v[0] >> 36) & 0xF]; + buf[7] = hexChars[(v[0] >> 32) & 0xF]; + buf[8] = '-'; + buf[9] = hexChars[(v[0] >> 28) & 0xF]; + buf[10] = hexChars[(v[0] >> 24) & 0xF]; + buf[11] = hexChars[(v[0] >> 20) & 0xF]; + buf[12] = hexChars[(v[0] >> 16) & 0xF]; + buf[13] = '-'; + buf[14] = hexChars[(v[0] >> 12) & 0xF]; + buf[15] = 
hexChars[(v[0] >> 8) & 0xF]; + buf[16] = hexChars[(v[0] >> 4) & 0xF]; + buf[17] = hexChars[v[0] & 0xF]; + buf[18] = '-'; + buf[19] = hexChars[(v[1] >> 60) & 0xF]; + buf[20] = hexChars[(v[1] >> 56) & 0xF]; + buf[21] = hexChars[(v[1] >> 52) & 0xF]; + buf[22] = hexChars[(v[1] >> 48) & 0xF]; + buf[23] = '-'; + buf[24] = hexChars[(v[1] >> 44) & 0xF]; + buf[25] = hexChars[(v[1] >> 40) & 0xF]; + buf[26] = hexChars[(v[1] >> 36) & 0xF]; + buf[27] = hexChars[(v[1] >> 32) & 0xF]; + buf[28] = hexChars[(v[1] >> 28) & 0xF]; + buf[29] = hexChars[(v[1] >> 24) & 0xF]; + buf[30] = hexChars[(v[1] >> 20) & 0xF]; + buf[31] = hexChars[(v[1] >> 16) & 0xF]; + buf[32] = hexChars[(v[1] >> 12) & 0xF]; + buf[33] = hexChars[(v[1] >> 8) & 0xF]; + buf[34] = hexChars[(v[1] >> 4) & 0xF]; + buf[35] = hexChars[v[1] & 0xF]; + return string(buf) } func (pl *payloadIface) getInt(field, idx int) int { diff --git a/cjson/decoder.go b/cjson/decoder.go index 7c823549f..dc9667ced 100644 --- a/cjson/decoder.go +++ b/cjson/decoder.go @@ -26,6 +26,8 @@ type Decoder struct { logger Logger } +const MaxIndexes = 256 + func fieldByTag(t reflect.Type, tag string) (result reflect.StructField, ok bool) { if t.Kind() == reflect.Ptr { t = t.Elem() @@ -674,7 +676,7 @@ func (dec *Decoder) DecodeCPtr(cptr uintptr, dest interface{}) (err error) { } }() - fieldsoutcnt := make([]int, 64, 64) + fieldsoutcnt := make([]int, MaxIndexes) ctagsPath := make([]int, 0, 8) dec.decodeValue(pl, ser, reflect.ValueOf(dest), fieldsoutcnt, ctagsPath) @@ -709,7 +711,7 @@ func (dec *Decoder) Decode(cjson []byte, dest interface{}) (err error) { } }() - fieldsoutcnt := make([]int, 64, 64) + fieldsoutcnt := make([]int, MaxIndexes) ctagsPath := make([]int, 0, 8) dec.decodeValue(nil, ser, reflect.ValueOf(dest), fieldsoutcnt, ctagsPath) diff --git a/cjson/encoder.go b/cjson/encoder.go index d0630282d..ab40554c9 100644 --- a/cjson/encoder.go +++ b/cjson/encoder.go @@ -223,67 +223,375 @@ func (enc *Encoder) encodeMap(v reflect.Value, rdser 
*Serializer, idx []int) err return nil } -func ParseUuid(s string) (res [2]uint64, err error) { - if len(s) == 0 { - return +var hexCharToUint = [256]uint64{ + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 255, 255, 255, 255, 255, 255, + 255, 10, 11, 12, 13, 14, 15, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 10, 11, 12, 13, 14, 15, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, +} + +func generateError(ch byte, str string) (res [2]uint64, err error) { + if ch == '-' { + err = fmt.Errorf("Invalid UUID format: '%s'", str) + } else { + err = fmt.Errorf("UUID cannot contain char '%c': '%s'", ch, str) } - i := 0 - for _, ch := range s { - if i >= 32 && ch != '-' { - err = fmt.Errorf("UUID should consist of 32 hexadecimal digits: '%s'", s) - return + return +} + +func ParseUuid(str string) (res [2]uint64, err error) { + switch len(str) { + case 0: + return + case 32: + ch := str[0] + num := hexCharToUint[ch] + if num > 15 { + return 
generateError(ch, str) } - var v uint64 - switch ch { - case '0': - v = 0 - case '1': - v = 1 - case '2': - v = 2 - case '3': - v = 3 - case '4': - v = 4 - case '5': - v = 5 - case '6': - v = 6 - case '7': - v = 7 - case '8': - v = 8 - case '9': - v = 9 - case 'a', 'A': - v = 10 - case 'b', 'B': - v = 11 - case 'c', 'C': - v = 12 - case 'd', 'D': - v = 13 - case 'e', 'E': - v = 14 - case 'f', 'F': - v = 15 - case '-': - continue - default: - err = fmt.Errorf("UUID cannot contain char '%c': '%s'", ch, s) + res[0] = num << 60 + ch = str[1] + if num = hexCharToUint[ch]; num > 15 { + return generateError(ch, str) + } + res[0] |= num << 56 + ch = str[2] + if num = hexCharToUint[ch]; num > 15 { + return generateError(ch, str) + } + res[0] |= num << 52 + ch = str[3] + if num = hexCharToUint[ch]; num > 15 { + return generateError(ch, str) + } + res[0] |= num << 48 + ch = str[4] + if num = hexCharToUint[ch]; num > 15 { + return generateError(ch, str) + } + res[0] |= num << 44 + ch = str[5] + if num = hexCharToUint[ch]; num > 15 { + return generateError(ch, str) + } + res[0] |= num << 40 + ch = str[6] + if num = hexCharToUint[ch]; num > 15 { + return generateError(ch, str) + } + res[0] |= num << 36 + ch = str[7] + if num = hexCharToUint[ch]; num > 15 { + return generateError(ch, str) + } + res[0] |= num << 32 + ch = str[8] + if num = hexCharToUint[ch]; num > 15 { + return generateError(ch, str) + } + res[0] |= num << 28 + ch = str[9] + if num = hexCharToUint[ch]; num > 15 { + return generateError(ch, str) + } + res[0] |= num << 24 + ch = str[10] + if num = hexCharToUint[ch]; num > 15 { + return generateError(ch, str) + } + res[0] |= num << 20 + ch = str[11] + if num = hexCharToUint[ch]; num > 15 { + return generateError(ch, str) + } + res[0] |= num << 16 + ch = str[12] + if num = hexCharToUint[ch]; num > 15 { + return generateError(ch, str) + } + res[0] |= num << 12 + ch = str[13] + if num = hexCharToUint[ch]; num > 15 { + return generateError(ch, str) + } + res[0] |= num << 
8 + ch = str[14] + if num = hexCharToUint[ch]; num > 15 { + return generateError(ch, str) + } + res[0] |= num << 4 + ch = str[15] + if num = hexCharToUint[ch]; num > 15 { + return generateError(ch, str) + } + res[0] |= num + ch = str[16] + if num = hexCharToUint[ch]; num > 15 { + return generateError(ch, str) + } + res[1] = num << 60 + ch = str[17] + if num = hexCharToUint[ch]; num > 15 { + return generateError(ch, str) + } + res[1] |= num << 56 + ch = str[18] + if num = hexCharToUint[ch]; num > 15 { + return generateError(ch, str) + } + res[1] |= num << 52 + ch = str[19] + if num = hexCharToUint[ch]; num > 15 { + return generateError(ch, str) + } + res[1] |= num << 48 + ch = str[20] + if num = hexCharToUint[ch]; num > 15 { + return generateError(ch, str) + } + res[1] |= num << 44 + ch = str[21] + if num = hexCharToUint[ch]; num > 15 { + return generateError(ch, str) + } + res[1] |= num << 40 + ch = str[22] + if num = hexCharToUint[ch]; num > 15 { + return generateError(ch, str) + } + res[1] |= num << 36 + ch = str[23] + if num = hexCharToUint[ch]; num > 15 { + return generateError(ch, str) + } + res[1] |= num << 32 + ch = str[24] + if num = hexCharToUint[ch]; num > 15 { + return generateError(ch, str) + } + res[1] |= num << 28 + ch = str[25] + if num = hexCharToUint[ch]; num > 15 { + return generateError(ch, str) + } + res[1] |= num << 24 + ch = str[26] + if num = hexCharToUint[ch]; num > 15 { + return generateError(ch, str) + } + res[1] |= num << 20 + ch = str[27] + if num = hexCharToUint[ch]; num > 15 { + return generateError(ch, str) + } + res[1] |= num << 16 + ch = str[28] + if num = hexCharToUint[ch]; num > 15 { + return generateError(ch, str) + } + res[1] |= num << 12 + ch = str[29] + if num = hexCharToUint[ch]; num > 15 { + return generateError(ch, str) + } + res[1] |= num << 8 + ch = str[30] + if num = hexCharToUint[ch]; num > 15 { + return generateError(ch, str) + } + res[1] |= num << 4 + ch = str[31] + if num = hexCharToUint[ch]; num > 15 { + return 
generateError(ch, str) + } + res[1] |= num + break + case 36: + if str[8] != '-' || str[13] != '-' || str[18] != '-' || str[23] != '-' { + err = fmt.Errorf("Invalid UUID format: '%s'", str) return } - res[i/16] = (res[i/16] << 4) | v - i++ - } - if i != 32 { - err = fmt.Errorf("UUID should consist of 32 hexadecimal digits: '%s'", s) - return - } - if (res[0] != 0 || res[1] != 0) && (res[1]>>63) == 0 { - err = fmt.Errorf("Variant 0 of UUID is unsupported: '%s'", s) + ch := str[0] + num := hexCharToUint[ch] + if num > 15 { + return generateError(ch, str) + } + res[0] = num << 60 + ch = str[1] + if num = hexCharToUint[ch]; num > 15 { + return generateError(ch, str) + } + res[0] |= num << 56 + ch = str[2] + if num = hexCharToUint[ch]; num > 15 { + return generateError(ch, str) + } + res[0] |= num << 52 + ch = str[3] + if num = hexCharToUint[ch]; num > 15 { + return generateError(ch, str) + } + res[0] |= num << 48 + ch = str[4] + if num = hexCharToUint[ch]; num > 15 { + return generateError(ch, str) + } + res[0] |= num << 44 + ch = str[5] + if num = hexCharToUint[ch]; num > 15 { + return generateError(ch, str) + } + res[0] |= num << 40 + ch = str[6] + if num = hexCharToUint[ch]; num > 15 { + return generateError(ch, str) + } + res[0] |= num << 36 + ch = str[7] + if num = hexCharToUint[ch]; num > 15 { + return generateError(ch, str) + } + res[0] |= num << 32 + ch = str[9] + if num = hexCharToUint[ch]; num > 15 { + return generateError(ch, str) + } + res[0] |= num << 28 + ch = str[10] + if num = hexCharToUint[ch]; num > 15 { + return generateError(ch, str) + } + res[0] |= num << 24 + ch = str[11] + if num = hexCharToUint[ch]; num > 15 { + return generateError(ch, str) + } + res[0] |= num << 20 + ch = str[12] + if num = hexCharToUint[ch]; num > 15 { + return generateError(ch, str) + } + res[0] |= num << 16 + ch = str[14] + if num = hexCharToUint[ch]; num > 15 { + return generateError(ch, str) + } + res[0] |= num << 12 + ch = str[15] + if num = hexCharToUint[ch]; num > 15 { 
+ return generateError(ch, str) + } + res[0] |= num << 8 + ch = str[16] + if num = hexCharToUint[ch]; num > 15 { + return generateError(ch, str) + } + res[0] |= num << 4 + ch = str[17] + if num = hexCharToUint[ch]; num > 15 { + return generateError(ch, str) + } + res[0] |= num + ch = str[19] + if num = hexCharToUint[ch]; num > 15 { + return generateError(ch, str) + } + res[1] = num << 60 + ch = str[20] + if num = hexCharToUint[ch]; num > 15 { + return generateError(ch, str) + } + res[1] |= num << 56 + ch = str[21] + if num = hexCharToUint[ch]; num > 15 { + return generateError(ch, str) + } + res[1] |= num << 52 + ch = str[22] + if num = hexCharToUint[ch]; num > 15 { + return generateError(ch, str) + } + res[1] |= num << 48 + ch = str[24] + if num = hexCharToUint[ch]; num > 15 { + return generateError(ch, str) + } + res[1] |= num << 44 + ch = str[25] + if num = hexCharToUint[ch]; num > 15 { + return generateError(ch, str) + } + res[1] |= num << 40 + ch = str[26] + if num = hexCharToUint[ch]; num > 15 { + return generateError(ch, str) + } + res[1] |= num << 36 + ch = str[27] + if num = hexCharToUint[ch]; num > 15 { + return generateError(ch, str) + } + res[1] |= num << 32 + ch = str[28] + if num = hexCharToUint[ch]; num > 15 { + return generateError(ch, str) + } + res[1] |= num << 28 + ch = str[29] + if num = hexCharToUint[ch]; num > 15 { + return generateError(ch, str) + } + res[1] |= num << 24 + ch = str[30] + if num = hexCharToUint[ch]; num > 15 { + return generateError(ch, str) + } + res[1] |= num << 20 + ch = str[31] + if num = hexCharToUint[ch]; num > 15 { + return generateError(ch, str) + } + res[1] |= num << 16 + ch = str[32] + if num = hexCharToUint[ch]; num > 15 { + return generateError(ch, str) + } + res[1] |= num << 12 + ch = str[33] + if num = hexCharToUint[ch]; num > 15 { + return generateError(ch, str) + } + res[1] |= num << 8 + ch = str[34] + if num = hexCharToUint[ch]; num > 15 { + return generateError(ch, str) + } + res[1] |= num << 4 + ch = str[35] 
+ if num = hexCharToUint[ch]; num > 15 { + return generateError(ch, str) + } + res[1] |= num + break + default: + err = fmt.Errorf("UUID should consist of 32 hexadecimal digits: '%s'", str) return } + if (res[0] != 0 || res[1] != 0) && (res[1] >> 63) == 0 { + err = fmt.Errorf("Variant 0 of UUID is unsupported: '%s'", str) + } return } diff --git a/cpp_src/CMakeLists.txt b/cpp_src/CMakeLists.txt index ff72e0509..0ccbbe730 100644 --- a/cpp_src/CMakeLists.txt +++ b/cpp_src/CMakeLists.txt @@ -24,6 +24,7 @@ option (ENABLE_TCMALLOC "Enable tcmalloc extensions" ON) option (ENABLE_JEMALLOC "Enable jemalloc extensions" ON) option (ENABLE_ROCKSDB "Enable rocksdb storage" ON) option (ENABLE_GRPC "Enable GRPC service" OFF) +option (ENABLE_SSE "Enable SSE instructions" ON) option (ENABLE_SERVER_AS_PROCESS_IN_TEST "Enable run reindexer server as separate process in tests" OFF) if (NOT GRPC_PACKAGE_PROVIDER) @@ -36,7 +37,7 @@ else() option (LINK_RESOURCES "Link web resources as binary data" ON) endif() -set (REINDEXER_VERSION_DEFAULT "4.11.0") +set (REINDEXER_VERSION_DEFAULT "4.12.0") if(NOT CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE "RelWithDebInfo") @@ -53,12 +54,13 @@ include (TargetArch) target_architecture(COMPILER_TARGET_ARCH) # Configure compile options -string( REPLACE "-DNDEBUG" "" CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO}") +string(REPLACE "-DNDEBUG" "" CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO}") +string(REPLACE "-O2" "-O3" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") +string(REPLACE "-O2" "-O3" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}") if (NOT ${COMPILER_TARGET_ARCH} STREQUAL "e2k") string(REPLACE "-g" "-g1" CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO}") else() string(REPLACE "-g" "-g0" CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO}") - string(REPLACE "-O2" "-O3" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") endif() if (${COMPILER_TARGET_ARCH} STREQUAL "e2k") @@ -226,6 +228,16 @@ else () endif () list(APPEND 
SRCS ${CONTEXT_ASM_SRCS}) +if (ENABLE_SSE) + if (NOT MSVC AND NOT APPLE AND (${COMPILER_TARGET_ARCH} STREQUAL "x86_64" OR ${COMPILER_TARGET_ARCH} STREQUAL "i386")) + add_definitions(-DREINDEXER_WITH_SSE=1) + message ("Building with SSE support...") + set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -msse -msse2 -msse3 -mssse3 -msse4 -msse4.1 -msse4.2 -mpopcnt") + set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse -msse2 -msse3 -mssse3 -msse4 -msse4.1 -msse4.2 -mpopcnt") + else () + message ("SSE compiler flags were disabled for the current platform") + endif () +endif () include_directories(${REINDEXER_SOURCE_PATH}) include_directories(${REINDEXER_SOURCE_PATH}/vendor) @@ -696,7 +708,7 @@ if (NOT WIN32) "estl/cow.h" "core/shardedmeta.h" "estl/overloaded.h" "estl/one_of.h" "core/queryresults/localqueryresults.h" "estl/h_vector.h" "estl/mutex.h" "estl/intrusive_ptr.h" "estl/trivial_reverse_iterator.h" "estl/span.h" "estl/chunk.h" - "estl/fast_hash_traits.h" "estl/debug_macros.h" + "estl/fast_hash_traits.h" "estl/debug_macros.h" "estl/defines.h" "client/item.h" "client/resultserializer.h" "client/internalrdxcontext.h" "client/reindexer.h" "client/reindexerconfig.h" "client/cororeindexer.h" "client/coroqueryresults.h" "client/corotransaction.h" "client/connectopts.h" diff --git a/cpp_src/client/coroqueryresults.cc b/cpp_src/client/coroqueryresults.cc index 65e565f1a..14c4aff93 100644 --- a/cpp_src/client/coroqueryresults.cc +++ b/cpp_src/client/coroqueryresults.cc @@ -1,10 +1,12 @@ #include "client/coroqueryresults.h" #include "client/itemimpl.h" #include "client/namespace.h" +#include "core/cjson/csvbuilder.h" #include "core/keyvalue/p_string.h" #include "core/queryresults/additionaldatasource.h" #include "net/cproto/coroclientconnection.h" #include "server/rpcqrwatcher.h" +#include "tools/catch_and_return.h" #include "tools/logger.h" namespace reindexer { @@ -317,6 +319,39 @@ Error CoroQueryResults::Iterator::GetMsgPack(WrSerializer &wrser, bool withHdrLe return errOK; } +void 
CoroQueryResults::Iterator::getCSVFromCJSON(std::string_view cjson, WrSerializer &wrser, CsvOrdering &ordering) const { + auto tm = qr_->GetTagsMatcher(itemParams_.nsid); + CsvBuilder builder(wrser, ordering); + CsvEncoder encoder(&tm); + + if (qr_->HaveJoined() && joinedData_.size()) { + EncoderDatasourceWithJoins joinsDs(joinedData_, *qr_); + h_vector *, 2> dss; + AdditionalDatasourceCSV ds(&joinsDs); + encoder.Encode(cjson, builder, dss); + return; + } + + encoder.Encode(cjson, builder); +} + +[[nodiscard]] Error CoroQueryResults::Iterator::GetCSV(WrSerializer &wrser, CsvOrdering &ordering) noexcept { + try { + checkIdx(); + readNext(); + switch (qr_->i_.queryParams_.flags & kResultsFormatMask) { + case kResultsCJson: { + getCSVFromCJSON(itemParams_.data, wrser, ordering); + return errOK; + } + default: + return Error(errParseBin, "Server returned data in unexpected format %d", qr_->i_.queryParams_.flags & kResultsFormatMask); + } + } + CATCH_AND_RETURN + return errOK; +} + Error CoroQueryResults::Iterator::GetJSON(WrSerializer &wrser, bool withHdrLen) { try { checkIdx(); diff --git a/cpp_src/client/coroqueryresults.h b/cpp_src/client/coroqueryresults.h index b8e4c63ff..55583afb6 100644 --- a/cpp_src/client/coroqueryresults.h +++ b/cpp_src/client/coroqueryresults.h @@ -9,6 +9,7 @@ namespace reindexer { class Query; +struct CsvOrdering; namespace net { namespace cproto { @@ -60,6 +61,7 @@ class CoroQueryResults { Error GetJSON(WrSerializer& wrser, bool withHdrLen = true); Error GetCJSON(WrSerializer& wrser, bool withHdrLen = true); Error GetMsgPack(WrSerializer& wrser, bool withHdrLen = true); + [[nodiscard]] Error GetCSV(WrSerializer& wrser, CsvOrdering& ordering) noexcept; Item GetItem(); lsn_t GetLSN(); int GetNSID(); @@ -86,6 +88,7 @@ class CoroQueryResults { void readNext(); void getJSONFromCJSON(std::string_view cjson, WrSerializer& wrser, bool withHdrLen = true) const; + void getCSVFromCJSON(std::string_view cjson, WrSerializer& wrser, CsvOrdering& 
ordering) const; void checkIdx() const; bool isAvailable() const noexcept { return idx_ >= qr_->i_.fetchOffset_ && idx_ < qr_->i_.queryParams_.qcount; } diff --git a/cpp_src/client/cororeindexer.cc b/cpp_src/client/cororeindexer.cc index 97f96bf18..e6b93c450 100644 --- a/cpp_src/client/cororeindexer.cc +++ b/cpp_src/client/cororeindexer.cc @@ -1,11 +1,14 @@ #include "client/cororeindexer.h" #include "client/itemimpl.h" #include "client/rpcclient.h" +#include "tools/cpucheck.h" namespace reindexer { namespace client { -CoroReindexer::CoroReindexer(const ReindexerConfig& config) : impl_(new RPCClient(config, nullptr)), owner_(true), ctx_() {} +CoroReindexer::CoroReindexer(const ReindexerConfig& config) : impl_(new RPCClient(config, nullptr)), owner_(true), ctx_() { + reindexer::CheckRequiredSSESupport(); +} CoroReindexer::~CoroReindexer() { if (owner_) { delete impl_; @@ -103,5 +106,9 @@ int64_t CoroReindexer::AddConnectionStateObserver(CoroReindexer::ConnectionState } Error CoroReindexer::RemoveConnectionStateObserver(int64_t id) { return impl_->RemoveConnectionStateObserver(id); } +[[nodiscard]] Error CoroReindexer::ShardingControlRequest(const sharding::ShardingControlRequestData& request) noexcept { + return impl_->ShardingControlRequest(request, ctx_); +} + } // namespace client } // namespace reindexer diff --git a/cpp_src/client/cororeindexer.h b/cpp_src/client/cororeindexer.h index a6b8896fe..3cbafbfb0 100644 --- a/cpp_src/client/cororeindexer.h +++ b/cpp_src/client/cororeindexer.h @@ -17,6 +17,10 @@ struct SnapshotOpts; struct ReplicationStateV2; struct ClusterizationStatus; +namespace sharding { +struct ShardingControlRequestData; +} + namespace client { class RPCClient; @@ -235,6 +239,10 @@ class CoroReindexer { /// @param id - observer's ID Error RemoveConnectionStateObserver(int64_t id); + /// Execute sharding control request during the sharding config change + /// @param request - control params + [[nodiscard]] Error ShardingControlRequest(const 
sharding::ShardingControlRequestData &request) noexcept; + /// Add cancelable context /// @param cancelCtx - context pointer CoroReindexer WithContext(const IRdxCancelContext *cancelCtx) { return CoroReindexer(impl_, ctx_.WithCancelContext(cancelCtx)); } diff --git a/cpp_src/client/itemimplbase.cc b/cpp_src/client/itemimplbase.cc index b61e31b30..09442ddc2 100644 --- a/cpp_src/client/itemimplbase.cc +++ b/cpp_src/client/itemimplbase.cc @@ -60,7 +60,7 @@ void ItemImplBase::FromCJSON(std::string_view slice) { const auto tupleSize = ser_.Len(); tupleHolder_ = ser_.DetachBuf(); tupleData_ = std::string_view(reinterpret_cast(tupleHolder_.get()), tupleSize); - pl.Set(0, {Variant(p_string(&tupleData_))}); + pl.Set(0, Variant(p_string(&tupleData_))); } Error ItemImplBase::FromJSON(std::string_view slice, char **endp, bool /*pkOnly*/) { @@ -97,14 +97,14 @@ Error ItemImplBase::FromJSON(std::string_view slice, char **endp, bool /*pkOnly* const auto tupleSize = ser_.Len(); tupleHolder_ = ser_.DetachBuf(); tupleData_ = std::string_view(reinterpret_cast(tupleHolder_.get()), tupleSize); - pl.Set(0, {Variant(p_string(&tupleData_))}); + pl.Set(0, Variant(p_string(&tupleData_))); } return err; } Error ItemImplBase::FromMsgPack(std::string_view buf, size_t &offset) { Payload pl = GetPayload(); - MsgPackDecoder decoder(&tagsMatcher_); + MsgPackDecoder decoder(tagsMatcher_); std::string_view data = buf; if (!unsafe_) { @@ -119,7 +119,7 @@ Error ItemImplBase::FromMsgPack(std::string_view buf, size_t &offset) { const auto tupleSize = ser_.Len(); tupleHolder_ = ser_.DetachBuf(); tupleData_ = std::string_view(reinterpret_cast(tupleHolder_.get()), tupleSize); - pl.Set(0, {Variant(p_string(&tupleData_))}); + pl.Set(0, Variant(p_string(&tupleData_))); } return err; } diff --git a/cpp_src/client/reindexer.cc b/cpp_src/client/reindexer.cc index 9c529856a..e19af0f82 100644 --- a/cpp_src/client/reindexer.cc +++ b/cpp_src/client/reindexer.cc @@ -77,5 +77,21 @@ Error 
Reindexer::CommitTransaction(Transaction& tr, QueryResults& result) { retu Error Reindexer::RollBackTransaction(Transaction& tr) { return impl_->RollBackTransaction(tr, ctx_); } Error Reindexer::GetReplState(std::string_view nsName, ReplicationStateV2& state) { return impl_->GetReplState(nsName, state, ctx_); } +[[nodiscard]] Error Reindexer::SaveNewShardingConfig(std::string_view config, int64_t sourceId) noexcept { + return impl_->SaveNewShardingConfig(config, sourceId, ctx_); +} + +[[nodiscard]] Error Reindexer::ResetShardingConfigCandidate(int64_t sourceId) noexcept { + return impl_->ResetShardingConfigCandidate(sourceId, ctx_); +} + +[[nodiscard]] Error Reindexer::ResetOldShardingConfig(int64_t sourceId) noexcept { return impl_->ResetOldShardingConfig(sourceId, ctx_); } + +[[nodiscard]] Error Reindexer::RollbackShardingConfigCandidate(int64_t sourceId) noexcept { + return impl_->RollbackShardingConfigCandidate(sourceId, ctx_); +} + +[[nodiscard]] Error Reindexer::ApplyNewShardingConfig(int64_t sourceId) noexcept { return impl_->ApplyNewShardingConfig(sourceId, ctx_); } + } // namespace client } // namespace reindexer diff --git a/cpp_src/client/reindexer.h b/cpp_src/client/reindexer.h index eaf3270cf..57ee2ca5e 100644 --- a/cpp_src/client/reindexer.h +++ b/cpp_src/client/reindexer.h @@ -200,6 +200,19 @@ class Reindexer { /// @param state - result state Error GetReplState(std::string_view nsName, ReplicationStateV2 &state); + /// Process new sharding config + /// @param config - New sharding config + /// @param sourceId - Unique identifier for operations with a specific config candidate + [[nodiscard]] Error SaveNewShardingConfig(std::string_view config, int64_t sourceId) noexcept; + /// Resetting the old sharding config before applying the new one + [[nodiscard]] Error ResetOldShardingConfig(int64_t sourceId) noexcept; + /// Resetting sharding config candidates if there were errors when saving of candidates on other nodes + [[nodiscard]] Error 
ResetShardingConfigCandidate(int64_t sourceId) noexcept; + /// Rollback config candidate if there were errors when trying to apply candidates on other nodes + [[nodiscard]] Error RollbackShardingConfigCandidate(int64_t sourceId) noexcept; + /// Apply new sharding config on all shards + [[nodiscard]] Error ApplyNewShardingConfig(int64_t sourceId) noexcept; + /// Add cancelable context /// @param cancelCtx - context pointer Reindexer WithContext(const IRdxCancelContext *cancelCtx) { return Reindexer(impl_, ctx_.WithCancelContext(cancelCtx)); } diff --git a/cpp_src/client/reindexerimpl.cc b/cpp_src/client/reindexerimpl.cc index 1810751fe..475737e83 100644 --- a/cpp_src/client/reindexerimpl.cc +++ b/cpp_src/client/reindexerimpl.cc @@ -1,6 +1,8 @@ #include "client/reindexerimpl.h" #include "client/connectionspool.h" #include "client/itemimpl.h" +#include "cluster/sharding/shardingcontrolrequest.h" +#include "tools/catch_and_return.h" namespace reindexer { namespace client { @@ -211,6 +213,27 @@ Error ReindexerImpl::GetReplState(std::string_view nsName, ReplicationStateV2 &s return sendCommand(DbCmdGetReplState, ctx, std::move(nsName), state); } +[[nodiscard]] Error ReindexerImpl::SaveNewShardingConfig(std::string_view config, int64_t sourceId, + const InternalRdxContext &ctx) noexcept { + RETURN_RESULT_NOEXCEPT( + sendCommand(DbCmdSaveNewShardingCfg, ctx, std::move(config), std::move(sourceId))) +} + +[[nodiscard]] Error ReindexerImpl::ResetShardingConfigCandidate(int64_t sourceId, const InternalRdxContext &ctx) noexcept { + RETURN_RESULT_NOEXCEPT(sendCommand(DbCmdResetConfigCandidate, ctx, std::move(sourceId))) +} + +[[nodiscard]] Error ReindexerImpl::ResetOldShardingConfig(int64_t sourceId, const InternalRdxContext &ctx) noexcept { + RETURN_RESULT_NOEXCEPT(sendCommand(DbCmdResetOldShardingCfg, ctx, std::move(sourceId))) +} + +[[nodiscard]] Error ReindexerImpl::RollbackShardingConfigCandidate(int64_t sourceId, const InternalRdxContext &ctx) noexcept { + 
RETURN_RESULT_NOEXCEPT(sendCommand(DbCmdRollbackConfigCandidate, ctx, std::move(sourceId))) +} + +[[nodiscard]] Error ReindexerImpl::ApplyNewShardingConfig(int64_t sourceId, const InternalRdxContext &ctx) noexcept { + RETURN_RESULT_NOEXCEPT(sendCommand(DbCmdApplyNewShardingCfg, ctx, std::move(sourceId)))} + Error ReindexerImpl::fetchResults(int flags, int offset, int limit, QueryResults &result) { return sendCommand(result.coroConnection(), DbCmdFetchResultsParametrized, InternalRdxContext(), std::move(flags), std::move(offset), std::move(limit), result.results_); @@ -697,6 +720,41 @@ void ReindexerImpl::coroInterpreter(Connection &conn, Connectio }); break; } + case DbCmdSaveNewShardingCfg: { + execCommand(cmd, [&conn, &cmd](std::string_view config, int64_t sourceId) { + return conn.rx.ShardingControlRequest( + sharding::MakeRequestData(config, sourceId), cmd->ctx); + }); + break; + } + case DbCmdResetOldShardingCfg: { + execCommand(cmd, [&conn, &cmd](int64_t sourceId) { + return conn.rx.ShardingControlRequest( + sharding::MakeRequestData(sourceId), cmd->ctx); + }); + break; + } + case DbCmdRollbackConfigCandidate: { + execCommand(cmd, [&conn, &cmd](int64_t sourceId) { + return conn.rx.ShardingControlRequest( + sharding::MakeRequestData(sourceId), cmd->ctx); + }); + break; + } + case DbCmdResetConfigCandidate: { + execCommand(cmd, [&conn, &cmd](int64_t sourceId) { + return conn.rx.ShardingControlRequest( + sharding::MakeRequestData(sourceId), cmd->ctx); + }); + break; + } + case DbCmdApplyNewShardingCfg: { + execCommand(cmd, [&conn, &cmd](int64_t sourceId) { + return conn.rx.ShardingControlRequest( + sharding::MakeRequestData(sourceId), cmd->ctx); + }); + break; + } case DbCmdNone: assert(false); break; diff --git a/cpp_src/client/reindexerimpl.h b/cpp_src/client/reindexerimpl.h index 851f8753b..6989c395c 100644 --- a/cpp_src/client/reindexerimpl.h +++ b/cpp_src/client/reindexerimpl.h @@ -75,6 +75,12 @@ class ReindexerImpl { Error RollBackTransaction(Transaction 
&tr, const InternalRdxContext &ctx); Error GetReplState(std::string_view nsName, ReplicationStateV2 &state, const InternalRdxContext &ctx); + [[nodiscard]] Error SaveNewShardingConfig(std::string_view config, int64_t sourceId, const InternalRdxContext &ctx) noexcept; + [[nodiscard]] Error ResetOldShardingConfig(int64_t sourceId, const InternalRdxContext &ctx) noexcept; + [[nodiscard]] Error ResetShardingConfigCandidate(int64_t sourceId, const InternalRdxContext &ctx) noexcept; + [[nodiscard]] Error RollbackShardingConfigCandidate(int64_t sourceId, const InternalRdxContext &ctx) noexcept; + [[nodiscard]] Error ApplyNewShardingConfig(int64_t sourceId, const InternalRdxContext &ctx) noexcept; + private: friend class QueryResults; friend class Transaction; @@ -136,6 +142,11 @@ class ReindexerImpl { DbCmdModifyTx, DbCmdGetReplState, DbCmdSetTxTagsMatcher, + DbCmdSaveNewShardingCfg, + DbCmdResetOldShardingCfg, + DbCmdResetConfigCandidate, + DbCmdRollbackConfigCandidate, + DbCmdApplyNewShardingCfg, }; template @@ -266,7 +277,7 @@ class ReindexerImpl { return R(Error(errNetwork, "Request for invalid connection (probably this connection was broken and invalidated)")); } } - return R(Error(errTerminated, "Client is not connected")); + return R(Error(errTerminated, "Client is not connected: %s", err.what())); } if constexpr (std::is_same_v) { DatabaseCommandData cmd(c, ctx, std::forward(args)...); diff --git a/cpp_src/client/resultserializer.cc b/cpp_src/client/resultserializer.cc index 525c30c4e..8ce7719c3 100644 --- a/cpp_src/client/resultserializer.cc +++ b/cpp_src/client/resultserializer.cc @@ -62,11 +62,12 @@ void ResultSerializer::GetExtraParams(ResultSerializer::QueryParams& ret, Option ret.aggResults.emplace(); ret.explainResults.emplace(); } - ret.aggResults->emplace_back(); + // firstLazyData guaranties, that aggResults will be non-'nullopt' + ret.aggResults->emplace_back(); // NOLINT(bugprone-unchecked-optional-access) if ((ret.flags & kResultsFormatMask) == 
kResultsMsgPack) { - ret.aggResults->back().FromMsgPack(data); + ret.aggResults->back().FromMsgPack(data); // NOLINT(bugprone-unchecked-optional-access) } else { - ret.aggResults->back().FromJSON(giftStr(data)); + ret.aggResults->back().FromJSON(giftStr(data)); // NOLINT(bugprone-unchecked-optional-access) } } break; diff --git a/cpp_src/client/rpcclient.cc b/cpp_src/client/rpcclient.cc index fc3db68f9..c60d8464d 100644 --- a/cpp_src/client/rpcclient.cc +++ b/cpp_src/client/rpcclient.cc @@ -5,10 +5,13 @@ #include "client/rpcclient.h" #include "client/snapshot.h" #include "cluster/clustercontrolrequest.h" +#include "cluster/sharding/shardingcontrolrequest.h" #include "core/namespace/namespacestat.h" #include "core/namespace/snapshot/snapshot.h" #include "core/namespacedef.h" #include "gason/gason.h" +#include "tools/catch_and_return.h" +#include "tools/cpucheck.h" #include "tools/errors.h" #include "vendor/gason/gason.h" @@ -19,6 +22,8 @@ using reindexer::net::cproto::CoroRPCAnswer; RPCClient::RPCClient(const ReindexerConfig& config, INamespaces::PtrT sharedNamespaces) : namespaces_(sharedNamespaces ? 
std::move(sharedNamespaces) : INamespaces::PtrT(new NamespacesImpl())), config_(config) { + reindexer::CheckRequiredSSESupport(); + conn_.SetConnectionStateHandler([this](Error err) { onConnectionState(std::move(err)); }); } @@ -557,7 +562,7 @@ Error RPCClient::Status(bool forceCheck, const InternalRdxContext& ctx) { if (!conn_.IsRunning()) { return Error(errParams, "Client is not running"); } - return conn_.Status(forceCheck, config_.NetTimeout, ctx.execTimeout(), ctx.getCancelCtx()); + return conn_.Status(forceCheck, std::max(config_.NetTimeout, ctx.execTimeout()), ctx.execTimeout(), ctx.getCancelCtx()); } Namespace* RPCClient::getNamespace(std::string_view nsName) { return namespaces_->Get(nsName); } @@ -749,5 +754,15 @@ Error RPCClient::GetRaftInfo(RaftInfo& info, const InternalRdxContext& ctx) { return ret.Status(); } +[[nodiscard]] Error RPCClient::ShardingControlRequest(const sharding::ShardingControlRequestData& request, + const InternalRdxContext& ctx) noexcept { + try { + WrSerializer ser; + request.GetJSON(ser); + return conn_.Call(mkCommand(cproto::kShardingControlRequest, &ctx), ser.Slice()).Status(); + } + CATCH_AND_RETURN +} + } // namespace client } // namespace reindexer diff --git a/cpp_src/client/rpcclient.h b/cpp_src/client/rpcclient.h index 85f902594..901e6d840 100644 --- a/cpp_src/client/rpcclient.h +++ b/cpp_src/client/rpcclient.h @@ -26,6 +26,9 @@ class SnapshotChunk; struct SnapshotOpts; struct ClusterControlRequestData; +namespace sharding { +struct ShardingControlRequestData; +} namespace client { class Snapshot; @@ -164,6 +167,8 @@ class RPCClient { typedef CoroQueryResults QueryResultsT; + [[nodiscard]] Error ShardingControlRequest(const sharding::ShardingControlRequestData &request, const InternalRdxContext &ctx) noexcept; + protected: Error selectImpl(const Query &query, CoroQueryResults &result, milliseconds netTimeout, const InternalRdxContext &ctx); Error modifyItemCJSON(std::string_view nsName, Item &item, CoroQueryResults 
*results, int mode, milliseconds netTimeout, diff --git a/cpp_src/cluster/config.cc b/cpp_src/cluster/config.cc index 481eebb9e..efb96db3e 100644 --- a/cpp_src/cluster/config.cc +++ b/cpp_src/cluster/config.cc @@ -649,10 +649,10 @@ sharding::Segment ShardingConfig::Key::SegmentFromJSON(const gason::Jso case gason::JsonTag::JSON_STRING: case gason::JsonTag::JSON_DOUBLE: case gason::JsonTag::JSON_NUMBER: { - auto val = jsonValue2Variant(jsonValue, KeyValueType::Undefined{}); + auto val = stringToVariant(stringifyJson(json, false)); if (val.Type().Is()) { - throw Error(errParams, "Incorrect value '%s'. Type is equal to 'KeyValueNull'", jsonValue.toString()); + throw Error(errParams, "Incorrect value '%s'. Type is equal to 'KeyValueNull'", stringifyJson(json, false)); } return sharding::Segment{val, val}; @@ -662,8 +662,8 @@ sharding::Segment ShardingConfig::Key::SegmentFromJSON(const gason::Jso if (auto dist = std::distance(begin(json), end(json)); dist != 2) throw Error(errParams, "Incorrect range for sharding key. Should contain 2 numbers but %d are received", dist); - auto left = jsonValue2Variant(begin(jsonValue)->value, KeyValueType::Undefined{}); - auto right = jsonValue2Variant(begin(jsonValue)->next->value, KeyValueType::Undefined{}); + auto left = stringToVariant(stringifyJson(*begin(json), false)); + auto right = stringToVariant(stringifyJson(*begin(json)->next, false)); if (!left.Type().IsSame(right.Type())) throw Error(errParams, "Incorrect segment '[%s, %s]'. 
Type of left value is '%s', right type is '%s'", diff --git a/cpp_src/cluster/replication/clusterdatareplicator.cc b/cpp_src/cluster/replication/clusterdatareplicator.cc index 4a18ffc08..f303532cf 100644 --- a/cpp_src/cluster/replication/clusterdatareplicator.cc +++ b/cpp_src/cluster/replication/clusterdatareplicator.cc @@ -247,7 +247,7 @@ void ClusterDataReplicator::clusterControlRoutine(int serverId) { RaftInfo raftInfo; while (!terminate_) { - onRoleChanged(RaftInfo::Role::Candidate, raftInfo.role == RaftInfo::Role::Leader ? serverId : raftManager_.GetLeaderId()); + onRoleChanged(RaftInfo::Role::Candidate, raftInfo.role == RaftInfo::Role::Leader ? serverId : raftInfo.leaderId); raftInfo.role = RaftInfo::Role::Candidate; restartElections_ = false; diff --git a/cpp_src/cluster/replication/replicationthread.cc b/cpp_src/cluster/replication/replicationthread.cc index c222040ad..51671cea1 100644 --- a/cpp_src/cluster/replication/replicationthread.cc +++ b/cpp_src/cluster/replication/replicationthread.cc @@ -1,9 +1,11 @@ #include "asyncreplthread.h" #include "client/snapshot.h" +#include "cluster/sharding/shardingcontrolrequest.h" #include "clusterreplthread.h" #include "core/defnsconfigs.h" #include "core/namespace/snapshot/snapshot.h" #include "core/reindexerimpl.h" +#include "tools/catch_and_return.h" #include "tools/flagguard.h" #include "updatesbatcher.h" #include "updatesqueue.h" @@ -255,6 +257,56 @@ void ReplThread::nodeReplicationRoutine(Node& node) { node.client.Stop(); } +template <> +[[nodiscard]] Error ReplThread::syncShardingConfig(Node& node) noexcept { + /////////////////////////////// ATTENTION! ///////////////////////////////// + ///////// This specialization is necessary because clang-tyde //////// + ///////// falsely diagnoses the private member access error here, //////// + ///////// despite the fact that this code is under 'if constexpr'. 
//////// + ////////////////////////////////////////////////////////////////////////////// + ///////// This specialization should be located up to the point of use /////// + ///////// in function `nodeReplicationImpl` in order to avoid IFNDR. /////// + ////////////////////////////////////////////////////////////////////////////// + try { + for (size_t i = 0; i < kMaxRetriesOnRoleSwitchAwait; ++i) { + ReplicationStateV2 replState; + auto err = node.client.GetReplState(std::string_view(), replState); + + if (!bhvParam_.IsLeader()) { + return Error(errParams, "Leader was switched"); + } + + if (!err.ok()) { + logWarn("%d:%d Unable to get repl state: %s", serverId_, node.uid, err.what()); + return err; + } + + statsCollector_.OnSyncStateChanged(node.uid, NodeStats::SyncState::Syncing); + updateNodeStatus(node.uid, NodeStats::Status::Online); + if (replState.clusterStatus.role != ClusterizationStatus::Role::ClusterReplica || + replState.clusterStatus.leaderId != serverId_) { + // Await transition + logTrace("%d:%d Awaiting role switch on remote node", serverId_, node.uid); + loop.sleep(kRoleSwitchStepTime); + // TODO: Check if cluster is configured on remote node + continue; + } + + logInfo("%d:%d Start applying leader's sharding config locally", serverId_, node.uid); + std::string config; + if (auto configPtr = thisNode.shardingConfig_.Get()) { + config = configPtr->GetJSON(); + } + + return node.client.WithLSN(lsn_t(0, serverId_)) + .ShardingControlRequest( + sharding::MakeRequestData(config, -1)); + }; + return Error(errTimeout, "%d:%d DB role switch waiting timeout", serverId_, node.uid); + } + CATCH_AND_RETURN +} + template Error ReplThread::nodeReplicationImpl(Node& node) { std::vector nsList; @@ -282,6 +334,14 @@ Error ReplThread::nodeReplicationImpl(Node& node) { } } + if constexpr (isClusterReplThread()) { + integralError = syncShardingConfig(node); + if (!integralError.ok()) { + logWarn("%s", integralError.what()); + return integralError; + } + } + 
logInfo("%d:%d Creating %d sync routines", serverId_, node.uid, nsList.size()); coroutine::wait_group localWg; for (const auto& ns : nsList) { @@ -364,34 +424,6 @@ Error ReplThread::nodeReplicationImpl(Node& node) { } }); } - if constexpr (isClusterReplThread()) { - if (!localWg.wait_count()) { - size_t i; - logInfo("%d:%d No sync coroutines were created. Just awating DB role switch...", serverId_, node.uid); - for (i = 0; i < kMaxRetriesOnRoleSwitchAwait; ++i) { - ReplicationStateV2 replState; - auto err = node.client.GetReplState(std::string_view(), replState); - if (!err.ok()) { - return err; - } - if (!bhvParam_.IsLeader()) { - return Error(errParams, "Leader was switched"); - } - if (replState.clusterStatus.role != ClusterizationStatus::Role::ClusterReplica || - replState.clusterStatus.leaderId != serverId_) { - // Await transition - logTrace("%d:%d Awaiting DB role switch on remote node", serverId_, node.uid); - loop.sleep(kRoleSwitchStepTime); - // TODO: Check if cluster is configured on remote node - continue; - } - break; - } - if (i == kMaxRetriesOnRoleSwitchAwait) { - return Error(errTimeout, "%d:%d DB role switch waiting timeout", serverId_, node.uid); - } - } - } localWg.wait(); if (!integralError.ok()) { logWarn("%d:%d Unable to sync remote namespaces: %s", serverId_, node.uid, integralError.what()); @@ -1073,9 +1105,14 @@ UpdateApplyStatus ReplThread::applyUpdate(const UpdateRecord& r case UpdateRecord::Type::NodeNetworkCheck: case UpdateRecord::Type::SetTagsMatcher: case UpdateRecord::Type::SetTagsMatcherTx: - default: - std::abort(); + case UpdateRecord::Type::SaveShardingConfig: + case UpdateRecord::Type::ApplyShardingConfig: + case UpdateRecord::Type::ResetOldShardingConfig: + case UpdateRecord::Type::ResetCandidateConfig: + case UpdateRecord::Type::RollbackCandidateConfig: + break; } + std::abort(); } case UpdateRecord::Type::UpdateQueryTx: case UpdateRecord::Type::DeleteQueryTx: { @@ -1143,6 +1180,44 @@ UpdateApplyStatus 
ReplThread::applyUpdate(const UpdateRecord& r TagsMatcher tm = data->tm; return UpdateApplyStatus(client.WithLSN(lsn).SetTagsMatcher(nsName, std::move(tm)), rec.type); } + case UpdateRecord::Type::SaveShardingConfig: { + auto& data = std::get>(rec.data); + auto err = client.WithLSN(lsn_t(0, serverId_)) + .ShardingControlRequest(sharding::MakeRequestData( + data->config, data->sourceId)); + return UpdateApplyStatus(std::move(err), rec.type); + } + + case UpdateRecord::Type::ApplyShardingConfig: { + auto& data = std::get>(rec.data); + auto err = client.WithLSN(lsn_t(0, serverId_)) + .ShardingControlRequest( + sharding::MakeRequestData(data->sourceId)); + return UpdateApplyStatus(std::move(err), rec.type); + } + case UpdateRecord::Type::ResetOldShardingConfig: { + auto& data = std::get>(rec.data); + auto err = client.WithLSN(lsn_t(0, serverId_)) + .ShardingControlRequest( + sharding::MakeRequestData(data->sourceId)); + return UpdateApplyStatus(std::move(err), rec.type); + } + case UpdateRecord::Type::ResetCandidateConfig: { + auto& data = std::get>(rec.data); + auto err = client.WithLSN(lsn_t(0, serverId_)) + .ShardingControlRequest( + sharding::MakeRequestData(data->sourceId)); + return UpdateApplyStatus(std::move(err), rec.type); + } + case UpdateRecord::Type::RollbackCandidateConfig: { + auto& data = std::get>(rec.data); + auto err = + client.WithLSN(lsn_t(0, serverId_)) + .ShardingControlRequest( + sharding::MakeRequestData(data->sourceId)); + return UpdateApplyStatus(std::move(err), rec.type); + } + case UpdateRecord::Type::None: case UpdateRecord::Type::EmptyUpdate: case UpdateRecord::Type::ResyncOnUpdatesDrop: diff --git a/cpp_src/cluster/replication/replicationthread.h b/cpp_src/cluster/replication/replicationthread.h index 5a150b20f..cdb0fc303 100644 --- a/cpp_src/cluster/replication/replicationthread.h +++ b/cpp_src/cluster/replication/replicationthread.h @@ -173,6 +173,7 @@ class ReplThread { bool currentlyOnline, const UpdateRecord &rec) noexcept; Error 
syncNamespace(Node &node, const std::string &nsName, const ReplicationStateV2 &followerState); + [[nodiscard]] Error syncShardingConfig(Node &node) noexcept; UpdateApplyStatus nodeUpdatesHandlingLoop(Node &node) noexcept; bool handleUpdatesWithError(Node &node, const Error &err); Error checkIfReplicationAllowed(Node &node, LogLevel &logLevel); diff --git a/cpp_src/cluster/sharding/locatorserviceadapter.cc b/cpp_src/cluster/sharding/locatorserviceadapter.cc new file mode 100644 index 000000000..ca172ce2b --- /dev/null +++ b/cpp_src/cluster/sharding/locatorserviceadapter.cc @@ -0,0 +1,11 @@ +#include "locatorserviceadapter.h" +#include "cluster/sharding/sharding.h" + +namespace reindexer::sharding { +std::shared_ptr LocatorServiceAdapter::GetShardConnection(std::string_view ns, int shardId, Error &status) { + return locator_->GetShardConnection(ns, shardId, status); +} +int LocatorServiceAdapter::ActualShardId() const noexcept { return locator_->ActualShardId(); } +int LocatorServiceAdapter::GetShardId(std::string_view ns, const Item &item) const { return locator_->GetShardId(ns, item); } +ShardIDsContainer LocatorServiceAdapter::GetShardId(const Query &q) const { return locator_->GetShardId(q); } +} // namespace reindexer::sharding diff --git a/cpp_src/cluster/sharding/locatorserviceadapter.h b/cpp_src/cluster/sharding/locatorserviceadapter.h new file mode 100644 index 000000000..7cef8ae75 --- /dev/null +++ b/cpp_src/cluster/sharding/locatorserviceadapter.h @@ -0,0 +1,41 @@ +#pragma once + +#include +#include "cluster/sharding/shardingkeys.h" +#include "tools/errors.h" + +namespace reindexer { + +namespace client { +class Reindexer; +} + +class Query; +class Item; + +namespace sharding { + +class LocatorService; + +class LocatorServiceAdapter { +public: + LocatorServiceAdapter() = default; + LocatorServiceAdapter(std::shared_ptr locator) : locator_(std::move(locator)) { + if (!locator_) { + throw Error(errLogic, "Unable to initialize LocatorService's interface with 
nullptr"); + } + } + std::shared_ptr GetShardConnection(std::string_view ns, int shardId, Error &status); + int ActualShardId() const noexcept; + int GetShardId(std::string_view ns, const Item &item) const; + ShardIDsContainer GetShardId(const Query &q) const; + + inline operator bool() const noexcept { return locator_.operator bool(); } + inline void reset() noexcept { locator_.reset(); } + +private: + std::shared_ptr locator_; +}; + +} // namespace sharding +} // namespace reindexer \ No newline at end of file diff --git a/cpp_src/cluster/sharding/sharding.cc b/cpp_src/cluster/sharding/sharding.cc index f8e19a172..5f7c1a453 100644 --- a/cpp_src/cluster/sharding/sharding.cc +++ b/cpp_src/cluster/sharding/sharding.cc @@ -12,7 +12,7 @@ namespace sharding { constexpr size_t kMaxShardingProxyConnCount = 64; constexpr size_t kMaxShardingProxyConnConcurrency = 1024; -RoutingStrategy::RoutingStrategy(const cluster::ShardingConfig &config) : config_(config), keys_(config_) {} +RoutingStrategy::RoutingStrategy(const cluster::ShardingConfig &config) : keys_(config) {} bool RoutingStrategy::getHostIdForQuery(const Query &q, int &hostId) const { bool containsKey = false; @@ -304,8 +304,8 @@ std::shared_ptr ConnectStrategy::tryConnectToLeader(const std return {}; } -LocatorService::LocatorService(ClusterProxy &rx, const cluster::ShardingConfig &config) - : rx_(rx), config_(config), routingStrategy_(config), actualShardId(config.thisShardId) {} +LocatorService::LocatorService(ClusterProxy &rx, cluster::ShardingConfig config) + : rx_(rx), config_(std::move(config)), routingStrategy_(config_), actualShardId(config.thisShardId) {} Error LocatorService::convertShardingKeysValues(KeyValueType fieldType, std::vector &keys) { return fieldType.EvaluateOneOf( @@ -371,6 +371,7 @@ Error LocatorService::Start() { } } cfg.EnableCompression = true; + cfg.RequestDedicatedThread = true; uint32_t proxyConnCount = cluster::kDefaultShardingProxyConnCount; if (config_.proxyConnCount > 0) { @@ 
-432,6 +433,20 @@ ConnectionsPtr LocatorService::GetShardsConnections(std::string_view ns, int sha return connections; } +ConnectionsPtr LocatorService::GetAllShardsConnections(Error &status) { + ConnectionsPtr connections = std::make_shared>(); + connections->reserve(hostsConnections_.size()); + for (const auto &[shardID, con] : hostsConnections_) { + connections->emplace_back(getShardConnection(shardID, status), shardID); + if (!status.ok()) return {}; + if (connections->back().IsOnThisShard() && connections->size() > 1) { + connections->back() = std::move(connections->front()); + connections->front() = {nullptr, shardID}; + } + } + return connections; +} + ConnectionsPtr LocatorService::GetShardsConnectionsWithId(const Query &q, Error &status) { ShardIDsContainer ids = routingStrategy_.GetHostsIds(q); assert(ids.size() > 0); @@ -507,12 +522,14 @@ std::shared_ptr LocatorService::GetShardConnection(std::strin if (actualShardId == defaultShard) { return {}; } - return peekHostForShard(hostsConnections_[defaultShard], defaultShard, status); + shardId = defaultShard; } - if (actualShardId == shardId) { + auto it = hostsConnections_.find(shardId); + if (actualShardId == shardId || it == hostsConnections_.end()) { return {}; } - return peekHostForShard(hostsConnections_[shardId], shardId, status); + + return peekHostForShard(it->second, shardId, status); } std::shared_ptr LocatorService::getShardConnection(int shardId, Error &status) { diff --git a/cpp_src/cluster/sharding/sharding.h b/cpp_src/cluster/sharding/sharding.h index 98546ea09..8cb7e1449 100644 --- a/cpp_src/cluster/sharding/sharding.h +++ b/cpp_src/cluster/sharding/sharding.h @@ -36,7 +36,6 @@ class RoutingStrategy { private: bool getHostIdForQuery(const Query &, int &currentId) const; - const cluster::ShardingConfig &config_; ShardingKeys keys_; }; @@ -50,6 +49,10 @@ class Connections : public std::vector> { reconnectTs(obj.reconnectTs), status(std::move(obj.status)), shutdown(obj.shutdown) {} + + 
Connections(const Connections &obj) noexcept + : base(obj), actualIndex(obj.actualIndex), reconnectTs(obj.reconnectTs), status(obj.status), shutdown(obj.shutdown) {} + void Shutdown() { std::lock_guard lck(m); if (!shutdown) { @@ -116,8 +119,8 @@ class ConnectStrategy : public IConnectStrategy { class LocatorService { public: - LocatorService(ClusterProxy &rx, const cluster::ShardingConfig &config); - ~LocatorService() = default; + LocatorService(ClusterProxy &rx, cluster::ShardingConfig config); + ~LocatorService() { Shutdown(); } LocatorService(const LocatorService &) = delete; LocatorService(LocatorService &&) = delete; LocatorService &operator=(const LocatorService &) = delete; @@ -133,6 +136,7 @@ class LocatorService { ConnectionsPtr GetShardsConnections(std::string_view ns, int shardId, Error &status); ConnectionsPtr GetShardsConnectionsWithId(const Query &q, Error &status); ConnectionsPtr GetShardsConnections(Error &status) { return GetShardsConnections("", -1, status); } + ConnectionsPtr GetAllShardsConnections(Error &status); ShardConnection GetShardConnectionWithId(std::string_view ns, const Item &item, Error &status) { int shardId = routingStrategy_.GetHostId(ns, item); return ShardConnection(GetShardConnection(ns, shardId, status), shardId); @@ -145,7 +149,7 @@ class LocatorService { ConnectionsPtr rebuildConnectionsVector(std::string_view ns, int shardId, Error &status); ConnectionsPtr getConnectionsFromCache(std::string_view ns, int shardId, bool &requiresRebuild); std::shared_ptr getShardConnection(int shardId, Error &status); - std::shared_ptr peekHostForShard(Connections &connections, int shardId, Error &status) { + std::shared_ptr peekHostForShard(Connections &connections, int shardId, Error &status) const { return ConnectStrategy(config_, connections, ActualShardId()).Connect(shardId, status); } Error validateConfig(); diff --git a/cpp_src/cluster/sharding/shardingcontrolrequest.cc b/cpp_src/cluster/sharding/shardingcontrolrequest.cc new file 
mode 100644 index 000000000..a9310d354 --- /dev/null +++ b/cpp_src/cluster/sharding/shardingcontrolrequest.cc @@ -0,0 +1,66 @@ +#include "shardingcontrolrequest.h" +#include "core/cjson/jsonbuilder.h" +#include "gason/gason.h" +#include "tools/catch_and_return.h" + +namespace reindexer::sharding { + +void ShardingControlRequestData::GetJSON(WrSerializer& ser) const { + JsonBuilder request(ser); + request.Put("type", int(type)); + { + auto payloadBuilder = request.Object("payload"); + std::visit([&payloadBuilder](const auto& d) { d.GetJSON(payloadBuilder); }, data); + } +} + +[[nodiscard]] Error ShardingControlRequestData::FromJSON(span json) noexcept { + try { + gason::JsonParser parser; + auto node = parser.Parse(json); + Type commandType = Type(node["type"].As()); + + switch (commandType) { + case Type::SaveCandidate: + case Type::ApplyLeaderConfig: + data = SaveConfigCommand{}; + break; + case Type::ApplyNew: + data = ApplyConfigCommand{}; + break; + case Type::ResetOldSharding: + case Type::ResetCandidate: + case Type::RollbackCandidate: + data = ResetConfigCommand{}; + break; + default: + return Error(errParams, "Unknown sharding command request. 
Command type [%d].", int(commandType)); + } + + const auto& payloadNode = node["payload"]; + std::visit([&payloadNode](auto& d) { d.FromJSON(payloadNode); }, data); + type = commandType; + } + CATCH_AND_RETURN + return errOK; +} + +void SaveConfigCommand::GetJSON(JsonBuilder& json) const { + json.Put("config", config); + json.Put("source_id", sourceId); +} + +void SaveConfigCommand::FromJSON(const gason::JsonNode& payload) { + config = payload["config"].As(); + sourceId = payload["source_id"].As(); +} + +void ApplyConfigCommand::GetJSON(JsonBuilder& json) const { json.Put("source_id", sourceId); } + +void ApplyConfigCommand::FromJSON(const gason::JsonNode& payload) { sourceId = payload["source_id"].As(); } + +void ResetConfigCommand::GetJSON(JsonBuilder& json) const { json.Put("source_id", sourceId); } + +void ResetConfigCommand::FromJSON(const gason::JsonNode& payload) { sourceId = payload["source_id"].As(); } + +} // namespace reindexer::sharding diff --git a/cpp_src/cluster/sharding/shardingcontrolrequest.h b/cpp_src/cluster/sharding/shardingcontrolrequest.h new file mode 100644 index 000000000..c41d0bf10 --- /dev/null +++ b/cpp_src/cluster/sharding/shardingcontrolrequest.h @@ -0,0 +1,89 @@ +#pragma once +#include +#include "estl/span.h" +#include "tools/compiletimemap.h" +#include "tools/errors.h" +#include "tools/serializer.h" + +namespace reindexer { +class JsonBuilder; +} + +namespace gason { +struct JsonNode; +} +namespace reindexer::sharding { + +struct SaveConfigCommand { + SaveConfigCommand() = default; + SaveConfigCommand(std::string_view config, int64_t sourceId) noexcept : config(config), sourceId(sourceId) {} + + std::string_view config; + int64_t sourceId; + + void GetJSON(JsonBuilder& json) const; + void FromJSON(const gason::JsonNode& payload); +}; + +struct ApplyConfigCommand { + ApplyConfigCommand() = default; + ApplyConfigCommand(int64_t sourceId) noexcept : sourceId(sourceId) {} + + int64_t sourceId; + + void GetJSON(JsonBuilder&) const; + 
void FromJSON(const gason::JsonNode&); +}; + +struct ResetConfigCommand { + ResetConfigCommand() = default; + ResetConfigCommand(int64_t sourceId) noexcept : sourceId(sourceId) {} + + int64_t sourceId; + + void GetJSON(JsonBuilder&) const; + void FromJSON(const gason::JsonNode&); +}; + +struct ShardingControlRequestData { + enum class Type { + SaveCandidate = 0, + ResetOldSharding = 1, + ResetCandidate = 2, + RollbackCandidate = 3, + ApplyNew = 4, + ApplyLeaderConfig = 5 + }; + +private: + using CommandDataType = std::variant; + using Enum2Type = meta::Map< + meta::Values2Types, + std::tuple>; + + template + friend ShardingControlRequestData MakeRequestData(Args&&... args) noexcept; + + // this constructor required only for support MSVC-compiler + template + ShardingControlRequestData(Type type, T&& data) : type(type), data(std::move(data)) {} + +public: + ShardingControlRequestData() = default; + + void GetJSON(WrSerializer& ser) const; + [[nodiscard]] Error FromJSON(span json) noexcept; + + Type type; + CommandDataType data; +}; + +template +ShardingControlRequestData MakeRequestData(Args&&... 
args) noexcept { + using DataType = ShardingControlRequestData::Enum2Type::GetType; + static_assert(std::is_nothrow_constructible_v); + return {type, DataType(std::forward(args)...)}; +} + +} // namespace reindexer::sharding diff --git a/cpp_src/cluster/updaterecord.cc b/cpp_src/cluster/updaterecord.cc index ea932e341..7ba8f1dda 100644 --- a/cpp_src/cluster/updaterecord.cc +++ b/cpp_src/cluster/updaterecord.cc @@ -45,6 +45,11 @@ UpdateRecord::UpdateRecord(UpdateRecord::Type _type, std::string _nsName, int _e case Type::NodeNetworkCheck: case Type::SetTagsMatcher: case Type::SetTagsMatcherTx: + case Type::SaveShardingConfig: + case Type::ApplyShardingConfig: + case Type::ResetOldShardingConfig: + case Type::ResetCandidateConfig: + case Type::RollbackCandidateConfig: assert(false); } } @@ -86,6 +91,11 @@ UpdateRecord::UpdateRecord(Type _type, std::string _nsName, lsn_t _lsn, lsn_t _n case Type::SetTagsMatcher: case Type::SetTagsMatcherTx: case Type::EmptyUpdate: + case Type::SaveShardingConfig: + case Type::ApplyShardingConfig: + case Type::ResetOldShardingConfig: + case Type::ResetCandidateConfig: + case Type::RollbackCandidateConfig: assert(false); } } @@ -132,6 +142,11 @@ UpdateRecord::UpdateRecord(Type _type, std::string _nsName, lsn_t _lsn, lsn_t _n case Type::SetTagsMatcher: case Type::SetTagsMatcherTx: case Type::EmptyUpdate: + case Type::SaveShardingConfig: + case Type::ApplyShardingConfig: + case Type::ResetOldShardingConfig: + case Type::ResetCandidateConfig: + case Type::RollbackCandidateConfig: assert(false); } } @@ -175,6 +190,11 @@ UpdateRecord::UpdateRecord(Type _type, std::string _nsName, lsn_t _lsn, lsn_t _n case Type::SetTagsMatcher: case Type::SetTagsMatcherTx: case Type::EmptyUpdate: + case Type::SaveShardingConfig: + case Type::ApplyShardingConfig: + case Type::ResetOldShardingConfig: + case Type::ResetCandidateConfig: + case Type::RollbackCandidateConfig: assert(false); } } @@ -225,6 +245,11 @@ UpdateRecord::UpdateRecord(UpdateRecord::Type 
_type, std::string _nsName, lsn_t case Type::ResyncOnUpdatesDrop: case Type::NodeNetworkCheck: case Type::EmptyUpdate: + case Type::SaveShardingConfig: + case Type::ApplyShardingConfig: + case Type::ResetOldShardingConfig: + case Type::ResetCandidateConfig: + case Type::RollbackCandidateConfig: assert(false); } } @@ -270,6 +295,11 @@ UpdateRecord::UpdateRecord(Type _type, std::string _nsName, lsn_t _lsn, lsn_t _n case Type::SetTagsMatcher: case Type::SetTagsMatcherTx: case Type::EmptyUpdate: + case Type::SaveShardingConfig: + case Type::ApplyShardingConfig: + case Type::ResetOldShardingConfig: + case Type::ResetCandidateConfig: + case Type::RollbackCandidateConfig: assert(false); } } @@ -312,6 +342,11 @@ UpdateRecord::UpdateRecord(Type _type, std::string _nsName, lsn_t _nsVersion, in case Type::SetTagsMatcher: case Type::SetTagsMatcherTx: case Type::EmptyUpdate: + case Type::SaveShardingConfig: + case Type::ApplyShardingConfig: + case Type::ResetOldShardingConfig: + case Type::ResetCandidateConfig: + case Type::RollbackCandidateConfig: assert(false); } } @@ -355,6 +390,106 @@ UpdateRecord::UpdateRecord(UpdateRecord::Type _type, std::string _nsName, lsn_t case Type::SetTagsMatcher: case Type::SetTagsMatcherTx: case Type::EmptyUpdate: + case Type::SaveShardingConfig: + case Type::ApplyShardingConfig: + case Type::ResetOldShardingConfig: + case Type::ResetCandidateConfig: + case Type::RollbackCandidateConfig: + assert(false); + } +} + +UpdateRecord::UpdateRecord(Type _type, int _emmiterServerId, std::string _data, int64_t sourceId) + : type(_type), emmiterServerId(_emmiterServerId) { + switch (type) { + case Type::SaveShardingConfig: + data.emplace>(new SaveNewShardingCfgRecord{std::move(_data), sourceId}); + break; + case Type::PutMeta: + case Type::PutMetaTx: + case Type::IndexAdd: + case Type::IndexDrop: + case Type::IndexUpdate: + case Type::None: + case Type::ItemUpdate: + case Type::ItemUpsert: + case Type::ItemDelete: + case Type::ItemInsert: + case 
Type::ItemUpdateTx: + case Type::ItemUpsertTx: + case Type::ItemDeleteTx: + case Type::ItemInsertTx: + case Type::UpdateQuery: + case Type::DeleteQuery: + case Type::UpdateQueryTx: + case Type::DeleteQueryTx: + case Type::SetSchema: + case Type::Truncate: + case Type::BeginTx: + case Type::CommitTx: + case Type::AddNamespace: + case Type::DropNamespace: + case Type::CloseNamespace: + case Type::RenameNamespace: + case Type::ResyncNamespaceGeneric: + case Type::ResyncNamespaceLeaderInit: + case Type::ResyncOnUpdatesDrop: + case Type::NodeNetworkCheck: + case Type::SetTagsMatcher: + case Type::SetTagsMatcherTx: + case Type::EmptyUpdate: + case Type::ApplyShardingConfig: + case Type::ResetOldShardingConfig: + case Type::ResetCandidateConfig: + case Type::RollbackCandidateConfig: + assert(false); + } +} + +UpdateRecord::UpdateRecord(Type _type, int _emmiterServerId, int64_t sourceId) : type(_type), emmiterServerId(_emmiterServerId) { + switch (type) { + case Type::ApplyShardingConfig: + data.emplace>(new ApplyNewShardingCfgRecord{sourceId}); + break; + case Type::ResetOldShardingConfig: + case Type::ResetCandidateConfig: + case Type::RollbackCandidateConfig: + data.emplace>(new ResetShardingCfgRecord{sourceId}); + break; + case Type::PutMeta: + case Type::PutMetaTx: + case Type::IndexAdd: + case Type::IndexDrop: + case Type::IndexUpdate: + case Type::None: + case Type::ItemUpdate: + case Type::ItemUpsert: + case Type::ItemDelete: + case Type::ItemInsert: + case Type::ItemUpdateTx: + case Type::ItemUpsertTx: + case Type::ItemDeleteTx: + case Type::ItemInsertTx: + case Type::UpdateQuery: + case Type::DeleteQuery: + case Type::UpdateQueryTx: + case Type::DeleteQueryTx: + case Type::SetSchema: + case Type::Truncate: + case Type::BeginTx: + case Type::CommitTx: + case Type::AddNamespace: + case Type::DropNamespace: + case Type::CloseNamespace: + case Type::RenameNamespace: + case Type::ResyncNamespaceGeneric: + case Type::ResyncNamespaceLeaderInit: + case 
Type::ResyncOnUpdatesDrop: + case Type::NodeNetworkCheck: + case Type::SetTagsMatcher: + case Type::SetTagsMatcherTx: + case Type::EmptyUpdate: + case Type::SaveShardingConfig: assert(false); } } @@ -403,6 +538,14 @@ size_t UpdateRecord::DataSize() const noexcept { case Type::SetTagsMatcher: case Type::SetTagsMatcherTx: return std::get>(data)->Size(); + case Type::SaveShardingConfig: + return std::get>(data)->Size(); + case Type::ApplyShardingConfig: + return std::get>(data)->Size(); + case Type::ResetOldShardingConfig: + case Type::ResetCandidateConfig: + case Type::RollbackCandidateConfig: + return std::get>(data)->Size(); case Type::None: default: std::abort(); diff --git a/cpp_src/cluster/updaterecord.h b/cpp_src/cluster/updaterecord.h index 4c52e343c..b01d8d63d 100644 --- a/cpp_src/cluster/updaterecord.h +++ b/cpp_src/cluster/updaterecord.h @@ -70,6 +70,25 @@ struct NodeNetworkCheckRecord { bool online; }; +struct SaveNewShardingCfgRecord { + size_t Size() const noexcept { return sizeof(SaveNewShardingCfgRecord) + config.size(); } + + std::string config; + int64_t sourceId; +}; + +struct ApplyNewShardingCfgRecord { + size_t Size() const noexcept { return sizeof(ApplyNewShardingCfgRecord); } + + int64_t sourceId; +}; + +struct ResetShardingCfgRecord { + size_t Size() const noexcept { return sizeof(ResetShardingCfgRecord); } + + int64_t sourceId; +}; + struct UpdateRecord { enum class Type { None = 0, @@ -104,7 +123,12 @@ struct UpdateRecord { EmptyUpdate = 29, NodeNetworkCheck = 30, SetTagsMatcher = 31, - SetTagsMatcherTx = 32 + SetTagsMatcherTx = 32, + SaveShardingConfig = 33, + ApplyShardingConfig = 34, + ResetOldShardingConfig = 35, + ResetCandidateConfig = 36, + RollbackCandidateConfig = 37, }; UpdateRecord() = default; @@ -117,11 +141,15 @@ struct UpdateRecord { UpdateRecord(Type _type, std::string _nsName, lsn_t _lsn, lsn_t _nsVersion, int _emmiterServerId, IndexDef _idef); UpdateRecord(Type _type, std::string _nsName, lsn_t _nsVersion, int 
_emmiterServerId, NamespaceDef _def, int64_t _stateToken); UpdateRecord(Type _type, std::string _nsName, lsn_t _lsn, lsn_t _nsVersion, int _emmiterServerId, std::string _k, std::string _v); + UpdateRecord(Type _type, int _emmiterServerId, std::string _data, int64_t sourceId); + UpdateRecord(Type _type, int _emmiterServerId, int64_t sourceId); const std::string& GetNsName() const noexcept { return nsName; } bool IsDbRecord() const noexcept { return type == Type::AddNamespace || type == Type::DropNamespace || type == Type::CloseNamespace || type == Type::RenameNamespace || - type == Type::ResyncNamespaceGeneric || type == Type::ResyncNamespaceLeaderInit; + type == Type::ResyncNamespaceGeneric || type == Type::ResyncNamespaceLeaderInit || type == Type::SaveShardingConfig || + type == Type::ApplyShardingConfig || type == Type::ResetOldShardingConfig || type == Type::RollbackCandidateConfig || + type == Type::ResetCandidateConfig; } bool IsRequiringTmUpdate() const noexcept { return type == Type::IndexAdd || type == Type::SetSchema || type == Type::IndexDrop || type == Type::IndexUpdate || IsDbRecord(); @@ -169,6 +197,11 @@ struct UpdateRecord { case Type::NodeNetworkCheck: case Type::SetTagsMatcher: case Type::SetTagsMatcherTx: + case Type::SaveShardingConfig: + case Type::ApplyShardingConfig: + case Type::ResetOldShardingConfig: + case Type::ResetCandidateConfig: + case Type::RollbackCandidateConfig: default: return false; } @@ -184,7 +217,9 @@ struct UpdateRecord { std::variant, std::unique_ptr, std::unique_ptr, std::unique_ptr, std::unique_ptr, std::unique_ptr, std::unique_ptr, - std::unique_ptr, std::unique_ptr> + std::unique_ptr, std::unique_ptr, + std::unique_ptr, std::unique_ptr, + std::unique_ptr> data; int emmiterServerId = -1; }; diff --git a/cpp_src/cmd/reindexer_server/contrib/Dockerfile.deb b/cpp_src/cmd/reindexer_server/contrib/Dockerfile.deb index fd90203e0..d64e65dc2 100644 --- a/cpp_src/cmd/reindexer_server/contrib/Dockerfile.deb +++ 
b/cpp_src/cmd/reindexer_server/contrib/Dockerfile.deb @@ -19,7 +19,7 @@ RUN cd /src && \ FROM debian:stable-slim COPY --from=build /usr/local /usr/local COPY --from=build /entrypoint.sh /entrypoint.sh -RUN apt update -y && apt install -y libleveldb1d libunwind8 libjemalloc2 libgrpc++1 && rm -rf /var/lib/apt +RUN apt update -y && apt install -y libleveldb1d libunwind8 libjemalloc2 libgrpc++1.51 && rm -rf /var/lib/apt ENV RX_DATABASE /db ENV RX_CORELOG stdout diff --git a/cpp_src/cmd/reindexer_server/main.cc b/cpp_src/cmd/reindexer_server/main.cc index 6d2535cf3..11cf822d8 100644 --- a/cpp_src/cmd/reindexer_server/main.cc +++ b/cpp_src/cmd/reindexer_server/main.cc @@ -2,9 +2,18 @@ #include "debug/backtrace.h" #include "server/server.h" #include "spdlog/spdlog.h" +#include "tools/cpucheck.h" int main(int argc, char* argv[]) { reindexer::debug::backtrace_init(); + + try { + reindexer::CheckRequiredSSESupport(); + } catch (Error& err) { + std::cerr << err.what(); + return EXIT_FAILURE; + } + reindexer_server::Server svc(reindexer_server::ServerMode::Standalone); auto err = svc.InitFromCLI(argc, argv); if (!err.ok()) { diff --git a/cpp_src/cmd/reindexer_tool/commandsexecutor.cc b/cpp_src/cmd/reindexer_tool/commandsexecutor.cc index bdd8392c2..e6fa03382 100644 --- a/cpp_src/cmd/reindexer_tool/commandsexecutor.cc +++ b/cpp_src/cmd/reindexer_tool/commandsexecutor.cc @@ -9,6 +9,7 @@ #include "tableviewscroller.h" #include "tools/fsops.h" #include "tools/jsontools.h" +#include "tools/stringstools.h" #include "wal/walrecord.h" namespace reindexer_tool { @@ -511,7 +512,7 @@ Error CommandsExecutor::processImpl(const std::string& command) noe auto token = parser.NextToken(); if (!token.length()) return Error(); - if (fromFile_ && reindexer::checkIfStartsWith(kDumpModePrefix, command, true)) { + if (fromFile_ && reindexer::checkIfStartsWith(kDumpModePrefix, command)) { DumpOptions opts; auto err = opts.FromJSON(command.substr(kDumpModePrefix.size())); if (!err.ok()) return 
Error(errParams, "Unable to parse dump mode from cmd: %s", err.what()); @@ -652,7 +653,7 @@ Error CommandsExecutor::commandSelect(const std::string& command) { reindexer::h_vector maxW; maxW.reserve(agg.fields.size()); for (const auto& field : agg.fields) { - maxW.push_back(field.length()); + maxW.emplace_back(field.length()); } for (auto& row : agg.facets) { assertrx(row.values.size() == agg.fields.size()); @@ -828,7 +829,7 @@ Error CommandsExecutor::commandDump(const std::string& command) { auto ns = parser.NextToken(); auto nsDef = std::find_if(allNsDefs.begin(), allNsDefs.end(), [&ns](const NamespaceDef& nsDef) { return ns == nsDef.name; }); if (nsDef != allNsDefs.end()) { - doNsDefs.push_back(std::move(*nsDef)); + doNsDefs.emplace_back(std::move(*nsDef)); allNsDefs.erase(nsDef); } else { std::cerr << "Namespace '" << ns << "' - skipped. (not found in storage)" << std::endl; @@ -852,7 +853,7 @@ Error CommandsExecutor::commandDump(const std::string& command) { for (auto& nsDef : doNsDefs) { // skip system namespaces, except #config - if (nsDef.name.length() > 0 && nsDef.name[0] == '#' && nsDef.name != "#config") continue; + if (reindexer::isSystemNamespaceNameFast(nsDef.name) && nsDef.name != "#config") continue; wrser << "-- Dumping namespace '" << nsDef.name << "' ..." 
<< '\n'; @@ -869,7 +870,7 @@ Error CommandsExecutor::commandDump(const std::string& command) { std::string mdata; for (auto& mkey : meta) { mdata.clear(); - const bool isSerial = reindexer::checkIfStartsWith(kSerialPrefix, mkey, true); + const bool isSerial = reindexer::checkIfStartsWith(kSerialPrefix, mkey); if (isSerial) { err = getMergedSerialMeta(parametrizedDb, nsDef.name, mkey, mdata); } else { diff --git a/cpp_src/cmd/reindexer_tool/reindexer_tool.cc b/cpp_src/cmd/reindexer_tool/reindexer_tool.cc index 4943c22ca..f347105da 100644 --- a/cpp_src/cmd/reindexer_tool/reindexer_tool.cc +++ b/cpp_src/cmd/reindexer_tool/reindexer_tool.cc @@ -7,6 +7,7 @@ #include "debug/backtrace.h" #include "reindexer_version.h" #include "repair_tool.h" +#include "tools/cpucheck.h" #include "tools/logger.h" #include "tools/stringstools.h" @@ -29,11 +30,13 @@ static void InstallLogLevel(const std::vector& args) { llevel = 3; } - reindexer::logInstallWriter([](int level, char* buf) { - if (level <= llevel) { - std::cout << buf << std::endl; - } - }); + reindexer::logInstallWriter( + [](int level, char* buf) { + if (level <= llevel) { + std::cout << buf << std::endl; + } + }, + reindexer::LoggerPolicy::WithoutLocks); } } // namespace reindexer_tool @@ -42,6 +45,13 @@ int main(int argc, char* argv[]) { using namespace reindexer_tool; reindexer::debug::backtrace_init(); + try { + reindexer::CheckRequiredSSESupport(); + } catch (Error& err) { + std::cerr << err.what(); + return EXIT_FAILURE; + } + args::ArgumentParser parser("Reindexer client tool"); args::HelpFlag help(parser, "help", "show this message", {'h', "help"}); diff --git a/cpp_src/core/activity_context.cc b/cpp_src/core/activity_context.cc index b1c495afd..37528b719 100644 --- a/cpp_src/core/activity_context.cc +++ b/cpp_src/core/activity_context.cc @@ -7,8 +7,10 @@ namespace reindexer { using namespace std::string_view_literals; void ActivityContainer::Register(const RdxActivityContext* context) { - std::unique_lock 
lck(mtx_); + std::unique_lock lck(mtx_); const auto res = cont_.insert(context); + lck.unlock(); + assertrx(res.second); (void)res; #ifdef RX_LOGACTIVITY @@ -17,8 +19,10 @@ void ActivityContainer::Register(const RdxActivityContext* context) { } void ActivityContainer::Unregister(const RdxActivityContext* context) { - std::unique_lock lck(mtx_); + std::unique_lock lck(mtx_); const auto count = cont_.erase(context); + lck.unlock(); + assertrx(count == 1u); (void)count; #ifdef RX_LOGACTIVITY @@ -28,10 +32,13 @@ void ActivityContainer::Unregister(const RdxActivityContext* context) { void ActivityContainer::Reregister(const RdxActivityContext* oldCtx, const RdxActivityContext* newCtx) { if (oldCtx == newCtx) return; - std::unique_lock lck(mtx_); + + std::unique_lock lck(mtx_); const auto eraseCount = cont_.erase(oldCtx); - assertrx(eraseCount == 1u); const auto insertRes = cont_.insert(newCtx); + lck.unlock(); + + assertrx(eraseCount == 1u); assertrx(insertRes.second); (void)eraseCount; (void)insertRes; @@ -56,25 +63,27 @@ void ActivityContainer::AddOperation(const RdxActivityContext* ctx, Activity::St std::vector ActivityContainer::List([[maybe_unused]] int serverId) { std::vector ret; - std::unique_lock lck(mtx_); + { + std::lock_guard lck(mtx_); #ifdef RX_LOGACTIVITY - log_.Dump(serverId); + log_.Dump(serverId); #endif - ret.reserve(cont_.size()); - for (const RdxActivityContext* ctx : cont_) ret.push_back(*ctx); + ret.reserve(cont_.size()); + for (const RdxActivityContext* ctx : cont_) ret.emplace_back(*ctx); + } return ret; } std::optional ActivityContainer::QueryForIpConnection(int id) { - std::unique_lock lck(mtx_); + std::lock_guard lck(mtx_); for (const RdxActivityContext* ctx : cont_) { if (ctx->CheckConnectionId(id)) { std::string ret; deepCopy(ret, ctx->Query()); - return ret; + return std::optional{std::move(ret)}; } } @@ -121,16 +130,11 @@ RdxActivityContext::operator Activity() const { return ret; } -unsigned RdxActivityContext::serializeState(MutexMark 
mark) { return Activity::WaitLock | (static_cast(mark) << kStateShift); } -unsigned RdxActivityContext::serializeState(Activity::State state) { return static_cast(state); } - std::pair RdxActivityContext::deserializeState(unsigned state) { const Activity::State decodedState = static_cast(state & kStateMask); - if (decodedState == Activity::WaitLock) { - return {decodedState, DescribeMutexMark(static_cast(state >> kStateShift))}; - } else { - return {decodedState, ""}; - } + return decodedState == Activity::WaitLock + ? std::make_pair(decodedState, DescribeMutexMark(static_cast(state >> kStateShift))) + : std::make_pair(decodedState, ""); } unsigned RdxActivityContext::nextId() noexcept { diff --git a/cpp_src/core/activity_context.h b/cpp_src/core/activity_context.h index 5170fcc44..e93e1d29d 100644 --- a/cpp_src/core/activity_context.h +++ b/cpp_src/core/activity_context.h @@ -50,7 +50,7 @@ class RdxActivityContext { class Ward { public: - Ward(RdxActivityContext* cont, Activity::State state) : context_(cont) { + Ward(RdxActivityContext* cont, Activity::State state) noexcept : context_(cont) { if (context_) { prevState_ = context_->state_.exchange(serializeState(state), std::memory_order_relaxed); #ifndef NDEBUG @@ -61,7 +61,7 @@ class RdxActivityContext { #endif } } - Ward(RdxActivityContext* cont, MutexMark mutexMark) : context_(cont) { + Ward(RdxActivityContext* cont, MutexMark mutexMark) noexcept : context_(cont) { if (context_) { prevState_ = context_->state_.exchange(serializeState(mutexMark), std::memory_order_relaxed); #ifndef NDEBUG @@ -72,7 +72,7 @@ class RdxActivityContext { #endif } } - Ward(Ward&& other) : context_(other.context_), prevState_(other.prevState_) { other.context_ = nullptr; } + Ward(Ward&& other) noexcept : context_(other.context_), prevState_(other.prevState_) { other.context_ = nullptr; } ~Ward() { if (context_) { #ifdef RX_LOGACTIVITY @@ -115,18 +115,18 @@ class RdxActivityContext { /// returning value of these functions should be 
assined to a local variable which will be destroyed after the waiting work complete /// lifetime of the local variable should not exceed of the activityContext's - Ward BeforeLock(MutexMark mutexMark) { return Ward(this, mutexMark); } - Ward BeforeState(Activity::State st) { return Ward(this, st); } - Ward BeforeIndexWork() { return Ward(this, Activity::IndexesLookup); } - Ward BeforeSelectLoop() { return Ward(this, Activity::SelectLoop); } - Ward BeforeClusterProxy() { return Ward(this, Activity::ProxiedViaClusterProxy); } - Ward BeforeShardingProxy() { return Ward(this, Activity::ProxiedViaShardingProxy); } + Ward BeforeLock(MutexMark mutexMark) noexcept { return Ward(this, mutexMark); } + Ward BeforeState(Activity::State st) noexcept { return Ward(this, st); } + Ward BeforeIndexWork() noexcept { return Ward(this, Activity::IndexesLookup); } + Ward BeforeSelectLoop() noexcept { return Ward(this, Activity::SelectLoop); } + Ward BeforeClusterProxy() noexcept { return Ward(this, Activity::ProxiedViaClusterProxy); } + Ward BeforeShardingProxy() noexcept { return Ward(this, Activity::ProxiedViaShardingProxy); } bool CheckConnectionId(int connectionId) const noexcept { return data_.connectionId == connectionId; } private: - static unsigned serializeState(MutexMark); - static unsigned serializeState(Activity::State); + static unsigned serializeState(MutexMark mark) noexcept { return Activity::WaitLock | (static_cast(mark) << kStateShift); } + static unsigned serializeState(Activity::State state) noexcept { return static_cast(state); } static std::pair deserializeState(unsigned state); static unsigned nextId() noexcept; diff --git a/cpp_src/core/cbinding/reindexer_c.cc b/cpp_src/core/cbinding/reindexer_c.cc index 0d0a6488c..837005e0c 100644 --- a/cpp_src/core/cbinding/reindexer_c.cc +++ b/cpp_src/core/cbinding/reindexer_c.cc @@ -736,9 +736,9 @@ reindexer_error reindexer_commit(uintptr_t rx, reindexer_string nsName) { return error2c(!db ? 
err_not_init : db->Commit(str2cv(nsName))); } -void reindexer_enable_logger(void (*logWriter)(int, char*)) { logInstallWriter(logWriter, false); } +void reindexer_enable_logger(void (*logWriter)(int, char*)) { logInstallWriter(logWriter, LoggerPolicy::WithLocks); } -void reindexer_disable_logger() { logInstallWriter(nullptr, false); } +void reindexer_disable_logger() { logInstallWriter(nullptr, LoggerPolicy::WithLocks); } reindexer_error reindexer_free_buffer(reindexer_resbuffer in) { constexpr static put_results_to_pool putResultsToPool; diff --git a/cpp_src/core/cjson/baseencoder.cc b/cpp_src/core/cjson/baseencoder.cc index f3c86faee..60918e25b 100644 --- a/cpp_src/core/cjson/baseencoder.cc +++ b/cpp_src/core/cjson/baseencoder.cc @@ -4,6 +4,7 @@ #include "cjsonbuilder.h" #include "cjsontools.h" #include "core/keyvalue/p_string.h" +#include "csvbuilder.h" #include "jsonbuilder.h" #include "msgpackbuilder.h" #include "protobufbuilder.h" @@ -13,10 +14,7 @@ namespace reindexer { template -BaseEncoder::BaseEncoder(const TagsMatcher* tagsMatcher, const FieldsSet* filter) : tagsMatcher_(tagsMatcher), filter_(filter) { - static_assert(std::numeric_limits::digits >= maxIndexes, - "objectScalarIndexes_ needs to provide 'maxIndexes' bits or more"); -} +BaseEncoder::BaseEncoder(const TagsMatcher* tagsMatcher, const FieldsSet* filter) : tagsMatcher_(tagsMatcher), filter_(filter) {} template void BaseEncoder::Encode(std::string_view tuple, Builder& builder, const h_vector*, 2>& dss) { @@ -49,7 +47,8 @@ void BaseEncoder::Encode(ConstPayload& pl, Builder& builder, const h_ve if (rdser.Eof()) { return; } - objectScalarIndexes_ = 0; + + objectScalarIndexes_.reset(); std::fill_n(std::begin(fieldsoutcnt_), pl.NumFields(), 0); builder.SetTagsMatcher(tagsMatcher_); if constexpr (kWithTagsPathTracking) { @@ -80,7 +79,7 @@ const TagsLengths& BaseEncoder::GetTagsMeasures(ConstPayload& pl, IEnco [[maybe_unused]] const ctag beginTag = rdser.GetCTag(); assertrx(beginTag.Type() == 
TAG_OBJECT); - tagsLengths_.reserve(maxIndexes); + tagsLengths_.reserve(kMaxIndexes); tagsLengths_.push_back(StartObject); while (collectTagsSizes(pl, rdser)) { @@ -128,7 +127,16 @@ void BaseEncoder::encodeJoinedItems(Builder& builder, IEncoderDatasourc template bool BaseEncoder::encode(ConstPayload* pl, Serializer& rdser, Builder& builder, bool visible) { const ctag tag = rdser.GetCTag(); + if (tag == kCTagEnd) { + if constexpr (kWithFieldExtractor) { + if (visible && filter_ && indexedTagsPath_.size() && indexedTagsPath_.back().IsWithIndex()) { + const auto field = builder.TargetField(); + if (field >= 0 && !builder.IsHavingOffset() && filter_->match(indexedTagsPath_)) { + builder.OnScopeEnd(fieldsoutcnt_[field]); + } + } + } return false; } @@ -145,14 +153,10 @@ bool BaseEncoder::encode(ConstPayload* pl, Serializer& rdser, Builder& // get field from indexed field if (tagField >= 0) { if (!pl) throw Error(errParams, "Trying to encode index field %d without payload", tagField); - if ((objectScalarIndexes_ & (1ULL << tagField)) && (tagType != TAG_ARRAY)) { - std::string fieldName; - if (tagName && tagsMatcher_) { - fieldName = tagsMatcher_->tag2name(tagName); - } - throw Error(errParams, "Non-array field '%s' [%d] from '%s' can only be encoded once.", fieldName, tagField, pl->Type().Name()); + const auto& f = pl->Type().Field(tagField); + if (!f.IsArray() && objectScalarIndexes_.test(tagField)) { + throw Error(errParams, "Non-array field '%s' [%d] from '%s' can only be encoded once.", f.Name(), tagField, pl->Type().Name()); } - objectScalarIndexes_ |= (1ULL << tagField); assertrx(tagField < pl->NumFields()); int* cnt = &fieldsoutcnt_[tagField]; switch (tagType) { @@ -175,6 +179,7 @@ bool BaseEncoder::encode(ConstPayload* pl, Serializer& rdser, Builder& break; } case TAG_NULL: + objectScalarIndexes_.set(tagField); if (visible) builder.Null(tagName); break; case TAG_VARINT: @@ -184,7 +189,8 @@ bool BaseEncoder::encode(ConstPayload* pl, Serializer& rdser, Builder& 
case TAG_END: case TAG_OBJECT: case TAG_UUID: - if (visible) builder.Put(tagName, pl->Get(tagField, (*cnt))); + objectScalarIndexes_.set(tagField); + if (visible) builder.Put(tagName, pl->Get(tagField, (*cnt)), *cnt); ++(*cnt); break; } @@ -216,7 +222,6 @@ bool BaseEncoder::encode(ConstPayload* pl, Serializer& rdser, Builder& break; } case TAG_OBJECT: { - objectScalarIndexes_ = 0; if (visible) { auto objNode = builder.Object(tagName); while (encode(pl, rdser, objNode, true)) @@ -237,7 +242,7 @@ bool BaseEncoder::encode(ConstPayload* pl, Serializer& rdser, Builder& case TAG_UUID: if (visible) { Variant value = rdser.GetRawVariant(KeyValueType{tagType}); - builder.Put(tagName, std::move(value)); + builder.Put(tagName, std::move(value), 0); } else { rdser.SkipRawVariant(KeyValueType{tagType}); } @@ -345,5 +350,6 @@ template class BaseEncoder; template class BaseEncoder; template class BaseEncoder; template class BaseEncoder; +template class BaseEncoder; } // namespace reindexer diff --git a/cpp_src/core/cjson/baseencoder.h b/cpp_src/core/cjson/baseencoder.h index 98a4a65b6..0daa01ec9 100644 --- a/cpp_src/core/cjson/baseencoder.h +++ b/cpp_src/core/cjson/baseencoder.h @@ -14,6 +14,7 @@ class TagsMatcher; class JsonBuilder; class MsgPackBuilder; class ProtobufBuilder; +class CsvBuilder; class IEncoderDatasourceWithJoins { public: @@ -49,6 +50,7 @@ class BaseEncoder { protected: using IndexedTagsPathInternalT = IndexedTagsPathImpl<16>; constexpr static bool kWithTagsPathTracking = std::is_same_v; + constexpr static bool kWithFieldExtractor = std::is_same_v; struct DummyTagsPathScope { DummyTagsPathScope(TagsPath & /*tagsPath*/, int16_t /*tagName*/) noexcept {} @@ -63,18 +65,19 @@ class BaseEncoder { std::string_view getPlTuple(ConstPayload &pl); const TagsMatcher *tagsMatcher_; - int fieldsoutcnt_[maxIndexes]; + int fieldsoutcnt_[kMaxIndexes]; const FieldsSet *filter_; WrSerializer tmpPlTuple_; TagsPath curTagsPath_; IndexedTagsPathInternalT indexedTagsPath_; TagsLengths 
tagsLengths_; - uint64_t objectScalarIndexes_ = 0; + ScalarIndexesSetT objectScalarIndexes_; }; using JsonEncoder = BaseEncoder; using CJsonEncoder = BaseEncoder; using MsgPackEncoder = BaseEncoder; using ProtobufEncoder = BaseEncoder; +using CsvEncoder = BaseEncoder; } // namespace reindexer diff --git a/cpp_src/core/cjson/cjsonbuilder.cc b/cpp_src/core/cjson/cjsonbuilder.cc index f383e5052..bd70aaee2 100644 --- a/cpp_src/core/cjson/cjsonbuilder.cc +++ b/cpp_src/core/cjson/cjsonbuilder.cc @@ -39,7 +39,7 @@ void CJsonBuilder::Array(int tagName, span data, int /*offset*/) { } } -CJsonBuilder &CJsonBuilder::Put(int tagName, bool arg) { +CJsonBuilder &CJsonBuilder::Put(int tagName, bool arg, int /*offset*/) { if (type_ == ObjType::TypeArray) { itemType_ = TAG_BOOL; } else { @@ -50,7 +50,7 @@ CJsonBuilder &CJsonBuilder::Put(int tagName, bool arg) { return *this; } -CJsonBuilder &CJsonBuilder::Put(int tagName, int64_t arg) { +CJsonBuilder &CJsonBuilder::Put(int tagName, int64_t arg, int /*offset*/) { if (type_ == ObjType::TypeArray) { itemType_ = TAG_VARINT; } else { @@ -61,7 +61,7 @@ CJsonBuilder &CJsonBuilder::Put(int tagName, int64_t arg) { return *this; } -CJsonBuilder &CJsonBuilder::Put(int tagName, int arg) { +CJsonBuilder &CJsonBuilder::Put(int tagName, int arg, int /*offset*/) { if (type_ == ObjType::TypeArray) { itemType_ = TAG_VARINT; } else { @@ -72,7 +72,7 @@ CJsonBuilder &CJsonBuilder::Put(int tagName, int arg) { return *this; } -CJsonBuilder &CJsonBuilder::Put(int tagName, double arg) { +CJsonBuilder &CJsonBuilder::Put(int tagName, double arg, int /*offset*/) { if (type_ == ObjType::TypeArray) { itemType_ = TAG_DOUBLE; } else { @@ -83,7 +83,7 @@ CJsonBuilder &CJsonBuilder::Put(int tagName, double arg) { return *this; } -CJsonBuilder &CJsonBuilder::Put(int tagName, std::string_view arg) { +CJsonBuilder &CJsonBuilder::Put(int tagName, std::string_view arg, int /*offset*/) { if (type_ == ObjType::TypeArray) { itemType_ = TAG_STRING; } else { @@ -94,7 +94,7 @@ 
CJsonBuilder &CJsonBuilder::Put(int tagName, std::string_view arg) { return *this; } -CJsonBuilder &CJsonBuilder::Put(int tagName, Uuid arg) { +CJsonBuilder &CJsonBuilder::Put(int tagName, Uuid arg, int /*offset*/) { ser_->PutCTag(ctag{TAG_UUID, tagName}); ser_->PutUuid(arg); return *this; @@ -140,18 +140,20 @@ CJsonBuilder &CJsonBuilder::ArrayRef(int tagName, int field, int count) { return *this; } -CJsonBuilder &CJsonBuilder::Put(int tagName, const Variant &kv) { - kv.Type().EvaluateOneOf( - [&](KeyValueType::Int) { Put(tagName, int(kv)); }, [&](KeyValueType::Int64) { Put(tagName, int64_t(kv)); }, - [&](KeyValueType::Double) { Put(tagName, double(kv)); }, [&](KeyValueType::String) { Put(tagName, std::string_view(kv)); }, - [&](KeyValueType::Null) { Null(tagName); }, [&](KeyValueType::Bool) { Put(tagName, bool(kv)); }, - [&](KeyValueType::Tuple) { - auto arrNode = Array(tagName); - for (auto &val : kv.getCompositeValues()) { - arrNode.Put(nullptr, val); - } - }, - [&](KeyValueType::Uuid) { Put(tagName, Uuid{kv}); }, [](OneOf) noexcept {}); +CJsonBuilder &CJsonBuilder::Put(int tagName, const Variant &kv, int offset) { + kv.Type().EvaluateOneOf([&](KeyValueType::Int) { Put(tagName, int(kv), offset); }, + [&](KeyValueType::Int64) { Put(tagName, int64_t(kv), offset); }, + [&](KeyValueType::Double) { Put(tagName, double(kv), offset); }, + [&](KeyValueType::String) { Put(tagName, std::string_view(kv), offset); }, + [&](KeyValueType::Null) { Null(tagName); }, [&](KeyValueType::Bool) { Put(tagName, bool(kv), offset); }, + [&](KeyValueType::Tuple) { + auto arrNode = Array(tagName); + for (auto &val : kv.getCompositeValues()) { + arrNode.Put(nullptr, val); + } + }, + [&](KeyValueType::Uuid) { Put(tagName, Uuid{kv}, offset); }, + [](OneOf) noexcept {}); return *this; } diff --git a/cpp_src/core/cjson/cjsonbuilder.h b/cpp_src/core/cjson/cjsonbuilder.h index 33be5a29b..a64494a34 100644 --- a/cpp_src/core/cjson/cjsonbuilder.h +++ b/cpp_src/core/cjson/cjsonbuilder.h @@ -23,7 
+23,7 @@ class CJsonBuilder { CJsonBuilder &operator=(const CJsonBuilder &) = delete; CJsonBuilder &operator=(CJsonBuilder &&) = delete; - void SetTagsMatcher(const TagsMatcher *tm) { tm_ = tm; } + void SetTagsMatcher(const TagsMatcher *tm) noexcept { tm_ = tm; } /// Start new object CJsonBuilder Object(int tagName); @@ -67,25 +67,25 @@ class CJsonBuilder { } template - CJsonBuilder &Put(std::nullptr_t, T arg) { - return Put(0, arg); + CJsonBuilder &Put(std::nullptr_t, const T &arg, int offset = 0) { + return Put(0, arg, offset); } void Write(std::string_view data) { ser_->Write(data); } CJsonBuilder &Null(std::nullptr_t) { return Null(0); } - CJsonBuilder &Put(int tagName, bool arg); - CJsonBuilder &Put(int tagName, int arg); - CJsonBuilder &Put(int tagName, int64_t arg); - CJsonBuilder &Put(int tagName, double arg); - CJsonBuilder &Put(int tagName, std::string_view arg); - CJsonBuilder &Put(int tagName, Uuid arg); + CJsonBuilder &Put(int tagName, bool arg, int offset = 0); + CJsonBuilder &Put(int tagName, int arg, int offset = 0); + CJsonBuilder &Put(int tagName, int64_t arg, int offset = 0); + CJsonBuilder &Put(int tagName, double arg, int offset = 0); + CJsonBuilder &Put(int tagName, std::string_view arg, int offset = 0); + CJsonBuilder &Put(int tagName, Uuid arg, int offset = 0); CJsonBuilder &Ref(int tagName, const Variant &v, int field); CJsonBuilder &ArrayRef(int tagName, int field, int count); CJsonBuilder &Null(int tagName); - CJsonBuilder &Put(int tagName, const Variant &kv); - CJsonBuilder &Put(int tagName, const char *arg) { return Put(tagName, std::string_view(arg)); } + CJsonBuilder &Put(int tagName, const Variant &kv, int offset = 0); + CJsonBuilder &Put(int tagName, const char *arg, int offset = 0) { return Put(tagName, std::string_view(arg), offset); } CJsonBuilder &End() { switch (type_) { case ObjType::TypeArray: diff --git a/cpp_src/core/cjson/cjsondecoder.cc b/cpp_src/core/cjson/cjsondecoder.cc index a18ddcc54..e6a7c2d73 100644 --- 
a/cpp_src/core/cjson/cjsondecoder.cc +++ b/cpp_src/core/cjson/cjsondecoder.cc @@ -7,10 +7,6 @@ namespace reindexer { -CJsonDecoder::CJsonDecoder(TagsMatcher &tagsMatcher) : tagsMatcher_(tagsMatcher), filter_(nullptr) {} -CJsonDecoder::CJsonDecoder(TagsMatcher &tagsMatcher, const FieldsSet *filter, Recoder *recoder) - : tagsMatcher_(tagsMatcher), filter_(filter), recoder_{recoder} {} - bool CJsonDecoder::decodeCJson(Payload &pl, Serializer &rdser, WrSerializer &wrser, bool match) { const ctag tag = rdser.GetCTag(); if (tag == kCTagEnd) { @@ -23,7 +19,7 @@ bool CJsonDecoder::decodeCJson(Payload &pl, Serializer &rdser, WrSerializer &wrs (void)tagsMatcher_.tag2name(tagName); tagsPath_.emplace_back(tagName); } - if (tag.Field() >= 0) { + if rx_unlikely (tag.Field() >= 0) { throw Error(errLogic, "Reference tag was found in transport CJSON for field %d[%s] in ns [%s]", tag.Field(), tagsMatcher_.tag2name(tagName), pl.Type().Name()); } @@ -56,6 +52,7 @@ bool CJsonDecoder::decodeCJson(Payload &pl, Serializer &rdser, WrSerializer &wrs if (field >= 0) { if (match) { if (tagType == TAG_NULL) { + objectScalarIndexes_.set(field); wrser.PutCTag(ctag{TAG_NULL, tagName}); } else if (recoder) { recoder->Recode(rdser, pl, tagName, wrser); @@ -63,7 +60,7 @@ bool CJsonDecoder::decodeCJson(Payload &pl, Serializer &rdser, WrSerializer &wrs const auto &fieldRef{pl.Type().Field(field)}; const KeyValueType fieldType{fieldRef.Type()}; if (tagType == TAG_ARRAY) { - if (!fieldRef.IsArray()) { + if rx_unlikely (!fieldRef.IsArray()) { throw Error(errLogic, "Error parsing cjson field '%s' - got array, expected scalar %s", fieldRef.Name(), fieldType.Name()); } @@ -77,11 +74,10 @@ bool CJsonDecoder::decodeCJson(Payload &pl, Serializer &rdser, WrSerializer &wrs } wrser.PutCTag(ctag{TAG_ARRAY, tagName, field}); wrser.PutVarUint(count); - } else if (isInArray() && !fieldRef.IsArray()) { - throw Error(errLogic, "Error parsing cjson field '%s' - got value in the nested array, but expected scalar %s", - 
fieldRef.Name(), fieldType.Name()); } else { - pl.Set(field, {cjsonValueToVariant(tagType, rdser, fieldType)}, true); + validateNonArrayFieldRestrictions(objectScalarIndexes_, pl, fieldRef, field, isInArray(), "cjson"); + objectScalarIndexes_.set(field); + pl.Set(field, cjsonValueToVariant(tagType, rdser, fieldType), true); fieldType.EvaluateOneOf( [&](OneOf) { wrser.PutCTag(ctag{TAG_VARINT, tagName, field}); @@ -93,6 +89,7 @@ bool CJsonDecoder::decodeCJson(Payload &pl, Serializer &rdser, WrSerializer &wrs } } } else { + // objectScalarIndexes_.set(field); - do not change objectScalarIndexes_ value for the filtered out fields skipCjsonTag(tag, rdser); } } else { diff --git a/cpp_src/core/cjson/cjsondecoder.h b/cpp_src/core/cjson/cjsondecoder.h index 35ebfb0d9..a128fe487 100644 --- a/cpp_src/core/cjson/cjsondecoder.h +++ b/cpp_src/core/cjson/cjsondecoder.h @@ -20,10 +20,14 @@ class Recoder { class CJsonDecoder { public: - CJsonDecoder(TagsMatcher &tagsMatcher); - CJsonDecoder(TagsMatcher &tagsMatcher, const FieldsSet *filter, Recoder *); + CJsonDecoder(TagsMatcher &tagsMatcher) noexcept : tagsMatcher_(tagsMatcher), filter_(nullptr) {} + CJsonDecoder(TagsMatcher &tagsMatcher, const FieldsSet *filter, Recoder *recoder) noexcept + : tagsMatcher_(tagsMatcher), filter_(filter), recoder_(recoder) {} - void Decode(Payload &pl, Serializer &rdSer, WrSerializer &wrSer) { decodeCJson(pl, rdSer, wrSer, true); } + void Decode(Payload &pl, Serializer &rdSer, WrSerializer &wrSer) { + objectScalarIndexes_.reset(); + decodeCJson(pl, rdSer, wrSer, true); + } private: bool decodeCJson(Payload &pl, Serializer &rdser, WrSerializer &wrser, bool match); @@ -34,6 +38,7 @@ class CJsonDecoder { TagsPath tagsPath_; Recoder *recoder_{nullptr}; int32_t arrayLevel_ = 0; + ScalarIndexesSetT objectScalarIndexes_; }; } // namespace reindexer diff --git a/cpp_src/core/cjson/cjsonmodifier.cc b/cpp_src/core/cjson/cjsonmodifier.cc index a85a5fc23..82c75f732 100644 --- 
a/cpp_src/core/cjson/cjsonmodifier.cc +++ b/cpp_src/core/cjson/cjsonmodifier.cc @@ -9,54 +9,66 @@ namespace reindexer { const std::string_view kWrongFieldsAmountMsg = "Number of fields for update should be > 0"; -struct CJsonModifier::Context { +class CJsonModifier::Context { +public: Context(const IndexedTagsPath &fieldPath, const VariantArray &v, WrSerializer &ser, std::string_view tuple, FieldModifyMode m, const Payload *pl = nullptr) : value(v), wrser(ser), rdser(tuple), mode(m), payload(pl) { + jsonPath.reserve(fieldPath.size()); for (const IndexedPathNode &node : fieldPath) { + isForAllItems_ = isForAllItems_ || node.IsForAllItems(); jsonPath.emplace_back(node.NameTag()); } - if (mode == FieldModeSet && fieldPath.back().IsArrayNode() && value.empty()) { - throw Error(errParams, "Array item should not be an empty value"); + if (fieldPath.back().IsArrayNode()) { + updateArrayElements = true; + if (mode == FieldModeSet && value.empty()) { + throw Error(errParams, "Array item should not be an empty value"); + } } + std::fill(std::begin(fieldsArrayOffsets), std::end(fieldsArrayOffsets), 0); } + bool IsForAllItems() const noexcept { return isForAllItems_; } + const VariantArray &value; WrSerializer &wrser; Serializer rdser; TagsPath jsonPath; IndexedTagsPath currObjPath; FieldModifyMode mode; - const Payload *payload = nullptr; bool fieldUpdated = false; - std::array fieldsArrayOffsets; -}; + bool updateArrayElements = false; + const Payload *payload = nullptr; + std::array fieldsArrayOffsets; -CJsonModifier::CJsonModifier(TagsMatcher &tagsMatcher, PayloadType pt) : pt_(std::move(pt)), tagsMatcher_(tagsMatcher) {} +private: + bool isForAllItems_ = false; +}; -void CJsonModifier::SetFieldValue(std::string_view tuple, IndexedTagsPath fieldPath, const VariantArray &val, WrSerializer &ser) { +void CJsonModifier::SetFieldValue(std::string_view tuple, IndexedTagsPath fieldPath, const VariantArray &val, WrSerializer &ser, + const Payload &pl) { if (fieldPath.empty()) { 
throw Error(errLogic, kWrongFieldsAmountMsg); } - tagsPath_.clear(); - Context ctx(fieldPath, val, ser, tuple, FieldModeSet, nullptr); + tagsPath_.clear(); + Context ctx(fieldPath, val, ser, tuple, FieldModeSet, &pl); fieldPath_ = std::move(fieldPath); updateFieldInTuple(ctx); - if (!ctx.fieldUpdated && !fieldPath_.back().IsForAllItems()) { + if (!ctx.fieldUpdated && !ctx.IsForAllItems()) { throw Error(errParams, "[SetFieldValue] Requested field or array's index was not found"); } } void CJsonModifier::SetObject(std::string_view tuple, IndexedTagsPath fieldPath, const VariantArray &val, WrSerializer &ser, - const Payload *pl) { + const Payload &pl) { if (fieldPath.empty()) { throw Error(errLogic, kWrongFieldsAmountMsg); } - tagsPath_.clear(); - Context ctx(fieldPath, val, ser, tuple, FieldModeSetJson, pl); + tagsPath_.clear(); + Context ctx(fieldPath, val, ser, tuple, FieldModeSetJson, &pl); fieldPath_ = std::move(fieldPath); buildCJSON(ctx); - if (!ctx.fieldUpdated && !fieldPath_.back().IsForAllItems()) { + if (!ctx.fieldUpdated && !ctx.IsForAllItems()) { throw Error(errParams, "[SetObject] Requested field or array's index was not found"); } } @@ -65,7 +77,7 @@ void CJsonModifier::RemoveField(std::string_view tuple, IndexedTagsPath fieldPat if (fieldPath.empty()) { throw Error(errLogic, kWrongFieldsAmountMsg); } - tagsPath_.clear(); + tagsPath_.clear(); Context ctx(fieldPath, {}, wrser, tuple, FieldModeDrop); fieldPath_ = std::move(fieldPath); dropFieldInTuple(ctx); @@ -104,7 +116,7 @@ void CJsonModifier::insertField(Context &ctx) { if (ctx.mode == FieldModeSetJson) { updateObject(ctx, tagName); } else { - int field = tagsMatcher_.tags2field(ctx.jsonPath.data(), fieldPath_.size()); + const int field = tagsMatcher_.tags2field(ctx.jsonPath.data(), fieldPath_.size()); const TagType tagType = determineUpdateTagType(ctx, field); if (field > 0) { putCJsonRef(tagType, tagName, field, ctx.value, ctx.wrser); @@ -122,47 +134,55 @@ void CJsonModifier::insertField(Context 
&ctx) { ctx.currObjPath.clear(); } -bool CJsonModifier::needToInsertField(Context &ctx) { +bool CJsonModifier::needToInsertField(const Context &ctx) { if (ctx.fieldUpdated) return false; if (fieldPath_.back().IsArrayNode()) return false; if (ctx.currObjPath.size() < fieldPath_.size()) { - bool correctPath = true; - for (size_t i = 0; i < ctx.currObjPath.size(); ++i) { + for (unsigned i = 0; i < ctx.currObjPath.size(); ++i) { if (fieldPath_[i] != ctx.currObjPath[i]) { - correctPath = false; - break; + return false; } } - if (correctPath) { - bool containsArrayIndex = false; - for (auto &node : fieldPath_) { - if (node.IsArrayNode()) { - containsArrayIndex = true; - break; - } + if (ctx.IsForAllItems()) { + throw Error(errParams, "Unable to insert new field with 'all items ([*])' syntax"); + } + for (unsigned i = ctx.currObjPath.size(); i < fieldPath_.size(); ++i) { + if (fieldPath_[i].IsArrayNode()) { + return false; } - return !containsArrayIndex || fieldPath_.size() == ctx.currObjPath.size(); } - return false; + return true; } return false; } TagType CJsonModifier::determineUpdateTagType(const Context &ctx, int field) { - if (field != -1) { + if (field >= 0) { const PayloadFieldType &fieldType = pt_.Field(field); - if (ctx.value.size() > 0 && !fieldType.Type().IsSame(ctx.value.front().Type())) { - throw Error(errParams, "Inserted field %s type [%s] doesn't match it's index type [%s]", fieldType.Name(), - TagTypeToStr(kvType2Tag(ctx.value.front().Type())), TagTypeToStr(kvType2Tag(fieldType.Type()))); + if (!fieldType.IsArray() || ctx.updateArrayElements || !ctx.value.IsNullValue()) { + for (auto &v : ctx.value) { + if (!fieldType.Type().IsSame(v.Type())) { + throw Error(errParams, "Inserted field %s type [%s] doesn't match it's index type [%s]", fieldType.Name(), + v.Type().Name(), fieldType.Type().Name()); + } + } + } + } else if (ctx.value.size() > 1) { + const auto type = kvType2Tag(ctx.value.front().Type()); + for (auto it = ctx.value.begin() + 1, end = 
ctx.value.end(); it != end; ++it) { + if (type != kvType2Tag(it->Type())) { + throw Error(errParams, "Unable to update field with heterogeneous array. Type[0] is [%s] and type[%d] is [%s]", + TagTypeToStr(type), it - ctx.value.begin(), TagTypeToStr(kvType2Tag(it->Type()))); + } } } - if (ctx.value.IsArrayValue()) { + + if (ctx.updateArrayElements || ctx.value.IsArrayValue()) { return TAG_ARRAY; - } else if (ctx.value.empty()) { + } else if (ctx.value.IsNullValue() || ctx.value.empty()) { return TAG_NULL; - } else { - return kvType2Tag(ctx.value.front().Type()); } + return kvType2Tag(ctx.value.front().Type()); } bool CJsonModifier::checkIfFoundTag(Context &ctx, bool isLastItem) { @@ -196,77 +216,109 @@ bool CJsonModifier::updateFieldInTuple(Context &ctx) { TagsPathScope pathScope(tagsPath_, tagName); bool tagMatched = checkIfFoundTag(ctx); - if (tagMatched && field < 0) { - tagType = determineUpdateTagType(ctx); - } - - ctx.wrser.PutCTag(ctag{tagType, tagName, field}); - if (field >= 0) { if (tagType == TAG_ARRAY) { - auto count = ctx.rdser.GetVarUint(); + const int count = ctx.rdser.GetVarUint(); + if (!tagMatched || !ctx.fieldUpdated) { + auto &lastTag = tagsPath_.back(); + for (int i = 0; i < count; ++i) { + lastTag.SetIndex(i); + const bool isLastItem = (i + 1 == count); + tagMatched = checkIfFoundTag(ctx, isLastItem); + if (tagMatched && ctx.fieldUpdated) { + break; + } + } + } + + if (tagMatched && ctx.fieldUpdated) { + const auto resultTagType = determineUpdateTagType(ctx, field); + ctx.wrser.PutCTag(ctag{resultTagType, tagName, field}); + + if (resultTagType == TAG_ARRAY) { + if (ctx.updateArrayElements) { + ctx.wrser.PutVarUint(count); + } else { + ctx.wrser.PutVarUint(ctx.value.size()); + } + } + } else { + ctx.wrser.PutCTag(ctag{tagType, tagName, field}); + ctx.wrser.PutVarUint(count); + } + } else { if (tagMatched) { - count = (ctx.value.empty() || ctx.value.IsNullValue()) ? 
0 : ctx.value.size(); + if (ctx.updateArrayElements) { + throw Error(errParams, "Unable to update scalar value by index"); + } + const auto resultTagType = determineUpdateTagType(ctx, field); + ctx.wrser.PutCTag(ctag{resultTagType, tagName, field}); + + if (resultTagType == TAG_ARRAY) { + ctx.wrser.PutVarUint(ctx.value.size()); + } + } else { + ctx.wrser.PutCTag(ctag{tagType, tagName, field}); } - ctx.wrser.PutVarUint(count); } } else { - if (tagType == TAG_OBJECT) { + const auto resultTagType = tagMatched ? determineUpdateTagType(ctx, field) : tagType; + ctx.wrser.PutCTag(ctag{resultTagType, tagName, field}); + if (tagMatched) { + if (ctx.updateArrayElements && tagType != TAG_ARRAY) { + throw Error(errParams, "Unable to update scalar value by index"); + } + if (resultTagType != TAG_NULL) { + if (resultTagType == TAG_ARRAY) { + ctx.wrser.PutCArrayTag(carraytag{ctx.value.size(), kvType2Tag(ctx.value.ArrayType())}); + } else if (ctx.value.empty()) { + throw Error(errLogic, "Update value for field [%s] cannot be empty", tagsMatcher_.tag2name(tagName)); + } + for (size_t i = 0, size = ctx.value.size(); i < size; ++i) { + updateField(ctx, i); + } + } + skipCjsonTag(tag, ctx.rdser, &ctx.fieldsArrayOffsets); + } else if (tagType == TAG_OBJECT) { TagsPathScope pathScope(ctx.currObjPath, tagName); while (updateFieldInTuple(ctx)) { } } else if (tagType == TAG_ARRAY) { - if (tagMatched) { - skipCjsonTag(tag, ctx.rdser, &ctx.fieldsArrayOffsets); - ctx.wrser.PutCArrayTag(carraytag{ctx.value.size(), ctx.value.ArrayType().ToTagType()}); - for (size_t i = 0; i < ctx.value.size(); ++i) { - updateField(ctx, i); - } - } else { - const carraytag atag = ctx.rdser.GetCArrayTag(); - ctx.wrser.PutCArrayTag(atag); - const TagType atagType = atag.Type(); - const auto count = atag.Count(); - for (size_t i = 0; i < count; ++i) { - tagsPath_.back().SetIndex(i); - const bool isLastItem = (i == count - 1); - tagMatched = checkIfFoundTag(ctx, isLastItem); - if (tagMatched) { - 
copyCJsonValue(atagType, ctx.value.front(), ctx.wrser); - skipCjsonTag(ctag{atagType}, ctx.rdser, &ctx.fieldsArrayOffsets); - } else { - switch (atagType) { - case TAG_OBJECT: { - TagsPathScope pathScope(ctx.currObjPath, tagName, i); - updateFieldInTuple(ctx); - break; - } - case TAG_VARINT: - case TAG_DOUBLE: - case TAG_STRING: - case TAG_ARRAY: - case TAG_NULL: - case TAG_BOOL: - case TAG_END: - case TAG_UUID: - copyCJsonValue(atagType, ctx.rdser, ctx.wrser); - break; + const carraytag atag = ctx.rdser.GetCArrayTag(); + ctx.wrser.PutCArrayTag(atag); + const TagType atagType = atag.Type(); + const auto count = atag.Count(); + for (unsigned i = 0; i < count; i++) { + tagsPath_.back().SetIndex(i); + const bool isLastItem = (i + 1 == atag.Count()); + if (checkIfFoundTag(ctx, isLastItem)) { + if (ctx.value.IsArrayValue()) { + throw Error(errParams, "Unable to update non-indexed array's element with array-value"); + } + copyCJsonValue(atagType, ctx.value.front(), ctx.wrser); + skipCjsonTag(ctag{atagType}, ctx.rdser, &ctx.fieldsArrayOffsets); + } else { + switch (atagType) { + case TAG_OBJECT: { + TagsPathScope pathScope(ctx.currObjPath, tagName, i); + updateFieldInTuple(ctx); + break; } + case TAG_VARINT: + case TAG_DOUBLE: + case TAG_STRING: + case TAG_ARRAY: + case TAG_NULL: + case TAG_BOOL: + case TAG_END: + case TAG_UUID: + copyCJsonValue(atagType, ctx.rdser, ctx.wrser); + break; } } } } else { - if (tagMatched) { - if (tagType != TAG_NULL) { - if (ctx.value.empty()) { - throw Error(errLogic, "Update value for field [%s] cannot be empty", tagsMatcher_.tag2name(tagName)); - } - updateField(ctx, 0); - } - skipCjsonTag(tag, ctx.rdser, &ctx.fieldsArrayOffsets); - } else { - copyCJsonValue(tagType, ctx.rdser, ctx.wrser); - } + copyCJsonValue(tagType, ctx.rdser, ctx.wrser); } } diff --git a/cpp_src/core/cjson/cjsonmodifier.h b/cpp_src/core/cjson/cjsonmodifier.h index 56599768d..642e2908b 100644 --- a/cpp_src/core/cjson/cjsonmodifier.h +++ 
b/cpp_src/core/cjson/cjsonmodifier.h @@ -6,22 +6,22 @@ namespace reindexer { class CJsonModifier { public: - CJsonModifier(TagsMatcher &tagsMatcher, PayloadType pt); - void SetFieldValue(std::string_view tuple, IndexedTagsPath path, const VariantArray &v, WrSerializer &ser); - void SetObject(std::string_view tuple, IndexedTagsPath path, const VariantArray &v, WrSerializer &ser, const Payload *pl); + CJsonModifier(TagsMatcher &tagsMatcher, PayloadType pt) noexcept : pt_(std::move(pt)), tagsMatcher_(tagsMatcher) {} + void SetFieldValue(std::string_view tuple, IndexedTagsPath path, const VariantArray &v, WrSerializer &ser, const Payload &pl); + void SetObject(std::string_view tuple, IndexedTagsPath path, const VariantArray &v, WrSerializer &ser, const Payload &pl); void RemoveField(std::string_view tuple, IndexedTagsPath fieldPath, WrSerializer &wrser); private: - struct Context; + class Context; bool updateFieldInTuple(Context &ctx); bool dropFieldInTuple(Context &ctx); bool buildCJSON(Context &ctx); - bool needToInsertField(Context &ctx); + bool needToInsertField(const Context &ctx); void insertField(Context &ctx); void embedFieldValue(TagType, int field, Context &ctx, size_t idx); void updateObject(Context &ctx, int tagName); void updateField(Context &ctx, size_t idx); - TagType determineUpdateTagType(const Context &ctx, int field = IndexValueType::NotSet); + TagType determineUpdateTagType(const Context &ctx, int field); bool checkIfFoundTag(Context &ctx, bool isLastItem = false); PayloadType pt_; diff --git a/cpp_src/core/cjson/cjsontools.cc b/cpp_src/core/cjson/cjsontools.cc index d19a4154a..0294772ff 100644 --- a/cpp_src/core/cjson/cjsontools.cc +++ b/cpp_src/core/cjson/cjsontools.cc @@ -92,7 +92,7 @@ void copyCJsonValue(TagType tagType, Serializer &rdser, WrSerializer &wrser) { } } -void skipCjsonTag(ctag tag, Serializer &rdser, std::array *fieldsArrayOffsets) { +void skipCjsonTag(ctag tag, Serializer &rdser, std::array *fieldsArrayOffsets) { const auto field = 
tag.Field(); const bool embeddedField = (field < 0); switch (tag.Type()) { @@ -140,9 +140,11 @@ Variant cjsonValueToVariant(TagType tagType, Serializer &rdser, KeyValueType dst template void buildPayloadTuple(const PayloadIface &pl, const TagsMatcher *tagsMatcher, WrSerializer &wrser) { CJsonBuilder builder(wrser, ObjType::TypeObject); - for (int field = 1; field < pl.NumFields(); ++field) { + for (int field = 1, numFields = pl.NumFields(); field < numFields; ++field) { const PayloadFieldType &fieldType = pl.Type().Field(field); - if (fieldType.JsonPaths().size() < 1 || fieldType.JsonPaths()[0].empty()) continue; + if (fieldType.JsonPaths().size() < 1 || fieldType.JsonPaths()[0].empty()) { + continue; + } int tagName = tagsMatcher->name2tag(fieldType.JsonPaths()[0]); assertf(tagName != 0, "ns=%s, field=%s", pl.Type().Name(), fieldType.JsonPaths()[0]); @@ -155,8 +157,7 @@ void buildPayloadTuple(const PayloadIface &pl, const TagsMatcher *tagsMatcher } } -template void buildPayloadTuple(const PayloadIface &pl, const TagsMatcher *tagsMatcher, - WrSerializer &wrser); -template void buildPayloadTuple(const PayloadIface &pl, const TagsMatcher *tagsMatcher, WrSerializer &wrser); +template void buildPayloadTuple(const PayloadIface &, const TagsMatcher *, WrSerializer &); +template void buildPayloadTuple(const PayloadIface &, const TagsMatcher *, WrSerializer &); } // namespace reindexer diff --git a/cpp_src/core/cjson/cjsontools.h b/cpp_src/core/cjson/cjsontools.h index b3c1054a9..69070c1e8 100644 --- a/cpp_src/core/cjson/cjsontools.h +++ b/cpp_src/core/cjson/cjsontools.h @@ -14,7 +14,20 @@ void putCJsonRef(TagType tagType, int tagName, int tagField, const VariantArray void putCJsonValue(TagType tagType, int tagName, const VariantArray &values, WrSerializer &wrser); [[nodiscard]] TagType kvType2Tag(KeyValueType kvType) noexcept; -void skipCjsonTag(ctag tag, Serializer &rdser, std::array *fieldsArrayOffsets = nullptr); +void skipCjsonTag(ctag tag, Serializer &rdser, 
std::array *fieldsArrayOffsets = nullptr); [[nodiscard]] Variant cjsonValueToVariant(TagType tag, Serializer &rdser, KeyValueType dstType); +RX_ALWAYS_INLINE void validateNonArrayFieldRestrictions(const ScalarIndexesSetT &scalarIndexes, const Payload &pl, + const PayloadFieldType &f, int field, bool isInArray, std::string_view parserName) { + if (!f.IsArray()) { + if rx_unlikely (isInArray) { + throw Error(errLogic, "Error parsing %s field '%s' - got value nested into the array, but expected scalar %s", parserName, + f.Name(), f.Type().Name()); + } + if rx_unlikely (scalarIndexes.test(field)) { + throw Error(errLogic, "Non-array field '%s' [%d] from '%s' can only be encoded once.", f.Name(), field, pl.Type().Name()); + } + } +} + } // namespace reindexer diff --git a/cpp_src/core/cjson/csvbuilder.cc b/cpp_src/core/cjson/csvbuilder.cc new file mode 100644 index 000000000..ed7031dd8 --- /dev/null +++ b/cpp_src/core/cjson/csvbuilder.cc @@ -0,0 +1,211 @@ +#include "csvbuilder.h" +#include "tools/json2kv.h" + +namespace reindexer { + +CsvBuilder::CsvBuilder(ObjType type, const CsvBuilder &parent) + : ser_(parent.ser_), + tm_(parent.tm_), + type_(type), + level_(parent.level_ + 1), + startSerLen_(ser_->Len()), + ordering_(parent.ordering_), + buf_(parent.buf_), + positions_([this]() -> std::vector> { + if (level_ == 0 && ordering_) { + return {ordering_->size(), std::pair{-1, -1}}; + } + return {}; + }()) { + if (level_ < 1) { + return; + } else if (level_ == 1) { + (*ser_) << '"'; + } + + switch (type_) { + case ObjType::TypeArray: + (*ser_) << '['; + break; + case ObjType::TypeObject: + (*ser_) << '{'; + break; + case ObjType::TypeObjectArray: + case ObjType::TypePlain: + default: + break; + } +} + +CsvBuilder::CsvBuilder(WrSerializer &ser, CsvOrdering &ordering) + : ser_(&ser), + level_(-1), + ordering_(!ordering.ordering_.empty() ? &ordering.ordering_ : nullptr), + buf_(ordering_ ? 
&ordering.buf_ : nullptr) {} + +CsvBuilder::~CsvBuilder() { End(); } + +std::string_view CsvBuilder::getNameByTag(int tagName) { return tagName ? tm_->tag2name(tagName) : std::string_view(); } + +CsvBuilder &CsvBuilder::End() { + if (!positions_.empty()) { + postProcessing(); + } + if (level_ > 0) { + switch (type_) { + case ObjType::TypeArray: + (*ser_) << ']'; + break; + case ObjType::TypeObject: + (*ser_) << '}'; + break; + case ObjType::TypeObjectArray: + case ObjType::TypePlain: + default: + break; + } + } + + if (level_ == 1) { + (*ser_) << '"'; + } + + type_ = ObjType::TypePlain; + + return *this; +} + +CsvBuilder CsvBuilder::Object(std::string_view name, int /*size*/) { + putName(name); + return CsvBuilder(ObjType::TypeObject, *this); +} + +CsvBuilder CsvBuilder::Array(std::string_view name, int /*size*/) { + putName(name); + return CsvBuilder(ObjType::TypeArray, *this); +} + +void CsvBuilder::putName(std::string_view name) { + if (level_ == 0 && ordering_ && !ordering_->empty()) { + tmProcessing(name); + } + + if (count_++) (*ser_) << ','; + + if (level_ < 1) return; + + if (name.data()) { + (*ser_) << '"'; + (*ser_).PrintJsonString(name, WrSerializer::PrintJsonStringMode::QuotedQuote); + (*ser_) << '"'; + (*ser_) << ':'; + } +} + +void CsvBuilder::tmProcessing(std::string_view name) { + int tag = tm_->name2tag(name); + + auto prevFinishPos = ser_->Len(); + if (tag > 0) { + auto it = std::find_if(ordering_->begin(), ordering_->end(), [&tag](const auto &t) { return t == tag; }); + + if (it != ordering_->end()) { + if (curTagPos_ > -1) { + positions_[curTagPos_].second = prevFinishPos; + } + curTagPos_ = std::distance(ordering_->begin(), it); + positions_[curTagPos_].first = prevFinishPos + (count_ > 0 ? 
1 : 0); + } else { + throw Error(errParams, "Tag %s from tagsmatcher was not passed with the schema", name); + } + } else { + if (name.substr(0, 7) != "joined_") { + throw Error(errParams, "The \"joined_*\"-like tag for joined namespaced is expected, but received %d", name); + } + + if (curTagPos_ > -1) { + positions_[curTagPos_].second = prevFinishPos; + } + if (count_) (*ser_) << ','; + + (*ser_) << "\"{"; + type_ = ObjType::TypeObject; + count_ = 0; + level_++; + } +} + +void CsvBuilder::postProcessing() { + if (!buf_) { + throw Error(errParams, "Buffer not initialized"); + } + + buf_->Reset(); + + if (positions_[curTagPos_].second == -1) { + positions_[curTagPos_].second = ser_->Len(); + } + + auto joinedData = std::string_view(ser_->Slice().data() + positions_[curTagPos_].second, ser_->Len() - positions_[curTagPos_].second); + + bool needDelim = false; + for (auto &[begin, end] : positions_) { + if (needDelim) { + *buf_ << ','; + } else { + needDelim = true; + } + *buf_ << std::string_view{ser_->Slice().data() + begin, static_cast(end - begin)}; + } + + *buf_ << joinedData; + ser_->Reset(startSerLen_); + *ser_ << buf_->Slice(); +} + +CsvBuilder &CsvBuilder::Put(std::string_view name, std::string_view arg, int /*offset*/) { + putName(name); + + std::string_view optQuote = level_ > 0 ? 
"\"" : ""; + (*ser_) << optQuote; + (*ser_).PrintJsonString(arg, WrSerializer::PrintJsonStringMode::QuotedQuote); + + (*ser_) << optQuote; + return *this; +} + +CsvBuilder &CsvBuilder::Put(std::string_view name, Uuid arg, int /*offset*/) { + putName(name); + ser_->PrintJsonUuid(arg); + return *this; +} + +CsvBuilder &CsvBuilder::Raw(std::string_view name, std::string_view arg) { + putName(name); + (*ser_) << arg; + return *this; +} + +CsvBuilder &CsvBuilder::Null(std::string_view name) { + putName(name); + (*ser_) << "null"; + return *this; +} + +CsvBuilder &CsvBuilder::Put(std::string_view name, const Variant &kv, int offset) { + kv.Type().EvaluateOneOf( + [&](KeyValueType::Int) { Put(name, int(kv), offset); }, [&](KeyValueType::Int64) { Put(name, int64_t(kv), offset); }, + [&](KeyValueType::Double) { Put(name, double(kv), offset); }, + [&](KeyValueType::String) { Put(name, std::string_view(kv), offset); }, [&](KeyValueType::Null) { Null(name); }, + [&](KeyValueType::Bool) { Put(name, bool(kv), offset); }, + [&](KeyValueType::Tuple) { + auto arrNode = Array(name); + for (auto &val : kv.getCompositeValues()) { + arrNode.Put({nullptr, 0}, val); + } + }, + [&](KeyValueType::Uuid) { Put(name, Uuid{kv}, offset); }, [](OneOf) noexcept {}); + return *this; +} + +} // namespace reindexer diff --git a/cpp_src/core/cjson/csvbuilder.h b/cpp_src/core/cjson/csvbuilder.h new file mode 100644 index 000000000..5f7c7687d --- /dev/null +++ b/cpp_src/core/cjson/csvbuilder.h @@ -0,0 +1,127 @@ +#pragma once + +#include +#include "estl/span.h" +#include "objtype.h" +#include "tagslengths.h" +#include "tagsmatcher.h" +#include "vendor/gason/gason.h" + +namespace reindexer { + +class CsvBuilder; + +struct CsvOrdering { + CsvOrdering(std::vector ordering) : ordering_(std::move(ordering)) { + if (ordering_.empty()) { + return; + } + + buf_.Reserve(kInitBufferSize); + } + + auto begin() const noexcept { return ordering_.begin(); } + auto end() const noexcept { return ordering_.end(); } + + 
friend CsvBuilder; + +private: + const size_t kInitBufferSize = 0x1000; + std::vector ordering_; + WrSerializer buf_; +}; + +class CsvBuilder { +public: + CsvBuilder() = default; + + CsvBuilder(WrSerializer &ser, CsvOrdering &ordering); + + ~CsvBuilder(); + CsvBuilder &operator=(const CsvBuilder &) = delete; + CsvBuilder &operator=(CsvBuilder &&) = delete; + + void SetTagsMatcher(const TagsMatcher *tm) { tm_ = tm; } + + /// Start new object + CsvBuilder Object(std::string_view name = {}, int size = KUnknownFieldSize); + CsvBuilder Object(std::nullptr_t, int size = KUnknownFieldSize) { return Object(std::string_view{}, size); } + CsvBuilder Object(int tagName, int size = KUnknownFieldSize) { return Object(getNameByTag(tagName), size); } + + CsvBuilder Array(std::string_view name, int size = KUnknownFieldSize); + CsvBuilder Array(int tagName, int size = KUnknownFieldSize) { return Array(getNameByTag(tagName), size); } + + template + void Array(int tagName, span data, int /*offset*/ = 0) { + CsvBuilder node = Array(tagName); + for (const auto &d : data) node.Put({}, d); + } + template + void Array(std::string_view n, span data, int /*offset*/ = 0) { + CsvBuilder node = Array(n); + for (const auto &d : data) node.Put({}, d); + } + template + void Array(std::string_view n, std::initializer_list data, int /*offset*/ = 0) { + CsvBuilder node = Array(n); + for (const auto &d : data) node.Put({}, d); + } + + void Array(int tagName, Serializer &ser, TagType tagType, int count) { + CsvBuilder node = Array(tagName); + while (count--) node.Put({}, ser.GetRawVariant(KeyValueType{tagType})); + } + + CsvBuilder &Put(std::string_view name, const Variant &arg, int offset = 0); + CsvBuilder &Put(std::nullptr_t, const Variant &arg, int offset = 0) { return Put(std::string_view{}, arg, offset); } + CsvBuilder &Put(std::string_view name, std::string_view arg, int offset = 0); + CsvBuilder &Put(std::string_view name, Uuid arg, int offset = 0); + CsvBuilder &Put(std::nullptr_t, 
std::string_view arg, int offset = 0) { return Put(std::string_view{}, arg, offset); } + CsvBuilder &Put(std::string_view name, const char *arg, int offset = 0) { return Put(name, std::string_view(arg), offset); } + template ::value || std::is_floating_point::value>::type * = nullptr> + CsvBuilder &Put(std::string_view name, const T &arg, int /*offset*/ = 0) { + putName(name); + (*ser_) << arg; + return *this; + } + template + CsvBuilder &Put(int tagName, const T &arg, int offset = 0) { + return Put(getNameByTag(tagName), arg, offset); + } + + CsvBuilder &Raw(int tagName, std::string_view arg) { return Raw(getNameByTag(tagName), arg); } + CsvBuilder &Raw(std::string_view name, std::string_view arg); + CsvBuilder &Raw(std::nullptr_t, std::string_view arg) { return Raw(std::string_view{}, arg); } + CsvBuilder &Json(std::string_view name, std::string_view arg) { return Raw(name, arg); } + CsvBuilder &Json(std::nullptr_t, std::string_view arg) { return Raw(std::string_view{}, arg); } + + CsvBuilder &Null(int tagName) { return Null(getNameByTag(tagName)); } + CsvBuilder &Null(std::string_view name); + + CsvBuilder &End(); + +protected: + CsvBuilder(ObjType type, const CsvBuilder &parent); + + void putName(std::string_view name); + std::string_view getNameByTag(int tagName); + void tmProcessing(std::string_view name); + void postProcessing(); + + WrSerializer *ser_ = nullptr; + const TagsMatcher *tm_ = nullptr; + ObjType type_ = ObjType::TypePlain; + int count_ = 0; + + int level_ = 0; + int startSerLen_ = 0; + + const std::vector *ordering_ = nullptr; + WrSerializer *buf_ = nullptr; + + // idx - pos in ordering, {startTagPosInSer, endTagPosInSer(post culculated after received next tag)} + std::vector> positions_; + int curTagPos_ = -1; +}; + +} // namespace reindexer diff --git a/cpp_src/core/cjson/fieldextractor.h b/cpp_src/core/cjson/fieldextractor.h index 008a5842b..6b11f179a 100644 --- a/cpp_src/core/cjson/fieldextractor.h +++ b/cpp_src/core/cjson/fieldextractor.h 
@@ -1,81 +1,165 @@ #pragma once #include "core/payload/fieldsset.h" +#include "estl/span.h" #include "tagsmatcher.h" namespace reindexer { class FieldsExtractor { public: + class FieldParams { + public: + int &index; + int &length; + int field; + }; + FieldsExtractor() = default; - FieldsExtractor(VariantArray *va, KeyValueType expectedType, int expectedPathDepth, FieldsSet *filter = nullptr, int *index = nullptr, - int *size = nullptr) - : values_(va), expectedType_(expectedType), expectedPathDepth_(expectedPathDepth), filter_(filter), index_(index), length_(size) {} + FieldsExtractor(VariantArray *va, KeyValueType expectedType, int expectedPathDepth, FieldsSet *filter = nullptr, + FieldParams *params = nullptr) noexcept + : values_(va), expectedType_(expectedType), expectedPathDepth_(expectedPathDepth), filter_(filter), params_(params) {} FieldsExtractor(FieldsExtractor &&other) = default; FieldsExtractor(const FieldsExtractor &) = delete; FieldsExtractor &operator=(const FieldsExtractor &) = delete; FieldsExtractor &operator=(FieldsExtractor &&) = delete; - void SetTagsMatcher(const TagsMatcher *) {} + void SetTagsMatcher(const TagsMatcher *) noexcept {} - FieldsExtractor Object(int) { return FieldsExtractor(values_, expectedType_, expectedPathDepth_ - 1, filter_, index_, length_); } - FieldsExtractor Array(int) { return FieldsExtractor(values_, expectedType_, expectedPathDepth_ - 1, filter_, index_, length_); } - FieldsExtractor Object(std::string_view) { - return FieldsExtractor(values_, expectedType_, expectedPathDepth_ - 1, filter_, index_, length_); + FieldsExtractor Object(int) noexcept { return FieldsExtractor(values_, expectedType_, expectedPathDepth_ - 1, filter_, params_); } + FieldsExtractor Array(int) noexcept { + assertrx_throw(values_); + return FieldsExtractor(&values_->MarkArray(), expectedType_, expectedPathDepth_ - 1, filter_, params_); + } + FieldsExtractor Object(std::string_view) noexcept { + return FieldsExtractor(values_, expectedType_, 
expectedPathDepth_ - 1, filter_, params_); } - FieldsExtractor Object(std::nullptr_t) { return Object(std::string_view{}); } - FieldsExtractor Array(std::string_view) { - return FieldsExtractor(values_, expectedType_, expectedPathDepth_ - 1, filter_, index_, length_); + FieldsExtractor Object(std::nullptr_t) noexcept { return Object(std::string_view{}); } + FieldsExtractor Array(std::string_view) noexcept { + return FieldsExtractor(values_, expectedType_, expectedPathDepth_ - 1, filter_, params_); } template void Array(int, span data, int offset) { const IndexedPathNode &pathNode = getArrayPathNode(); - if (index_ && length_) { - *index_ = offset; - *length_ = data.size(); - if (pathNode.IsWithIndex()) { - *index_ += pathNode.Index(); + const PathType ptype = pathNotToType(pathNode); + if (ptype == PathType::Other) { + throw Error(errLogic, "Unable to extract array value without index value"); + } + if (params_) { + if (ptype == PathType::WithIndex) { + params_->index = pathNode.Index() + offset; + params_->length = data.size(); + } else if (params_->index >= 0 && params_->length > 0) { + params_->length += data.size(); + } else { + params_->index = offset; + params_->length = data.size(); } } - int i = 0; - for (auto d : data) { - if (pathNode.IsForAllItems() || i == pathNode.Index()) { - Put(0, Variant(d)); + + if (ptype == PathType::WithIndex) { + int i = 0; + for (const auto &d : data) { + if (i++ == pathNode.Index()) { + put(0, Variant(d)); + } } - ++i; + } else { + for (const auto &d : data) { + put(0, Variant(d)); + } + } + if (expectedPathDepth_ <= 0) { + assertrx_throw(values_); + values_->MarkArray(); } } void Array(int, Serializer &ser, TagType tagType, int count) { const IndexedPathNode &pathNode = getArrayPathNode(); - for (int i = 0; i < count; ++i) { - Variant value = ser.GetRawVariant(KeyValueType{tagType}); - if (pathNode.IsForAllItems() || i == pathNode.Index()) { - Put(0, std::move(value)); + const PathType ptype = pathNotToType(pathNode); + if 
(ptype == PathType::Other) { + throw Error(errLogic, "Unable to extract array value without index value"); + } + if (params_) { + if (ptype == PathType::WithIndex) { + params_->index = pathNode.Index(); + params_->length = count; + } else if (params_->index >= 0 && params_->length > 0) { + params_->length += count; + } else { + params_->index = 0; + params_->length = count; } } + if (ptype == PathType::WithIndex) { + for (int i = 0; i < count; ++i) { + auto value = ser.GetRawVariant(KeyValueType(tagType)); + if (i == pathNode.Index()) { + put(0, std::move(value)); + } + } + } else { + for (int i = 0; i < count; ++i) { + put(0, ser.GetRawVariant(KeyValueType(tagType))); + } + } + if (expectedPathDepth_ <= 0) { + assertrx_throw(values_); + values_->MarkArray(); + } } - FieldsExtractor &Put(int, Variant arg) { + FieldsExtractor &Put(int t, Variant arg, int offset) { + if (expectedPathDepth_ > 0) return *this; + if (params_) { + if (params_->index >= 0 && params_->length > 0 && offset == params_->index + params_->length) { + // Concatenate fields from objects, nested in arrays + params_->length += 1; + } else { + params_->index = offset; + params_->length = 1; + } + } + return put(t, std::move(arg)); + } + + template + FieldsExtractor &Put(int tag, const T &arg, int offset) { + return Put(tag, Variant{arg}, offset); + } + + FieldsExtractor &Null(int) noexcept { return *this; } + int TargetField() { return params_ ? params_->field : IndexValueType::NotSet; } + bool IsHavingOffset() const noexcept { return params_ && (params_->length >= 0 || params_->index >= 0); } + void OnScopeEnd(int offset) noexcept { + if (expectedPathDepth_ <= 0) { + assertrx(params_ && !IsHavingOffset()); + params_->index = offset; + params_->length = 0; + } + } + +private: + enum class PathType { AllItems, WithIndex, Other }; + PathType pathNotToType(const IndexedPathNode &pathNode) noexcept { + return pathNode.IsForAllItems() ? 
PathType::AllItems + : (pathNode.Index() == IndexValueType::NotSet) ? PathType::Other + : PathType::WithIndex; + } + FieldsExtractor &put(int, Variant arg) { if (expectedPathDepth_ > 0) return *this; expectedType_.EvaluateOneOf( [&](OneOf) { arg.convert(expectedType_); }, [](OneOf) noexcept {}); - values_->push_back(std::move(arg)); + assertrx_throw(values_); + values_->emplace_back(std::move(arg)); if (expectedPathDepth_ < 0) values_->MarkObject(); return *this; } - template - FieldsExtractor &Put(int tag, const T &arg) { - return Put(tag, Variant{arg}); - } - - FieldsExtractor &Null(int) { return *this; } - -private: const IndexedPathNode &getArrayPathNode() const { if (filter_ && filter_->getTagsPathsLength() > 0) { size_t lastItemIndex = filter_->getTagsPathsLength() - 1; @@ -93,8 +177,7 @@ class FieldsExtractor { KeyValueType expectedType_{KeyValueType::Undefined{}}; int expectedPathDepth_ = 0; FieldsSet *filter_; - int *index_; - int *length_; + FieldParams *params_; }; } // namespace reindexer diff --git a/cpp_src/core/cjson/jschemachecker.cc b/cpp_src/core/cjson/jschemachecker.cc index 1c44ec2a2..c0300a590 100644 --- a/cpp_src/core/cjson/jschemachecker.cc +++ b/cpp_src/core/cjson/jschemachecker.cc @@ -152,9 +152,9 @@ Error JsonSchemaChecker::checkScheme(const gason::JsonNode& node, int typeIndex, if (!descr.subElementsTable[subElemIndex->second].second.array) { return Error(errParseJson, "Element [%s] should array in [%s].", elem.key, path); } - for (auto entry : elem.value) { - if (entry->value.getTag() == gason::JSON_ARRAY || entry->value.getTag() == gason::JSON_OBJECT) { - err = checkScheme(*entry, descr.subElementsTable[subElemIndex->second].second.typeIndex, path, + for (const auto& entry : elem.value) { + if (entry.value.getTag() == gason::JSON_ARRAY || entry.value.getTag() == gason::JSON_OBJECT) { + err = checkScheme(entry, descr.subElementsTable[subElemIndex->second].second.typeIndex, path, descr.subElementsTable[subElemIndex->second].first); if 
(!err.ok()) return err; } diff --git a/cpp_src/core/cjson/jsonbuilder.cc b/cpp_src/core/cjson/jsonbuilder.cc index c20532cfe..bb8905d7a 100644 --- a/cpp_src/core/cjson/jsonbuilder.cc +++ b/cpp_src/core/cjson/jsonbuilder.cc @@ -17,8 +17,6 @@ JsonBuilder::JsonBuilder(WrSerializer &ser, ObjType type, const TagsMatcher *tm) } } -JsonBuilder::~JsonBuilder() { End(); } - std::string_view JsonBuilder::getNameByTag(int tagName) { return tagName ? tm_->tag2name(tagName) : std::string_view(); } JsonBuilder &JsonBuilder::End() { @@ -56,13 +54,13 @@ void JsonBuilder::putName(std::string_view name) { } } -JsonBuilder &JsonBuilder::Put(std::string_view name, std::string_view arg) { +JsonBuilder &JsonBuilder::Put(std::string_view name, std::string_view arg, int /*offset*/) { putName(name); ser_->PrintJsonString(arg); return *this; } -JsonBuilder &JsonBuilder::Put(std::string_view name, Uuid arg) { +JsonBuilder &JsonBuilder::Put(std::string_view name, Uuid arg, int /*offset*/) { putName(name); ser_->PrintJsonUuid(arg); return *this; @@ -80,18 +78,19 @@ JsonBuilder &JsonBuilder::Null(std::string_view name) { return *this; } -JsonBuilder &JsonBuilder::Put(std::string_view name, const Variant &kv) { +JsonBuilder &JsonBuilder::Put(std::string_view name, const Variant &kv, int offset) { kv.Type().EvaluateOneOf( - [&](KeyValueType::Int) { Put(name, int(kv)); }, [&](KeyValueType::Int64) { Put(name, int64_t(kv)); }, - [&](KeyValueType::Double) { Put(name, double(kv)); }, [&](KeyValueType::String) { Put(name, std::string_view(kv)); }, - [&](KeyValueType::Null) { Null(name); }, [&](KeyValueType::Bool) { Put(name, bool(kv)); }, + [&](KeyValueType::Int) { Put(name, int(kv), offset); }, [&](KeyValueType::Int64) { Put(name, int64_t(kv), offset); }, + [&](KeyValueType::Double) { Put(name, double(kv), offset); }, + [&](KeyValueType::String) { Put(name, std::string_view(kv), offset); }, [&](KeyValueType::Null) { Null(name); }, + [&](KeyValueType::Bool) { Put(name, bool(kv), offset); }, 
[&](KeyValueType::Tuple) { auto arrNode = Array(name); for (auto &val : kv.getCompositeValues()) { - arrNode.Put({nullptr, 0}, val); + arrNode.Put({nullptr, 0}, val, offset); } }, - [&](KeyValueType::Uuid) { Put(name, Uuid{kv}); }, [](OneOf) noexcept {}); + [&](KeyValueType::Uuid) { Put(name, Uuid{kv}, offset); }, [](OneOf) noexcept {}); return *this; } diff --git a/cpp_src/core/cjson/jsonbuilder.h b/cpp_src/core/cjson/jsonbuilder.h index 0211c07eb..ccdc04b07 100644 --- a/cpp_src/core/cjson/jsonbuilder.h +++ b/cpp_src/core/cjson/jsonbuilder.h @@ -9,9 +9,9 @@ namespace reindexer { class JsonBuilder { public: - JsonBuilder() : ser_(nullptr), tm_(nullptr) {} + JsonBuilder() noexcept : ser_(nullptr), tm_(nullptr) {} JsonBuilder(WrSerializer &ser, ObjType type = ObjType::TypeObject, const TagsMatcher *tm = nullptr); - ~JsonBuilder(); + ~JsonBuilder() { End(); } JsonBuilder(const JsonBuilder &) = delete; JsonBuilder(JsonBuilder &&other) : ser_(other.ser_), tm_(other.tm_), type_(other.type_), count_(other.count_) { other.type_ = ObjType::TypePlain; @@ -19,7 +19,7 @@ class JsonBuilder { JsonBuilder &operator=(const JsonBuilder &) = delete; JsonBuilder &operator=(JsonBuilder &&) = delete; - void SetTagsMatcher(const TagsMatcher *tm) { tm_ = tm; } + void SetTagsMatcher(const TagsMatcher *tm) noexcept { tm_ = tm; } /// Start new object JsonBuilder Object(std::string_view name = {}, int size = KUnknownFieldSize); @@ -50,21 +50,21 @@ class JsonBuilder { while (count--) node.Put({}, ser.GetRawVariant(KeyValueType{tagType})); } - JsonBuilder &Put(std::string_view name, const Variant &arg); - JsonBuilder &Put(std::nullptr_t, const Variant &arg) { return Put(std::string_view{}, arg); } - JsonBuilder &Put(std::string_view name, std::string_view arg); - JsonBuilder &Put(std::string_view name, Uuid arg); - JsonBuilder &Put(std::nullptr_t, std::string_view arg) { return Put(std::string_view{}, arg); } - JsonBuilder &Put(std::string_view name, const char *arg) { return Put(name, 
std::string_view(arg)); } + JsonBuilder &Put(std::string_view name, const Variant &arg, int offset = 0); + JsonBuilder &Put(std::nullptr_t, const Variant &arg, int offset = 0) { return Put(std::string_view{}, arg, offset); } + JsonBuilder &Put(std::string_view name, std::string_view arg, int offset = 0); + JsonBuilder &Put(std::string_view name, Uuid arg, int offset = 0); + JsonBuilder &Put(std::nullptr_t, std::string_view arg, int offset = 0) { return Put(std::string_view{}, arg, offset); } + JsonBuilder &Put(std::string_view name, const char *arg, int offset = 0) { return Put(name, std::string_view(arg), offset); } template ::value || std::is_floating_point::value>::type * = nullptr> - JsonBuilder &Put(std::string_view name, const T &arg) { + JsonBuilder &Put(std::string_view name, const T &arg, int /*offset*/ = 0) { putName(name); (*ser_) << arg; return *this; } template - JsonBuilder &Put(int tagName, const T &arg) { - return Put(getNameByTag(tagName), arg); + JsonBuilder &Put(int tagName, const T &arg, int offset = 0) { + return Put(getNameByTag(tagName), arg, offset); } JsonBuilder &Raw(int tagName, std::string_view arg) { return Raw(getNameByTag(tagName), arg); } diff --git a/cpp_src/core/cjson/jsondecoder.cc b/cpp_src/core/cjson/jsondecoder.cc index 9e1086c2f..d10e82539 100644 --- a/cpp_src/core/cjson/jsondecoder.cc +++ b/cpp_src/core/cjson/jsondecoder.cc @@ -9,11 +9,9 @@ namespace reindexer { -JsonDecoder::JsonDecoder(TagsMatcher &tagsMatcher) : tagsMatcher_(tagsMatcher), filter_(nullptr) {} -JsonDecoder::JsonDecoder(TagsMatcher &tagsMatcher, const FieldsSet *filter) : tagsMatcher_(tagsMatcher), filter_(filter) {} - Error JsonDecoder::Decode(Payload &pl, WrSerializer &wrser, const gason::JsonValue &v) { try { + objectScalarIndexes_.reset(); tagsPath_.clear(); CJsonBuilder builder(wrser, ObjType::TypePlain, &tagsMatcher_); decodeJson(&pl, builder, v, 0, true); @@ -27,9 +25,9 @@ Error JsonDecoder::Decode(Payload &pl, WrSerializer &wrser, const gason::JsonVal 
void JsonDecoder::decodeJsonObject(Payload &pl, CJsonBuilder &builder, const gason::JsonValue &v, bool match) { for (const auto &elem : v) { - int tagName = tagsMatcher_.name2tag(elem->key, true); + int tagName = tagsMatcher_.name2tag(elem.key, true); assertrx(tagName); - tagsPath_.push_back(tagName); + tagsPath_.emplace_back(tagName); int field = tagsMatcher_.tags2field(tagsPath_.data(), tagsPath_.size()); if (filter_) { if (field >= 0) @@ -39,31 +37,29 @@ void JsonDecoder::decodeJsonObject(Payload &pl, CJsonBuilder &builder, const gas } if (field < 0) { - decodeJson(&pl, builder, elem->value, tagName, match); + decodeJson(&pl, builder, elem.value, tagName, match); } else if (match) { // Indexed field. extract it const auto &f = pl.Type().Field(field); - switch (elem->value.getTag()) { + switch (elem.value.getTag()) { case gason::JSON_ARRAY: { - if (!f.IsArray()) { + if rx_unlikely (!f.IsArray()) { throw Error(errLogic, "Error parsing json field '%s' - got array, expected scalar %s", f.Name(), f.Type().Name()); } int count = 0; - for (auto subelem : elem->value) { + for (auto &subelem : elem.value) { (void)subelem; - count++; + ++count; } int pos = pl.ResizeArray(field, count, true); - for (auto subelem : elem->value) { - pl.Set(field, pos++, jsonValue2Variant(subelem->value, f.Type(), f.Name())); + for (auto &subelem : elem.value) { + pl.Set(field, pos++, jsonValue2Variant(subelem.value, f.Type(), f.Name())); } builder.ArrayRef(tagName, field, count); } break; case gason::JSON_NULL: - if (isInArray() && !f.IsArray()) { - throw Error(errLogic, "Error parsing json field '%s' - got value in the nested array, but expected scalar %s", - f.Name(), f.Type().Name()); - } + validateNonArrayFieldRestrictions(objectScalarIndexes_, pl, f, field, isInArray(), "json"); + objectScalarIndexes_.set(field); builder.Null(tagName); break; case gason::JSON_NUMBER: @@ -72,17 +68,15 @@ void JsonDecoder::decodeJsonObject(Payload &pl, CJsonBuilder &builder, const gas case 
gason::JSON_STRING: case gason::JSON_TRUE: case gason::JSON_FALSE: { - if (isInArray() && !f.IsArray()) { - throw Error(errLogic, "Error parsing json field '%s' - got value in the nested array, but expected scalar %s", - f.Name(), f.Type().Name()); - } - Variant v = jsonValue2Variant(elem->value, f.Type(), f.Name()); - pl.Set(field, {v}, true); + validateNonArrayFieldRestrictions(objectScalarIndexes_, pl, f, field, isInArray(), "json"); + objectScalarIndexes_.set(field); + Variant v = jsonValue2Variant(elem.value, f.Type(), f.Name()); builder.Ref(tagName, v, field); + pl.Set(field, std::move(v), true); } break; - default: - abort(); } + } else { + // objectScalarIndexes_.set(field); - do not change objectScalarIndexes_ value for the filtered out fields } tagsPath_.pop_back(); } @@ -125,7 +119,7 @@ void JsonDecoder::decodeJson(Payload *pl, CJsonBuilder &builder, const gason::Js } auto arrNode = builder.Array(tagName, type); for (const auto &elem : v) { - decodeJson(pl, arrNode, elem->value, 0, match); + decodeJson(pl, arrNode, elem.value, 0, match); } break; } @@ -143,7 +137,7 @@ void JsonDecoder::decodeJson(Payload *pl, CJsonBuilder &builder, const gason::Js class TagsPathGuard { public: - TagsPathGuard(TagsPath &tagsPath, int tagName) : tagsPath_(tagsPath) { tagsPath_.push_back(tagName); } + TagsPathGuard(TagsPath &tagsPath, int tagName) noexcept : tagsPath_(tagsPath) { tagsPath_.emplace_back(tagName); } ~TagsPathGuard() { tagsPath_.pop_back(); } public: @@ -152,14 +146,15 @@ class TagsPathGuard { void JsonDecoder::decodeJsonObject(const gason::JsonValue &root, CJsonBuilder &builder) { for (const auto &elem : root) { - int tagName = tagsMatcher_.name2tag(elem->key, true); + int tagName = tagsMatcher_.name2tag(elem.key, true); TagsPathGuard tagsPathGuard(tagsPath_, tagName); - decodeJson(nullptr, builder, elem->value, tagName, true); + decodeJson(nullptr, builder, elem.value, tagName, true); } } void JsonDecoder::Decode(std::string_view json, CJsonBuilder &builder, 
const TagsPath &fieldPath) { try { + objectScalarIndexes_.reset(); tagsPath_ = fieldPath; gason::JsonParser jsonParser; gason::JsonNode root = jsonParser.Parse(json); diff --git a/cpp_src/core/cjson/jsondecoder.h b/cpp_src/core/cjson/jsondecoder.h index d19e136f0..c44a1432c 100644 --- a/cpp_src/core/cjson/jsondecoder.h +++ b/cpp_src/core/cjson/jsondecoder.h @@ -8,8 +8,8 @@ namespace reindexer { class JsonDecoder { public: - explicit JsonDecoder(TagsMatcher &tagsMatcher); - JsonDecoder(TagsMatcher &tagsMatcher, const FieldsSet *filter); + explicit JsonDecoder(TagsMatcher &tagsMatcher) noexcept : tagsMatcher_(tagsMatcher), filter_(nullptr) {} + JsonDecoder(TagsMatcher &tagsMatcher, const FieldsSet *filter) noexcept : tagsMatcher_(tagsMatcher), filter_(filter) {} Error Decode(Payload &pl, WrSerializer &wrSer, const gason::JsonValue &v); void Decode(std::string_view json, CJsonBuilder &builder, const TagsPath &fieldPath); @@ -23,6 +23,7 @@ class JsonDecoder { TagsPath tagsPath_; const FieldsSet *filter_; int32_t arrayLevel_ = 0; + ScalarIndexesSetT objectScalarIndexes_; }; } // namespace reindexer diff --git a/cpp_src/core/cjson/msgpackbuilder.cc b/cpp_src/core/cjson/msgpackbuilder.cc index 7a0cfb321..719aa7800 100644 --- a/cpp_src/core/cjson/msgpackbuilder.cc +++ b/cpp_src/core/cjson/msgpackbuilder.cc @@ -30,8 +30,6 @@ MsgPackBuilder::MsgPackBuilder(msgpack_packer &packer, const TagsLengths *tagsLe init(KUnknownFieldSize); } -MsgPackBuilder::~MsgPackBuilder() { End(); } - void MsgPackBuilder::Array(int tagName, Serializer &ser, TagType tagType, int count) { checkIfCorrectArray(tagName); skipTag(); @@ -111,15 +109,15 @@ void MsgPackBuilder::appendJsonObject(std::string_view name, const gason::JsonNo auto type = obj.value.getTag(); switch (type) { case gason::JSON_STRING: { - Put(name, obj.As()); + Put(name, obj.As(), 0); break; } case gason::JSON_NUMBER: { - Put(name, obj.As()); + Put(name, obj.As(), 0); break; } case gason::JSON_DOUBLE: { - Put(name, obj.As()); + 
Put(name, obj.As(), 0); break; } case gason::JSON_OBJECT: @@ -143,11 +141,11 @@ void MsgPackBuilder::appendJsonObject(std::string_view name, const gason::JsonNo break; } case gason::JSON_TRUE: { - Put(std::string_view(obj.key), true); + Put(std::string_view(obj.key), true, 0); break; } case gason::JSON_FALSE: { - Put(std::string_view(obj.key), false); + Put(std::string_view(obj.key), false, 0); break; } case gason::JSON_NULL: { diff --git a/cpp_src/core/cjson/msgpackbuilder.h b/cpp_src/core/cjson/msgpackbuilder.h index b8e425971..a6e6e5bb6 100644 --- a/cpp_src/core/cjson/msgpackbuilder.h +++ b/cpp_src/core/cjson/msgpackbuilder.h @@ -22,17 +22,17 @@ class MsgPackBuilder { const TagsMatcher *tm = nullptr); MsgPackBuilder(msgpack_packer &packer, const TagsLengths *tagsLengths, int *startTag, ObjType = ObjType::TypeObject, const TagsMatcher *tm = nullptr); - MsgPackBuilder() : tm_(nullptr), packer_(), tagsLengths_(nullptr), type_(ObjType::TypePlain), tagIndex_(nullptr) {} - ~MsgPackBuilder(); - MsgPackBuilder(MsgPackBuilder &&other) + MsgPackBuilder() noexcept : tm_(nullptr), packer_(), tagsLengths_(nullptr), type_(ObjType::TypePlain), tagIndex_(nullptr) {} + ~MsgPackBuilder() { End(); } + MsgPackBuilder(MsgPackBuilder &&other) noexcept : tm_(other.tm_), packer_(other.packer_), tagsLengths_(other.tagsLengths_), type_(other.type_), tagIndex_(other.tagIndex_) {} MsgPackBuilder(const MsgPackBuilder &) = delete; MsgPackBuilder &operator=(const MsgPackBuilder &) = delete; MsgPackBuilder &operator=(MsgPackBuilder &&) = delete; - void SetTagsMatcher(const TagsMatcher *tm) { tm_ = tm; } - MsgPackBuilder Raw(std::string_view, std::string_view) { return MsgPackBuilder(); } + void SetTagsMatcher(const TagsMatcher *tm) noexcept { tm_ = tm; } + MsgPackBuilder Raw(std::string_view, std::string_view) noexcept { return MsgPackBuilder(); } MsgPackBuilder Raw(std::nullptr_t, std::string_view arg) { return Raw(std::string_view{}, arg); } template @@ -95,7 +95,7 @@ class MsgPackBuilder { 
} template - MsgPackBuilder &Put(N tagName, T arg) { + MsgPackBuilder &Put(N tagName, const T &arg, int /*offset*/ = 0) { if (isArray()) skipTag(); skipTag(); packKeyName(tagName); @@ -115,7 +115,7 @@ class MsgPackBuilder { } template - MsgPackBuilder &Put(T tagName, const Variant &kv) { + MsgPackBuilder &Put(T tagName, const Variant &kv, int offset = 0) { if (isArray()) skipTag(); skipTag(); packKeyName(tagName); @@ -126,7 +126,7 @@ class MsgPackBuilder { [&](KeyValueType::Tuple) { auto arrNode = Array(tagName); for (auto &val : kv.getCompositeValues()) { - arrNode.Put(0, val); + arrNode.Put(0, val, offset); } }, [&](KeyValueType::Uuid) { packValue(Uuid{kv}); }, [](OneOf) noexcept {}); diff --git a/cpp_src/core/cjson/msgpackdecoder.cc b/cpp_src/core/cjson/msgpackdecoder.cc index b87234dc0..e29178282 100644 --- a/cpp_src/core/cjson/msgpackdecoder.cc +++ b/cpp_src/core/cjson/msgpackdecoder.cc @@ -1,5 +1,6 @@ #include "msgpackdecoder.h" +#include "core/cjson/cjsontools.h" #include "core/cjson/objtype.h" #include "core/cjson/tagsmatcher.h" #include "tools/flagguard.h" @@ -7,19 +8,15 @@ namespace reindexer { -MsgPackDecoder::MsgPackDecoder(TagsMatcher* tagsMatcher) : tm_(tagsMatcher) {} - template void MsgPackDecoder::setValue(Payload& pl, CJsonBuilder& builder, const T& value, int tagName) { - int field = tm_->tags2field(tagsPath_.data(), tagsPath_.size()); + int field = tm_.tags2field(tagsPath_.data(), tagsPath_.size()); if (field > 0) { - const auto& f = pl.Type().Field(field); - if (isInArray() && !f.IsArray()) { - throw Error(errLogic, "Error parsing msgpack field '%s' - got array, expected scalar %s", f.Name(), f.Type().Name()); - } + validateNonArrayFieldRestrictions(objectScalarIndexes_, pl, pl.Type().Field(field), field, isInArray(), "msgpack"); Variant val(value); - pl.Set(field, {val}, true); builder.Ref(tagName, val, field); + pl.Set(field, std::move(val), true); + objectScalarIndexes_.set(field); } else { builder.Put(tagName, value); } @@ -35,13 +32,13 @@ 
int MsgPackDecoder::decodeKeyToTag(const msgpack_object_kv& obj) { using namespace std::string_view_literals; switch (obj.key.type) { case MSGPACK_OBJECT_BOOLEAN: - return tm_->name2tag(obj.key.via.boolean ? "true"sv : "false"sv, true); + return tm_.name2tag(obj.key.via.boolean ? "true"sv : "false"sv, true); case MSGPACK_OBJECT_POSITIVE_INTEGER: - return tm_->name2tag(std::to_string(obj.key.via.u64), true); + return tm_.name2tag(std::to_string(obj.key.via.u64), true); case MSGPACK_OBJECT_NEGATIVE_INTEGER: - return tm_->name2tag(std::to_string(obj.key.via.i64), true); + return tm_.name2tag(std::to_string(obj.key.via.i64), true); case MSGPACK_OBJECT_STR: - return tm_->name2tag(std::string_view(obj.key.via.str.ptr, obj.key.via.str.size), true); + return tm_.name2tag(std::string_view(obj.key.via.str.ptr, obj.key.via.str.size), true); case MSGPACK_OBJECT_FLOAT32: case MSGPACK_OBJECT_FLOAT64: case MSGPACK_OBJECT_NIL: @@ -49,13 +46,13 @@ int MsgPackDecoder::decodeKeyToTag(const msgpack_object_kv& obj) { case MSGPACK_OBJECT_MAP: case MSGPACK_OBJECT_BIN: case MSGPACK_OBJECT_EXT: - default: - throw Error(errParams, "Unsupported MsgPack map key type: %s(%d)", ToString(obj.key.type), obj.key.type); + break; } + throw Error(errParams, "Unsupported MsgPack map key type: %s(%d)", ToString(obj.key.type), obj.key.type); } void MsgPackDecoder::decode(Payload& pl, CJsonBuilder& builder, const msgpack_object& obj, int tagName) { - if (tagName) tagsPath_.push_back(tagName); + if (tagName) tagsPath_.emplace_back(tagName); switch (obj.type) { case MSGPACK_OBJECT_NIL: builder.Null(tagName); @@ -91,8 +88,12 @@ void MsgPackDecoder::decode(Payload& pl, CJsonBuilder& builder, const msgpack_ob prevType = p->type; } } - int field = tm_->tags2field(tagsPath_.data(), tagsPath_.size()); + int field = tm_.tags2field(tagsPath_.data(), tagsPath_.size()); if (field > 0) { + auto& f = pl.Type().Field(field); + if rx_unlikely (!f.IsArray()) { + throw Error(errLogic, "Error parsing msgpack field '%s' - 
got array, expected scalar %s", f.Name(), f.Type().Name()); + } auto& array = builder.ArrayRef(tagName, field, count); iterateOverArray(begin, end, pl, array); } else { @@ -123,17 +124,18 @@ void MsgPackDecoder::decode(Payload& pl, CJsonBuilder& builder, const msgpack_ob Error MsgPackDecoder::Decode(std::string_view buf, Payload& pl, WrSerializer& wrser, size_t& offset) { try { + objectScalarIndexes_.reset(); tagsPath_.clear(); size_t baseOffset = offset; MsgPackValue data = parser_.Parse(buf, offset); - if (!data.p) return Error(errLogic, "Error unpacking msgpack data"); - if (data.p->type != MSGPACK_OBJECT_MAP) { + if rx_unlikely (!data.p) return Error(errLogic, "Error unpacking msgpack data"); + if rx_unlikely (data.p->type != MSGPACK_OBJECT_MAP) { std::string_view slice = buf.substr(baseOffset, 16); return Error(errNotValid, "Unexpected MsgPack value. Expected %s, but got %s(%d) at %d(~>\"%s\"...)", ToString(MSGPACK_OBJECT_MAP), ToString(data.p->type), data.p->type, baseOffset, slice); } - CJsonBuilder cjsonBuilder(wrser, ObjType::TypePlain, tm_, 0); + CJsonBuilder cjsonBuilder(wrser, ObjType::TypePlain, &tm_, 0); decode(pl, cjsonBuilder, *(data.p), 0); } catch (const Error& err) { return err; @@ -144,7 +146,7 @@ Error MsgPackDecoder::Decode(std::string_view buf, Payload& pl, WrSerializer& wr return Error(errNotValid, "Unexpected exception"); } - return errOK; + return Error(); } constexpr std::string_view ToString(msgpack_object_type type) { @@ -177,9 +179,8 @@ constexpr std::string_view ToString(msgpack_object_type type) { return "BIN"sv; case MSGPACK_OBJECT_EXT: return "EXT"sv; - default: - return "UNKNOWN_TYPE"sv; } + return "UNKNOWN_TYPE"sv; } } // namespace reindexer diff --git a/cpp_src/core/cjson/msgpackdecoder.h b/cpp_src/core/cjson/msgpackdecoder.h index 6cd3b552d..042b05263 100644 --- a/cpp_src/core/cjson/msgpackdecoder.h +++ b/cpp_src/core/cjson/msgpackdecoder.h @@ -14,7 +14,7 @@ class WrSerializer; class MsgPackDecoder { public: - explicit 
MsgPackDecoder(TagsMatcher* tagsMatcher); + explicit MsgPackDecoder(TagsMatcher& tagsMatcher) noexcept : tm_(tagsMatcher) {} Error Decode(std::string_view buf, Payload& pl, WrSerializer& wrser, size_t& offset); private: @@ -27,10 +27,11 @@ class MsgPackDecoder { void setValue(Payload& pl, CJsonBuilder& builder, const T& value, int tagName); bool isInArray() const noexcept { return arrayLevel_ > 0; } - TagsMatcher* tm_; + TagsMatcher& tm_; TagsPath tagsPath_; MsgPackParser parser_; int32_t arrayLevel_ = 0; + ScalarIndexesSetT objectScalarIndexes_; }; constexpr std::string_view ToString(msgpack_object_type type); diff --git a/cpp_src/core/cjson/objtype.h b/cpp_src/core/cjson/objtype.h index 17c248e43..f1cbd208b 100644 --- a/cpp_src/core/cjson/objtype.h +++ b/cpp_src/core/cjson/objtype.h @@ -1,8 +1,12 @@ #pragma once +namespace reindexer { + enum class ObjType { TypeObject, TypeArray, TypeObjectArray, TypePlain, }; + +} diff --git a/cpp_src/core/cjson/protobufbuilder.cc b/cpp_src/core/cjson/protobufbuilder.cc index f89b3401b..c9237a642 100644 --- a/cpp_src/core/cjson/protobufbuilder.cc +++ b/cpp_src/core/cjson/protobufbuilder.cc @@ -191,7 +191,7 @@ void ProtobufBuilder::put(int fieldIdx, const Variant& val) { [&](KeyValueType::Tuple) { auto arrNode = ArrayPacked(fieldIdx); for (auto& itVal : val.getCompositeValues()) { - arrNode.Put(fieldIdx, itVal); + arrNode.Put(fieldIdx, itVal, 0); } }, [&](KeyValueType::Uuid) { put(fieldIdx, Uuid{val}); }, diff --git a/cpp_src/core/cjson/protobufbuilder.h b/cpp_src/core/cjson/protobufbuilder.h index 73b488352..0a75fa5db 100644 --- a/cpp_src/core/cjson/protobufbuilder.h +++ b/cpp_src/core/cjson/protobufbuilder.h @@ -25,7 +25,7 @@ enum ProtobufTypes { class ProtobufBuilder { public: - ProtobufBuilder() + ProtobufBuilder() noexcept : type_(ObjType::TypePlain), ser_(nullptr), tm_(nullptr), @@ -48,23 +48,23 @@ class ProtobufBuilder { ProtobufBuilder& operator=(const ProtobufBuilder&) = delete; ~ProtobufBuilder() { End(); } - void 
SetTagsMatcher(const TagsMatcher* tm) { tm_ = tm; } - void SetTagsPath(const TagsPath* tagsPath) { tagsPath_ = tagsPath; } + void SetTagsMatcher(const TagsMatcher* tm) noexcept { tm_ = tm; } + void SetTagsPath(const TagsPath* tagsPath) noexcept { tagsPath_ = tagsPath; } template - ProtobufBuilder& Put(int fieldIdx, const T& val) { + ProtobufBuilder& Put(int fieldIdx, const T& val, int /*offset*/ = 0) { put(fieldIdx, val); return *this; } template - ProtobufBuilder& Put(std::string_view tagName, const T& val) { + ProtobufBuilder& Put(std::string_view tagName, const T& val, int /*offset*/ = 0) { put(tm_->name2tag(tagName), val); return *this; } template - ProtobufBuilder& Null(T) { + ProtobufBuilder& Null(T) noexcept { return *this; } diff --git a/cpp_src/core/cjson/protobufdecoder.cc b/cpp_src/core/cjson/protobufdecoder.cc index bd9c70ca7..2a1478cd0 100644 --- a/cpp_src/core/cjson/protobufdecoder.cc +++ b/cpp_src/core/cjson/protobufdecoder.cc @@ -6,15 +6,13 @@ namespace reindexer { -ArraysStorage::ArraysStorage(TagsMatcher& tm) : tm_(tm) {} - void ArraysStorage::UpdateArraySize(int tagName, int field) { GetArray(tagName, field); } CJsonBuilder& ArraysStorage::GetArray(int tagName, int field) { assertrx(indexes_.size() > 0); auto it = data_.find(tagName); if (it == data_.end()) { - indexes_.back().push_back(tagName); + indexes_.back().emplace_back(tagName); auto itArrayData = data_.emplace(std::piecewise_construct, std::forward_as_tuple(tagName), std::forward_as_tuple(&tm_, tagName, field)); itArrayData.first->second.size = 1; @@ -44,19 +42,21 @@ void ArraysStorage::onObjectBuilt(CJsonBuilder& parent) { indexes_.pop_back(); } -ProtobufDecoder::ProtobufDecoder(TagsMatcher& tagsMatcher, std::shared_ptr schema) - : tm_(tagsMatcher), schema_(std::move(schema)), arraysStorage_(tm_) {} - void ProtobufDecoder::setValue(Payload& pl, CJsonBuilder& builder, ProtobufValue item) { int field = tm_.tags2field(tagsPath_.data(), tagsPath_.size()); auto value = 
item.value.convert(item.itemType); if (field > 0) { - pl.Set(field, {value}, true); + const auto& f = pl.Type().Field(field); + if rx_unlikely (!f.IsArray() && objectScalarIndexes_.test(field)) { + throw Error(errLogic, "Non-array field '%s' [%d] from '%s' can only be encoded once.", f.Name(), field, pl.Type().Name()); + } if (item.isArray) { arraysStorage_.UpdateArraySize(item.tagName, field); } else { builder.Ref(item.tagName, value, field); } + pl.Set(field, std::move(value), true); + objectScalarIndexes_.set(field); } else { if (item.isArray) { auto& array = arraysStorage_.GetArray(item.tagName); @@ -73,10 +73,14 @@ Error ProtobufDecoder::decodeArray(Payload& pl, CJsonBuilder& builder, const Pro bool packed = item.IsOfPrimitiveType(); int field = tm_.tags2field(tagsPath_.data(), tagsPath_.size()); if (field > 0) { + auto& f = pl.Type().Field(field); + if rx_unlikely (!f.IsArray()) { + throw Error(errLogic, "Error parsing protobuf field '%s' - got array, expected scalar %s", f.Name(), f.Type().Name()); + } if (packed) { int count = 0; while (!parser.IsEof()) { - pl.Set(field, {parser.ReadArrayItem(item.itemType)}, true); + pl.Set(field, parser.ReadArrayItem(item.itemType), true); ++count; } builder.ArrayRef(item.tagName, field, count); @@ -91,7 +95,7 @@ Error ProtobufDecoder::decodeArray(Payload& pl, CJsonBuilder& builder, const Pro } } else { if (item.itemType.Is()) { - Error status{errOK}; + Error status; CJsonProtobufObjectBuilder obj(array, 0, arraysStorage_); while (status.ok() && !parser.IsEof()) { status = decode(pl, obj, parser.ReadValue()); @@ -101,7 +105,7 @@ Error ProtobufDecoder::decodeArray(Payload& pl, CJsonBuilder& builder, const Pro } } } - return errOK; + return Error(); } Error ProtobufDecoder::decode(Payload& pl, CJsonBuilder& builder, const ProtobufValue& item) { @@ -138,7 +142,7 @@ Error ProtobufDecoder::decode(Payload& pl, CJsonBuilder& builder, const Protobuf } Error ProtobufDecoder::decodeObject(Payload& pl, CJsonBuilder& builder, 
ProtobufObject& object) { - Error status{errOK}; + Error status; ProtobufParser parser(object); while (status.ok() && !parser.IsEof()) { status = decode(pl, builder, parser.ReadValue()); @@ -149,6 +153,7 @@ Error ProtobufDecoder::decodeObject(Payload& pl, CJsonBuilder& builder, Protobuf Error ProtobufDecoder::Decode(std::string_view buf, Payload& pl, WrSerializer& wrser) { try { tagsPath_.clear(); + objectScalarIndexes_.reset(); CJsonProtobufObjectBuilder cjsonBuilder(arraysStorage_, wrser, &tm_, 0); ProtobufObject object(buf, *schema_, tagsPath_, tm_); return decodeObject(pl, cjsonBuilder, object); diff --git a/cpp_src/core/cjson/protobufdecoder.h b/cpp_src/core/cjson/protobufdecoder.h index fce1f432c..7cba753d7 100644 --- a/cpp_src/core/cjson/protobufdecoder.h +++ b/cpp_src/core/cjson/protobufdecoder.h @@ -12,7 +12,7 @@ struct ProtobufObject; class ArraysStorage { public: - explicit ArraysStorage(TagsMatcher& tm); + explicit ArraysStorage(TagsMatcher& tm) noexcept : tm_(tm) {} ArraysStorage(const ArraysStorage&) = delete; ArraysStorage(ArraysStorage&&) = delete; ArraysStorage& operator=(const ArraysStorage&) = delete; @@ -68,7 +68,8 @@ class CJsonProtobufObjectBuilder { class ProtobufDecoder { public: - ProtobufDecoder(TagsMatcher& tagsMatcher, std::shared_ptr schema); + ProtobufDecoder(TagsMatcher& tagsMatcher, std::shared_ptr schema) noexcept + : tm_(tagsMatcher), schema_(std::move(schema)), arraysStorage_(tm_) {} ProtobufDecoder(const ProtobufDecoder&) = delete; ProtobufDecoder(ProtobufDecoder&&) = delete; ProtobufDecoder& operator=(const ProtobufDecoder&) = delete; @@ -86,6 +87,7 @@ class ProtobufDecoder { std::shared_ptr schema_; TagsPath tagsPath_; ArraysStorage arraysStorage_; + ScalarIndexesSetT objectScalarIndexes_; }; } // namespace reindexer diff --git a/cpp_src/core/cjson/protobufschemabuilder.cc b/cpp_src/core/cjson/protobufschemabuilder.cc index 99bc5e1e3..068f0db29 100644 --- a/cpp_src/core/cjson/protobufschemabuilder.cc +++ 
b/cpp_src/core/cjson/protobufschemabuilder.cc @@ -92,7 +92,7 @@ void ProtobufSchemaBuilder::Field(std::string_view name, int tagName, const Fiel ProtobufSchemaBuilder ProtobufSchemaBuilder::Object(int tagName, std::string_view name, bool buildTypesOnly, const std::function& filler) { - fieldsTypes_->tagsPath_.push_back(tagName); + fieldsTypes_->tagsPath_.emplace_back(tagName); fieldsTypes_->AddObject(std::string{name}); ProtobufSchemaBuilder obj(buildTypesOnly ? nullptr : ser_, fieldsTypes_, ObjType::TypeObject, name, pt_, tm_); if (filler) { diff --git a/cpp_src/core/cjson/tagsmatcher.h b/cpp_src/core/cjson/tagsmatcher.h index ed575d3db..22278dfeb 100644 --- a/cpp_src/core/cjson/tagsmatcher.h +++ b/cpp_src/core/cjson/tagsmatcher.h @@ -43,10 +43,10 @@ class TagsMatcher { auto res = impl_->path2indexedtag(jsonPath, ev); return res.empty() && canAdd ? impl_.clone()->path2indexedtag(jsonPath, ev, canAdd, updated_) : res; } - int version() const { return impl_->version(); } - size_t size() const { return impl_->size(); } - bool isUpdated() const { return updated_; } - uint32_t stateToken() const { return impl_->stateToken(); } + int version() const noexcept { return impl_->version(); } + size_t size() const noexcept { return impl_->size(); } + bool isUpdated() const noexcept { return updated_; } + uint32_t stateToken() const noexcept { return impl_->stateToken(); } void clear() { impl_.clone()->clear(); } void serialize(WrSerializer& ser) const { impl_->serialize(ser); } void deserialize(Serializer& ser) { @@ -57,8 +57,8 @@ class TagsMatcher { impl_.clone()->deserialize(ser, version, stateToken); impl_.clone()->buildTagsCache(updated_); } - void clearUpdated() { updated_ = false; } - void setUpdated() { updated_ = true; } + void clearUpdated() noexcept { updated_ = false; } + void setUpdated() noexcept { updated_ = true; } bool try_merge(const TagsMatcher& tm) { auto tmp = impl_; diff --git a/cpp_src/core/cjson/tagsmatcherimpl.h b/cpp_src/core/cjson/tagsmatcherimpl.h 
index 170e107b0..c468fbf94 100644 --- a/cpp_src/core/cjson/tagsmatcherimpl.h +++ b/cpp_src/core/cjson/tagsmatcherimpl.h @@ -50,7 +50,7 @@ class TagsMatcherImpl { fieldTags.clear(); return fieldTags; } - fieldTags.push_back(static_cast(fieldTag)); + fieldTags.emplace_back(static_cast(fieldTag)); } } return fieldTags; @@ -251,9 +251,9 @@ class TagsMatcherImpl { return modified; } - size_t size() const { return tags2names_.size(); } - int version() const { return version_; } - int stateToken() const { return stateToken_; } + size_t size() const noexcept { return tags2names_.size(); } + int version() const noexcept { return version_; } + int stateToken() const noexcept { return stateToken_; } void clear() { names2tags_.clear(); diff --git a/cpp_src/core/cjson/uuid_recoders.h b/cpp_src/core/cjson/uuid_recoders.h index 24c4f1db3..fbc0d60fe 100644 --- a/cpp_src/core/cjson/uuid_recoders.h +++ b/cpp_src/core/cjson/uuid_recoders.h @@ -57,7 +57,7 @@ class RecoderStringToUuidArray : public Recoder { void Recode(Serializer &, WrSerializer &) const override final { assertrx(0); } void Recode(Serializer &rdser, Payload &pl, int tagName, WrSerializer &wrser) override final { if (fromNotArrayField_) { - pl.Set(field_, {Variant{rdser.GetStrUuid()}}, true); + pl.Set(field_, Variant{rdser.GetStrUuid()}, true); wrser.PutCTag(ctag{TAG_ARRAY, tagName, field_}); wrser.PutVarUint(1); } else { @@ -98,7 +98,7 @@ class RecoderStringToUuid : public Recoder { [[nodiscard]] bool Match(const TagsPath &) const noexcept override final { return false; } void Recode(Serializer &, WrSerializer &) const override final { assertrx(0); } void Recode(Serializer &rdser, Payload &pl, int tagName, WrSerializer &wrser) override final { - pl.Set(field_, {Variant{rdser.GetStrUuid()}}, true); + pl.Set(field_, Variant{rdser.GetStrUuid()}, true); wrser.PutCTag(ctag{TAG_UUID, tagName, field_}); } diff --git a/cpp_src/core/clusterproxy.cc b/cpp_src/core/clusterproxy.cc index 91f626589..ed7035dfd 100644 --- 
a/cpp_src/core/clusterproxy.cc +++ b/cpp_src/core/clusterproxy.cc @@ -1,7 +1,11 @@ #include "clusterproxy.h" +#include "core/cjson/jsonbuilder.h" #include "core/defnsconfigs.h" #include "estl/shared_mutex.h" +#include "namespacedef.h" +#include "tools/catch_and_return.h" + namespace reindexer { using namespace std::string_view_literals; @@ -183,4 +187,56 @@ bool ClusterProxy::shouldProxyQuery(const Query &q) { return isClusterReplQuery; } +[[nodiscard]] Error ClusterProxy::ResetShardingConfig(std::optional config) noexcept { + try { + impl_.shardingConfig_.Set(std::move(config)); + return impl_.tryLoadShardingConf(); + } + CATCH_AND_RETURN +} + +#ifdef _MSC_VER +#define REINDEXER_FUNC_NAME __FUNCSIG__ +#else +#define REINDEXER_FUNC_NAME __PRETTY_FUNCTION__ +#endif + +template +[[nodiscard]] Error ClusterProxy::shardingConfigCandidateAction(const RdxContext &ctx, Args &&...args) noexcept { + try { + const auto action = [this](const RdxContext &c, LeaderRefT l, Args &&...aa) { + return baseFollowerAction(c, l, std::forward(aa)...); + }; + + clusterProxyLog(LogTrace, "[%d proxy] %s", getServerIDRel(), REINDEXER_FUNC_NAME); + // kReplicationStatsNamespace required for impl_.NamespaceIsInClusterConfig(nsName) in proxyCall was true always + return proxyCall(ctx, kReplicationStatsNamespace, action, std::forward(args)...); + } + CATCH_AND_RETURN +} + +[[nodiscard]] Error ClusterProxy::SaveShardingCfgCandidate(std::string_view config, int64_t sourceId, const RdxContext &ctx) noexcept { + return shardingConfigCandidateAction<&client::Reindexer::SaveNewShardingConfig, &ReindexerImpl::saveShardingCfgCandidate>(ctx, config, + sourceId); +} + +[[nodiscard]] Error ClusterProxy::ApplyShardingCfgCandidate(int64_t sourceId, const RdxContext &ctx) noexcept { + return shardingConfigCandidateAction<&client::Reindexer::ApplyNewShardingConfig, &ReindexerImpl::applyShardingCfgCandidate>(ctx, + sourceId); +} + +[[nodiscard]] Error ClusterProxy::ResetOldShardingConfig(int64_t sourceId, 
const RdxContext &ctx) noexcept { + return shardingConfigCandidateAction<&client::Reindexer::ResetOldShardingConfig, &ReindexerImpl::resetOldShardingConfig>(ctx, sourceId); +} + +[[nodiscard]] Error ClusterProxy::ResetShardingConfigCandidate(int64_t sourceId, const RdxContext &ctx) noexcept { + return shardingConfigCandidateAction<&client::Reindexer::ResetShardingConfigCandidate, &ReindexerImpl::resetShardingConfigCandidate>( + ctx, sourceId); +} + +[[nodiscard]] Error ClusterProxy::RollbackShardingConfigCandidate(int64_t sourceId, const RdxContext &ctx) noexcept { + return shardingConfigCandidateAction<&client::Reindexer::RollbackShardingConfigCandidate, + &ReindexerImpl::rollbackShardingConfigCandidate>(ctx, sourceId); +} + } // namespace reindexer diff --git a/cpp_src/core/clusterproxy.h b/cpp_src/core/clusterproxy.h index 696e9696b..99e4a9380 100644 --- a/cpp_src/core/clusterproxy.h +++ b/cpp_src/core/clusterproxy.h @@ -3,6 +3,7 @@ #include #include "client/itemimpl.h" #include "client/reindexer.h" +#include "cluster/config.h" #include "cluster/consts.h" #include "core/reindexerimpl.h" #include "tools/clusterproxyloghelper.h" @@ -217,7 +218,7 @@ class ClusterProxy { Error SetClusterizationStatus(std::string_view nsName, const ClusterizationStatus &status, const RdxContext &ctx) { return impl_.SetClusterizationStatus(nsName, status, ctx); } - bool NeedTraceActivity() { return impl_.NeedTraceActivity(); } + bool NeedTraceActivity() const noexcept { return impl_.NeedTraceActivity(); } Error EnableStorage(const std::string &storagePath, bool skipPlaceholderCheck, const RdxContext &ctx) { return impl_.EnableStorage(storagePath, skipPlaceholderCheck, ctx); } @@ -260,11 +261,24 @@ class ClusterProxy { resetLeader(); } - const atomic_unique_ptr &GetShardingConfig() const noexcept { return impl_.shardingConfig_; } + intrusive_ptr> GetShardingConfig() const noexcept { + return impl_.shardingConfig_.Get(); + } Namespace::Ptr GetNamespacePtr(std::string_view nsName, const 
RdxContext &ctx) { return impl_.getNamespace(nsName, ctx); } + Namespace::Ptr GetNamespacePtrNoThrow(std::string_view nsName, const RdxContext &ctx) { return impl_.getNamespaceNoThrow(nsName, ctx); } + PayloadType GetPayloadType(std::string_view nsName) { return impl_.getPayloadType(nsName); } std::set GetFTIndexes(std::string_view nsName) { return impl_.getFTIndexes(nsName); } + [[nodiscard]] Error ResetShardingConfig(std::optional config = std::nullopt) noexcept; + void SaveNewShardingConfigFile(const cluster::ShardingConfig &config) const { impl_.saveNewShardingConfigFile(config); } + + [[nodiscard]] Error SaveShardingCfgCandidate(std::string_view config, int64_t sourceId, const RdxContext &ctx) noexcept; + [[nodiscard]] Error ApplyShardingCfgCandidate(int64_t sourceId, const RdxContext &ctx) noexcept; + [[nodiscard]] Error ResetOldShardingConfig(int64_t sourceId, const RdxContext &ctx) noexcept; + [[nodiscard]] Error ResetShardingConfigCandidate(int64_t sourceId, const RdxContext &ctx) noexcept; + [[nodiscard]] Error RollbackShardingConfigCandidate(int64_t sourceId, const RdxContext &ctx) noexcept; + private: static constexpr auto kReplicationStatsTimeout = std::chrono::seconds(10); static constexpr uint32_t kMaxClusterProxyConnCount = 64; @@ -368,6 +382,9 @@ class ClusterProxy { } } + template + [[nodiscard]] Error shardingConfigCandidateAction(const RdxContext &ctx, Args &&...args) noexcept; + #if RX_ENABLE_CLUSTERPROXY_LOGS template ::value>::type * = nullptr> diff --git a/cpp_src/core/comparator.cc b/cpp_src/core/comparator.cc index 3c0c00bdc..08d001339 100644 --- a/cpp_src/core/comparator.cc +++ b/cpp_src/core/comparator.cc @@ -14,12 +14,33 @@ Comparator::Comparator(CondType cond, KeyValueType type, const VariantArray &val cmpGeom(distinct), cmpUuid(distinct) { if (type.Is()) assertrx(fields_.size() > 0); - if (cond_ == CondEq && values.size() != 1) cond_ = CondSet; - if (cond_ == CondAllSet && values.size() == 1) cond_ = CondEq; - if (cond_ == 
CondDWithin) { - cmpGeom.SetValues(values); - } else { - setValues(values); + switch (cond) { + case CondEq: + if (values.size() != 1) { + cond_ = CondSet; + } + setValues(values); + break; + case CondSet: + case CondAllSet: + if (values.size() == 1) { + cond_ = CondEq; + } + [[fallthrough]]; + case CondLt: + case CondLe: + case CondGt: + case CondGe: + case CondLike: + case CondRange: + setValues(values); + break; + case CondDWithin: + cmpGeom.SetValues(values); + break; + case CondEmpty: + case CondAny: + break; } } @@ -114,7 +135,7 @@ bool Comparator::Compare(const PayloadValue &data, int rowId) { const uint8_t *ptr = data.Ptr() + arr->offset; if (cond_ == CondDWithin) { if (arr->len != 2 || !type_.Is()) throw Error(errQueryExec, "DWithin with not point data"); - return cmpGeom.Compare({*reinterpret_cast(ptr), *reinterpret_cast(ptr + sizeof_)}); + return cmpGeom.Compare(Point{*reinterpret_cast(ptr), *reinterpret_cast(ptr + sizeof_)}); } for (int i = 0; i < arr->len; ++i, ptr += sizeof_) { @@ -182,7 +203,7 @@ void Comparator::ExcludeDistinct(const PayloadValue &data, int rowId) { uint8_t *ptr = data.Ptr() + arr->offset; if (cond_ == CondDWithin) { if (arr->len != 2 || !type_.Is()) throw Error(errQueryExec, "DWithin with not point data"); - return cmpGeom.ExcludeDistinct({*reinterpret_cast(ptr), *reinterpret_cast(ptr + sizeof_)}); + return cmpGeom.ExcludeDistinct(Point{*reinterpret_cast(ptr), *reinterpret_cast(ptr + sizeof_)}); } for (int i = 0; i < arr->len; i++, ptr += sizeof_) excludeDistinct(ptr); diff --git a/cpp_src/core/comparatorimpl.h b/cpp_src/core/comparatorimpl.h index 3135ed431..30868e678 100644 --- a/cpp_src/core/comparatorimpl.h +++ b/cpp_src/core/comparatorimpl.h @@ -97,9 +97,9 @@ class ComparatorImpl { case CondLike: return false; case CondDWithin: - default: - abort(); + break; } + std::abort(); } bool Compare(CondType cond, T lhs) { bool ret = Compare2(cond, lhs); @@ -122,12 +122,19 @@ class ComparatorImpl { private: KeyValueType type() { - if 
constexpr (std::is_same_v) return KeyValueType::Int{}; - if constexpr (std::is_same_v) return KeyValueType::Bool{}; - if constexpr (std::is_same_v) return KeyValueType::Int64{}; - if constexpr (std::is_same_v) return KeyValueType::Double{}; - if constexpr (std::is_same_v) return KeyValueType::Uuid{}; - std::abort(); + if constexpr (std::is_same_v) + return KeyValueType::Int{}; + else if constexpr (std::is_same_v) + return KeyValueType::Bool{}; + else if constexpr (std::is_same_v) + return KeyValueType::Int64{}; + else if constexpr (std::is_same_v) + return KeyValueType::Double{}; + else if constexpr (std::is_same_v) + return KeyValueType::Uuid{}; + else { + static_assert(std::is_same_v, "Unknown KeyValueType"); + } } void addValue(CondType cond, T value) { @@ -203,9 +210,9 @@ class ComparatorImpl { case CondLike: return false; case CondDWithin: - default: - abort(); + break; } + std::abort(); } bool Compare(CondType cond, Uuid lhs) { bool ret = Compare2(cond, lhs); @@ -287,9 +294,9 @@ class ComparatorImpl { return matchLikePattern(std::string_view(lhs), rhs); } case CondDWithin: - default: - abort(); + break; } + std::abort(); } bool Compare(CondType cond, p_string lhs, const CollateOpts &collateOpts) { bool ret = Compare2(cond, lhs, collateOpts); @@ -326,7 +333,7 @@ class ComparatorImpl { if (cond == CondSet || cond == CondAllSet) { valuesS_->emplace(value); } else { - values_.push_back(value); + values_.emplace_back(value); if (values_.size() == 1) { cachedValueSV_ = std::string_view(*values_[0]); } @@ -393,9 +400,9 @@ class ComparatorImpl { case CondLike: return false; case CondDWithin: - default: - abort(); + break; } + std::abort(); } void ClearAllSetValues() { assertrx(allSetValuesSet_); diff --git a/cpp_src/core/compositearraycomparator.cc b/cpp_src/core/compositearraycomparator.cc index e1e40dbb2..6e58fdf98 100644 --- a/cpp_src/core/compositearraycomparator.cc +++ b/cpp_src/core/compositearraycomparator.cc @@ -6,8 +6,7 @@ 
CompositeArrayComparator::CompositeArrayComparator() {} void CompositeArrayComparator::BindField(int field, const VariantArray &values, CondType condType) { fields_.push_back(field); - ctx_.push_back(Context()); - Context &ctx = ctx_.back(); + Context &ctx = ctx_.emplace_back(); ctx.cond = condType; ctx.cmpBool.SetValues(condType, values); @@ -22,8 +21,7 @@ void CompositeArrayComparator::BindField(int field, const VariantArray &values, void CompositeArrayComparator::BindField(const TagsPath &tagsPath, const VariantArray &values, CondType condType) { fields_.push_back(tagsPath); - ctx_.push_back(Context()); - Context &ctx = ctx_.back(); + Context &ctx = ctx_.emplace_back(); ctx.cond = condType; ctx.cmpBool.SetValues(condType, values); @@ -40,16 +38,17 @@ bool CompositeArrayComparator::Compare(const PayloadValue &pv, const ComparatorV h_vector vals; size_t tagsPathIdx = 0; + vals.reserve(fields_.size()); for (size_t j = 0; j < fields_.size(); ++j) { - vals.push_back({}); + auto &v = vals.emplace_back(); bool isRegularIndex = fields_[j] != IndexValueType::SetByJsonPath && fields_[j] < vars.payloadType_.NumFields(); if (isRegularIndex) { - pl.Get(fields_[j], vals.back()); + pl.Get(fields_[j], v); } else { assertrx(tagsPathIdx < fields_.getTagsPathsLength()); - pl.GetByJsonPath(fields_.getTagsPath(tagsPathIdx++), vals.back(), KeyValueType::Undefined{}); + pl.GetByJsonPath(fields_.getTagsPath(tagsPathIdx++), v, KeyValueType::Undefined{}); } - if (vals.back().size() < len) len = vals.back().size(); + if (v.size() < len) len = vals.back().size(); } for (size_t i = 0; i < len; ++i) { diff --git a/cpp_src/core/dbconfig.cc b/cpp_src/core/dbconfig.cc index 5efd9e25a..03485a8cf 100644 --- a/cpp_src/core/dbconfig.cc +++ b/cpp_src/core/dbconfig.cc @@ -111,7 +111,7 @@ Error DBConfigProvider::FromJSON(const gason::JsonNode &root, bool autoCorrect) // Applying entire configuration only if no read errors if (errLogString.empty()) { if (typesChanged.find(ProfilingConf) != 
typesChanged.end()) { - profilingData_ = std::move(profilingDataSafe); + profilingData_ = profilingDataSafe; } if (typesChanged.find(NamespaceDataConf) != typesChanged.end()) { namespacesData_ = std::move(namespacesData); @@ -180,7 +180,7 @@ Error DBConfigProvider::GetConfigParseErrors() const { } void DBConfigProvider::setHandler(ConfigType cfgType, std::function handler) { - smart_lock lk(mtx_, true); + std::lock_guard lk(mtx_); handlers_[cfgType] = std::move(handler); } @@ -196,33 +196,8 @@ void DBConfigProvider::unsetHandler(int id) { replicationConfigDataHandlers_.erase(id); } -ProfilingConfigData DBConfigProvider::GetProfilingConfig() { - smart_lock lk(mtx_, false); - return profilingData_; -} - -LongQueriesLoggingParams DBConfigProvider::GetSelectLoggingParams() { - smart_lock lk(mtx_, false); - return profilingData_.longSelectLoggingParams; -} - -LongQueriesLoggingParams DBConfigProvider::GetUpdDelLoggingParams() { - smart_lock lk(mtx_, false); - return profilingData_.longUpdDelLoggingParams; -} - -LongTxLoggingParams DBConfigProvider::GetTxLoggingParams() { - smart_lock lk(mtx_, false); - return profilingData_.longTxLoggingParams; -} - -bool DBConfigProvider::ActivityStatsEnabled() { - smart_lock lk(mtx_, false); - return profilingData_.activityStats; -} - ReplicationConfigData DBConfigProvider::GetReplicationConfig() { - smart_lock lk(mtx_, false); + shared_lock lk(mtx_); return replicationData_; } @@ -232,7 +207,7 @@ cluster::AsyncReplConfigData DBConfigProvider::GetAsyncReplicationConfig() { } bool DBConfigProvider::GetNamespaceConfig(const std::string &nsName, NamespaceConfigData &data) { - smart_lock lk(mtx_, false); + shared_lock lk(mtx_); auto it = namespacesData_.find(nsName); if (it == namespacesData_.end()) { it = namespacesData_.find("*"); @@ -249,7 +224,7 @@ Error ProfilingConfigData::FromJSON(const gason::JsonNode &v) { using namespace std::string_view_literals; std::string errorString; tryReadOptionalJsonValue(&errorString, v, 
"queriesperfstats"sv, queriesPerfStats); - tryReadOptionalJsonValue(&errorString, v, "queries_threshold_us"sv, queriedThresholdUS); + tryReadOptionalJsonValue(&errorString, v, "queries_threshold_us"sv, queriesThresholdUS); tryReadOptionalJsonValue(&errorString, v, "perfstats"sv, perfStats); tryReadOptionalJsonValue(&errorString, v, "memstats"sv, memStats); tryReadOptionalJsonValue(&errorString, v, "activitystats"sv, activityStats); @@ -258,25 +233,33 @@ Error ProfilingConfigData::FromJSON(const gason::JsonNode &v) { if (!longQueriesLogging.empty()) { auto &select = longQueriesLogging["select"sv]; if (!select.empty()) { - tryReadOptionalJsonValue(&errorString, select, "threshold_us"sv, longSelectLoggingParams.thresholdUs); - tryReadOptionalJsonValue(&errorString, select, "normalized"sv, longSelectLoggingParams.normalized); + const auto p = longSelectLoggingParams.load(std::memory_order_relaxed); + int32_t thresholdUs = p.thresholdUs; + bool normalized = p.normalized; + tryReadOptionalJsonValue(&errorString, select, "threshold_us"sv, thresholdUs); + tryReadOptionalJsonValue(&errorString, select, "normalized"sv, normalized); + longSelectLoggingParams.store(LongQueriesLoggingParams(thresholdUs, normalized), std::memory_order_relaxed); } auto &updateDelete = longQueriesLogging["update_delete"sv]; if (!updateDelete.empty()) { - tryReadOptionalJsonValue(&errorString, updateDelete, "threshold_us"sv, longUpdDelLoggingParams.thresholdUs); - tryReadOptionalJsonValue(&errorString, updateDelete, "normalized"sv, longUpdDelLoggingParams.normalized); + const auto p = longUpdDelLoggingParams.load(std::memory_order_relaxed); + int32_t thresholdUs = p.thresholdUs; + bool normalized = p.normalized; + tryReadOptionalJsonValue(&errorString, updateDelete, "threshold_us"sv, thresholdUs); + tryReadOptionalJsonValue(&errorString, updateDelete, "normalized"sv, normalized); + longUpdDelLoggingParams.store(LongQueriesLoggingParams(thresholdUs, normalized), std::memory_order_relaxed); } auto 
&transaction = longQueriesLogging["transaction"sv]; if (!transaction.empty()) { - int32_t value = longTxLoggingParams.thresholdUs; - tryReadOptionalJsonValue(&errorString, transaction, "threshold_us"sv, value); - longTxLoggingParams.thresholdUs = value; + const auto p = longTxLoggingParams.load(std::memory_order_relaxed); + int32_t thresholdUs = p.thresholdUs; + tryReadOptionalJsonValue(&errorString, transaction, "threshold_us"sv, thresholdUs); - value = longTxLoggingParams.avgTxStepThresholdUs; - tryReadOptionalJsonValue(&errorString, transaction, "avg_step_threshold_us"sv, value); - longTxLoggingParams.avgTxStepThresholdUs = value; + int32_t avgTxStepThresholdUs = p.avgTxStepThresholdUs; + tryReadOptionalJsonValue(&errorString, transaction, "avg_step_threshold_us"sv, avgTxStepThresholdUs); + longTxLoggingParams.store(LongTxLoggingParams(thresholdUs, avgTxStepThresholdUs), std::memory_order_relaxed); } } diff --git a/cpp_src/core/dbconfig.h b/cpp_src/core/dbconfig.h index 706f07e79..4de3c7d57 100644 --- a/cpp_src/core/dbconfig.h +++ b/cpp_src/core/dbconfig.h @@ -23,13 +23,19 @@ class WrSerializer; enum ConfigType { ProfilingConf, NamespaceDataConf, AsyncReplicationConf, ReplicationConf }; -struct LongQueriesLoggingParams { - int32_t thresholdUs = -1; - bool normalized = false; +class LongQueriesLoggingParams { +public: + LongQueriesLoggingParams(int32_t t = -1, bool n = false) noexcept : thresholdUs(t), normalized(n ? 1 : 0) {} + + // Do not using int32 + bool here due to MSVC compatibility reasons (alignof should not be less than sizeof in this case to use it in + // atomic). 
+ int64_t thresholdUs : 32; + int64_t normalized : 1; }; -struct LongTxLoggingParams { - LongTxLoggingParams() noexcept : thresholdUs(-1), avgTxStepThresholdUs(-1) {} +class LongTxLoggingParams { +public: + LongTxLoggingParams(int32_t t = -1, int32_t a = -1) noexcept : thresholdUs(t), avgTxStepThresholdUs(a) {} // Do not using 2 int32's here due to MSVC compatibility reasons (alignof should not be less than sizeof in this case to use it in // atomic). @@ -38,17 +44,30 @@ struct LongTxLoggingParams { int64_t avgTxStepThresholdUs : 32; }; -struct ProfilingConfigData { - bool queriesPerfStats = false; - size_t queriedThresholdUS = 10; - bool perfStats = false; - bool memStats = false; - bool activityStats = false; - LongQueriesLoggingParams longSelectLoggingParams; - LongQueriesLoggingParams longUpdDelLoggingParams; - LongTxLoggingParams longTxLoggingParams; +class ProfilingConfigData { +public: + ProfilingConfigData &operator=(const ProfilingConfigData &d) noexcept { + queriesThresholdUS.store(d.queriesThresholdUS, std::memory_order_relaxed); + queriesPerfStats.store(d.queriesPerfStats, std::memory_order_relaxed); + perfStats.store(d.perfStats, std::memory_order_relaxed); + memStats.store(d.memStats, std::memory_order_relaxed); + activityStats.store(d.activityStats, std::memory_order_relaxed); + longSelectLoggingParams.store(d.longSelectLoggingParams, std::memory_order_relaxed); + longUpdDelLoggingParams.store(d.longUpdDelLoggingParams, std::memory_order_relaxed); + longTxLoggingParams.store(d.longTxLoggingParams, std::memory_order_relaxed); + return *this; + } Error FromJSON(const gason::JsonNode &v); + + std::atomic queriesThresholdUS = {10}; + std::atomic queriesPerfStats = {false}; + std::atomic perfStats = {false}; + std::atomic memStats = {false}; + std::atomic activityStats = {false}; + std::atomic longSelectLoggingParams; + std::atomic longUpdDelLoggingParams; + std::atomic longTxLoggingParams; }; struct NamespaceConfigData { @@ -126,14 +145,21 @@ class 
DBConfigProvider { int setHandler(std::function handler); void unsetHandler(int id); - ProfilingConfigData GetProfilingConfig(); cluster::AsyncReplConfigData GetAsyncReplicationConfig(); ReplicationConfigData GetReplicationConfig(); bool GetNamespaceConfig(const std::string &nsName, NamespaceConfigData &data); - LongQueriesLoggingParams GetSelectLoggingParams(); - LongQueriesLoggingParams GetUpdDelLoggingParams(); - LongTxLoggingParams GetTxLoggingParams(); - bool ActivityStatsEnabled(); + LongQueriesLoggingParams GetSelectLoggingParams() const noexcept { + return profilingData_.longSelectLoggingParams.load(std::memory_order_relaxed); + } + LongQueriesLoggingParams GetUpdDelLoggingParams() const noexcept { + return profilingData_.longUpdDelLoggingParams.load(std::memory_order_relaxed); + } + LongTxLoggingParams GetTxLoggingParams() const noexcept { return profilingData_.longTxLoggingParams.load(std::memory_order_relaxed); } + bool ActivityStatsEnabled() const noexcept { return profilingData_.activityStats.load(std::memory_order_relaxed); } + bool MemStatsEnabled() const noexcept { return profilingData_.memStats.load(std::memory_order_relaxed); } + bool PerfStatsEnabled() const noexcept { return profilingData_.perfStats.load(std::memory_order_relaxed); } + bool QueriesPerfStatsEnabled() const noexcept { return profilingData_.queriesPerfStats.load(std::memory_order_relaxed); } + unsigned QueriesThresholdUS() const noexcept { return profilingData_.queriesThresholdUS.load(std::memory_order_relaxed); } private: ProfilingConfigData profilingData_; diff --git a/cpp_src/core/defnsconfigs.h b/cpp_src/core/defnsconfigs.h index 8690d44eb..7f66401be 100644 --- a/cpp_src/core/defnsconfigs.h +++ b/cpp_src/core/defnsconfigs.h @@ -13,6 +13,7 @@ constexpr char kActivityStatsNamespace[] = "#activitystats"; constexpr char kClientsStatsNamespace[] = "#clientsstats"; constexpr char kClusterConfigNamespace[] = "#clusterconfig"; const std::string_view kReplicationStatsNamespace = 
"#replicationstats"; +constexpr char kNsNameField[] = "name"; const std::vector kDefDBConfig = { R"json({ @@ -99,7 +100,7 @@ const std::vector kSystemNsDefs = { NamespaceDef(kConfigNamespace, StorageOpts().Enabled().CreateIfMissing().DropOnFileFormatError()) .AddIndex("type", "hash", "string", IndexOpts().PK()), NamespaceDef(kPerfStatsNamespace, StorageOpts()) - .AddIndex("name", "hash", "string", IndexOpts().PK()) + .AddIndex(kNsNameField, "hash", "string", IndexOpts().PK()) .AddIndex("updates.total_queries_count", "-", "int64", IndexOpts().Dense()) .AddIndex("updates.total_avg_latency_us", "-", "int64", IndexOpts().Dense()) .AddIndex("updates.last_sec_qps", "-", "int64", IndexOpts().Dense()) @@ -130,10 +131,10 @@ const std::vector kSystemNsDefs = { .AddIndex("last_sec_avg_latency_us", "-", "int64", IndexOpts().Dense()) .AddIndex("last_sec_avg_lock_time_us", "-", "int64", IndexOpts().Dense()) .AddIndex("latency_stddev", "-", "double", IndexOpts().Dense()), - NamespaceDef(kNamespacesNamespace, StorageOpts()).AddIndex("name", "hash", "string", IndexOpts().PK()), - NamespaceDef(kPerfStatsNamespace, StorageOpts()).AddIndex("name", "hash", "string", IndexOpts().PK()), + NamespaceDef(kNamespacesNamespace, StorageOpts()).AddIndex(kNsNameField, "hash", "string", IndexOpts().PK()), + NamespaceDef(kPerfStatsNamespace, StorageOpts()).AddIndex(kNsNameField, "hash", "string", IndexOpts().PK()), NamespaceDef(kMemStatsNamespace, StorageOpts()) - .AddIndex("name", "hash", "string", IndexOpts().PK()) + .AddIndex(kNsNameField, "hash", "string", IndexOpts().PK()) .AddIndex("items_count", "-", "int64", IndexOpts().Dense()) .AddIndex("total.data_size", "-", "int64", IndexOpts().Dense()) .AddIndex("total.indexes_size", "-", "int64", IndexOpts().Dense()) diff --git a/cpp_src/core/expressiontree.h b/cpp_src/core/expressiontree.h index 2359eed07..1a16eab63 100644 --- a/cpp_src/core/expressiontree.h +++ b/cpp_src/core/expressiontree.h @@ -465,7 +465,7 @@ class ExpressionTree { void 
EncloseInBracket(size_t from, size_t to, OperationType op, Args&&... args) { assertrx(to > from); assertrx(to <= container_.size()); - for (unsigned b : activeBrackets_) { + for (unsigned& b : activeBrackets_) { assertrx(b < container_.size()); if (b >= from) ++b; } diff --git a/cpp_src/core/expressiontree.md b/cpp_src/core/expressiontree.md index 1700e7782..772fdea75 100644 --- a/cpp_src/core/expressiontree.md +++ b/cpp_src/core/expressiontree.md @@ -19,9 +19,12 @@ class ExpressionTree; `ExpressionTree` does not support operator precedence. You can support it manually as it done in `QueryEntries` and `SelectIteratorContainer`, or by enclosing higher priority operators in brackets as it done in `SortExpression`. -Here do not used traditional way for constructing of trees with inheritance of nodes, allocations of separate nodes and holding of pointers to they. +Here is not used the traditional way for constructing trees with inheritance of nodes, allocations of separate nodes and holding pointers to them. `ExpressionTree` holds all nodes by value in a vector (`container_`) sequentially in type `Node` based on `variant`. -In order to support lazy copying `Node` can hold a reference to payload of another `Node` by using `ExpressionTree::Ref` type. !Warning! lazy copy should not live over the original one. +In order to support lazy copying `Node` can hold a reference to payload of another `Node` by using `ExpressionTree::Ref` type. + +**Warning**: The lazy copy node shall not live longer than the original one. + Subtree is stored in `container_` just behind its head (`SubTree`) which holds occupied space. For details see examples. This architecture allows to reduce count of allocations and virtual functions calls. @@ -187,21 +190,23 @@ It contains operation (value of `OperationType`) and a value of one of the types - `void Node::Append()` increments size of subexpression if it is head of subexpression, fails otherwise. 
- `void Node::Erase(size_t)` reduces size of subexpression if it is head of subexpression, fails otherwise. - ```c++ -template -void Node::ExecuteAppropriate(const std::function&... funcs); -template -void Node::ExecuteAppropriate(const std::function&... funcs) const; -``` -invoke appropriate functor if the `Node` holds value of one of `Args...` types or `Ref` where `T` is one of `Args...` types, no functor will be invoked otherwise. + template + void Node::ExecuteAppropriate(const std::function&... funcs); + template + void Node::ExecuteAppropriate(const std::function&... funcs) const; + ``` + + invokes appropriate functor if the `Node` holds value of one of `Args...` types or `Ref`, where `T` is one of `Args...` types, no functor will be invoked otherwise. + - ```c++ -template -R Node::CalculateAppropriate(const std::function& f, const std::function&... funcs) const; -``` -invokes appropriate functor depending on type of value is holded by `Node` and provides returned value. + template + R Node::CalculateAppropriate(const std::function& f, const std::function&... funcs) const; + ``` + invokes appropriate functor depending on type of value is held by `Node` and provides returned value. - `Node Node::MakeLazyCopy()&` -!Warning! the copy should not live over the origin. * returns copy of origin one if it is head of subexpression or holds value of `Ref` type. * returns new `Node` that holds `Ref` which references to payload of origin one if it holds `T` (one of `Ts...`). + > **Warning** the copy shall not live longer than the origin. - `Node Node::MakeDeepCopy() const &` * returns copy of origin one if it is head of subexpression or holds value of one of `Ts...` types. * returns new `Node` which holds copy of value that `Ref` references to if origin one holds value of `Ref` type. 
diff --git a/cpp_src/core/ft/config/baseftconfig.cc b/cpp_src/core/ft/config/baseftconfig.cc index 3fad1a027..2cb4ad7f1 100644 --- a/cpp_src/core/ft/config/baseftconfig.cc +++ b/cpp_src/core/ft/config/baseftconfig.cc @@ -31,14 +31,26 @@ void BaseFTConfig::parseBase(const gason::JsonNode &root) { auto &stemmersNode = root["stemmers"]; if (!stemmersNode.empty()) { stemmers.clear(); - for (auto &st : stemmersNode) stemmers.push_back(st.As()); + for (auto &st : stemmersNode) stemmers.emplace_back(st.As()); } synonyms.clear(); for (auto &se : root["synonyms"]) { Synonym synonym; - for (auto &ae : se["alternatives"]) synonym.alternatives.push_back(ae.As()); - for (auto &te : se["tokens"]) synonym.tokens.push_back(te.As()); - synonyms.push_back(std::move(synonym)); + for (auto &ae : se["alternatives"]) synonym.alternatives.emplace_back(ae.As()); + for (auto &te : se["tokens"]) synonym.tokens.emplace_back(te.As()); + synonyms.emplace_back(std::move(synonym)); + } + const auto &baseRankingConfigNode = root["base_ranking"]; + if (!baseRankingConfigNode.empty()) { + rankingConfig.fullMatch = baseRankingConfigNode["full_match_proc"].As<>(rankingConfig.fullMatch, 0, 500); + rankingConfig.prefixMin = baseRankingConfigNode["prefix_min_proc"].As<>(rankingConfig.prefixMin, 0, 500); + rankingConfig.suffixMin = baseRankingConfigNode["suffix_min_proc"].As<>(rankingConfig.suffixMin, 0, 500); + rankingConfig.typo = baseRankingConfigNode["base_typo_proc"].As<>(rankingConfig.typo, 0, 500); + rankingConfig.typoPenalty = baseRankingConfigNode["typo_proc_penalty"].As<>(rankingConfig.typoPenalty, 0, 500); + rankingConfig.stemmerPenalty = baseRankingConfigNode["stemmer_proc_penalty"].As<>(rankingConfig.stemmerPenalty, 0, 500); + rankingConfig.kblayout = baseRankingConfigNode["kblayout_proc"].As<>(rankingConfig.kblayout, 0, 500); + rankingConfig.translit = baseRankingConfigNode["translit_proc"].As<>(rankingConfig.translit, 0, 500); + rankingConfig.synonyms = 
baseRankingConfigNode["synonyms_proc"].As<>(rankingConfig.synonyms, 0, 500); } } @@ -71,6 +83,18 @@ void BaseFTConfig::getJson(JsonBuilder &jsonBuilder) const { stopWordsNode.Put(nullptr, sw); } } + { + auto baseRankingConfigNode = jsonBuilder.Object("base_ranking"); + baseRankingConfigNode.Put("full_match_proc", rankingConfig.fullMatch); + baseRankingConfigNode.Put("prefix_min_proc", rankingConfig.prefixMin); + baseRankingConfigNode.Put("suffix_min_proc", rankingConfig.suffixMin); + baseRankingConfigNode.Put("base_typo_proc", rankingConfig.typo); + baseRankingConfigNode.Put("typo_proc_penalty", rankingConfig.typoPenalty); + baseRankingConfigNode.Put("stemmer_proc_penalty", rankingConfig.stemmerPenalty); + baseRankingConfigNode.Put("kblayout_proc", rankingConfig.kblayout); + baseRankingConfigNode.Put("translit_proc", rankingConfig.translit); + baseRankingConfigNode.Put("synonyms_proc", rankingConfig.synonyms); + } } } // namespace reindexer diff --git a/cpp_src/core/ft/config/baseftconfig.h b/cpp_src/core/ft/config/baseftconfig.h index 646aa62a6..532d94f2f 100644 --- a/cpp_src/core/ft/config/baseftconfig.h +++ b/cpp_src/core/ft/config/baseftconfig.h @@ -43,6 +43,28 @@ class BaseFTConfig { std::vector synonyms; int logLevel = 0; std::string extraWordSymbols = "-/+"; // word contains symbols (IsAlpa | IsDigit) {IsAlpa | IsDigit | IsExtra} + struct BaseRankingConfig { + static constexpr int kMinProcAfterPenalty = 1; + // Relevancy of full word match + int fullMatch = 100; + // Mininum relevancy of prefix word match. + int prefixMin = 50; + // Mininum relevancy of suffix word match. 
+ int suffixMin = 10; + // Base relevancy of typo match + int typo = 85; + // Extra penalty for each word's permutation (addition/deletion of the symbol) in typo algorithm + int typoPenalty = 15; + // Penalty for the variants, created by stemming + int stemmerPenalty = 15; + // Relevancy of the match in incorrect kblayout + int kblayout = 90; + // Relevancy of the match in translit + int translit = 90; + // Relevancy of the synonym match + int synonyms = 95; + }; + BaseRankingConfig rankingConfig; protected: void parseBase(const gason::JsonNode& root); diff --git a/cpp_src/core/ft/config/ftfastconfig.cc b/cpp_src/core/ft/config/ftfastconfig.cc index 253436252..f5f87bf32 100644 --- a/cpp_src/core/ft/config/ftfastconfig.cc +++ b/cpp_src/core/ft/config/ftfastconfig.cc @@ -88,8 +88,8 @@ void FtFastConfig::parse(std::string_view json, const RHashMap throw Error(errParseDSL, "Configuration for single field fulltext index cannot contain field specifications"); } std::set modifiedFields; - for (const auto fldCfg : fieldsCfgNode.value) { - const std::string fieldName = (*fldCfg)["field_name"].As(); + for (const auto& fldCfg : fieldsCfgNode.value) { + const std::string fieldName = fldCfg["field_name"].As(); const auto fldIt = fields.find(fieldName); if (fldIt == fields.end()) { throw Error(errParseDSL, "Field '%s' is not included to full text index", fieldName); @@ -100,12 +100,12 @@ void FtFastConfig::parse(std::string_view json, const RHashMap } modifiedFields.insert(fldIt->second); FtFastFieldConfig& curFieldCfg = fieldsCfg[fldIt->second]; - curFieldCfg.bm25Boost = (*fldCfg)["bm25_boost"].As<>(defaultFieldCfg.bm25Boost); - curFieldCfg.bm25Weight = (*fldCfg)["bm25_weight"].As<>(defaultFieldCfg.bm25Weight); - curFieldCfg.termLenBoost = (*fldCfg)["term_len_boost"].As<>(defaultFieldCfg.termLenBoost); - curFieldCfg.termLenWeight = (*fldCfg)["term_len_weight"].As<>(defaultFieldCfg.termLenWeight); - curFieldCfg.positionBoost = 
(*fldCfg)["position_boost"].As<>(defaultFieldCfg.positionBoost); - curFieldCfg.positionWeight = (*fldCfg)["position_weight"].As<>(defaultFieldCfg.positionWeight); + curFieldCfg.bm25Boost = fldCfg["bm25_boost"].As<>(defaultFieldCfg.bm25Boost); + curFieldCfg.bm25Weight = fldCfg["bm25_weight"].As<>(defaultFieldCfg.bm25Weight); + curFieldCfg.termLenBoost = fldCfg["term_len_boost"].As<>(defaultFieldCfg.termLenBoost); + curFieldCfg.termLenWeight = fldCfg["term_len_weight"].As<>(defaultFieldCfg.termLenWeight); + curFieldCfg.positionBoost = fldCfg["position_boost"].As<>(defaultFieldCfg.positionBoost); + curFieldCfg.positionWeight = fldCfg["position_weight"].As<>(defaultFieldCfg.positionWeight); } } diff --git a/cpp_src/core/ft/filters/itokenfilter.h b/cpp_src/core/ft/filters/itokenfilter.h index c9c1f7315..3daa11a60 100644 --- a/cpp_src/core/ft/filters/itokenfilter.h +++ b/cpp_src/core/ft/filters/itokenfilter.h @@ -20,10 +20,10 @@ class ITokenFilter { public: using Ptr = std::unique_ptr; - virtual void GetVariants(const std::wstring& data, std::vector& result) = 0; + virtual void GetVariants(const std::wstring& data, std::vector& result, int proc) = 0; virtual void SetConfig(BaseFTConfig*) {} - virtual void PreProcess(const FtDSLQuery&, std::vector&) const {} - virtual void PostProcess(const FtDSLEntry&, const FtDSLQuery&, size_t /*termIdx*/, std::vector&) const {} + virtual void PreProcess(const FtDSLQuery&, std::vector&, int /*proc*/) const {} + virtual void PostProcess(const FtDSLEntry&, const FtDSLQuery&, size_t /*termIdx*/, std::vector&, int /*proc*/) const {} virtual ~ITokenFilter() {} }; diff --git a/cpp_src/core/ft/filters/kblayout.cc b/cpp_src/core/ft/filters/kblayout.cc index 23f475b16..0e2616e44 100644 --- a/cpp_src/core/ft/filters/kblayout.cc +++ b/cpp_src/core/ft/filters/kblayout.cc @@ -3,7 +3,7 @@ namespace reindexer { -void KbLayout::GetVariants(const std::wstring& data, std::vector& result) { +void KbLayout::GetVariants(const std::wstring& data, 
std::vector& result, int proc) { std::wstring result_string; result_string.reserve(data.length()); @@ -20,7 +20,7 @@ void KbLayout::GetVariants(const std::wstring& data, std::vector& result_string.push_back(sym); } } - result.emplace_back(std::move(result_string), 90); + result.emplace_back(std::move(result_string), proc); } void KbLayout::setEnLayout(wchar_t sym, wchar_t data) { diff --git a/cpp_src/core/ft/filters/kblayout.h b/cpp_src/core/ft/filters/kblayout.h index cd1bd0dd5..69319e2b6 100644 --- a/cpp_src/core/ft/filters/kblayout.h +++ b/cpp_src/core/ft/filters/kblayout.h @@ -7,7 +7,7 @@ namespace reindexer { class KbLayout : public ITokenFilter { public: KbLayout(); - virtual void GetVariants(const std::wstring& data, std::vector& result) override final; + virtual void GetVariants(const std::wstring& data, std::vector& result, int proc) override final; private: void PrepareRuLayout(); diff --git a/cpp_src/core/ft/filters/synonyms.cc b/cpp_src/core/ft/filters/synonyms.cc index 5cd03db43..64b2afe59 100644 --- a/cpp_src/core/ft/filters/synonyms.cc +++ b/cpp_src/core/ft/filters/synonyms.cc @@ -5,9 +5,7 @@ namespace reindexer { -constexpr int kSynonymProc = 95; - -void Synonyms::GetVariants(const std::wstring& data, std::vector& result) { +void Synonyms::GetVariants(const std::wstring& data, std::vector& result, int proc) { if (one2one_.empty()) return; auto it = one2one_.find(data); @@ -15,7 +13,7 @@ void Synonyms::GetVariants(const std::wstring& data, std::vector& return; } for (const auto& ait : *it->second) { - result.emplace_back(ait, kSynonymProc); + result.emplace_back(ait, proc); } } @@ -31,18 +29,18 @@ void Synonyms::addDslEntries(std::vector& synonymsDsl, const Multip } } -static FtDslOpts makeOptsForAlternatives(const FtDslOpts& termOpts) { +static FtDslOpts makeOptsForAlternatives(const FtDslOpts& termOpts, int proc) { FtDslOpts result; result.op = OpAnd; - result.boost = termOpts.boost * kSynonymProc / 100.0; + result.boost = termOpts.boost * proc / 
100.0; result.termLenBoost = termOpts.termLenBoost; result.fieldsOpts = termOpts.fieldsOpts; result.qpos = termOpts.qpos; return result; } -static void addOptsForAlternatives(FtDslOpts& opts, const FtDslOpts& termOpts) { - opts.boost += termOpts.boost * kSynonymProc / 100.0; +static void addOptsForAlternatives(FtDslOpts& opts, const FtDslOpts& termOpts, int proc) { + opts.boost += termOpts.boost * proc / 100.0; opts.termLenBoost += termOpts.termLenBoost; assertrx(opts.fieldsOpts.size() == termOpts.fieldsOpts.size()); for (size_t i = 0, end = opts.fieldsOpts.size(); i != end; ++i) { @@ -60,7 +58,8 @@ static void divOptsForAlternatives(FtDslOpts& opts, size_t size) { opts.qpos /= size; } -void Synonyms::PostProcess(const FtDSLEntry& term, const FtDSLQuery& dsl, size_t termIdx, std::vector& synonymsDsl) const { +void Synonyms::PostProcess(const FtDSLEntry& term, const FtDSLQuery& dsl, size_t termIdx, std::vector& synonymsDsl, + int proc) const { if (term.opts.groupNum != -1) { // Skip multiword synonyms for phrase search return; @@ -70,13 +69,13 @@ void Synonyms::PostProcess(const FtDSLEntry& term, const FtDSLQuery& dsl, size_t return; } - const auto opts = makeOptsForAlternatives(term.opts); + const auto opts = makeOptsForAlternatives(term.opts, proc); assertrx(it->second); addDslEntries(synonymsDsl, *it->second, opts, {termIdx}, dsl); } -void Synonyms::PreProcess(const FtDSLQuery& dsl, std::vector& synonymsDsl) const { +void Synonyms::PreProcess(const FtDSLQuery& dsl, std::vector& synonymsDsl, int proc) const { for (const auto& multiSynonyms : many2any_) { bool match = !multiSynonyms.first.empty(); FtDslOpts opts; @@ -91,9 +90,9 @@ void Synonyms::PreProcess(const FtDSLQuery& dsl, std::vector& synon break; } else { if (termIt == multiSynonyms.first.cbegin()) { - opts = makeOptsForAlternatives(dslIt->opts); + opts = makeOptsForAlternatives(dslIt->opts, proc); } else { - addOptsForAlternatives(opts, dslIt->opts); + addOptsForAlternatives(opts, dslIt->opts, proc); } 
size_t idx = dslIt - dsl.cbegin(); termsIdx.push_back(idx); diff --git a/cpp_src/core/ft/filters/synonyms.h b/cpp_src/core/ft/filters/synonyms.h index df2ead40e..9596cbfc2 100644 --- a/cpp_src/core/ft/filters/synonyms.h +++ b/cpp_src/core/ft/filters/synonyms.h @@ -1,8 +1,8 @@ #pragma once +#include "core/ft/usingcontainer.h" #include "estl/fast_hash_map.h" #include "estl/h_vector.h" -#include "core/ft/usingcontainer.h" #include "itokenfilter.h" #include "tools/stringstools.h" @@ -13,10 +13,10 @@ struct FtDslOpts; class Synonyms : public ITokenFilter { public: Synonyms() = default; - virtual void GetVariants(const std::wstring& data, std::vector& result) override final; + virtual void GetVariants(const std::wstring& data, std::vector& result, int proc) override final; void SetConfig(BaseFTConfig* cfg) override final; - void PreProcess(const FtDSLQuery&, std::vector&) const override final; - void PostProcess(const FtDSLEntry&, const FtDSLQuery&, size_t termIdx, std::vector&) const override final; + void PreProcess(const FtDSLQuery&, std::vector&, int proc) const override final; + void PostProcess(const FtDSLEntry&, const FtDSLQuery&, size_t termIdx, std::vector&, int proc) const override final; private: using SingleAlternativeCont = std::vector; diff --git a/cpp_src/core/ft/filters/translit.cc b/cpp_src/core/ft/filters/translit.cc index 7d1800455..7a882d309 100644 --- a/cpp_src/core/ft/filters/translit.cc +++ b/cpp_src/core/ft/filters/translit.cc @@ -9,7 +9,7 @@ Translit::Translit() { PrepareEnglish(); } -void Translit::GetVariants(const std::wstring &data, std::vector &result) { +void Translit::GetVariants(const std::wstring &data, std::vector &result, int proc) { std::wstring strings[maxTranslitVariants]; Context ctx; @@ -54,7 +54,7 @@ void Translit::GetVariants(const std::wstring &data, std::vector & } if (!skip && curent != result_string && curent.length()) { result_string = curent; - result.emplace_back(std::move(curent), 90); + 
result.emplace_back(std::move(curent), proc); } } } diff --git a/cpp_src/core/ft/filters/translit.h b/cpp_src/core/ft/filters/translit.h index cf4fd4236..4e05845bb 100644 --- a/cpp_src/core/ft/filters/translit.h +++ b/cpp_src/core/ft/filters/translit.h @@ -8,7 +8,7 @@ class Translit : public ITokenFilter { public: Translit(); - virtual void GetVariants(const std::wstring &data, std::vector &result) override final; + virtual void GetVariants(const std::wstring &data, std::vector &result, int proc) override final; private: void PrepareRussian(); diff --git a/cpp_src/core/ft/ft_fast/dataholder.cc b/cpp_src/core/ft/ft_fast/dataholder.cc index bb022f5bf..694eab40b 100644 --- a/cpp_src/core/ft/ft_fast/dataholder.cc +++ b/cpp_src/core/ft/ft_fast/dataholder.cc @@ -181,24 +181,22 @@ void DataHolder::StartCommit(bool complte_updated) { return; } -template -IDataHolder::MergeData DataHolder::Select(FtDSLQuery&& dsl, size_t fieldSize, bool needArea, int maxAreasInDoc, bool inTransaction, - FtMergeStatuses::Statuses&& mergeStatuses, bool mergeStatusesEmpty, - const RdxContext& rdxCtx) { - if (mergeStatusesEmpty) { - return Selecter{*this, fieldSize, needArea, maxAreasInDoc}.template Process(std::move(dsl), inTransaction, - std::move(mergeStatuses), rdxCtx); - } else { - return Selecter{*this, fieldSize, needArea, maxAreasInDoc}.template Process(std::move(dsl), inTransaction, - std::move(mergeStatuses), rdxCtx); - } -} - template void DataHolder::Process(size_t fieldSize, bool multithread) { DataProcessor{*this, fieldSize}.Process(multithread); } +template +IDataHolder::MergeData DataHolder::Select(FtDSLQuery&& dsl, size_t fieldSize, bool needArea, int maxAreasInDoc, bool inTransaction, + FtMergeStatuses::Statuses&& mergeStatuses, FtUseExternStatuses useExternSt, + const RdxContext& rdxCtx) { + if (useExternSt == FtUseExternStatuses::No) { + return Selecter{*this, fieldSize, needArea, maxAreasInDoc}.template Process( + std::move(dsl), inTransaction, std::move(mergeStatuses), 
rdxCtx); + } + return Selecter{*this, fieldSize, needArea, maxAreasInDoc}.template Process( + std::move(dsl), inTransaction, std::move(mergeStatuses), rdxCtx); +} template class DataHolder; template class DataHolder; diff --git a/cpp_src/core/ft/ft_fast/dataholder.h b/cpp_src/core/ft/ft_fast/dataholder.h index c98af3f16..33d561a9c 100644 --- a/cpp_src/core/ft/ft_fast/dataholder.h +++ b/cpp_src/core/ft/ft_fast/dataholder.h @@ -133,8 +133,8 @@ class IDataHolder { }; virtual ~IDataHolder() = default; - virtual MergeData Select(FtDSLQuery&& dsl, size_t fieldSize, bool needArea, int maxAreasInDoc, bool inTransaction, - FtMergeStatuses::Statuses&& mergeStatuses, bool mergeStatusesEmpty, const RdxContext&) = 0; + virtual MergeData Select(FtDSLQuery&&, size_t fieldSize, bool needArea, int maxAreasInDoc, bool inTransaction, + FtMergeStatuses::Statuses&&, FtUseExternStatuses, const RdxContext&) = 0; virtual void Process(size_t fieldSize, bool multithread) = 0; virtual size_t GetMemStat() = 0; virtual void Clear() = 0; @@ -185,8 +185,8 @@ class IDataHolder { template class DataHolder : public IDataHolder { public: - MergeData Select(FtDSLQuery&& dsl, size_t fieldSize, bool needArea, int maxAreasInDoc, bool inTransaction, - FtMergeStatuses::Statuses&& mergeStatuses, bool mergeStatusesEmpty, const RdxContext&) final; + virtual MergeData Select(FtDSLQuery&&, size_t fieldSize, bool needArea, int maxAreasInDoc, bool inTransaction, + FtMergeStatuses::Statuses&&, FtUseExternStatuses, const RdxContext&) override final; void Process(size_t fieldSize, bool multithread) final; size_t GetMemStat() override final; void StartCommit(bool complte_updated) override final; diff --git a/cpp_src/core/ft/ft_fast/dataprocessor.cc b/cpp_src/core/ft/ft_fast/dataprocessor.cc index 275e99ffc..772b1c785 100644 --- a/cpp_src/core/ft/ft_fast/dataprocessor.cc +++ b/cpp_src/core/ft/ft_fast/dataprocessor.cc @@ -16,7 +16,7 @@ using std::chrono::milliseconds; namespace reindexer { -const int kDigitUtfSizeof = 
1; +constexpr int kDigitUtfSizeof = 1; template void DataProcessor::Process(bool multithread) { @@ -112,15 +112,13 @@ std::vector DataProcessor::BuildSuffix(words_map &words_um, // if we still haven't whis word we add it to new suffix tree else we will only add info to current word auto id = words.size(); - WordIdType pos; - pos = holder_.findWord(keyIt.first); - found.push_back(pos); + WordIdType pos = found.emplace_back(holder_.findWord(keyIt.first)); if (!pos.isEmpty()) { continue; } - words.emplace_back(PackedWordEntry()); + words.emplace_back(); pos = holder_.BuildWordId(id); if (holder_.cfg_->enableNumbersSearch && keyIt.second.virtualWord) { suffix.insert(keyIt.first, pos, kDigitUtfSizeof); @@ -133,9 +131,12 @@ std::vector DataProcessor::BuildSuffix(words_map &words_um, template size_t DataProcessor::buildWordsMap(words_map &words_um) { - uint32_t maxIndexWorkers = multithread_ ? std::thread::hardware_concurrency() : 0; - if (!maxIndexWorkers) maxIndexWorkers = 1; - if (maxIndexWorkers > 8) maxIndexWorkers = 8; + uint32_t maxIndexWorkers = multithread_ ? 
std::thread::hardware_concurrency() : 1; + if (!maxIndexWorkers) { + maxIndexWorkers = 1; + } else if (maxIndexWorkers > 8) { + maxIndexWorkers = 8; + } size_t szCnt = 0; struct context { words_map words_um; @@ -146,8 +147,7 @@ size_t DataProcessor::buildWordsMap(words_map &words_um) { auto &cfg = holder_.cfg_; auto &vdocsTexts = holder_.vdocsTexts; auto &vdocs = holder_.vdocs_; - // int fieldscount = std::max(1, int(this->fields_.size())); - int fieldscount = fieldSize_; + const int fieldscount = fieldSize_; size_t offset = holder_.vdocsOffset_; // build words map parallel in maxIndexWorkers threads auto worker = [this, &ctxs, &vdocsTexts, offset, maxIndexWorkers, fieldscount, &cfg, &vdocs](int i) { @@ -155,17 +155,19 @@ size_t DataProcessor::buildWordsMap(words_map &words_um) { std::string word, str; std::vector wrds; std::vector virtualWords; - for (VDocIdType j = i; j < VDocIdType(vdocsTexts.size()); j += maxIndexWorkers) { - size_t vdocId = offset + j; - vdocs[vdocId].wordsCount.insert(vdocs[vdocId].wordsCount.begin(), fieldscount, 0.0); - vdocs[vdocId].mostFreqWordCount.insert(vdocs[vdocId].mostFreqWordCount.begin(), fieldscount, 0.0); - - for (size_t field = 0; field < vdocsTexts[j].size(); ++field) { - split(vdocsTexts[j][field].first, str, wrds, cfg->extraWordSymbols); - int rfield = vdocsTexts[j][field].second; + for (VDocIdType j = i, sz = VDocIdType(vdocsTexts.size()); j < sz; j += maxIndexWorkers) { + const size_t vdocId = offset + j; + auto &vdoc = vdocs[vdocId]; + vdoc.wordsCount.insert(vdoc.wordsCount.begin(), fieldscount, 0.0); + vdoc.mostFreqWordCount.insert(vdoc.mostFreqWordCount.begin(), fieldscount, 0.0); + + auto &vdocsText = vdocsTexts[j]; + for (size_t field = 0, sz = vdocsText.size(); field < sz; ++field) { + split(vdocsText[field].first, str, wrds, cfg->extraWordSymbols); + const int rfield = vdocsText[field].second; assertrx(rfield < fieldscount); - vdocs[vdocId].wordsCount[rfield] = wrds.size(); + vdoc.wordsCount[rfield] = wrds.size(); 
int insertPos = -1; for (auto w : wrds) { @@ -173,14 +175,11 @@ size_t DataProcessor::buildWordsMap(words_map &words_um) { word.assign(w); if (!word.length() || cfg->stopWords.find(word) != cfg->stopWords.end()) continue; - auto idxIt = ctx->words_um.find(word); - if (idxIt == ctx->words_um.end()) { - idxIt = ctx->words_um.emplace(word, WordEntry()).first; - // idxIt->second.vids_.reserve(16); - } - int mfcnt = idxIt->second.vids_.Add(vdocId, insertPos, rfield); - if (mfcnt > vdocs[vdocId].mostFreqWordCount[rfield]) { - vdocs[vdocId].mostFreqWordCount[rfield] = mfcnt; + auto [idxIt, emplaced] = ctx->words_um.try_emplace(word, WordEntry()); + (void)emplaced; + const int mfcnt = idxIt->second.vids_.Add(vdocId, insertPos, rfield); + if (mfcnt > vdoc.mostFreqWordCount[rfield]) { + vdoc.mostFreqWordCount[rfield] = mfcnt; } if (cfg->enableNumbersSearch && is_number(word)) { @@ -191,35 +190,44 @@ size_t DataProcessor::buildWordsMap(words_map &words_um) { } }; + for (uint32_t t = 1; t < maxIndexWorkers; ++t) { + ctxs[t].thread = std::thread(worker, t); + } // If there was only 1 build thread. 
Just return it's build results - if (maxIndexWorkers == 1) { - worker(0); - words_um.swap(ctxs[0].words_um); - } else { - for (uint32_t t = 0; t < maxIndexWorkers; t++) ctxs[t].thread = std::thread(worker, t); - // Merge results into single map - for (uint32_t i = 0; i < maxIndexWorkers; i++) { - try { - ctxs[i].thread.join(); - for (auto it = ctxs[i].words_um.begin(), endIt = ctxs[i].words_um.end(); it != endIt; ++it) { - auto idxIt = words_um.find(it->first); - - if (idxIt == words_um.end()) { - words_um.emplace(it->first, std::move(it->second)); - } else { - idxIt->second.vids_.reserve(it->second.vids_.size() + idxIt->second.vids_.size()); - for (auto &r : it->second.vids_) idxIt->second.vids_.push_back(std::move(r)); - it->second.vids_ = IdRelSet(); - } + worker(0); + words_um = std::move(ctxs[0].words_um); + // Merge results into single map + for (uint32_t i = 1; i < maxIndexWorkers; ++i) { + try { + auto &ctx = ctxs[i]; + ctx.thread.join(); + for (auto &it : ctx.words_um) { +#if defined(RX_WITH_STDLIB_DEBUG) || defined(REINDEX_WITH_ASAN) + const auto fBeforeMove = it.first; + const auto sBeforeMove = it.second; + const auto sCapacityBeforeMove = it.second.vids_.capacity(); +#endif // defined(RX_WITH_STDLIB_DEBUG) || defined(REINDEX_WITH_ASAN) + auto [idxIt, emplaced] = words_um.try_emplace(std::move(it.first), std::move(it.second)); + if (!emplaced) { +#if defined(RX_WITH_STDLIB_DEBUG) || defined(REINDEX_WITH_ASAN) + // Make sure, that try_emplace did not moved the values + assertrx(it.first == fBeforeMove); + assertrx(it.second.virtualWord == sBeforeMove.virtualWord); + assertrx(it.second.vids_.size() == sBeforeMove.vids_.size()); + assertrx(it.second.vids_.capacity() == sCapacityBeforeMove); +#endif // defined(RX_WITH_STDLIB_DEBUG) || defined(REINDEX_WITH_ASAN) + idxIt->second.vids_.reserve(it.second.vids_.size() + idxIt->second.vids_.size()); + for (auto &&r : it.second.vids_) idxIt->second.vids_.emplace_back(std::move(r)); + it.second.vids_ = IdRelSet(); 
} - words_map().swap(ctxs[i].words_um); - } catch (const Error &e) { - logPrintf(LogError, "Exeption in loop with thread.join() error= [%s]", e.what()); - } catch (const std::exception &e) { - logPrintf(LogError, "Exeption in loop with thread.join() error= [%s]", e.what()); - } catch (...) { - logPrintf(LogError, "Exeption in loop with thread.join()"); } + words_map().swap(ctx.words_um); + } catch (const Error &e) { + logPrintf(LogError, "Exeption in loop with thread.join() error:[%s]", e.what()); + } catch (const std::exception &e) { + logPrintf(LogError, "Exeption in loop with thread.join() error:[%s]", e.what()); + } catch (...) { + logPrintf(LogError, "Exeption in loop with thread.join()"); } } @@ -235,7 +243,7 @@ size_t DataProcessor::buildWordsMap(words_map &words_um) { } // Check and print potential stop words - if (holder_.cfg_->logLevel >= LogInfo) { + if rx_unlikely (holder_.cfg_->logLevel >= LogInfo) { WrSerializer out; for (auto &w : words_um) { if (w.second.vids_.size() > vdocs.size() / 5 || int64_t(w.second.vids_.size()) > holder_.cfg_->mergeLimit) { @@ -252,16 +260,16 @@ size_t DataProcessor::buildWordsMap(words_map &words_um) { template void DataProcessor::buildVirtualWord(std::string_view word, words_map &words_um, VDocIdType docType, int rfield, size_t insertPos, - std::vector &output) { + std::vector &container) { auto &vdocs = holder_.vdocs_; auto &vdoc(vdocs[docType]); - NumToText::convert(word, output); - for (const std::string &numberWord : output) { + NumToText::convert(word, container); + for (std::string &numberWord : container) { WordEntry wentry; wentry.virtualWord = true; - auto idxIt = words_um.emplace(numberWord, std::move(wentry)).first; - int mfcnt = idxIt->second.vids_.Add(docType, insertPos, rfield); + auto idxIt = words_um.emplace(std::move(numberWord), std::move(wentry)).first; + const int mfcnt = idxIt->second.vids_.Add(docType, insertPos, rfield); if (mfcnt > vdoc.mostFreqWordCount[rfield]) { vdoc.mostFreqWordCount[rfield] = 
mfcnt; } diff --git a/cpp_src/core/ft/ft_fast/dataprocessor.h b/cpp_src/core/ft/ft_fast/dataprocessor.h index 8fb86b5a0..f0dd5deb4 100644 --- a/cpp_src/core/ft/ft_fast/dataprocessor.h +++ b/cpp_src/core/ft/ft_fast/dataprocessor.h @@ -9,7 +9,6 @@ namespace reindexer { template class DataProcessor { public: - using words_map = RHashMap; DataProcessor(DataHolder& holder, size_t fieldSize) : holder_(holder), multithread_(false), fieldSize_(fieldSize) {} @@ -20,7 +19,7 @@ class DataProcessor { size_t buildWordsMap(words_map& m); void buildVirtualWord(std::string_view word, words_map& words_um, VDocIdType docType, int rfield, size_t insertPos, - std::vector& output); + std::vector& container); void buildTyposMap(uint32_t startPos, const std::vector& found); diff --git a/cpp_src/core/ft/ft_fast/indextexttypes.h b/cpp_src/core/ft/ft_fast/indextexttypes.h index 049e5bf35..36fd3b369 100644 --- a/cpp_src/core/ft/ft_fast/indextexttypes.h +++ b/cpp_src/core/ft/ft_fast/indextexttypes.h @@ -38,4 +38,6 @@ struct WordIdTypeLess { bool operator()(const WordIdType& lhs, const WordIdType& rhs) const noexcept { return lhs.data < rhs.data; } }; +enum class FtUseExternStatuses : bool { Yes, No }; + } // namespace reindexer diff --git a/cpp_src/core/ft/ft_fast/selecter.cc b/cpp_src/core/ft/ft_fast/selecter.cc index 6d5c508e0..b460ca929 100644 --- a/cpp_src/core/ft/ft_fast/selecter.cc +++ b/cpp_src/core/ft/ft_fast/selecter.cc @@ -3,11 +3,12 @@ #include "core/ft/bm25.h" #include "core/ft/typos.h" #include "core/rdxcontext.h" +#include "estl/defines.h" #include "sort/pdqsort.hpp" #include "tools/logger.h" namespace { -inline double pos2rank(int pos) { +RX_ALWAYS_INLINE double pos2rank(int pos) { if (pos <= 10) return 1.0 - (pos / 100.0); if (pos <= 100) return 0.9 - (pos / 1000.0); if (pos <= 1000) return 0.8 - (pos / 10000.0); @@ -18,17 +19,6 @@ inline double pos2rank(int pos) { } // namespace namespace reindexer { -// Relevancy procent of full word match -const int kFullMatchProc = 100; 
-// Mininum relevancy procent of prefix word match. -const int kPrefixMinProc = 50; -const int kSuffixMinProc = 10; -// Maximum relevancy procent of typo match -const int kTypoProc = 85; -// Relevancy step of typo match -const int kTypoStepProc = 15; -// Decrease procent of relevancy if pattern found by word stem -const int kStemProcDecrease = 15; // Minimal relevant length of the stemmer's term constexpr int kMinStemRellevantLen = 3; // Max length of the stemming result, which will be skipped @@ -42,20 +32,20 @@ void Selecter::prepareVariants(std::vector& variants, RV const FtDSLEntry& term = dsl[termIdx]; variants.clear(); - std::vector variantsUtf16{{term.pattern, kFullMatchProc}}; + std::vector variantsUtf16{{term.pattern, holder_.cfg_->rankingConfig.fullMatch}}; if (synonymsDsl && (!holder_.cfg_->enableNumbersSearch || !term.opts.number)) { // Make translit and kblayout variants if (holder_.cfg_->enableTranslit && !term.opts.exact) { - holder_.translit_->GetVariants(term.pattern, variantsUtf16); + holder_.translit_->GetVariants(term.pattern, variantsUtf16, holder_.cfg_->rankingConfig.translit); } if (holder_.cfg_->enableKbLayout && !term.opts.exact) { - holder_.kbLayout_->GetVariants(term.pattern, variantsUtf16); + holder_.kbLayout_->GetVariants(term.pattern, variantsUtf16, holder_.cfg_->rankingConfig.kblayout); } // Synonyms - if (term.opts.op != OpNot) { - holder_.synonyms_->GetVariants(term.pattern, variantsUtf16); - holder_.synonyms_->PostProcess(term, dsl, termIdx, *synonymsDsl); + if (rx_likely(term.opts.op != OpNot)) { + holder_.synonyms_->GetVariants(term.pattern, variantsUtf16, holder_.cfg_->rankingConfig.synonyms); + holder_.synonyms_->PostProcess(term, dsl, termIdx, *synonymsDsl, holder_.cfg_->rankingConfig.kblayout); } } @@ -70,7 +60,7 @@ void Selecter::prepareVariants(std::vector& variants, RV if (!term.opts.exact) { if (term.opts.op == OpNot && term.opts.suff) { // More strict match for negative (excluding) suffix terms - if 
(holder_.cfg_->logLevel >= LogTrace) { + if rx_unlikely (holder_.cfg_->logLevel >= LogTrace) { logPrintf(LogInfo, "Skipping stemming for '%s%s%s'", term.opts.suff ? "*" : "", tmpstr, term.opts.pref ? "*" : ""); } continue; @@ -85,7 +75,7 @@ void Selecter::prepareVariants(std::vector& variants, RV if (tmpstr != stemstr && !stemstr.empty()) { const auto charsCount = getUTF8StringCharactersCount(stemstr); if (charsCount <= kMaxStemSkipLen) { - if (holder_.cfg_->logLevel >= LogTrace) { + if rx_unlikely (holder_.cfg_->logLevel >= LogTrace) { logPrintf(LogInfo, "Skipping too short stemmer's term '%s%s*'", term.opts.suff && &v != &variantsUtf16[0] ? "*" : "", stemstr); } @@ -98,9 +88,15 @@ void Selecter::prepareVariants(std::vector& variants, RV const auto charCount = getUTF8StringCharactersCount(stemstr); if (charCount >= kMinStemRellevantLen || !lowRelVariants) { - variants.emplace_back(std::move(stemstr), std::move(opts), v.proc - kStemProcDecrease, charsCount); + variants.emplace_back(std::move(stemstr), std::move(opts), + std::max(v.proc - holder_.cfg_->rankingConfig.stemmerPenalty, + BaseFTConfig::BaseRankingConfig::kMinProcAfterPenalty), + charsCount); } else { - lowRelVariants->emplace_back(std::move(stemstr), std::move(opts), v.proc - kStemProcDecrease, charsCount); + lowRelVariants->emplace_back(std::move(stemstr), std::move(opts), + std::max(v.proc - holder_.cfg_->rankingConfig.stemmerPenalty, + BaseFTConfig::BaseRankingConfig::kMinProcAfterPenalty), + charsCount); } } } @@ -108,15 +104,16 @@ void Selecter::prepareVariants(std::vector& variants, RV } } +// RX_NO_INLINE just for build test purpose. 
Do not expect any effect here template -template -IDataHolder::MergeData Selecter::Process(FtDSLQuery&& dsl, bool inTransaction, FtMergeStatuses::Statuses&& mergeStatuses, - const RdxContext& rdxCtx) { +template +RX_NO_INLINE IDataHolder::MergeData Selecter::Process(FtDSLQuery&& dsl, bool inTransaction, + FtMergeStatuses::Statuses&& mergeStatuses, const RdxContext& rdxCtx) { FtSelectContext ctx; ctx.rawResults.reserve(dsl.size()); // STEP 2: Search dsl terms for each variant std::vector synonymsDsl; - holder_.synonyms_->PreProcess(dsl, synonymsDsl); + holder_.synonyms_->PreProcess(dsl, synonymsDsl, holder_.cfg_->rankingConfig.synonyms); if (!inTransaction) ThrowOnCancel(rdxCtx); for (size_t i = 0; i < dsl.size(); ++i) { const auto irrVariantsCount = ctx.lowRelVariants.size(); @@ -134,7 +131,7 @@ IDataHolder::MergeData Selecter::Process(FtDSLQuery&& dsl, bool inTransa ctx.lowRelVariants[j].rawResultIdx = ctx.rawResults.size() - 1; } - if (holder_.cfg_->logLevel >= LogInfo) { + if rx_unlikely (holder_.cfg_->logLevel >= LogInfo) { WrSerializer wrSer; wrSer << "variants: ["; for (auto& variant : ctx.variants) { @@ -165,7 +162,7 @@ IDataHolder::MergeData Selecter::Process(FtDSLQuery&& dsl, bool inTransa logPrintf(LogInfo, "Variants: [%s]", wrSer.Slice()); } - processVariants(ctx, mergeStatuses); + processVariants(ctx, mergeStatuses); if (res.term.opts.typos) { // Lookup typos from typos_ map and fill results TyposHandler h(*holder_.cfg_); @@ -185,7 +182,7 @@ IDataHolder::MergeData Selecter::Process(FtDSLQuery&& dsl, bool inTransa synCtx.rawResults.reserve(synDsl.dsl.size()); for (size_t i = 0; i < synDsl.dsl.size(); ++i) { prepareVariants(synCtx.variants, nullptr, i, holder_.cfg_->stemmers, synDsl.dsl, nullptr); - if (holder_.cfg_->logLevel >= LogInfo) { + if rx_unlikely (holder_.cfg_->logLevel >= LogInfo) { WrSerializer wrSer; for (auto& variant : synCtx.variants) { if (&variant != &*synCtx.variants.begin()) wrSer << ", "; @@ -198,7 +195,7 @@ IDataHolder::MergeData 
Selecter::Process(FtDSLQuery&& dsl, bool inTransa if (synCtx.rawResults.back().term.opts.op == OpAnd) { ctx.rawResults.back().SwitchToInternalWordsMap(); } - processVariants(synCtx, mergeStatuses); + processVariants(synCtx, mergeStatuses); } for (size_t idx : synDsl.termsIdx) { assertrx(idx < ctx.rawResults.size()); @@ -210,7 +207,7 @@ IDataHolder::MergeData Selecter::Process(FtDSLQuery&& dsl, bool inTransa } synonymsBounds.push_back(results.size()); } - processLowRelVariants(ctx, mergeStatuses); + processLowRelVariants(ctx, mergeStatuses); // Typos for terms with low relevancy will not be processed for (auto& res : ctx.rawResults) results.emplace_back(std::move(res)); @@ -218,7 +215,7 @@ IDataHolder::MergeData Selecter::Process(FtDSLQuery&& dsl, bool inTransa } template -template +template void Selecter::processStepVariants(FtSelectContext& ctx, typename DataHolder::CommitStep& step, const FtVariantEntry& variant, unsigned curRawResultIdx, const FtMergeStatuses::Statuses& mergeStatuses, int vidsLimit) { @@ -240,8 +237,8 @@ void Selecter::processStepVariants(FtSelectContext& ctx, typename DataHo do { if (keyIt == suffixes.end()) break; if (vidsLimit <= vids) { - if (holder_.cfg_->logLevel >= LogInfo) { - logPrintf(LogInfo, "Terminating suffix loop on limit (%d). Current vairiant is '%s%s%s'", initialLimit, + if rx_unlikely (holder_.cfg_->logLevel >= LogInfo) { + logPrintf(LogInfo, "Terminating suffix loop on limit (%d). Current variant is '%s%s%s'", initialLimit, variant.opts.suff ? "*" : "", variant.pattern, variant.opts.pref ? 
"*" : ""); } break; @@ -250,7 +247,7 @@ void Selecter::processStepVariants(FtSelectContext& ctx, typename DataHo const WordIdType glbwordId = keyIt->second; const auto& hword = holder_.getWordById(glbwordId); - if constexpr (!mergeStatusesEmpty) { + if constexpr (useExternSt == FtUseExternStatuses::Yes) { bool excluded = true; for (const auto& id : hword.vids_) { if (mergeStatuses[id.Id()] != FtMergeStatuses::kExcluded) { @@ -277,7 +274,7 @@ void Selecter::processStepVariants(FtSelectContext& ctx, typename DataHo const int matchDif = std::abs(long(wordLength - matchLen + suffixLen)); const int proc = std::max(variant.proc - holder_.cfg_->partialMatchDecrease * matchDif / std::max(matchLen, 3), - suffixLen ? kSuffixMinProc : kPrefixMinProc); + suffixLen ? holder_.cfg_->rankingConfig.suffixMin : holder_.cfg_->rankingConfig.prefixMin); const auto it = res.foundWords->find(glbwordId); if (it == res.foundWords->end() || it->second.first != curRawResultIdx) { @@ -288,7 +285,7 @@ void Selecter::processStepVariants(FtSelectContext& ctx, typename DataHo ctx.totalORVids += vidsSize; } (*res.foundWords)[glbwordId] = std::make_pair(curRawResultIdx, res.size() - 1); - if (holder_.cfg_->logLevel >= LogTrace) { + if rx_unlikely (holder_.cfg_->logLevel >= LogTrace) { logPrintf(LogInfo, " matched %s '%s' of word '%s' (variant '%s'), %d vids, %d%%", suffixLen ? "suffix" : "prefix", keyIt->first, word, variant.pattern, holder_.getWordById(glbwordId).vids_.size(), proc); } @@ -300,7 +297,7 @@ void Selecter::processStepVariants(FtSelectContext& ctx, typename DataHo ++skipped; } } while ((keyIt++).lcp() >= int(tmpstr.length())); - if (holder_.cfg_->logLevel >= LogInfo) { + if rx_unlikely (holder_.cfg_->logLevel >= LogInfo) { std::string limitString; if (vidsLimit <= vids) { limitString = fmt::sprintf(". 
Lookup terminated by VIDs limit(%d)", initialLimit); @@ -311,18 +308,17 @@ void Selecter::processStepVariants(FtSelectContext& ctx, typename DataHo } template -template +template void Selecter::processVariants(FtSelectContext& ctx, const FtMergeStatuses::Statuses& mergeStatuses) { for (const FtVariantEntry& variant : ctx.variants) { for (auto& step : holder_.steps) { - processStepVariants(ctx, step, variant, ctx.rawResults.size() - 1, mergeStatuses, - std::numeric_limits::max()); + processStepVariants(ctx, step, variant, ctx.rawResults.size() - 1, mergeStatuses, std::numeric_limits::max()); } } } template -template +template void Selecter::processLowRelVariants(FtSelectContext& ctx, const FtMergeStatuses::Statuses& mergeStatuses) { // Add words from low relevancy variants, ordered by length & proc if constexpr (kVariantsWithDifLength) { @@ -352,7 +348,7 @@ void Selecter::processLowRelVariants(FtSelectContext& ctx, const FtMerge if constexpr (kVariantsWithDifLength) { if (variant.GetLenCached() != lastVariantLen) { if (unsigned(targetORLimit) <= ctx.totalORVids) { - if (holder_.cfg_->logLevel >= LogTrace) { + if rx_unlikely (holder_.cfg_->logLevel >= LogTrace) { logPrintf(LogInfo, "Terminating on target OR limit. Current vairiant is '%s%s%s'", variant.opts.suff ? "*" : "", variant.pattern, variant.opts.pref ? "*" : ""); } @@ -363,7 +359,7 @@ void Selecter::processLowRelVariants(FtSelectContext& ctx, const FtMerge } else { (void)lastVariantLen; } - if (holder_.cfg_->logLevel >= LogTrace) { + if rx_unlikely (holder_.cfg_->logLevel >= LogTrace) { logPrintf(LogInfo, "Handling '%s%s%s' as variant with low relevancy", variant.opts.suff ? "*" : "", variant.pattern, variant.opts.pref ? 
"*" : ""); } @@ -372,7 +368,7 @@ void Selecter::processLowRelVariants(FtSelectContext& ctx, const FtMerge int remainingLimit = targetORLimit - ctx.totalORVids; if (remainingLimit > 0) { for (auto& step : holder_.steps) { - processStepVariants(ctx, step, variant, variant.rawResultIdx, mergeStatuses, remainingLimit); + processStepVariants(ctx, step, variant, variant.rawResultIdx, mergeStatuses, remainingLimit); } } break; @@ -383,7 +379,7 @@ void Selecter::processLowRelVariants(FtSelectContext& ctx, const FtMerge int remainingLimit = targetANDLimit - res.idsCnt_; if (remainingLimit > 0) { for (auto& step : holder_.steps) { - processStepVariants(ctx, step, variant, variant.rawResultIdx, mergeStatuses, remainingLimit); + processStepVariants(ctx, step, variant, variant.rawResultIdx, mergeStatuses, remainingLimit); } } break; @@ -392,10 +388,11 @@ void Selecter::processLowRelVariants(FtSelectContext& ctx, const FtMerge } } -static double bound(double k, double weight, double boost) noexcept { return (1.0 - weight) + k * boost * weight; } +RX_ALWAYS_INLINE double bound(double k, double weight, double boost) noexcept { return (1.0 - weight) + k * boost * weight; } template -void Selecter::debugMergeStep(const char* msg, int vid, float normBm25, float normDist, int finalRank, int prevRank) { +RX_ALWAYS_INLINE void Selecter::debugMergeStep(const char* msg, int vid, float normBm25, float normDist, int finalRank, + int prevRank) { #ifdef REINDEX_FT_EXTRA_DEBUG if (holder_.cfg_->logLevel < LogTrace) return; @@ -411,9 +408,9 @@ void Selecter::debugMergeStep(const char* msg, int vid, float normBm25, #endif } template -void Selecter::calcFieldBoost(double idf, unsigned long long f, const IdRelType& relid, const FtDslOpts& opts, int termProc, - double& termRank, double& normBm25, bool& dontSkipCurTermRank, h_vector& ranksInFields, - int& field) { +RX_ALWAYS_INLINE void Selecter::calcFieldBoost(double idf, unsigned long long f, const IdRelType& relid, const FtDslOpts& opts, + int 
termProc, double& termRank, double& normBm25, bool& dontSkipCurTermRank, + h_vector& ranksInFields, int& field) { assertrx(f < holder_.cfg_->fieldsCfg.size()); const auto& fldCfg = holder_.cfg_->fieldsCfg[f]; // raw bm25 @@ -718,7 +715,7 @@ void Selecter::mergeIteration(TextSearchResults& rawRes, index_t rawResI if (!termRank) { continue; } - if (holder_.cfg_->logLevel >= LogTrace) { + if rx_unlikely (holder_.cfg_->logLevel >= LogTrace) { logPrintf(LogInfo, "Pattern %s, idf %f, termLenBoost %f", r.pattern, idf, rawRes.term.opts.termLenBoost); } @@ -841,7 +838,7 @@ void Selecter::mergeIterationGroup(TextSearchResults& rawRes, index_t ra auto [termRank, field] = calcTermRank(rawRes, idf, relid, r.proc_); if (!termRank) continue; - if (holder_.cfg_->logLevel >= LogTrace) { + if rx_unlikely (holder_.cfg_->logLevel >= LogTrace) { logPrintf(LogInfo, "Pattern %s, idf %f, termLenBoost %f", r.pattern, idf, rawRes.term.opts.termLenBoost); } @@ -1039,7 +1036,11 @@ void Selecter::TyposHandler::operator()(std::vector& } const uint8_t wordLength = step.suffixes_.word_len_at(wordIdSfx); - const int proc = kTypoProc - tcount * kTypoStepProc / std::max((wordLength - tcount) / 3, 1); + const int proc = + std::max(holder.cfg_->rankingConfig.typo - + tcount * holder.cfg_->rankingConfig.typoPenalty / + std::max((wordLength - tcount) / 3, BaseFTConfig::BaseRankingConfig::kMinProcAfterPenalty), + 1); const auto it = res.foundWords->find(wordTypo.word); if (it == res.foundWords->end() || it->second.first != curRawResultIdx) { const auto& hword = holder.getWordById(wordTypo.word); @@ -1058,18 +1059,18 @@ void Selecter::TyposHandler::operator()(std::vector& if (dontUseMaxTyposForBoth_ && level == 1 && typo.size() != patternSize) return; } }); - if (holder.cfg_->logLevel >= LogInfo) { + if rx_unlikely (holder.cfg_->logLevel >= LogInfo) { logPrintf(LogInfo, "Lookup typos, matched %d typos, with %d vids, skiped %d", matched, vids, skiped); } } } -static unsigned uabs(int a) { return 
unsigned(std::abs(a)); } +RX_ALWAYS_INLINE unsigned uabs(int a) { return unsigned(std::abs(a)); } template template void Selecter::TyposHandler::logTraceF(int level, const char* fmt, Args&&... args) { - if (logLevel_ >= LogTrace) { + if rx_unlikely (logLevel_ >= LogTrace) { logPrintf(level, fmt, std::forward(args)...); } } @@ -1316,7 +1317,7 @@ typename IDataHolder::MergeData Selecter::mergeResults(std::vectorlogLevel >= LogInfo) { + if rx_unlikely (holder_.cfg_->logLevel >= LogInfo) { logPrintf(LogInfo, "Complex merge (%d patterns): out %d vids", rawResults.size(), merged.size()); } @@ -1337,11 +1338,14 @@ typename IDataHolder::MergeData Selecter::mergeResults(std::vector; -template IDataHolder::MergeData Selecter::Process(FtDSLQuery&&, bool, FtMergeStatuses::Statuses&&, const RdxContext&); -template IDataHolder::MergeData Selecter::Process(FtDSLQuery&&, bool, FtMergeStatuses::Statuses&&, - const RdxContext&); +template IDataHolder::MergeData Selecter::Process(FtDSLQuery&&, bool, FtMergeStatuses::Statuses&&, + const RdxContext&); +template IDataHolder::MergeData Selecter::Process(FtDSLQuery&&, bool, FtMergeStatuses::Statuses&&, + const RdxContext&); template class Selecter; -template IDataHolder::MergeData Selecter::Process(FtDSLQuery&&, bool, FtMergeStatuses::Statuses&&, const RdxContext&); -template IDataHolder::MergeData Selecter::Process(FtDSLQuery&&, bool, FtMergeStatuses::Statuses&&, const RdxContext&); +template IDataHolder::MergeData Selecter::Process(FtDSLQuery&&, bool, FtMergeStatuses::Statuses&&, + const RdxContext&); +template IDataHolder::MergeData Selecter::Process(FtDSLQuery&&, bool, FtMergeStatuses::Statuses&&, + const RdxContext&); } // namespace reindexer diff --git a/cpp_src/core/ft/ft_fast/selecter.h b/cpp_src/core/ft/ft_fast/selecter.h index f7211469b..0c67aea00 100644 --- a/cpp_src/core/ft/ft_fast/selecter.h +++ b/cpp_src/core/ft/ft_fast/selecter.h @@ -14,7 +14,7 @@ class Selecter { Selecter(DataHolder& holder, size_t fieldSize, bool 
needArea, int maxAreasInDoc) : holder_(holder), fieldSize_(fieldSize), needArea_(needArea), maxAreasInDoc_(maxAreasInDoc) {} - template + template IDataHolder::MergeData Process(FtDSLQuery&& dsl, bool inTransaction, FtMergeStatuses::Statuses&& mergeStatuses, const RdxContext&); private: @@ -204,13 +204,13 @@ class Selecter { } void debugMergeStep(const char* msg, int vid, float normBm25, float normDist, int finalRank, int prevRank); - template + template void processVariants(FtSelectContext&, const FtMergeStatuses::Statuses& mergeStatuses); - template + template void processLowRelVariants(FtSelectContext&, const FtMergeStatuses::Statuses& mergeStatuses); void prepareVariants(std::vector&, RVector* lowRelVariants, size_t termIdx, const std::vector& langs, const FtDSLQuery&, std::vector*); - template + template void processStepVariants(FtSelectContext& ctx, typename DataHolder::CommitStep& step, const FtVariantEntry& variant, unsigned curRawResultIdx, const FtMergeStatuses::Statuses& mergeStatuses, int vidsLimit); diff --git a/cpp_src/core/ft/ft_fuzzy/baseseacher.cc b/cpp_src/core/ft/ft_fuzzy/baseseacher.cc index 31fc4e2a2..45b0c0b82 100644 --- a/cpp_src/core/ft/ft_fuzzy/baseseacher.cc +++ b/cpp_src/core/ft/ft_fuzzy/baseseacher.cc @@ -88,13 +88,13 @@ SearchResult BaseSearcher::Compare(const BaseHolder::Ptr &holder, const FtDSLQue data_size += ParseData(holder, term.pattern, max_id, min_id, rusults, term.opts, 1); if (holder->cfg_.enableTranslit) { - searchers_[0]->GetVariants(term.pattern, data); + searchers_[0]->GetVariants(term.pattern, data, holder->cfg_.rankingConfig.translit); ParseData(holder, data[0].pattern, max_id, min_id, rusults, term.opts, holder->cfg_.startDefaultDecreese); } if (holder->cfg_.enableKbLayout) { data.clear(); - searchers_[1]->GetVariants(term.pattern, data); + searchers_[1]->GetVariants(term.pattern, data, holder->cfg_.rankingConfig.kblayout); ParseData(holder, data[0].pattern, max_id, min_id, rusults, term.opts, 
holder->cfg_.startDefaultDecreese); } } diff --git a/cpp_src/core/ft/ft_fuzzy/prefilter/prefilter.h b/cpp_src/core/ft/ft_fuzzy/prefilter/prefilter.h index 6c86a770a..c06ca9081 100644 --- a/cpp_src/core/ft/ft_fuzzy/prefilter/prefilter.h +++ b/cpp_src/core/ft/ft_fuzzy/prefilter/prefilter.h @@ -1,6 +1,7 @@ #pragma once #include #include +#include #include #include diff --git a/cpp_src/core/ft/ftdsl.cc b/cpp_src/core/ft/ftdsl.cc index a448b4d74..e153c2f87 100644 --- a/cpp_src/core/ft/ftdsl.cc +++ b/cpp_src/core/ft/ftdsl.cc @@ -1,4 +1,3 @@ - #include "core/ft/ftdsl.h" #include #include @@ -9,14 +8,14 @@ namespace reindexer { // Format: see fulltext.md -bool is_term(int ch, const std::string &extraWordSymbols) { +static bool is_term(int ch, const std::string &extraWordSymbols) noexcept { return IsAlpha(ch) || IsDigit(ch) || extraWordSymbols.find(ch) != std::string::npos // wrong kb layout || ch == '[' || ch == ';' || ch == ',' || ch == '.'; } -bool is_dslbegin(int ch, const std::string &extraWordSymbols) { +static bool is_dslbegin(int ch, const std::string &extraWordSymbols) noexcept { return is_term(ch, extraWordSymbols) || ch == '+' || ch == '-' || ch == '*' || ch == '\'' || ch == '\"' || ch == '@' || ch == '=' || ch == '\\'; } @@ -29,9 +28,11 @@ void FtDSLQuery::parse(const std::string &q) { void FtDSLQuery::parse(std::wstring &utf16str) { int groupTermCounter = 0; bool inGroup = false; + bool hasAnythingExceptNot = false; int groupCounter = 0; int maxPatternLen = 1; h_vector fieldsOpts; + std::string utf8str; fieldsOpts.insert(fieldsOpts.end(), std::max(int(fields_.size()), 1), {1.0, false}); for (auto it = utf16str.begin(); it != utf16str.end();) { @@ -141,8 +142,12 @@ void FtDSLQuery::parse(std::wstring &utf16str) { if (endIt != begIt) { fte.pattern.assign(begIt, endIt); - std::string utf8str = utf16_to_utf8(fte.pattern); + utf16_to_utf8(fte.pattern, utf8str); if (is_number(utf8str)) fte.opts.number = true; + if (fte.opts.op != OpNot && groupTermCounter == 0) { + 
// Setting up this flag before stopWords check, to prevent error on DSL with stop word + NOT + hasAnythingExceptNot = true; + } if (stopWords_.find(utf8str) != stopWords_.end()) { continue; } @@ -157,6 +162,9 @@ void FtDSLQuery::parse(std::wstring &utf16str) { if (inGroup) { throw Error(errParseDSL, "No closing quote in full text search query DSL"); } + if (!hasAnythingExceptNot && size()) { + throw Error(errParams, "Fulltext query can not contain only 'NOT' terms (i.e. terms with minus)"); + } int cnt = 0; for (auto &e : *this) { diff --git a/cpp_src/core/ft/ftsetcashe.h b/cpp_src/core/ft/ftsetcashe.h index 81ab06304..810a06ccd 100644 --- a/cpp_src/core/ft/ftsetcashe.h +++ b/cpp_src/core/ft/ftsetcashe.h @@ -7,17 +7,16 @@ namespace reindexer { struct FtIdSetCacheVal { - FtIdSetCacheVal() : ids(make_intrusive>()) {} - FtIdSetCacheVal(IdSet::Ptr i) noexcept : ids(std::move(i)) {} - FtIdSetCacheVal(IdSet::Ptr i, FtCtx::Data::Ptr c) noexcept : ids(std::move(i)), ctx(std::move(c)) {} + FtIdSetCacheVal() = default; + FtIdSetCacheVal(IdSet::Ptr&& i) noexcept : ids(std::move(i)) {} + FtIdSetCacheVal(IdSet::Ptr&& i, FtCtx::Data::Ptr&& c) noexcept : ids(std::move(i)), ctx(std::move(c)) {} - size_t Size() const noexcept { return ids ? sizeof(*ids.get()) + ids->heap_size() : 0; } + size_t Size() const noexcept { return ids ? 
(sizeof(*ids.get()) + ids->heap_size()) : 0; } IdSet::Ptr ids; FtCtx::Data::Ptr ctx; }; class FtIdSetCache : public LRUCache {}; -class PreselectedFtIdSetCache : public LRUCache {}; } // namespace reindexer diff --git a/cpp_src/core/ft/idrelset.h b/cpp_src/core/ft/idrelset.h index 8400fb5e8..05ea5f8b7 100644 --- a/cpp_src/core/ft/idrelset.h +++ b/cpp_src/core/ft/idrelset.h @@ -12,6 +12,7 @@ namespace reindexer { typedef uint32_t VDocIdType; +static constexpr int kMaxFtCompositeFields = 63; // the position of the word in the document (the index of the word in the field (pos), the field in which the word field was // encountered (field) @@ -39,12 +40,13 @@ class IdRelType { struct PosType { static const int posBits = 24; PosType() = default; - PosType(int pos, int field) : fpos(pos | (field << posBits)) {} + PosType(int pos, int field) noexcept : fpos(pos | (field << posBits)) {} int pos() const noexcept { return fpos & ((1 << posBits) - 1); } int field() const noexcept { return fpos >> posBits; } bool operator<(PosType other) const noexcept { return fpos < other.fpos; } bool operator==(PosType other) const noexcept { return fpos == other.fpos; } - unsigned fpos; + + uint32_t fpos; }; template @@ -89,12 +91,15 @@ class IdRelType { } void Add(int pos, int field) { + assertrx_throw(0 <= field && field <= kMaxFtCompositeFields); pos_.emplace_back(pos, field); addField(field); } void Add(PosType p) { + const auto field = p.field(); + assertrx_throw(0 <= field && field <= kMaxFtCompositeFields); pos_.emplace_back(p); - addField(p.field()); + addField(field); } void SortAndUnique() { boost::sort::pdqsort(pos_.begin(), pos_.end()); @@ -117,12 +122,7 @@ class IdRelType { size_t HeapSize() const noexcept { return heapSize(pos_); } private: - static constexpr int maxField = 63; - - void addField(int field) noexcept { - assertrx(0 <= field && field <= maxField); - usedFieldsMask_ |= (uint64_t(1) << field); - } + void addField(int field) noexcept { usedFieldsMask_ |= 
(uint64_t(1) << field); } template size_t heapSize(const T& p) const noexcept { diff --git a/cpp_src/core/ft/numtotext.cc b/cpp_src/core/ft/numtotext.cc index 9a8d86198..fc91c1848 100644 --- a/cpp_src/core/ft/numtotext.cc +++ b/cpp_src/core/ft/numtotext.cc @@ -1,37 +1,33 @@ #include "numtotext.h" -#include -#include #include -#include +#include +#include #include "tools/errors.h" namespace reindexer { -using std::string; -using std::vector; -using std::pair; - -const string units[] = {"", "один", "два", "три", "четыре", "пять", "шесть", "семь", "восемь", "девять"}; -const string unitsNominat[] = {"", "одна", "две"}; -const string tens[] = {"", "одиннадцать", "двенадцать", "тринадцать", "четырнадцать", - "пятнадцать", "шестнадцать", "семнадцать", "восемнадцать", "девятнадцать"}; -const string decades[] = {"", "десять", "двадцать", "тридцать", "сорок", - "пятьдесят", "шестьдесят", "семьдесят", "восемьдесят", "девяносто"}; -const string hundreads[] = {"", "сто", "двести", "триста", "четыреста", "пятьсот", "шестьсот", "семьсот", "восемьсот", "девятьсот"}; -const string thousands[] = {"тысяча", "тысячи", "тысяч"}; -const string millions[] = {"миллион", "миллиона", "миллионов"}; -const string billions[] = {"миллиард", "миллиарда", "миллиардов"}; -const string trillions[] = {"триллион", "триллиона", "триллионов"}; -const string quadrillion[] = {"квадриллион", "квадриллиона", "квадриллионов"}; -const string quintillion[] = {"квинтиллион", "квинтиллиона", "квинтиллионов"}; -const string sextillion[] = {"секстиллион", "секстиллиона", "секстиллионов"}; -const string septillion[] = {"септиллион", "септиллиона", "септиллионов"}; +constexpr std::string_view units[] = {"", "один", "два", "три", "четыре", "пять", "шесть", "семь", "восемь", "девять"}; +constexpr std::string_view unitsNominat[] = {"", "одна", "две"}; +constexpr std::string_view tens[] = {"", "одиннадцать", "двенадцать", "тринадцать", "четырнадцать", + "пятнадцать", "шестнадцать", "семнадцать", "восемнадцать", 
"девятнадцать"}; +constexpr std::string_view decades[] = {"", "десять", "двадцать", "тридцать", "сорок", + "пятьдесят", "шестьдесят", "семьдесят", "восемьдесят", "девяносто"}; +constexpr std::string_view hundreads[] = {"", "сто", "двести", "триста", "четыреста", + "пятьсот", "шестьсот", "семьсот", "восемьсот", "девятьсот"}; +constexpr std::string_view thousands[] = {"тысяча", "тысячи", "тысяч"}; +constexpr std::string_view millions[] = {"миллион", "миллиона", "миллионов"}; +constexpr std::string_view billions[] = {"миллиард", "миллиарда", "миллиардов"}; +constexpr std::string_view trillions[] = {"триллион", "триллиона", "триллионов"}; +constexpr std::string_view quadrillion[] = {"квадриллион", "квадриллиона", "квадриллионов"}; +constexpr std::string_view quintillion[] = {"квинтиллион", "квинтиллиона", "квинтиллионов"}; +constexpr std::string_view sextillion[] = {"секстиллион", "секстиллиона", "секстиллионов"}; +constexpr std::string_view septillion[] = {"септиллион", "септиллиона", "септиллионов"}; enum Numorders : int { Thousands, Millions, Billions, Trillions, Quadrillion, Quintillion, Sextillion, Septillion }; -const std::string& getNumorder(int numorder, int i) { +static std::string_view getNumorder(int numorder, int i) { switch (numorder) { case Thousands: return thousands[i]; @@ -53,13 +49,14 @@ const std::string& getNumorder(int numorder, int i) { throw Error(errParams, "Incorrect order [%s]: too big", numorder); } -int ansiCharacterToDigit(char ch) { return static_cast(ch - 48); } +RX_ALWAYS_INLINE int ansiCharacterToDigit(char ch) noexcept { return static_cast(ch - 48); } -std::vector getOrders(std::string_view str) { +static std::vector getOrders(std::string_view str) { std::string numStr(str); std::reverse(numStr.begin(), numStr.end()); int numChars = numStr.length(); std::vector orders; + orders.reserve(numChars / 3); for (int i = 0; i < numChars; i += 3) { std::string tempString; if (i <= numChars - 3) { @@ -78,12 +75,12 @@ std::vector 
getOrders(std::string_view str) { break; } } - orders.push_back(tempString); + orders.emplace_back(std::move(tempString)); } return orders; } -std::vector getDecimal(const string& str, int i) { +static std::vector getDecimal(const std::string& str, int i) { std::vector words; int v = std::stoi(str); if (v < 10) { @@ -102,7 +99,7 @@ std::vector getDecimal(const string& str, int i) { return words; } -std::string getNumOrders(int i, int num) { +static std::string getNumOrders(int i, int num) { std::string orders; if (i > 0) { if (num % 10 > 4 || (num % 100 > 10 && num % 100 < 20) || num % 10 == 0) { @@ -116,7 +113,7 @@ std::string getNumOrders(int i, int num) { return orders; } -std::vector formTextString(const string& str, int i) { +static std::vector formTextString(const std::string& str, int i) { std::vector words; int strlen = str.length(); if (strlen == 3) { @@ -141,8 +138,8 @@ std::vector formTextString(const string& str, int i) { return words; } -vector& NumToText::convert(std::string_view str, std::vector& output) { - output.clear(); +std::vector& NumToText::convert(std::string_view str, std::vector& output) { + output.resize(0); if ((str.length() == 1) && (str[0] == '0')) { output = {"ноль"}; return output; @@ -159,4 +156,5 @@ vector& NumToText::convert(std::string_view str, std::vectorsize() * sizeof(VariantArray::value_type); } + size_t Size() const noexcept { return sizeof(IdSetCacheKey) + keys->size() * sizeof(VariantArray::value_type); } const VariantArray *keys; CondType cond; @@ -57,9 +58,9 @@ T &operator<<(T &os, const IdSetCacheKey &k) { } struct IdSetCacheVal { - IdSetCacheVal() : ids(nullptr) {} - IdSetCacheVal(const IdSet::Ptr &i) : ids(i) {} - size_t Size() const { return ids ? sizeof(*ids.get()) + ids->heap_size() : 0; } + IdSetCacheVal() = default; + IdSetCacheVal(IdSet::Ptr &&i) noexcept : ids(std::move(i)) {} + size_t Size() const noexcept { return ids ? 
(sizeof(*ids.get()) + ids->heap_size()) : 0; } IdSet::Ptr ids; }; @@ -84,7 +85,7 @@ struct hash_idset_cache_key { class IdSetCache : public LRUCache { public: - void ClearSorted(const std::bitset<64> &s) { + void ClearSorted(const std::bitset &s) { if (s.any()) { Clear([&s](const IdSetCacheKey &k) { return s.test(k.sort); }); } diff --git a/cpp_src/core/index/ft_preselect.h b/cpp_src/core/index/ft_preselect.h index b2be838c2..ebd6aa085 100644 --- a/cpp_src/core/index/ft_preselect.h +++ b/cpp_src/core/index/ft_preselect.h @@ -18,9 +18,8 @@ struct FtMergeStatuses { Statuses statuses; std::vector rowIds; const std::vector* rowId2Vdoc; - std::optional cacheKey; }; -using FtPreselectT = std::variant; +using FtPreselectT = FtMergeStatuses; } // namespace reindexer diff --git a/cpp_src/core/index/index.cc b/cpp_src/core/index/index.cc index 50a0d4dac..552033fc2 100644 --- a/cpp_src/core/index/index.cc +++ b/cpp_src/core/index/index.cc @@ -46,6 +46,7 @@ std::unique_ptr Index::New(const IndexDef& idef, PayloadType payloadType, case IndexInt64Store: case IndexDoubleStore: case IndexBool: + case IndexUuidStore: return IndexStore_New(idef, std::move(payloadType), fields); case IndexFastFT: case IndexCompositeFastFT: @@ -59,9 +60,8 @@ std::unique_ptr Index::New(const IndexDef& idef, PayloadType payloadType, return IndexRTree_New(idef, std::move(payloadType), fields); case IndexUuidHash: return IndexUuid_New(idef, std::move(payloadType), fields); - default: - throw Error(errParams, "Ivalid index type %d for index '%s'", idef.Type(), idef.name_); } + throw Error(errParams, "Ivalid index type %d for index '%s'", idef.Type(), idef.name_); } template diff --git a/cpp_src/core/index/index.h b/cpp_src/core/index/index.h index 0b7164ab6..a30594354 100644 --- a/cpp_src/core/index/index.h +++ b/cpp_src/core/index/index.h @@ -102,7 +102,7 @@ class Index { assertrx(0); abort(); } - virtual reindexer::FtPreselectT FtPreselect(const QueryEntries&, int /*idxNo*/, const SelectFunction&, const 
RdxContext&) { + virtual reindexer::FtPreselectT FtPreselect(const RdxContext&) { assertrx(0); abort(); } @@ -120,7 +120,7 @@ class Index { } virtual bool HoldsStrings() const noexcept = 0; virtual void ClearCache() {} - virtual void ClearCache(const std::bitset<64>&) {} + virtual void ClearCache(const std::bitset&) {} virtual bool IsBuilt() const noexcept { return isBuilt_; } virtual void MarkBuilt() noexcept { isBuilt_ = true; } virtual void EnableUpdatesCountingMode(bool) noexcept {} diff --git a/cpp_src/core/index/indexiterator.h b/cpp_src/core/index/indexiterator.h index 920ae2f45..2ed2fcb0b 100644 --- a/cpp_src/core/index/indexiterator.h +++ b/cpp_src/core/index/indexiterator.h @@ -9,11 +9,11 @@ class IndexIteratorBase { public: virtual ~IndexIteratorBase() = default; virtual void Start(bool reverse) = 0; - virtual IdType Value() const = 0; - virtual bool Next() = 0; - virtual void ExcludeLastSet() = 0; + virtual IdType Value() const noexcept = 0; + virtual bool Next() noexcept = 0; + virtual void ExcludeLastSet() noexcept = 0; virtual size_t GetMaxIterations(size_t limitIters) noexcept = 0; - virtual void SetMaxIterations(size_t iters) = 0; + virtual void SetMaxIterations(size_t iters) noexcept = 0; }; class IndexIterator : public intrusive_atomic_rc_wrapper { diff --git a/cpp_src/core/index/indexordered.cc b/cpp_src/core/index/indexordered.cc index 08859b62c..828030b1b 100644 --- a/cpp_src/core/index/indexordered.cc +++ b/cpp_src/core/index/indexordered.cc @@ -235,9 +235,10 @@ static std::unique_ptr IndexOrdered_New(const IndexDef &idef, PayloadType case IndexTtl: case IndexRTree: case IndexUuidHash: - default: - abort(); + case IndexUuidStore: + break; } + std::abort(); } // NOLINTBEGIN(*cplusplus.NewDeleteLeaks) diff --git a/cpp_src/core/index/indexstore.cc b/cpp_src/core/index/indexstore.cc index 43654c377..27fe6239f 100644 --- a/cpp_src/core/index/indexstore.cc +++ b/cpp_src/core/index/indexstore.cc @@ -106,6 +106,41 @@ SelectKeyResults 
IndexStore::SelectKey(const VariantArray &keys, CondType con if (condition == CondAny && !this->opts_.IsArray() && !this->opts_.IsSparse() && !sopts.distinct) throw Error(errParams, "The 'NOT NULL' condition is suported only by 'sparse' or 'array' indexes"); + // TODO: it may be necessary to remove or change this switch after QueryEntry refactoring + switch (condition) { + case CondAny: + if (!this->opts_.IsArray() && !this->opts_.IsSparse() && !sopts.distinct) { + throw Error(errParams, "The 'NOT NULL' condition is suported only by 'sparse' or 'array' indexes"); + } + break; + case CondEmpty: + if (!this->opts_.IsArray() && !this->opts_.IsSparse()) { + throw Error(errParams, "The 'is NULL' condition is suported only by 'sparse' or 'array' indexes"); + } + break; + case CondAllSet: + case CondSet: + case CondEq: + break; + case CondRange: + case CondDWithin: + if (keys.size() != 2) { + throw Error(errParams, "For condition %s required exactly 2 arguments, but provided %d", CondTypeToStr(condition), + keys.size()); + } + break; + case CondLt: + case CondLe: + case CondGt: + case CondGe: + case CondLike: + if (keys.size() != 1) { + throw Error(errParams, "For condition %s required exactly 1 argument, but provided %d", CondTypeToStr(condition), + keys.size()); + } + break; + } + res.comparators_.push_back(Comparator(condition, KeyType(), keys, opts_.IsArray(), sopts.distinct, payloadType_, fields_, idx_data.size() ? 
idx_data.data() : nullptr, opts_.collateOpts_)); return SelectKeyResults(std::move(res)); @@ -160,6 +195,8 @@ std::unique_ptr IndexStore_New(const IndexDef &idef, PayloadType payloadT return std::unique_ptr{new IndexStore(idef, std::move(payloadType), fields)}; case IndexStrStore: return std::unique_ptr{new IndexStore(idef, std::move(payloadType), fields)}; + case IndexUuidStore: + return std::unique_ptr{new IndexStore(idef, std::move(payloadType), fields)}; case IndexStrHash: case IndexStrBTree: case IndexIntBTree: @@ -176,9 +213,9 @@ std::unique_ptr IndexStore_New(const IndexDef &idef, PayloadType payloadT case IndexTtl: case IndexRTree: case IndexUuidHash: - default: - abort(); + break; } + std::abort(); } template class IndexStore; diff --git a/cpp_src/core/index/indexstore.h b/cpp_src/core/index/indexstore.h index 3a25b48b6..832c11a4f 100644 --- a/cpp_src/core/index/indexstore.h +++ b/cpp_src/core/index/indexstore.h @@ -27,6 +27,7 @@ class IndexStore : public Index { void Dump(std::ostream &os, std::string_view step = " ", std::string_view offset = "") const override { dump(os, step, offset); } virtual void AddDestroyTask(tsl::detail_sparse_hash::ThreadTaskQueue &) override; virtual bool IsDestroyPartSupported() const noexcept override { return true; } + virtual bool IsUuid() const noexcept override final { return std::is_same_v; } template struct HasAddTask : std::false_type {}; diff --git a/cpp_src/core/index/indextext/fastindextext.cc b/cpp_src/core/index/indextext/fastindextext.cc index d46cb0e4d..36db2dc49 100644 --- a/cpp_src/core/index/indextext/fastindextext.cc +++ b/cpp_src/core/index/indextext/fastindextext.cc @@ -46,7 +46,7 @@ void FastIndexText::initHolder(FtFastConfig &cfg) { template Variant FastIndexText::Upsert(const Variant &key, IdType id, bool &clearCache) { - if (key.Type().Is()) { + if rx_unlikely (key.Type().Is()) { if (this->empty_ids_.Unsorted().Add(id, IdSet::Auto, 0)) { this->isBuilt_ = false; } @@ -64,7 +64,6 @@ Variant 
FastIndexText::Upsert(const Variant &key, IdType id, bool &clearCache if (keyIt->second.Unsorted().Add(id, this->opts_.IsPK() ? IdSet::Ordered : IdSet::Auto, 0)) { this->isBuilt_ = false; if (this->cache_ft_) this->cache_ft_->Clear(); - if (this->preselected_cache_ft_) this->preselected_cache_ft_->Clear(); clearCache = true; } this->addMemStat(keyIt); @@ -79,7 +78,7 @@ Variant FastIndexText::Upsert(const Variant &key, IdType id, bool &clearCache template void FastIndexText::Delete(const Variant &key, IdType id, StringsHolder &strHolder, bool &clearCache) { int delcnt = 0; - if (key.Type().Is()) { + if rx_unlikely (key.Type().Is()) { delcnt = this->empty_ids_.Unsorted().Erase(id); assertrx(delcnt); this->isBuilt_ = false; @@ -117,7 +116,6 @@ void FastIndexText::Delete(const Variant &key, IdType id, StringsHolder &strH IndexStore>::Delete(key, id, strHolder, clearCache); } if (this->cache_ft_) this->cache_ft_->Clear(); - if (this->preselected_cache_ft_) this->preselected_cache_ft_->Clear(); clearCache = true; } @@ -128,70 +126,78 @@ IndexMemStat FastIndexText::GetMemStat(const RdxContext &ctx) { contexted_shared_lock lck(this->mtx_, &ctx); ret.fulltextSize = this->holder_->GetMemStat(); if (this->cache_ft_) ret.idsetCache = this->cache_ft_->GetMemStat(); - if (this->preselected_cache_ft_) ret.idsetCache += this->preselected_cache_ft_->GetMemStat(); return ret; } template IdSet::Ptr FastIndexText::Select(FtCtx::Ptr fctx, FtDSLQuery &&dsl, bool inTransaction, FtMergeStatuses &&statuses, - bool mergeStatusesEmpty, const RdxContext &rdxCtx) { - fctx->GetData()->extraWordSymbols_ = this->GetConfig()->extraWordSymbols; + FtUseExternStatuses useExternSt, const RdxContext &rdxCtx) { + fctx->GetData()->extraWordSymbols_ = this->getConfig()->extraWordSymbols; fctx->GetData()->isWordPositions_ = true; - auto mergeData = this->holder_->Select(std::move(dsl), this->fields_.size(), fctx->NeedArea(), GetConfig()->maxAreasInDoc, - inTransaction, std::move(statuses.statuses), 
mergeStatusesEmpty, rdxCtx); + auto mergeData = this->holder_->Select(std::move(dsl), this->fields_.size(), fctx->NeedArea(), getConfig()->maxAreasInDoc, + inTransaction, std::move(statuses.statuses), useExternSt, rdxCtx); // convert vids(uniq documents id) to ids (real ids) IdSet::Ptr mergedIds = make_intrusive>(); auto &holder = *this->holder_; - auto &vdocs = holder.vdocs_; if (mergeData.empty()) { return mergedIds; } int cnt = 0; const double scalingFactor = mergeData.maxRank > 255 ? 255.0 / mergeData.maxRank : 1.0; - int minRelevancy = GetConfig()->minRelevancy * 100 * scalingFactor; + const int minRelevancy = getConfig()->minRelevancy * 100 * scalingFactor; + size_t releventDocs = 0; for (auto &vid : mergeData) { - assertrx(vid.id < int(vdocs.size())); - if (!vdocs[vid.id].keyEntry) { + auto &vdoc = holder.vdocs_[vid.id]; + if (!vdoc.keyEntry) { continue; } vid.proc *= scalingFactor; if (vid.proc <= minRelevancy) break; - cnt += vdocs[vid.id].keyEntry->Sorted(0).size(); + + assertrx_throw(!vdoc.keyEntry->Unsorted().empty()); + cnt += vdoc.keyEntry->Sorted(0).size(); + ++releventDocs; } mergedIds->reserve(cnt); fctx->Reserve(cnt); - for (auto &vid : mergeData) { - auto id = vid.id; - assertrx(id < IdType(vdocs.size())); - - if (!vdocs[id].keyEntry) { - continue; + if (!fctx->NeedArea()) { + if (useExternSt == FtUseExternStatuses::No) { + appendMergedIds(mergeData, releventDocs, + [&fctx, &mergedIds](IdSetRef::iterator ebegin, IdSetRef::iterator eend, const IDataHolder::MergeInfo &vid) { + fctx->Add(ebegin, eend, vid.proc); + mergedIds->Append(ebegin, eend, IdSet::Unordered); + }); + } else { + appendMergedIds( + mergeData, releventDocs, + [&fctx, &mergedIds, &statuses](IdSetRef::iterator ebegin, IdSetRef::iterator eend, const IDataHolder::MergeInfo &vid) { + fctx->Add(ebegin, eend, vid.proc, statuses.rowIds); + mergedIds->Append(ebegin, eend, statuses.rowIds, IdSet::Unordered); + }); } - assertrx(!vdocs[id].keyEntry->Unsorted().empty()); - if (vid.proc <= 
minRelevancy) break; - if (mergeStatusesEmpty) { - if (vid.areaIndex == std::numeric_limits::max()) { - fctx->Add(vdocs[id].keyEntry->Sorted(0).begin(), vdocs[id].keyEntry->Sorted(0).end(), vid.proc); - } else { - fctx->Add(vdocs[id].keyEntry->Sorted(0).begin(), vdocs[id].keyEntry->Sorted(0).end(), vid.proc, - std::move(mergeData.vectorAreas[vid.areaIndex])); - } - mergedIds->Append(vdocs[id].keyEntry->Sorted(0).begin(), vdocs[id].keyEntry->Sorted(0).end(), IdSet::Unordered); + } else { + if (useExternSt == FtUseExternStatuses::No) { + appendMergedIds( + mergeData, releventDocs, + [&fctx, &mergedIds, &mergeData](IdSetRef::iterator ebegin, IdSetRef::iterator eend, const IDataHolder::MergeInfo &vid) { + assertrx_throw(vid.areaIndex != std::numeric_limits::max()); + fctx->Add(ebegin, eend, vid.proc, std::move(mergeData.vectorAreas[vid.areaIndex])); + mergedIds->Append(ebegin, eend, IdSet::Unordered); + }); } else { - if (vid.areaIndex == std::numeric_limits::max()) { - fctx->Add(vdocs[id].keyEntry->Sorted(0).begin(), vdocs[id].keyEntry->Sorted(0).end(), vid.proc, statuses.rowIds); - } else { - fctx->Add(vdocs[id].keyEntry->Sorted(0).begin(), vdocs[id].keyEntry->Sorted(0).end(), vid.proc, statuses.rowIds, - std::move(mergeData.vectorAreas[vid.areaIndex])); - } - mergedIds->Append(vdocs[id].keyEntry->Sorted(0).begin(), vdocs[id].keyEntry->Sorted(0).end(), statuses.rowIds, - IdSet::Unordered); + appendMergedIds(mergeData, releventDocs, + [&fctx, &mergedIds, &mergeData, &statuses](IdSetRef::iterator ebegin, IdSetRef::iterator eend, + const IDataHolder::MergeInfo &vid) { + assertrx_throw(vid.areaIndex != std::numeric_limits::max()); + fctx->Add(ebegin, eend, vid.proc, statuses.rowIds, std::move(mergeData.vectorAreas[vid.areaIndex])); + mergedIds->Append(ebegin, eend, statuses.rowIds, IdSet::Unordered); + }); } } - if (GetConfig()->logLevel >= LogInfo) { + if rx_unlikely (getConfig()->logLevel >= LogInfo) { logPrintf(LogInfo, "Total merge out: %d ids", mergedIds->size()); 
std::string str; @@ -210,7 +216,7 @@ IdSet::Ptr FastIndexText::Select(FtCtx::Ptr fctx, FtDSLQuery &&dsl, bool inTr } logPrintf(LogInfo, "Relevancy(%d): %s", fctx->GetSize(), str); } - assertrx(mergedIds->size() == fctx->GetSize()); + assertrx_throw(mergedIds->size() == fctx->GetSize()); return mergedIds; } template @@ -243,8 +249,8 @@ void FastIndexText::commitFulltextImpl() { } } } - auto tm2 = high_resolution_clock::now(); - if (GetConfig()->logLevel >= LogInfo) { + if rx_unlikely (getConfig()->logLevel >= LogInfo) { + auto tm2 = high_resolution_clock::now(); logPrintf(LogInfo, "FastIndexText::Commit elapsed %d ms total [ build vdocs %d ms, process data %d ms ]", duration_cast(tm2 - tm0).count(), duration_cast(tm1 - tm0).count(), duration_cast(tm2 - tm1).count()); @@ -295,7 +301,7 @@ void FastIndexText::buildVdocs(Container &data) { vdocs.push_back({doc->second.get(), {}, {}}); #endif - if (GetConfig()->logLevel <= LogInfo) { + if rx_unlikely (getConfig()->logLevel <= LogInfo) { for (auto &f : vdocsTexts.back()) this->holder_->szCnt += f.first.length(); } } @@ -305,9 +311,18 @@ void FastIndexText::buildVdocs(Container &data) { } template -FtFastConfig *FastIndexText::GetConfig() const { - return dynamic_cast(this->cfg_.get()); +template +RX_ALWAYS_INLINE void FastIndexText::appendMergedIds(IDataHolder::MergeData &mergeData, size_t releventDocs, F &&appender) { + auto &holder = *this->holder_; + for (size_t i = 0; i < releventDocs; ++i) { + auto &vid = mergeData[i]; + auto &vdoc = holder.vdocs_[vid.id]; + if (vdoc.keyEntry) { + appender(vdoc.keyEntry->Sorted(0).begin(), vdoc.keyEntry->Sorted(0).end(), vid); + } + } } + template void FastIndexText::initConfig(const FtFastConfig *cfg) { if (cfg) { @@ -316,7 +331,7 @@ void FastIndexText::initConfig(const FtFastConfig *cfg) { this->cfg_.reset(new FtFastConfig(this->ftFields_.size())); this->cfg_->parse(this->opts_.config, this->ftFields_); } - initHolder(*GetConfig()); // -V522 + initHolder(*getConfig()); // -V522 
this->holder_->synonyms_->SetConfig(this->cfg_.get()); } @@ -327,9 +342,9 @@ bool eq_c(Container &c1, Container &c2) { template void FastIndexText::SetOpts(const IndexOpts &opts) { - auto oldCfg = *GetConfig(); + auto oldCfg = *getConfig(); IndexText::SetOpts(opts); - auto &newCfg = *GetConfig(); + auto &newCfg = *getConfig(); if (!eq_c(oldCfg.stopWords, newCfg.stopWords) || oldCfg.stemmers != newCfg.stemmers || oldCfg.maxTypoLen != newCfg.maxTypoLen || oldCfg.enableNumbersSearch != newCfg.enableNumbersSearch || oldCfg.extraWordSymbols != newCfg.extraWordSymbols || @@ -343,32 +358,19 @@ void FastIndexText::SetOpts(const IndexOpts &opts) { } this->holder_->status_ = FullRebuild; if (this->cache_ft_) this->cache_ft_->Clear(); - if (this->preselected_cache_ft_) this->preselected_cache_ft_->Clear(); for (auto &idx : this->idx_map) idx.second.SetVDocID(FtKeyEntryData::ndoc); } else { logPrintf(LogInfo, "FulltextIndex config changed, cache cleared"); if (this->cache_ft_) this->cache_ft_->Clear(); - if (this->preselected_cache_ft_) this->preselected_cache_ft_->Clear(); } this->holder_->synonyms_->SetConfig(&newCfg); } template -reindexer::FtPreselectT FastIndexText::FtPreselect(const QueryEntries &qentries, int idxNo, const SelectFunction &fnCtx, - const RdxContext &rdxCtx) { - WrSerializer ser; - qentries.Serialize(ser); - QueryCacheKey ckey{ser}; +reindexer::FtPreselectT FastIndexText::FtPreselect(const RdxContext &rdxCtx) { this->build(rdxCtx); - auto cacheIt = this->preselected_cache_ft_->Get(ckey); - if (cacheIt.valid && cacheIt.val.ids->size()) { - if (cacheIt.val.ctx->need_area_ || !fnCtx.NeedArea(idxNo)) { - return {std::move(cacheIt)}; - } - } return FtMergeStatuses{FtMergeStatuses::Statuses(holder_->vdocs_.size(), FtMergeStatuses::kExcluded), - std::vector(holder_->rowId2Vdoc_.size(), false), &holder_->rowId2Vdoc_, - cacheIt.valid ? 
std::optional{std::move(ckey)} : std::nullopt}; + std::vector(holder_->rowId2Vdoc_.size(), false), &holder_->rowId2Vdoc_}; } std::unique_ptr FastIndexText_New(const IndexDef &idef, PayloadType payloadType, const FieldsSet &fields) { @@ -396,9 +398,10 @@ std::unique_ptr FastIndexText_New(const IndexDef &idef, PayloadType paylo case IndexTtl: case IndexRTree: case IndexUuidHash: - default: - abort(); + case IndexUuidStore: + break; } + std::abort(); } } // namespace reindexer diff --git a/cpp_src/core/index/indextext/fastindextext.h b/cpp_src/core/index/indextext/fastindextext.h index 6878c59ff..5a6ebb50d 100644 --- a/cpp_src/core/index/indextext/fastindextext.h +++ b/cpp_src/core/index/indextext/fastindextext.h @@ -17,7 +17,7 @@ class FastIndexText : public IndexText { using ref_type = typename IndexUnordered::ref_type; FastIndexText(const FastIndexText& other) : Base(other) { - initConfig(other.GetConfig()); + initConfig(other.getConfig()); for (auto& idx : this->idx_map) idx.second.SetVDocID(FtKeyEntryData::ndoc); this->CommitFulltext(); } @@ -26,7 +26,7 @@ class FastIndexText : public IndexText { initConfig(); } std::unique_ptr Clone() const override { return std::unique_ptr{new FastIndexText(*this)}; } - IdSet::Ptr Select(FtCtx::Ptr fctx, FtDSLQuery&& dsl, bool inTransaction, FtMergeStatuses&&, bool mergeStatusesEmpty, + IdSet::Ptr Select(FtCtx::Ptr fctx, FtDSLQuery&& dsl, bool inTransaction, FtMergeStatuses&&, FtUseExternStatuses, const RdxContext&) override final; IndexMemStat GetMemStat(const RdxContext&) override final; Variant Upsert(const Variant& key, IdType id, bool& clearCache) override final; @@ -35,20 +35,21 @@ class FastIndexText : public IndexText { FtMergeStatuses GetFtMergeStatuses(const RdxContext& rdxCtx) override final { this->build(rdxCtx); return {FtMergeStatuses::Statuses(holder_->vdocs_.size(), 0), std::vector(holder_->rowId2Vdoc_.size(), false), - &holder_->rowId2Vdoc_, std::nullopt}; + &holder_->rowId2Vdoc_}; } - reindexer::FtPreselectT 
FtPreselect(const QueryEntries& qentries, int idxNo, const SelectFunction& fnCtx, - const RdxContext& rdxCtx) override final; - bool EnablePreselectBeforeFt() const override final { return GetConfig()->enablePreselectBeforeFt; } + reindexer::FtPreselectT FtPreselect(const RdxContext& rdxCtx) override final; + bool EnablePreselectBeforeFt() const override final { return getConfig()->enablePreselectBeforeFt; } protected: void commitFulltextImpl() override final; - FtFastConfig* GetConfig() const; + FtFastConfig* getConfig() const noexcept { return dynamic_cast(this->cfg_.get()); } void initConfig(const FtFastConfig* = nullptr); void initHolder(FtFastConfig&); - template void buildVdocs(Data& data); + template + void appendMergedIds(IDataHolder::MergeData& merged, size_t releventDocs, F&& appender); + std::unique_ptr holder_; }; diff --git a/cpp_src/core/index/indextext/fieldsgetter.h b/cpp_src/core/index/indextext/fieldsgetter.h index 89f17ca1b..791932b27 100644 --- a/cpp_src/core/index/indextext/fieldsgetter.h +++ b/cpp_src/core/index/indextext/fieldsgetter.h @@ -1,4 +1,5 @@ #pragma once +#include "core/ft/usingcontainer.h" #include "core/index/payload_map.h" #include "core/payload/fieldsset.h" #include "vendor/utf8cpp/utf8.h" @@ -9,7 +10,6 @@ class FieldsGetter { public: FieldsGetter(const FieldsSet &fields, const PayloadType &plt, KeyValueType type) : fields_(fields), plt_(plt), type_(type) {} - RVector, 8> getDocFields(const key_string &doc, std::vector> &) { if (!utf8::is_valid(doc->cbegin(), doc->cend())) throw Error(errParams, "Invalid UTF8 string in FullText index"); @@ -19,8 +19,8 @@ class FieldsGetter { VariantArray krefs; // Specific implemetation for composite index - - RVector, 8> getDocFields(const PayloadValue &doc, std::vector> &strsBuf) { + RVector, 8> getDocFields(const PayloadValue &doc, + std::vector> &strsBuf) { ConstPayload pl(plt_, doc); uint32_t fieldPos = 0; @@ -29,7 +29,7 @@ class FieldsGetter { RVector, 8> ret; for (auto field : fields_) { 
- krefs.resize(0); + krefs.clear(); bool fieldFromCjson = (field == IndexValueType::SetByJsonPath); if (fieldFromCjson) { assertrx(tagsPathIdx < fields_.getTagsPathsLength()); @@ -39,12 +39,13 @@ class FieldsGetter { } for (const Variant &kref : krefs) { if (!kref.Type().Is()) { - strsBuf.emplace_back(std::unique_ptr(new std::string(kref.As()))); - ret.emplace_back(*strsBuf.back().get(), fieldPos); + auto &str = strsBuf.emplace_back(std::make_unique(kref.As())); + ret.emplace_back(*str, fieldPos); } else { const std::string_view stringRef(kref); - if (!utf8::is_valid(stringRef.data(), stringRef.data() + stringRef.size())) + if (rx_likely(!utf8::is_valid(stringRef.data(), stringRef.data() + stringRef.size()))) { throw Error(errParams, "Invalid UTF8 string in FullTextindex"); + } ret.emplace_back(stringRef, fieldPos); } } diff --git a/cpp_src/core/index/indextext/fuzzyindextext.cc b/cpp_src/core/index/indextext/fuzzyindextext.cc index fd09d1b12..339ac1252 100644 --- a/cpp_src/core/index/indextext/fuzzyindextext.cc +++ b/cpp_src/core/index/indextext/fuzzyindextext.cc @@ -7,10 +7,10 @@ namespace reindexer { template -IdSet::Ptr FuzzyIndexText::Select(FtCtx::Ptr fctx, FtDSLQuery&& dsl, bool inTransaction, FtMergeStatuses&&, bool mergeStatusesEmpty, - const RdxContext& rdxCtx) { - assertrx(mergeStatusesEmpty); - (void)mergeStatusesEmpty; +IdSet::Ptr FuzzyIndexText::Select(FtCtx::Ptr fctx, FtDSLQuery&& dsl, bool inTransaction, FtMergeStatuses&&, + FtUseExternStatuses withExternSt, const RdxContext& rdxCtx) { + assertrx_throw(withExternSt == FtUseExternStatuses::No); + (void)withExternSt; auto result = engine_.Search(dsl, inTransaction, rdxCtx); auto mergedIds = make_intrusive>(); @@ -24,7 +24,7 @@ IdSet::Ptr FuzzyIndexText::Select(FtCtx::Ptr fctx, FtDSLQuery&& dsl, bool inT size_t counter = 0; for (auto it = result.data_->begin(); it != result.data_->end(); ++it, ++counter) { it->proc_ *= coof; - if (it->proc_ < GetConfig()->minOkProc) continue; + if (it->proc_ < 
getConfig()->minOkProc) continue; assertrx(it->id_ < this->vdocs_.size()); const auto& id_set = this->vdocs_[it->id_].keyEntry->Sorted(0); fctx->Add(id_set.begin(), id_set.end(), it->proc_); @@ -54,10 +54,7 @@ void FuzzyIndexText::commitFulltextImpl() { engine_.Commit(); this->isBuilt_ = true; } -template -FtFuzzyConfig* FuzzyIndexText::GetConfig() const { - return dynamic_cast(this->cfg_.get()); -} + template void FuzzyIndexText::CreateConfig(const FtFuzzyConfig* cfg) { if (cfg) { @@ -94,9 +91,10 @@ std::unique_ptr FuzzyIndexText_New(const IndexDef& idef, PayloadType payl case IndexTtl: case IndexRTree: case IndexUuidHash: - default: - abort(); + case IndexUuidStore: + break; } + std::abort(); } } // namespace reindexer diff --git a/cpp_src/core/index/indextext/fuzzyindextext.h b/cpp_src/core/index/indextext/fuzzyindextext.h index 9f0e4f816..d1a94780a 100644 --- a/cpp_src/core/index/indextext/fuzzyindextext.h +++ b/cpp_src/core/index/indextext/fuzzyindextext.h @@ -13,7 +13,7 @@ class FuzzyIndexText : public IndexText { using Base = IndexText; public: - FuzzyIndexText(const FuzzyIndexText& other) : Base(other) { CreateConfig(other.GetConfig()); } + FuzzyIndexText(const FuzzyIndexText& other) : Base(other) { CreateConfig(other.getConfig()); } FuzzyIndexText(const IndexDef& idef, PayloadType payloadType, const FieldsSet& fields) : Base(idef, std::move(payloadType), fields) { CreateConfig(); @@ -25,7 +25,7 @@ class FuzzyIndexText : public IndexText { abort(); } std::unique_ptr Clone() const override final { return std::unique_ptr{new FuzzyIndexText(*this)}; } - IdSet::Ptr Select(FtCtx::Ptr fctx, FtDSLQuery&& dsl, bool inTransaction, FtMergeStatuses&&, bool mergeStatusesEmpty, + IdSet::Ptr Select(FtCtx::Ptr fctx, FtDSLQuery&& dsl, bool inTransaction, FtMergeStatuses&&, FtUseExternStatuses, const RdxContext&) override final; Variant Upsert(const Variant& key, IdType id, bool& clearCache) override final { this->isBuilt_ = false; @@ -37,12 +37,12 @@ class FuzzyIndexText : 
public IndexText { } FtMergeStatuses GetFtMergeStatuses(const RdxContext& rdxCtx) override final { this->build(rdxCtx); - return {{}, {}, nullptr, std::nullopt}; + return {{}, {}, nullptr}; } protected: void commitFulltextImpl() override final; - FtFuzzyConfig* GetConfig() const; + FtFuzzyConfig* getConfig() const noexcept { return dynamic_cast(this->cfg_.get()); } void CreateConfig(const FtFuzzyConfig* cfg = nullptr); search_engine::SearchEngine engine_; diff --git a/cpp_src/core/index/indextext/indextext.cc b/cpp_src/core/index/indextext/indextext.cc index 6550b4ea8..4a8a646db 100644 --- a/cpp_src/core/index/indextext/indextext.cc +++ b/cpp_src/core/index/indextext/indextext.cc @@ -9,10 +9,7 @@ namespace reindexer { template -IndexText::IndexText(const IndexText &other) - : IndexUnordered(other), - cache_ft_(std::make_shared()), - preselected_cache_ft_(std::make_shared()) { +IndexText::IndexText(const IndexText &other) : IndexUnordered(other), cache_ft_(std::make_shared()) { initSearchers(); } // Generic implemetation for string index @@ -26,11 +23,18 @@ void IndexText::initSearchers() { auto fieldIdx = this->fields_[i]; if (fieldIdx == IndexValueType::SetByJsonPath) { assertrx(jsonPathIdx < this->fields_.getJsonPathsLength()); - ftFields_.insert({this->fields_.getJsonPath(jsonPathIdx++), i}); + ftFields_.emplace(this->fields_.getJsonPath(jsonPathIdx++), i); } else { - ftFields_.insert({this->payloadType_->Field(fieldIdx).Name(), i}); + ftFields_.emplace(this->payloadType_->Field(fieldIdx).Name(), i); } } + if rx_unlikely (ftFields_.size() != this->fields_.size()) { + throw Error(errParams, "Composite fulltext index '%s' contains duplicated fields", this->name_); + } + if rx_unlikely (ftFields_.size() > kMaxFtCompositeFields) { + throw Error(errParams, "Unable to create composite fulltext '%s' index with %d fields. 
Fileds count limit is %d", this->name_, + ftFields_.size(), kMaxFtCompositeFields); + } } } @@ -54,7 +58,7 @@ void IndexText::SetOpts(const IndexOpts &opts) { template FtCtx::Ptr IndexText::prepareFtCtx(const BaseFunctionCtx::Ptr &ctx) { FtCtx::Ptr ftctx = reindexer::reinterpret_pointer_cast(ctx); - if (!ftctx) { + if rx_unlikely (!ftctx) { throw Error(errParams, "Full text index (%s) may not be used without context", Index::Name()); } ftctx->PrepareAreas(ftFields_, this->name_); @@ -79,7 +83,7 @@ template SelectKeyResults IndexText::SelectKey(const VariantArray &keys, CondType condition, SortType, Index::SelectOpts opts, const BaseFunctionCtx::Ptr &ctx, const RdxContext &rdxCtx) { const auto indexWard(rdxCtx.BeforeIndexWork()); - if (keys.size() < 1 || (condition != CondEq && condition != CondSet)) { + if rx_unlikely (keys.size() < 1 || (condition != CondEq && condition != CondSet)) { throw Error(errParams, "Full text index (%s) support only EQ or SET condition with 1 or 2 parameter", Index::Name()); } @@ -89,37 +93,38 @@ SelectKeyResults IndexText::SelectKey(const VariantArray &keys, CondType cond IdSetCacheKey ckey{keys, condition, 0}; auto cache_ft = cache_ft_->Get(ckey); if (cache_ft.valid) { - if (!cache_ft.val.ids->size() || (ftctx->NeedArea() && !cache_ft.val.ctx->need_area_)) { + if (!cache_ft.val.ids) { + needPutCache = true; + } else if (ftctx->NeedArea() && (!cache_ft.val.ctx || !cache_ft.val.ctx->need_area_)) { needPutCache = true; } else { - return resultFromCache(keys, cache_ft, std::move(ftctx)); + return resultFromCache(keys, std::move(cache_ft), ftctx); } } - return doSelectKey(keys, *cache_ft_, needPutCache ? std::optional{std::move(ckey)} : std::nullopt, std::move(mergeStatuses), - opts.inTransaction, std::move(ftctx), rdxCtx); + return doSelectKey(keys, needPutCache ? 
std::optional{std::move(ckey)} : std::nullopt, std::move(mergeStatuses), + FtUseExternStatuses::No, opts.inTransaction, std::move(ftctx), rdxCtx); } template -template -SelectKeyResults IndexText::resultFromCache(const VariantArray &keys, const CacheIt &it, const FtCtx::Ptr &ftctx) { - if (cfg_->logLevel >= LogInfo) { +SelectKeyResults IndexText::resultFromCache(const VariantArray &keys, FtIdSetCache::Iterator &&it, FtCtx::Ptr &ftctx) { + if rx_unlikely (cfg_->logLevel >= LogInfo) { logPrintf(LogInfo, "Get search results for '%s' in '%s' from cache", keys[0].As(), this->payloadType_ ? this->payloadType_->Name() : ""); } - SelectKeyResult res; - res.push_back(SingleSelectKeyResult(it.val.ids)); - SelectKeyResults r(std::move(res)); + SelectKeyResults r; + auto &res = r.emplace_back(); + res.emplace_back(std::move(it.val.ids)); + assertrx(it.val.ctx); - ftctx->SetData(it.val.ctx); + ftctx->SetData(std::move(it.val.ctx)); return r; } template -template -SelectKeyResults IndexText::doSelectKey(const VariantArray &keys, Cache &cache, std::optional ckey, - FtMergeStatuses &&mergeStatuses, bool inTransaction, FtCtx::Ptr ftctx, - const RdxContext &rdxCtx) { - if (cfg_->logLevel >= LogInfo) { +SelectKeyResults IndexText::doSelectKey(const VariantArray &keys, const std::optional &ckey, + FtMergeStatuses &&mergeStatuses, FtUseExternStatuses useExternSt, bool inTransaction, + FtCtx::Ptr ftctx, const RdxContext &rdxCtx) { + if rx_unlikely (cfg_->logLevel >= LogInfo) { logPrintf(LogInfo, "Searching for '%s' in '%s' %s", keys[0].As(), this->payloadType_ ? this->payloadType_->Name() : "", ckey ? 
"(will cache)" : ""); } @@ -128,10 +133,10 @@ SelectKeyResults IndexText::doSelectKey(const VariantArray &keys, Cache &cach FtDSLQuery dsl(this->ftFields_, this->cfg_->stopWords, this->cfg_->extraWordSymbols); dsl.parse(keys[0].As()); - auto mergedIds = Select(ftctx, std::move(dsl), inTransaction, std::move(mergeStatuses), std::is_same_v, rdxCtx); + IdSet::Ptr mergedIds = Select(ftctx, std::move(dsl), inTransaction, std::move(mergeStatuses), useExternSt, rdxCtx); SelectKeyResult res; if (mergedIds) { - bool need_put = ckey.has_value(); + bool need_put = (useExternSt == FtUseExternStatuses::No) && ckey.has_value(); if (ftctx->NeedArea() && need_put && mergedIds->size()) { auto config = dynamic_cast(cfg_.get()); if (config && config->maxTotalAreasToCache >= 0) { @@ -153,10 +158,10 @@ SelectKeyResults IndexText::doSelectKey(const VariantArray &keys, Cache &cach d->area_[area.second].Commit(); } } - cache.Put(*ckey, FtIdSetCacheVal{mergedIds, std::move(d)}); + cache_ft_->Put(*ckey, FtIdSetCacheVal{IdSet::Ptr(mergedIds), std::move(d)}); } - res.push_back(SingleSelectKeyResult(std::move(mergedIds))); + res.emplace_back(std::move(mergedIds)); } return SelectKeyResults(std::move(res)); } @@ -165,19 +170,10 @@ template SelectKeyResults IndexText::SelectKey(const VariantArray &keys, CondType condition, Index::SelectOpts opts, const BaseFunctionCtx::Ptr &ctx, FtPreselectT &&preselect, const RdxContext &rdxCtx) { const auto indexWard(rdxCtx.BeforeIndexWork()); - if (keys.size() < 1 || (condition != CondEq && condition != CondSet)) { + if rx_unlikely (keys.size() < 1 || (condition != CondEq && condition != CondSet)) { throw Error(errParams, "Full text index (%s) support only EQ or SET condition with 1 or 2 parameter", Index::Name()); } - - FtCtx::Ptr ftctx = prepareFtCtx(ctx); - auto res = std::visit(overloaded{[&](FtMergeStatuses &mergeStatuses) { - auto ckey = std::move(mergeStatuses.cacheKey); - return doSelectKey(keys, *preselected_cache_ft_, std::move(ckey), 
std::move(mergeStatuses), - opts.inTransaction, std::move(ftctx), rdxCtx); - }, - [&](PreselectedFtIdSetCache::Iterator &it) { return resultFromCache(keys, it, std::move(ftctx)); }}, - preselect); - return res; + return doSelectKey(keys, std::nullopt, std::move(preselect), FtUseExternStatuses::Yes, opts.inTransaction, prepareFtCtx(ctx), rdxCtx); } template diff --git a/cpp_src/core/index/indextext/indextext.h b/cpp_src/core/index/indextext/indextext.h index 1fd9986f3..d09c61a26 100644 --- a/cpp_src/core/index/indextext/indextext.h +++ b/cpp_src/core/index/indextext/indextext.h @@ -21,9 +21,7 @@ class IndexText : public IndexUnordered { public: IndexText(const IndexText& other); IndexText(const IndexDef& idef, PayloadType payloadType, const FieldsSet& fields) - : IndexUnordered(idef, std::move(payloadType), fields), - cache_ft_(std::make_shared()), - preselected_cache_ft_(std::make_shared()) { + : IndexUnordered(idef, std::move(payloadType), fields), cache_ft_(std::make_shared()) { this->selectKeyType_ = KeyValueType::String{}; initSearchers(); } @@ -33,7 +31,7 @@ class IndexText : public IndexUnordered { SelectKeyResults SelectKey(const VariantArray& keys, CondType, Index::SelectOpts, const BaseFunctionCtx::Ptr&, FtPreselectT&&, const RdxContext&) override; void UpdateSortedIds(const UpdateSortedContext&) override {} - virtual IdSet::Ptr Select(FtCtx::Ptr fctx, FtDSLQuery&& dsl, bool inTransaction, FtMergeStatuses&&, bool mergeStatusesEmpty, + virtual IdSet::Ptr Select(FtCtx::Ptr fctx, FtDSLQuery&& dsl, bool inTransaction, FtMergeStatuses&&, FtUseExternStatuses, const RdxContext&) = 0; void SetOpts(const IndexOpts& opts) override; void Commit() override final { @@ -42,7 +40,6 @@ class IndexText : public IndexUnordered { } void CommitFulltext() override final { cache_ft_ = std::make_shared(); - preselected_cache_ft_ = std::make_shared(); commitFulltextImpl(); this->isBuilt_ = true; } @@ -51,12 +48,8 @@ class IndexText : public IndexUnordered { void ClearCache() 
override { Base::ClearCache(); cache_ft_.reset(); - preselected_cache_ft_.reset(); - } - void ClearCache(const std::bitset<64>& s) override { - Base::ClearCache(s); - if (preselected_cache_ft_) preselected_cache_ft_->Clear(); } + void ClearCache(const std::bitset& s) override { Base::ClearCache(s); } void MarkBuilt() noexcept override { assertrx(0); } bool IsFulltext() const noexcept override { return true; } @@ -65,18 +58,15 @@ class IndexText : public IndexUnordered { virtual void commitFulltextImpl() = 0; FtCtx::Ptr prepareFtCtx(const BaseFunctionCtx::Ptr&); - template - SelectKeyResults doSelectKey(const VariantArray& keys, Cache&, std::optional, FtMergeStatuses&&, - bool inTransaction, FtCtx::Ptr, const RdxContext&); - template - SelectKeyResults resultFromCache(const VariantArray& keys, const CacheIt&, const FtCtx::Ptr&); + SelectKeyResults doSelectKey(const VariantArray& keys, const std::optional&, FtMergeStatuses&&, + FtUseExternStatuses useExternSt, bool inTransaction, FtCtx::Ptr, const RdxContext&); + SelectKeyResults resultFromCache(const VariantArray& keys, FtIdSetCache::Iterator&&, FtCtx::Ptr&); void build(const RdxContext& rdxCtx); void initSearchers(); FieldsGetter Getter(); std::shared_ptr cache_ft_; - std::shared_ptr preselected_cache_ft_; RHashMap ftFields_; std::unique_ptr cfg_; diff --git a/cpp_src/core/index/indexunordered.cc b/cpp_src/core/index/indexunordered.cc index e2b0a4f6b..b311fe25f 100644 --- a/cpp_src/core/index/indexunordered.cc +++ b/cpp_src/core/index/indexunordered.cc @@ -251,10 +251,6 @@ SelectKeyResults IndexUnordered::SelectKey(const VariantArray &keys, CondType break; // Get set of keys or single key case CondEq: - if (keys.size() == 0) { - throw Error(errParams, "Condition EQ must have at least 1 argument, but provided 0"); - } - [[fallthrough]]; case CondSet: { struct { T *i_map; @@ -466,9 +462,10 @@ static std::unique_ptr IndexUnordered_New(const IndexDef &idef, PayloadTy case IndexTtl: case IndexRTree: case IndexUuidHash: 
- default: - abort(); + case IndexUuidStore: + break; } + std::abort(); } // NOLINTBEGIN(*cplusplus.NewDeleteLeaks) diff --git a/cpp_src/core/index/indexunordered.h b/cpp_src/core/index/indexunordered.h index 61cc94c54..05cc89d95 100644 --- a/cpp_src/core/index/indexunordered.h +++ b/cpp_src/core/index/indexunordered.h @@ -41,7 +41,7 @@ class IndexUnordered : public IndexStore> { void SetSortedIdxCount(int sortedIdxCount) override; bool HoldsStrings() const noexcept override; void ClearCache() override { cache_.reset(); } - void ClearCache(const std::bitset<64> &s) override { + void ClearCache(const std::bitset &s) override { if (cache_) cache_->ClearSorted(s); } void Dump(std::ostream &os, std::string_view step = " ", std::string_view offset = "") const override { dump(os, step, offset); } diff --git a/cpp_src/core/index/payload_map.h b/cpp_src/core/index/payload_map.h index 7f8c242b8..91132989f 100644 --- a/cpp_src/core/index/payload_map.h +++ b/cpp_src/core/index/payload_map.h @@ -173,13 +173,27 @@ class unordered_payload_map std::pair insert(const std::pair &v) { PayloadValueWithHash key(v.first, payloadType_, fields_); - auto res = base_hash_map::insert(std::make_pair(std::move(key), v.second)); + auto res = base_hash_map::emplate(std::move(key), v.second); if (res.second) add_ref(res.first->first); return res; } std::pair insert(std::pair &&v) { PayloadValueWithHash key(std::move(v.first), payloadType_, fields_); - auto res = base_hash_map::insert(std::make_pair(std::move(key), std::move(v.second))); + auto res = base_hash_map::emplace(std::move(key), std::move(v.second)); + if (res.second) this->add_ref(res.first->first); + return res; + } + template + std::pair emplace(const PayloadValue &pl, V &&v) { + PayloadValueWithHash key(pl, payloadType_, fields_); + auto res = base_hash_map::emplace(std::move(key), std::forward(v)); + if (res.second) this->add_ref(res.first->first); + return res; + } + template + std::pair emplace(PayloadValue &&pl, V &&v) { + 
PayloadValueWithHash key(std::move(pl), payloadType_, fields_); + auto res = base_hash_map::emplace(std::move(key), std::forward(v)); if (res.second) this->add_ref(res.first->first); return res; } diff --git a/cpp_src/core/index/uuid_index.h b/cpp_src/core/index/uuid_index.h index 0863eee22..1352c3f43 100644 --- a/cpp_src/core/index/uuid_index.h +++ b/cpp_src/core/index/uuid_index.h @@ -13,7 +13,6 @@ class UuidIndex : public IndexUnordered std::unique_ptr Clone() const override { return std::unique_ptr{new UuidIndex{*this}}; } using Base::Upsert; void Upsert(VariantArray& result, const VariantArray& keys, IdType id, bool& clearCache) override; // TODO delete this after #1353 - bool IsUuid() const noexcept override { return true; } }; std::unique_ptr IndexUuid_New(const IndexDef& idef, PayloadType payloadType, const FieldsSet& fields); diff --git a/cpp_src/core/indexdef.cc b/cpp_src/core/indexdef.cc index 064770787..afe75ba9b 100644 --- a/cpp_src/core/indexdef.cc +++ b/cpp_src/core/indexdef.cc @@ -68,6 +68,7 @@ static const std::unordered_map, std::equal {IndexFuzzyFT, {"string"s, "fuzzytext"s, condsText(), CapFullText}}, {IndexRTree, {"point"s, "rtree"s, condsGeom(), 0}}, {IndexUuidHash, {"uuid"s, "hash"s, condsUsual(), CapSortable}}, + {IndexUuidStore, {"uuid"s, "-"s, condsUsual(), CapSortable}}, }; // clang-format on return data; @@ -116,13 +117,14 @@ bool IndexDef::IsEqual(const IndexDef &other, bool skipConfig) const { } IndexType IndexDef::Type() const { - std::string iType = indexType_; + using namespace std::string_view_literals; + std::string_view iType = indexType_; if (iType == "") { - if (fieldType_ == "double") { + if (fieldType_ == "double"sv) { iType = "tree"; - } else if (fieldType_ == "bool") { + } else if (fieldType_ == "bool"sv) { iType = "-"; - } else if (fieldType_ == "point") { + } else if (fieldType_ == "point"sv) { iType = "rtree"; } else { iType = "hash"; @@ -150,7 +152,8 @@ const std::vector &IndexDef::Conditions() const { bool 
isSortable(IndexType type) { return availableIndexes().at(type).caps & CapSortable; } bool isStore(IndexType type) noexcept { - return type == IndexIntStore || type == IndexInt64Store || type == IndexStrStore || type == IndexDoubleStore || type == IndexBool; + return type == IndexIntStore || type == IndexInt64Store || type == IndexStrStore || type == IndexDoubleStore || type == IndexBool || + type == IndexUuidStore; } std::string IndexDef::getCollateMode() const { return availableCollates().at(opts_.GetCollateMode()); } @@ -179,13 +182,8 @@ void IndexDef::FromJSON(const gason::JsonNode &root) { opts_.Array(root["is_array"].As()); opts_.Dense(root["is_dense"].As()); opts_.Sparse(root["is_sparse"].As()); - if (fieldType_ == "uuid") { - if (indexType_ != "hash") { - throw Error(errParams, "Unsupported combination of field '%s' type 'uuid' and index type '%s'", name_, indexType_); - } - if (opts_.IsSparse()) { - throw Error(errParams, "UUID index cannot be sparse"); - } + if (fieldType_ == "uuid" && opts_.IsSparse()) { + throw Error(errParams, "UUID index cannot be sparse"); } opts_.SetConfig(stringifyJson(root["config"])); const std::string rtreeType = root["rtree_type"].As(); diff --git a/cpp_src/core/item.cc b/cpp_src/core/item.cc index 968ae2bb1..379f441ce 100644 --- a/cpp_src/core/item.cc +++ b/cpp_src/core/item.cc @@ -8,9 +8,6 @@ namespace reindexer { -Item::FieldRef::FieldRef(int field, ItemImpl *itemImpl) noexcept : itemImpl_(itemImpl), field_(field) {} -Item::FieldRef::FieldRef(std::string_view jsonPath, ItemImpl *itemImpl) noexcept : itemImpl_(itemImpl), jsonPath_(jsonPath), field_(-1) {} - Item &Item::operator=(Item &&other) noexcept { if (&other != this) { if (impl_) { @@ -29,6 +26,13 @@ Item &Item::operator=(Item &&other) noexcept { return *this; } +KeyValueType Item::GetIndexType(int field) const noexcept { + if (!impl_ || field < 0 || field >= impl_->Type().NumFields()) { + return KeyValueType::Undefined{}; + } + return impl_->Type().Field(field).Type(); 
+} + std::string_view Item::FieldRef::Name() const { return field_ >= 0 ? itemImpl_->Type().Field(field_).Name() : jsonPath_; } Item::FieldRef::operator Variant() const { @@ -45,12 +49,12 @@ Item::FieldRef::operator Variant() const { } Item::FieldRef::operator VariantArray() const { - VariantArray kr; - if (field_ >= 0) + if (field_ >= 0) { + VariantArray kr; itemImpl_->GetPayload().Get(field_, kr); - else - kr = itemImpl_->GetValueByJSONPath(jsonPath_); - return kr; + return kr; + } + return itemImpl_->GetValueByJSONPath(jsonPath_); } Item::FieldRef &Item::FieldRef::operator=(Variant kr) { @@ -115,10 +119,10 @@ Item::~Item() { auto ns = impl_->GetNamespace(); if (ns) { ns->ToPool(impl_); - impl_ = nullptr; + } else { + delete impl_; } } - delete impl_; } Error Item::FromJSON(std::string_view slice, char **endp, bool pkOnly) &noexcept { @@ -147,18 +151,17 @@ Error Item::GetMsgPack(WrSerializer &wrser) &noexcept { RETURN_RESULT_NOEXCEPT(i Error Item::GetProtobuf(WrSerializer &wrser) &noexcept { RETURN_RESULT_NOEXCEPT(impl_->GetProtobuf(wrser)); } int Item::NumFields() const { return impl_->Type().NumFields(); } -Item::FieldRef Item::operator[](int field) const noexcept { - assertrx(field >= 0 && field < impl_->Type().NumFields()); + +Item::FieldRef Item::operator[](int field) const { + if (rx_unlikely(field < 0 || field >= impl_->Type().NumFields())) { + throw Error(errLogic, "Item::operator[] requires indexed field. Values range: [0; %d]", impl_->Type().NumFields()); + } return FieldRef(field, impl_); } Item::FieldRef Item::operator[](std::string_view name) const noexcept { int field = 0; - if (impl_->Type().FieldByName(name, field)) { - return FieldRef(field, impl_); - } else { - return FieldRef(name, impl_); - } + return (impl_->Type().FieldByName(name, field)) ? 
FieldRef(field, impl_) : FieldRef(name, impl_); } int Item::GetFieldTag(std::string_view name) const { return impl_->NameTag(name); } diff --git a/cpp_src/core/item.h b/cpp_src/core/item.h index 68dc9dd54..a63f8c2cc 100644 --- a/cpp_src/core/item.h +++ b/cpp_src/core/item.h @@ -65,7 +65,7 @@ class Item { /// Set single point type value /// @param p - point value, which will be setted to field FieldRef &operator=(Point p) { - const double arr[]{p.x, p.y}; + const double arr[]{p.X(), p.Y()}; return operator=(span(arr, 2)); } @@ -110,8 +110,8 @@ class Item { FieldRef &operator=(const VariantArray &krs); private: - FieldRef(int field, ItemImpl *itemImpl) noexcept; - FieldRef(std::string_view jsonPath, ItemImpl *itemImpl) noexcept; + FieldRef(int field, ItemImpl *itemImpl) noexcept : itemImpl_(itemImpl), field_(field) {} + FieldRef(std::string_view jsonPath, ItemImpl *itemImpl) noexcept : itemImpl_(itemImpl), jsonPath_(jsonPath), field_(-1) {} ItemImpl *itemImpl_; std::string_view jsonPath_; int field_; @@ -179,7 +179,7 @@ class Item { /// Get field by number /// @param field - number of field. Must be >= 0 && < NumFields /// @return FieldRef which contains reference to indexed field - [[nodiscard]] FieldRef operator[](int field) const noexcept; + [[nodiscard]] FieldRef operator[](int field) const; /// Get field by name /// @param name - name of field /// @return FieldRef which contains reference to indexed field @@ -211,6 +211,9 @@ class Item { /// The disadvantage of unsafe mode is potentially danger code. 
Most of C++ stl containters in many cases invalidates references - /// and in unsafe mode caller is responsibe to guarantee, that all resources passed to Item will keep valid Item &Unsafe(bool enable = true) &noexcept; + /// Get index type by field id + /// @return either index type or Undefined (if index with this number does not exist or PayloadType is not available) + KeyValueType GetIndexType(int field) const noexcept; private: explicit Item(ItemImpl *impl) : impl_(impl) {} diff --git a/cpp_src/core/itemimpl.cc b/cpp_src/core/itemimpl.cc index 0ae11e57a..be2458028 100644 --- a/cpp_src/core/itemimpl.cc +++ b/cpp_src/core/itemimpl.cc @@ -53,10 +53,10 @@ void ItemImpl::ModifyField(const IndexedTagsPath &tagsPath, const VariantArray & try { switch (mode) { case FieldModeSet: - cjsonModifier.SetFieldValue(cjson, tagsPath, keys, ser_); + cjsonModifier.SetFieldValue(cjson, tagsPath, keys, ser_, pl); break; case FieldModeSetJson: - cjsonModifier.SetObject(cjson, tagsPath, keys, ser_, &pl); + cjsonModifier.SetObject(cjson, tagsPath, keys, ser_, pl); break; case FieldModeDrop: cjsonModifier.RemoveField(cjson, tagsPath, ser_); @@ -72,7 +72,7 @@ void ItemImpl::ModifyField(const IndexedTagsPath &tagsPath, const VariantArray & } tupleData_ = ser_.DetachLStr(); - pl.Set(0, {Variant(p_string(reinterpret_cast(tupleData_.get())))}); + pl.Set(0, Variant(p_string(reinterpret_cast(tupleData_.get())))); } void ItemImpl::SetField(std::string_view jsonPath, const VariantArray &keys, const IndexExpressionEvaluator &ev) { @@ -85,7 +85,7 @@ void ItemImpl::GetField(int field, VariantArray &values) { GetPayload().Get(fiel Error ItemImpl::FromMsgPack(std::string_view buf, size_t &offset) { Payload pl = GetPayload(); if (!msgPackDecoder_) { - msgPackDecoder_.reset(new MsgPackDecoder(&tagsMatcher_)); + msgPackDecoder_.reset(new MsgPackDecoder(tagsMatcher_)); } ser_.Reset(); @@ -93,7 +93,7 @@ Error ItemImpl::FromMsgPack(std::string_view buf, size_t &offset) { Error err = 
msgPackDecoder_->Decode(buf, pl, ser_, offset); if (err.ok()) { tupleData_ = ser_.DetachLStr(); - pl.Set(0, {Variant(p_string(reinterpret_cast(tupleData_.get())))}); + pl.Set(0, Variant(p_string(reinterpret_cast(tupleData_.get())))); } return err; } @@ -108,7 +108,7 @@ Error ItemImpl::FromProtobuf(std::string_view buf) { Error err = decoder.Decode(buf, pl, ser_); if (err.ok()) { tupleData_ = ser_.DetachLStr(); - pl.Set(0, {Variant(p_string(reinterpret_cast(tupleData_.get())))}); + pl.Set(0, Variant(p_string(reinterpret_cast(tupleData_.get())))); } return err; } @@ -122,7 +122,7 @@ Error ItemImpl::GetMsgPack(WrSerializer &wrser) { MsgPackBuilder msgpackBuilder(wrser, &tagsLengths, &startTag, ObjType::TypePlain, &tagsMatcher_); msgpackEncoder.Encode(pl, msgpackBuilder); - return errOK; + return Error(); } Error ItemImpl::GetProtobuf(WrSerializer &wrser) { @@ -131,7 +131,7 @@ Error ItemImpl::GetProtobuf(WrSerializer &wrser) { ProtobufBuilder protobufBuilder(&wrser, ObjType::TypePlain, schema_.get(), &tagsMatcher_); ProtobufEncoder protobufEncoder(&tagsMatcher_); protobufEncoder.Encode(pl, protobufBuilder); - return errOK; + return Error(); } // Construct item from compressed json @@ -165,7 +165,7 @@ void ItemImpl::FromCJSON(std::string_view slice, bool pkOnly, Recoder *recoder) if (!rdser.Eof()) throw Error(errParseJson, "Internal error - left unparsed data %d", rdser.Pos()); tupleData_ = ser_.DetachLStr(); - pl.Set(0, {Variant(p_string(reinterpret_cast(tupleData_.get())))}); + pl.Set(0, Variant(p_string(reinterpret_cast(tupleData_.get())))); } Error ItemImpl::FromJSON(std::string_view slice, char **endp, bool pkOnly) { @@ -216,7 +216,7 @@ Error ItemImpl::FromJSON(std::string_view slice, char **endp, bool pkOnly) { // Put tuple to field[0] tupleData_ = ser_.DetachLStr(); - pl.Set(0, {Variant(p_string(reinterpret_cast(tupleData_.get())))}); + pl.Set(0, Variant(p_string(reinterpret_cast(tupleData_.get())))); return err; } diff --git a/cpp_src/core/itemmodifier.cc 
b/cpp_src/core/itemmodifier.cc index adcdcd19e..fccc8950d 100644 --- a/cpp_src/core/itemmodifier.cc +++ b/cpp_src/core/itemmodifier.cc @@ -8,7 +8,7 @@ namespace reindexer { ItemModifier::FieldData::FieldData(const UpdateEntry &entry, NamespaceImpl &ns) - : entry_(entry), tagsPath_(), arrayIndex_(IndexValueType::NotSet), isIndex_(false) { + : entry_(entry), tagsPathWithLastIndex_{std::nullopt}, arrayIndex_(IndexValueType::NotSet), isIndex_(false) { if (ns.getIndexByName(entry_.Column(), fieldIndex_)) { isIndex_ = true; if (!entry.IsExpression()) { @@ -44,10 +44,6 @@ ItemModifier::FieldData::FieldData(const UpdateEntry &entry, NamespaceImpl &ns) if (tagsPath_.empty()) { throw Error(errParams, "Cannot find field by json: '%s'", entry_.Column()); } - if (tagsPath_.back().IsWithIndex()) { - arrayIndex_ = tagsPath_.back().Index(); - tagsPath_.back().SetIndex(IndexValueType::NotSet); - } } } else { TagsPath tp; @@ -64,7 +60,7 @@ ItemModifier::FieldData::FieldData(const UpdateEntry &entry, NamespaceImpl &ns) isIndex_ = true; } else { fieldIndex_ = 0; - isIndex_ = ns.getIndexByName(jsonPath, fieldIndex_); + isIndex_ = ns.getIndexByNameOrJsonPath(jsonPath, fieldIndex_) || ns.getSparseIndexByJsonPath(jsonPath, fieldIndex_); } if (!entry.IsExpression()) { tagsPath_ = std::move(tagsPath); @@ -72,19 +68,11 @@ ItemModifier::FieldData::FieldData(const UpdateEntry &entry, NamespaceImpl &ns) throw Error(errParams, "Cannot find field by json: '%s'", entry_.Column()); } if (isIndex_) { - bool hasObjectArrayNodes = false; - for (unsigned i = 0, size = tagsPath_.size(); i < size; ++i) { - if (tagsPath_[i].IsWithIndex() && i + 1 < size) { - hasObjectArrayNodes = true; - break; - } - } - if (hasObjectArrayNodes && !tagsPath_.back().IsArrayNode()) { - throw Error(errParams, "Update of the index non-array field, nested in the objects array is not supported yet"); - } - if (tagsPath_.back().IsWithIndex()) { - arrayIndex_ = tagsPath_.back().Index(); - 
tagsPath_.back().SetIndex(IndexValueType::NotSet); + auto &lastTag = tagsPath_.back(); + if (lastTag.IsWithIndex()) { + tagsPathWithLastIndex_ = tagsPath_; + arrayIndex_ = lastTag.Index(); + lastTag.SetIndex(IndexValueType::NotSet); } } } @@ -96,7 +84,6 @@ void ItemModifier::FieldData::updateTagsPath(TagsMatcher &tm, const IndexExpress tagsPath_ = tm.path2indexedtag(entry_.Column(), ev, true); } for (size_t i = 0; i < tagsPath_.size(); ++i) { - bool isLast = (i == tagsPath_.size() - 1); if (tagsPath_[i].IsWithExpression()) { IndexedPathNode &node = tagsPath_[i]; VariantArray vals = ev(node.Expression()); @@ -110,9 +97,13 @@ void ItemModifier::FieldData::updateTagsPath(TagsMatcher &tm, const IndexExpress }); node.SetIndex(vals.front().As()); } - if (isLast && isIndex_ && tagsPath_[i].IsWithIndex()) { - arrayIndex_ = tagsPath_[i].Index(); - tagsPath_[i].SetIndex(IndexValueType::NotSet); + } + if (tagsPath_.size()) { + auto &lastTag = tagsPath_.back(); + if (lastTag.IsWithIndex()) { + arrayIndex_ = lastTag.Index(); + tagsPathWithLastIndex_ = tagsPath_; + lastTag.SetIndex(IndexValueType::NotSet); } } } @@ -135,8 +126,9 @@ void ItemModifier::Modify(IdType itemId, const RdxContext &ctx, h_vector(); if (field.details().IsExpression()) { assertrx(field.details().Values().size() > 0); values = ev.Evaluate(static_cast(field.details().Values().front()), pv, field.name(), ctx); @@ -147,10 +139,14 @@ void ItemModifier::Modify(IdType itemId, const RdxContext &ctx, h_vector &replUpdates, const RdxContext &ctx) { PayloadValue &plData = ns_.items_[id]; - Payload pl(ns_.payloadType_, plData); + Payload pl(*ns_.payloadType_.get(), plData); VariantArray cjsonKref; pl.Get(0, cjsonKref); cjsonCache_.Reset(); - if (cjsonKref.size() > 0) { - Variant v = cjsonKref.front(); - if (v.Type().Is()) { - cjsonCache_.Assign(std::string_view(p_string(v))); - } + + const Variant &v = cjsonKref.front(); + if (v.Type().Is()) { + cjsonCache_.Assign(std::string_view(p_string(v))); } ItemImpl 
itemimpl(ns_.payloadType_, pv, ns_.tagsMatcher_); @@ -176,7 +171,10 @@ void ItemModifier::modifyCJSON(PayloadValue &pv, IdType id, FieldData &field, Va Item item = ns_.newItem(); Error err = item.FromCJSON(itemimpl.GetCJSON(true)); - if (!err.ok()) throw err; + if (!err.ok()) { + pl.Set(0, cjsonKref); + throw err; + } item.setID(id); ItemImpl *impl = item.impl_; ns_.setFieldsBasedOnPrecepts(impl, replUpdates, ctx); @@ -185,7 +183,7 @@ void ItemModifier::modifyCJSON(PayloadValue &pv, IdType id, FieldData &field, Va Payload plNew = impl->GetPayload(); plData.Clone(pl.RealSize()); - auto strHolder = ns_.StrHolder(true, ctx); + auto strHolder = ns_.strHolder(); auto indexesCacheCleaner{ns_.GetIndexesCacheCleaner()}; h_vector needUpdateCompIndexes(ns_.indexes_.compositeIndexesSize(), false); for (int i = ns_.indexes_.firstCompositePos(); i < ns_.indexes_.totalSize(); ++i) { @@ -277,7 +275,7 @@ void ItemModifier::modifyCJSON(PayloadValue &pv, IdType id, FieldData &field, Va impl->RealValue() = pv; } -void ItemModifier::modifyField(IdType itemId, FieldData &field, Payload &pl, VariantArray &values, const RdxContext &ctx) { +void ItemModifier::modifyField(IdType itemId, FieldData &field, Payload &pl, VariantArray &values) { Index &index = *(ns_.indexes_[field.index()]); if (field.isIndex() && !index.Opts().IsSparse() && field.details().Mode() == FieldModeDrop /*&& !(field.arrayIndex() != IndexValueType::NotSet || field.tagspath().back().IsArrayNode())*/) { // TODO #1218 allow to drop array fields @@ -289,22 +287,19 @@ void ItemModifier::modifyField(IdType itemId, FieldData &field, Payload &pl, Var throw Error(errParams, "It's not possible to Update single index fields with arrays!"); } - if (index.Opts().IsSparse()) { - pl.GetByJsonPath(index.Fields().getTagsPath(0), ns_.skrefs, index.KeyType()); - } else if (!index.Opts().IsArray()) { - pl.Get(field.index(), ns_.skrefs); - } - if (index.Opts().GetCollateMode() == CollateUTF8) { for (const Variant &key : values) 
key.EnsureUTF8(); } - auto strHolder = ns_.StrHolder(true, ctx); + auto strHolder = ns_.strHolder(); auto indexesCacheCleaner{ns_.GetIndexesCacheCleaner()}; h_vector needUpdateCompIndexes(ns_.indexes_.compositeIndexesSize(), false); - for (int i = ns_.indexes_.firstCompositePos(); i < ns_.indexes_.totalSize(); ++i) { - const auto &fields = ns_.indexes_[i]->Fields(); - const auto idxId = i - ns_.indexes_.firstCompositePos(); + const auto firstCompositePos = ns_.indexes_.firstCompositePos(); + const auto totalIndexes = ns_.indexes_.totalSize(); + for (int i = firstCompositePos; i < totalIndexes; ++i) { + auto &compositeIdx = ns_.indexes_[i]; + const auto &fields = compositeIdx->Fields(); + const auto idxId = i - firstCompositePos; for (const auto f : fields) { if (f == IndexValueType::SetByJsonPath) continue; if (f == field.index()) { @@ -322,22 +317,23 @@ void ItemModifier::modifyField(IdType itemId, FieldData &field, Payload &pl, Var if (!needUpdateCompIndexes[idxId]) continue; } bool needClearCache{false}; - ns_.indexes_[i]->Delete(Variant(ns_.items_[itemId]), itemId, *strHolder, needClearCache); - if (needClearCache && ns_.indexes_[i]->IsOrdered()) indexesCacheCleaner.Add(ns_.indexes_[i]->SortId()); + compositeIdx->Delete(Variant(ns_.items_[itemId]), itemId, *strHolder, needClearCache); + if (needClearCache && compositeIdx->IsOrdered()) indexesCacheCleaner.Add(compositeIdx->SortId()); } const auto insertItemIntoCompositeIndexes = [&] { - for (int i = ns_.indexes_.firstCompositePos(); i < ns_.indexes_.totalSize(); ++i) { - if (!needUpdateCompIndexes[i - ns_.indexes_.firstCompositePos()]) continue; + for (int i = firstCompositePos; i < totalIndexes; ++i) { + if (!needUpdateCompIndexes[i - firstCompositePos]) continue; bool needClearCache{false}; - ns_.indexes_[i]->Upsert(Variant(ns_.items_[itemId]), itemId, needClearCache); - if (needClearCache && ns_.indexes_[i]->IsOrdered()) indexesCacheCleaner.Add(ns_.indexes_[i]->SortId()); + auto &compositeIdx = 
ns_.indexes_[i]; + compositeIdx->Upsert(Variant(ns_.items_[itemId]), itemId, needClearCache); + if (needClearCache && compositeIdx->IsOrdered()) indexesCacheCleaner.Add(compositeIdx->SortId()); } }; try { if (field.isIndex()) { - modifyIndexValues(itemId, field, values, pl, ctx); + modifyIndexValues(itemId, field, values, pl); } if (index.Opts().IsSparse() || index.Opts().IsArray() || index.KeyType().Is() || !field.isIndex()) { @@ -345,17 +341,18 @@ void ItemModifier::modifyField(IdType itemId, FieldData &field, Payload &pl, Var Variant oldTupleValue = item.GetField(0); oldTupleValue.EnsureHold(); bool needClearCache{false}; - ns_.indexes_[0]->Delete(oldTupleValue, itemId, *strHolder, needClearCache); + auto &tupleIdx = ns_.indexes_[0]; + tupleIdx->Delete(oldTupleValue, itemId, *strHolder, needClearCache); Variant tupleValue; std::exception_ptr exception; try { - item.ModifyField(field.tagspath(), values, field.details().Mode()); + item.ModifyField(field.tagspathWithLastIndex(), values, field.details().Mode()); } catch (...) 
{ exception = std::current_exception(); } - tupleValue = ns_.indexes_[0]->Upsert(item.GetField(0), itemId, needClearCache); - if (needClearCache && ns_.indexes_[0]->IsOrdered()) indexesCacheCleaner.Add(ns_.indexes_[0]->SortId()); - pl.Set(0, {std::move(tupleValue)}); + tupleValue = tupleIdx->Upsert(item.GetField(0), itemId, needClearCache); + if (needClearCache && tupleIdx->IsOrdered()) indexesCacheCleaner.Add(tupleIdx->SortId()); + pl.Set(0, std::move(tupleValue)); ns_.tagsMatcher_.try_merge(item.tagsMatcher()); if (exception) { std::rethrow_exception(exception); @@ -370,66 +367,96 @@ void ItemModifier::modifyField(IdType itemId, FieldData &field, Payload &pl, Var insertItemIntoCompositeIndexes(); } -void ItemModifier::modifyIndexValues(IdType itemId, const FieldData &field, VariantArray &values, Payload &pl, const RdxContext &ctx) { +void ItemModifier::modifyIndexValues(IdType itemId, const FieldData &field, VariantArray &values, Payload &pl) { Index &index = *(ns_.indexes_[field.index()]); if (values.IsNullValue() && !index.Opts().IsArray()) { throw Error(errParams, "Non-array index fields cannot be set to null!"); } - bool isArrayItem = (field.tagspath().back().IsForAllItems() || field.arrayIndex() != IndexValueType::NotSet); - if (index.Opts().IsArray() && values.IsArrayValue() && isArrayItem) { - throw Error(errParams, "Array items are supposed to be updated with a single value, not an array"); - } - auto strHolder = ns_.StrHolder(true, ctx); + auto strHolder = ns_.strHolder(); auto indexesCacheCleaner{ns_.GetIndexesCacheCleaner()}; - if (index.Opts().IsArray() && !values.IsArrayValue() && !values.IsNullValue()) { - if (values.empty()) { + bool updateArrayPart = field.arrayIndex() >= 0; + bool isForAllItems = false; + for (const auto &tag : field.tagspath()) { + if (tag.IsArrayNode()) { + updateArrayPart = true; + } + if (tag.IsForAllItems()) { + isForAllItems = true; + continue; + } + if (isForAllItems && tag.IsWithIndex()) { + throw Error(errParams, 
"Expressions like 'field[*].field[1]=10' are supported for sparse indexes/non-index fields only"); + } + } + + ns_.krefs.resize(0); + for (Variant &key : values) { + key.convert(index.KeyType()); + } + + if (index.Opts().IsArray() && updateArrayPart && !index.Opts().IsSparse()) { + if (!values.IsArrayValue() && values.empty()) { throw Error(errParams, "Cannot update array item with an empty value"); // TODO #1218 maybe delete this } - int offset = 0, length = 0; - values.front().convert(index.KeyType()); - bool needClearCache{false}; - ns_.krefs.resize(1); - if (field.tagspath().back().IsForAllItems()) { - ns_.skrefs = pl.GetIndexedArrayData(field.tagspath(), offset, length); - if (!ns_.skrefs.empty()) { - bool needClearCache{false}; - index.Delete(ns_.skrefs, itemId, *strHolder, needClearCache); - if (needClearCache && index.IsOrdered()) indexesCacheCleaner.Add(index.SortId()); - } - ns_.krefs.front() = index.Upsert(values.front(), itemId, needClearCache); - if (!index.Opts().IsSparse()) { - for (int i = offset; i < offset + length; ++i) { - pl.Set(field.index(), i, ns_.krefs.front()); - } + int offset = -1, length = -1; + bool isForAllItems = false; + for (const auto &tag : field.tagspath()) { // TODO: Move to FieldEntry? 
+ if (tag.IsForAllItems()) { + isForAllItems = true; + continue; } - } else { - if (field.arrayIndex() == IndexValueType::NotSet) { - throw Error(errParams, "Array index is not set"); + if (isForAllItems && tag.IsWithIndex()) { + throw Error(errParams, "Expressions like 'field[*].field[1]=10' are supported for sparse indexes/non-index fields only"); } - IndexedTagsPath arrayPath = field.tagspath(); - arrayPath.back().SetIndex(field.arrayIndex()); - ns_.skrefs = pl.GetIndexedArrayData(arrayPath, offset, length); - if (field.arrayIndex() < length) { - if (!ns_.skrefs.empty()) { - bool needClearCache{false}; - index.Delete(ns_.skrefs.front(), itemId, *strHolder, needClearCache); - if (needClearCache && index.IsOrdered()) indexesCacheCleaner.Add(index.SortId()); - } - ns_.krefs.front() = index.Upsert(values.front(), itemId, needClearCache); - if (!index.Opts().IsSparse()) { - pl.Set(field.index(), offset, ns_.krefs.front()); + } + + ns_.skrefs = pl.GetIndexedArrayData(field.tagspathWithLastIndex(), field.index(), offset, length); + if (offset < 0 || length < 0) { + const auto &path = field.tagspathWithLastIndex(); + std::string indexesStr; + for (auto &p : path) { + if (p.Index() >= 0) { + if (indexesStr.size()) { + indexesStr.append(","); + } + indexesStr.append(std::to_string(p.Index())); } - } else { - throw Error(errLogic, "Array index is out of range: [%d/%d]", field.arrayIndex(), length); } + throw Error(errParams, "Requested array's index was not found: [%s]", indexesStr); } + if (field.arrayIndex() != IndexValueType::NotSet && field.arrayIndex() >= length) { + throw Error(errLogic, "Array index is out of range: [%d/%d]", field.arrayIndex(), length); + } + + if (!ns_.skrefs.empty()) { + bool needClearCache{false}; + index.Delete(ns_.skrefs.front(), itemId, *strHolder, needClearCache); + if (needClearCache && index.IsOrdered()) indexesCacheCleaner.Add(index.SortId()); + } + + bool needClearCache{false}; + index.Upsert(ns_.krefs, values, itemId, needClearCache); 
if (needClearCache && index.IsOrdered()) indexesCacheCleaner.Add(index.SortId()); - if (!index.Opts().IsSparse()) { - values.clear(); - values.resize(length, ns_.krefs.front()); + + if (isForAllItems) { + for (int i = offset, end = offset + length; i < end; ++i) { + pl.Set(field.index(), i, ns_.krefs.front()); + } + } else if (field.arrayIndex() == IndexValueType::NotSet) { + // Array may be resized + VariantArray v; + pl.Get(field.index(), v); + v.erase(v.begin() + offset, v.begin() + offset + length); + v.insert(v.begin() + offset, ns_.krefs.begin(), ns_.krefs.end()); + pl.Set(field.index(), v); + } else { + // Exactly one value was changed + pl.Set(field.index(), offset, ns_.krefs.front()); } } else { - if (index.Opts().IsArray() && !index.Opts().IsSparse()) { + if (index.Opts().IsSparse()) { + pl.GetByJsonPath(field.tagspathWithLastIndex(), ns_.skrefs, index.KeyType()); + } else { pl.Get(field.index(), ns_.skrefs, true); } if (!ns_.skrefs.empty()) { @@ -437,21 +464,13 @@ void ItemModifier::modifyIndexValues(IdType itemId, const FieldData &field, Vari index.Delete(ns_.skrefs, itemId, *strHolder, needClearCache); if (needClearCache && index.IsOrdered()) indexesCacheCleaner.Add(index.SortId()); } - ns_.krefs.resize(0); - ns_.krefs.reserve(values.size()); - for (Variant &key : values) { - key.convert(index.KeyType()); - } + bool needClearCache{false}; index.Upsert(ns_.krefs, values, itemId, needClearCache); if (needClearCache && index.IsOrdered()) indexesCacheCleaner.Add(index.SortId()); if (!index.Opts().IsSparse()) { pl.Set(field.index(), ns_.krefs); } - // Do not using std::swap, because it may clear some of the internal properties of the VariantsArray - values.clear(); - values.insert(values.begin(), std::make_move_iterator(ns_.krefs.begin()), std::make_move_iterator(ns_.krefs.end())); - ns_.krefs.clear(); } } diff --git a/cpp_src/core/itemmodifier.h b/cpp_src/core/itemmodifier.h index f883318c4..ed0dc8a42 100644 --- a/cpp_src/core/itemmodifier.h +++ 
b/cpp_src/core/itemmodifier.h @@ -1,5 +1,6 @@ #pragma once +#include #include "cluster/updaterecord.h" #include "core/keyvalue/p_string.h" #include "core/payload/payloadiface.h" @@ -27,6 +28,9 @@ class ItemModifier { void updateTagsPath(TagsMatcher &tm, const IndexExpressionEvaluator &ev); const UpdateEntry &details() const noexcept { return entry_; } const IndexedTagsPath &tagspath() const noexcept { return tagsPath_; } + const IndexedTagsPath &tagspathWithLastIndex() const noexcept { + return tagsPathWithLastIndex_ ? *tagsPathWithLastIndex_ : tagsPath_; + } int arrayIndex() const noexcept { return arrayIndex_; } int index() const noexcept { return fieldIndex_; } bool isIndex() const noexcept { return isIndex_; } @@ -35,6 +39,7 @@ class ItemModifier { private: const UpdateEntry &entry_; IndexedTagsPath tagsPath_; + std::optional tagsPathWithLastIndex_; int fieldIndex_{IndexValueType::SetByJsonPath}; int arrayIndex_; bool isIndex_; @@ -58,10 +63,10 @@ class ItemModifier { std::string_view cjson_; }; - void modifyField(IdType itemId, FieldData &field, Payload &pl, VariantArray &values, const RdxContext &); + void modifyField(IdType itemId, FieldData &field, Payload &pl, VariantArray &values); void modifyCJSON(PayloadValue &pv, IdType itemId, FieldData &field, VariantArray &values, h_vector &pendedRepl, const RdxContext &); - void modifyIndexValues(IdType itemId, const FieldData &field, VariantArray &values, Payload &pl, const RdxContext &); + void modifyIndexValues(IdType itemId, const FieldData &field, VariantArray &values, Payload &pl); NamespaceImpl &ns_; const std::vector &updateEntries_; diff --git a/cpp_src/core/joincache.h b/cpp_src/core/joincache.h index def18e5fc..8ec4030f2 100644 --- a/cpp_src/core/joincache.h +++ b/cpp_src/core/joincache.h @@ -9,12 +9,11 @@ namespace reindexer { struct JoinCacheKey { - JoinCacheKey() {} - JoinCacheKey(const JoinCacheKey &other) { - if (this != &other) { - buf_ = other.buf_; - } - } + JoinCacheKey() = default; + 
JoinCacheKey(JoinCacheKey &&other) = default; + JoinCacheKey(const JoinCacheKey &other) = default; + JoinCacheKey &operator=(JoinCacheKey &&other) = default; + JoinCacheKey &operator=(const JoinCacheKey &other) = delete; void SetData(const Query &q) { WrSerializer ser; q.Serialize(ser, (SkipJoinQueries | SkipMergeQueries)); @@ -28,7 +27,7 @@ struct JoinCacheKey { buf_.reserve(buf_.size() + ser.Len()); buf_.insert(buf_.end(), ser.Buf(), ser.Buf() + ser.Len()); } - size_t Size() const { return sizeof(JoinCacheKey) + (buf_.is_hdata() ? 0 : buf_.size()); } + size_t Size() const noexcept { return sizeof(JoinCacheKey) + (buf_.is_hdata() ? 0 : buf_.size()); } h_vector buf_; }; @@ -48,8 +47,8 @@ struct hash_join_cache_key { struct JoinPreResult; struct JoinCacheVal { - JoinCacheVal() {} - size_t Size() const { return ids_ ? sizeof(*ids_.get()) + ids_->heap_size() : 0; } + JoinCacheVal() = default; + size_t Size() const noexcept { return ids_ ? (sizeof(*ids_.get()) + ids_->heap_size()) : 0; } IdSet::Ptr ids_; bool matchedAtLeastOnce = false; bool inited = false; diff --git a/cpp_src/core/keyvalue/geometry.cc b/cpp_src/core/keyvalue/geometry.cc new file mode 100644 index 000000000..a130fdb60 --- /dev/null +++ b/cpp_src/core/keyvalue/geometry.cc @@ -0,0 +1,9 @@ +#include "geometry.h" + +namespace reindexer { + +[[noreturn]] void Point::throwInfError(std::string_view name) { throw Error(errParams, "Point coordinate '%s' can not be inf", name); } + +[[noreturn]] void Point::throwNanError(std::string_view name) { throw Error(errParams, "Point coordinate '%s' can not be nan", name); } + +} // namespace reindexer diff --git a/cpp_src/core/keyvalue/geometry.h b/cpp_src/core/keyvalue/geometry.h index e74a64b9a..af779eba8 100644 --- a/cpp_src/core/keyvalue/geometry.h +++ b/cpp_src/core/keyvalue/geometry.h @@ -5,16 +5,37 @@ #include #include #include "tools/assertrx.h" +#include "tools/errors.h" namespace reindexer { -struct Point { - double x, y; +class Point { +public: + explicit 
Point(double x = 0.0, double y = 0.0) : x_(x), y_(y) { + validate(x, "x"); + validate(y, "y"); + } + double X() const noexcept { return x_; } + double Y() const noexcept { return y_; } + +private: + void validate(double v, std::string_view name) { + if rx_unlikely (std::isinf(v)) { + throwInfError(name); + } + if rx_unlikely (std::isnan(v)) { + throwNanError(name); + } + } + [[noreturn]] void throwInfError(std::string_view name); + [[noreturn]] void throwNanError(std::string_view name); + + double x_, y_; }; template T& operator<<(T& os, Point p) { - return os << '{' << p.x << ", " << p.y << '}'; + return os << '{' << p.X() << ", " << p.Y() << '}'; } inline bool approxEqual(double lhs, double rhs) noexcept { @@ -22,27 +43,27 @@ inline bool approxEqual(double lhs, double rhs) noexcept { ((std::abs(lhs) < std::abs(rhs) ? std::abs(rhs) : std::abs(lhs)) * std::numeric_limits::epsilon()); } -inline bool operator==(Point lhs, Point rhs) noexcept { return approxEqual(lhs.x, rhs.x) && approxEqual(lhs.y, rhs.y); } +inline bool operator==(Point lhs, Point rhs) noexcept { return approxEqual(lhs.X(), rhs.X()) && approxEqual(lhs.Y(), rhs.Y()); } inline bool operator!=(Point lhs, Point rhs) noexcept { return !(lhs == rhs); } struct point_strict_equal { bool operator()(const Point& lhs, const Point& rhs) const noexcept { - return std::equal_to()(lhs.x, rhs.x) && std::equal_to()(lhs.y, rhs.y); + return std::equal_to()(lhs.X(), rhs.X()) && std::equal_to()(lhs.Y(), rhs.Y()); } }; struct point_strict_less { - bool operator()(const Point& lhs, const Point& rhs) const noexcept { return lhs.x < rhs.x || lhs.y < rhs.y; } + bool operator()(const Point& lhs, const Point& rhs) const noexcept { return lhs.X() < rhs.X() || lhs.Y() < rhs.Y(); } }; inline bool DWithin(Point lhs, Point rhs, double distance) noexcept { - return (lhs.x - rhs.x) * (lhs.x - rhs.x) + (lhs.y - rhs.y) * (lhs.y - rhs.y) <= distance * distance; + return (lhs.X() - rhs.X()) * (lhs.X() - rhs.X()) + (lhs.Y() - rhs.Y()) * 
(lhs.Y() - rhs.Y()) <= distance * distance; } class Rectangle { public: Rectangle() noexcept : left_{}, right_{}, bottom_{}, top_{} {} Rectangle(Point a, Point b) noexcept - : left_{std::min(a.x, b.x)}, right_{std::max(a.x, b.x)}, bottom_{std::min(a.y, b.y)}, top_{std::max(a.y, b.y)} {} + : left_{std::min(a.X(), b.X())}, right_{std::max(a.X(), b.X())}, bottom_{std::min(a.Y(), b.Y())}, top_{std::max(a.Y(), b.Y())} {} Rectangle(double l, double r, double b, double t) noexcept : left_{std::min(l, r)}, right_{std::max(l, r)}, bottom_{std::min(b, t)}, top_{std::max(b, t)} {} double Left() const noexcept { return left_; } @@ -50,7 +71,7 @@ class Rectangle { double Bottom() const noexcept { return bottom_; } double Top() const noexcept { return top_; } double Area() const noexcept { return (right_ - left_) * (top_ - bottom_); } - bool Contain(Point p) const noexcept { return left_ <= p.x && p.x <= right_ && bottom_ <= p.y && p.y <= top_; } + bool Contain(Point p) const noexcept { return left_ <= p.X() && p.X() <= right_ && bottom_ <= p.Y() && p.Y() <= top_; } bool Contain(const Rectangle& r) const noexcept { return left_ <= r.left_ && r.right_ <= right_ && bottom_ <= r.bottom_ && r.top_ <= top_; } @@ -70,7 +91,7 @@ inline bool DWithin(const Rectangle& r, Point p, double distance) noexcept { DWithin(Point{r.Right(), r.Bottom()}, p, distance) && DWithin(Point{r.Right(), r.Top()}, p, distance); } -inline Rectangle boundRect(Point p) noexcept { return {p.x, p.x, p.y, p.y}; } +inline Rectangle boundRect(Point p) noexcept { return {p.X(), p.X(), p.Y(), p.Y()}; } inline Rectangle boundRect(const Rectangle& r1, const Rectangle& r2) noexcept { return {std::min(r1.Left(), r2.Left()), std::max(r1.Right(), r2.Right()), std::min(r1.Bottom(), r2.Bottom()), @@ -78,7 +99,7 @@ inline Rectangle boundRect(const Rectangle& r1, const Rectangle& r2) noexcept { } inline Rectangle boundRect(const Rectangle& r, Point p) noexcept { - return {std::min(r.Left(), p.x), std::max(r.Right(), p.x), 
std::min(r.Bottom(), p.y), std::max(r.Top(), p.y)}; + return {std::min(r.Left(), p.X()), std::max(r.Right(), p.X()), std::min(r.Bottom(), p.Y()), std::max(r.Top(), p.Y())}; } class Circle { @@ -94,30 +115,30 @@ class Circle { }; inline bool intersect(const Rectangle& r, const Circle& c) noexcept { - if (c.Center().x < r.Left()) { - const auto diffX = r.Left() - c.Center().x; - if (c.Center().y < r.Bottom()) { - const auto diffY = r.Bottom() - c.Center().y; + if (c.Center().X() < r.Left()) { + const auto diffX = r.Left() - c.Center().X(); + if (c.Center().Y() < r.Bottom()) { + const auto diffY = r.Bottom() - c.Center().Y(); return diffX * diffX + diffY * diffY <= c.Radius() * c.Radius(); - } else if (c.Center().y > r.Top()) { - const auto diffY = c.Center().y - r.Top(); + } else if (c.Center().Y() > r.Top()) { + const auto diffY = c.Center().Y() - r.Top(); return diffX * diffX + diffY * diffY <= c.Radius() * c.Radius(); } else { return diffX <= c.Radius(); } - } else if (c.Center().x > r.Right()) { - const auto diffX = c.Center().x - r.Right(); - if (c.Center().y < r.Bottom()) { - const auto diffY = r.Bottom() - c.Center().y; + } else if (c.Center().X() > r.Right()) { + const auto diffX = c.Center().X() - r.Right(); + if (c.Center().Y() < r.Bottom()) { + const auto diffY = r.Bottom() - c.Center().Y(); return diffX * diffX + diffY * diffY <= c.Radius() * c.Radius(); - } else if (c.Center().y > r.Top()) { - const auto diffY = c.Center().y - r.Top(); + } else if (c.Center().Y() > r.Top()) { + const auto diffY = c.Center().Y() - r.Top(); return diffX * diffX + diffY * diffY <= c.Radius() * c.Radius(); } else { return diffX <= c.Radius(); } } else { - return c.Center().y + c.Radius() >= r.Bottom() && c.Center().y - c.Radius() <= r.Top(); + return c.Center().Y() + c.Radius() >= r.Bottom() && c.Center().Y() - c.Radius() <= r.Top(); } } @@ -127,7 +148,7 @@ namespace std { template <> struct hash { - size_t operator()(reindexer::Point p) const noexcept { return (hash()(p.x) 
<< 1) ^ hash()(p.y); } + size_t operator()(reindexer::Point p) const noexcept { return (hash()(p.X()) << 1) ^ hash()(p.Y()); } }; } // namespace std diff --git a/cpp_src/core/keyvalue/uuid.cc b/cpp_src/core/keyvalue/uuid.cc index 6103bc26d..5a549252b 100644 --- a/cpp_src/core/keyvalue/uuid.cc +++ b/cpp_src/core/keyvalue/uuid.cc @@ -6,76 +6,190 @@ namespace reindexer { Uuid::Uuid(std::string_view str) : data_{0, 0} { const auto err = tryParse(str, data_); - if (!err.ok()) { + if (rx_unlikely(!err.ok())) { throw err; } } +#ifdef GET_NUM +static_assert(false, "GET_NUM is already defined"); +#endif + +#define GET_NUM(i) \ + num = hexCharToNum[static_cast(str[i])]; \ + if (rx_unlikely(num > 15)) { \ + if (str[i] == '-') { \ + return Error(errNotValid, "Invalid UUID format: '%s'", str); \ + } else { \ + return Error(errNotValid, "UUID cannot contain char '%c': '%s'", str[i], str); \ + } \ + } + Error Uuid::tryParse(std::string_view str, uint64_t (&data)[2]) noexcept { - unsigned i = 0; - for (char ch : str) { - if (i >= 32 && ch != '-') { + static constexpr uint64_t hexCharToNum[] = { + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 0, 1, 2, 3, + 4, 5, 6, 7, 8, 9, 255, 255, 255, 255, 255, 255, 255, 10, 11, 12, 13, 14, 15, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 10, 11, 12, 13, 14, 15, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}; + static constexpr size_t kHexDigitsCount = 32; + uint64_t num; + switch (str.size()) { + case kHexDigitsCount: + GET_NUM(0) + data[0] = num << 60; + GET_NUM(1) + data[0] |= num << 56; + GET_NUM(2) + data[0] |= num << 52; + GET_NUM(3) + data[0] |= num << 48; + GET_NUM(4) + data[0] |= num << 44; + GET_NUM(5) + data[0] |= num << 40; + GET_NUM(6) + data[0] |= num << 36; + GET_NUM(7) + data[0] |= num << 32; + GET_NUM(8) + data[0] |= num << 28; + GET_NUM(9) + data[0] |= num << 24; + GET_NUM(10) + data[0] |= num << 20; + GET_NUM(11) + data[0] |= num << 16; + GET_NUM(12) + data[0] |= num << 12; + GET_NUM(13) + data[0] |= num << 8; + GET_NUM(14) + data[0] |= num << 4; + GET_NUM(15) + data[0] |= num; + GET_NUM(16) + data[1] = num << 60; + GET_NUM(17) + data[1] |= num << 56; + GET_NUM(18) + data[1] |= num << 52; + GET_NUM(19) + data[1] |= num << 48; + GET_NUM(20) + data[1] |= num << 44; + GET_NUM(21) + data[1] |= num << 40; + GET_NUM(22) + data[1] |= num << 36; + GET_NUM(23) + data[1] |= num << 32; + GET_NUM(24) + data[1] |= num << 28; + GET_NUM(25) + data[1] |= num << 24; + GET_NUM(26) + data[1] |= num << 20; + GET_NUM(27) + data[1] |= num << 16; + GET_NUM(28) + data[1] |= num << 12; + GET_NUM(29) + data[1] |= num << 8; + GET_NUM(30) + data[1] |= num << 4; + GET_NUM(31) + data[1] |= num; + break; + case kStrFormLen: + if (rx_unlikely(str[8] != '-' || str[13] != '-' || str[18] != '-' || str[23] != '-')) { + return Error(errNotValid, "Invalid UUID format: '%s'", str); + } + GET_NUM(0) + data[0] = num << 60; + GET_NUM(1) + data[0] |= num << 56; + GET_NUM(2) + data[0] |= num << 52; + GET_NUM(3) + data[0] |= num << 48; + 
GET_NUM(4) + data[0] |= num << 44; + GET_NUM(5) + data[0] |= num << 40; + GET_NUM(6) + data[0] |= num << 36; + GET_NUM(7) + data[0] |= num << 32; + GET_NUM(9) + data[0] |= num << 28; + GET_NUM(10) + data[0] |= num << 24; + GET_NUM(11) + data[0] |= num << 20; + GET_NUM(12) + data[0] |= num << 16; + GET_NUM(14) + data[0] |= num << 12; + GET_NUM(15) + data[0] |= num << 8; + GET_NUM(16) + data[0] |= num << 4; + GET_NUM(17) + data[0] |= num; + GET_NUM(19) + data[1] = num << 60; + GET_NUM(20) + data[1] |= num << 56; + GET_NUM(21) + data[1] |= num << 52; + GET_NUM(22) + data[1] |= num << 48; + GET_NUM(24) + data[1] |= num << 44; + GET_NUM(25) + data[1] |= num << 40; + GET_NUM(26) + data[1] |= num << 36; + GET_NUM(27) + data[1] |= num << 32; + GET_NUM(28) + data[1] |= num << 28; + GET_NUM(29) + data[1] |= num << 24; + GET_NUM(30) + data[1] |= num << 20; + GET_NUM(31) + data[1] |= num << 16; + GET_NUM(32) + data[1] |= num << 12; + GET_NUM(33) + data[1] |= num << 8; + GET_NUM(34) + data[1] |= num << 4; + GET_NUM(35) + data[1] |= num; + break; + default: return Error(errNotValid, "UUID should consist of 32 hexadecimal digits: '%s'", str); - } - uint64_t value; - switch (ch) { - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - value = ch - '0'; - break; - case 'a': - case 'A': - value = 0xA; - break; - case 'b': - case 'B': - value = 0xB; - break; - case 'c': - case 'C': - value = 0xC; - break; - case 'd': - case 'D': - value = 0xD; - break; - case 'e': - case 'E': - value = 0xE; - break; - case 'f': - case 'F': - value = 0xF; - break; - case '-': - continue; - default: - return Error(errNotValid, "UUID cannot contain char '%c': '%s'", ch, str); - } - data[i / 16] = (data[i / 16] << 4) | value; - ++i; } - if (i != 32) { - return Error(errNotValid, "UUID should consist of 32 hexadecimal digits: '%s'", str); - } - if ((data[0] != 0 || data[1] != 0) && (data[1] >> 63) == 0) { + if (rx_unlikely((data[0] != 0 || 
data[1] != 0) && (data[1] >> 63) == 0)) { return Error(errNotValid, "Variant 0 of UUID is unsupported: '%s'", str); } return {}; } +#undef GET_NUM + std::optional Uuid::TryParse(std::string_view str) noexcept { Uuid ret; const auto err = tryParse(str, ret.data_); - if (err.ok()) { + if (rx_likely(err.ok())) { return ret; } else { return std::nullopt; @@ -91,21 +205,43 @@ Uuid::operator std::string() const { void Uuid::PutToStr(span str) const noexcept { assertrx(str.size() >= kStrFormLen); - static constexpr char ch[] = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'}; - for (size_t i = 0, j = 0; i < kStrFormLen; ++i) { - switch (i) { - case 8: - case 13: - case 18: - case 23: - str[i] = '-'; - break; - default: - str[i] = ch[(data_[j / 16] >> ((15 - j % 16) * 4)) & 0xF]; - ++j; - break; - } - } + static constexpr char hexChars[] = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'}; + str[0] = hexChars[(data_[0] >> 60) & 0xF]; + str[1] = hexChars[(data_[0] >> 56) & 0xF]; + str[2] = hexChars[(data_[0] >> 52) & 0xF]; + str[3] = hexChars[(data_[0] >> 48) & 0xF]; + str[4] = hexChars[(data_[0] >> 44) & 0xF]; + str[5] = hexChars[(data_[0] >> 40) & 0xF]; + str[6] = hexChars[(data_[0] >> 36) & 0xF]; + str[7] = hexChars[(data_[0] >> 32) & 0xF]; + str[8] = '-'; + str[9] = hexChars[(data_[0] >> 28) & 0xF]; + str[10] = hexChars[(data_[0] >> 24) & 0xF]; + str[11] = hexChars[(data_[0] >> 20) & 0xF]; + str[12] = hexChars[(data_[0] >> 16) & 0xF]; + str[13] = '-'; + str[14] = hexChars[(data_[0] >> 12) & 0xF]; + str[15] = hexChars[(data_[0] >> 8) & 0xF]; + str[16] = hexChars[(data_[0] >> 4) & 0xF]; + str[17] = hexChars[data_[0] & 0xF]; + str[18] = '-'; + str[19] = hexChars[(data_[1] >> 60) & 0xF]; + str[20] = hexChars[(data_[1] >> 56) & 0xF]; + str[21] = hexChars[(data_[1] >> 52) & 0xF]; + str[22] = hexChars[(data_[1] >> 48) & 0xF]; + str[23] = '-'; + str[24] = hexChars[(data_[1] >> 44) & 0xF]; + str[25] = 
hexChars[(data_[1] >> 40) & 0xF]; + str[26] = hexChars[(data_[1] >> 36) & 0xF]; + str[27] = hexChars[(data_[1] >> 32) & 0xF]; + str[28] = hexChars[(data_[1] >> 28) & 0xF]; + str[29] = hexChars[(data_[1] >> 24) & 0xF]; + str[30] = hexChars[(data_[1] >> 20) & 0xF]; + str[31] = hexChars[(data_[1] >> 16) & 0xF]; + str[32] = hexChars[(data_[1] >> 12) & 0xF]; + str[33] = hexChars[(data_[1] >> 8) & 0xF]; + str[34] = hexChars[(data_[1] >> 4) & 0xF]; + str[35] = hexChars[data_[1] & 0xF]; } } // namespace reindexer diff --git a/cpp_src/core/keyvalue/variant.cc b/cpp_src/core/keyvalue/variant.cc index 7fd879443..c1ee01540 100644 --- a/cpp_src/core/keyvalue/variant.cc +++ b/cpp_src/core/keyvalue/variant.cc @@ -23,10 +23,14 @@ Variant::Variant(PayloadValue &&v) : variant_{0, 1, KeyValueType::Composite{}} { Variant::Variant(const std::string &v) : variant_{0, 1, KeyValueType::String{}} { new (cast()) key_string(make_key_string(v)); } +Variant::Variant(std::string &&v) : variant_{0, 1, KeyValueType::String{}} { new (cast()) key_string(make_key_string(std::move(v))); } + Variant::Variant(std::string_view v) : variant_{0, 1, KeyValueType::String{}} { new (cast()) key_string(make_key_string(v)); } Variant::Variant(const key_string &v) : variant_{0, 1, KeyValueType::String{}} { new (cast()) key_string(v); } +Variant::Variant(key_string &&v) : variant_{0, 1, KeyValueType::String{}} { new (cast()) key_string(std::move(v)); } + Variant::Variant(const char *v) : Variant(p_string(v)) {} Variant::Variant(p_string v, bool enableHold) : variant_{0, 0, KeyValueType::String{}} { @@ -64,7 +68,7 @@ Variant::Variant(Uuid uuid) noexcept : uuid_() { } } -static void serialize(WrSerializer &, const std::tuple<>) noexcept {} +static void serialize(WrSerializer &, const std::tuple<> &) noexcept {} template void serialize(WrSerializer &ser, const std::tuple &v) { @@ -174,10 +178,11 @@ std::string Variant::As() const { [&](KeyValueType::Int64) { return std::to_string(variant_.value_int64); }, 
[&](KeyValueType::Double) { return std::to_string(variant_.value_double); }, [&](KeyValueType::String) { - if (this->operator p_string().type() == p_string::tagCxxstr || this->operator p_string().type() == p_string::tagKeyString) { - return *(this->operator p_string().getCxxstr()); + const auto pstr = this->operator p_string(); + if (pstr.type() == p_string::tagCxxstr || pstr.type() == p_string::tagKeyString) { + return *(pstr.getCxxstr()); } - return this->operator p_string().toString(); + return pstr.toString(); }, [&](KeyValueType::Null) { return "null"s; }, [&](KeyValueType::Composite) { return std::string(); }, [&](KeyValueType::Tuple) { @@ -226,10 +231,14 @@ std::string Variant::As(const PayloadType &pt, const FieldsSet &fie template std::optional tryParseAs(std::string_view str) noexcept { - const auto end = str.data() + str.size(); + auto begin = str.data(); + const auto end = begin + str.size(); + while (begin != end && std::isspace(*begin)) { + ++begin; + } T res; - auto [ptr, err] = std::from_chars(str.data(), end, res); - if (ptr == str.data() || err == std::errc::invalid_argument || err == std::errc::result_out_of_range) { + auto [ptr, err] = std::from_chars(begin, end, res); + if (ptr == begin || err == std::errc::invalid_argument || err == std::errc::result_out_of_range) { return std::nullopt; } for (; ptr != end; ++ptr) { @@ -358,7 +367,7 @@ int Variant::Compare(const Variant &other, const CollateOpts &collateOpts) const : (variant_.value_double > other.variant_.value_double) ? 1 : -1; }, - [&](KeyValueType::Tuple) { return getCompositeValues() == other.getCompositeValues() ? 
0 : 1; }, + [&](KeyValueType::Tuple) -> int { throw Error(errParams, "KeyValueType::Tuple comparison is not implemented"); }, [&](KeyValueType::String) { return collateCompare(this->operator p_string(), other.operator p_string(), collateOpts); }, [&](KeyValueType::Uuid) { return Uuid{*this}.Compare(Uuid{other}); }, [](KeyValueType::Null) -> int { @@ -538,7 +547,8 @@ size_t Variant::Hash() const noexcept { void Variant::EnsureUTF8() const { if (!isUuid() && variant_.type.Is()) { - if (!utf8::is_valid(operator p_string().data(), operator p_string().data() + operator p_string().size())) { + const auto pstr = this->operator p_string(); + if (!utf8::is_valid(pstr.data(), pstr.data() + pstr.size())) { throw Error(errParams, "Invalid UTF8 string passed to index with CollateUTF8 mode"); } } @@ -610,7 +620,7 @@ void Variant::convertToComposite(const PayloadType *payloadType, const FieldsSet for (auto field : *fields) { if (field != IndexValueType::SetByJsonPath) { - pl.Set(field, {ser.GetVariant()}); + pl.Set(field, ser.GetVariant()); } else { // TODO: will have to implement SetByJsonPath in PayloadIFace // or this "mixed" composite queries (by ordinary indexes + indexes @@ -687,6 +697,7 @@ void Variant::Dump(T &os) const { template void Variant::Dump(WrSerializer &) const; template void Variant::Dump(std::ostream &) const; +template void Variant::Dump(std::stringstream &) const; template void VariantArray::Dump(T &os) const { @@ -700,17 +711,18 @@ void VariantArray::Dump(T &os) const { template void VariantArray::Dump(WrSerializer &) const; template void VariantArray::Dump(std::ostream &) const; +template void VariantArray::Dump(std::stringstream &) const; VariantArray::VariantArray(Point p) noexcept { - emplace_back(p.x); - emplace_back(p.y); + emplace_back(p.X()); + emplace_back(p.Y()); } VariantArray::operator Point() const { if (size() != 2) { throw Error(errParams, "Can't convert array of %d elements to Point", size()); } - return {(*this)[0].As(), (*this)[1].As()}; + 
return Point{(*this)[0].As(), (*this)[1].As()}; } template diff --git a/cpp_src/core/keyvalue/variant.h b/cpp_src/core/keyvalue/variant.h index a8107fa05..42d447816 100644 --- a/cpp_src/core/keyvalue/variant.h +++ b/cpp_src/core/keyvalue/variant.h @@ -15,7 +15,7 @@ class FieldsSet; class VariantArray; class key_string; struct p_string; -struct Point; +class Point; class Uuid; enum class WithString : bool { No = false, Yes = true }; @@ -32,8 +32,10 @@ class Variant { explicit Variant(const char *v); explicit Variant(p_string v, bool enableHold = true); explicit Variant(const std::string &v); + explicit Variant(std::string &&v); explicit Variant(std::string_view v); explicit Variant(const key_string &v); + explicit Variant(key_string &&v); explicit Variant(const PayloadValue &v); explicit Variant(PayloadValue &&v); explicit Variant(const VariantArray &values); @@ -126,9 +128,8 @@ class Variant { KeyValueType Type() const noexcept { if (isUuid()) { return KeyValueType::Uuid{}; - } else { - return variant_.type; } + return variant_.type; } Variant &convert(KeyValueType type, const PayloadType * = nullptr, const FieldsSet * = nullptr) &; @@ -229,7 +230,14 @@ class VariantArray : public h_vector { } explicit VariantArray(Point) noexcept; explicit operator Point() const; - void MarkArray() noexcept { isArrayValue = true; } + VariantArray &MarkArray(bool v = true) &noexcept { + isArrayValue = v; + return *this; + } + VariantArray &&MarkArray(bool v = true) &&noexcept { + isArrayValue = v; + return std::move(*this); + } void MarkObject() noexcept { isObjectValue = true; } using h_vector::h_vector; using h_vector::operator==; @@ -254,6 +262,11 @@ class VariantArray : public h_vector { static VariantArray Create(Ts &&...vs) { return VariantArray{Variant{std::forward(vs)}...}; } + void Clear() noexcept { + clear(); + isArrayValue = false; + isObjectValue = false; + } private: bool isArrayValue = false; diff --git a/cpp_src/core/lrucache.cc b/cpp_src/core/lrucache.cc index 
304e134c5..ecfc29ac8 100644 --- a/cpp_src/core/lrucache.cc +++ b/cpp_src/core/lrucache.cc @@ -13,16 +13,17 @@ const int kMaxHitCountToCache = 1024; template typename LRUCache::Iterator LRUCache::Get(const K &key) { - if (cacheSizeLimit_ == 0) return Iterator(); + if rx_unlikely (cacheSizeLimit_ == 0) return Iterator(); - std::lock_guard lk(lock_); + std::lock_guard lk(lock_); - auto it = items_.find(key); - if (it == items_.end()) { - it = items_.emplace(key, Entry{}).first; + auto [it, emplaced] = items_.try_emplace(key); + if (emplaced) { totalCacheSize_ += kElemSizeOverhead + sizeof(Entry) + key.Size(); it->second.lruPos = lru_.insert(lru_.end(), &it->first); - if (!eraseLRU()) return Iterator(); + if rx_unlikely (!eraseLRU()) { + return Iterator(); + } } else if (std::next(it->second.lruPos) != lru_.end()) { lru_.splice(lru_.end(), lru_, it->second.lruPos, std::next(it->second.lruPos)); it->second.lruPos = std::prev(lru_.end()); @@ -40,9 +41,9 @@ typename LRUCache::Iterator LRUCache::Get( template void LRUCache::Put(const K &key, V &&v) { - if (cacheSizeLimit_ == 0) return; + if rx_unlikely (cacheSizeLimit_ == 0) return; - std::lock_guard lk(lock_); + std::lock_guard lk(lock_); auto it = items_.find(key); if (it == items_.end()) return; @@ -55,7 +56,7 @@ void LRUCache::Put(const K &key, V &&v) { eraseLRU(); - if (eraseCount_ && putCount_ * 16 > getCount_) { + if rx_unlikely (putCount_ * 16 > getCount_ && eraseCount_) { logPrintf(LogWarning, "IdSetCache::eraseLRU () cache invalidates too fast eraseCount=%d,putCount=%d,getCount=%d", eraseCount_, putCount_, eraseCount_); eraseCount_ = 0; @@ -66,29 +67,29 @@ void LRUCache::Put(const K &key, V &&v) { } template -bool LRUCache::eraseLRU() { +RX_ALWAYS_INLINE bool LRUCache::eraseLRU() { typename LRUList::iterator it = lru_.begin(); while (totalCacheSize_ > cacheSizeLimit_) { // just to save us if totalCacheSize_ >0 and lru is empty // someone can make bad key or val with wrong size - if (lru_.empty()) { + // TODO: 
Probably we should remove this logic, since there is no access to sizes outside of the lrucache + if rx_unlikely (lru_.empty()) { clearAll(); logPrintf(LogError, "IdSetCache::eraseLRU () Cache restarted because wrong cache size totalCacheSize_=%d", totalCacheSize_); return false; } auto mIt = items_.find(**it); - assertrx(mIt != items_.end()); + assertrx_throw(mIt != items_.end()); - size_t oldSize = sizeof(Entry) + kElemSizeOverhead + mIt->first.Size() + mIt->second.val.Size(); + const size_t oldSize = sizeof(Entry) + kElemSizeOverhead + mIt->first.Size() + mIt->second.val.Size(); - if (oldSize > totalCacheSize_) { + if rx_unlikely (oldSize > totalCacheSize_) { clearAll(); logPrintf(LogError, "IdSetCache::eraseLRU () Cache restarted because wrong cache size totalCacheSize_=%d,oldSize=%d", totalCacheSize_, oldSize); return false; } - totalCacheSize_ = totalCacheSize_ - oldSize; items_.erase(mIt); it = lru_.erase(it); @@ -97,15 +98,10 @@ bool LRUCache::eraseLRU() { return !lru_.empty(); } -template -bool LRUCache::Clear() { - std::lock_guard lk(lock_); - return clearAll(); -} template bool LRUCache::clearAll() { - bool res = !items_.empty(); + const bool res = !items_.empty(); totalCacheSize_ = 0; std::unordered_map().swap(items_); LRUList().swap(lru_); @@ -117,8 +113,9 @@ bool LRUCache::clearAll() { template LRUCacheMemStat LRUCache::GetMemStat() { - std::lock_guard lk(lock_); LRUCacheMemStat ret; + + std::lock_guard lk(lock_); ret.totalSize = totalCacheSize_; ret.itemsCount = items_.size(); // for (auto &item : items_) { @@ -133,6 +130,5 @@ template class LRUCache; template class LRUCache; template class LRUCache; -template class LRUCache; } // namespace reindexer diff --git a/cpp_src/core/lrucache.h b/cpp_src/core/lrucache.h index 902a88554..e81fa741f 100644 --- a/cpp_src/core/lrucache.h +++ b/cpp_src/core/lrucache.h @@ -16,14 +16,14 @@ template class LRUCache { public: using Key = K; - LRUCache(size_t sizeLimit = kDefaultCacheSizeLimit, int hitCount = 
kDefaultHitCountToCache) + LRUCache(size_t sizeLimit = kDefaultCacheSizeLimit, int hitCount = kDefaultHitCountToCache) noexcept : totalCacheSize_(0), cacheSizeLimit_(sizeLimit), hitCountToCache_(hitCount) {} struct Iterator { Iterator(bool k = false, const V &v = V()) : valid(k), val(v) {} Iterator(const Iterator &other) = delete; Iterator &operator=(const Iterator &other) = delete; - Iterator(Iterator &&other) : valid(other.valid), val(std::move(other.val)) { other.valid = false; } - Iterator &operator=(Iterator &&other) { + Iterator(Iterator &&other) noexcept : valid(other.valid), val(std::move(other.val)) { other.valid = false; } + Iterator &operator=(Iterator &&other) noexcept { if (this != &other) { valid = other.valid; val = std::move(other.val); @@ -41,7 +41,10 @@ class LRUCache { LRUCacheMemStat GetMemStat(); - bool Clear(); + bool Clear() { + std::lock_guard lk(lock_); + return clearAll(); + } template void Dump(T &os, std::string_view step, std::string_view offset) const { @@ -75,7 +78,7 @@ class LRUCache { template void Clear(const F &cond) { - std::lock_guard lock{lock_}; + std::lock_guard lock(lock_); for (auto it = lru_.begin(); it != lru_.end();) { if (!cond(**it)) { ++it; @@ -84,7 +87,7 @@ class LRUCache { auto mIt = items_.find(**it); assertrx(mIt != items_.end()); const size_t oldSize = sizeof(Entry) + kElemSizeOverhead + mIt->first.Size() + mIt->second.val.Size(); - if (oldSize > totalCacheSize_) { + if rx_unlikely (oldSize > totalCacheSize_) { clearAll(); return; } @@ -96,12 +99,7 @@ class LRUCache { } protected: - bool eraseLRU(); - - bool clearAll(); - typedef std::list LRUList; - struct Entry { V val; typename LRUList::iterator lruPos; @@ -112,14 +110,17 @@ class LRUCache { } }; + bool eraseLRU(); + bool clearAll(); + std::unordered_map items_; LRUList lru_; mutable std::mutex lock_; size_t totalCacheSize_; - size_t cacheSizeLimit_; + const size_t cacheSizeLimit_; int hitCountToCache_; - int getCount_ = 0, putCount_ = 0, eraseCount_ = 0; + 
uint64_t getCount_ = 0, putCount_ = 0, eraseCount_ = 0; }; } // namespace reindexer diff --git a/cpp_src/core/namespace/asyncstorage.cc b/cpp_src/core/namespace/asyncstorage.cc index dafc5d74c..3bd989cab 100644 --- a/cpp_src/core/namespace/asyncstorage.cc +++ b/cpp_src/core/namespace/asyncstorage.cc @@ -23,6 +23,7 @@ AsyncStorage::AsyncStorage(const AsyncStorage& o, AsyncStorage::FullLockT& stora storage_ = o.storage_; path_ = o.path_; curUpdatesChunck_ = createUpdatesCollection(); + updateStatusCache(); // Do not copying lastFlushError_ and reopenTs_, because copied storage does not performs actual writes } @@ -41,6 +42,7 @@ Error AsyncStorage::Open(datastorage::StorageType storageType, const std::string path_ = path; curUpdatesChunck_ = createUpdatesCollection(); } + updateStatusCache(); return err; } @@ -87,15 +89,16 @@ void AsyncStorage::RemoveSync(const StorageOpts& opts, std::string_view key) { void AsyncStorage::Flush(const StorageFlushOpts& opts) { // Flush must be performed in single thread std::lock_guard flushLck(flushMtx_); + statusCache_.UpdatePart(bool(storage_.get()), path_); // Actualize cache part. 
Just in case flush(opts); } -std::string AsyncStorage::Path() const noexcept { +std::string AsyncStorage::GetPath() const noexcept { std::lock_guard lck(storageMtx_); return path_; } -datastorage::StorageType AsyncStorage::Type() const noexcept { +datastorage::StorageType AsyncStorage::GetType() const noexcept { std::lock_guard lck(storageMtx_); if (storage_) { return storage_->Type(); @@ -135,6 +138,7 @@ void AsyncStorage::InheritUpdatesFrom(AsyncStorage& src, AsyncStorage::FullLockT if (lastBatchWithSyncUpdates_ >= 0) ++lastBatchWithSyncUpdates_; } src.storage_.reset(); + // Do not update lockfree status here to avoid status flickering on ns copying } isCopiedNsStorage_ = false; } @@ -263,7 +267,7 @@ void AsyncStorage::tryReopenStorage() { throw lastFlushError_; } logPrintf(LogInfo, "Storage was reopened for '%s'", path_); - lastFlushError_ = Error(); + setLastFlushError(Error()); reopenTs_ = TimepointT(); } } diff --git a/cpp_src/core/namespace/asyncstorage.h b/cpp_src/core/namespace/asyncstorage.h index a7c5019b7..9571f98da 100644 --- a/cpp_src/core/namespace/asyncstorage.h +++ b/cpp_src/core/namespace/asyncstorage.h @@ -4,6 +4,7 @@ #include #include "core/storage/idatastorage.h" #include "estl/h_vector.h" +#include "estl/mutex.h" #include "tools/assertrx.h" #include "tools/flagguard.h" @@ -39,6 +40,7 @@ class AsyncStorage { using AdviceGuardT = CounterGuardAIRL32; using ClockT = std::chrono::system_clock; using TimepointT = ClockT::time_point; + using Mutex = MarkedMutex; struct Status { bool isEnabled = false; @@ -47,7 +49,7 @@ class AsyncStorage { class ConstCursor { public: - ConstCursor(std::unique_lock&& lck, std::unique_ptr&& c) noexcept + ConstCursor(std::unique_lock&& lck, std::unique_ptr&& c) noexcept : lck_(std::move(lck)), c_(std::move(c)) { assertrx(lck_.owns_lock()); assertrx(c_); @@ -58,13 +60,13 @@ class AsyncStorage { // NOTE: Cursor owns unique storage lock. I.e. nobody is able to read stroage or write into it, while cursor exists. 
// Currently the only place, where it matter is EnumMeta method. However, we should to consider switching to shared_mutex, if // the number of such concurrent Cursors will grow. - std::unique_lock lck_; + std::unique_lock lck_; std::unique_ptr c_; }; class Cursor : public ConstCursor { public: - Cursor(std::unique_lock&& lck, std::unique_ptr&& c, AsyncStorage& storage) noexcept + Cursor(std::unique_lock&& lck, std::unique_ptr&& c, AsyncStorage& storage) noexcept : ConstCursor(std::move(lck), std::move(c)), storage_(&storage) {} void RemoveThisKey(const StorageOpts& opts) { @@ -78,24 +80,19 @@ class AsyncStorage { class FullLockT { public: - FullLockT(std::mutex& flushMtx, std::mutex& updatesMtx) : flushLck_(flushMtx), storageLck_(updatesMtx) {} - ~FullLockT() { - if (flushLck_.owns_lock()) { - assertrx(storageLck_.owns_lock()); - unlock(); - } - } + using MutexType = Mutex; + FullLockT(MutexType& flushMtx, MutexType& updatesMtx) : flushLck_(flushMtx), storageLck_(updatesMtx) {} void unlock() { // Specify unlock order storageLck_.unlock(); flushLck_.unlock(); } - bool OwnsThisFlushMutex(std::mutex& mtx) const noexcept { return flushLck_.owns_lock() && flushLck_.mutex() == &mtx; } - bool OwnsThisStorageMutex(std::mutex& mtx) const noexcept { return storageLck_.owns_lock() && storageLck_.mutex() == &mtx; } + bool OwnsThisFlushMutex(MutexType& mtx) const noexcept { return flushLck_.owns_lock() && flushLck_.mutex() == &mtx; } + bool OwnsThisStorageMutex(MutexType& mtx) const noexcept { return storageLck_.owns_lock() && storageLck_.mutex() == &mtx; } private: - std::unique_lock flushLck_; - std::unique_lock storageLck_; + std::unique_lock flushLck_; + std::unique_lock storageLck_; }; AsyncStorage() = default; @@ -136,17 +133,15 @@ class AsyncStorage { } } } - bool IsValid() const { + bool IsValid() const noexcept { std::lock_guard lck(storageMtx_); return storage_.get(); } - Status GetStatus() const { - std::lock_guard lck(storageMtx_); - return Status{storage_.get() != 
nullptr, lastFlushError_}; - } + Status GetStatusCached() const noexcept { return statusCache_.GetStatus(); } FullLockT FullLock() { return FullLockT{flushMtx_, storageMtx_}; } - std::string Path() const noexcept; - datastorage::StorageType Type() const noexcept; + std::string GetPathCached() const noexcept { return statusCache_.GetPath(); } + std::string GetPath() const noexcept; + datastorage::StorageType GetType() const noexcept; void InheritUpdatesFrom(AsyncStorage& src, AsyncStorage::FullLockT& storageLock); AdviceGuardT AdviceBatching() noexcept { return AdviceGuardT(batchingAdvices_); } void SetForceFlushLimit(uint32_t limit) noexcept { forceFlushLimit_.store(limit, std::memory_order_relaxed); } @@ -244,8 +239,8 @@ class AsyncStorage { UpdatesPtrT createUpdatesCollection() noexcept; void recycleUpdatesCollection(UpdatesPtrT&& p) noexcept; - void scheduleFilesReopen(Error&& e) { - lastFlushError_ = std::move(e); + void scheduleFilesReopen(Error&& e) noexcept { + setLastFlushError(std::move(e)); reopenTs_ = ClockT::now() + kStorageReopenPeriod; } void reset() noexcept { @@ -254,16 +249,52 @@ class AsyncStorage { lastFlushError_ = Error(); reopenTs_ = TimepointT(); lastBatchWithSyncUpdates_ = -1; + updateStatusCache(); } void tryReopenStorage(); + void updateStatusCache() noexcept { statusCache_.Update(Status{bool(storage_.get()), lastFlushError_}, path_); } + void setLastFlushError(Error&& e) { + lastFlushError_ = std::move(e); + updateStatusCache(); + } + + class StatusCache { + public: + void Update(Status&& st, std::string path) noexcept { + std::lock_guard lck(mtx_); + status_ = std::move(st); + path_ = std::move(path); + } + void UpdatePart(bool isEnabled, std::string path) noexcept { + std::lock_guard lck(mtx_); + status_.isEnabled = isEnabled; + path_ = std::move(path); + } + Status GetStatus() const noexcept { + std::lock_guard lck(mtx_); + return status_; + } + std::string GetPath() const noexcept { + std::lock_guard lck(mtx_); + return path_; + } + + 
private: + mutable std::mutex mtx_; + Status status_; + std::string path_; + }; + StatusCache statusCache_; // Status cache to avoid any long locks std::deque finishedUpdateChuncks_; UpdatesPtrT curUpdatesChunck_; std::atomic totalUpdatesCount_ = {0}; + // path_ value and storage_ pointer may be changed under full lock only, so it's valid to read their values under any of flushMtx_ or + // storageMtx_ locks shared_ptr storage_; - mutable std::mutex storageMtx_; - mutable std::mutex flushMtx_; std::string path_; + mutable Mutex storageMtx_; + mutable Mutex flushMtx_; bool isCopiedNsStorage_ = false; h_vector recycled_; std::atomic batchingAdvices_ = {0}; diff --git a/cpp_src/core/namespace/bgnamespacedeleter.h b/cpp_src/core/namespace/bgnamespacedeleter.h new file mode 100644 index 000000000..77e0cfd3b --- /dev/null +++ b/cpp_src/core/namespace/bgnamespacedeleter.h @@ -0,0 +1,32 @@ +#pragma once + +#include "namespaceimpl.h" + +namespace reindexer { + +class BackgroundNamespaceDeleter { +public: + void Add(NamespaceImpl::Ptr ns) { + std::lock_guard lck(mtx_); + namespaces_.emplace_back(std::move(ns)); + } + void DeleteUnique() noexcept { + std::unique_lock lck(mtx_); + for (auto it = namespaces_.begin(); it != namespaces_.end();) { + if (it->unique()) { + lck.unlock(); + it->reset(); + lck.lock(); + it = namespaces_.erase(it); + } else { + ++it; + } + } + } + +private: + std::mutex mtx_; + std::list namespaces_; +}; + +} diff --git a/cpp_src/core/namespace/namespace.cc b/cpp_src/core/namespace/namespace.cc index c926db166..087306c8c 100644 --- a/cpp_src/core/namespace/namespace.cc +++ b/cpp_src/core/namespace/namespace.cc @@ -18,13 +18,15 @@ void Namespace::CommitTransaction(LocalTransaction& tx, LocalQueryResults& resul txStatsCounter_.Count(tx); } bool wasCopied = false; // NOLINT(*deadcode.DeadStores) - QueryStatCalculator statCalculator( - long_actions::Logger{tx, longTxLoggingParams_.load(std::memory_order_relaxed), wasCopied}); + auto params = 
longTxLoggingParams_.load(std::memory_order_relaxed); + QueryStatCalculator statCalculator(long_actions::MakeLogger(tx, params, wasCopied), params.thresholdUs >= 0); PerfStatCalculatorMT txCommitCalc(commitStatsCounter_, enablePerfCounters); if (needNamespaceCopy(nsl, tx)) { PerfStatCalculatorMT calc(nsl->updatePerfCounter_, enablePerfCounters); - contexted_unique_lock clonerLck(clonerMtx_, &ctx.rdxContext); + + auto clonerLck = statCalculator.CreateLock(clonerMtx_, &ctx.rdxContext); + nsl = ns_; if (needNamespaceCopy(nsl, tx)) { PerfStatCalculatorMT nsCopyCalc(copyStatsCounter_, enablePerfCounters); @@ -34,14 +36,15 @@ void Namespace::CommitTransaction(LocalTransaction& tx, LocalQueryResults& resul hasCopy_.store(true, std::memory_order_release); CounterGuardAIR32 cg(nsl->cancelCommitCnt_); try { - auto nsRlck = nsl->rLock(ctx.rdxContext); - auto storageLock = nsl->storage_.FullLock(); + auto nsRlck = statCalculator.CreateLock(*nsl, &NamespaceImpl::rLock, ctx.rdxContext); + auto storageLock = statCalculator.CreateLock(nsl->storage_, &AsyncStorage::FullLock); + cg.Reset(); nsCopy_.reset(new NamespaceImpl(*nsl, storageLock)); nsCopyCalc.HitManualy(); NsContext nsCtx(ctx); nsCtx.CopiedNsRequest(); - nsCopy_->CommitTransaction(tx, result, nsCtx); + nsCopy_->CommitTransaction(tx, result, nsCtx, statCalculator); nsCopy_->optimizeIndexes(nsCtx); nsCopy_->warmupFtIndexes(); try { @@ -90,14 +93,14 @@ void Namespace::CommitTransaction(LocalTransaction& tx, LocalQueryResults& resul if (clonerLck.owns_lock()) { nsl = ns_; clonerLck.unlock(); - nsl->storage_.TryForceFlush(); + statCalculator.LogFlushDuration(nsl->storage_, &AsyncStorage::TryForceFlush); } else { - getMainNs()->storage_.TryForceFlush(); + statCalculator.LogFlushDuration(getMainNs()->storage_, &AsyncStorage::TryForceFlush); } return; } } - handleInvalidation(NamespaceImpl::CommitTransaction)(tx, result, ctx); + handleInvalidation(NamespaceImpl::CommitTransaction)(tx, result, ctx, statCalculator); } 
NamespacePerfStat Namespace::GetPerfStat(const RdxContext& ctx) { @@ -161,7 +164,7 @@ void Namespace::doRename(const Namespace::Ptr& dst, const std::string& newName, } } dstNs->checkClusterRole(ctx); - dbpath = dstNs->storage_.Path(); + dbpath = dstNs->storage_.GetPath(); } else if (newName == srcNs.name_) { return; } @@ -175,9 +178,9 @@ void Namespace::doRename(const Namespace::Ptr& dst, const std::string& newName, const bool hadStorage = (srcNs.storage_.IsValid()); auto storageType = StorageType::LevelDB; - const auto srcDbpath = srcNs.storage_.Path(); + const auto srcDbpath = srcNs.storage_.GetPath(); if (hadStorage) { - storageType = srcNs.storage_.Type(); + storageType = srcNs.storage_.GetType(); srcNs.storage_.Close(); fs::RmDirAll(dbpath); int renameRes = fs::Rename(srcDbpath, dbpath); diff --git a/cpp_src/core/namespace/namespace.h b/cpp_src/core/namespace/namespace.h index 68b371f45..b3cbe1f56 100644 --- a/cpp_src/core/namespace/namespace.h +++ b/cpp_src/core/namespace/namespace.h @@ -2,7 +2,9 @@ #include #include +#include "bgnamespacedeleter.h" #include "core/queryresults/queryresults.h" +#include "core/querystat.h" #include "core/transaction/txstats.h" #include "estl/shared_mutex.h" #include "namespaceimpl.h" @@ -14,10 +16,11 @@ namespace reindexer { class Namespace { public: - Namespace(const std::string &name, std::optional stateToken, cluster::INsDataReplicator *clusterizator) - : ns_(std::make_shared(name, std::move(stateToken), clusterizator)) {} - Namespace(NamespaceImpl::Ptr ns) : ns_(std::move(ns)) {} - typedef shared_ptr Ptr; + using Ptr = shared_ptr; + + Namespace(const std::string &name, std::optional stateToken, cluster::INsDataReplicator *clusterizator, + BackgroundNamespaceDeleter &bgDeleter) + : ns_(make_intrusive(name, std::move(stateToken), clusterizator)), bgDeleter_(bgDeleter) {} void CommitTransaction(LocalTransaction &tx, LocalQueryResults &result, const NsContext &ctx); std::string GetName(const RdxContext &ctx) const { return 
handleInvalidation(NamespaceImpl::GetName)(ctx); } @@ -46,23 +49,23 @@ class Namespace { nsFuncWrapper(item, ctx); } void Update(Item &item, LocalQueryResults &qr, const RdxContext &ctx) { - nsFuncWrapper<&NamespaceImpl::modifyItem, ModeUpdate>(item, qr, ctx); + nsFuncWrapper<&NamespaceImpl::modifyItem, ItemModifyMode::ModeUpdate>(item, qr, ctx); } void Update(const Query &query, LocalQueryResults &result, const RdxContext &ctx) { - nsFuncWrapper<&NamespaceImpl::doUpdate>(query, result, ctx); + nsFuncWrapper<&NamespaceImpl::doUpdate, QueryType::QueryUpdate>(query, result, ctx); } void Upsert(Item &item, const RdxContext &ctx) { handleInvalidation(NamespaceImpl::Upsert)(item, ctx); } void Upsert(Item &item, LocalQueryResults &qr, const RdxContext &ctx) { - nsFuncWrapper<&NamespaceImpl::modifyItem, ModeUpsert>(item, qr, ctx); + nsFuncWrapper<&NamespaceImpl::modifyItem, ItemModifyMode::ModeUpsert>(item, qr, ctx); } void Delete(Item &item, const RdxContext &ctx) { nsFuncWrapper(item, ctx); } void Delete(Item &item, LocalQueryResults &qr, const RdxContext &ctx) { - nsFuncWrapper<&NamespaceImpl::modifyItem, ModeDelete>(item, qr, ctx); + nsFuncWrapper<&NamespaceImpl::modifyItem, ItemModifyMode::ModeDelete>(item, qr, ctx); } void Delete(const Query &query, LocalQueryResults &result, const RdxContext &ctx) { - nsFuncWrapper<&NamespaceImpl::doDelete>(query, result, ctx); + nsFuncWrapper<&NamespaceImpl::doDelete, QueryType::QueryDelete>(query, result, ctx); } void Truncate(const RdxContext &ctx) { handleInvalidation(NamespaceImpl::Truncate)(ctx); } void Select(LocalQueryResults &result, SelectCtx ¶ms, const RdxContext &ctx) { @@ -130,6 +133,7 @@ class Namespace { copyPolicyMultiplier_.store(configData.copyPolicyMultiplier, std::memory_order_relaxed); txSizeToAlwaysCopy_.store(configData.txSizeToAlwaysCopy, std::memory_order_relaxed); longTxLoggingParams_.store(configProvider.GetTxLoggingParams(), std::memory_order_relaxed); + 
longUpdDelLoggingParams_.store(configProvider.GetUpdDelLoggingParams(), std::memory_order_relaxed); handleInvalidation(NamespaceImpl::OnConfigUpdated)(configProvider, ctx); } StorageOpts GetStorageOpts(const RdxContext &ctx) { return handleInvalidation(NamespaceImpl::GetStorageOpts)(ctx); } @@ -190,22 +194,23 @@ class Namespace { } } - template + template void nsFuncWrapper(Item &item, LocalQueryResults &qr, const RdxContext &ctx) const { - nsFuncWrapper(item, qr, - ctx); + nsFuncWrapper(item, qr, ctx); } - template + template void nsFuncWrapper(const Query &query, LocalQueryResults &qr, const RdxContext &ctx) const { nsFuncWrapper(query, qr, ctx); + void (NamespaceImpl::*)(const Query &, LocalQueryResults &, NamespaceImpl::UpdatesContainer &, const NsContext &), fn, + queryType>(query, qr, ctx); } - template + template , ItemModifyMode, QueryType> enumVal> void nsFuncWrapper(T &v, LocalQueryResults &qr, const RdxContext &ctx) const { NsContext nsCtx(ctx); while (true) { - std::shared_ptr ns; + NamespaceImpl::Ptr ns; bool added = false; try { ns = atomicLoadMainNs(); @@ -214,18 +219,25 @@ class Namespace { NamespaceImpl::UpdatesContainer pendedRepl; CounterGuardAIR32 cg(ns->cancelCommitCnt_); - auto wlck = ns->dataWLock(nsCtx.rdxContext); - cg.Reset(); - - qr.AddNamespace(ns, true, nsCtx.rdxContext); - added = true; if constexpr (std::is_same_v) { - (*ns.*fn)(v, mode, pendedRepl, nsCtx); + auto wlck = ns->dataWLock(nsCtx.rdxContext); + cg.Reset(); + qr.AddNamespace(ns, true); + added = true; + (*ns.*fn)(v, enumVal, pendedRepl, nsCtx); qr.AddItem(v, true, false); + ns->replicate(std::move(pendedRepl), std::move(wlck), true, nullptr, nsCtx); } else { + auto params = longUpdDelLoggingParams_.load(std::memory_order_relaxed); + const bool isEnabled = params.thresholdUs >= 0 && !isSystemNamespaceNameFast(v._namespace); + auto statCalculator = QueryStatCalculator(long_actions::MakeLogger(v, std::move(params)), isEnabled); + auto wlck = statCalculator.CreateLock(*ns, 
&NamespaceImpl::dataWLock, nsCtx.rdxContext, false); + cg.Reset(); + qr.AddNamespace(ns, true); + added = true; (*ns.*fn)(v, qr, pendedRepl, nsCtx); + ns->replicate(std::move(pendedRepl), std::move(wlck), true, statCalculator, nsCtx); } - ns->replicate(std::move(pendedRepl), std::move(wlck), true, nsCtx); return; } catch (const Error &e) { if (e.code() != errNamespaceInvalidated) { @@ -250,10 +262,10 @@ class Namespace { ns_.reset(ns); } - std::shared_ptr ns_; + NamespaceImpl::Ptr ns_; std::unique_ptr nsCopy_; std::atomic hasCopy_ = {false}; - using Mutex = MarkedMutex; + using Mutex = MarkedMutex; mutable Mutex clonerMtx_; mutable spinlock nsPtrSpinlock_; std::atomic startCopyPolicyTxSize_; @@ -263,6 +275,8 @@ class Namespace { PerfStatCounterMT commitStatsCounter_; PerfStatCounterMT copyStatsCounter_; std::atomic longTxLoggingParams_; + std::atomic longUpdDelLoggingParams_; + BackgroundNamespaceDeleter &bgDeleter_; }; #undef handleInvalidation diff --git a/cpp_src/core/namespace/namespaceimpl.cc b/cpp_src/core/namespace/namespaceimpl.cc index d69a092bb..2f6b3d3a4 100644 --- a/cpp_src/core/namespace/namespaceimpl.cc +++ b/cpp_src/core/namespace/namespaceimpl.cc @@ -13,6 +13,7 @@ #include "core/itemmodifier.h" #include "core/nsselecter/nsselecter.h" #include "core/payload/payloadiface.h" +#include "core/querystat.h" #include "core/rdxcontext.h" #include "core/selectfunc/functionexecutor.h" #include "itemsloader.h" @@ -60,7 +61,8 @@ void NamespaceImpl::IndexesStorage::MoveBase(IndexesStorage&& src) { Base::opera // private implementation and NOT THREADSAFE of copy CTOR NamespaceImpl::NamespaceImpl(const NamespaceImpl& src, AsyncStorage::FullLockT& storageLock) - : indexes_{*this}, + : intrusive_atomic_rc_base(), + indexes_{*this}, indexesNames_{src.indexesNames_}, indexesToComposites_{src.indexesToComposites_}, items_{src.items_}, @@ -104,7 +106,8 @@ NamespaceImpl::NamespaceImpl(const NamespaceImpl& src, AsyncStorage::FullLockT& } NamespaceImpl::NamespaceImpl(const 
std::string& name, std::optional stateToken, cluster::INsDataReplicator* clusterizator) - : indexes_(*this), + : intrusive_atomic_rc_base(), + indexes_(*this), name_(name), payloadType_(name), tagsMatcher_(payloadType_, stateToken.has_value() ? stateToken.value() : tools::RandomGenerator::gets32()), @@ -236,7 +239,7 @@ void NamespaceImpl::OnConfigUpdated(DBConfigProvider& configProvider, const RdxC configProvider.GetNamespaceConfig(GetName(ctx), configData); const int serverId = configProvider.GetReplicationConfig().serverID; - enablePerfCounters_ = configProvider.GetProfilingConfig().perfStats; + enablePerfCounters_ = configProvider.PerfStatsEnabled(); // ! Updating storage under write lock auto wlck = simpleWLock(ctx); @@ -555,28 +558,8 @@ void NamespaceImpl::addToWAL(const IndexDef& indexDef, WALRecType type, const Ns void NamespaceImpl::addToWAL(std::string_view json, WALRecType type, const NsContext& ctx) { processWalRecord(WALRecord(type, json), ctx); } void NamespaceImpl::AddIndex(const IndexDef& indexDef, const RdxContext& ctx) { - if (!validateIndexName(indexDef.name_, indexDef.Type())) { - throw Error(errParams, - "Cannot add index '%s' in namespace '%s'. Index name contains invalid characters. Only alphas, digits, '+' (for " - "composite indexes only), '.', '_' " - "and '-' are allowed", - indexDef.name_, name_); - } else if (indexDef.opts_.IsPK()) { - if (indexDef.opts_.IsArray()) { - throw Error(errParams, "Cannot add index '%s' in namespace '%s'. PK field can't be array", indexDef.name_, GetName(ctx)); - } else if (indexDef.opts_.IsSparse()) { - throw Error(errParams, "Cannot add index '%s' in namespace '%s'. PK field can't be sparse", indexDef.name_, GetName(ctx)); - } else if (isStore(indexDef.Type())) { - throw Error(errParams, "Cannot add index '%s' in namespace '%s'. PK field can't have '-' type", indexDef.name_, GetName(ctx)); - } else if (IsFullText(indexDef.Type())) { - throw Error(errParams, "Cannot add index '%s' in namespace '%s'. 
PK field can't be fulltext index", indexDef.name_, - GetName(ctx)); - } - } else if (indexDef.Type() == IndexUuidHash) { - if (indexDef.opts_.IsSparse()) { - throw Error(errParams, "Cannot add index '%s' in namespace '%s'. UUID field can't be sparse", indexDef.name_, GetName(ctx)); - } - } + verifyAddIndex(indexDef, [this, &ctx]() { return GetName(ctx); }); + UpdatesContainer pendedRepl; auto wlck = dataWLock(ctx, true); @@ -587,7 +570,7 @@ void NamespaceImpl::AddIndex(const IndexDef& indexDef, const RdxContext& ctx) { if (ctx.HasEmmiterServer()) { // Make sure, that index was already replicated to emmiter pendedRepl.emplace_back(UpdateRecord::Type::EmptyUpdate, name_, ctx.EmmiterServerId()); - replicate(std::move(pendedRepl), std::move(wlck), false, ctx); + replicate(std::move(pendedRepl), std::move(wlck), false, nullptr, ctx); } return; } @@ -596,7 +579,7 @@ void NamespaceImpl::AddIndex(const IndexDef& indexDef, const RdxContext& ctx) { doAddIndex(indexDef, checkIdxEqualityNow, pendedRepl, ctx); saveIndexesToStorage(); - replicate(std::move(pendedRepl), std::move(wlck), false, ctx); + replicate(std::move(pendedRepl), std::move(wlck), false, nullptr, ctx); } void NamespaceImpl::DumpIndex(std::ostream& os, std::string_view index, const RdxContext& ctx) const { @@ -609,7 +592,7 @@ void NamespaceImpl::UpdateIndex(const IndexDef& indexDef, const RdxContext& ctx) auto wlck = dataWLock(ctx); doUpdateIndex(indexDef, pendedRepl, ctx); saveIndexesToStorage(); - replicate(std::move(pendedRepl), std::move(wlck), false, ctx); + replicate(std::move(pendedRepl), std::move(wlck), false, nullptr, ctx); } void NamespaceImpl::DropIndex(const IndexDef& indexDef, const RdxContext& ctx) { @@ -617,7 +600,7 @@ void NamespaceImpl::DropIndex(const IndexDef& indexDef, const RdxContext& ctx) { auto wlck = dataWLock(ctx); doDropIndex(indexDef, pendedRepl, ctx); saveIndexesToStorage(); - replicate(std::move(pendedRepl), std::move(wlck), false, ctx); + replicate(std::move(pendedRepl), 
std::move(wlck), false, nullptr, ctx); } void NamespaceImpl::SetSchema(std::string_view schema, const RdxContext& ctx) { @@ -629,7 +612,7 @@ void NamespaceImpl::SetSchema(std::string_view schema, const RdxContext& ctx) { if (ctx.HasEmmiterServer()) { // Make sure, that schema was already replicated to emmiter pendedRepl.emplace_back(UpdateRecord::Type::EmptyUpdate, name_, ctx.EmmiterServerId()); - replicate(std::move(pendedRepl), std::move(wlck), false, ctx); + replicate(std::move(pendedRepl), std::move(wlck), false, nullptr, ctx); } return; } @@ -639,7 +622,7 @@ void NamespaceImpl::SetSchema(std::string_view schema, const RdxContext& ctx) { setSchema(schema, pendedRepl, ctx); saveSchemaToStorage(); - replicate(std::move(pendedRepl), std::move(wlck), false, ctx); + replicate(std::move(pendedRepl), std::move(wlck), false, nullptr, ctx); } std::string NamespaceImpl::GetSchema(int format, const RdxContext& ctx) { @@ -764,18 +747,65 @@ static void verifyConvertTypes(KeyValueType from, KeyValueType to, const Payload void NamespaceImpl::verifyCompositeIndex(const IndexDef& indexDef) const { const auto type = indexDef.Type(); - const bool isSparse = indexDef.opts_.IsSparse(); + if (indexDef.opts_.IsSparse()) { + throw Error{errParams, "Composite index cannot be sparse. Use non-sparse composite instead"}; + } for (const auto& jp : indexDef.jsonPaths_) { const auto it = indexesNames_.find(jp); - if (it == indexesNames_.end()) continue; // TODO maybe error - const auto& idx = indexes_[it->second]; - if (idx->IsUuid()) { - if (type != IndexCompositeHash) { - throw Error{errParams, "Only hash index allowed on UUID field"}; - } - if (isSparse) { - throw Error{errParams, "Index on UUID field cannot be sparse"}; + if (it == indexesNames_.end()) { + if (!IsFullText(indexDef.Type())) { + throw Error(errParams, + "Composite indexes over non-indexed field ('%s') are not supported yet (except for full-text indexes). 
Create " + "at least column index('-') over each field inside the composite index", + jp); } + continue; + } + const auto& idx = indexes_[it->second]; + if (idx->IsUuid() && type != IndexCompositeHash) { + throw Error{errParams, "Only hash index allowed on UUID field"}; + } + if (IsComposite(idx->Type())) { + throw Error(errParams, "Cannot create composite index '%s' over the other composite '%s'", indexDef.name_, idx->Name()); + } + } +} + +template +void NamespaceImpl::verifyAddIndex(const IndexDef& indexDef, GetNameF&& getNameF) const { + const auto idxType = indexDef.Type(); + if (!validateIndexName(indexDef.name_, idxType)) { + throw Error(errParams, + "Cannot add index '%s' in namespace '%s'. Index name contains invalid characters. Only alphas, digits, '+' (for " + "composite indexes only), '.', '_' and '-' are allowed", + indexDef.name_, getNameF()); + } + if (indexDef.opts_.IsPK()) { + if (indexDef.opts_.IsArray()) { + throw Error(errParams, "Cannot add index '%s' in namespace '%s'. PK field can't be array", indexDef.name_, getNameF()); + } else if (indexDef.opts_.IsSparse()) { + throw Error(errParams, "Cannot add index '%s' in namespace '%s'. PK field can't be sparse", indexDef.name_, getNameF()); + } else if (isStore(idxType)) { + throw Error(errParams, "Cannot add index '%s' in namespace '%s'. PK field can't have '-' type", indexDef.name_, getNameF()); + } else if (IsFullText(idxType)) { + throw Error(errParams, "Cannot add index '%s' in namespace '%s'. PK field can't be fulltext index", indexDef.name_, getNameF()); + } + } + if ((idxType == IndexUuidHash || idxType == IndexUuidStore) && indexDef.opts_.IsSparse()) { + throw Error(errParams, "Cannot add index '%s' in namespace '%s'. UUID field can't be sparse", indexDef.name_, getNameF()); + } + if (indexDef.jsonPaths_.size() > 1 && !IsComposite(idxType) && !indexDef.opts_.IsArray()) { + throw Error(errParams, + "Cannot add index '%s' in namespace '%s'. 
Scalar (non-array and non-composite) index can not have multiple JSON-paths. " + "Use array index instead", + indexDef.name_, getNameF()); + } + if (indexDef.jsonPaths_.empty()) { + throw Error(errParams, "Cannot add index '%s' in namespace '%s'. JSON paths array can not be empty", indexDef.name_, getNameF()); + } + for (const auto& jp : indexDef.jsonPaths_) { + if (jp.empty()) { + throw Error(errParams, "Cannot add index '%s' in namespace '%s'. JSON path can not be empty", indexDef.name_, getNameF()); } } } @@ -802,6 +832,20 @@ void NamespaceImpl::verifyUpdateIndex(const IndexDef& indexDef) const { if (indexDef.opts_.IsPK() && isStore(indexDef.Type())) { throw Error(errParams, "Cannot add index '%s' in namespace '%s'. PK field can't have '-' type", indexDef.name_, name_); } + if (indexDef.jsonPaths_.size() > 1 && !IsComposite(indexDef.Type()) && !indexDef.opts_.IsArray()) { + throw Error( + errParams, + "Cannot update index '%s' in namespace '%s'. Scalar (non-array and non-composite) index can not have multiple JSON-paths", + indexDef.name_, name_); + } + if (indexDef.jsonPaths_.empty()) { + throw Error(errParams, "Cannot update index '%s' in namespace '%s'. JSON paths array can not be empty", indexDef.name_, name_); + } + for (const auto& jp : indexDef.jsonPaths_) { + if (jp.empty()) { + throw Error(errParams, "Cannot update index '%s' in namespace '%s'. 
JSON path can not be empty", indexDef.name_, name_); + } + } if (IsComposite(indexDef.Type())) { verifyUpdateCompositeIndex(indexDef); @@ -810,14 +854,11 @@ void NamespaceImpl::verifyUpdateIndex(const IndexDef& indexDef) const { const auto newIndex = std::unique_ptr(Index::New(indexDef, PayloadType(), FieldsSet())); if (indexDef.opts_.IsSparse()) { - const auto newSparseIndex = std::unique_ptr(Index::New(indexDef, payloadType_, {})); if (indexDef.jsonPaths_.size() != 1) { - throw Error(errParams, "Sparse index must have excatly 1 JSON-path, but %d paths found for '%s'", indexDef.jsonPaths_.size(), + throw Error(errParams, "Sparse index must have exactly 1 JSON-path, but %d paths found for '%s'", indexDef.jsonPaths_.size(), indexDef.name_); } - if (indexDef.jsonPaths_[0].empty()) { - throw Error(errParams, "JSON path for sparse index can not be empty ('%s')", indexDef.name_); - } + const auto newSparseIndex = std::unique_ptr(Index::New(indexDef, payloadType_, {})); } else { FieldsSet changedFields{idxNameIt->second}; PayloadType newPlType = payloadType_; @@ -969,20 +1010,17 @@ void NamespaceImpl::addIndex(const IndexDef& indexDef, bool disableTmVersionInc, } const int idxNo = payloadType_->NumFields(); - if (idxNo >= maxIndexes) { + if (idxNo >= kMaxIndexes) { throw Error(errConflict, "Cannot add index '%s.%s'. Too many non-composite indexes. 
%d non-composite indexes are allowed only", - name_, indexName, maxIndexes - 1); + name_, indexName, kMaxIndexes - 1); } const JsonPaths& jsonPaths = indexDef.jsonPaths_; RollBack_addIndex rollbacker{*this}; if (indexDef.opts_.IsSparse()) { if (jsonPaths.size() != 1) { - throw Error(errParams, "Sparse index must have excatly 1 JSON-path, but %d paths found for '%s':'%s'", jsonPaths.size(), name_, + throw Error(errParams, "Sparse index must have exactly 1 JSON-path, but %d paths found for '%s':'%s'", jsonPaths.size(), name_, indexDef.name_); } - if (jsonPaths[0].empty()) { - throw Error(errParams, "JSON path for sparse index('%s':'%s') can not be empty)", name_, indexDef.name_); - } FieldsSet fields; fields.push_back(jsonPaths[0]); TagsPath tagsPath = tagsMatcher_.path2tag(jsonPaths[0], true); @@ -1213,27 +1251,25 @@ bool NamespaceImpl::getIndexByNameOrJsonPath(std::string_view name, int& index) if (idx > 0) { index = idx; return true; - } else { - return false; } + return false; +} + +bool NamespaceImpl::getSparseIndexByJsonPath(std::string_view jsonPath, int& index) const { + // FIXME: Try to merge getIndexByNameOrJsonPath and getSparseIndexByJsonPath if it's possible + for (int i = indexes_.firstSparsePos(), end = indexes_.firstSparsePos() + indexes_.sparseIndexesSize(); i < end; ++i) { + if (indexes_[i]->Fields().contains(jsonPath)) { + index = i; + return true; + } + } + return false; } void NamespaceImpl::Insert(Item& item, const RdxContext& ctx) { ModifyItem(item, ModeInsert, ctx); } void NamespaceImpl::Update(Item& item, const RdxContext& ctx) { ModifyItem(item, ModeUpdate, ctx); } -void NamespaceImpl::Update(const Query& query, LocalQueryResults& result, const RdxContext& ctx) { - PerfStatCalculatorMT calc(updatePerfCounter_, enablePerfCounters_); - UpdatesContainer pendedRepl; - - CounterGuardAIR32 cg(cancelCommitCnt_); - auto wlck = dataWLock(ctx); - cg.Reset(); - - doUpdate(query, result, pendedRepl, ctx); - replicate(std::move(pendedRepl), 
std::move(wlck), true, ctx); -} - void NamespaceImpl::Upsert(Item& item, const RdxContext& ctx) { ModifyItem(item, ModeUpsert, ctx); } void NamespaceImpl::Delete(Item& item, const RdxContext& ctx) { ModifyItem(item, ModeDelete, ctx); } @@ -1339,20 +1375,7 @@ void NamespaceImpl::doTruncate(UpdatesContainer& pendedRepl, const NsContext& ct pendedRepl.emplace_back(UpdateRecord::Type::Truncate, name_, wal_.LastLSN(), repl_.nsVersion, ctx.rdxContext.EmmiterServerId()); } -void NamespaceImpl::Delete(const Query& q, LocalQueryResults& result, const RdxContext& ctx) { - PerfStatCalculatorMT calc(updatePerfCounter_, enablePerfCounters_); - UpdatesContainer pendedRepl; - - CounterGuardAIR32 cg(cancelCommitCnt_); - auto wlck = dataWLock(ctx); - cg.Reset(); - calc.LockHit(); - - doDelete(q, result, pendedRepl, NsContext(ctx)); - replicate(std::move(pendedRepl), std::move(wlck), true, ctx); -} - -void NamespaceImpl::ModifyItem(Item& item, int mode, const RdxContext& ctx) { +void NamespaceImpl::ModifyItem(Item& item, ItemModifyMode mode, const RdxContext& ctx) { PerfStatCalculatorMT calc(updatePerfCounter_, enablePerfCounters_); UpdatesContainer pendedRepl; @@ -1362,7 +1385,7 @@ void NamespaceImpl::ModifyItem(Item& item, int mode, const RdxContext& ctx) { calc.LockHit(); modifyItem(item, mode, pendedRepl, NsContext(ctx)); - replicate(std::move(pendedRepl), std::move(wlck), true, ctx); + replicate(std::move(pendedRepl), std::move(wlck), true, nullptr, ctx); } void NamespaceImpl::Truncate(const RdxContext& ctx) { @@ -1376,7 +1399,7 @@ void NamespaceImpl::Truncate(const RdxContext& ctx) { calc.LockHit(); doTruncate(pendedRepl, ctx); - replicate(std::move(pendedRepl), std::move(wlck), true, ctx); + replicate(std::move(pendedRepl), std::move(wlck), true, nullptr, ctx); } void NamespaceImpl::Refill(std::vector& items, const RdxContext& ctx) { @@ -1422,12 +1445,13 @@ LocalTransaction NamespaceImpl::NewTransaction(const RdxContext& ctx) { return LocalTransaction(name_, payloadType_, 
tagsMatcher_, pkFields(), schema_, ctx.GetOriginLSN()); } -void NamespaceImpl::CommitTransaction(LocalTransaction& tx, LocalQueryResults& result, const NsContext& ctx) { +void NamespaceImpl::CommitTransaction(LocalTransaction& tx, LocalQueryResults& result, const NsContext& ctx, + QueryStatCalculator& queryStatCalculator) { Locker::WLockT wlck; if (!ctx.isCopiedNsRequest) { PerfStatCalculatorMT calc(updatePerfCounter_, enablePerfCounters_); CounterGuardAIR32 cg(cancelCommitCnt_); - wlck = dataWLock(ctx.rdxContext, true); + wlck = queryStatCalculator.CreateLock(*this, &NamespaceImpl::dataWLock, ctx.rdxContext, true); cg.Reset(); calc.LockHit(); } @@ -1465,7 +1489,7 @@ void NamespaceImpl::CommitTransaction(LocalTransaction& tx, LocalQueryResults& r } case TransactionStep::Type::Query: { LocalQueryResults qr; - qr.AddNamespace(std::shared_ptr{this, [](NamespaceImpl*) {}}, true, ctx.rdxContext); + qr.AddNamespace(this, true); auto& data = std::get(step.data_); if (data.query->type_ == QueryDelete) { doDelete(*data.query, qr, pendedRepl, NsContext(ctx).InTransaction(step.lsn_)); @@ -1504,7 +1528,7 @@ void NamespaceImpl::CommitTransaction(LocalTransaction& tx, LocalQueryResults& r // If commit happens in ns copy, than the copier have to handle replication UpdatesContainer pendedRepl; pendedRepl.emplace_back(UpdateRecord::Type::CommitTx, name_, wal_.LastLSN(), repl_.nsVersion, ctx.rdxContext.EmmiterServerId()); - replicate(std::move(pendedRepl), std::move(wlck), true, ctx); + replicate(std::move(pendedRepl), std::move(wlck), true, queryStatCalculator, ctx); return; } else if (ctx.inSnapshot && ctx.isRequireResync) { replicateAsync( @@ -1513,7 +1537,7 @@ void NamespaceImpl::CommitTransaction(LocalTransaction& tx, LocalQueryResults& r ctx.rdxContext); } if (!ctx.isCopiedNsRequest) { - tryForceFlush(std::move(wlck)); + queryStatCalculator.LogFlushDuration(*this, &NamespaceImpl::tryForceFlush, std::move(wlck)); } } @@ -1667,7 +1691,7 @@ void 
NamespaceImpl::updateTagsMatcherFromItem(ItemImpl* ritem, const NsContext& } } -void NamespaceImpl::modifyItem(Item& item, int mode, UpdatesContainer& pendedRepl, const NsContext& ctx) { +void NamespaceImpl::modifyItem(Item& item, ItemModifyMode mode, UpdatesContainer& pendedRepl, const NsContext& ctx) { if (mode == ModeDelete) { deleteItem(item, pendedRepl, ctx); } else { @@ -1706,7 +1730,7 @@ void NamespaceImpl::deleteItem(Item& item, UpdatesContainer& pendedRepl, const N } } -void NamespaceImpl::doModifyItem(Item& item, int mode, UpdatesContainer& pendedRepl, const NsContext& ctx, IdType suggestedId) { +void NamespaceImpl::doModifyItem(Item& item, ItemModifyMode mode, UpdatesContainer& pendedRepl, const NsContext& ctx, IdType suggestedId) { // Item to doUpsert assertrx(mode != ModeDelete); const auto oldTmV = tagsMatcher_.version(); @@ -2161,7 +2185,7 @@ IndexDef NamespaceImpl::getIndexDefinition(size_t i) const { NamespaceDef NamespaceImpl::getDefinition() const { auto pt = this->payloadType_; - NamespaceDef nsDef(name_, StorageOpts().Enabled(storage_.IsValid())); + NamespaceDef nsDef(name_, StorageOpts().Enabled(storage_.GetStatusCached().isEnabled)); nsDef.indexes.reserve(indexes_.size()); for (size_t i = 1; i < indexes_.size(); ++i) { nsDef.AddIndex(getIndexDefinition(i)); @@ -2213,13 +2237,13 @@ NamespaceMemStat NamespaceImpl::GetMemStat(const RdxContext& ctx) { ret.Total.cacheSize += istat.idsetCache.totalSize; } - const auto storageStatus = storage_.GetStatus(); + const auto storageStatus = storage_.GetStatusCached(); ret.storageOK = storageStatus.isEnabled && storageStatus.err.ok(); ret.storageEnabled = storageStatus.isEnabled; if (storageStatus.isEnabled) { if (storageStatus.err.ok()) { ret.storageStatus = "OK"sv; - } else if (checkIfEndsWith("No space left on device"sv, storageStatus.err.what(), true)) { + } else if (checkIfEndsWith("No space left on device"sv, storageStatus.err.what())) { ret.storageStatus = "NO SPACE LEFT"sv; } else { 
ret.storageStatus = storageStatus.err.what(); @@ -2227,7 +2251,7 @@ NamespaceMemStat NamespaceImpl::GetMemStat(const RdxContext& ctx) { } else { ret.storageStatus = "DISABLED"sv; } - ret.storagePath = storage_.Path(); + ret.storagePath = storage_.GetPathCached(); ret.optimizationCompleted = (optimizationState_ == OptimizationCompleted); ret.stringsWaitingToBeDeletedSize = strHolder_->MemStat(); @@ -2378,6 +2402,7 @@ bool NamespaceImpl::loadIndexesFromStorage() { Error err = indexDef.FromJSON(giftStr(indexData)); if (err.ok()) { try { + verifyAddIndex(indexDef, [this]() { return this->name_; }); addIndex(indexDef, false); } catch (const Error& e) { err = e; @@ -2595,7 +2620,7 @@ void NamespaceImpl::SetTagsMatcher(TagsMatcher&& tm, const RdxContext& ctx) { UpdatesContainer pendedRepl; auto wlck = dataWLock(ctx); setTagsMatcher(std::move(tm), pendedRepl, ctx); - replicate(std::move(pendedRepl), std::move(wlck), true, ctx); + replicate(std::move(pendedRepl), std::move(wlck), true, nullptr, ctx); } void NamespaceImpl::LoadFromStorage(unsigned threadsCount, const RdxContext& ctx) { @@ -2656,10 +2681,10 @@ void NamespaceImpl::removeExpiredItems(RdxActivityContext* ctx) { std::chrono::duration_cast(std::chrono::system_clock::now().time_since_epoch()).count() - index->GetTTLValue(); LocalQueryResults qr; - qr.AddNamespace(std::shared_ptr{this, [](NamespaceImpl*) {}}, true, rdxCtx); + qr.AddNamespace(this, true); doDelete(Query(name_).Where(index->Name(), CondLt, expirationthreshold), qr, pendedRepl, NsContext(rdxCtx)); } - replicate(std::move(pendedRepl), std::move(wlck), true, RdxContext(ctx)); + replicate(std::move(pendedRepl), std::move(wlck), true, nullptr, RdxContext(ctx)); } void NamespaceImpl::removeExpiredStrings(RdxActivityContext* ctx) { @@ -2711,16 +2736,6 @@ void NamespaceImpl::setTagsMatcher(TagsMatcher&& tm, UpdatesContainer& pendedRep saveTagsMatcherToStorage(true); } -StringsHolderPtr NamespaceImpl::StrHolder(bool noLock, const RdxContext& ctx) { - 
assertrx(noLock); - Locker::RLockT rlck; - if (!noLock) { - rlck = rLock(ctx); - } - StringsHolderPtr ret{strHolder_}; - return ret; -} - void NamespaceImpl::BackgroundRoutine(RdxActivityContext* ctx) { const RdxContext rdxCtx(ctx); const NsContext nsCtx(rdxCtx); @@ -2818,7 +2833,7 @@ void NamespaceImpl::PutMeta(const std::string& key, std::string_view data, const auto wlck = dataWLock(ctx); putMeta(key, data, pendedRepl, ctx); - replicate(std::move(pendedRepl), std::move(wlck), false, ctx); + replicate(std::move(pendedRepl), std::move(wlck), false, nullptr, ctx); } // Put meta data to storage by key @@ -3076,47 +3091,6 @@ void NamespaceImpl::replicateAsync(NamespaceImpl::UpdatesContainer&& recs, const } } -void NamespaceImpl::replicate(UpdateRecord&& rec, Locker::WLockT&& wlck, bool tryForceFlush, const RdxContext& ctx) { - if (!repl_.temporary) { - auto err = clusterizator_->Replicate( - std::move(rec), - [&wlck]() { - assertrx(wlck.isClusterLck()); - wlck.unlock(); - }, - ctx); - if (tryForceFlush) { - storage_.TryForceFlush(); - } - if (!err.ok()) { - throw Error(errUpdateReplication, err.what()); - } - } else if (tryForceFlush && wlck.owns_lock()) { - wlck.unlock(); - storage_.TryForceFlush(); - } -} - -void NamespaceImpl::replicate(UpdatesContainer&& recs, NamespaceImpl::Locker::WLockT&& wlck, bool tryForceFlush, const NsContext& ctx) { - if (!repl_.temporary) { - assertrx(!ctx.isCopiedNsRequest); - auto err = clusterizator_->Replicate( - std::move(recs), - [&wlck]() { - assertrx(wlck.isClusterLck()); - wlck.unlock(); - }, - ctx.rdxContext); - storage_.TryForceFlush(); - if (!err.ok()) { - throw Error(errUpdateReplication, err.what()); - } - } else if (tryForceFlush && wlck.owns_lock()) { - wlck.unlock(); - storage_.TryForceFlush(); - } -} - std::set NamespaceImpl::GetFTIndexes(const RdxContext& ctx) const { auto rlck = rLock(ctx); std::set ret; @@ -3145,6 +3119,7 @@ std::set NamespaceImpl::GetFTIndexes(const RdxContext& ctx) const { case IndexTtl: case 
IndexRTree: case IndexUuidHash: + case IndexUuidStore: break; } } diff --git a/cpp_src/core/namespace/namespaceimpl.h b/cpp_src/core/namespace/namespaceimpl.h index 7c10e0c57..86f124c62 100644 --- a/cpp_src/core/namespace/namespaceimpl.h +++ b/cpp_src/core/namespace/namespaceimpl.h @@ -56,6 +56,21 @@ class LocalQueryResults; class SnapshotRecord; class Snapshot; struct SnapshotOpts; + +namespace long_actions { +template +struct Logger; + +template +struct QueryEnum2Type; +} // namespace long_actions + +template class> +class QueryStatCalculator; + +template +using QueryStatCalculatorUpdDel = QueryStatCalculator, long_actions::Logger>; + namespace SortExprFuncs { struct DistanceBetweenJoinedIndexesSameNs; } // namespace SortExprFuncs @@ -100,7 +115,7 @@ namespace composite_substitution_helpers { class CompositeSearcher; } -class NamespaceImpl { // NOLINT(*performance.Padding) Padding does not matter for this class +class NamespaceImpl : public intrusive_atomic_rc_base { // NOLINT(*performance.Padding) Padding does not matter for this class class RollBack_insertIndex; class RollBack_addIndex; template @@ -109,12 +124,15 @@ class NamespaceImpl { // NOLINT(*performance.Padding) Padding does not matter f class RollBack_updateItems; class IndexesCacheCleaner { public: - explicit IndexesCacheCleaner(NamespaceImpl &ns) : ns_{ns} {} + explicit IndexesCacheCleaner(NamespaceImpl &ns) noexcept : ns_{ns} {} IndexesCacheCleaner(const IndexesCacheCleaner &) = delete; IndexesCacheCleaner(IndexesCacheCleaner &&) = delete; IndexesCacheCleaner &operator=(const IndexesCacheCleaner &) = delete; IndexesCacheCleaner &operator=(IndexesCacheCleaner &&) = delete; void Add(SortType s) { + if rx_unlikely (s >= sorts_.size()) { + throw Error(errLogic, "Index sort type overflow: %d. 
Limit is %d", s, sorts_.size() - 1); + } if (s > 0) { sorts_.set(s); } @@ -123,7 +141,7 @@ class NamespaceImpl { // NOLINT(*performance.Padding) Padding does not matter f private: NamespaceImpl &ns_; - std::bitset<64> sorts_; + std::bitset sorts_; }; friend class NsSelecter; @@ -206,22 +224,24 @@ class NamespaceImpl { // NOLINT(*performance.Padding) Padding does not matter f using UpdatesContainer = h_vector; enum OptimizationState : int { NotOptimized, OptimizedPartially, OptimizationCompleted }; - typedef shared_ptr Ptr; + using Ptr = intrusive_ptr; using Mutex = MarkedMutex; class Locker { public: class NsWLock { public: + using MutexType = Mutex; + NsWLock() = default; - NsWLock(Mutex &mtx, const RdxContext &ctx, bool isCL) : impl_(mtx, &ctx), isClusterLck_(isCL) {} + NsWLock(MutexType &mtx, const RdxContext &ctx, bool isCL) : impl_(mtx, &ctx), isClusterLck_(isCL) {} void lock() { impl_.lock(); } void unlock() { impl_.unlock(); } bool owns_lock() const { return impl_.owns_lock(); } bool isClusterLck() const noexcept { return isClusterLck_; } private: - contexted_unique_lock impl_; + contexted_unique_lock impl_; bool isClusterLck_ = false; }; typedef contexted_shared_lock RLockT; @@ -313,13 +333,11 @@ class NamespaceImpl { // NOLINT(*performance.Padding) Padding does not matter f void SetSchema(std::string_view schema, const RdxContext &ctx); std::string GetSchema(int format, const RdxContext &ctx); - void Insert(Item &item, const RdxContext &ctx); - void Update(Item &item, const RdxContext &ctx); - void Update(const Query &query, LocalQueryResults &result, const RdxContext &); + void Insert(Item &item, const RdxContext &); + void Update(Item &item, const RdxContext &); void Upsert(Item &item, const RdxContext &); void Delete(Item &item, const RdxContext &); - void Delete(const Query &query, LocalQueryResults &result, const RdxContext &); - void ModifyItem(Item &item, int mode, const RdxContext &ctx); + void ModifyItem(Item &item, ItemModifyMode mode, const 
RdxContext &); void Truncate(const RdxContext &); void Refill(std::vector &, const RdxContext &); @@ -335,7 +353,8 @@ class NamespaceImpl { // NOLINT(*performance.Padding) Padding does not matter f void CloseStorage(const RdxContext &); LocalTransaction NewTransaction(const RdxContext &ctx); - void CommitTransaction(LocalTransaction &tx, LocalQueryResults &result, const NsContext &ctx); + void CommitTransaction(LocalTransaction &tx, LocalQueryResults &result, const NsContext &ctx, + QueryStatCalculator &queryStatCalculator); Item NewItem(const RdxContext &ctx); void ToPool(ItemImpl *item); @@ -349,6 +368,7 @@ class NamespaceImpl { // NOLINT(*performance.Padding) Padding does not matter f int getIndexByNameOrJsonPath(std::string_view name) const; bool getIndexByName(std::string_view name, int &index) const; bool getIndexByNameOrJsonPath(std::string_view name, int &index) const; + bool getSparseIndexByJsonPath(std::string_view jsonPath, int &index) const; PayloadType getPayloadType(const RdxContext &ctx) const; void FillResult(LocalQueryResults &result, const IdSet &ids) const; @@ -394,9 +414,9 @@ class NamespaceImpl { // NOLINT(*performance.Padding) Padding does not matter f void doUpdate(const Query &query, LocalQueryResults &result, UpdatesContainer &pendedRepl, const NsContext &); void doDelete(const Query &query, LocalQueryResults &result, UpdatesContainer &pendedRepl, const NsContext &); void doUpsert(ItemImpl *ritem, IdType id, bool doUpdate); - void modifyItem(Item &item, int mode, UpdatesContainer &pendedRepl, const NsContext &ctx); + void modifyItem(Item &item, ItemModifyMode mode, UpdatesContainer &pendedRepl, const NsContext &ctx); void deleteItem(Item &item, UpdatesContainer &pendedRepl, const NsContext &ctx); - void doModifyItem(Item &item, int mode, UpdatesContainer &pendedRepl, const NsContext &ctx, IdType suggestedId = -1); + void doModifyItem(Item &item, ItemModifyMode mode, UpdatesContainer &pendedRepl, const NsContext &ctx, IdType suggestedId = 
-1); void updateTagsMatcherFromItem(ItemImpl *ritem, const NsContext &ctx); template [[nodiscard]] RollBack_updateItems updateItems(const PayloadType &oldPlType, const FieldsSet &changedFields, @@ -413,6 +433,8 @@ class NamespaceImpl { // NOLINT(*performance.Padding) Padding does not matter f template void createFieldsSet(const std::string &idxName, IndexType type, const PathsT &paths, FieldsSet &fields); void verifyCompositeIndex(const IndexDef &indexDef) const; + template + void verifyAddIndex(const IndexDef &indexDef, GetNameF &&) const; void verifyUpdateIndex(const IndexDef &indexDef) const; void verifyUpdateCompositeIndex(const IndexDef &indexDef) const; void updateIndex(const IndexDef &indexDef, bool disableTmVersionInc); @@ -497,7 +519,7 @@ class NamespaceImpl { // NOLINT(*performance.Padding) Padding does not matter f Locker locker_; std::shared_ptr schema_; - StringsHolderPtr StrHolder(bool noLock, const RdxContext &); + StringsHolderPtr strHolder() const noexcept { return strHolder_; } std::set GetFTIndexes(const RdxContext &) const; size_t ItemsCount() const noexcept { return items_.size() - free_.size(); } const NamespaceConfigData &Config() const noexcept { return config_; } @@ -512,8 +534,35 @@ class NamespaceImpl { // NOLINT(*performance.Padding) Padding does not matter f void processWalRecord(WALRecord &&wrec, const NsContext &ctx, lsn_t itemLsn = lsn_t(), Item *item = nullptr); void replicateAsync(cluster::UpdateRecord &&rec, const RdxContext &ctx); void replicateAsync(UpdatesContainer &&recs, const RdxContext &ctx); - void replicate(cluster::UpdateRecord &&rec, Locker::WLockT &&wlck, bool tryForceFlush, const RdxContext &ctx); - void replicate(UpdatesContainer &&recs, Locker::WLockT &&wlck, bool tryForceFlush, const NsContext &ctx); + template + void replicate(UpdatesContainer &&recs, NamespaceImpl::Locker::WLockT &&wlck, bool tryForceFlush, + QueryStatsCalculatorT &&statCalculator, const NsContext &ctx) { + if (!repl_.temporary) { + 
assertrx(!ctx.isCopiedNsRequest); + auto err = clusterizator_->Replicate( + std::move(recs), + [&wlck]() { + assertrx(wlck.isClusterLck()); + wlck.unlock(); + }, + ctx.rdxContext); + if constexpr (std::is_same_v) { + storage_.TryForceFlush(); + } else { + statCalculator.LogFlushDuration(storage_, &AsyncStorage::TryForceFlush); + } + if (!err.ok()) { + throw Error(errUpdateReplication, err.what()); + } + } else if (tryForceFlush && wlck.owns_lock()) { + wlck.unlock(); + if constexpr (std::is_same_v) { + storage_.TryForceFlush(); + } else { + statCalculator.LogFlushDuration(storage_, &AsyncStorage::TryForceFlush); + } + } + } void setTemporary() noexcept { repl_.temporary = true; } @@ -557,5 +606,4 @@ class NamespaceImpl { // NOLINT(*performance.Padding) Padding does not matter f cluster::INsDataReplicator *clusterizator_; std::atomic dbDestroyed_{false}; }; - } // namespace reindexer diff --git a/cpp_src/core/namespace/snapshot/snapshothandler.cc b/cpp_src/core/namespace/snapshot/snapshothandler.cc index 4196f54f7..ddf1312aa 100644 --- a/cpp_src/core/namespace/snapshot/snapshothandler.cc +++ b/cpp_src/core/namespace/snapshot/snapshothandler.cc @@ -175,11 +175,11 @@ Error SnapshotHandler::applyRealRecord(lsn_t lsn, const SnapshotRecord& snRec, c q.FromSQL(rec.data); switch (q.type_) { case QueryDelete: - result.AddNamespace(std::shared_ptr{&ns_, [](NamespaceImpl*) {}}, true, ctx.rdxContext); + result.AddNamespace(&ns_, true); ns_.doDelete(q, result, pendedRepl, ctx); break; case QueryUpdate: - result.AddNamespace(std::shared_ptr{&ns_, [](NamespaceImpl*) {}}, true, ctx.rdxContext); + result.AddNamespace(&ns_, true); ns_.doUpdate(q, result, pendedRepl, ctx); break; case QueryTruncate: diff --git a/cpp_src/core/nsselecter/aggregator.cc b/cpp_src/core/nsselecter/aggregator.cc index eff26cab3..81194c7d3 100644 --- a/cpp_src/core/nsselecter/aggregator.cc +++ b/cpp_src/core/nsselecter/aggregator.cc @@ -8,10 +8,10 @@ namespace reindexer { template static void moveFrames(It 
&begin, It &end, size_t size, size_t offset, size_t limit) { - if (offset > 0) { + if (offset > QueryEntry::kDefaultOffset) { std::advance(begin, offset); } - if (limit != UINT_MAX && offset + limit < size) { + if (limit != QueryEntry::kDefaultLimit && offset + limit < size) { end = begin; std::advance(end, limit); } diff --git a/cpp_src/core/nsselecter/aggregator.h b/cpp_src/core/nsselecter/aggregator.h index bee9c5244..e952d3f4e 100644 --- a/cpp_src/core/nsselecter/aggregator.h +++ b/cpp_src/core/nsselecter/aggregator.h @@ -3,6 +3,7 @@ #include #include #include "core/index/payload_map.h" +#include "core/query/queryentry.h" #include "estl/one_of.h" #include "vendor/cpp-btree/btree_map.h" @@ -19,7 +20,8 @@ class Aggregator { }; Aggregator(const PayloadType &, const FieldsSet &, AggType aggType, const h_vector &names, - const h_vector &sort = {}, size_t limit = UINT_MAX, size_t offset = 0, bool compositeIndexFields = false); + const h_vector &sort = {}, size_t limit = QueryEntry::kDefaultLimit, + size_t offset = QueryEntry::kDefaultOffset, bool compositeIndexFields = false); Aggregator(); Aggregator(Aggregator &&) noexcept; ~Aggregator(); @@ -52,8 +54,8 @@ class Aggregator { int hitCount_ = 0; AggType aggType_; h_vector names_; - size_t limit_ = UINT_MAX; - size_t offset_ = 0; + size_t limit_ = QueryEntry::kDefaultLimit; + size_t offset_ = QueryEntry::kDefaultOffset; std::unique_ptr facets_; diff --git a/cpp_src/core/nsselecter/btreeindexiterator.h b/cpp_src/core/nsselecter/btreeindexiterator.h index b859f4d55..a805955a8 100644 --- a/cpp_src/core/nsselecter/btreeindexiterator.h +++ b/cpp_src/core/nsselecter/btreeindexiterator.h @@ -13,7 +13,7 @@ class BtreeIndexIterator final : public IndexIterator { explicit BtreeIndexIterator(const T& idxMap) : idxMap_(idxMap), first_(idxMap.begin()), last_(idxMap.end()) {} BtreeIndexIterator(const T& idxMap, const typename T::iterator& first, const typename T::iterator& last) : idxMap_(idxMap), first_(first), last_(last) {} - 
~BtreeIndexIterator() override final {} + ~BtreeIndexIterator() override final = default; void Start(bool reverse) final override { if (reverse) { @@ -26,7 +26,7 @@ class BtreeIndexIterator final : public IndexIterator { impl_->shiftIdsetToBegin(); } - bool Next() final override { + bool Next() noexcept final override { assertrx(impl_); if (impl_->isOver()) { return impl_->finishIteration(); @@ -41,12 +41,12 @@ class BtreeIndexIterator final : public IndexIterator { return true; } - void ExcludeLastSet() override { + void ExcludeLastSet() noexcept override { assertrx(impl_); impl_->shiftToNextIdset(); } - IdType Value() const override final { + IdType Value() const noexcept override final { assertrx(impl_); return impl_->getValue(); } @@ -54,7 +54,7 @@ class BtreeIndexIterator final : public IndexIterator { if (cachedIters_ != std::numeric_limits::max()) return cachedIters_; return BtreeIndexForwardIteratorImpl(idxMap_, first_, last_).getMaxIterations(limitIters); } - void SetMaxIterations(size_t iters) final { cachedIters_ = iters; } + void SetMaxIterations(size_t iters) noexcept final { cachedIters_ = iters; } private: std::shared_ptr> impl_; diff --git a/cpp_src/core/nsselecter/btreeindexiteratorimpl.h b/cpp_src/core/nsselecter/btreeindexiteratorimpl.h index c303fb66e..e2ce9c82b 100644 --- a/cpp_src/core/nsselecter/btreeindexiteratorimpl.h +++ b/cpp_src/core/nsselecter/btreeindexiteratorimpl.h @@ -15,14 +15,14 @@ class BtreeIndexIteratorImpl { public: enum class IdsetType { Plain = 0, Btree }; - explicit BtreeIndexIteratorImpl(const T& idxMap) : idxMap_(idxMap){}; - virtual ~BtreeIndexIteratorImpl(){}; + explicit BtreeIndexIteratorImpl(const T& idxMap) : idxMap_(idxMap) {} + virtual ~BtreeIndexIteratorImpl() = default; - virtual bool isOver() const = 0; - virtual void shiftToBegin() = 0; - virtual void next() = 0; + virtual bool isOver() const noexcept = 0; + virtual void shiftToBegin() noexcept = 0; + virtual void next() noexcept = 0; - bool shiftToNextIdset() 
{ + bool shiftToNextIdset() noexcept { if (isOver()) return false; for (next(); !isOver() && getCurrentIdsetSize() == 0;) { next(); @@ -54,7 +54,7 @@ class BtreeIndexIteratorImpl { break; } } - bool isIdsetOver() const { + bool isIdsetOver() const noexcept { switch (currentIdsetType_) { case IdsetType::Btree: return isBtreeIdsetOver(); @@ -64,7 +64,7 @@ class BtreeIndexIteratorImpl { std::abort(); } } - void updateCurrentValue() { + void updateCurrentValue() noexcept { switch (currentIdsetType_) { case IdsetType::Btree: currVal_ = getBtreeIdsetCurrentValue(); @@ -76,13 +76,13 @@ class BtreeIndexIteratorImpl { std::abort(); } } - bool finishIteration() { + bool finishIteration() noexcept { currVal_ = INT_MAX; return false; } template - void detectCurrentIdsetType(const TIdSet& idset) { + void detectCurrentIdsetType(const TIdSet& idset) noexcept { if (std::is_same() && !idset.IsCommited()) { currentIdsetType_ = IdsetType::Btree; } else { @@ -90,8 +90,8 @@ class BtreeIndexIteratorImpl { } } - size_t getSize() const { return idxMap_.size(); } - size_t getCurrentIdsetSize() const { + size_t getSize() const noexcept { return idxMap_.size(); } + size_t getCurrentIdsetSize() const noexcept { switch (currentIdsetType_) { case IdsetType::Btree: return getBtreeIdsetSize(); @@ -138,9 +138,9 @@ class BtreeIndexForwardIteratorImpl : public BtreeIndexIteratorImpl { this->idxMapItEnd_ = last; this->idxMapIt_ = this->idxMapItBegin_; } - ~BtreeIndexForwardIteratorImpl() override {} + ~BtreeIndexForwardIteratorImpl() override = default; - void shiftToBegin() override { + void shiftToBegin() noexcept override { this->idxMapIt_ = this->idxMapItBegin_; if (this->getSize() > 0) { this->detectCurrentIdsetType(this->idxMapIt_->second.Unsorted()); @@ -148,33 +148,39 @@ class BtreeIndexForwardIteratorImpl : public BtreeIndexIteratorImpl { } } - void next() override { + void next() noexcept override { ++this->idxMapIt_; if (!isOver()) { 
this->detectCurrentIdsetType(this->idxMapIt_->second.Unsorted()); } } - void shiftPlainIdsetToNext() override { + void shiftPlainIdsetToNext() noexcept override { const auto& idset = this->idxMapIt_->second.Unsorted(); for (; it_ != idset.end() && *it_ <= this->currVal_; ++it_) { } } - void shiftBtreeIdsetToNext() override { + void shiftBtreeIdsetToNext() noexcept override { const IdSet& sortedIdset = static_cast(this->idxMapIt_->second.Unsorted()); for (; itset_ != sortedIdset.set_->end() && *itset_ <= this->currVal_; ++itset_) { } } - bool isOver() const override { return this->idxMapIt_ == this->idxMapItEnd_; } - void shiftPlainIdsetToBegin() override { it_ = this->idxMapIt_->second.Unsorted().begin(); } - void shiftBtreeIdsetToBegin() override { itset_ = static_cast(this->idxMapIt_->second.Unsorted()).set_->begin(); } - bool isPlainIdsetOver() const override { return it_ == this->idxMapIt_->second.Unsorted().end(); } - bool isBtreeIdsetOver() const override { return itset_ == static_cast(this->idxMapIt_->second.Unsorted()).set_->end(); } - IdType getPlainIdsetCurrentValue() const override { return *it_; } - IdType getBtreeIdsetCurrentValue() const override { return *itset_; } - size_t getPlainIdsetSize() const override { return this->idxMapIt_->second.Unsorted().size(); } - size_t getBtreeIdsetSize() const override { return static_cast(this->idxMapIt_->second.Unsorted()).set_->size(); } + bool isOver() const noexcept override { return this->idxMapIt_ == this->idxMapItEnd_; } + void shiftPlainIdsetToBegin() noexcept override { it_ = this->idxMapIt_->second.Unsorted().begin(); } + void shiftBtreeIdsetToBegin() noexcept override { + itset_ = static_cast(this->idxMapIt_->second.Unsorted()).set_->begin(); + } + bool isPlainIdsetOver() const noexcept override { return it_ == this->idxMapIt_->second.Unsorted().end(); } + bool isBtreeIdsetOver() const noexcept override { + return itset_ == static_cast(this->idxMapIt_->second.Unsorted()).set_->end(); + } + IdType 
getPlainIdsetCurrentValue() const noexcept override { return *it_; } + IdType getBtreeIdsetCurrentValue() const noexcept override { return *itset_; } + size_t getPlainIdsetSize() const noexcept override { return this->idxMapIt_->second.Unsorted().size(); } + size_t getBtreeIdsetSize() const noexcept override { + return static_cast(this->idxMapIt_->second.Unsorted()).set_->size(); + } size_t getMaxIterations(size_t limitIters) noexcept { size_t cnt = 0; for (auto it = idxMapItBegin_; cnt < limitIters && it != idxMapItEnd_; ++it) { @@ -219,9 +225,9 @@ class BtreeIndexReverseIteratorImpl : public BtreeIndexIteratorImpl { idxMapRit_ = idxMapRitBegin_; } - ~BtreeIndexReverseIteratorImpl() override {} + ~BtreeIndexReverseIteratorImpl() override = default; - void shiftToBegin() override { + void shiftToBegin() noexcept override { this->idxMapRit_ = this->idxMapRitBegin_; if (this->getSize() > 0) { this->detectCurrentIdsetType(this->idxMapRit_->second.Unsorted()); @@ -229,36 +235,40 @@ class BtreeIndexReverseIteratorImpl : public BtreeIndexIteratorImpl { } } - void shiftPlainIdsetToNext() override { + void shiftPlainIdsetToNext() noexcept override { const auto& idset = this->idxMapRit_->second.Unsorted(); for (; rit_ != idset.rend() && *rit_ >= this->currVal_; ++rit_) { } } - void shiftBtreeIdsetToNext() override { + void shiftBtreeIdsetToNext() noexcept override { const IdSet& sortedIdset = static_cast(this->idxMapRit_->second.Unsorted()); for (; ritset_ != sortedIdset.set_->rend() && *ritset_ >= this->currVal_; ++ritset_) { } } - void next() override { + void next() noexcept override { ++this->idxMapRit_; if (!isOver()) { this->detectCurrentIdsetType(this->idxMapRit_->second.Unsorted()); } } - bool isOver() const override { return idxMapRit_ == idxMapRitEnd_; } - void shiftPlainIdsetToBegin() override { rit_ = this->idxMapRit_->second.Unsorted().rbegin(); } - void shiftBtreeIdsetToBegin() override { ritset_ = 
static_cast(this->idxMapRit_->second.Unsorted()).set_->rbegin(); } - bool isPlainIdsetOver() const override { return rit_ == this->idxMapRit_->second.Unsorted().rend(); } - bool isBtreeIdsetOver() const override { + bool isOver() const noexcept override { return idxMapRit_ == idxMapRitEnd_; } + void shiftPlainIdsetToBegin() noexcept override { rit_ = this->idxMapRit_->second.Unsorted().rbegin(); } + void shiftBtreeIdsetToBegin() noexcept override { + ritset_ = static_cast(this->idxMapRit_->second.Unsorted()).set_->rbegin(); + } + bool isPlainIdsetOver() const noexcept override { return rit_ == this->idxMapRit_->second.Unsorted().rend(); } + bool isBtreeIdsetOver() const noexcept override { return ritset_ == static_cast(this->idxMapRit_->second.Unsorted()).set_->rend(); } - IdType getPlainIdsetCurrentValue() const override { return *rit_; } - IdType getBtreeIdsetCurrentValue() const override { return *ritset_; } - size_t getPlainIdsetSize() const override { return this->idxMapRit_->second.Unsorted().size(); } - size_t getBtreeIdsetSize() const override { return static_cast(this->idxMapRit_->second.Unsorted()).set_->size(); } + IdType getPlainIdsetCurrentValue() const noexcept override { return *rit_; } + IdType getBtreeIdsetCurrentValue() const noexcept override { return *ritset_; } + size_t getPlainIdsetSize() const noexcept override { return this->idxMapRit_->second.Unsorted().size(); } + size_t getBtreeIdsetSize() const noexcept override { + return static_cast(this->idxMapRit_->second.Unsorted()).set_->size(); + } private: union { diff --git a/cpp_src/core/nsselecter/explaincalc.cc b/cpp_src/core/nsselecter/explaincalc.cc index 71b74fdf4..c7d9506de 100644 --- a/cpp_src/core/nsselecter/explaincalc.cc +++ b/cpp_src/core/nsselecter/explaincalc.cc @@ -1,9 +1,8 @@ #include "explaincalc.h" + #include -#include "core/cbinding/reindexer_ctypes.h" + #include "core/cjson/jsonbuilder.h" -#include "core/namespace/namespaceimpl.h" -#include "core/query/sql/sqlencoder.h" 
#include "nsselecter.h" #include "tools/logger.h" @@ -36,9 +35,9 @@ void ExplainCalc::LogDump(int logLevel) { if (jselectors_) { for (auto &js : *jselectors_) { if (js.Type() == JoinType::LeftJoin || js.Type() == JoinType::Merge) { - logPrintf(LogInfo, "%s %s: called %d", SQLEncoder::JoinTypeName(js.Type()), js.RightNsName(), js.Called()); + logPrintf(LogInfo, "%s %s: called %d", JoinTypeName(js.Type()), js.RightNsName(), js.Called()); } else { - logPrintf(LogInfo, "%s %s: called %d, matched %d", SQLEncoder::JoinTypeName(js.Type()), js.RightNsName(), js.Called(), + logPrintf(LogInfo, "%s %s: called %d, matched %d", JoinTypeName(js.Type()), js.RightNsName(), js.Called(), js.Matched()); } } @@ -46,7 +45,7 @@ void ExplainCalc::LogDump(int logLevel) { } } -static const char *joinTypeName(JoinType type) { +constexpr inline const char *joinTypeName(JoinType type) noexcept { switch (type) { case JoinType::InnerJoin: return "inner_join "; @@ -61,7 +60,7 @@ static const char *joinTypeName(JoinType type) { } } -static const char *opName(OpType op, bool first = true) { +constexpr inline const char *opName(OpType op, bool first = true) { switch (op) { case OpAnd: return first ? 
"" : "and "; @@ -70,17 +69,32 @@ static const char *opName(OpType op, bool first = true) { case OpNot: return "not "; default: - abort(); + throw Error(errLogic, "Unexpected op type %d", int(op)); + } +} + +constexpr std::string_view fieldKind(IteratorFieldKind fk) { + using namespace std::string_view_literals; + switch (fk) { + case IteratorFieldKind::NonIndexed: + return "non-indexed"sv; + case IteratorFieldKind::Indexed: + return "indexed"sv; + case IteratorFieldKind::None: + return ""sv; + default: + throw Error(errLogic, "Unexpected field type %d", int(fk)); } } static std::string addToJSON(JsonBuilder &builder, const JoinedSelector &js, OpType op = OpAnd) { + using namespace std::string_view_literals; auto jsonSel = builder.Object(); std::string name{joinTypeName(js.Type()) + js.RightNsName()}; - jsonSel.Put("field", opName(op) + name); - jsonSel.Put("matched", js.Matched()); - jsonSel.Put("selects_count", js.Called()); - jsonSel.Put("join_select_total", ExplainCalc::To_us(js.PreResult()->selectTime)); + jsonSel.Put("field"sv, opName(op) + name); + jsonSel.Put("matched"sv, js.Matched()); + jsonSel.Put("selects_count"sv, js.Called()); + jsonSel.Put("join_select_total"sv, ExplainCalc::To_us(js.PreResult()->selectTime)); switch (js.Type()) { case JoinType::InnerJoin: case JoinType::OrInnerJoin: @@ -88,25 +102,25 @@ static std::string addToJSON(JsonBuilder &builder, const JoinedSelector &js, OpT assertrx(js.PreResult()); switch (js.PreResult()->dataMode) { case JoinPreResult::ModeValues: - jsonSel.Put("method", "preselected_values"); - jsonSel.Put("keys", js.PreResult()->values.size()); + jsonSel.Put("method"sv, "preselected_values"sv); + jsonSel.Put("keys"sv, js.PreResult()->values.size()); break; case JoinPreResult::ModeIdSet: - jsonSel.Put("method", "preselected_rows"); - jsonSel.Put("keys", js.PreResult()->ids.size()); + jsonSel.Put("method"sv, "preselected_rows"sv); + jsonSel.Put("keys"sv, js.PreResult()->ids.size()); break; case 
JoinPreResult::ModeIterators: - jsonSel.Put("method", "no_preselect"); - jsonSel.Put("keys", js.PreResult()->iterators.Size()); + jsonSel.Put("method"sv, "no_preselect"sv); + jsonSel.Put("keys"sv, js.PreResult()->iterators.Size()); break; default: break; } if (!js.PreResult()->explainPreSelect.empty()) { - jsonSel.Raw("explain_preselect", js.PreResult()->explainPreSelect); + jsonSel.Raw("explain_preselect"sv, js.PreResult()->explainPreSelect); } if (!js.PreResult()->explainOneSelect.empty()) { - jsonSel.Raw("explain_select", js.PreResult()->explainOneSelect); + jsonSel.Raw("explain_select"sv, js.PreResult()->explainOneSelect); } break; case JoinType::Merge: @@ -115,31 +129,101 @@ static std::string addToJSON(JsonBuilder &builder, const JoinedSelector &js, OpT return name; } +static void addToJSON(JsonBuilder &builder, const ConditionInjection &injCond) { + auto jsonSel = builder.Object(); + using namespace std::string_view_literals; + using namespace std::string_literals; + + jsonSel.Put("condition"sv, injCond.initCond); + jsonSel.Put("total_time_us"sv, ExplainCalc::To_us(injCond.totalTime_)); + jsonSel.Put("success"sv, injCond.succeed); + if (!injCond.succeed) { + if (injCond.reason.empty()) { + if (injCond.orChainPart_) { + jsonSel.Put("reason"sv, "Skipped as Or-chain part."sv); + } else { + jsonSel.Put("reason"sv, "Unknown"sv); + } + } else { + std::string reason{injCond.reason}; + if (injCond.orChainPart_) { + reason += " Or-chain part."; + } + jsonSel.Put("reason"sv, reason); + } + } + + if (!injCond.explain.empty()) { + jsonSel.Raw("explain_select"sv, injCond.explain); + } + if (injCond.aggType != AggType::AggUnknown) { + jsonSel.Put("agg_type"sv, AggTypeToStr(injCond.aggType)); + } + jsonSel.Put("values_count"sv, injCond.valuesCount); + jsonSel.Put("new_condition"sv, injCond.newCond); +} + +static std::string addToJSON(JsonBuilder &builder, const JoinOnInjection &injCond) { + auto jsonSel = builder.Object(); + std::string name{injCond.rightNsName}; + using 
namespace std::string_view_literals; + + jsonSel.Put("namespace"sv, injCond.rightNsName); + jsonSel.Put("on_condition"sv, injCond.joinCond); + jsonSel.Put("type"sv, injCond.type == JoinOnInjection::ByValue ? "by_value"sv : "select"sv); + jsonSel.Put("total_time_us"sv, ExplainCalc::To_us(injCond.totalTime_)); + jsonSel.Put("success"sv, injCond.succeed); + if (!injCond.reason.empty()) { + jsonSel.Put("reason"sv, injCond.reason); + } + jsonSel.Put("injected_condition"sv, injCond.injectedCond.Slice()); + if (!injCond.conditions.empty()) { + auto jsonCondInjections = jsonSel.Array("conditions"sv); + for (const auto &cond : injCond.conditions) { + addToJSON(jsonCondInjections, cond); + } + } + + return name; +} + std::string ExplainCalc::GetJSON() { + using namespace std::string_view_literals; WrSerializer ser; { JsonBuilder json(ser); if (enabled_) { - json.Put("total_us", To_us(total_)); - json.Put("prepare_us", To_us(prepare_)); - json.Put("indexes_us", To_us(select_)); - json.Put("postprocess_us", To_us(postprocess_)); - json.Put("loop_us", To_us(loop_)); - json.Put("general_sort_us", To_us(sort_)); + json.Put("total_us"sv, To_us(total_)); + json.Put("preselect_us"sv, To_us(preselect_)); + json.Put("prepare_us"sv, To_us(prepare_)); + json.Put("indexes_us"sv, To_us(select_)); + json.Put("postprocess_us"sv, To_us(postprocess_)); + json.Put("loop_us"sv, To_us(loop_)); + json.Put("general_sort_us"sv, To_us(sort_)); } - json.Put("sort_index", sortIndex_); - json.Put("sort_by_uncommitted_index", sortOptimization_); + json.Put("sort_index"sv, sortIndex_); + json.Put("sort_by_uncommitted_index"sv, sortOptimization_); - auto jsonSelArr = json.Array("selectors"); + { + auto jsonSelArr = json.Array("selectors"sv); - if (selectors_) { - selectors_->ExplainJSON(iters_, jsonSelArr, jselectors_); + if (selectors_) { + selectors_->ExplainJSON(iters_, jsonSelArr, jselectors_); + } + + if (jselectors_) { + // adding explain for LeftJoin-s and Merge subqueries + for (const 
JoinedSelector &js : *jselectors_) { + if (js.Type() == JoinType::InnerJoin || js.Type() == JoinType::OrInnerJoin) continue; + addToJSON(jsonSelArr, js); + } + } } - if (jselectors_) { - for (JoinedSelector &js : *jselectors_) { - if (js.Type() == JoinType::InnerJoin || js.Type() == JoinType::OrInnerJoin) continue; - addToJSON(jsonSelArr, js); + if (onInjections_ && !onInjections_->empty()) { + auto jsonOnInjections = json.Array("on_conditions_injections"sv); + for (const JoinOnInjection &injCond : *onInjections_) { + addToJSON(jsonOnInjections, injCond); } } } @@ -150,6 +234,8 @@ std::string ExplainCalc::GetJSON() { std::string SelectIteratorContainer::explainJSON(const_iterator begin, const_iterator end, int iters, JsonBuilder &builder, const JoinedSelectors *jselectors) { using namespace std::string_literals; + using namespace std::string_view_literals; + std::stringstream name; name << '('; for (const_iterator it = begin; it != end; ++it) { @@ -157,26 +243,29 @@ std::string SelectIteratorContainer::explainJSON(const_iterator begin, const_ite it->InvokeAppropriate( [&](const SelectIteratorsBracket &) { auto jsonSel = builder.Object(); - auto jsonSelArr = jsonSel.Array("selectors"); + auto jsonSelArr = jsonSel.Array("selectors"sv); const std::string brName{explainJSON(it.cbegin(), it.cend(), iters, jsonSelArr, jselectors)}; jsonSelArr.End(); - jsonSel.Put("field", opName(it->operation) + brName); + jsonSel.Put("field"sv, opName(it->operation) + brName); name << opName(it->operation, it == begin) << brName; }, [&](const SelectIterator &siter) { auto jsonSel = builder.Object(); - const bool isScanIterator{std::string_view(siter.name) == "-scan"}; + const bool isScanIterator{std::string_view(siter.name) == "-scan"sv}; if (!isScanIterator) { - jsonSel.Put("keys", siter.size()); - jsonSel.Put("comparators", siter.comparators_.size()); - jsonSel.Put("cost", siter.Cost(iters)); + jsonSel.Put("keys"sv, siter.size()); + jsonSel.Put("comparators"sv, 
siter.comparators_.size()); + jsonSel.Put("cost"sv, siter.Cost(iters)); } else { - jsonSel.Put("items", siter.GetMaxIterations(iters)); + jsonSel.Put("items"sv, siter.GetMaxIterations(iters)); } - jsonSel.Put("field", opName(it->operation) + siter.name); - jsonSel.Put("matched", siter.GetMatchedCount()); - jsonSel.Put("method", isScanIterator || siter.comparators_.size() ? "scan" : "index"); - jsonSel.Put("type", siter.TypeName()); + jsonSel.Put("field"sv, opName(it->operation) + siter.name); + if (siter.fieldKind != IteratorFieldKind::None) { + jsonSel.Put("field_type"sv, fieldKind(siter.fieldKind)); + } + jsonSel.Put("matched"sv, siter.GetMatchedCount()); + jsonSel.Put("method"sv, isScanIterator || siter.comparators_.size() ? "scan"sv : "index"sv); + jsonSel.Put("type"sv, siter.TypeName()); name << opName(it->operation, it == begin) << siter.name; }, [&](const JoinSelectIterator &jiter) { @@ -186,71 +275,66 @@ std::string SelectIteratorContainer::explainJSON(const_iterator begin, const_ite }, [&](const FieldsComparator &c) { auto jsonSel = builder.Object(); - jsonSel.Put("comparators", 1); - jsonSel.Put("field", opName(it->operation) + c.Name()); - jsonSel.Put("cost", c.Cost(iters)); - jsonSel.Put("method", "scan"); - jsonSel.Put("items", iters); - jsonSel.Put("matched", c.GetMatchedCount()); - jsonSel.Put("type", "TwoFieldsComparison"); + jsonSel.Put("comparators"sv, 1); + jsonSel.Put("field"sv, opName(it->operation) + c.Name()); + jsonSel.Put("cost"sv, c.Cost(iters)); + jsonSel.Put("method"sv, "scan"sv); + jsonSel.Put("items"sv, iters); + jsonSel.Put("matched"sv, c.GetMatchedCount()); + jsonSel.Put("type"sv, "TwoFieldsComparison"sv); name << opName(it->operation, it == begin) << c.Name(); }, [&](const AlwaysFalse &) { auto jsonSkiped = builder.Object(); - jsonSkiped.Put("type", "Skipped"); - jsonSkiped.Put("description", "always "s + (it->operation == OpNot ? "true" : "false")); - name << opName(it->operation == OpNot ? 
OpAnd : it->operation, it == begin) << "Always" - << (it->operation == OpNot ? "True" : "False"); + jsonSkiped.Put("type"sv, "Skipped"sv); + jsonSkiped.Put("description"sv, "always "s + (it->operation == OpNot ? "true" : "false")); + name << opName(it->operation == OpNot ? OpAnd : it->operation, it == begin) << "Always"sv + << (it->operation == OpNot ? "True"sv : "False"sv); }); } name << ')'; return name.str(); } -ExplainCalc::Duration ExplainCalc::lap() { +ExplainCalc::Duration ExplainCalc::lap() noexcept { auto now = Clock::now(); Duration d = now - last_point_; last_point_ = now; return d; } -int ExplainCalc::To_us(const ExplainCalc::Duration &d) { return duration_cast(d).count(); } +int ExplainCalc::To_us(const ExplainCalc::Duration &d) noexcept { return duration_cast(d).count(); } -void reindexer::ExplainCalc::StartTiming() { +void ExplainCalc::StartTiming() noexcept { if (enabled_) lap(); } -void reindexer::ExplainCalc::StopTiming() { - if (enabled_) total_ = prepare_ + select_ + postprocess_ + loop_; +void ExplainCalc::StopTiming() noexcept { + if (enabled_) total_ = preselect_ + prepare_ + select_ + postprocess_ + loop_; } -void reindexer::ExplainCalc::AddPrepareTime() { +void ExplainCalc::AddPrepareTime() noexcept { if (enabled_) prepare_ += lap(); } -void reindexer::ExplainCalc::AddSelectTime() { +void ExplainCalc::AddSelectTime() noexcept { if (enabled_) select_ += lap(); } -void reindexer::ExplainCalc::AddPostprocessTime() { +void ExplainCalc::AddPostprocessTime() noexcept { if (enabled_) postprocess_ += lap(); } -void reindexer::ExplainCalc::AddLoopTime() { +void ExplainCalc::AddLoopTime() noexcept { if (enabled_) loop_ += lap(); } -void reindexer::ExplainCalc::StartSort() { +void ExplainCalc::StartSort() noexcept { if (enabled_) sort_start_point_ = Clock::now(); } -void reindexer::ExplainCalc::StopSort() { +void ExplainCalc::StopSort() noexcept { if (enabled_) sort_ = Clock::now() - sort_start_point_; } -void reindexer::ExplainCalc::AddIterations(int 
iters) { iters_ += iters; } -void reindexer::ExplainCalc::PutSortIndex(std::string_view index) { sortIndex_ = index; } -void ExplainCalc::PutSelectors(SelectIteratorContainer *qres) { selectors_ = qres; } -void ExplainCalc::PutJoinedSelectors(JoinedSelectors *jselectors) { jselectors_ = jselectors; } - } // namespace reindexer diff --git a/cpp_src/core/nsselecter/explaincalc.h b/cpp_src/core/nsselecter/explaincalc.h index 46dd2df11..837dfafde 100644 --- a/cpp_src/core/nsselecter/explaincalc.h +++ b/cpp_src/core/nsselecter/explaincalc.h @@ -5,13 +5,17 @@ #include #include "core/type_consts.h" -#include "estl/h_vector.h" +#include "tools/serializer.h" namespace reindexer { class SelectIteratorContainer; class JoinedSelector; +struct JoinOnInjection; +struct ConditionInjection; + typedef std::vector JoinedSelectors; +typedef std::vector OnConditionInjections; class ExplainCalc { public: @@ -22,50 +26,94 @@ class ExplainCalc { typedef Clock::time_point time_point; public: - ExplainCalc(bool enable) : enabled_(enable) {} + ExplainCalc() = default; + ExplainCalc(bool enable) noexcept : enabled_(enable) {} - void StartTiming(); - void StopTiming(); + void StartTiming() noexcept; + void StopTiming() noexcept; - void AddPrepareTime(); - void AddSelectTime(); - void AddPostprocessTime(); - void AddLoopTime(); - void AddIterations(int iters); - void StartSort(); - void StopSort(); + void AddPrepareTime() noexcept; + void AddSelectTime() noexcept; + void AddPostprocessTime() noexcept; + void AddLoopTime() noexcept; + void AddIterations(int iters) noexcept { iters_ += iters; } + void StartSort() noexcept; + void StopSort() noexcept; - void PutCount(int cnt) { count_ = cnt; } - void PutSortIndex(std::string_view index); - void PutSelectors(SelectIteratorContainer *qres); - void PutJoinedSelectors(JoinedSelectors *jselectors); - void SetSortOptimization(bool enable) { sortOptimization_ = enable; } + void PutCount(int cnt) noexcept { count_ = cnt; } + void 
PutSortIndex(std::string_view index) noexcept { sortIndex_ = index; } + void PutSelectors(const SelectIteratorContainer *qres) noexcept { selectors_ = qres; } + void PutJoinedSelectors(const JoinedSelectors *jselectors) noexcept { jselectors_ = jselectors; } + void SetPreselectTime(Duration preselectTime) noexcept { preselect_ = preselectTime; } + void PutOnConditionInjections(const OnConditionInjections *onCondInjections) noexcept { onInjections_ = onCondInjections; } + void SetSortOptimization(bool enable) noexcept { sortOptimization_ = enable; } void LogDump(int logLevel); std::string GetJSON(); + Duration Total() const noexcept { return total_; } + Duration Prepare() const noexcept { return prepare_; } + Duration Indexes() const noexcept { return select_; } + Duration Postprocess() const noexcept { return postprocess_; } + Duration Loop() const noexcept { return loop_; } + Duration Sort() const noexcept { return sort_; } + size_t Iterations() const noexcept { return iters_; } - static int To_us(const Duration &d); + static int To_us(const Duration &d) noexcept; + bool IsEnabled() const noexcept { return enabled_; } -protected: - Duration lap(); - static const char *JoinTypeName(JoinType jtype); +private: + Duration lap() noexcept; -protected: time_point last_point_, sort_start_point_; Duration total_, prepare_ = Duration::zero(); + Duration preselect_ = Duration::zero(); Duration select_ = Duration::zero(); Duration postprocess_ = Duration::zero(); Duration loop_ = Duration::zero(); Duration sort_ = Duration::zero(); std::string_view sortIndex_; - SelectIteratorContainer *selectors_ = nullptr; - JoinedSelectors *jselectors_ = nullptr; - bool sortOptimization_ = false; + const SelectIteratorContainer *selectors_ = nullptr; + const JoinedSelectors *jselectors_ = nullptr; + const OnConditionInjections *onInjections_ = nullptr; ///< Optional + int iters_ = 0; int count_ = 0; - bool enabled_; + bool sortOptimization_ = false; + bool enabled_ = false; +}; + +/** + * 
@brief Describes the process of a single JOIN-query ON-conditions injection into the Where clause of a main query + */ +struct JoinOnInjection { + std::string_view rightNsName; ///< joinable ns name + std::string joinCond; ///< original ON-conditions clause. SQL-like string + ExplainCalc::Duration totalTime_ = + ExplainCalc::Duration::zero(); ///< total amount of time spent on checking and substituting all conditions + bool succeed = false; ///< result of injection attempt + std::string_view reason; ///< optional{succeed==false}. Explains condition injection failure + enum { ByValue, Select } type = ByValue; ///< byValue or Select + WrSerializer injectedCond; ///< injected condition. SQL-like string + std::vector conditions; ///< individual conditions processing results +}; + +/** + * @brief Describes an injection attempt of a single condition from the ON-clause of a JOIN-query + */ +struct ConditionInjection { + std::string initCond; ///< single condition from Join ON section. SQL-like string + ExplainCalc::Duration totalTime_ = + ExplainCalc::Duration::zero(); ///< total time elapsed from injection attempt start till the end of substitution or rejection + std::string explain; ///< optional{JoinOnInjection.type == Select}. Explain raw string from Select subquery. + AggType aggType = AggType::AggUnknown; ///< aggregation type used in subquery + bool succeed = false; ///< result of injection attempt + std::string_view reason; ///< optional{succeed==false}. Explains condition injection failure + bool orChainPart_ = false; ///< additional failure reason flag. Used in case if condition field was filled before and + ///< also it does not fit because it is an OR chain part + std::string newCond; ///< substituted condition in QueryEntry. 
SQL-like string + size_t valuesCount = 0; ///< resulting size of query values set }; } // namespace reindexer diff --git a/cpp_src/core/nsselecter/fieldscomparator.h b/cpp_src/core/nsselecter/fieldscomparator.h index 2d6c35410..c6e5908b5 100644 --- a/cpp_src/core/nsselecter/fieldscomparator.h +++ b/cpp_src/core/nsselecter/fieldscomparator.h @@ -19,17 +19,18 @@ class FieldsComparator { bool hasNonIdxFields = true; if (ctx_.size()) { if (ctx_[0].lCtx_.fields_.getTagsPathsLength() > 0) { - cost += expectedIterations * kNonIdxFieldComparatorCostMultiplier; + cost += double(expectedIterations) * kNonIdxFieldComparatorCostMultiplier; hasNonIdxFields = false; } if (ctx_[0].rCtx_.fields_.getTagsPathsLength() > 0) { - cost += expectedIterations * kNonIdxFieldComparatorCostMultiplier; + cost += double(expectedIterations) * kNonIdxFieldComparatorCostMultiplier; hasNonIdxFields = false; } } - return hasNonIdxFields ? cost : expectedIterations + cost; + return hasNonIdxFields ? cost : double(expectedIterations) + cost; } - const std::string& Name() const noexcept { return name_; } + const std::string& Name() const& noexcept { return name_; } + const std::string& Name() const&& = delete; std::string Dump() const { return Name(); } int GetMatchedCount() const noexcept { return matchedCount_; } void SetLeftField(const TagsPath& tpath) { diff --git a/cpp_src/core/nsselecter/joinedselector.cc b/cpp_src/core/nsselecter/joinedselector.cc index 09da4933d..bd5e92680 100644 --- a/cpp_src/core/nsselecter/joinedselector.cc +++ b/cpp_src/core/nsselecter/joinedselector.cc @@ -1,4 +1,5 @@ #include "joinedselector.h" + #include "core/namespace/namespaceimpl.h" #include "core/queryresults/joinresults.h" #include "nsselecter.h" @@ -230,7 +231,8 @@ void JoinedSelector::AppendSelectIteratorOfJoinIndexData(SelectIteratorContainer bool was = false; for (SelectKeyResult &res : leftIndex->SelectKey(values, CondSet, sortId, opts, ctx, rdxCtx)) { if (!res.comparators_.empty()) continue; - SelectIterator 
selIter{res, false, joinEntry.index_, false}; + SelectIterator selIter{res, false, joinEntry.index_, + (joinEntry.idxNo < 0 ? IteratorFieldKind::NonIndexed : IteratorFieldKind::Indexed), false}; selIter.Bind(leftNs_->payloadType_, joinEntry.idxNo); const int curIterations = selIter.GetMaxIterations(); if (curIterations && curIterations < *maxIterations) *maxIterations = curIterations; diff --git a/cpp_src/core/nsselecter/joinedselector.h b/cpp_src/core/nsselecter/joinedselector.h index 6202ee27f..355a51655 100644 --- a/cpp_src/core/nsselecter/joinedselector.h +++ b/cpp_src/core/nsselecter/joinedselector.h @@ -1,5 +1,6 @@ #pragma once #include "core/joincache.h" +#include "core/namespace/namespaceimpl.h" #include "explaincalc.h" #include "selectiteratorcontainer.h" @@ -69,10 +70,9 @@ class JoinedSelector { friend QueryPreprocessor; public: - JoinedSelector(JoinType joinType, std::shared_ptr leftNs, std::shared_ptr rightNs, JoinCacheRes &&joinRes, - Query &&itemQuery, LocalQueryResults &result, const JoinedQuery &joinQuery, JoinPreResult::Ptr preResult, - uint32_t joinedFieldIdx, SelectFunctionsHolder &selectFunctions, uint32_t joinedSelectorsCount, bool inTransaction, - const RdxContext &rdxCtx) + JoinedSelector(JoinType joinType, NamespaceImpl::Ptr leftNs, NamespaceImpl::Ptr rightNs, JoinCacheRes &&joinRes, Query &&itemQuery, + LocalQueryResults &result, const JoinedQuery &joinQuery, JoinPreResult::Ptr preResult, uint32_t joinedFieldIdx, + SelectFunctionsHolder &selectFunctions, uint32_t joinedSelectorsCount, bool inTransaction, const RdxContext &rdxCtx) : joinType_(joinType), called_(0), matched_(0), @@ -106,7 +106,7 @@ class JoinedSelector { const RdxContext &); static constexpr int MaxIterationsForPreResultStoreValuesOptimization() noexcept { return 200; } JoinPreResult::CPtr PreResult() const noexcept { return preResult_; } - const std::shared_ptr &RightNs() const noexcept { return rightNs_; } + const NamespaceImpl::Ptr &RightNs() const noexcept { return 
rightNs_; } private: template @@ -118,8 +118,8 @@ class JoinedSelector { JoinType joinType_; int called_, matched_; - std::shared_ptr leftNs_; - std::shared_ptr rightNs_; + NamespaceImpl::Ptr leftNs_; + NamespaceImpl::Ptr rightNs_; JoinCacheRes joinRes_; Query itemQuery_; LocalQueryResults &result_; diff --git a/cpp_src/core/nsselecter/nsselecter.cc b/cpp_src/core/nsselecter/nsselecter.cc index d4fd38987..30b4144a3 100644 --- a/cpp_src/core/nsselecter/nsselecter.cc +++ b/cpp_src/core/nsselecter/nsselecter.cc @@ -1,11 +1,13 @@ #include "nsselecter.h" + +#include "core/cjson/jsonbuilder.h" #include "core/namespace/namespaceimpl.h" #include "core/queryresults/joinresults.h" #include "core/sorting/sortexpression.h" #include "crashqueryreporter.h" #include "estl/multihash_map.h" -#include "explaincalc.h" #include "itemcomparator.h" +#include "qresexplainholder.h" #include "querypreprocessor.h" #include "tools/logger.h" @@ -26,30 +28,35 @@ void NsSelecter::operator()(LocalQueryResults &result, SelectCtx &ctx, const Rdx const_cast(&ctx.query)->debugLevel = ns_->config_.logLevel; } - ExplainCalc explain(ctx.query.explain_ || ctx.query.debugLevel >= LogInfo); + auto &explain = ctx.explain; + explain = ExplainCalc(ctx.query.explain_ || ctx.query.debugLevel >= LogInfo); ActiveQueryScope queryScope(ctx, ns_->optimizationState_, explain, ns_->locker_.IsReadOnly(), ns_->strHolder_.get()); + explain.SetPreselectTime(ctx.preResultTimeTotal); explain.StartTiming(); - auto containSomeAggCount = [&ctx](const AggType &type) { - auto it = std::find_if(ctx.query.aggregations_.begin(), ctx.query.aggregations_.end(), - [&type](const AggregateEntry &agg) { return agg.Type() == type; }); - return it != ctx.query.aggregations_.end(); + const auto &aggregationQueryRef = ctx.isMergeQuerySubQuery() ? 
*ctx.parentQuery : ctx.query; + + auto containSomeAggCount = [&aggregationQueryRef](AggType type) noexcept { + auto it = std::find_if(aggregationQueryRef.aggregations_.begin(), aggregationQueryRef.aggregations_.end(), + [type](const AggregateEntry &agg) { return agg.Type() == type; }); + return it != aggregationQueryRef.aggregations_.end(); }; bool needPutCachedTotal = false; + const auto initTotalCount = result.totalCount; bool containAggCount = containSomeAggCount(AggCount); bool containAggCountCached = containAggCount ? false : containSomeAggCount(AggCountCached); - bool needCalcTotal = ctx.query.calcTotal == ModeAccurateTotal || containAggCount; + bool needCalcTotal = aggregationQueryRef.calcTotal == ModeAccurateTotal || containAggCount; QueryCacheKey ckey; - if (ctx.query.calcTotal == ModeCachedTotal || containAggCountCached) { + if (aggregationQueryRef.calcTotal == ModeCachedTotal || containAggCountCached) { ckey = QueryCacheKey{ctx.query}; auto cached = ns_->queryTotalCountCache_->Get(ckey); if (cached.valid && cached.val.total_count >= 0) { - result.totalCount = cached.val.total_count; + result.totalCount += cached.val.total_count; logPrintf(LogTrace, "[%s] using value from cache: %d", ns_->name_, result.totalCount); } else { needPutCachedTotal = cached.valid; @@ -58,14 +65,17 @@ void NsSelecter::operator()(LocalQueryResults &result, SelectCtx &ctx, const Rdx } } + OnConditionInjections explainInjectedOnConditions; QueryPreprocessor qPreproc((ctx.preResult && ctx.preResult->executionMode == JoinPreResult::ModeExecute) ? 
const_cast(&ctx.query.entries)->MakeLazyCopy() : QueryEntries{ctx.query.entries}, - ctx.query, ns_, ctx.reqMatchedOnceFlag, ctx.inTransaction); + ns_, ctx); if (ctx.joinedSelectors) { - qPreproc.InjectConditionsFromJoins(*ctx.joinedSelectors, rdxCtx); + qPreproc.InjectConditionsFromJoins(*ctx.joinedSelectors, explainInjectedOnConditions, rdxCtx); + explain.PutOnConditionInjections(&explainInjectedOnConditions); } - auto aggregators = getAggregators(ctx.query); + auto aggregators = getAggregators(aggregationQueryRef.aggregations_, aggregationQueryRef.strictMode); + qPreproc.AddDistinctEntries(aggregators); const bool aggregationsOnly = aggregators.size() > 1 || (aggregators.size() == 1 && aggregators[0].Type() != AggDistinct); qPreproc.InitIndexNumbers(); @@ -73,18 +83,28 @@ void NsSelecter::operator()(LocalQueryResults &result, SelectCtx &ctx, const Rdx if (isFt && rdxCtx.IsShardingParallelExecution()) { throw Error{errLogic, "Full text query by several sharding hosts"}; } + if (ctx.isMergeQuery == IsMergeQuery::Yes && ctx.query.sortingEntries_.empty()) { + if (ctx.isFtQuery == IsFTQuery::NotSet) { + ctx.isFtQuery = isFt ? 
IsFTQuery::Yes : IsFTQuery::No; + } else { + if (isFt != (ctx.isFtQuery == IsFTQuery::Yes)) { + throw Error{errNotValid, + "In merge query without sorting all subqueries should be fulltext or not fulltext at the same time"}; + } + } + } // Prepare data for select functions if (ctx.functions) { fnc_ = ctx.functions->AddNamespace(ctx.query, *ns_, isFt); } - if (isFt) { - qPreproc.CheckUniqueFtQuery(); - qPreproc.ExcludeFtQuery(*fnc_, rdxCtx); - } if (!ctx.skipIndexesLookup) { qPreproc.Reduce(isFt); } + if (isFt) { + qPreproc.CheckUniqueFtQuery(); + qPreproc.ExcludeFtQuery(rdxCtx); + } qPreproc.ConvertWhereValues(); explain.AddPrepareTime(); @@ -103,6 +123,8 @@ void NsSelecter::operator()(LocalQueryResults &result, SelectCtx &ctx, const Rdx } SelectIteratorContainer qres(ns_->payloadType_, &ctx); + QresExplainHolder qresHolder(qres, (explain.IsEnabled() || ctx.query.debugLevel >= LogTrace) ? QresExplainHolder::ExplainEnabled::Yes + : QresExplainHolder::ExplainEnabled::No); LoopCtx lctx(qres, ctx, qPreproc, aggregators, explain); if (!ctx.query.forcedSortOrder_.empty() && !qPreproc.MoreThanOneEvaluation()) { ctx.isForceAll = true; @@ -111,8 +133,8 @@ void NsSelecter::operator()(LocalQueryResults &result, SelectCtx &ctx, const Rdx do { isFt = qPreproc.ContainsFullTextIndexes(); qres.Clear(); - lctx.start = 0; - lctx.count = UINT_MAX; + lctx.start = QueryEntry::kDefaultOffset; + lctx.count = QueryEntry::kDefaultLimit; ctx.isForceAll = isForceAll; if (ctx.preResult) { @@ -160,7 +182,8 @@ void NsSelecter::operator()(LocalQueryResults &result, SelectCtx &ctx, const Rdx SelectKeyResult res; res.emplace_back(std::move(ctx.preResult->ids)); static const std::string pr = "-preresult"; - qres.Append(OpAnd, SelectIterator(std::move(res), false, pr)); + // Iterator Field Kind: Preselect IdSet -> None + qres.Append(OpAnd, SelectIterator(std::move(res), false, pr, IteratorFieldKind::None)); } break; case JoinPreResult::ModeIterators: qres.LazyAppend(ctx.preResult->iterators.begin(), 
ctx.preResult->iterators.end()); @@ -192,7 +215,7 @@ void NsSelecter::operator()(LocalQueryResults &result, SelectCtx &ctx, const Rdx !ctx.sortingContext.sortIndex()) // 2. We have sorted query, by unordered index || ctx.preResult->btreeIndexOptimizationEnabled) { // 3. We have btree-index that is not committed yet ctx.preResult->iterators.Append(qres.cbegin(), qres.cend()); - if (ctx.query.debugLevel >= LogInfo) { + if rx_unlikely (ctx.query.debugLevel >= LogInfo) { logPrintf(LogInfo, "Built preResult (expected %d iterations) with %d iterators, q='%s'", maxIterations, qres.Size(), ctx.query.GetSQL()); } @@ -238,7 +261,7 @@ void NsSelecter::operator()(LocalQueryResults &result, SelectCtx &ctx, const Rdx if (ctx.sortingContext.isOptimizationEnabled()) { auto it = ns_->indexes_[ctx.sortingContext.uncommitedIndex]->CreateIterator(); it->SetMaxIterations(ns_->items_.size()); - scan.emplace_back(it); + scan.emplace_back(std::move(it)); maxIterations = ns_->items_.size(); } else { // special case - no idset in query @@ -251,7 +274,8 @@ void NsSelecter::operator()(LocalQueryResults &result, SelectCtx &ctx, const Rdx scan.emplace_back(0, limit); maxIterations = limit; } - qres.AppendFront(OpAnd, SelectIterator{std::move(scan), false, "-scan", true}); + // Iterator Field Kind: -scan. Sorting Context! 
-> None + qres.AppendFront(OpAnd, SelectIterator{std::move(scan), false, "-scan", IteratorFieldKind::None, true}); } // Get maximum iterations count, for right calculation comparators costs qres.SortByCost(maxIterations); @@ -259,7 +283,7 @@ void NsSelecter::operator()(LocalQueryResults &result, SelectCtx &ctx, const Rdx // Check idset must be 1st qres.CheckFirstQuery(); - // Rewing all results iterators + // Rewind all results iterators qres.ExecuteAppropriateForEach(Skip{}, [reverse, maxIterations](SelectIterator &it) { it.Start(reverse, maxIterations); }); @@ -274,27 +298,41 @@ void NsSelecter::operator()(LocalQueryResults &result, SelectCtx &ctx, const Rdx (hasComparators || qPreproc.MoreThanOneEvaluation() || qres.Size() > 1 || qres.Get(0).size() > 1); if (qPreproc.IsFtExcluded()) { - if (reverse && hasComparators) selectLoop(lctx, qPreproc.GetFtMergeStatuses(), rdxCtx); - if (!reverse && hasComparators) selectLoop(lctx, qPreproc.GetFtMergeStatuses(), rdxCtx); - if (reverse && !hasComparators) selectLoop(lctx, qPreproc.GetFtMergeStatuses(), rdxCtx); - if (!reverse && !hasComparators) selectLoop(lctx, qPreproc.GetFtMergeStatuses(), rdxCtx); + if (reverse && hasComparators) { + selectLoop(lctx, qPreproc.GetFtMergeStatuses(), rdxCtx); + } else if (!reverse && hasComparators) { + selectLoop(lctx, qPreproc.GetFtMergeStatuses(), rdxCtx); + } else if (reverse && !hasComparators) { + selectLoop(lctx, qPreproc.GetFtMergeStatuses(), rdxCtx); + } else { + selectLoop(lctx, qPreproc.GetFtMergeStatuses(), rdxCtx); + } } else { - if (reverse && hasComparators && aggregationsOnly) selectLoop(lctx, result, rdxCtx); - if (!reverse && hasComparators && aggregationsOnly) selectLoop(lctx, result, rdxCtx); - if (reverse && !hasComparators && aggregationsOnly) selectLoop(lctx, result, rdxCtx); - if (!reverse && !hasComparators && aggregationsOnly) selectLoop(lctx, result, rdxCtx); - if (reverse && hasComparators && !aggregationsOnly) selectLoop(lctx, result, rdxCtx); - if (!reverse 
&& hasComparators && !aggregationsOnly) selectLoop(lctx, result, rdxCtx); - if (reverse && !hasComparators && !aggregationsOnly) selectLoop(lctx, result, rdxCtx); - if (!reverse && !hasComparators && !aggregationsOnly) selectLoop(lctx, result, rdxCtx); + if (reverse && hasComparators && aggregationsOnly) { + selectLoop(lctx, result, rdxCtx); + } else if (!reverse && hasComparators && aggregationsOnly) { + selectLoop(lctx, result, rdxCtx); + } else if (reverse && !hasComparators && aggregationsOnly) { + selectLoop(lctx, result, rdxCtx); + } else if (!reverse && !hasComparators && aggregationsOnly) { + selectLoop(lctx, result, rdxCtx); + } else if (reverse && hasComparators && !aggregationsOnly) { + selectLoop(lctx, result, rdxCtx); + } else if (!reverse && hasComparators && !aggregationsOnly) { + selectLoop(lctx, result, rdxCtx); + } else if (reverse && !hasComparators && !aggregationsOnly) { + selectLoop(lctx, result, rdxCtx); + } else if (!reverse && !hasComparators && !aggregationsOnly) { + selectLoop(lctx, result, rdxCtx); + } } // Get total count for simple query with 1 condition and 1 idset if (needCalcTotal && !lctx.calcTotal) { if (!ctx.query.entries.Empty()) { - result.totalCount = qres.Get(0).GetMaxIterations(); + result.totalCount += qres.Get(0).GetMaxIterations(); } else { - result.totalCount = ns_->items_.size() - ns_->free_.size(); + result.totalCount += ns_->items_.size() - ns_->free_.size(); } } explain.AddLoopTime(); @@ -302,7 +340,7 @@ void NsSelecter::operator()(LocalQueryResults &result, SelectCtx &ctx, const Rdx if (!ctx.inTransaction) { ThrowOnCancel(rdxCtx); } - } while (qPreproc.NeedNextEvaluation(lctx.start, lctx.count, ctx.matchedAtLeastOnce)); + } while (qPreproc.NeedNextEvaluation(lctx.start, lctx.count, ctx.matchedAtLeastOnce, qresHolder)); processLeftJoins(result, ctx, resultInitSize, rdxCtx); if (!ctx.sortingContext.expressions.empty()) { @@ -319,16 +357,27 @@ void NsSelecter::operator()(LocalQueryResults &result, SelectCtx &ctx, const 
Rdx } } } - for (auto &aggregator : aggregators) { - result.aggregationResults.push_back(aggregator.GetResult()); + if (rx_unlikely(ctx.isMergeQuerySubQuery())) { + writeAggregationResultMergeSubQuery(result, aggregators, ctx); + } else { + for (auto &aggregator : aggregators) { + result.aggregationResults.push_back(aggregator.GetResult()); + } } // Put count/count_cached to aggretions - if (ctx.query.calcTotal != ModeNoTotal || containAggCount || containAggCountCached) { + if (aggregationQueryRef.calcTotal != ModeNoTotal || containAggCount || containAggCountCached) { AggregationResult ret; ret.fields = {"*"}; - ret.type = (ctx.query.calcTotal == ModeAccurateTotal || containAggCount) ? AggCount : AggCountCached; - ret.SetValue(result.totalCount); - result.aggregationResults.push_back(ret); + ret.type = (aggregationQueryRef.calcTotal == ModeAccurateTotal || containAggCount) ? AggCount : AggCountCached; + if (ctx.isMergeQuerySubQuery()) { + assertrx_throw(!result.aggregationResults.empty()); + auto &agg = result.aggregationResults.back(); + assertrx_throw(agg.type == ret.type); + agg.SetValue(result.totalCount); + } else { + ret.SetValue(result.totalCount); + result.aggregationResults.emplace_back(std::move(ret)); + } } explain.AddPostprocessTime(); @@ -338,10 +387,10 @@ void NsSelecter::operator()(LocalQueryResults &result, SelectCtx &ctx, const Rdx explain.PutCount((ctx.preResult && ctx.preResult->executionMode == JoinPreResult::ModeBuild) ? (ctx.preResult->dataMode == JoinPreResult::ModeIdSet ? 
ctx.preResult->ids.size() : ctx.preResult->values.size()) : result.Count()); - explain.PutSelectors(&qres); + explain.PutSelectors(&qresHolder.GetResultsRef()); explain.PutJoinedSelectors(ctx.joinedSelectors); - if (ctx.query.debugLevel >= LogInfo) { + if rx_unlikely (ctx.query.debugLevel >= LogInfo) { logPrintf(LogInfo, "%s", ctx.query.GetSQL()); explain.LogDump(ctx.query.debugLevel); } @@ -352,24 +401,24 @@ void NsSelecter::operator()(LocalQueryResults &result, SelectCtx &ctx, const Rdx result.explainResults = explain.GetJSON(); } } - if (ctx.query.debugLevel >= LogTrace) { + if rx_unlikely (ctx.query.debugLevel >= LogTrace) { logPrintf(LogInfo, "Query returned: [%s]; total=%d", result.Dump(), result.totalCount); } if (needPutCachedTotal) { logPrintf(LogTrace, "[%s] put totalCount value into query cache: %d ", ns_->name_, result.totalCount); - ns_->queryTotalCountCache_->Put(ckey, {static_cast(result.totalCount)}); + ns_->queryTotalCountCache_->Put(ckey, {static_cast(result.totalCount - initTotalCount)}); } if (ctx.preResult && ctx.preResult->executionMode == JoinPreResult::ModeBuild) { switch (ctx.preResult->dataMode) { case JoinPreResult::ModeIdSet: - if (ctx.query.debugLevel >= LogInfo) { + if rx_unlikely (ctx.query.debugLevel >= LogInfo) { logPrintf(LogInfo, "Built idset preResult (expected %d iterations) with %d ids, q = '%s'", explain.Iterations(), ctx.preResult->ids.size(), ctx.query.GetSQL()); } break; case JoinPreResult::ModeValues: - if (ctx.query.debugLevel >= LogInfo) { + if rx_unlikely (ctx.query.debugLevel >= LogInfo) { logPrintf(LogInfo, "Built values preResult (expected %d iterations) with %d values, q = '%s'", explain.Iterations(), ctx.preResult->values.size(), ctx.query.GetSQL()); } @@ -562,60 +611,107 @@ struct RelaxedHasher { }; class ForcedSortMap { - using MultiMap = MultiHashMap; - struct SingleTypeMap { +public: + using mapped_type = size_t; + +private: + using MultiMap = MultiHashMap; + struct SingleTypeMap : tsl::hopscotch_sc_map { 
KeyValueType type_; - fast_hash_map map_; }; using DataType = std::variant; + class Iterator : private std::variant { + using Base = std::variant; + + public: + using Base::Base; + const auto *operator->() const { + return std::visit(overloaded{[](MultiMap::Iterator it) { return it.operator->(); }, + [](SingleTypeMap::const_iterator it) { return it.operator->(); }}, + static_cast(*this)); + } + const auto &operator*() const { + return std::visit(overloaded{[](MultiMap::Iterator it) -> const auto &{ return *it; + } + , [](SingleTypeMap::const_iterator it) -> const auto & { return *it; }}, + static_cast(*this)); +} +}; // namespace reindexer + +public: +ForcedSortMap(Variant k, mapped_type v, size_t size) + : data_{k.Type().Is() || k.Type().Is() || k.Type().IsNumeric() + ? DataType{MultiMap{size}} + : DataType{SingleTypeMap{{}, k.Type()}}} { + std::visit(overloaded{[&](MultiMap &m) { m.insert(std::move(k), v); }, [&](SingleTypeMap &m) { m.emplace(std::move(k), v); }}, data_); +} +std::pair emplace(Variant k, mapped_type v) & { + return std::visit(overloaded{[&](MultiMap &m) { + const auto [iter, success] = m.insert(std::move(k), v); + return std::make_pair(Iterator{iter}, success); + }, + [&](SingleTypeMap &m) { + if (!m.type_.IsSame(k.Type())) { + throw Error{errQueryExec, "Items of different types in forced sort list"}; + } + const auto [iter, success] = m.emplace(std::move(k), v); + return std::make_pair(Iterator{iter}, success); + }}, + data_); +} +bool contain(const Variant &k) const { + return std::visit(overloaded{[&k](const MultiMap &m) { return m.find(k) != m.cend(); }, + [&k](const SingleTypeMap &m) { + if (!m.type_.IsSame(k.Type())) { + throw Error{errQueryExec, "Items of different types in forced sort list"}; + } + return m.find(k) != m.end(); + }}, + data_); +} +mapped_type get(const Variant &k) const { + return std::visit(overloaded{[&k](const MultiMap &m) { + const auto it = m.find(k); + assertrx_throw(it != m.cend()); + return it->second; + }, + 
[&k](const SingleTypeMap &m) { + if (!m.type_.IsSame(k.Type())) { + throw Error{errQueryExec, "Items of different types in forced sort list"}; + } + const auto it = m.find(k); + assertrx_throw(it != m.end()); + return it->second; + }}, + data_); +} + +private: +DataType data_; +} +; +template +class ForcedMapInserter { public: - ForcedSortMap(Variant k, size_t v, size_t size) - : data_{k.Type().Is() || k.Type().Is() || k.Type().IsNumeric() - ? DataType{MultiMap{size}} - : DataType{SingleTypeMap{k.Type(), {}}}} { - std::visit(overloaded{[&](MultiMap &m) { m.insert(std::move(k), v); }, [&](SingleTypeMap &m) { m.map_.emplace(std::move(k), v); }}, - data_); - } - bool insert(Variant k, size_t v) { - return std::visit(overloaded{[&](MultiMap &m) { return m.insert(std::move(k), v); }, - [&](SingleTypeMap &m) { - if (!m.type_.IsSame(k.Type())) { - throw Error{errQueryExec, "Items of different types in forced sort list"}; - } - return m.map_.emplace(std::move(k), v).second; - }}, - data_); - } - bool contain(const Variant &k) const { - return std::visit(overloaded{[&k](const MultiMap &m) { return m.find(k) != m.cend(); }, - [&k](const SingleTypeMap &m) { - if (!m.type_.IsSame(k.Type())) { - throw Error{errQueryExec, "Items of different types in forced sort list"}; - } - return m.map_.find(k) != m.map_.end(); - }}, - data_); - } - size_t get(const Variant &k) const { - return std::visit(overloaded{[&k](const MultiMap &m) { - const auto it = m.find(k); - assertrx_throw(it != m.cend()); - return it->second; - }, - [&k](const SingleTypeMap &m) { - if (!m.type_.IsSame(k.Type())) { - throw Error{errQueryExec, "Items of different types in forced sort list"}; - } - const auto it = m.map_.find(k); - assertrx_throw(it != m.map_.end()); - return it->second; - }}, - data_); + ForcedMapInserter(Map &m) noexcept : map_{m} {} + template + void Insert(V &&value) { + if (const auto [iter, success] = map_.emplace(std::forward(value), cost_); success) { + ++cost_; + } else if (iter->second 
!= cost_ - 1) { + static constexpr auto errMsg = "Forced sort value '%s' is dublicated. Deduplicated by the first occurrence."; + if constexpr (std::is_same_v) { + logPrintf(LogInfo, errMsg, value.template As()); + } else { + logPrintf(LogInfo, errMsg, Variant{std::forward(value)}.template As()); + } + } } private: - DataType data_; + Map &map_; + typename Map::mapped_type cost_ = 1; }; template @@ -629,14 +725,9 @@ It NsSelecter::applyForcedSortImpl(NamespaceImpl &ns, It begin, It end, const It if (idx < ns.indexes_.firstCompositePos()) { // implementation for regular indexes fast_hash_map sortMap; - ItemRefVector::difference_type cost = 0; - for (auto value : forcedSortOrder) { - value.convert(fieldType); - if (!sortMap.emplace(std::move(value), cost).second) { - // NOLINTNEXTLINE(bugprone-use-after-move) - throw Error(errQueryExec, "Value '%s' used twice in forced sorting", value.As()); - } - cost++; + ForcedMapInserter inserter{sortMap}; + for (const auto &value : forcedSortOrder) { + inserter.Insert(value.convert(fieldType)); } VariantArray keyRefs; @@ -696,13 +787,10 @@ It NsSelecter::applyForcedSortImpl(NamespaceImpl &ns, It begin, It end, const It const auto &payloadType = ns.payloadType_; const FieldsSet &fields = ns.indexes_[idx]->Fields(); unordered_payload_map sortMap(0, payloadType, fields); - ItemRefVector::difference_type cost = 0; + ForcedMapInserter inserter{sortMap}; for (auto value : forcedSortOrder) { value.convert(fieldType, &payloadType, &fields); - if (!sortMap.insert({static_cast(value), cost}).second) { - throw Error(errQueryExec, "Value '%s' used twice in forced sorting", value.As()); - } - cost++; + inserter.Insert(static_cast(value)); } const auto boundary = std::stable_partition(begin, end, [&](const ItemRef &itemRef) { @@ -746,11 +834,9 @@ It NsSelecter::applyForcedSortImpl(NamespaceImpl &ns, It begin, It end, const It } } else { ForcedSortMap sortMap{forcedSortOrder[0], 0, forcedSortOrder.size()}; + ForcedMapInserter 
inserter{sortMap}; for (size_t i = 1, s = forcedSortOrder.size(); i < s; ++i) { - const auto &value = forcedSortOrder[i]; - if (!sortMap.insert(value, i)) { - throw Error(errQueryExec, "Value '%s' used twice in forced sorting", value.As()); - } + inserter.Insert(forcedSortOrder[i]); } VariantArray keyRefs; @@ -1041,6 +1127,8 @@ void NsSelecter::selectLoop(LoopCtx &ctx, ResultsT &result, const RdxContext &rd sctx.nsid < result.joined_.size() ? &result.joined_[sctx.nsid] : nullptr); } setLimitAndOffset(result.Items(), offset, ctx.qPreproc.Count() + initCount); + } else if (sctx.isForceAll) { + setLimitAndOffset(result.Items(), ctx.qPreproc.Start(), ctx.qPreproc.Count() + initCount); } if (sctx.isForceAll) { @@ -1148,12 +1236,12 @@ void NsSelecter::checkStrictModeAgg(StrictMode strictMode, const std::string &na } } -h_vector NsSelecter::getAggregators(const Query &q) const { +h_vector NsSelecter::getAggregators(const std::vector &aggEntries, StrictMode strictMode) const { static constexpr int NotFilled = -2; h_vector ret; h_vector distinctIndexes; - for (const auto &ag : q.aggregations_) { + for (const auto &ag : aggEntries) { if (ag.Type() == AggCount || ag.Type() == AggCountCached) { continue; } @@ -1167,7 +1255,7 @@ h_vector NsSelecter::getAggregators(const Query &q) const { } int idx = -1; for (size_t i = 0; i < ag.Fields().size(); ++i) { - checkStrictModeAgg(q.strictMode == StrictModeNotSet ? ns_->config_.strictMode : q.strictMode, ag.Fields()[i], ns_->name_, + checkStrictModeAgg(strictMode == StrictModeNotSet ? 
ns_->config_.strictMode : strictMode, ag.Fields()[i], ns_->name_, ns_->tagsMatcher_); for (size_t j = 0; j < sortingEntries.size(); ++j) { @@ -1376,101 +1464,299 @@ void NsSelecter::prepareSortingContext(SortingEntries &sortBy, SelectCtx &ctx, b ctx.sortingContext.exprResults.resize(ctx.sortingContext.expressions.size()); } -bool NsSelecter::isSortOptimizatonEffective(const QueryEntries &qentries, SelectCtx &ctx, const RdxContext &rdxCtx) { - if (qentries.Size() == 0) { - return true; +enum class CostCountingPolicy : bool { Any, ExceptTargetSortIdxSeq }; + +template +class CostCalculator { +public: + CostCalculator(size_t _totalCost) noexcept : totalCost_(_totalCost) {} + void BeginSequence() noexcept { + isInSequence_ = true; + hasInappositeEntries_ = false; + onlyTargetSortIdxInSequence_ = true; + curCost_ = 0; } - if (qentries.Size() == 1 && qentries.HoldsOrReferTo(0)) { - const auto &qe = qentries.Get(0); - if (qe.idxNo == ctx.sortingContext.uncommitedIndex && SelectIteratorContainer::IsExpectingOrderedResults(qe)) { - return true; + void EndSequence() noexcept { + if (isInSequence_ && !hasInappositeEntries_) { + if constexpr (countingPolicy == CostCountingPolicy::Any) { + totalCost_ = std::min(curCost_, totalCost_); + } else if (!onlyTargetSortIdxInSequence_) { + totalCost_ = std::min(curCost_, totalCost_); + } } + isInSequence_ = false; + onlyTargetSortIdxInSequence_ = true; + curCost_ = 0; } - - size_t costNormal = ns_->items_.size() - ns_->free_.size(); - enum { SortIndexNotFound = 0, SortIndexFound, SortIndexHasUnorderedConditions } sortIndexSearchState = SortIndexNotFound; - - qentries.ExecuteAppropriateForEach( - Skip{}, - [this, &ctx, &rdxCtx, &costNormal, &sortIndexSearchState](const QueryEntry &qe) { - if (qe.idxNo < 0) return; - if (qe.idxNo == ctx.sortingContext.uncommitedIndex) { - if (sortIndexSearchState == SortIndexNotFound && !SelectIteratorContainer::IsExpectingOrderedResults(qe)) { - sortIndexSearchState = SortIndexHasUnorderedConditions; + 
bool IsInOrSequence() const noexcept { return isInSequence_; } + void Add(const SelectKeyResults &results, bool isTargetSortIndex) noexcept { + if constexpr (countingPolicy == CostCountingPolicy::ExceptTargetSortIdxSeq) { + if (!isInSequence_ && isTargetSortIndex) { + return; + } + } + onlyTargetSortIdxInSequence_ = onlyTargetSortIdxInSequence_ && isTargetSortIndex; + Add(results); + } + void Add(const SelectKeyResults &results) noexcept { + for (const SelectKeyResult &res : results) { + if (res.comparators_.empty()) { + if (isInSequence_) { + curCost_ += res.GetMaxIterations(totalCost_); } else { - sortIndexSearchState = SortIndexFound; + totalCost_ = std::min(totalCost_, res.GetMaxIterations(totalCost_)); } - return; + } else { + hasInappositeEntries_ = true; + break; } - if (costNormal == 0) return; - - auto &index = ns_->indexes_[qe.idxNo]; - if (IsFullText(index->Type())) return; - - Index::SelectOpts opts; - opts.disableIdSetCache = 1; - opts.itemsCountInNamespace = ns_->items_.size() - ns_->free_.size(); - opts.indexesNotOptimized = !ctx.sortingContext.enableSortOrders; - opts.inTransaction = ctx.inTransaction; - - try { - SelectKeyResults reslts = index->SelectKey(qe.values, qe.condition, 0, opts, nullptr, rdxCtx); - for (const SelectKeyResult &res : reslts) { - if (res.comparators_.empty()) { - costNormal = std::min(costNormal, res.GetMaxIterations(costNormal)); - } + } + } + size_t TotalCost() const noexcept { return totalCost_; } + void MarkInapposite() noexcept { hasInappositeEntries_ = true; } + bool OnNewEntry(const QueryEntries &qentries, size_t i, size_t next) { + const OpType op = qentries.GetOperation(i); + switch (op) { + case OpAnd: { + EndSequence(); + if (next != qentries.Size() && qentries.GetOperation(next) == OpOr) { + BeginSequence(); } - } catch (const Error &) { + return true; } - }); + case OpOr: { + if (hasInappositeEntries_) { + return false; + } + if (next != qentries.Size() && qentries.GetOperation(next) == OpOr) { + 
BeginSequence(); + } + return true; + } + case OpNot: { + if (next != qentries.Size() && qentries.GetOperation(next) == OpOr) { + BeginSequence(); + } + hasInappositeEntries_ = true; + return false; + } + } + throw Error(errLogic, "Unexpected op value: %d", int(op)); + } + +private: + bool isInSequence_ = false; + bool onlyTargetSortIdxInSequence_ = true; + bool hasInappositeEntries_ = false; + size_t curCost_ = 0; + size_t totalCost_ = std::numeric_limits::max(); +}; + +size_t NsSelecter::calculateNormalCost(const QueryEntries &qentries, SelectCtx &ctx, const RdxContext &rdxCtx) { + const size_t totalItemsCount = ns_->items_.size() - ns_->free_.size(); + CostCalculator costCalculator(totalItemsCount); + enum { SortIndexNotFound = 0, SortIndexFound, SortIndexHasUnorderedConditions } sortIndexSearchState = SortIndexNotFound; + for (size_t next, i = 0, sz = qentries.Size(); i != sz; i = next) { + next = qentries.Next(i); + const bool calculateEntry = costCalculator.OnNewEntry(qentries, i, next); + qentries.InvokeAppropriate( + i, Skip{}, [&costCalculator](const QueryEntriesBracket &) { costCalculator.MarkInapposite(); }, + [&costCalculator](const JoinQueryEntry &) { costCalculator.MarkInapposite(); }, + [&costCalculator](const BetweenFieldsQueryEntry &) { costCalculator.MarkInapposite(); }, + [&](const QueryEntry &qe) { + if (qe.idxNo < 0) { + costCalculator.MarkInapposite(); + return; + } + if (qe.idxNo == ctx.sortingContext.uncommitedIndex) { + if (sortIndexSearchState == SortIndexNotFound) { + const bool isExpectingIdSet = + qentries.GetOperation(i) == OpAnd && (next == sz || qentries.GetOperation(next) != OpOr); + if (isExpectingIdSet && !SelectIteratorContainer::IsExpectingOrderedResults(qe)) { + sortIndexSearchState = SortIndexHasUnorderedConditions; + return; + } else { + sortIndexSearchState = SortIndexFound; + } + } + if (!costCalculator.IsInOrSequence()) { + // Count cost only for the OR-sequences with mixed indexes: 'ANY_IDX OR TARGET_SORT_IDX', + // 
'TARGET_SORT_IDX OR ANY_IDX1 OR ANY_IDX2', etc. + return; + } + } + + if (!calculateEntry || costCalculator.TotalCost() == 0 || sortIndexSearchState == SortIndexHasUnorderedConditions) { + return; + } + + auto &index = ns_->indexes_[qe.idxNo]; + if (IsFullText(index->Type())) { + costCalculator.MarkInapposite(); + return; + } + + Index::SelectOpts opts; + opts.disableIdSetCache = 1; + opts.itemsCountInNamespace = totalItemsCount; + opts.indexesNotOptimized = !ctx.sortingContext.enableSortOrders; + opts.inTransaction = ctx.inTransaction; + + try { + SelectKeyResults reslts = index->SelectKey(qe.values, qe.condition, 0, opts, nullptr, rdxCtx); + costCalculator.Add(reslts, qe.idxNo == ctx.sortingContext.uncommitedIndex); + } catch (const Error &) { + costCalculator.MarkInapposite(); + } + }); + } + costCalculator.EndSequence(); if (sortIndexSearchState == SortIndexHasUnorderedConditions) { - return false; + return 0; + } + return costCalculator.TotalCost(); +} + +size_t NsSelecter::calculateOptimizedCost(size_t costNormal, const QueryEntries &qentries, SelectCtx &ctx, const RdxContext &rdxCtx) { + // 'costOptimized == costNormal + 1' reduces internal iterations count for the tree in the res.GetMaxIterations() call + CostCalculator costCalculator(costNormal + 1); + for (size_t next, i = 0, sz = qentries.Size(); i != sz; i = next) { + next = qentries.Next(i); + if (!costCalculator.OnNewEntry(qentries, i, next)) { + continue; + } + qentries.InvokeAppropriate( + i, Skip{}, [&costCalculator](const QueryEntriesBracket &) { costCalculator.MarkInapposite(); }, + [&costCalculator](const JoinQueryEntry &) { costCalculator.MarkInapposite(); }, + [&costCalculator](const BetweenFieldsQueryEntry &) { costCalculator.MarkInapposite(); }, + [&](const QueryEntry &qe) { + if (qe.idxNo < 0 || qe.idxNo != ctx.sortingContext.uncommitedIndex) { + costCalculator.MarkInapposite(); + return; + } + + Index::SelectOpts opts; + opts.itemsCountInNamespace = ns_->items_.size() - ns_->free_.size(); 
+ opts.disableIdSetCache = 1; + opts.unbuiltSortOrders = 1; + opts.indexesNotOptimized = !ctx.sortingContext.enableSortOrders; + opts.inTransaction = ctx.inTransaction; + + try { + SelectKeyResults reslts = ns_->indexes_[qe.idxNo]->SelectKey(qe.values, qe.condition, 0, opts, nullptr, rdxCtx); + costCalculator.Add(reslts); + } catch (const Error &) { + costCalculator.MarkInapposite(); + } + }); } - if (costNormal == 0) { + costCalculator.EndSequence(); + return costCalculator.TotalCost(); +} + +bool NsSelecter::isSortOptimizatonEffective(const QueryEntries &qentries, SelectCtx &ctx, const RdxContext &rdxCtx) { + if (qentries.Size() == 0) { + return true; + } + if (qentries.Size() == 1 && qentries.HoldsOrReferTo(0)) { + const auto &qe = qentries.Get(0); + if (qe.idxNo == ctx.sortingContext.uncommitedIndex) { + return SelectIteratorContainer::IsExpectingOrderedResults(qe); + } + } + + const size_t expectedMaxIterationsNormal = calculateNormalCost(qentries, ctx, rdxCtx); + if (expectedMaxIterationsNormal == 0) { return false; } - size_t costOptimized = ns_->items_.size() - ns_->free_.size(); - costNormal = size_t(double(costNormal) * log2(costNormal)); - if (costNormal < costOptimized) { - costOptimized = costNormal + 1; - qentries.ExecuteAppropriateForEach(Skip{}, - [this, &ctx, &rdxCtx, &costOptimized](const QueryEntry &qe) { - if (qe.idxNo < 0 || qe.idxNo != ctx.sortingContext.uncommitedIndex) return; - - Index::SelectOpts opts; - opts.itemsCountInNamespace = ns_->items_.size() - ns_->free_.size(); - opts.disableIdSetCache = 1; - opts.unbuiltSortOrders = 1; - opts.indexesNotOptimized = !ctx.sortingContext.enableSortOrders; - opts.inTransaction = ctx.inTransaction; - - try { - SelectKeyResults reslts = ns_->indexes_[qe.idxNo]->SelectKey(qe.values, qe.condition, 0, - opts, nullptr, rdxCtx); - for (const SelectKeyResult &res : reslts) { - if (res.comparators_.empty()) { - costOptimized = std::min(costOptimized, res.GetMaxIterations(costOptimized)); - } - } - } catch 
(const Error &) { - } - }); - } else { + const size_t totalItemsCount = ns_->items_.size() - ns_->free_.size(); + const size_t costNormal = size_t(double(expectedMaxIterationsNormal) * log2(expectedMaxIterationsNormal)); + if (costNormal >= totalItemsCount) { + // Check if it's more effective to iterate over all the items via btree, than select and sort ids via the most effective index return true; } - if (costNormal < costOptimized && !ctx.isForceAll && ctx.query.HasLimit()) { + size_t costOptimized = calculateOptimizedCost(costNormal, qentries, ctx, rdxCtx); + if (costNormal >= costOptimized) { + return true; // If max iterations count with btree indexes is better than with any other condition (including sort overhead) + } + if (expectedMaxIterationsNormal <= 150) { + return false; // If there is very good filtering condition (case for the issues #1489) + } + if (ctx.isForceAll || ctx.query.HasLimit()) { + if (expectedMaxIterationsNormal < 2000) { + return false; // Skip attempt to check limit if there is good enough unordered filtering condition + } + } + if (!ctx.isForceAll && ctx.query.HasLimit()) { // If optimization will be disabled, selecter will must to iterate over all the results, ignoring limit // Experimental value. It was chosen during debugging request from issue #1402. // TODO: It's possible to evaluate this multiplier, based on the query conditions, but the only way to avoid corner cases is to // allow user to hint this optimization. - constexpr static unsigned kLimitMultiplier = 20; + const size_t limitMultiplier = std::max(size_t(20), size_t(totalItemsCount / expectedMaxIterationsNormal) * 4); const auto offset = ctx.query.HasOffset() ? 
ctx.query.start : 1; - costOptimized = kLimitMultiplier * (ctx.query.count + offset); + costOptimized = limitMultiplier * (ctx.query.count + offset); } - return costOptimized <= costNormal; } +void NsSelecter::writeAggregationResultMergeSubQuery(LocalQueryResults &result, h_vector &aggregators, SelectCtx &ctx) { + if (result.aggregationResults.size() < aggregators.size()) { + throw Error(errQueryExec, "Merged query(%s) aggregators count (%d) does not match to the parent query aggregations (%d)", + ctx.query.GetSQL(false), aggregators.size(), result.aggregationResults.size()); + } + for (size_t i = 0; i < aggregators.size(); i++) { + AggregationResult r = aggregators[i].GetResult(); + AggregationResult &parentRes = result.aggregationResults[i]; + if (r.type != parentRes.type || r.fields != parentRes.fields) { + std::stringstream strParentRes; + std::stringstream strR; + throw Error(errQueryExec, "Aggregation incorrect ns %s type of parent %s type of query %s parent field %s query field %s", + ns_->name_, AggTypeToStr(parentRes.type), AggTypeToStr(r.type), parentRes.DumpFields(strParentRes).str(), + r.DumpFields(strR).str()); + } + switch (r.type) { + case AggSum: { + std::optional newVal = r.GetValue(); + std::optional curVal = parentRes.GetValue(); + if (newVal.has_value()) { + double cur = 0.0; + if (curVal.has_value()) { + cur = curVal.value(); + } + parentRes.SetValue(newVal.value() + cur); + } + break; + } + case AggMin: { + std::optional newVal = r.GetValue(); + std::optional curVal = parentRes.GetValue(); + if (newVal.has_value()) { + if (!curVal.has_value() || newVal.value() < curVal.value()) { + parentRes.SetValue(newVal.value()); + } + } + break; + } + case AggMax: { + std::optional newVal = r.GetValue(); + std::optional curVal = parentRes.GetValue(); + if (newVal.has_value()) { + if (!curVal.has_value() || newVal.value() > curVal.value()) { + parentRes.SetValue(newVal.value()); + } + } + break; + } + case AggAvg: + case AggFacet: + case AggDistinct: + 
case AggCount: + case AggCountCached: + case AggUnknown: + assertrx_throw(false); + } + } +} } // namespace reindexer diff --git a/cpp_src/core/nsselecter/nsselecter.h b/cpp_src/core/nsselecter/nsselecter.h index 53444f649..667f8640d 100644 --- a/cpp_src/core/nsselecter/nsselecter.h +++ b/cpp_src/core/nsselecter/nsselecter.h @@ -1,11 +1,15 @@ #pragma once #include "aggregator.h" #include "core/index/index.h" +#include "explaincalc.h" #include "joinedselector.h" #include "sortingcontext.h" namespace reindexer { +enum class IsMergeQuery : bool { Yes = true, No = false }; +enum class IsFTQuery { Yes, No, NotSet }; + struct SelectCtx { explicit SelectCtx(const Query &query_, const Query *parentQuery_) : query(query_), parentQuery(parentQuery_) {} const Query &query; @@ -13,6 +17,7 @@ struct SelectCtx { SelectFunctionsHolder *functions = nullptr; JoinPreResult::Ptr preResult; + ExplainCalc::Duration preResultTimeTotal = ExplainCalc::Duration::zero(); SortingContext sortingContext; uint8_t nsid = 0; bool isForceAll = false; @@ -21,9 +26,14 @@ struct SelectCtx { bool reqMatchedOnceFlag = false; bool contextCollectingMode = false; bool inTransaction = false; + IsMergeQuery isMergeQuery = IsMergeQuery::No; + IsFTQuery isFtQuery = IsFTQuery::NotSet; const Query *parentQuery = nullptr; + ExplainCalc explain; bool requiresCrashTracking = false; + + RX_ALWAYS_INLINE bool isMergeQuerySubQuery() const noexcept { return isMergeQuery == IsMergeQuery::Yes && parentQuery; } }; class ItemComparator; @@ -50,8 +60,8 @@ class NsSelecter { const QueryPreprocessor &qPreproc; h_vector &aggregators; ExplainCalc &explain; - unsigned start = 0; - unsigned count = UINT_MAX; + unsigned start = QueryEntry::kDefaultOffset; + unsigned count = QueryEntry::kDefaultLimit; bool preselectForFt = false; }; @@ -70,7 +80,7 @@ class NsSelecter { void addSelectResult(uint8_t proc, IdType rowId, IdType properRowId, SelectCtx &sctx, h_vector &aggregators, LocalQueryResults &result, bool preselectForFt); - 
h_vector getAggregators(const Query &) const; + h_vector getAggregators(const std::vector &aggEntrys, StrictMode strictMode) const; void setLimitAndOffset(ItemRefVector &result, size_t offset, size_t limit); void prepareSortingContext(SortingEntries &sortBy, SelectCtx &ctx, bool isFt, bool availableSelectBySortIndex); static void prepareSortIndex(const NamespaceImpl &, std::string &column, int &index, bool &skipSortingEntry, StrictMode); @@ -83,11 +93,14 @@ class NsSelecter { template void sortResults(LoopCtx &sctx, It begin, It end, const SortingOptions &sortingOptions, const joins::NamespaceResults *); + size_t calculateNormalCost(const QueryEntries &qe, SelectCtx &ctx, const RdxContext &rdxCtx); + size_t calculateOptimizedCost(size_t costNormal, const QueryEntries &qe, SelectCtx &ctx, const RdxContext &rdxCtx); bool isSortOptimizatonEffective(const QueryEntries &qe, SelectCtx &ctx, const RdxContext &rdxCtx); static bool validateField(StrictMode strictMode, std::string_view name, std::string_view nsName, const TagsMatcher &tagsMatcher); void checkStrictModeAgg(StrictMode strictMode, const std::string &name, const std::string &nsName, const TagsMatcher &tagsMatcher) const; + void writeAggregationResultMergeSubQuery(LocalQueryResults &result, h_vector &aggregators, SelectCtx &ctx); NamespaceImpl *ns_; SelectFunction::Ptr fnc_; FtCtx::Ptr ft_ctx_; diff --git a/cpp_src/core/nsselecter/qresexplainholder.h b/cpp_src/core/nsselecter/qresexplainholder.h new file mode 100644 index 000000000..249d9ea0c --- /dev/null +++ b/cpp_src/core/nsselecter/qresexplainholder.h @@ -0,0 +1,49 @@ +#pragma once + +#include +#include "selectiteratorcontainer.h" + +namespace reindexer { + +class QresExplainHolder { +public: + enum class ExplainEnabled : bool { Yes, No }; + + QresExplainHolder(SelectIteratorContainer& current, ExplainEnabled explainEnabled) noexcept + : current_(current), explainEnabled_(explainEnabled) {} + + void BackupContainer() { + if (explainEnabled_ == 
ExplainEnabled::Yes) { + if (data_) { + throw Error(errLogic, "Attempt to create second backup of the query results. This should not happen"); + } + data_ = std::make_unique(current_); + } + } + SelectIteratorContainer& GetResultsRef() noexcept { + if (!data_) { + return current_; + } + if (data_->result.Empty()) { + data_->result.OpenBracket(OpAnd); + data_->result.Append(data_->backup.begin(), data_->backup.end()); + data_->result.CloseBracket(); + data_->result.Append(current_.begin(), current_.end()); + } + return data_->result; + } + +private: + SelectIteratorContainer& current_; + class Data { + public: + Data(const SelectIteratorContainer& c) : backup(c) {} + + SelectIteratorContainer backup; + SelectIteratorContainer result; + }; + std::unique_ptr data_; + const ExplainEnabled explainEnabled_; +}; + +} // namespace reindexer diff --git a/cpp_src/core/nsselecter/querypreprocessor.cc b/cpp_src/core/nsselecter/querypreprocessor.cc index 5d7b92502..a7bffb666 100644 --- a/cpp_src/core/nsselecter/querypreprocessor.cc +++ b/cpp_src/core/nsselecter/querypreprocessor.cc @@ -1,36 +1,40 @@ #include "querypreprocessor.h" + #include "core/index/index.h" #include "core/index/indextext/indextext.h" #include "core/namespace/namespaceimpl.h" #include "core/nsselecter/joinedselector.h" #include "core/nsselecter/selectiteratorcontainer.h" #include "core/payload/fieldsset.h" +#include "core/query/dsl/dslencoder.h" #include "core/query/queryentry.h" #include "core/sorting/sortexpression.h" #include "estl/overloaded.h" #include "nsselecter.h" +#include "qresexplainholder.h" #include "substitutionhelpers.h" namespace reindexer { -QueryPreprocessor::QueryPreprocessor(QueryEntries &&queries, const Query &query, NamespaceImpl *ns, bool reqMatchedOnce, bool inTransaction) +QueryPreprocessor::QueryPreprocessor(QueryEntries &&queries, NamespaceImpl *ns, const SelectCtx &ctx) : QueryEntries(std::move(queries)), ns_(*ns), - strictMode_(inTransaction ? 
StrictModeNone : ((query.strictMode == StrictModeNotSet) ? ns_.config_.strictMode : query.strictMode)), - start_(query.start), - count_(query.count), - forcedSortOrder_(!query.forcedSortOrder_.empty()), - reqMatchedOnce_(reqMatchedOnce), - query_{query} { - if (forcedSortOrder_ && (start_ > 0 || count_ < UINT_MAX)) { - assertrx(!query.sortingEntries_.empty()); + query_{ctx.query}, + strictMode_(ctx.inTransaction ? StrictModeNone + : ((query_.strictMode == StrictModeNotSet) ? ns_.config_.strictMode : query_.strictMode)), + start_(query_.start), + count_(query_.count), + forcedSortOrder_(!query_.forcedSortOrder_.empty()), + reqMatchedOnce_(ctx.reqMatchedOnceFlag) { + if (forcedSortOrder_ && (start_ > QueryEntry::kDefaultOffset || count_ < QueryEntry::kDefaultLimit)) { + assertrx(!query_.sortingEntries_.empty()); static const std::vector emptyJoinedSelectors; - const auto &sEntry = query.sortingEntries_[0]; + const auto &sEntry = query_.sortingEntries_[0]; if (SortExpression::Parse(sEntry.expression, emptyJoinedSelectors).ByField()) { QueryEntry qe; - qe.values.reserve(query.forcedSortOrder_.size()); - for (const auto &v : query.forcedSortOrder_) qe.values.push_back(v); - qe.condition = query.forcedSortOrder_.size() == 1 ? CondEq : CondSet; + qe.values.reserve(query_.forcedSortOrder_.size()); + for (const auto &v : query_.forcedSortOrder_) qe.values.push_back(v); + qe.condition = query_.forcedSortOrder_.size() == 1 ? 
CondEq : CondSet; qe.index = sEntry.expression; if (!ns_.getIndexByNameOrJsonPath(qe.index, qe.idxNo)) { qe.idxNo = IndexValueType::SetByJsonPath; @@ -40,9 +44,17 @@ QueryPreprocessor::QueryPreprocessor(QueryEntries &&queries, const Query &query, queryEntryAddedByForcedSortOptimization_ = true; } } + if (ctx.isMergeQuery == IsMergeQuery::Yes) { + if (QueryEntry::kDefaultLimit - start_ > count_) { + count_ += start_; + } else { + count_ = QueryEntry::kDefaultLimit; + } + start_ = QueryEntry::kDefaultOffset; + } } -void QueryPreprocessor::ExcludeFtQuery(const SelectFunction &fnCtx, const RdxContext &rdxCtx) { +void QueryPreprocessor::ExcludeFtQuery(const RdxContext &rdxCtx) { if (queryEntryAddedByForcedSortOptimization_ || Size() <= 1) return; for (auto it = begin(), next = it, endIt = end(); it != endIt; it = next) { ++next; @@ -51,29 +63,20 @@ void QueryPreprocessor::ExcludeFtQuery(const SelectFunction &fnCtx, const RdxCon auto &index = ns_.indexes_[indexNo]; if (!IsFastFullText(index->Type())) continue; if (it->operation != OpAnd || (next != endIt && next->operation == OpOr) || !index->EnablePreselectBeforeFt()) break; - ftPreselect_ = index->FtPreselect(*this, indexNo, fnCtx, rdxCtx); - std::visit(overloaded{[&](const FtMergeStatuses &) { - start_ = 0; - count_ = UINT_MAX; - forcedSortOrder_ = false; - ftEntry_ = std::move(it->Value()); - const size_t pos = it.PlainIterator() - cbegin().PlainIterator(); - Erase(pos, pos + 1); - }, - [&](const PreselectedFtIdSetCache::Iterator &) { - const size_t pos = it.PlainIterator() - cbegin().PlainIterator(); - if (pos != 0) { - container_[0] = std::move(container_[pos]); - } - Erase(1, Size()); - }}, - *ftPreselect_); + ftPreselect_ = index->FtPreselect(rdxCtx); + start_ = QueryEntry::kDefaultOffset; + count_ = QueryEntry::kDefaultLimit; + forcedSortOrder_ = false; + ftEntry_ = std::move(it->Value()); + const size_t pos = it.PlainIterator() - cbegin().PlainIterator(); + Erase(pos, pos + 1); break; } } } -bool 
QueryPreprocessor::NeedNextEvaluation(unsigned start, unsigned count, bool &matchedAtLeastOnce) noexcept { +bool QueryPreprocessor::NeedNextEvaluation(unsigned start, unsigned count, bool &matchedAtLeastOnce, + QresExplainHolder &qresHolder) noexcept { if (evaluationsCount_++) return false; if (queryEntryAddedByForcedSortOptimization_) { container_.back().operation = desc_ ? OpAnd : OpNot; @@ -84,12 +87,12 @@ bool QueryPreprocessor::NeedNextEvaluation(unsigned start, unsigned count, bool return count_ || (reqMatchedOnce_ && !matchedAtLeastOnce); } else if (ftEntry_) { if (!matchedAtLeastOnce) return false; + qresHolder.BackupContainer(); start_ = query_.start; count_ = query_.count; forcedSortOrder_ = !query_.forcedSortOrder_.empty(); - Erase(1, container_.size()); - container_[0].SetValue(std::move(*ftEntry_)); - container_[0].operation = OpAnd; + clear(); + Append(OpAnd, std::move(*ftEntry_)); ftEntry_ = std::nullopt; matchedAtLeastOnce = false; equalPositions.clear(); @@ -118,6 +121,19 @@ void QueryPreprocessor::checkStrictMode(const std::string &index, int idxNo) con } } +class JoinOnExplainEnabled; +class JoinOnExplainDisabled; + +void QueryPreprocessor::InjectConditionsFromJoins(JoinedSelectors &js, OnConditionInjections &expalainOnInjections, + const RdxContext &rdxCtx) { + bool needExplain = query_.explain_ || query_.debugLevel >= LogInfo; + if (needExplain) { + injectConditionsFromJoins(0, container_.size(), js, expalainOnInjections, rdxCtx); + } else { + injectConditionsFromJoins(0, container_.size(), js, expalainOnInjections, rdxCtx); + } +} + void QueryPreprocessor::Reduce(bool isFt) { bool changed; do { @@ -190,10 +206,9 @@ void QueryPreprocessor::InitIndexNumbers() { }); } -size_t QueryPreprocessor::lookupQueryIndexes(size_t dst, const size_t srcBegin, const size_t srcEnd) { +size_t QueryPreprocessor::lookupQueryIndexes(uint16_t dst, uint16_t srcBegin, uint16_t srcEnd) { assertrx(dst <= srcBegin); - h_vector iidx(maxIndexes); - std::fill(iidx.begin(), 
iidx.begin() + maxIndexes, -1); + h_vector iidx(kMaxIndexes, uint16_t(0)); size_t merged = 0; for (size_t src = srcBegin, nextSrc; src < srcEnd; src = nextSrc) { nextSrc = Next(src); @@ -206,22 +221,24 @@ size_t QueryPreprocessor::lookupQueryIndexes(size_t dst, const size_t srcBegin, return true; }, [&](QueryEntry &entry) { - const bool isIndexField = (entry.idxNo != IndexValueType::SetByJsonPath); + const bool isIndexField = (entry.idxNo >= 0); if (isIndexField) { // try merge entries with AND opetator if ((GetOperation(src) == OpAnd) && (nextSrc >= srcEnd || GetOperation(nextSrc) != OpOr)) { - if (static_cast(entry.idxNo) >= iidx.size()) { + if (size_t(entry.idxNo) >= iidx.size()) { const auto oldSize = iidx.size(); - iidx.resize(entry.idxNo + 1); - std::fill(iidx.begin() + oldSize, iidx.begin() + iidx.size(), -1); + iidx.resize(size_t(entry.idxNo) + 1); + std::fill(iidx.begin() + oldSize, iidx.begin() + iidx.size(), 0); } - if (iidx[entry.idxNo] >= 0 && !ns_.indexes_[entry.idxNo]->Opts().IsArray()) { - if (mergeQueryEntries(iidx[entry.idxNo], src)) { + auto &iidxRef = iidx[entry.idxNo]; + if (iidxRef > 0 && !ns_.indexes_[entry.idxNo]->Opts().IsArray()) { + if (mergeQueryEntries(iidxRef - 1, src)) { ++merged; return false; } } else { - iidx[entry.idxNo] = dst; + assertrx_throw(dst < std::numeric_limits::max() - 1); + iidxRef = dst + 1; } } } @@ -289,29 +306,32 @@ const std::vector *QueryPreprocessor::getCompositeIndex(int field) const { return nullptr; } -static void createCompositeKeyValues(const h_vector, 4> &values, const PayloadType &plType, Payload *pl, +static void createCompositeKeyValues(const h_vector, 4> &values, const PayloadType &plType, Payload &pl, VariantArray &ret, unsigned n) { - PayloadValue d(plType.TotalSize()); - Payload pl1(plType, d); - if (!pl) pl = &pl1; - - assertrx(n < values.size()); const auto &v = values[n]; for (auto it = v.second.cbegin(), end = v.second.cend(); it != end; ++it) { - pl->Set(v.first, {*it}); + pl.Set(v.first, *it); 
if (n + 1 < values.size()) { createCompositeKeyValues(values, plType, pl, ret, n + 1); } else { - PayloadValue pv(*pl->Value()); + PayloadValue pv(*(pl.Value())); pv.Clone(); - ret.push_back(Variant(std::move(pv))); + ret.emplace_back(std::move(pv)); } } } +static void createCompositeKeyValues(const h_vector, 4> &values, const PayloadType &plType, + VariantArray &ret) { + PayloadValue d(plType.TotalSize()); + Payload pl(plType, d); + createCompositeKeyValues(values, plType, pl, ret, 0); +} + size_t QueryPreprocessor::substituteCompositeIndexes(const size_t from, const size_t to) { using composite_substitution_helpers::CompositeSearcher; using composite_substitution_helpers::EntriesRanges; + using composite_substitution_helpers::CompositeValuesCountLimits; size_t deleted = 0; CompositeSearcher searcher(ns_); @@ -342,25 +362,44 @@ size_t QueryPreprocessor::substituteCompositeIndexes(const size_t from, const si } EntriesRanges deleteRanges; - for (auto resIdx = searcher.GetResult(); resIdx >= 0; resIdx = searcher.RemoveAndGetNext(resIdx)) { + h_vector, 4> values; + auto resIdx = searcher.GetResult(); + while (resIdx >= 0) { auto &res = searcher[resIdx]; - h_vector, 4> values; + values.clear(); + uint32_t resultSetSize = 0; + uint32_t maxSetSize = 0; for (auto i : res.entries) { auto &qe = Get(i); - if (!res.fields.contains(qe.idxNo)) { + if rx_unlikely (!res.fields.contains(qe.idxNo)) { throw Error(errLogic, "Error during composite index's fields substitution (this should not happen)"); } - if (qe.condition == CondEq && qe.values.size() == 0) { - throw Error(errParams, "Condition EQ must have at least 1 argument, but provided 0"); + + maxSetSize = std::max(maxSetSize, qe.values.size()); + resultSetSize = (resultSetSize == 0) ? 
qe.values.size() : (resultSetSize * qe.values.size()); + } + static const CompositeValuesCountLimits kCompositeSetLimits; + if (resultSetSize != maxSetSize) { + // Do not perform substitution if result set size becoms larger than initial indexes set size + // and this size is greater than limit + // TODO: This is potential customization point for the user's hints system + if (resultSetSize > kCompositeSetLimits[res.entries.size()]) { + resIdx = searcher.RemoveUnusedAndGetNext(resIdx); + continue; } + } + for (auto i : res.entries) { + auto &qe = Get(i); + const auto idxKeyType = ns_.indexes_[qe.idxNo]->KeyType(); for (auto &v : qe.values) { - v.convert(ns_.indexes_[qe.idxNo]->KeyType()); + v.convert(idxKeyType); } values.emplace_back(qe.idxNo, std::move(qe.values)); } { QueryEntry ce(CondSet, ns_.indexes_[res.idx]->Name(), res.idx); - createCompositeKeyValues(values, ns_.payloadType_, nullptr, ce.values, 0); + ce.values.reserve(resultSetSize); + createCompositeKeyValues(values, ns_.payloadType_, ce.values); if (ce.values.size() == 1) { ce.condition = CondEq; } @@ -369,6 +408,7 @@ size_t QueryPreprocessor::substituteCompositeIndexes(const size_t from, const si container_[first].SetValue(std::move(ce)); } deleteRanges.Add(span(res.entries.data() + 1, res.entries.size() - 1)); + resIdx = searcher.RemoveUsedAndGetNext(resIdx); } for (auto rit = deleteRanges.rbegin(); rit != deleteRanges.rend(); ++rit) { Erase(rit->From(), rit->To()); @@ -471,30 +511,51 @@ bool QueryPreprocessor::mergeQueryEntries(size_t lhs, size_t rhs) { QueryEntry *lqe = &Get(lhs); QueryEntry &rqe = Get(rhs); if ((lqe->condition == CondEq || lqe->condition == CondSet) && (rqe.condition == CondEq || rqe.condition == CondSet)) { - // intersect 2 queryenries on same index - - convertWhereValues(lqe); - std::sort(lqe->values.begin(), lqe->values.end()); - lqe->values.erase(std::unique(lqe->values.begin(), lqe->values.end()), lqe->values.end()); - - convertWhereValues(&rqe); - 
std::sort(rqe.values.begin(), rqe.values.end()); - rqe.values.erase(std::unique(rqe.values.begin(), rqe.values.end()), rqe.values.end()); - + // intersect 2 queryentries on the same index + if rx_unlikely (lqe->values.empty()) { + return true; + } + if (container_[lhs].IsRef()) { + container_[lhs].SetValue(const_cast(*lqe)); + lqe = &Get(lhs); + } VariantArray setValues; - setValues.reserve(std::min(lqe->values.size(), rqe.values.size())); - std::set_intersection(lqe->values.begin(), lqe->values.end(), rqe.values.begin(), rqe.values.end(), std::back_inserter(setValues)); - if (setValues.empty()) { - container_[lhs].SetValue(AlwaysFalse{}); - } else { - if (container_[lhs].IsRef()) { - container_[lhs].SetValue(const_cast(*lqe)); - lqe = &Get(lhs); + if (rx_likely(!rqe.values.empty())) { + convertWhereValues(lqe); + convertWhereValues(&rqe); + VariantArray *first = &lqe->values; + VariantArray *second = &rqe.values; + if (lqe->values.size() > rqe.values.size()) { + std::swap(first, second); + } + setValues.reserve(first->size()); + constexpr size_t kMinArraySizeToUseHashSet = 250; + if (second->size() < kMinArraySizeToUseHashSet) { + // Intersect via binary search + sort for small vectors + std::sort(first->begin(), first->end()); + for (auto &&v : *second) { + if (std::binary_search(first->begin(), first->end(), v)) { + setValues.emplace_back(std::move(v)); + } + } + } else { + // Intersect via hash_set for large vectors + reindexer::fast_hash_set set; + set.reserve(first->size() * 2); + for (auto &&v : *first) { + set.emplace(std::move(v)); + } + for (auto &&v : *second) { + if (set.erase(v)) { + setValues.emplace_back(std::move(v)); + } + } } - lqe->condition = (setValues.size() == 1) ? CondEq : CondSet; - lqe->values = std::move(setValues); - lqe->distinct |= rqe.distinct; } + + lqe->values = std::move(setValues); + lqe->condition = (lqe->values.size() == 1) ? 
CondEq : CondSet; + lqe->distinct |= rqe.distinct; return true; } else if (rqe.condition == CondAny) { if (!lqe->distinct && rqe.distinct) { @@ -506,12 +567,13 @@ bool QueryPreprocessor::mergeQueryEntries(size_t lhs, size_t rhs) { } return true; } else if (lqe->condition == CondAny) { - rqe.distinct |= lqe->distinct; + const bool distinct = lqe->distinct || rqe.distinct; if (container_[rhs].IsRef()) { container_[lhs].SetValue(const_cast(rqe)); } else { container_[lhs].SetValue(std::move(rqe)); } + Get(lhs).distinct = distinct; return true; } @@ -532,9 +594,9 @@ void QueryPreprocessor::AddDistinctEntries(const h_vector &aggreg } } -void QueryPreprocessor::fillQueryEntryFromOnCondition(QueryEntry &queryEntry, NamespaceImpl &rightNs, Query joinQuery, - std::string joinIndex, CondType condition, KeyValueType valuesType, - const RdxContext &rdxCtx) { +void QueryPreprocessor::fillQueryEntryFromOnCondition(QueryEntry &queryEntry, std::string &explainStr, AggType &oAggType, + NamespaceImpl &rightNs, Query joinQuery, std::string joinIndex, CondType condition, + KeyValueType valuesType, const RdxContext &rdxCtx) { size_t limit; const auto &rNsCfg = rightNs.Config(); if (rNsCfg.maxPreselectSize == 0) { @@ -545,6 +607,7 @@ void QueryPreprocessor::fillQueryEntryFromOnCondition(QueryEntry &queryEntry, Na limit = std::min(std::max(rNsCfg.minPreselectSize, rightNs.ItemsCount() * rNsCfg.maxPreselectPart), rNsCfg.maxPreselectSize); } + joinQuery.explain_ = query_.explain_; joinQuery.count = limit + 2; joinQuery.start = 0; joinQuery.sortingEntries_.clear(); @@ -554,14 +617,17 @@ void QueryPreprocessor::fillQueryEntryFromOnCondition(QueryEntry &queryEntry, Na case CondEq: case CondSet: joinQuery.Distinct(std::move(joinIndex)); + oAggType = AggType::AggDistinct; break; case CondLt: case CondLe: joinQuery.Aggregate(AggMax, {std::move(joinIndex)}); + oAggType = AggType::AggMax; break; case CondGt: case CondGe: joinQuery.Aggregate(AggMin, {std::move(joinIndex)}); + oAggType = 
AggType::AggMin; break; case CondAny: case CondRange: @@ -571,11 +637,13 @@ void QueryPreprocessor::fillQueryEntryFromOnCondition(QueryEntry &queryEntry, Na case CondDWithin: throw Error(errParams, "Unsupported condition in ON statment: %s", CondTypeToStr(condition)); } + SelectCtx ctx{joinQuery, nullptr}; LocalQueryResults qr; rightNs.Select(qr, ctx, rdxCtx); if (qr.Count() > limit) return; assertrx(qr.aggregationResults.size() == 1); + explainStr = qr.explainResults; switch (condition) { case CondEq: case CondSet: { @@ -676,23 +744,70 @@ void QueryPreprocessor::fillQueryEntryFromOnCondition(QueryEntry &queryEntry, st } } -void QueryPreprocessor::injectConditionsFromJoins(size_t from, size_t to, JoinedSelectors &js, const RdxContext &rdxCtx) { +template +void QueryPreprocessor::briefDump(size_t from, size_t to, const std::vector &joinedSelectors, WrSerializer &ser) const { + { + for (auto it = from; it < to; it = Next(it)) { + if (it != from || container_[it].operation != OpAnd) { + ser << container_[it].operation << ' '; + } + container_[it].InvokeAppropriate( + [&](const QueryEntriesBracket &b) { + ser << "("; + briefDump(it + 1, Next(it), joinedSelectors, ser); + dumpEqualPositions(0, ser, b.equalPositions); + ser << ")"; + }, + [&ser](const QueryEntry &qe) { ser << qe.DumpBrief() << ' '; }, + [&joinedSelectors, &ser](const JoinQueryEntry &jqe) { ser << jqe.Dump(joinedSelectors) << ' '; }, + [&ser](const BetweenFieldsQueryEntry &qe) { ser << qe.Dump() << ' '; }, + [&ser](const AlwaysFalse &) { ser << "AlwaysFalse" << ' '; }); + } + } +} + +template +size_t QueryPreprocessor::injectConditionsFromJoins(size_t from, size_t to, JoinedSelectors &js, OnConditionInjections &explainOnInjections, + const RdxContext &rdxCtx) { + using namespace std::string_view_literals; + + size_t injectedCount = 0; for (size_t cur = from; cur < to; cur = Next(cur)) { container_[cur].InvokeAppropriate( Skip{}, - [&js, cur, this, &rdxCtx](const QueryEntriesBracket &) { 
injectConditionsFromJoins(cur + 1, Next(cur), js, rdxCtx); }, + [&](const QueryEntriesBracket &) { + size_t injCount = injectConditionsFromJoins(cur + 1, Next(cur), js, explainOnInjections, rdxCtx); + to += injCount; + injectedCount += injCount; + assertrx_throw(to <= container_.size()); + }, [&](const JoinQueryEntry &jqe) { assertrx(js.size() > jqe.joinIndex); + JoinedSelector &joinedSelector = js[jqe.joinIndex]; - const bool byValues = joinedSelector.PreResult()->dataMode == JoinPreResult::ModeValues; + const bool byValues = joinedSelector.PreResult() && joinedSelector.PreResult()->dataMode == JoinPreResult::ModeValues; + + auto explainJoinOn = ExplainPolicy::AppendJoinOnExplain(explainOnInjections); + explainJoinOn.Init(jqe, js, byValues); + + // Checking if we are able to preselect something from RightNs, or there are preselected results if (!byValues) { const auto &rNsCfg = joinedSelector.RightNs()->Config(); - if (rNsCfg.maxPreselectSize == 0 && rNsCfg.maxPreselectPart == 0.0) return; + if (rNsCfg.maxPreselectSize == 0 && rNsCfg.maxPreselectPart == 0.0) { + explainJoinOn.Skipped("maxPreselectSize and maxPreselectPart == 0"sv); + return; + } } else { - if (!joinedSelector.PreResult()->values.IsPreselectAllowed()) return; + if (!joinedSelector.PreResult()->values.IsPreselectAllowed()) { + explainJoinOn.Skipped("Preselect is not allowed"sv); + return; + } } - assertrx(joinedSelector.Type() == InnerJoin || joinedSelector.Type() == OrInnerJoin); + const auto &joinEntries = joinedSelector.joinQuery_.joinEntries_; + // LeftJoin-s shall not be in QueryEntries container_ by construction + assertrx(joinedSelector.Type() == InnerJoin || joinedSelector.Type() == OrInnerJoin); + // Checking if we have anything to inject into main Where clause bool foundANDOrOR = false; for (const auto &je : joinEntries) { if (je.op_ != OpNot) { @@ -700,21 +815,35 @@ void QueryPreprocessor::injectConditionsFromJoins(size_t from, size_t to, Joined break; } } - if (!foundANDOrOR) return; + 
if (!foundANDOrOR) { + explainJoinOn.Skipped("And or Or operators not found"sv); + return; + } + OpType op = GetOperation(cur); if (joinedSelector.Type() == OrInnerJoin) { if (op == OpNot) throw Error(errParams, "OR INNER JOIN with operation NOT"); op = OpOr; joinedSelector.SetType(InnerJoin); } + + // inserting Bracket for JoinQuery itself into ExpressionTree SetOperation(OpAnd, cur); + // !!!Warning jqe reference will be invalidated after EncloseInBracket EncloseInBracket(cur, cur + 1, op); ++cur; + + explainJoinOn.ReserveOnEntries(joinEntries.size()); + size_t count = 0; bool prevIsSkipped = false; size_t orChainLength = 0; for (size_t i = 0, s = joinEntries.size(); i < s; ++i) { const QueryJoinEntry &joinEntry = joinEntries[i]; + + auto explainEntry = explainJoinOn.AppendOnEntryExplain(); + explainEntry.InitialCondition(joinEntry, joinedSelector); + CondType condition = joinEntry.condition_; OpType operation = joinEntry.op_; switch (operation) { @@ -736,6 +865,7 @@ void QueryPreprocessor::injectConditionsFromJoins(size_t from, size_t to, Joined case CondEq: case CondSet: prevIsSkipped = true; + explainEntry.Skipped("Skipped due to condition Eq|Set with operation Not."sv); continue; case CondAny: case CondRange: @@ -748,13 +878,17 @@ void QueryPreprocessor::injectConditionsFromJoins(size_t from, size_t to, Joined operation = OpAnd; break; case OpOr: - if (prevIsSkipped) continue; + explainEntry.OrChainPart(true); + if (prevIsSkipped) { + continue; + } ++orChainLength; break; case OpAnd: orChainLength = 0; break; } + QueryEntry newEntry; newEntry.index = joinEntry.index_; newEntry.idxNo = IndexValueType::SetByJsonPath; @@ -765,6 +899,7 @@ void QueryPreprocessor::injectConditionsFromJoins(size_t from, size_t to, Joined valuesType = index.SelectKeyType(); collate = index.Opts().collateOpts_; } + if (byValues) { assertrx(joinedSelector.itemQuery_.entries.HoldsOrReferTo(i)); const QueryEntry &qe = joinedSelector.itemQuery_.entries.Get(i); @@ -786,6 +921,9 @@ void 
QueryPreprocessor::injectConditionsFromJoins(size_t from, size_t to, Joined case CondGe: { const QueryEntry &qe = joinedSelector.itemQuery_.entries.Get(i); skip = qe.idxNo != IndexValueType::SetByJsonPath && joinedSelector.RightNs()->indexes_[qe.idxNo]->IsUuid(); + if (skip) { + explainEntry.Skipped("Skipped due to condition Lt|Le|Gt|Ge with UUID index field."sv); + } break; } case CondEq: @@ -799,30 +937,157 @@ void QueryPreprocessor::injectConditionsFromJoins(size_t from, size_t to, Joined break; } if (!skip) { - fillQueryEntryFromOnCondition(newEntry, *joinedSelector.RightNs(), joinedSelector.JoinQuery(), - joinEntry.joinIndex_, condition, valuesType, rdxCtx); + std::string explainSelect; + AggType selectAggType; + fillQueryEntryFromOnCondition(newEntry, explainSelect, selectAggType, *joinedSelector.RightNs(), + joinedSelector.JoinQuery(), joinEntry.joinIndex_, condition, valuesType, rdxCtx); + explainEntry.ExplainSelect(std::move(explainSelect), selectAggType); } } + if (!newEntry.values.empty()) { + explainEntry.Succeed(newEntry); + Insert(cur, operation, std::move(newEntry)); ++cur; ++count; prevIsSkipped = false; } else { + explainEntry.Skipped("Skipped as cannot obtain values from right namespace."sv); if (operation == OpOr) { Erase(cur - orChainLength, cur); count -= orChainLength; + // Marking On-injections as fail for removed entries. 
+ explainJoinOn.FailOnEntriesAsOrChain(orChainLength); } prevIsSkipped = true; } - } + } // end of entries processing + if (count > 0) { EncloseInBracket(cur - count, cur, OpAnd); + + explainJoinOn.Succeed( + [this, cur, count, &js](WrSerializer &ser) { briefDump(cur - count, Next(cur - count), js, ser); }); + ++cur; + injectedCount += count + 2; to += count + 2; } }); } + return injectedCount; } +class JoinOnExplainDisabled { + JoinOnExplainDisabled() noexcept = default; + struct OnEntryExplain { + OnEntryExplain() noexcept = default; + + RX_ALWAYS_INLINE void InitialCondition(const QueryJoinEntry &, const JoinedSelector &) const noexcept {} + RX_ALWAYS_INLINE void Succeed(const QueryEntry &) const noexcept {} + RX_ALWAYS_INLINE void Skipped(std::string_view) const noexcept {} + RX_ALWAYS_INLINE void OrChainPart(bool) const noexcept {} + RX_ALWAYS_INLINE void ExplainSelect(std::string &&, AggType) const noexcept {} + }; + +public: + [[nodiscard]] RX_ALWAYS_INLINE static JoinOnExplainDisabled AppendJoinOnExplain(OnConditionInjections &) noexcept { return {}; } + + RX_ALWAYS_INLINE void Init(const JoinQueryEntry &, const JoinedSelectors &, bool) const noexcept {} + RX_ALWAYS_INLINE void Succeed(const std::function &) const noexcept {} + RX_ALWAYS_INLINE void Skipped(std::string_view) const noexcept {} + RX_ALWAYS_INLINE void ReserveOnEntries(size_t) const noexcept {} + [[nodiscard]] RX_ALWAYS_INLINE OnEntryExplain AppendOnEntryExplain() const noexcept { return {}; } + + RX_ALWAYS_INLINE void FailOnEntriesAsOrChain(size_t) const noexcept {} +}; + +class JoinOnExplainEnabled { + using time_point_t = ExplainCalc::Clock::time_point; + struct OnEntryExplain { + OnEntryExplain(ConditionInjection &explainEntry) noexcept : startTime_(ExplainCalc::Clock::now()), explainEntry_(explainEntry) {} + ~OnEntryExplain() noexcept { explainEntry_.totalTime_ = ExplainCalc::Clock::now() - startTime_; } + OnEntryExplain(const OnEntryExplain &) = delete; + OnEntryExplain(OnEntryExplain 
&&) = delete; + OnEntryExplain &operator=(const OnEntryExplain &) = delete; + OnEntryExplain &operator=(OnEntryExplain &&) = delete; + + void InitialCondition(const QueryJoinEntry &joinEntry, const JoinedSelector &joinedSelector) { + explainEntry_.initCond = joinEntry.DumpCondition(joinedSelector); + } + void Succeed(const QueryEntry &newEntry) { + explainEntry_.succeed = true; + explainEntry_.reason = ""; + explainEntry_.newCond = newEntry.DumpBrief(); + explainEntry_.valuesCount = newEntry.values.size(); + } + + void Skipped(std::string_view reason) noexcept { + if (explainEntry_.reason.empty()) { + explainEntry_.reason = reason; + } + explainEntry_.succeed = false; + } + + void OrChainPart(bool orChainPart) noexcept { explainEntry_.orChainPart_ = orChainPart; } + void ExplainSelect(std::string &&explain, AggType aggType) noexcept { + explainEntry_.explain = std::move(explain); + explainEntry_.aggType = aggType; + } + + private: + time_point_t startTime_; + ConditionInjection &explainEntry_; + }; + + JoinOnExplainEnabled(const JoinOnExplainEnabled &) = delete; + JoinOnExplainEnabled(JoinOnExplainEnabled &&) = delete; + JoinOnExplainEnabled &operator=(const JoinOnExplainEnabled &) = delete; + JoinOnExplainEnabled &operator=(JoinOnExplainEnabled &&) = delete; + + JoinOnExplainEnabled(JoinOnInjection &joinOn) noexcept : explainJoinOn_(joinOn), startTime_(ExplainCalc::Clock::now()) {} + +public: + [[nodiscard]] static JoinOnExplainEnabled AppendJoinOnExplain(OnConditionInjections &explainOnInjections) { + return {explainOnInjections.emplace_back()}; + } + ~JoinOnExplainEnabled() noexcept { explainJoinOn_.totalTime_ = ExplainCalc::Clock::now() - startTime_; } + + void Init(const JoinQueryEntry &jqe, const JoinedSelectors &js, bool byValues) { + const JoinedSelector &joinedSelector = js[jqe.joinIndex]; + explainJoinOn_.rightNsName = joinedSelector.RightNsName(); + explainJoinOn_.joinCond = jqe.DumpOnCondition(js); + explainJoinOn_.type = byValues ? 
JoinOnInjection::ByValue : JoinOnInjection::Select; + } + void Succeed(const std::function &setInjectedCond) { + explainJoinOn_.succeed = true; + setInjectedCond(explainJoinOn_.injectedCond); + } + void Skipped(std::string_view reason) noexcept { + if (explainJoinOn_.reason.empty()) { + explainJoinOn_.reason = reason; + } + explainJoinOn_.succeed = false; + } + + void ReserveOnEntries(size_t count) { explainJoinOn_.conditions.reserve(count); } + [[nodiscard]] OnEntryExplain AppendOnEntryExplain() { return {explainJoinOn_.conditions.emplace_back()}; }; + + void FailOnEntriesAsOrChain(size_t orChainLength) { + using namespace std::string_view_literals; + auto &conditions = explainJoinOn_.conditions; + assertrx(conditions.size() >= orChainLength); + // Marking On-injections as fail for removed entries. + for (size_t jsz = conditions.size(), j = jsz - orChainLength; j < jsz; ++j) { + conditions[j].succeed = false; + conditions[j].orChainPart_ = true; + } + } + +private: + JoinOnInjection &explainJoinOn_; + time_point_t startTime_; +}; + } // namespace reindexer diff --git a/cpp_src/core/nsselecter/querypreprocessor.h b/cpp_src/core/nsselecter/querypreprocessor.h index da56c0b75..907636251 100644 --- a/cpp_src/core/nsselecter/querypreprocessor.h +++ b/cpp_src/core/nsselecter/querypreprocessor.h @@ -9,16 +9,17 @@ namespace reindexer { -class Index; class NamespaceImpl; -class SelectIteratorContainer; +class QresExplainHolder; class QueryPreprocessor : private QueryEntries { public: - QueryPreprocessor(QueryEntries &&, const Query &, NamespaceImpl *, bool reqMatchedOnce, bool inTransaction); + QueryPreprocessor(QueryEntries &&, NamespaceImpl *, const SelectCtx &); const QueryEntries &GetQueryEntries() const noexcept { return *this; } bool LookupQueryIndexes() { - const size_t merged = lookupQueryIndexes(0, 0, container_.size() - queryEntryAddedByForcedSortOptimization_); + const unsigned lookupEnd = queryEntryAddedByForcedSortOptimization_ ? 
container_.size() - 1 : container_.size(); + assertrx_throw(lookupEnd <= uint32_t(std::numeric_limits::max() - 1)); + const size_t merged = lookupQueryIndexes(0, 0, lookupEnd); if (queryEntryAddedByForcedSortOptimization_) { container_[container_.size() - merged - 1] = std::move(container_.back()); } @@ -41,24 +42,23 @@ class QueryPreprocessor : private QueryEntries { } void ConvertWhereValues() { convertWhereValues(begin(), end()); } void AddDistinctEntries(const h_vector &); - bool NeedNextEvaluation(unsigned start, unsigned count, bool &matchedAtLeastOnce) noexcept; + bool NeedNextEvaluation(unsigned start, unsigned count, bool &matchedAtLeastOnce, QresExplainHolder &qresHolder) noexcept; unsigned Start() const noexcept { return start_; } unsigned Count() const noexcept { return count_; } bool MoreThanOneEvaluation() const noexcept { return queryEntryAddedByForcedSortOptimization_; } bool AvailableSelectBySortIndex() const noexcept { return !queryEntryAddedByForcedSortOptimization_ || !forcedStage(); } - void InjectConditionsFromJoins(JoinedSelectors &js, const RdxContext &rdxCtx) { - injectConditionsFromJoins(0, container_.size(), js, rdxCtx); - } + void InjectConditionsFromJoins(JoinedSelectors &js, OnConditionInjections &expalainOnInjections, const RdxContext &rdxCtx); void Reduce(bool isFt); void InitIndexNumbers(); using QueryEntries::Size; using QueryEntries::Dump; + using QueryEntries::ToDsl; [[nodiscard]] SortingEntries GetSortingEntries(const SelectCtx &ctx) const; bool IsFtExcluded() const noexcept { return ftEntry_.has_value(); } - void ExcludeFtQuery(const SelectFunction &, const RdxContext &); + void ExcludeFtQuery(const RdxContext &); FtMergeStatuses &GetFtMergeStatuses() noexcept { assertrx(ftPreselect_); - return std::get(*ftPreselect_); + return *ftPreselect_; } FtPreselectT &&MoveFtPreselect() noexcept { assertrx(ftPreselect_); @@ -80,7 +80,7 @@ class QueryPreprocessor : private QueryEntries { [[nodiscard]] SortingEntries 
detectOptimalSortOrder() const; bool forcedStage() const noexcept { return evaluationsCount_ == (desc_ ? 1 : 0); } - size_t lookupQueryIndexes(size_t dst, size_t srcBegin, size_t srcEnd); + size_t lookupQueryIndexes(uint16_t dst, uint16_t srcBegin, uint16_t srcEnd); size_t substituteCompositeIndexes(size_t from, size_t to); bool mergeQueryEntries(size_t lhs, size_t rhs); const std::vector *getCompositeIndex(int field) const; @@ -89,9 +89,13 @@ class QueryPreprocessor : private QueryEntries { [[nodiscard]] const Index *findMaxIndex(QueryEntries::const_iterator begin, QueryEntries::const_iterator end) const; void findMaxIndex(QueryEntries::const_iterator begin, QueryEntries::const_iterator end, h_vector &foundIndexes) const; - void injectConditionsFromJoins(size_t from, size_t to, JoinedSelectors &, const RdxContext &); - void fillQueryEntryFromOnCondition(QueryEntry &, NamespaceImpl &rightNs, Query joinQuery, std::string joinIndex, CondType condition, - KeyValueType, const RdxContext &); + /** @brief recurrently checks and injects Join ON conditions + * @returns injected conditions and EntryBrackets count + */ + template + size_t injectConditionsFromJoins(size_t from, size_t to, JoinedSelectors &, OnConditionInjections &, const RdxContext &); + void fillQueryEntryFromOnCondition(QueryEntry &, std::string &outExplainStr, AggType &, NamespaceImpl &rightNs, Query joinQuery, + std::string joinIndex, CondType condition, KeyValueType, const RdxContext &); template void fillQueryEntryFromOnCondition(QueryEntry &, std::string_view joinIndex, CondType condition, const JoinedSelector &, KeyValueType, int rightIdxNo, const CollateOpts &); @@ -100,16 +104,19 @@ class QueryPreprocessor : private QueryEntries { size_t removeBrackets(size_t begin, size_t end); bool canRemoveBracket(size_t i) const; + template + void briefDump(size_t from, size_t to, const std::vector &joinedSelectors, WrSerializer &ser) const; + NamespaceImpl &ns_; + const Query &query_; StrictMode strictMode_; 
size_t evaluationsCount_ = 0; - unsigned start_ = 0; - unsigned count_ = UINT_MAX; + unsigned start_ = QueryEntry::kDefaultOffset; + unsigned count_ = QueryEntry::kDefaultLimit; bool queryEntryAddedByForcedSortOptimization_ = false; bool desc_ = false; bool forcedSortOrder_ = false; bool reqMatchedOnce_ = false; - const Query &query_; std::optional ftEntry_; std::optional ftPreselect_; }; diff --git a/cpp_src/core/nsselecter/selectiterator.cc b/cpp_src/core/nsselecter/selectiterator.cc index 719e86806..6a5cd3ba3 100644 --- a/cpp_src/core/nsselecter/selectiterator.cc +++ b/cpp_src/core/nsselecter/selectiterator.cc @@ -1,13 +1,14 @@ #include "selectiterator.h" + #include #include #include "core/index/indexiterator.h" namespace reindexer { -SelectIterator::SelectIterator(SelectKeyResult res, bool dist, std::string n, bool forcedFirst) - : SelectKeyResult(std::move(res)), distinct(dist), name(std::move(n)), forcedFirst_(forcedFirst), type_(Forward) {} +SelectIterator::SelectIterator(SelectKeyResult res, bool dist, std::string n, IteratorFieldKind fKind, bool forcedFirst) + : SelectKeyResult(std::move(res)), distinct(dist), name(std::move(n)), fieldKind(fKind), forcedFirst_(forcedFirst), type_(Forward) {} void SelectIterator::Bind(const PayloadType &type, int field) { for (Comparator &cmp : comparators_) cmp.Bind(type, field); @@ -77,7 +78,7 @@ void SelectIterator::Start(bool reverse, int maxIterations) { } // Generic next implementation -bool SelectIterator::nextFwd(IdType minHint) { +bool SelectIterator::nextFwd(IdType minHint) noexcept { if (minHint > lastVal_) lastVal_ = minHint - 1; int minVal = INT_MAX; for (auto it = begin(); it != end(); it++) { @@ -112,7 +113,7 @@ bool SelectIterator::nextFwd(IdType minHint) { return lastVal_ != INT_MAX; } -bool SelectIterator::nextRev(IdType maxHint) { +bool SelectIterator::nextRev(IdType maxHint) noexcept { if (maxHint < lastVal_) lastVal_ = maxHint + 1; int maxVal = INT_MIN; @@ -145,7 +146,7 @@ bool 
SelectIterator::nextRev(IdType maxHint) { } // Single idset next implementation -bool SelectIterator::nextFwdSingleIdset(IdType minHint) { +bool SelectIterator::nextFwdSingleIdset(IdType minHint) noexcept { if (minHint > lastVal_) lastVal_ = minHint - 1; auto it = begin(); if (it->useBtree_) { @@ -167,7 +168,7 @@ bool SelectIterator::nextFwdSingleIdset(IdType minHint) { return !(lastVal_ == INT_MAX); } -bool SelectIterator::nextRevSingleIdset(IdType maxHint) { +bool SelectIterator::nextRevSingleIdset(IdType maxHint) noexcept { if (maxHint < lastVal_) lastVal_ = maxHint + 1; auto it = begin(); @@ -185,8 +186,10 @@ bool SelectIterator::nextRevSingleIdset(IdType maxHint) { return !(lastVal_ == INT_MIN); } +bool SelectIterator::nextUnbuiltSortOrders() noexcept { return begin()->indexForwardIter_->Next(); } + // Single range next implementation -bool SelectIterator::nextFwdSingleRange(IdType minHint) { +bool SelectIterator::nextFwdSingleRange(IdType minHint) noexcept { if (minHint > lastVal_) lastVal_ = minHint - 1; if (lastVal_ < begin()->rBegin_) lastVal_ = begin()->rBegin_ - 1; @@ -196,7 +199,7 @@ bool SelectIterator::nextFwdSingleRange(IdType minHint) { return (lastVal_ != INT_MAX); } -bool SelectIterator::nextRevSingleRange(IdType maxHint) { +bool SelectIterator::nextRevSingleRange(IdType maxHint) noexcept { if (maxHint < lastVal_) lastVal_ = maxHint + 1; if (lastVal_ > begin()->rrBegin_) lastVal_ = begin()->rrBegin_ + 1; @@ -207,7 +210,7 @@ bool SelectIterator::nextRevSingleRange(IdType maxHint) { } // Unsorted next implementation -bool SelectIterator::nextUnsorted() { +bool SelectIterator::nextUnsorted() noexcept { if (lastIt_ == end()) { return false; } else if (lastIt_->it_ == lastIt_->end_) { @@ -230,8 +233,6 @@ bool SelectIterator::nextUnsorted() { return false; } -bool SelectIterator::nextUnbuiltSortOrders() { return begin()->indexForwardIter_->Next(); } - void SelectIterator::ExcludeLastSet(const PayloadValue &value, IdType rowId, IdType properRowId) { for 
(auto &comp : comparators_) comp.ExcludeDistinct(value, properRowId); if (type_ == UnbuiltSortOrdersIndex) { @@ -270,17 +271,28 @@ void SelectIterator::AppendAndBind(SelectKeyResult &other, const PayloadType &ty } double SelectIterator::Cost(int expectedIterations) const noexcept { - if (type_ == UnbuiltSortOrdersIndex) return -1; - if (forcedFirst_) return -GetMaxIterations(); + if (type_ == UnbuiltSortOrdersIndex) { + return -1; + } + if (forcedFirst_) { + return -GetMaxIterations(); + } double result{0.0}; if (!comparators_.empty()) { const auto jsonPathComparators = std::count_if(comparators_.begin(), comparators_.end(), [](const Comparator &c) noexcept { return c.HasJsonPaths(); }); // Comparatos with non index fields must have much higher cost, than comparators with index fields - result = jsonPathComparators ? (kNonIdxFieldComparatorCostMultiplier * expectedIterations + jsonPathComparators + 1) - : (expectedIterations + 1); + result = jsonPathComparators ? (kNonIdxFieldComparatorCostMultiplier * double(expectedIterations) + jsonPathComparators + 1) + : (double(expectedIterations) + 1); + } + if (distinct) { + result += size(); + } else if (type_ != SingleIdSetWithDeferedSort && type_ != RevSingleIdSetWithDeferedSort && !deferedExplicitSort) { + result += static_cast(GetMaxIterations()) * size(); + } else { + result += static_cast(CostWithDefferedSort(size(), GetMaxIterations(), expectedIterations)); } - return result + static_cast(distinct ? 1 : GetMaxIterations()) * size(); + return isNotOperation_ ? 
expectedIterations + result : result; } IdType SelectIterator::Val() const noexcept { diff --git a/cpp_src/core/nsselecter/selectiterator.h b/cpp_src/core/nsselecter/selectiterator.h index d51469ffd..55286e183 100644 --- a/cpp_src/core/nsselecter/selectiterator.h +++ b/cpp_src/core/nsselecter/selectiterator.h @@ -4,6 +4,7 @@ namespace reindexer { +enum class IteratorFieldKind { None, NonIndexed, Indexed }; /// Allows to iterate over a result of selecting /// data for one certain key. class SelectIterator : public SelectKeyResult { @@ -23,7 +24,7 @@ class SelectIterator : public SelectKeyResult { }; SelectIterator() = default; - SelectIterator(SelectKeyResult res, bool distinct, std::string name, bool forcedFirst = false); + SelectIterator(SelectKeyResult res, bool distinct, std::string name, IteratorFieldKind fieldKind, bool forcedFirst = false); /// Starts iteration process: prepares /// object for further work. @@ -32,11 +33,11 @@ class SelectIterator : public SelectKeyResult { void Start(bool reverse, int maxIterations); /// Signalizes if iteration is over. /// @return true if iteration is done. - inline bool End() const noexcept { return lastVal_ == (isReverse_ ? INT_MIN : INT_MAX) && !comparators_.size(); } + RX_ALWAYS_INLINE bool End() const noexcept { return lastVal_ == (isReverse_ ? INT_MIN : INT_MAX) && !comparators_.size(); } /// Iterates to a next item of result. /// @param minHint - rowId value to start from. /// @return true if operation succeed. 
- inline bool Next(IdType minHint) { + RX_ALWAYS_INLINE bool Next(IdType minHint) { bool res = false; switch (type_) { case Forward: @@ -73,7 +74,7 @@ class SelectIterator : public SelectKeyResult { } /// Sets Unsorted iteration mode - inline void SetUnsorted() noexcept { isUnsorted = true; } + RX_ALWAYS_INLINE void SetUnsorted() noexcept { isUnsorted = true; } /// Current rowId IdType Val() const noexcept; @@ -92,12 +93,13 @@ class SelectIterator : public SelectKeyResult { /// Uses each comparator to compare with pl. /// @param pl - PayloadValue to be compared. /// @param rowId - rowId. - inline bool TryCompare(const PayloadValue &pl, int rowId) noexcept { - for (auto &cmp : comparators_) + RX_ALWAYS_INLINE bool TryCompare(const PayloadValue &pl, int rowId) { + for (auto &cmp : comparators_) { if (cmp.Compare(pl, rowId)) { matchedCount_++; return true; } + } return false; } /// @return amonut of matched items @@ -120,6 +122,8 @@ class SelectIterator : public SelectKeyResult { /// cost goes before others. double Cost(int expectedIterations) const noexcept; + void SetNotOperationFlag(bool isNotOperation) noexcept { isNotOperation_ = isNotOperation; } + /// Switches SingleSelectKeyResult to btree search /// mode if it's more efficient than just comparing /// each object in sequence. @@ -132,19 +136,20 @@ class SelectIterator : public SelectKeyResult { bool distinct = false; std::string name; + IteratorFieldKind fieldKind; protected: // Iterates to a next item of result // depending on iterator type starting // from minHint which is the least rowId. 
- bool nextFwd(IdType minHint); - bool nextRev(IdType minHint); - bool nextFwdSingleRange(IdType minHint); - bool nextFwdSingleIdset(IdType minHint); - bool nextRevSingleRange(IdType minHint); - bool nextRevSingleIdset(IdType minHint); - bool nextUnbuiltSortOrders(); - bool nextUnsorted(); + bool nextFwd(IdType minHint) noexcept; + bool nextRev(IdType minHint) noexcept; + bool nextFwdSingleRange(IdType minHint) noexcept; + bool nextFwdSingleIdset(IdType minHint) noexcept; + bool nextRevSingleRange(IdType minHint) noexcept; + bool nextRevSingleIdset(IdType minHint) noexcept; + bool nextUnbuiltSortOrders() noexcept; + bool nextUnsorted() noexcept; /// Performs ID sets merge and sort in case, when this sort was defered earlier and still effective with current maxIterations value bool applyDeferedSort(int maxIterations) { @@ -161,9 +166,10 @@ class SelectIterator : public SelectKeyResult { bool isUnsorted = false; bool isReverse_ = false; bool forcedFirst_ = false; + bool isNotOperation_ = false; int type_ = 0; - IdType lastVal_ = INT_MIN; iterator lastIt_ = nullptr; + IdType lastVal_ = INT_MIN; IdType end_ = 0; int matchedCount_ = 0; }; diff --git a/cpp_src/core/nsselecter/selectiteratorcontainer.cc b/cpp_src/core/nsselecter/selectiteratorcontainer.cc index 94d76c0fe..f30cdfabe 100644 --- a/cpp_src/core/nsselecter/selectiteratorcontainer.cc +++ b/cpp_src/core/nsselecter/selectiteratorcontainer.cc @@ -1,4 +1,6 @@ #include "selectiteratorcontainer.h" + +#include #include #include "core/namespace/namespaceimpl.h" #include "core/rdxcontext.h" @@ -15,9 +17,7 @@ void SelectIteratorContainer::SortByCost(int expectedIterations) { indexes.resize(container_.size()); costs.resize(container_.size()); } - for (size_t i = 0; i < container_.size(); ++i) { - indexes[i] = i; - } + std::iota(indexes.begin(), indexes.begin() + container_.size(), 0); sortByCost(indexes, costs, 0, container_.size(), expectedIterations); for (size_t i = 0; i < container_.size(); ++i) { if (indexes[i] != i) 
{ @@ -190,6 +190,34 @@ SelectKeyResults SelectIteratorContainer::processQueryEntry(const QueryEntry &qe FieldsSet fields; TagsPath tagsPath = ns.tagsMatcher_.path2tag(qe.index); + + // TODO: it may be necessary to remove or change this switch after QueryEntry refactoring + switch (qe.condition) { + case CondAny: + case CondEmpty: + case CondAllSet: + case CondEq: + case CondSet: + break; + case CondRange: + case CondDWithin: + if (qe.values.size() != 2) { + throw Error(errParams, "For condition %s required exactly 2 arguments, but provided %d", CondTypeToStr(qe.condition), + qe.values.size()); + } + break; + case CondLt: + case CondLe: + case CondGt: + case CondGe: + case CondLike: + if (qe.values.size() != 1) { + throw Error(errParams, "For condition %s required exactly 1 argument, but provided %d", CondTypeToStr(qe.condition), + qe.values.size()); + } + break; + } + if (!tagsPath.empty()) { SelectKeyResult comparisonResult; fields.push_back(tagsPath); @@ -299,7 +327,14 @@ void SelectIteratorContainer::processJoinEntry(const JoinQueryEntry &jqe, OpType } void SelectIteratorContainer::processQueryEntryResults(SelectKeyResults &selectResults, OpType op, const NamespaceImpl &ns, - const QueryEntry &qe, bool isIndexFt, bool isIndexSparse, bool nonIndexField) { + const QueryEntry &qe, bool isIndexFt, bool isIndexSparse, bool nonIndexField, + std::optional nextOp) { + if (selectResults.empty()) { + if (op == OpAnd) { + Append(OpAnd, AlwaysFalse{}); + } + return; + } for (SelectKeyResult &res : selectResults) { switch (op) { case OpOr: { @@ -324,7 +359,9 @@ void SelectIteratorContainer::processQueryEntryResults(SelectKeyResults &selectR [[fallthrough]]; case OpNot: case OpAnd: - Append(op, SelectIterator(res, qe.distinct, qe.index, isIndexFt)); + // Iterator Field Kind: Query entry results. Field known. + Append(op, SelectIterator(res, qe.distinct, qe.index, + qe.idxNo < 0 ? 
IteratorFieldKind::NonIndexed : IteratorFieldKind::Indexed, isIndexFt)); if (!nonIndexField && !isIndexSparse) { // last appended is always a SelectIterator const auto lastAppendedIt = lastAppendedOrClosed(); @@ -333,8 +370,10 @@ void SelectIteratorContainer::processQueryEntryResults(SelectKeyResults &selectR } SelectIterator &lastAppended = lastAppendedIt->Value(); lastAppended.Bind(ns.payloadType_, qe.idxNo); - const int cur = lastAppended.GetMaxIterations(); - if (lastAppended.comparators_.empty()) { + lastAppended.SetNotOperationFlag(op == OpNot); + const auto maxIterations = lastAppended.GetMaxIterations(); + const int cur = op == OpNot ? ns.items_.size() - maxIterations : maxIterations; + if (lastAppended.comparators_.empty() && (!nextOp.has_value() || nextOp.value() != OpOr)) { if (cur && cur < maxIterations_) maxIterations_ = cur; if (!cur) wasZeroIterations_ = true; } @@ -511,7 +550,11 @@ bool SelectIteratorContainer::prepareIteratorsForSelectLoop(QueryPreprocessor &q selectResults = processQueryEntry(qe, enableSortIndexOptimize, ns, sortId, isQueryFt, selectFnc, isIndexFt, isIndexSparse, ftCtx, qPreproc, rdxCtx); } - processQueryEntryResults(selectResults, op, ns, qe, isIndexFt, isIndexSparse, nonIndexField); + std::optional nextOp; + if (next != end) { + nextOp = queries.GetOperation(next); + } + processQueryEntryResults(selectResults, op, ns, qe, isIndexFt, isIndexSparse, nonIndexField, nextOp); if (op != OpOr) { for (auto &ep : equalPositions) { const auto lastPosition = ep.queryEntriesPositions.back(); @@ -630,7 +673,13 @@ IdType SelectIteratorContainer::next(const_iterator it, IdType from) { return from; }, [from](const JoinSelectIterator &) { return from; }, [from](const FieldsComparator &) { return from; }, - [from](const AlwaysFalse &) { return from; }); + [](const AlwaysFalse &) { + if constexpr (reverse) { + return std::numeric_limits::lowest(); + } else { + return std::numeric_limits::max(); + } + }); } template diff --git 
a/cpp_src/core/nsselecter/selectiteratorcontainer.h b/cpp_src/core/nsselecter/selectiteratorcontainer.h index 556a70cdc..6b07c7275 100644 --- a/cpp_src/core/nsselecter/selectiteratorcontainer.h +++ b/cpp_src/core/nsselecter/selectiteratorcontainer.h @@ -104,7 +104,7 @@ class SelectIteratorContainer void processField(FieldsComparator &, std::string_view field, int idxNo, const NamespaceImpl &ns) const; void processJoinEntry(const JoinQueryEntry &, OpType); void processQueryEntryResults(SelectKeyResults &selectResults, OpType, const NamespaceImpl &ns, const QueryEntry &qe, bool isIndexFt, - bool isIndexSparse, bool nonIndexField); + bool isIndexSparse, bool nonIndexField, std::optional nextOp); struct EqualPositions { h_vector queryEntriesPositions; size_t positionToInsertIterator = 0; diff --git a/cpp_src/core/nsselecter/substitutionhelpers.h b/cpp_src/core/nsselecter/substitutionhelpers.h index 8450a6b6c..f53fee54e 100644 --- a/cpp_src/core/nsselecter/substitutionhelpers.h +++ b/cpp_src/core/nsselecter/substitutionhelpers.h @@ -7,19 +7,35 @@ namespace reindexer { namespace composite_substitution_helpers { +class CompositeValuesCountLimits { +public: + uint32_t operator[](uint32_t fieldsCount) const noexcept { + if rx_unlikely (fieldsCount >= limits_.size()) { + return kMaxValuesCount; + } + return limits_[fieldsCount]; + } + +private: + constexpr static uint32_t kMaxValuesCount = 4000; + + std::array limits_ = {0, 0, 300, 1000, 2000, 4000}; +}; + class CompositeSearcher { public: struct IndexData { - IndexData(int field, int _idx, unsigned entry) : fields{field}, idx{_idx}, entries{entry} {} + IndexData(int field, int _idx, uint16_t entry) : fields(field), idx(_idx), entries{entry} {} IndexesFieldsSet fields; int idx; - h_vector entries; + h_vector entries; }; CompositeSearcher(const NamespaceImpl &ns) noexcept : ns_(ns) {} void Add(int field, const std::vector &composites, unsigned entry) { + assertrx_throw(entry < std::numeric_limits::max()); for (auto composite 
: composites) { const auto idxType = ns_.indexes_[composite]->Type(); if (idxType != IndexCompositeBTree && idxType != IndexCompositeHash) { @@ -46,9 +62,10 @@ class CompositeSearcher { auto &data = d_[i]; const auto &idxFields = ns_.indexes_[data.idx]->Fields(); // If all of the composite fields were found in query - if (data.fields.size() == idxFields.size() && idxFields.contains(data.fields)) { - if (data.fields.size() > maxSize) { - maxSize = data.fields.size(); + const auto dfCnt = data.fields.count(); + if (dfCnt == idxFields.size() && idxFields.contains(data.fields)) { + if (dfCnt > maxSize) { + maxSize = dfCnt; res = i; } } else { @@ -57,22 +74,32 @@ class CompositeSearcher { } return res; } - int RemoveAndGetNext(unsigned curId) noexcept { + int RemoveUnusedAndGetNext(uint16_t curId) noexcept { + if (unsigned(curId) + 1 != d_.size()) { + std::swap(d_[curId], d_.back()); + } + d_.pop_back(); + return GetResult(); + } + int RemoveUsedAndGetNext(uint16_t curId) noexcept { int res = -1; unsigned deleted = 1; unsigned maxSize = 0; - if (curId + 1 != d_.size()) { + if (unsigned(curId) + 1 != d_.size()) { std::swap(d_[curId], d_.back()); } const auto &cur = d_.back(); - for (unsigned i = 0; i < d_.size() - deleted; ++i) { + for (unsigned i = 0, sz = d_.size(); i < sz - deleted; ++i) { auto &data = d_[i]; if (haveIntersection(data.entries, cur.entries)) { - std::swap(data, d_[d_.size() - ++deleted]); + std::swap(data, d_[sz - ++deleted]); --i; - } else if (data.fields.size() > maxSize) { - res = i; - maxSize = data.fields.size(); + } else { + const auto dfCnt = data.fields.count(); + if (dfCnt > maxSize) { + res = i; + maxSize = dfCnt; + } } } while (deleted--) { @@ -80,16 +107,16 @@ class CompositeSearcher { } return res; } - const IndexData &operator[](unsigned i) const noexcept { return d_[i]; } + const IndexData &operator[](uint16_t i) const noexcept { return d_[i]; } private: - void remove(unsigned i) noexcept { - if (i + 1 != d_.size()) { + void 
remove(uint16_t i) noexcept { + if (unsigned(i) + 1 != d_.size()) { std::swap(d_[i], d_.back()); } d_.pop_back(); } - static bool haveIntersection(const h_vector &lEntries, const h_vector &rEntries) noexcept { + static bool haveIntersection(const h_vector &lEntries, const h_vector &rEntries) noexcept { for (auto lit = lEntries.begin(), rit = rEntries.begin(); lit != lEntries.end() && rit != rEntries.end();) { if (*lit < *rit) { ++lit; @@ -102,20 +129,20 @@ class CompositeSearcher { return false; } - h_vector d_; + h_vector d_; const NamespaceImpl &ns_; }; // EntriesRange - query entries range. [from; to) class EntriesRange { public: - EntriesRange(unsigned from, unsigned to) : from_(from), to_(to) { + EntriesRange(uint16_t from, uint16_t to) : from_(from), to_(to) { if (to_ <= from_) { throw Error(errLogic, "Unexpected range boarders during indexes substitution: [%u,%u)", from_, to_); } } - unsigned From() const noexcept { return from_; } - unsigned To() const noexcept { return to_; } + uint16_t From() const noexcept { return from_; } + uint16_t To() const noexcept { return to_; } void ExtendRight() noexcept { ++to_; } void ExtendLeft() { if (!from_) { @@ -130,11 +157,11 @@ class EntriesRange { } return false; } - unsigned Size() const noexcept { return to_ - from_; } + uint16_t Size() const noexcept { return to_ - from_; } private: - unsigned from_; - unsigned to_; + uint16_t from_; + uint16_t to_; }; // EntriesRanges - contains ordered vector of entries ranges. 
Ranges can not intercept with each other @@ -145,7 +172,7 @@ class EntriesRanges : h_vector { Base::const_reverse_iterator rbegin() const noexcept { return Base::rbegin(); } Base::const_reverse_iterator rend() const noexcept { return Base::rend(); } - void Add(span entries) { + void Add(span entries) { for (auto entry : entries) { auto insertionPos = Base::end(); bool wasMerged = false; @@ -188,7 +215,7 @@ class EntriesRanges : h_vector { } } if (!wasMerged) { - Base::insert(insertionPos, EntriesRange{entry, entry + 1}); + Base::insert(insertionPos, EntriesRange{entry, uint16_t(entry + 1)}); } } } diff --git a/cpp_src/core/parallelexecutor.h b/cpp_src/core/parallelexecutor.h index 113ae536b..7fda21a15 100644 --- a/cpp_src/core/parallelexecutor.h +++ b/cpp_src/core/parallelexecutor.h @@ -49,7 +49,15 @@ class ParallelExecutor { completionFunction(clientCount, clientCompl, clientErrors, shardId, mtx, cv, err); }); - Error err = std::invoke(f, results.back().connection, std::forward(args)...); + auto &conn = results.back().connection; + auto invokeWrap = [&f, &conn](auto &&...args) { return std::invoke(f, conn, std::forward(args)...); }; + + Error err; + // check whether it is necessary to pass the ShardId to the function + if constexpr (std::is_invocable_v) + err = invokeWrap(std::forward(args)..., shardId); + else + err = invokeWrap(std::forward(args)...); if (!err.ok()) { std::lock_guard lck(mtx); diff --git a/cpp_src/core/payload/fieldsset.cc b/cpp_src/core/payload/fieldsset.cc index 453b7e33d..80a6b85c6 100644 --- a/cpp_src/core/payload/fieldsset.cc +++ b/cpp_src/core/payload/fieldsset.cc @@ -4,15 +4,17 @@ namespace reindexer { [[noreturn]] void IndexesFieldsSet::throwMaxValueError(int f) { - static_assert(std::numeric_limits::digits >= maxIndexes, "mask_ needs to provide 'maxIndexes' bits or more"); - throw Error(errLogic, "Can not push_back(%d) to IndexesFieldsSet. 
Value must be in scope [-1,%d]", f, maxIndexes); + throw Error(errLogic, "Can not push_back(%d) to IndexesFieldsSet. Value must be in scope [-1,%d]", f, kMaxIndexes - 1); } FieldsSet::FieldsSet(const TagsMatcher &tagsMatcher, const h_vector &fields) : mask_(0) { - static_assert(std::numeric_limits::digits >= maxIndexes, "mask_ needs to provide 'maxIndexes' bits or more"); for (const std::string &str : fields) { tagsPaths_.emplace_back(tagsMatcher.path2tag(str)); } } +void FieldsSet::throwMaxValueError(int f) { + throw Error(errLogic, "Can not push_back(%d) to FieldsSet. Value must be in scope [-1,%d]", f, kMaxIndexes - 1); +} + } // namespace reindexer diff --git a/cpp_src/core/payload/fieldsset.h b/cpp_src/core/payload/fieldsset.h index dcb5e9a0c..3e6f49cef 100644 --- a/cpp_src/core/payload/fieldsset.h +++ b/cpp_src/core/payload/fieldsset.h @@ -10,38 +10,37 @@ namespace reindexer { class TagsMatcher; -static constexpr int maxIndexes = 64; +static constexpr int kMaxIndexes = 256; // 'tuple'-index always occupies 1 slot -using base_fields_set = h_vector; +using base_fields_set = h_vector; +static_assert(std::numeric_limits::max() >= kMaxIndexes, + "base_fields_set must be able to store any indexed field number"); +static_assert(std::numeric_limits::min() <= SetByJsonPath, + "base_fields_set must be able to store non-indexed fields"); +static_assert(sizeof(std::bitset) == 32, "Expecting no overhead from std::bitset"); using FieldsPath = std::variant; +using ScalarIndexesSetT = std::bitset; + class IndexesFieldsSet { public: IndexesFieldsSet() noexcept = default; - IndexesFieldsSet(std::initializer_list l) { - for (auto i : l) { - push_back(i); - } - } - bool contains(int f) const noexcept { return f >= 0 && f <= maxIndexes && (mask_ & (1ULL << f)); } + IndexesFieldsSet(int f) { push_back(f); } + bool contains(int f) const noexcept { return f >= 0 && f < kMaxIndexes && mask_.test(unsigned(f)); } void push_back(int f) { if (f < 0) return; - if (f > maxIndexes) { + if 
(f >= kMaxIndexes) { throwMaxValueError(f); } - if (!contains(f)) { - mask_ |= 1ULL << f; - ++count_; - } + mask_.set(unsigned(f)); } - uint64_t mask() const noexcept { return mask_; } - unsigned size() const noexcept { return count_; } + const std::bitset &mask() const &noexcept { return mask_; } + const std::bitset &mask() const && = delete; + unsigned count() const noexcept { return mask_.count(); } private: [[noreturn]] void throwMaxValueError(int f); - - uint64_t mask_ = 0; - unsigned count_ = 0; + std::bitset mask_; }; class FieldsSet : protected base_fields_set { @@ -53,13 +52,14 @@ class FieldsSet : protected base_fields_set { using base_fields_set::empty; using base_fields_set::operator[]; FieldsSet(const TagsMatcher &, const h_vector &fields); - FieldsSet(std::initializer_list l) : mask_(0) { + FieldsSet(int f) { push_back(f); } + FieldsSet(std::initializer_list l) { for (auto f : l) push_back(f); } - FieldsSet(std::initializer_list l) : mask_(0) { + FieldsSet(std::initializer_list l) { for (const TagsPath &tagsPath : l) push_back(tagsPath); } - FieldsSet(std::initializer_list l) : mask_(0) { + FieldsSet(std::initializer_list l) { for (const IndexedTagsPath &tagsPath : l) push_back(tagsPath); } FieldsSet() = default; @@ -115,38 +115,42 @@ class FieldsSet : protected base_fields_set { } void push_back(int f) { - if (f == IndexValueType::SetByJsonPath) return; - assertrx(f < maxIndexes); + if (f < 0) return; + if (f >= kMaxIndexes) { + throwMaxValueError(f); + } if (!contains(f)) { - mask_ |= 1ULL << f; + mask_.set(unsigned(f)); base_fields_set::push_back(f); } } void push_front(int f) { - if (f == IndexValueType::SetByJsonPath) return; - assertrx(f < maxIndexes); + if (f < 0) return; + if (f >= kMaxIndexes) { + throwMaxValueError(f); + } if (!contains(f)) { - mask_ |= 1ULL << f; + mask_.set(unsigned(f)); base_fields_set::insert(begin(), f); } } void erase(int f) { - bool byJsonPath = (f == IndexValueType::SetByJsonPath); + const bool byJsonPath = (f < 0); 
if (byJsonPath || contains(f)) { auto it = std::find(begin(), end(), f); assertrx(it != end()); base_fields_set::erase(it); - if (!byJsonPath) mask_ &= ~(1ULL << f); + if (!byJsonPath) mask_.reset(unsigned(f)); } } - bool contains(int f) const noexcept { return mask_ & (1ULL << f); } - bool contains(const FieldsSet &f) const noexcept { return mask_ && ((mask_ & f.mask_) == f.mask_); } - bool contains(const std::string &jsonPath) const noexcept { + bool contains(int f) const noexcept { return f >= 0 && f < kMaxIndexes && mask_.test(unsigned(f)); } + bool contains(const FieldsSet &f) const noexcept { return (mask_ & f.mask_) == f.mask_; } + bool contains(std::string_view jsonPath) const noexcept { return std::find(jsonPaths_.begin(), jsonPaths_.end(), jsonPath) != jsonPaths_.end(); } - bool contains(const IndexesFieldsSet &f) const noexcept { return mask_ && ((mask_ & f.mask()) == f.mask()); } + bool contains(const IndexesFieldsSet &f) const noexcept { return (mask_ & f.mask()) == f.mask(); } bool contains(const TagsPath &tagsPath) const noexcept { for (const FieldsPath &path : tagsPaths_) { if (path.index() == 0) { @@ -194,7 +198,7 @@ class FieldsSet : protected base_fields_set { base_fields_set::clear(); tagsPaths_.clear(); jsonPaths_.clear(); - mask_ = 0; + mask_.reset(); } size_t getTagsPathsLength() const noexcept { return tagsPaths_.size(); } @@ -204,9 +208,12 @@ class FieldsSet : protected base_fields_set { assertrx(idx < tagsPaths_.size()); return (tagsPaths_[idx].index() == 1); } - const TagsPath &getTagsPath(size_t idx) const { return std::get(tagsPaths_[idx]); } - const IndexedTagsPath &getIndexedTagsPath(size_t idx) const { return std::get(tagsPaths_[idx]); } - const std::string &getJsonPath(size_t idx) const { return jsonPaths_[idx]; } + const TagsPath &getTagsPath(size_t idx) const & { return std::get(tagsPaths_[idx]); } + const TagsPath &getTagsPath(size_t idx) const && = delete; + const IndexedTagsPath &getIndexedTagsPath(size_t idx) const & { return 
std::get(tagsPaths_[idx]); } + const IndexedTagsPath &getIndexedTagsPath(size_t idx) const && = delete; + const std::string &getJsonPath(size_t idx) const &noexcept { return jsonPaths_[idx]; } + const std::string &getJsonPath(size_t idx) const && = delete; bool operator==(const FieldsSet &f) const noexcept { return (mask_ == f.mask_) && (tagsPaths_ == f.tagsPaths_) && (jsonPaths_ == jsonPaths_); @@ -221,7 +228,7 @@ class FieldsSet : protected base_fields_set { if (it != b) os << ", "; os << *it; } - os << "], mask: " << std::bitset<64>{mask_} << ", tagsPaths: ["; + os << "], mask: " << mask_ << ", tagsPaths: ["; for (auto b = tagsPaths_.cbegin(), it = b, e = tagsPaths_.cend(); it != e; ++it) { if (it != b) os << ", "; std::visit(fieldsPathDumper, *it); @@ -243,8 +250,9 @@ class FieldsSet : protected base_fields_set { } return (i == count); } + [[noreturn]] void throwMaxValueError(int f); - uint64_t mask_ = 0; + std::bitset mask_; h_vector tagsPaths_; /// Json paths to non indexed fields. /// Necessary only for composite full text diff --git a/cpp_src/core/payload/payloadfieldvalue.cc b/cpp_src/core/payload/payloadfieldvalue.cc index 8a0ccf296..e9a0d6a06 100644 --- a/cpp_src/core/payload/payloadfieldvalue.cc +++ b/cpp_src/core/payload/payloadfieldvalue.cc @@ -1,82 +1,10 @@ #include "payloadfieldvalue.h" -#include "core/keyvalue/p_string.h" -#include "core/keyvalue/uuid.h" -#include "estl/one_of.h" -#include "tools/stringstools.h" namespace reindexer { -void PayloadFieldValue::Set(Variant kv) { - t_.Type().EvaluateOneOf(overloaded{[&kv](KeyValueType::Int64) { - if (kv.Type().Is()) kv.convert(KeyValueType::Int64{}); - }, - [&kv](KeyValueType::Int) { - if (kv.Type().Is()) kv.convert(KeyValueType::Int{}); - }, - [&kv](KeyValueType::Uuid) { - if (kv.Type().Is()) kv.convert(KeyValueType::Uuid{}); - }, - [](OneOf) noexcept {}}); - if (!kv.Type().IsSame(t_.Type())) { - throw Error(errLogic, "PayloadFieldValue::Set field '%s' type mismatch. 
passed '%s', expected '%s'\n", t_.Name(), kv.Type().Name(), - t_.Type().Name()); - } - - t_.Type().EvaluateOneOf([&](KeyValueType::Int) noexcept { *reinterpret_cast(p_) = int(kv); }, - [&](KeyValueType::Bool) noexcept { *reinterpret_cast(p_) = bool(kv); }, - [&](KeyValueType::Int64) noexcept { *reinterpret_cast(p_) = int64_t(kv); }, - [&](KeyValueType::Double) noexcept { *reinterpret_cast(p_) = double(kv); }, - [&](KeyValueType::String) noexcept { *reinterpret_cast(p_) = p_string(kv); }, - [&](KeyValueType::Uuid) noexcept { *reinterpret_cast(p_) = Uuid{kv}; }, - [](OneOf) noexcept { - assertrx(0); - abort(); - }); -} - -Variant PayloadFieldValue::Get(bool enableHold) const { - return t_.Type().EvaluateOneOf( - [&](KeyValueType::Bool) noexcept { return Variant(*reinterpret_cast(p_)); }, - [&](KeyValueType::Int) noexcept { return Variant(*reinterpret_cast(p_)); }, - [&](KeyValueType::Int64) noexcept { return Variant(*reinterpret_cast(p_)); }, - [&](KeyValueType::Double) noexcept { return Variant(*reinterpret_cast(p_)); }, - [&](KeyValueType::String) { return Variant(*reinterpret_cast(p_), enableHold); }, - [&](KeyValueType::Uuid) noexcept { return Variant(*reinterpret_cast(p_)); }, - [](OneOf) noexcept -> Variant { - assertrx(0); - abort(); - }); -} -size_t PayloadFieldValue::Hash() const noexcept { - return t_.Type().EvaluateOneOf( - [&](KeyValueType::Bool) noexcept { return std::hash()(*reinterpret_cast(p_)); }, - [&](KeyValueType::Int) noexcept { return std::hash()(*reinterpret_cast(p_)); }, - [&](KeyValueType::Int64) noexcept { return std::hash()(*reinterpret_cast(p_)); }, - [&](KeyValueType::Double) noexcept { return std::hash()(*reinterpret_cast(p_)); }, - [&](KeyValueType::String) noexcept { return std::hash()(*reinterpret_cast(p_)); }, - [&](KeyValueType::Uuid) noexcept { return std::hash()(*reinterpret_cast(p_)); }, - [](OneOf) noexcept -> size_t { - assertrx(0); - abort(); - }); -} - -bool PayloadFieldValue::IsEQ(const PayloadFieldValue &o) const { - if 
(!t_.Type().IsSame(o.t_.Type())) return false; - return t_.Type().EvaluateOneOf( - [&](KeyValueType::Bool) noexcept { return *reinterpret_cast(p_) == *reinterpret_cast(o.p_); }, - [&](KeyValueType::Int) noexcept { return *reinterpret_cast(p_) == *reinterpret_cast(o.p_); }, - [&](KeyValueType::Int64) noexcept { return *reinterpret_cast(p_) == *reinterpret_cast(o.p_); }, - [&](KeyValueType::Double) noexcept { return *reinterpret_cast(p_) == *reinterpret_cast(o.p_); }, - [&](KeyValueType::String) { - return collateCompare(*reinterpret_cast(p_), *reinterpret_cast(o.p_), CollateOpts()) == 0; - }, - [&](KeyValueType::Uuid) noexcept { return *reinterpret_cast(p_) == *reinterpret_cast(o.p_); }, - [](OneOf) noexcept -> bool { - assertrx(0); - abort(); - }); +void PayloadFieldValue::throwSetTypeMissmatch(const Variant& kv) { + throw Error(errLogic, "PayloadFieldValue::Set field '%s' type mismatch. passed '%s', expected '%s'\n", t_.Name(), kv.Type().Name(), + t_.Type().Name()); } } // namespace reindexer diff --git a/cpp_src/core/payload/payloadfieldvalue.h b/cpp_src/core/payload/payloadfieldvalue.h index 7ab1bf76f..011b01ee8 100644 --- a/cpp_src/core/payload/payloadfieldvalue.h +++ b/cpp_src/core/payload/payloadfieldvalue.h @@ -1,7 +1,11 @@ #pragma once +#include "core/keyvalue/p_string.h" +#include "core/keyvalue/uuid.h" #include "core/keyvalue/variant.h" +#include "estl/one_of.h" #include "payloadfieldtype.h" +#include "tools/stringstools.h" namespace reindexer { @@ -15,15 +19,87 @@ class PayloadFieldValue { // Construct object PayloadFieldValue(const PayloadFieldType &t, uint8_t *v) noexcept : t_(t), p_(v) {} // Single value operations - void Set(Variant kv); - Variant Get(bool enableHold = false) const; - size_t Hash() const noexcept; - bool IsEQ(const PayloadFieldValue &o) const; + void Set(Variant kv) { + t_.Type().EvaluateOneOf(overloaded{[&kv](KeyValueType::Int64) { + if (kv.Type().Is()) kv.convert(KeyValueType::Int64{}); + }, + [&kv](KeyValueType::Int) { + if 
(kv.Type().Is()) kv.convert(KeyValueType::Int{}); + }, + [&kv](KeyValueType::Uuid) { + if (kv.Type().Is()) kv.convert(KeyValueType::Uuid{}); + }, + [](OneOf) noexcept {}}); + if (!kv.Type().IsSame(t_.Type())) { + throwSetTypeMissmatch(kv); + } + + t_.Type().EvaluateOneOf( + [&](KeyValueType::Int) noexcept { *reinterpret_cast(p_) = int(kv); }, + [&](KeyValueType::Bool) noexcept { *reinterpret_cast(p_) = bool(kv); }, + [&](KeyValueType::Int64) noexcept { *reinterpret_cast(p_) = int64_t(kv); }, + [&](KeyValueType::Double) noexcept { *reinterpret_cast(p_) = double(kv); }, + [&](KeyValueType::String) noexcept { *reinterpret_cast(p_) = p_string(kv); }, + [&](KeyValueType::Uuid) noexcept { *reinterpret_cast(p_) = Uuid{kv}; }, + [](OneOf) noexcept { + assertrx(0); + abort(); + }); + } + Variant Get(bool enableHold = false) const { + return t_.Type().EvaluateOneOf( + [&](KeyValueType::Bool) noexcept { return Variant(*reinterpret_cast(p_)); }, + [&](KeyValueType::Int) noexcept { return Variant(*reinterpret_cast(p_)); }, + [&](KeyValueType::Int64) noexcept { return Variant(*reinterpret_cast(p_)); }, + [&](KeyValueType::Double) noexcept { return Variant(*reinterpret_cast(p_)); }, + [&](KeyValueType::String) { return Variant(*reinterpret_cast(p_), enableHold); }, + [&](KeyValueType::Uuid) noexcept { return Variant(*reinterpret_cast(p_)); }, + [](OneOf) noexcept -> Variant { + assertrx(0); + abort(); + }); + } + size_t Hash() const noexcept { + return t_.Type().EvaluateOneOf( + [&](KeyValueType::Bool) noexcept { return std::hash()(*reinterpret_cast(p_)); }, + [&](KeyValueType::Int) noexcept { return std::hash()(*reinterpret_cast(p_)); }, + [&](KeyValueType::Int64) noexcept { return std::hash()(*reinterpret_cast(p_)); }, + [&](KeyValueType::Double) noexcept { return std::hash()(*reinterpret_cast(p_)); }, + [&](KeyValueType::String) noexcept { return std::hash()(*reinterpret_cast(p_)); }, + [&](KeyValueType::Uuid) noexcept { return std::hash()(*reinterpret_cast(p_)); }, + 
[](OneOf) noexcept -> size_t { + assertrx(0); + abort(); + }); + } + bool IsEQ(const PayloadFieldValue &o) const { + if (!t_.Type().IsSame(o.t_.Type())) return false; + return t_.Type().EvaluateOneOf( + [&](KeyValueType::Bool) noexcept { return *reinterpret_cast(p_) == *reinterpret_cast(o.p_); }, + [&](KeyValueType::Int) noexcept { return *reinterpret_cast(p_) == *reinterpret_cast(o.p_); }, + [&](KeyValueType::Int64) noexcept { + return *reinterpret_cast(p_) == *reinterpret_cast(o.p_); + }, + [&](KeyValueType::Double) noexcept { return *reinterpret_cast(p_) == *reinterpret_cast(o.p_); }, + [&](KeyValueType::String) { + return collateCompare(*reinterpret_cast(p_), *reinterpret_cast(o.p_), + SortingPrioritiesTable()) == 0; + }, + [&](KeyValueType::Uuid) noexcept { return *reinterpret_cast(p_) == *reinterpret_cast(o.p_); }, + [](OneOf) noexcept -> bool { + assertrx(0); + abort(); + }); + } // Type of value, not owning const PayloadFieldType &t_; // Value data, not owning uint8_t *p_; + +private: + [[noreturn]] void throwSetTypeMissmatch(const Variant &kv); }; } // namespace reindexer diff --git a/cpp_src/core/payload/payloadiface.cc b/cpp_src/core/payload/payloadiface.cc index ac2c430ea..b70d716b7 100644 --- a/cpp_src/core/payload/payloadiface.cc +++ b/cpp_src/core/payload/payloadiface.cc @@ -93,7 +93,7 @@ void PayloadIface::GetByJsonPath(const IndexedTagsPath &tagsPath, VariantArra ConstPayload pl(t_, *v_); FieldsSet filter({tagsPath}); BaseEncoder encoder(nullptr, &filter); - krefs.clear(); + krefs.Clear(); if (!tagsPath.empty()) { FieldsExtractor extractor(&krefs, expectedType, tagsPath.size(), &filter); encoder.Encode(pl, extractor); @@ -101,52 +101,26 @@ void PayloadIface::GetByJsonPath(const IndexedTagsPath &tagsPath, VariantArra } template -VariantArray PayloadIface::GetIndexedArrayData(const IndexedTagsPath &tagsPath, int &offset, int &size) const { +VariantArray PayloadIface::GetIndexedArrayData(const IndexedTagsPath &tagsPath, int field, int &offset, int 
&size) const { if (tagsPath.empty()) { throw Error(errParams, "GetIndexedArrayData(): tagsPath shouldn't be empty!"); } - + if (field < 0 || field >= kMaxIndexes) { + throw Error(errParams, "GetIndexedArrayData(): field must be a valid index number"); + } VariantArray values; FieldsSet filter({tagsPath}); BaseEncoder encoder(nullptr, &filter); - FieldsExtractor extractor(&values, KeyValueType::Undefined{}, tagsPath.size(), &filter, &offset, &size); + offset = -1; + size = -1; + FieldsExtractor::FieldParams params{.index = offset, .length = size, .field = field}; + FieldsExtractor extractor(&values, KeyValueType::Undefined{}, tagsPath.size(), &filter, ¶ms); ConstPayload pl(t_, *v_); encoder.Encode(pl, extractor); return values; } -// Set element or array by field index -template -template ::value>::type *> -void PayloadIface::Set(std::string_view field, const VariantArray &keys, bool append) { - return Set(t_.FieldByName(field), keys, append); -} - -template -template ::value>::type *> -void PayloadIface::Set(int field, const VariantArray &keys, bool append) { - const auto size = keys.size(); - if (!t_.Field(field).IsArray() && size >= 1) { - Field(field).Set(keys[0]); - return; - } - - if (keys.IsNullValue()) { - ResizeArray(field, 0, append); - return; - } - - int pos = ResizeArray(field, size, append); - auto const *const arr = reinterpret_cast(Field(field).p_); - const auto elemSize = t_.Field(field).ElemSizeof(); - - for (const Variant &kv : keys) { - PayloadFieldValue pv(t_.Field(field), v_->Ptr() + arr->offset + (pos++) * elemSize); - pv.Set(kv); - } -} - template template ::value>::type *> void PayloadIface::SetSingleElement(int field, const Variant &key) { @@ -217,11 +191,6 @@ size_t PayloadIface::RealSize() const { return sz; } -template -PayloadFieldValue PayloadIface::Field(int field) const noexcept { - return PayloadFieldValue(t_.Field(field), v_->Ptr() + t_.Field(field).Offset()); -} - // Serialize field values template void 
PayloadIface::SerializeFields(WrSerializer &ser, const FieldsSet &fields) const { @@ -522,6 +491,24 @@ void PayloadIface::copyOrMoveStrings(int field, StrHolder &dest, bool copy) { } } +template +template ::value>::type *> +void PayloadIface::setArray(int field, const VariantArray &keys, bool append) { + if (keys.IsNullValue()) { + ResizeArray(field, 0, append); + return; + } + + int pos = ResizeArray(field, keys.size(), append); + auto const *const arr = reinterpret_cast(Field(field).p_); + const auto elemSize = t_.Field(field).ElemSizeof(); + + for (const Variant &kv : keys) { + PayloadFieldValue pv(t_.Field(field), v_->Ptr() + arr->offset + (pos++) * elemSize); + pv.Set(kv); + } +} + template void PayloadIface::MoveStrings(int field, StringsHolder &dest) { copyOrMoveStrings(field, dest, false); diff --git a/cpp_src/core/payload/payloadiface.h b/cpp_src/core/payload/payloadiface.h index 72117ebf5..345622e56 100644 --- a/cpp_src/core/payload/payloadiface.h +++ b/cpp_src/core/payload/payloadiface.h @@ -54,7 +54,29 @@ class PayloadIface { // Set element or array by field index template ::value>::type * = nullptr> - void Set(int field, const VariantArray &keys, bool append = false); + void Set(int field, const VariantArray &keys, bool append = false) { + if (!t_.Field(field).IsArray() && keys.size() >= 1) { + Field(field).Set(keys[0]); + } else { + setArray(field, keys, append); + } + } + template ::value>::type * = nullptr> + void Set(int field, const Variant &key, bool append = false) { + if (t_.Field(field).IsArray()) { + Set(field, VariantArray{key}, append); + return; + } + Field(field).Set(key); + } + template ::value>::type * = nullptr> + void Set(int field, Variant &&key, bool append = false) { + if (t_.Field(field).IsArray()) { + Set(field, VariantArray{std::move(key)}, append); + return; + } + Field(field).Set(std::move(key)); + } // Set non-array element by field index template ::value>::type * = nullptr> @@ -62,7 +84,17 @@ class PayloadIface { // Set 
element or array by field index template ::value>::type * = nullptr> - void Set(std::string_view field, const VariantArray &keys, bool append = false); + void Set(std::string_view field, const VariantArray &keys, bool append = false) { + return Set(t_.FieldByName(field), keys, append); + } + template ::value>::type * = nullptr> + void Set(std::string_view field, const Variant &key, bool append = false) { + return Set(t_.FieldByName(field), key, append); + } + template ::value>::type * = nullptr> + void Set(std::string_view field, Variant &&key, bool append = false) { + return Set(t_.FieldByName(field), std::move(key), append); + } // Set element or array by field index and element index template ::value>::type * = nullptr> @@ -80,7 +112,7 @@ class PayloadIface { void GetByJsonPath(std::string_view jsonPath, TagsMatcher &tagsMatcher, VariantArray &, KeyValueType expectedType) const; void GetByJsonPath(const TagsPath &jsonPath, VariantArray &, KeyValueType expectedType) const; void GetByJsonPath(const IndexedTagsPath &jsonPath, VariantArray &, KeyValueType expectedType) const; - VariantArray GetIndexedArrayData(const IndexedTagsPath &jsonPath, int &offset, int &size) const; + VariantArray GetIndexedArrayData(const IndexedTagsPath &jsonPath, int field, int &offset, int &size) const; // Get fields count int NumFields() const noexcept { return t_.NumFields(); } @@ -113,7 +145,7 @@ class PayloadIface { TagsMatcher &rtm, bool lForceByJsonPath, bool rForceByJsonPath) const; // Get PayloadFieldValue by field index - PayloadFieldValue Field(int field) const noexcept; + PayloadFieldValue Field(int field) const noexcept { return PayloadFieldValue(t_.Field(field), v_->Ptr() + t_.Field(field).Offset()); } // Add refs to strings - make payload value complete self holding void AddRefStrings() noexcept; @@ -138,6 +170,8 @@ class PayloadIface { T CopyWithRemovedFields(PayloadType t); template void copyOrMoveStrings(int field, StrHolder &dest, bool copy); + template ::value>::type * 
= nullptr> + void setArray(int field, const VariantArray &keys, bool append); // Array of elements types , not owning const PayloadTypeImpl &t_; diff --git a/cpp_src/core/proxycallback.h b/cpp_src/core/proxycallback.h new file mode 100644 index 000000000..85ea0835d --- /dev/null +++ b/cpp_src/core/proxycallback.h @@ -0,0 +1,28 @@ +#pragma once +#include +#include +#include +#include "tools/errors.h" + +namespace reindexer { +class RdxContext; +} + +namespace reindexer::proxycb { + +enum class Type { + kSharding, + kCluster, +}; + +enum class ActionType { + kApplyShardingConfig, + kNone, +}; + +const fast_hash_map> kActions = { + {"apply_sharding_config", {Type::kSharding, ActionType::kApplyShardingConfig}}}; + +using CallbackFT = std::function; +using CallbackMap = fast_hash_map; +} // namespace reindexer::proxycb \ No newline at end of file diff --git a/cpp_src/core/query/dsl/dslencoder.cc b/cpp_src/core/query/dsl/dslencoder.cc index 1bbfa00e6..988119ec6 100644 --- a/cpp_src/core/query/dsl/dslencoder.cc +++ b/cpp_src/core/query/dsl/dslencoder.cc @@ -1,4 +1,5 @@ #include "dslencoder.h" + #include #include "core/cjson/jsonbuilder.h" #include "core/keyvalue/key_string.h" @@ -103,8 +104,8 @@ void encodeAggregationFunctions(const Query& query, JsonBuilder& builder) { auto aggNode = arrNode.Object(); aggNode.Put("type", AggTypeToStr(entry.Type())); encodeSorting(entry.Sorting(), aggNode); - if (entry.Limit() != AggregateEntry::kDefaultLimit) aggNode.Put("limit", entry.Limit()); - if (entry.Offset() != AggregateEntry::kDefaultOffset) aggNode.Put("offset", entry.Offset()); + if (entry.Limit() != QueryEntry::kDefaultLimit) aggNode.Put("limit", entry.Limit()); + if (entry.Offset() != QueryEntry::kDefaultOffset) aggNode.Put("offset", entry.Offset()); auto fldNode = aggNode.Array("fields"); for (const auto& field : entry.Fields()) { fldNode.Put(nullptr, field); diff --git a/cpp_src/core/query/dsl/dslencoder.h b/cpp_src/core/query/dsl/dslencoder.h index 55928152b..b629d74b5 
100644 --- a/cpp_src/core/query/dsl/dslencoder.h +++ b/cpp_src/core/query/dsl/dslencoder.h @@ -8,5 +8,6 @@ class Query; namespace dsl { std::string toDsl(const Query& query); -} +} // namespace dsl + } // namespace reindexer diff --git a/cpp_src/core/query/dsl/dslparser.cc b/cpp_src/core/query/dsl/dslparser.cc index 9b3fe0209..be7431a35 100644 --- a/cpp_src/core/query/dsl/dslparser.cc +++ b/cpp_src/core/query/dsl/dslparser.cc @@ -151,15 +151,15 @@ static const fast_str_map update_field_type_map = { {"value", UpdateFieldType::Value}, }; -bool checkTag(JsonValue& val, JsonTag tag) { return val.getTag() == tag; } +static bool checkTag(const JsonValue& val, JsonTag tag) noexcept { return val.getTag() == tag; } template -bool checkTag(JsonValue& val, JsonTag tag, Tags... tags) { +bool checkTag(const JsonValue& val, JsonTag tag, Tags... tags) noexcept { return std::max(tag == val.getTag(), checkTag(val, tags...)); } template -void checkJsonValueType(JsonValue& val, std::string_view name, JsonTags... possibleTags) { +void checkJsonValueType(const JsonValue& val, std::string_view name, JsonTags... 
possibleTags) { if (!checkTag(val, possibleTags...)) throw Error(errParseJson, "Wrong type of field '%s'", name); } @@ -173,23 +173,23 @@ T get(fast_str_map const& m, std::string_view name, std::string_view mapName) } template -void parseStringArray(JsonValue& stringArray, Arr& array) { - for (auto element : stringArray) { - auto& value = element->value; +void parseStringArray(const JsonValue& stringArray, Arr& array) { + for (const auto& element : stringArray) { + auto& value = element.value; checkJsonValueType(value, "string array item", JSON_STRING); - array.push_back(std::string(value.toString())); + array.emplace_back(value.toString()); } } template -void parseValues(JsonValue& values, Array& kvs) { +void parseValues(const JsonValue& values, Array& kvs) { if (values.getTag() == JSON_ARRAY) { - for (auto elem : values) { + for (const auto& elem : values) { Variant kv; - if (elem->value.getTag() == JSON_OBJECT) { - kv = Variant(stringifyJson(*elem)); - } else if (elem->value.getTag() != JSON_NULL) { - kv = jsonValue2Variant(elem->value, KeyValueType::Undefined{}); + if (elem.value.getTag() == JSON_OBJECT) { + kv = Variant(stringifyJson(elem)); + } else if (elem.value.getTag() != JSON_NULL) { + kv = jsonValue2Variant(elem.value, KeyValueType::Undefined{}); kv.EnsureHold(); } if (!kvs.empty() && !kvs.back().Type().IsSame(kv.Type())) { @@ -211,14 +211,14 @@ void parseValues(JsonValue& values, Array& kvs) { } } -void parse(JsonValue& root, Query& q); +void parse(const JsonValue& root, Query& q); -void parseSortEntry(JsonValue& entry, SortingEntries& sortingEntries, std::vector& forcedSortOrder) { +static void parseSortEntry(const JsonValue& entry, SortingEntries& sortingEntries, std::vector& forcedSortOrder) { checkJsonValueType(entry, "Sort", JSON_OBJECT); SortingEntry sortingEntry; - for (auto subelement : entry) { - auto& v = subelement->value; - std::string_view name = subelement->key; + for (const auto& subelement : entry) { + auto& v = subelement.value; + 
std::string_view name = subelement.key; switch (get(sort_map, name, "sort"sv)) { case Sort::Desc: if ((v.getTag() != JSON_TRUE) && (v.getTag() != JSON_FALSE)) throw Error(errParseJson, "Wrong type of field '%s'", name); @@ -243,9 +243,9 @@ void parseSortEntry(JsonValue& entry, SortingEntries& sortingEntries, std::vecto } } -void parseSort(JsonValue& v, SortingEntries& sortingEntries, std::vector& forcedSortOrder) { +static void parseSort(const JsonValue& v, SortingEntries& sortingEntries, std::vector& forcedSortOrder) { if (v.getTag() == JSON_ARRAY) { - for (auto entry : v) parseSort(entry->value, sortingEntries, forcedSortOrder); + for (auto entry : v) parseSort(entry.value, sortingEntries, forcedSortOrder); } else if (v.getTag() == JSON_OBJECT) { parseSortEntry(v, sortingEntries, forcedSortOrder); } else { @@ -253,10 +253,11 @@ void parseSort(JsonValue& v, SortingEntries& sortingEntries, std::vector>& equalPositions, size_t lastBracketPosition); +void parseSingleJoinQuery(const JsonValue& join, Query& query); +void parseEqualPositions(const JsonValue& dsl, std::vector>& equalPositions, size_t lastBracketPosition); -void parseFilter(JsonValue& filter, Query& q, std::vector>& equalPositions, size_t lastBracketPosition) { +static void parseFilter(const JsonValue& filter, Query& q, std::vector>& equalPositions, + size_t lastBracketPosition) { OpType op = OpAnd; CondType condition{CondEq}; VariantArray values; @@ -264,9 +265,9 @@ void parseFilter(JsonValue& filter, Query& q, std::vectorvalue; - auto name = elem->key; + for (const auto& elem : filter) { + auto& v = elem.value; + std::string_view name(elem.key); switch (get(filter_map, name, "filter"sv)) { case Filter::Cond: checkJsonValueType(v, name, JSON_STRING); @@ -309,7 +310,7 @@ void parseFilter(JsonValue& filter, Query& q, std::vectorvalue, q, equalPositions, bracketPosition); + for (const auto& f : v) parseFilter(f.value, q, equalPositions, bracketPosition); q.entries.CloseBracket(); entryType = BRACKET; break; 
@@ -328,7 +329,6 @@ void parseFilter(JsonValue& filter, Query& q, std::vectorvalue; + for (const auto& element : joinEntries) { + auto& joinEntry = element.value; checkJsonValueType(joinEntry, "Joined", JSON_OBJECT); QueryJoinEntry qjoinEntry; - for (auto subelement : joinEntry) { - auto& value = subelement->value; - std::string_view name = subelement->key; + for (const auto& subelement : joinEntry) { + auto& value = subelement.value; + std::string_view name = subelement.key; switch (get(joined_entry_map, name, "join_query.on"sv)) { case JoinEntry::LeftField: checkJsonValueType(value, name, JSON_STRING); @@ -407,12 +408,12 @@ void parseJoinedEntries(JsonValue& joinEntries, JoinedQuery& qjoin) { } } -void parseSingleJoinQuery(JsonValue& join, Query& query) { +void parseSingleJoinQuery(const JsonValue& join, Query& query) { JoinedQuery qjoin; std::vector> equalPositions; - for (auto subelement : join) { - auto& value = subelement->value; - std::string_view name = subelement->key; + for (const auto& subelement : join) { + auto& value = subelement.value; + std::string_view name = subelement.key; switch (get(joins_map, name, "join_query"sv)) { case JoinRoot::Type: checkJsonValueType(value, name, JSON_STRING); @@ -424,7 +425,7 @@ void parseSingleJoinQuery(JsonValue& join, Query& query) { break; case JoinRoot::Filters: checkJsonValueType(value, name, JSON_ARRAY); - for (auto filter : value) parseFilter(filter->value, qjoin, equalPositions, 0); + for (const auto& filter : value) parseFilter(filter.value, qjoin, equalPositions, 0); break; case JoinRoot::Sort: parseSort(value, qjoin.sortingEntries_, qjoin.forcedSortOrder_); @@ -460,9 +461,9 @@ void parseSingleJoinQuery(JsonValue& join, Query& query) { query.joinQueries_.emplace_back(std::move(qjoin)); } -void parseMergeQueries(JsonValue& mergeQueries, Query& query) { - for (auto element : mergeQueries) { - auto& merged = element->value; +static void parseMergeQueries(const JsonValue& mergeQueries, Query& query) { + for 
(const auto& element : mergeQueries) { + auto& merged = element.value; checkJsonValueType(merged, "Merged", JSON_OBJECT); JoinedQuery qmerged; parse(merged, qmerged); @@ -471,22 +472,22 @@ void parseMergeQueries(JsonValue& mergeQueries, Query& query) { } } -void parseAggregation(JsonValue& aggregation, Query& query) { +static void parseAggregation(const JsonValue& aggregation, Query& query) { checkJsonValueType(aggregation, "Aggregation", JSON_OBJECT); h_vector fields; AggType type = AggUnknown; SortingEntries sortingEntries; - unsigned limit{AggregateEntry::kDefaultLimit}; - unsigned offset{AggregateEntry::kDefaultOffset}; - for (auto element : aggregation) { - auto& value = element->value; - std::string_view name = element->key; + unsigned limit{QueryEntry::kDefaultLimit}; + unsigned offset{QueryEntry::kDefaultOffset}; + for (const auto& element : aggregation) { + auto& value = element.value; + std::string_view name = element.key; switch (get(aggregation_map, name, "aggregations"sv)) { case Aggregation::Fields: checkJsonValueType(value, name, JSON_ARRAY); - for (auto subElem : value) { - if (subElem->value.getTag() != JSON_STRING) throw Error(errParseJson, "Expected string in array 'fields'"); - fields.push_back(std::string(subElem->value.toString())); + for (const auto& subElem : value) { + if (subElem.value.getTag() != JSON_STRING) throw Error(errParseJson, "Expected string in array 'fields'"); + fields.emplace_back(subElem.value.toString()); } break; case Aggregation::Type: @@ -516,23 +517,25 @@ void parseAggregation(JsonValue& aggregation, Query& query) { query.aggregations_.emplace_back(type, std::move(fields), std::move(sortingEntries), limit, offset); } -void parseEqualPositions(JsonValue& dsl, std::vector>& equalPositions, size_t lastBracketPosition) { - for (auto ar : dsl) { - auto subArray = ar->value; - checkJsonValueType(subArray, ar->key, JSON_OBJECT); - for (auto element : subArray) { - auto& value = element->value; - std::string_view name = 
element->key; +void parseEqualPositions(const JsonValue& dsl, std::vector>& equalPositions, + size_t lastBracketPosition) { + for (const auto& ar : dsl) { + auto subArray = ar.value; + checkJsonValueType(subArray, ar.key, JSON_OBJECT); + for (const auto& element : subArray) { + auto& value = element.value; + std::string_view name = element.key; switch (get(equationPosition_map, name, "equal_positions"sv)) { case EqualPosition::Positions: { EqualPosition_t ep; - for (auto f : value) { - checkJsonValueType(f->value, f->key, JSON_STRING); - ep.emplace_back(f->value.toString()); + for (const auto& f : value) { + checkJsonValueType(f.value, f.key, JSON_STRING); + ep.emplace_back(f.value.toString()); } - if (ep.size() < 2) + if (ep.size() < 2) { throw Error(errLogic, "equal_position() is supposed to have at least 2 arguments. Arguments: [%s]", ep.size() == 1 ? ep[0] : ""); + } equalPositions.emplace_back(lastBracketPosition, std::move(ep)); } } @@ -540,16 +543,16 @@ void parseEqualPositions(JsonValue& dsl, std::vectorvalue; - checkJsonValueType(field, item->key, JSON_OBJECT); +static void parseUpdateFields(const JsonValue& updateFields, Query& query) { + for (const auto& item : updateFields) { + auto& field = item.value; + checkJsonValueType(field, item.key, JSON_OBJECT); std::string fieldName; bool isObject = false, isExpression = false; VariantArray values; - for (auto v : field) { - auto& value = v->value; - std::string_view name = v->key; + for (const auto& v : field) { + auto& value = v.value; + std::string_view name = v.key; switch (get(update_field_map, name, "update_fields"sv)) { case UpdateField::Name: checkJsonValueType(value, name, JSON_STRING); @@ -572,7 +575,7 @@ void parseUpdateFields(JsonValue& updateFields, Query& query) { } case UpdateField::IsArray: checkJsonValueType(value, name, JSON_TRUE, JSON_FALSE); - if (value.getTag() == JSON_TRUE) values.MarkArray(); + values.MarkArray(value.getTag() == JSON_TRUE); break; case UpdateField::Values: 
checkJsonValueType(value, name, JSON_ARRAY); @@ -580,23 +583,26 @@ void parseUpdateFields(JsonValue& updateFields, Query& query) { break; } } + if (isExpression && (values.size() != 1 || !values.front().Type().template Is())) + throw Error(errParseDSL, "The array \"values\" must contain only a string type value for the type \"expression\""); + if (isObject) { - query.SetObject(fieldName, values); + query.SetObject(fieldName, std::move(values)); } else { - query.Set(fieldName, values, isExpression); + query.Set(fieldName, std::move(values), isExpression); } } } -void parse(JsonValue& root, Query& q) { +void parse(const JsonValue& root, Query& q) { if (root.getTag() != JSON_OBJECT) { throw Error(errParseJson, "Json is malformed: %d", root.getTag()); } std::vector> equalPositions; - for (auto elem : root) { - auto& v = elem->value; - auto name = elem->key; + for (const auto& elem : root) { + auto& v = elem.value; + auto name = elem.key; switch (get(root_map, name, "root"sv)) { case Root::Namespace: checkJsonValueType(v, name, JSON_STRING); @@ -615,7 +621,7 @@ void parse(JsonValue& root, Query& q) { case Root::Filters: checkJsonValueType(v, name, JSON_ARRAY); - for (auto filter : v) parseFilter(filter->value, q, equalPositions, 0); + for (const auto& filter : v) parseFilter(filter.value, q, equalPositions, 0); break; case Root::Sort: @@ -643,7 +649,7 @@ void parse(JsonValue& root, Query& q) { break; case Root::Aggregations: checkJsonValueType(v, name, JSON_ARRAY); - for (auto aggregation : v) parseAggregation(aggregation->value, q); + for (const auto& aggregation : v) parseAggregation(aggregation.value, q); break; case Root::Explain: checkJsonValueType(v, name, JSON_FALSE, JSON_TRUE); @@ -666,15 +672,14 @@ void parse(JsonValue& root, Query& q) { break; case Root::EqualPositions: throw Error(errParseDSL, "Unsupported old DSL format. 
Equal positions should be in filters."); - break; case Root::QueryType: checkJsonValueType(v, name, JSON_STRING); q.type_ = get(query_types, v.toString(), "query_type"sv); break; case Root::DropFields: checkJsonValueType(v, name, JSON_ARRAY); - for (auto element : v) { - auto& value = element->value; + for (const auto& element : v) { + auto& value = element.value; checkJsonValueType(value, "string array item", JSON_STRING); q.Drop(std::string(value.toString())); } @@ -683,8 +688,6 @@ void parse(JsonValue& root, Query& q) { checkJsonValueType(v, name, JSON_ARRAY); parseUpdateFields(v, q); break; - default: - throw Error(errParseDSL, "incorrect tag '%'", name); } } for (auto&& eqPos : equalPositions) { diff --git a/cpp_src/core/query/expressionevaluator.cc b/cpp_src/core/query/expressionevaluator.cc index 681187e3a..c47265a14 100644 --- a/cpp_src/core/query/expressionevaluator.cc +++ b/cpp_src/core/query/expressionevaluator.cc @@ -3,143 +3,224 @@ #include "core/payload/payloadiface.h" #include "core/selectfunc/functionexecutor.h" #include "core/selectfunc/selectfunc.h" +#include "double-conversion/double-conversion.h" #include "estl/tokenizer.h" namespace reindexer { using namespace std::string_view_literals; -const char* kWrongFieldTypeError = "Only integral type non-array fields are supported in arithmetical expressions: %s"; - -ExpressionEvaluator::ExpressionEvaluator(const PayloadType& type, TagsMatcher& tagsMatcher, FunctionExecutor& func) - : type_(type), tagsMatcher_(tagsMatcher), functionExecutor_(func) {} +constexpr char kWrongFieldTypeError[] = "Only integral type non-array fields are supported in arithmetical expressions: %s"; +constexpr char kScalarsInConcatenationError[] = "Unable to use scalar values in the arrays concatenation expressions: %s"; void ExpressionEvaluator::captureArrayContent(tokenizer& parser) { - token tok = parser.next_token(false); - for (;;) { - tok = parser.next_token(false); - if (tok.text() == "]"sv) { - if (arrayValues_.empty()) 
break; + arrayValues_.MarkArray(); + token tok = parser.next_token(tokenizer::flags::no_flags); + if (tok.text() == "]") { + return; + } + for (;; tok = parser.next_token(tokenizer::flags::no_flags)) { + if rx_unlikely (tok.text() == "]"sv) { throw Error(errParseSQL, "Expected field value, but found ']' in query, %s", parser.where()); } arrayValues_.emplace_back(token2kv(tok, parser, false)); - tok = parser.next_token(); - if (tok.text() == "]"sv) break; - if (tok.text() != ","sv) { + tok = parser.next_token(tokenizer::flags::no_flags); + if (tok.text() == "]"sv) { + break; + } + if rx_unlikely (tok.text() != ","sv) { throw Error(errParseSQL, "Expected ']' or ',', but found '%s' in query, %s", tok.text(), parser.where()); } } } -double ExpressionEvaluator::getPrimaryToken(tokenizer& parser, const PayloadValue& v, const NsContext& ctx) { - token tok = parser.peek_token(true, true); - if (tok.text() == "("sv) { - parser.next_token(); - double val = performSumAndSubtracting(parser, v, ctx); - if (parser.next_token().text() != ")"sv) throw Error(errLogic, "')' expected in arithmetical expression"); - return val; - } else if (tok.text() == "["sv) { +void ExpressionEvaluator::throwUnexpectedTokenError(tokenizer& parser, const token& outTok) { + if (state_ == StateArrayConcat || parser.peek_token(tokenizer::flags::treat_sign_as_token).text() == "|"sv) { + throw Error(errParams, kScalarsInConcatenationError, outTok.text()); + } + throw Error(errParams, kWrongFieldTypeError, outTok.text()); +} + +ExpressionEvaluator::PrimaryToken ExpressionEvaluator::getPrimaryToken(tokenizer& parser, const PayloadValue& v, token& outTok, + const NsContext& ctx) { + outTok = parser.next_token(); + if (outTok.text() == "("sv) { + const double val = performSumAndSubtracting(parser, v, ctx); + if rx_unlikely (parser.next_token().text() != ")"sv) { + throw Error(errParams, "')' expected in arithmetical expression"); + } + return {.value = val, .type = PrimaryToken::Type::Scalar}; + } else if 
(outTok.text() == "["sv) { captureArrayContent(parser); - } else if (tok.type == TokenNumber) { - char* p = nullptr; - parser.next_token(); - return strtod(tok.text().data(), &p); - } else if (tok.type == TokenName) { - int field = 0; - VariantArray fieldValues; - ConstPayload pv(type_, v); - if (type_.FieldByName(tok.text(), field)) { - const auto type = type_.Field(field).Type(); - if (type_.Field(field).IsArray()) { - pv.Get(field, fieldValues); - for (const Variant& v : fieldValues) { - arrayValues_.emplace_back(v); - } - parser.next_token(); - return 0.0; - } else if (state_ == StateArrayConcat) { - VariantArray vals; - pv.GetByJsonPath(tok.text(), tagsMatcher_, vals, KeyValueType::Undefined{}); - for (const Variant& v : vals) { - arrayValues_.emplace_back(v); - } - parser.next_token(); - return 0.0; - } else { - return type.EvaluateOneOf( - [&](OneOf) { - pv.Get(field, fieldValues); - if (fieldValues.empty()) throw Error(errLogic, "Calculating value of an empty field is impossible: %s", tok.text()); - parser.next_token(); - return fieldValues.front().As(); - }, - [&](OneOf) -> double { - throw Error(errLogic, kWrongFieldTypeError, tok.text()); - }, - [](OneOf) noexcept -> double { - assertrx(0); - abort(); - }); + return {.value = std::nullopt, .type = PrimaryToken::Type::Array}; + } + switch (outTok.type) { + case TokenNumber: { + try { + using double_conversion::StringToDoubleConverter; + static const StringToDoubleConverter converter{StringToDoubleConverter::NO_FLAGS, NAN, NAN, nullptr, nullptr}; + int countOfCharsParsedAsDouble; + return {.value = converter.StringToDouble(outTok.text_.data(), outTok.text_.size(), &countOfCharsParsedAsDouble), + .type = PrimaryToken::Type::Scalar}; + } catch (...) 
{ + throw Error(errParams, "Unable to convert '%s' to double value", outTok.text()); } - } else { - pv.GetByJsonPath(tok.text(), tagsMatcher_, fieldValues, KeyValueType::Undefined{}); - if (fieldValues.size() > 0) { - const auto type = fieldValues.front().Type(); - if ((fieldValues.size() > 1) || (state_ == StateArrayConcat)) { - for (const Variant& v : fieldValues) { - arrayValues_.emplace_back(v); - } - parser.next_token(); - return 0.0; - } else { - return type.EvaluateOneOf( - [&](OneOf) { - parser.next_token(); - return fieldValues.front().As(); - }, - [&](OneOf) -> double { - throw Error(errLogic, kWrongFieldTypeError, tok.text()); - }); - } - } else { - SelectFuncStruct funcData = SelectFuncParser().ParseFunction(parser, true); - funcData.field = forField_; - return functionExecutor_.Execute(funcData, ctx).As(); + } + case TokenName: + return handleTokenName(parser, v, outTok, ctx); + case TokenString: + throwUnexpectedTokenError(parser, outTok); + case TokenEnd: + case TokenOp: + case TokenSymbol: + case TokenSign: + break; + } + throw Error(errParams, "Unexpected token in expression: '%s'", outTok.text()); +} + +ExpressionEvaluator::PrimaryToken ExpressionEvaluator::handleTokenName(tokenizer& parser, const PayloadValue& v, token& outTok, + const NsContext& ctx) { + int field = 0; + VariantArray fieldValues; + ConstPayload pv(type_, v); + if (type_.FieldByName(outTok.text(), field)) { + if (type_.Field(field).IsArray()) { + pv.Get(field, fieldValues); + arrayValues_.MarkArray(); + for (Variant& v : fieldValues) { + arrayValues_.emplace_back(std::move(v)); } + return (state_ == StateArrayConcat || fieldValues.size() != 1) + ? 
PrimaryToken{.value = std::nullopt, .type = PrimaryToken::Type::Array} + : type_.Field(field).Type().EvaluateOneOf( + [this](OneOf) -> PrimaryToken { + return {.value = arrayValues_.back().As(), .type = PrimaryToken::Type::Array}; + }, + [&, this](OneOf) -> PrimaryToken { + if rx_unlikely (state_ != StateArrayConcat && + parser.peek_token(tokenizer::flags::treat_sign_as_token).text() != "|"sv) { + throw Error(errParams, kWrongFieldTypeError, outTok.text()); + } + return {.value = std::nullopt, .type = PrimaryToken::Type::Array}; + }, + [](OneOf) noexcept + -> PrimaryToken { + assertrx_throw(false); + abort(); + }); + } + return type_.Field(field).Type().EvaluateOneOf( + [&](OneOf) -> PrimaryToken { + pv.Get(field, fieldValues); + if rx_unlikely (fieldValues.empty()) { + throw Error(errParams, "Calculating value of an empty field is impossible: %s", outTok.text()); + } + return {.value = fieldValues.front().As(), .type = PrimaryToken::Type::Scalar}; + }, + [&, this](OneOf) -> PrimaryToken { + throwUnexpectedTokenError(parser, outTok); + }, + [](OneOf) -> PrimaryToken { + assertrx_throw(false); + abort(); + }); + } else if rx_unlikely (outTok.text() == "true"sv || outTok.text() == "false"sv) { + throwUnexpectedTokenError(parser, outTok); + } + + pv.GetByJsonPath(outTok.text(), tagsMatcher_, fieldValues, KeyValueType::Undefined{}); + + if (fieldValues.IsNullValue()) { + return {.value = std::nullopt, .type = PrimaryToken::Type::Null}; + } + + const bool isArrayField = fieldValues.IsArrayValue(); + if (isArrayField) { + for (Variant& v : fieldValues) { + arrayValues_.emplace_back(std::move(v)); } - } else { - throw Error(errLogic, "Only integral type non-array fields are supported in arithmetical expressions"); + if ((state_ == StateArrayConcat) || (fieldValues.size() != 1)) { + return {.value = std::nullopt, .type = PrimaryToken::Type::Array}; + } + } + if (fieldValues.size() == 1) { + const Variant* vptr = isArrayField ? 
&arrayValues_.back() : &fieldValues.front(); + return vptr->Type().EvaluateOneOf( + [vptr, isArrayField](OneOf) -> PrimaryToken { + return {.value = vptr->As(), .type = isArrayField ? PrimaryToken::Type::Array : PrimaryToken::Type::Scalar}; + }, + [&, this](OneOf) -> PrimaryToken { + if (isArrayField) { + return {.value = std::nullopt, .type = PrimaryToken::Type::Array}; + } + throwUnexpectedTokenError(parser, outTok); + }, + [](OneOf) -> PrimaryToken { + assertrx_throw(0); + abort(); + }); + } else if (parser.peek_token(tokenizer::flags::treat_sign_as_token).text() == "(") { + SelectFuncStruct funcData = SelectFuncParser().ParseFunction(parser, true, outTok); + funcData.field = std::string(forField_); + return {.value = functionExecutor_.Execute(funcData, ctx).As(), .type = PrimaryToken::Type::Scalar}; } - return 0.0; + return {.value = std::nullopt, .type = PrimaryToken::Type::Null}; } double ExpressionEvaluator::performArrayConcatenation(tokenizer& parser, const PayloadValue& v, token& tok, const NsContext& ctx) { - double left = getPrimaryToken(parser, v, ctx); + token valueToken; + auto left = getPrimaryToken(parser, v, valueToken, ctx); tok = parser.peek_token(); + switch (left.type) { + case PrimaryToken::Type::Scalar: + if rx_unlikely (tok.text() == "|"sv) { + throw Error(errParams, kScalarsInConcatenationError, valueToken.text()); + } + break; + case PrimaryToken::Type::Array: + case PrimaryToken::Type::Null: + if rx_unlikely (!left.value.has_value() && tok.text() != "|"sv) { + throw Error(errParams, "Unable to use array and null values outside of the arrays concatenation"); + } + break; + } + while (tok.text() == "|"sv) { parser.next_token(); tok = parser.next_token(); - if (tok.text() != "|") throw Error(errLogic, "Expected '|', not %s", tok.text()); + if rx_unlikely (tok.text() != "|") { + throw Error(errParams, "Expected '|', not %s", tok.text()); + } + if rx_unlikely (state_ != StateArrayConcat && state_ != None) { + throw Error(errParams, "Unable to 
mix arrays concatenation and arithmetic operations. Got token: '%s'", tok.text()); + } state_ = StateArrayConcat; - getPrimaryToken(parser, v, ctx); + const auto right = getPrimaryToken(parser, v, valueToken, ctx); + if rx_unlikely (right.type == PrimaryToken::Type::Scalar) { + throw Error(errParams, kScalarsInConcatenationError, valueToken.text()); + } + assertrx_throw(!right.value.has_value()); tok = parser.peek_token(); } - return left; + return left.value.has_value() ? left.value.value() : 0.0; } double ExpressionEvaluator::performMultiplicationAndDivision(tokenizer& parser, const PayloadValue& v, token& tok, const NsContext& ctx) { double left = performArrayConcatenation(parser, v, tok, ctx); - tok = parser.peek_token(true, true); + tok = parser.peek_token(tokenizer::flags::treat_sign_as_token); while (tok.text() == "*"sv || tok.text() == "/"sv) { + if rx_unlikely (state_ == StateArrayConcat) { + throw Error(errParams, "Unable to mix arrays concatenation and arithmetic operations. Got token: '%s'", tok.text()); + } state_ = StateMultiplyAndDivide; if (tok.text() == "*"sv) { - parser.next_token(); + parser.next_token(tokenizer::flags::treat_sign_as_token); left *= performMultiplicationAndDivision(parser, v, tok, ctx); - } else if (tok.text() == "/"sv) { - parser.next_token(); - double val = performMultiplicationAndDivision(parser, v, tok, ctx); + } else { + // tok.text() == "/"sv + parser.next_token(tokenizer::flags::treat_sign_as_token); + const double val = performMultiplicationAndDivision(parser, v, tok, ctx); if (val == 0) throw Error(errLogic, "Division by zero!"); left /= val; } @@ -150,35 +231,31 @@ double ExpressionEvaluator::performMultiplicationAndDivision(tokenizer& parser, double ExpressionEvaluator::performSumAndSubtracting(tokenizer& parser, const PayloadValue& v, const NsContext& ctx) { token tok; double left = performMultiplicationAndDivision(parser, v, tok, ctx); - tok = parser.peek_token(true, true); + tok = 
parser.peek_token(tokenizer::flags::treat_sign_as_token); while (tok.text() == "+"sv || tok.text() == "-"sv) { + if rx_unlikely (state_ == StateArrayConcat) { + throw Error(errParams, "Unable to mix arrays concatenation and arithmetic operations. Got token: '%s'", tok.text()); + } state_ = StateSumAndSubtract; if (tok.text() == "+"sv) { - parser.next_token(true, true); + parser.next_token(tokenizer::flags::treat_sign_as_token); left += performMultiplicationAndDivision(parser, v, tok, ctx); - } else if (tok.text() == "-"sv) { - parser.next_token(true, true); + } else { + // tok.text() == "-"sv + parser.next_token(tokenizer::flags::treat_sign_as_token); left -= performMultiplicationAndDivision(parser, v, tok, ctx); } } return left; } -VariantArray ExpressionEvaluator::Evaluate(tokenizer& parser, const PayloadValue& v, std::string_view forField, const NsContext& ctx) { - forField_ = std::string(forField); - double expressionValue = performSumAndSubtracting(parser, v, ctx); - if (arrayValues_.empty()) { - return {Variant(expressionValue)}; - } else { - arrayValues_.MarkArray(); - return arrayValues_; - } -} - VariantArray ExpressionEvaluator::Evaluate(std::string_view expr, const PayloadValue& v, std::string_view forField, const NsContext& ctx) { - arrayValues_.clear(); + arrayValues_.clear(); tokenizer parser(expr); - return Evaluate(parser, v, forField, ctx); + forField_ = forField; + state_ = None; + const double expressionValue = performSumAndSubtracting(parser, v, ctx); + return (state_ == StateArrayConcat) ? 
std::move(arrayValues_).MarkArray() : VariantArray{Variant(expressionValue)}; } } // namespace reindexer diff --git a/cpp_src/core/query/expressionevaluator.h b/cpp_src/core/query/expressionevaluator.h index e0be27aa5..ac7ca9b64 100644 --- a/cpp_src/core/query/expressionevaluator.h +++ b/cpp_src/core/query/expressionevaluator.h @@ -1,5 +1,6 @@ #pragma once +#include #include "core/keyvalue/variant.h" namespace reindexer { @@ -12,25 +13,34 @@ class NsContext; class ExpressionEvaluator { public: - ExpressionEvaluator(const PayloadType& type, TagsMatcher& tagsMatcher, FunctionExecutor& func); + ExpressionEvaluator(const PayloadType& type, TagsMatcher& tagsMatcher, FunctionExecutor& func) noexcept + : type_(type), tagsMatcher_(tagsMatcher), functionExecutor_(func) {} - VariantArray Evaluate(tokenizer& parser, const PayloadValue& v, std::string_view forField, const NsContext& ctx); VariantArray Evaluate(std::string_view expr, const PayloadValue& v, std::string_view forField, const NsContext& ctx); private: - double getPrimaryToken(tokenizer& parser, const PayloadValue& v, const NsContext& ctx); - double performSumAndSubtracting(tokenizer& parser, const PayloadValue& v, const NsContext& ctx); - double performMultiplicationAndDivision(tokenizer& parser, const PayloadValue& v, token& lastTok, const NsContext& ctx); - double performArrayConcatenation(tokenizer& parser, const PayloadValue& v, token& lastTok, const NsContext& ctx); + struct PrimaryToken { + enum class Type { Scalar, Array, Null }; + + std::optional value; + Type type; + }; + + [[nodiscard]] PrimaryToken getPrimaryToken(tokenizer& parser, const PayloadValue& v, token& outTok, const NsContext& ctx); + [[nodiscard]] PrimaryToken handleTokenName(tokenizer& parser, const PayloadValue& v, token& outTok, const NsContext& ctx); + [[nodiscard]] double performSumAndSubtracting(tokenizer& parser, const PayloadValue& v, const NsContext& ctx); + [[nodiscard]] double performMultiplicationAndDivision(tokenizer& parser, 
const PayloadValue& v, token& lastTok, const NsContext& ctx); + [[nodiscard]] double performArrayConcatenation(tokenizer& parser, const PayloadValue& v, token& lastTok, const NsContext& ctx); void captureArrayContent(tokenizer& parser); + [[noreturn]] void throwUnexpectedTokenError(tokenizer& parser, const token& outTok); enum State { None = 0, StateArrayConcat, StateMultiplyAndDivide, StateSumAndSubtract }; const PayloadType& type_; TagsMatcher& tagsMatcher_; FunctionExecutor& functionExecutor_; - std::string forField_; + std::string_view forField_; VariantArray arrayValues_; State state_ = None; }; diff --git a/cpp_src/core/query/query.cc b/cpp_src/core/query/query.cc index b8999affa..40bdc1391 100644 --- a/cpp_src/core/query/query.cc +++ b/cpp_src/core/query/query.cc @@ -239,8 +239,7 @@ void Query::deserialize(Serializer &ser, bool &hasJoinConditions) { hasExpressions = ser.GetVarUint(); val.emplace_back(ser.GetVariant().EnsureHold()); } - if (isArray) val.MarkArray(); - Set(std::move(field), std::move(val), hasExpressions); + Set(std::move(field), std::move(val.MarkArray(isArray)), hasExpressions); break; } case QueryUpdateField: { @@ -253,8 +252,7 @@ void Query::deserialize(Serializer &ser, bool &hasJoinConditions) { hasExpressions = ser.GetVarUint(); val.emplace_back(ser.GetVariant().EnsureHold()); } - if (isArray) val.MarkArray(); - Set(std::move(field), std::move(val), hasExpressions); + Set(std::move(field), std::move(val.MarkArray(isArray)), hasExpressions); break; } case QueryUpdateObject: { @@ -262,7 +260,7 @@ void Query::deserialize(Serializer &ser, bool &hasJoinConditions) { std::string field(ser.GetVString()); bool hasExpressions = false; int numValues = ser.GetVarUint(); - if (ser.GetVarUint() == 1) val.MarkArray(); + val.MarkArray(ser.GetVarUint() == 1); while (numValues--) { hasExpressions = ser.GetVarUint(); val.emplace_back(ser.GetVariant().EnsureHold()); @@ -311,11 +309,11 @@ void Query::Serialize(WrSerializer &ser, uint8_t mode) const { 
ser.PutVString(se.expression); ser.PutVarUint(se.desc); } - if (agg.Limit() != AggregateEntry::kDefaultLimit) { + if (agg.Limit() != QueryEntry::kDefaultLimit) { ser.PutVarUint(QueryAggregationLimit); ser.PutVarUint(agg.Limit()); } - if (agg.Offset() != AggregateEntry::kDefaultOffset) { + if (agg.Offset() != QueryEntry::kDefaultOffset) { ser.PutVarUint(QueryAggregationOffset); ser.PutVarUint(agg.Offset()); } @@ -482,8 +480,8 @@ Query &Query::Join(JoinType joinType, const std::string &index, const std::strin joinEntry.condition_ = cond; joinEntry.index_ = index; joinEntry.joinIndex_ = joinIndex; - joinQueries_.emplace_back(joinType, std::move(qr)); - joinQueries_.back().joinEntries_.emplace_back(std::move(joinEntry)); + auto &jq = joinQueries_.emplace_back(joinType, std::move(qr)); + jq.joinEntries_.emplace_back(std::move(joinEntry)); if (joinType != JoinType::LeftJoin) { entries.Append((joinType == JoinType::InnerJoin) ? OpType::OpAnd : OpType::OpOr, JoinQueryEntry(joinQueries_.size() - 1)); } @@ -536,8 +534,29 @@ Query::OnHelperR Query::Join(JoinType joinType, const Query &q) && { return {std::move(*this), joinQueries_.back()}; } -Query &Query::Merge(Query mq) & { - mergeQueries_.emplace_back(JoinType::Merge, std::move(mq)); +Query &Query::Merge(const Query &q) & { + mergeQueries_.emplace_back(JoinType::Merge, q); + return *this; +} + +Query &Query::Merge(Query &&q) & { + mergeQueries_.emplace_back(JoinType::Merge, std::move(q)); + return *this; +} + +Query &Query::SortStDistance(std::string_view field, Point p, bool desc) & { + if (field.empty()) { + throw Error(errParams, "Field name for ST_Distance can not be empty"); + } + sortingEntries_.emplace_back(fmt::sprintf("ST_Distance(%s,ST_GeomFromText('point(%.12f %.12f)'))", field, p.X(), p.Y()), desc); + return *this; +} + +Query &Query::SortStDistance(std::string_view field1, std::string_view field2, bool desc) & { + if (field1.empty() || field2.empty()) { + throw Error(errParams, "Fields names for ST_Distance 
can not be empty"); + } + sortingEntries_.emplace_back(fmt::sprintf("ST_Distance(%s,%s)", field1, field2), desc); return *this; } diff --git a/cpp_src/core/query/query.h b/cpp_src/core/query/query.h index 4d187eb5d..a2ce6d197 100644 --- a/cpp_src/core/query/query.h +++ b/cpp_src/core/query/query.h @@ -71,7 +71,8 @@ class Query { /// @param start - number of the first row to get from selected set. Analog to sql OFFSET Offset. /// @param count - number of rows to get from result set. Analog to sql LIMIT RowsCount. /// @param calcTotal - calculation mode. - explicit Query(std::string nsName, unsigned start = 0, unsigned count = UINT_MAX, CalcTotalMode calcTotal = ModeNoTotal); + explicit Query(std::string nsName, unsigned start = QueryEntry::kDefaultOffset, unsigned count = QueryEntry::kDefaultLimit, + CalcTotalMode calcTotal = ModeNoTotal); /// Creates an empty object. Query() = default; @@ -286,9 +287,8 @@ class Query { Query &Set(std::string field, const std::vector &l, bool hasExpressions = false) & { VariantArray value; value.reserve(l.size()); - value.MarkArray(); for (auto it = l.begin(); it != l.end(); it++) value.emplace_back(*it); - return Set(std::move(field), std::move(value), hasExpressions); + return Set(std::move(field), std::move(value.MarkArray()), hasExpressions); } template Query &&Set(std::string field, const std::vector &l, bool hasExpressions = false) && { @@ -340,9 +340,8 @@ class Query { Query &SetObject(std::string field, const std::vector &l, bool hasExpressions = false) & { VariantArray value; value.reserve(l.size()); - value.MarkArray(); for (auto it = l.begin(); it != l.end(); it++) value.emplace_back(Variant(*it)); - return SetObject(std::move(field), std::move(value), hasExpressions); + return SetObject(std::move(field), std::move(value.MarkArray()), hasExpressions); } template Query &&SetObject(std::string field, const std::vector &l, bool hasExpressions = false) && { @@ -471,26 +470,30 @@ class Query { Query &&OrInnerJoin(const 
std::string &index, const std::string &joinIndex, CondType cond, const Query &qr) && { return std::move(OrInnerJoin(index, joinIndex, cond, qr)); } + Query &Merge(const Query &q) &; + Query &&Merge(const Query &q) && { return std::move(Merge(q)); } + Query &Merge(Query &&q) &; + Query &&Merge(Query &&q) && { return std::move(Merge(std::move(q))); } /// Changes debug level. /// @param level - debug level. /// @return Query object. - Query &Debug(int level) & { // -V1071 + Query &Debug(int level) &noexcept { debugLevel = level; return *this; } - Query &&Debug(int level) && { return std::move(Debug(level)); } + Query &&Debug(int level) &&noexcept { return std::move(Debug(level)); } /// Changes strict mode. /// @param mode - strict mode. /// @return Query object. - Query &Strict(StrictMode mode) & { // -V1071 + Query &Strict(StrictMode mode) &noexcept { strictMode = mode; return *this; } - Query &&Strict(StrictMode mode) && { return std::move(Strict(mode)); } + Query &&Strict(StrictMode mode) &&noexcept { return std::move(Strict(mode)); } - /// Performs sorting by certain column. Analog to sql ORDER BY. + /// Performs sorting by certain column. Same as sql 'ORDER BY'. /// @param sort - sorting column name. /// @param desc - is sorting direction descending or ascending. /// @return Query object. @@ -500,6 +503,23 @@ class Query { } Query &&Sort(std::string sort, bool desc) && { return std::move(Sort(std::move(sort), desc)); } + /// Performs sorting by ST_Distance() expressions for geometry index. Sorting function will use distance between field and target point. + /// @param field - field's name. This field must contain Point. + /// @param p - target point. + /// @param desc - is sorting direction descending or ascending. + /// @return Query object. 
+ Query &SortStDistance(std::string_view field, reindexer::Point p, bool desc) &; + Query &&SortStDistance(std::string_view field, reindexer::Point p, bool desc) && { return std::move(SortStDistance(field, p, desc)); } + /// Performs sorting by ST_Distance() expressions for geometry index. Sorting function will use distance 2 fields. + /// @param field1 - first field name. This field must contain Point. + /// @param field2 - second field name.This field must contain Point. + /// @param desc - is sorting direction descending or ascending. + /// @return Query object. + Query &SortStDistance(std::string_view field1, std::string_view field2, bool desc) &; + Query &&SortStDistance(std::string_view field1, std::string_view field2, bool desc) && { + return std::move(SortStDistance(field1, field2, desc)); + } + /// Performs sorting by certain column. Analog to sql ORDER BY. /// @param sort - sorting column name. /// @param desc - is sorting direction descending or ascending. @@ -540,8 +560,7 @@ class Query { /// @param indexName - name of index for distict operation. Query &Distinct(std::string indexName) & { if (indexName.length()) { - AggregateEntry aggEntry{AggDistinct, {std::move(indexName)}}; - aggregations_.emplace_back(std::move(aggEntry)); + aggregations_.emplace_back(AggDistinct, h_vector{std::move(indexName)}); } return *this; } @@ -568,11 +587,12 @@ class Query { /// @param offset - index of the first row to get from result set. /// @return Query object ready to be executed. 
Query &Aggregate(AggType type, h_vector fields, const std::vector> &sort = {}, - unsigned limit = UINT_MAX, unsigned offset = 0) & { + unsigned limit = QueryEntry::kDefaultLimit, unsigned offset = QueryEntry::kDefaultOffset) & { if (!CanAddAggregation(type)) { throw Error(errConflict, kAggregationWithSelectFieldsMsgError); } SortingEntries sorting; + sorting.reserve(sort.size()); for (const auto &s : sort) { sorting.emplace_back(s.first, s.second); } @@ -580,7 +600,7 @@ class Query { return *this; } Query &&Aggregate(AggType type, h_vector fields, const std::vector> &sort = {}, - unsigned limit = UINT_MAX, unsigned offset = 0) && { + unsigned limit = QueryEntry::kDefaultLimit, unsigned offset = QueryEntry::kDefaultOffset) && { return std::move(Aggregate(type, std::move(fields), sort, limit, offset)); } @@ -664,9 +684,6 @@ class Query { Query &&WithRank() &&noexcept { return std::move(WithRank()); } bool IsWithRank() const noexcept { return withRank_; } - Query &Merge(Query mq) &; - Query &&Merge(Query mq) && { return std::move(Merge(std::move(mq))); } - /// Can we add aggregation functions /// or new select fields to a current query? 
bool CanAddAggregation(AggType type) const noexcept { return type == AggDistinct || (selectFilter_.empty()); } @@ -685,34 +702,34 @@ class Query { void WalkNested(bool withSelf, bool withMerged, const std::function &visitor) const; - bool HasLimit() const noexcept { return count != UINT_MAX; } - bool HasOffset() const noexcept { return start != 0; } + bool HasLimit() const noexcept { return count != QueryEntry::kDefaultLimit; } + bool HasOffset() const noexcept { return start != QueryEntry::kDefaultOffset; } bool IsWALQuery() const noexcept; const std::vector &UpdateFields() const noexcept { return updateFields_; } - QueryType Type() const { return type_; } - const std::string &Namespace() const { return _namespace; } + QueryType Type() const noexcept { return type_; } + const std::string &Namespace() const &noexcept { return _namespace; } protected: void deserialize(Serializer &ser, bool &hasJoinConditions); public: - std::string _namespace; /// Name of the namespace. - unsigned start = 0; /// First row index from result set. - unsigned count = UINT_MAX; /// Number of rows from result set. - int debugLevel = 0; /// Debug level. - StrictMode strictMode = StrictModeNotSet; /// Strict mode. - bool explain_ = false; /// Explain query if true - bool local_ = false; /// Local query if true - CalcTotalMode calcTotal = ModeNoTotal; /// Calculation mode. - QueryType type_ = QuerySelect; /// Query type - OpType nextOp_ = OpAnd; /// Next operation constant. - SortingEntries sortingEntries_; /// Sorting data. - std::vector forcedSortOrder_; /// Keys that always go first - before any ordered values. - std::vector joinQueries_; /// List of queries for join. - std::vector mergeQueries_; /// List of merge queries. - h_vector selectFilter_; /// List of columns in a final result set. - std::vector selectFunctions_; /// List of sql functions + std::string _namespace; /// Name of the namespace. + unsigned start = QueryEntry::kDefaultOffset; /// First row index from result set. 
+ unsigned count = QueryEntry::kDefaultLimit; /// Number of rows from result set. + int debugLevel = 0; /// Debug level. + StrictMode strictMode = StrictModeNotSet; /// Strict mode. + bool explain_ = false; /// Explain query if true + bool local_ = false; /// Local query if true + CalcTotalMode calcTotal = ModeNoTotal; /// Calculation mode. + QueryType type_ = QuerySelect; /// Query type + OpType nextOp_ = OpAnd; /// Next operation constant. + SortingEntries sortingEntries_; /// Sorting data. + std::vector forcedSortOrder_; /// Keys that always go first - before any ordered values. + std::vector joinQueries_; /// List of queries for join. + std::vector mergeQueries_; /// List of merge queries. + h_vector selectFilter_; /// List of columns in a final result set. + std::vector selectFunctions_; /// List of sql functions QueryEntries entries; diff --git a/cpp_src/core/query/queryentry.cc b/cpp_src/core/query/queryentry.cc index 63f71bacb..a7cc7f4e3 100644 --- a/cpp_src/core/query/queryentry.cc +++ b/cpp_src/core/query/queryentry.cc @@ -1,6 +1,9 @@ #include "queryentry.h" + #include #include +#include "core/nsselecter/joinedselector.h" +#include "core/nsselecter/joinedselectormock.h" #include "core/payload/payloadiface.h" #include "query.h" #include "tools/serializer.h" @@ -9,6 +12,44 @@ namespace reindexer { +template +std::string JoinQueryEntry::Dump(const std::vector &joinedSelectors) const { + WrSerializer ser; + const auto &js = joinedSelectors.at(joinIndex); + const auto &q = js.JoinQuery(); + ser << js.Type() << " (" << q.GetSQL() << ") ON "; + ser << '('; + for (const auto &jqe : q.joinEntries_) { + if (&jqe != &q.joinEntries_.front()) { + ser << ' ' << jqe.op_ << ' '; + } else { + assertrx(jqe.op_ == OpAnd); + } + ser << q._namespace << '.' 
<< jqe.joinIndex_ << ' ' << InvertJoinCondition(jqe.condition_) << ' ' << jqe.index_; + } + ser << ')'; + return std::string{ser.Slice()}; +} +template std::string JoinQueryEntry::Dump(const JoinedSelectors &) const; +template std::string JoinQueryEntry::Dump(const std::vector &) const; + +template +std::string JoinQueryEntry::DumpOnCondition(const std::vector &joinedSelectors) const { + WrSerializer ser; + const auto &js = joinedSelectors.at(joinIndex); + const auto &q = js.JoinQuery(); + ser << js.Type() << " ON ("; + for (const auto &jqe : q.joinEntries_) { + if (&jqe != &q.joinEntries_.front()) { + ser << ' ' << jqe.op_ << ' '; + } + ser << q._namespace << '.' << jqe.joinIndex_ << ' ' << InvertJoinCondition(jqe.condition_) << ' ' << jqe.index_; + } + ser << ')'; + return std::string{ser.Slice()}; +} +template std::string JoinQueryEntry::DumpOnCondition(const JoinedSelectors &) const; + bool QueryEntry::operator==(const QueryEntry &obj) const { return condition == obj.condition && index == obj.index && idxNo == obj.idxNo && distinct == obj.distinct && values.RelaxCompare(obj.values) == 0; @@ -31,6 +72,20 @@ std::string QueryEntry::Dump() const { return std::string{ser.Slice()}; } +std::string QueryEntry::DumpBrief() const { + WrSerializer ser; + { + ser << index << ' ' << condition << ' '; + const bool severalValues = (values.size() > 1); + if (severalValues) { + ser << "(...)"; + } else { + ser << '\'' << values.front().As() << '\''; + } + } + return std::string(ser.Slice()); +} + AggregateEntry::AggregateEntry(AggType type, h_vector fields, SortingEntries sort, unsigned limit, unsigned offset) : type_(type), fields_(std::move(fields)), sortingEntries_{std::move(sort)}, limit_(limit), offset_(offset) { switch (type_) { @@ -65,7 +120,7 @@ AggregateEntry::AggregateEntry(AggType type, h_vector fields, So case AggAvg: case AggCount: case AggCountCached: - if (limit_ != kDefaultLimit || offset_ != kDefaultOffset) { + if (limit_ != QueryEntry::kDefaultLimit || 
offset_ != QueryEntry::kDefaultOffset) { throw Error(errQueryExec, "Limit or offset are not available for aggregation %s", AggTypeToStr(type_)); } if (!sortingEntries_.empty()) { @@ -141,13 +196,13 @@ void QueryEntries::serialize(const_iterator it, const_iterator to, WrSerializer ser.PutVarUint(3); if (entry.values[0].Type().Is()) { const Point point = static_cast(entry.values[0]); - ser.PutDouble(point.x); - ser.PutDouble(point.y); + ser.PutDouble(point.X()); + ser.PutDouble(point.Y()); ser.PutVariant(entry.values[1]); } else { const Point point = static_cast(entry.values[1]); - ser.PutDouble(point.x); - ser.PutDouble(point.y); + ser.PutDouble(point.X()); + ser.PutDouble(point.Y()); ser.PutVariant(entry.values[0]); } } else { @@ -314,7 +369,7 @@ bool QueryEntries::checkIfSatisfyCondition(const VariantArray &lValues, CondType if (matchLikePattern(std::string_view(v), std::string_view(rValues[0]))) return true; } return false; - case CondType::CondDWithin: + case CondType::CondDWithin: { if (rValues.size() != 2) { throw Error(errLogic, "Condition DWithin must have exact 2 value, but %d values was provided", rValues.size()); } @@ -328,6 +383,7 @@ bool QueryEntries::checkIfSatisfyCondition(const VariantArray &lValues, CondType distance = rValues[0].As(); } return DWithin(static_cast(lValues), point, distance); + } default: assertrx(0); } diff --git a/cpp_src/core/query/queryentry.h b/cpp_src/core/query/queryentry.h index 9db20f4c2..d2b53ebf8 100644 --- a/cpp_src/core/query/queryentry.h +++ b/cpp_src/core/query/queryentry.h @@ -25,26 +25,16 @@ struct JoinQueryEntry { bool operator!=(const JoinQueryEntry &other) const noexcept { return !operator==(other); } template - std::string Dump(const std::vector &joinedSelectors) const { - WrSerializer ser; - const auto &js = joinedSelectors.at(joinIndex); - const auto &q = js.JoinQuery(); - ser << js.Type() << " (" << q.GetSQL() << ") ON "; - ser << '('; - for (const auto &jqe : q.joinEntries_) { - if (&jqe != 
&q.joinEntries_.front()) { - ser << ' ' << jqe.op_ << ' '; - } else { - assertrx(jqe.op_ == OpAnd); - } - ser << q._namespace << '.' << jqe.joinIndex_ << ' ' << InvertJoinCondition(jqe.condition_) << ' ' << jqe.index_; - } - ser << ')'; - return std::string{ser.Slice()}; - } + std::string Dump(const std::vector &joinedSelectors) const; + + template + std::string DumpOnCondition(const std::vector &joinedSelectors) const; }; struct QueryEntry { + static constexpr unsigned kDefaultLimit = UINT_MAX; + static constexpr unsigned kDefaultOffset = 0; + QueryEntry(std::string idx, CondType cond, VariantArray v) : index{std::move(idx)}, condition{cond}, values(std::move(v)) {} QueryEntry(CondType cond, std::string idx, int idxN, bool dist = false) : index(std::move(idx)), idxNo(idxN), condition(cond), distinct(dist) {} @@ -60,6 +50,7 @@ struct QueryEntry { VariantArray values; std::string Dump() const; + std::string DumpBrief() const; }; class BetweenFieldsQueryEntry { @@ -133,6 +124,8 @@ class QueryEntries static bool checkIfSatisfyCondition(const QueryEntry &, const ConstPayload &, TagsMatcher &); static bool checkIfSatisfyCondition(const BetweenFieldsQueryEntry &, const ConstPayload &, TagsMatcher &); static bool checkIfSatisfyCondition(const VariantArray &lValues, CondType, const VariantArray &rValues); + +protected: static void dumpEqualPositions(size_t level, WrSerializer &ser, const EqualPositions_t &equalPositions) { for (const auto &eq : equalPositions) { for (size_t i = 0; i < level; ++i) { @@ -146,6 +139,7 @@ class QueryEntries ser << ")\n"; } } + template static void dump(size_t level, const_iterator begin, const_iterator end, const std::vector &joinedSelectors, WrSerializer &ser) { for (const_iterator it = begin; it != end; ++it) { @@ -205,11 +199,24 @@ struct QueryJoinEntry { bool operator==(const QueryJoinEntry &) const noexcept; bool operator!=(const QueryJoinEntry &qje) const noexcept { return !operator==(qje); } OpType op_ = OpAnd; - CondType condition_ = 
CondEq; - std::string index_; - std::string joinIndex_; - int idxNo = -1; - bool reverseNamespacesOrder = false; + CondType condition_ = CondEq; ///< Condition applied to expression: index_ COND joinIndex_ + std::string index_; ///< main ns index field name + std::string joinIndex_; ///< joining ns index field name + int idxNo = -1; ///< index_ field Index number in main ns + bool reverseNamespacesOrder = false; ///< controls SQL encoding order + ///< false: mainNs.index Condition joinNs.joinIndex + ///< true: joinNs.joinIndex Invert(Condition) mainNs.index + + template + std::string DumpCondition(const JS &joinedSelector, bool needOp = false) const { + WrSerializer ser; + const auto &q = joinedSelector.JoinQuery(); + if (needOp) { + ser << ' ' << op_ << ' '; + } + ser << q._namespace << '.' << joinIndex_ << ' ' << InvertJoinCondition(condition_) << ' ' << index_; + return std::string{ser.Slice()}; + } }; struct SortingEntry { @@ -226,10 +233,8 @@ struct SortingEntries : public h_vector {}; class AggregateEntry { public: - static constexpr unsigned kDefaultLimit = UINT_MAX; - static constexpr unsigned kDefaultOffset = 0; - - AggregateEntry(AggType type, h_vector fields, SortingEntries sort = {}, unsigned limit = UINT_MAX, unsigned offset = 0); + AggregateEntry(AggType type, h_vector fields, SortingEntries sort = {}, unsigned limit = QueryEntry::kDefaultLimit, + unsigned offset = QueryEntry::kDefaultOffset); [[nodiscard]] bool operator==(const AggregateEntry &) const noexcept; [[nodiscard]] bool operator!=(const AggregateEntry &ae) const noexcept { return !operator==(ae); } [[nodiscard]] AggType Type() const noexcept { return type_; } @@ -245,8 +250,8 @@ class AggregateEntry { AggType type_; h_vector fields_; SortingEntries sortingEntries_; - unsigned limit_ = kDefaultLimit; - unsigned offset_ = kDefaultOffset; + unsigned limit_ = QueryEntry::kDefaultLimit; + unsigned offset_ = QueryEntry::kDefaultOffset; }; } // namespace reindexer diff --git 
a/cpp_src/core/query/sql/sqlencoder.cc b/cpp_src/core/query/sql/sqlencoder.cc index 1462ac8a7..b961257a0 100644 --- a/cpp_src/core/query/sql/sqlencoder.cc +++ b/cpp_src/core/query/sql/sqlencoder.cc @@ -1,4 +1,5 @@ #include "core/query/sql/sqlencoder.h" + #include "core/keyvalue/geometry.h" #include "core/queryresults/aggregationresult.h" #include "core/sorting/sortexpression.h" @@ -7,21 +8,6 @@ namespace reindexer { -const char *SQLEncoder::JoinTypeName(JoinType type) { - switch (type) { - case JoinType::InnerJoin: - return "INNER JOIN"; - case JoinType::OrInnerJoin: - return "OR INNER JOIN"; - case JoinType::LeftJoin: - return "LEFT JOIN"; - case JoinType::Merge: - return "MERGE"; - default: - return ""; - } -} - static void indexToSql(const std::string &index, WrSerializer &ser) { if (index.find('+') == std::string::npos) { ser << index; @@ -68,8 +54,8 @@ SQLEncoder::SQLEncoder(const Query &q) : query_(q) {} void SQLEncoder::DumpSingleJoinQuery(size_t idx, WrSerializer &ser, bool stripArgs) const { assertrx(idx < query_.joinQueries_.size()); const auto &jq = query_.joinQueries_[idx]; - ser << ' ' << JoinTypeName(jq.joinType); - if (jq.entries.Empty() && jq.count == UINT_MAX && jq.sortingEntries_.empty()) { + ser << ' ' << jq.joinType; + if (jq.entries.Empty() && jq.count == QueryEntry::kDefaultLimit && jq.sortingEntries_.empty()) { ser << ' ' << jq._namespace << " ON "; } else { ser << " ("; @@ -101,7 +87,7 @@ void SQLEncoder::dumpJoined(WrSerializer &ser, bool stripArgs) const { void SQLEncoder::dumpMerged(WrSerializer &ser, bool stripArgs) const { for (auto &me : query_.mergeQueries_) { - ser << ' ' << JoinTypeName(me.joinType) << "( "; + ser << ' ' << me.joinType << "( "; me.GetSQL(ser, stripArgs); ser << ')'; } @@ -177,8 +163,8 @@ WrSerializer &SQLEncoder::GetSQL(WrSerializer &ser, bool stripArgs) const { ser << " ORDER BY " << '\'' << escapeQuotes(se.expression) << '\'' << (se.desc ? 
" DESC" : " ASC"); } - if (a.Offset() != AggregateEntry::kDefaultOffset && !stripArgs) ser << " OFFSET " << a.Offset(); - if (a.Limit() != AggregateEntry::kDefaultLimit && !stripArgs) ser << " LIMIT " << a.Limit(); + if (a.Offset() != QueryEntry::kDefaultOffset && !stripArgs) ser << " OFFSET " << a.Offset(); + if (a.Limit() != QueryEntry::kDefaultLimit && !stripArgs) ser << " LIMIT " << a.Limit(); ser << ')'; } if (query_.aggregations_.empty() || (query_.aggregations_.size() == 1 && query_.aggregations_[0].Type() == AggDistinct)) { @@ -259,12 +245,12 @@ WrSerializer &SQLEncoder::GetSQL(WrSerializer &ser, bool stripArgs) const { dumpMerged(ser, stripArgs); dumpOrderBy(ser, stripArgs); - if (query_.start != 0 && !stripArgs) ser << " OFFSET " << query_.start; - if (query_.count != UINT_MAX && !stripArgs) ser << " LIMIT " << query_.count; + if (query_.start != QueryEntry::kDefaultOffset && !stripArgs) ser << " OFFSET " << query_.start; + if (query_.count != QueryEntry::kDefaultLimit && !stripArgs) ser << " LIMIT " << query_.count; return ser; } -const char *opNames[] = {"-", "OR", "AND", "AND NOT"}; +static const char *opNames[] = {"-", "OR", "AND", "AND NOT"}; void SQLEncoder::dumpWhereEntries(QueryEntries::const_iterator from, QueryEntries::const_iterator to, WrSerializer &ser, bool stripArgs) const { @@ -307,7 +293,7 @@ void SQLEncoder::dumpWhereEntries(QueryEntries::const_iterator from, QueryEntrie point = static_cast(entry.values[1]); distance = entry.values[0].As(); } - ser << ", ST_GeomFromText('POINT(" << point.x << ' ' << point.y << ")'), " << distance << ')'; + ser << ", ST_GeomFromText('POINT(" << point.X() << ' ' << point.Y() << ")'), " << distance << ')'; } } else { indexToSql(entry.index, ser); diff --git a/cpp_src/core/query/sql/sqlencoder.h b/cpp_src/core/query/sql/sqlencoder.h index 2e9e1cc5e..c1b217609 100644 --- a/cpp_src/core/query/sql/sqlencoder.h +++ b/cpp_src/core/query/sql/sqlencoder.h @@ -22,11 +22,6 @@ class SQLEncoder { /// @param stripArgs - 
replace condition values with '?'. void DumpSingleJoinQuery(size_t idx, WrSerializer &ser, bool stripArgs) const; - /// Get readaby Join Type - /// @param type - join tyoe - /// @return string with join type name - static const char *JoinTypeName(JoinType type); - protected: /// Builds print version of a query with join in sql format. /// @param ser - serializer to store SQL string diff --git a/cpp_src/core/query/sql/sqlparser.cc b/cpp_src/core/query/sql/sqlparser.cc index d149b4641..d0d664ebc 100644 --- a/cpp_src/core/query/sql/sqlparser.cc +++ b/cpp_src/core/query/sql/sqlparser.cc @@ -6,14 +6,13 @@ #include "core/queryresults/aggregationresult.h" #include "core/type_consts_helpers.h" #include "sqltokentype.h" +#include "vendor/double-conversion/double-conversion.h" #include "vendor/gason/gason.h" namespace reindexer { using namespace std::string_view_literals; -SQLParser::SQLParser(Query &query) : query_(query) {} - int SQLParser::Parse(std::string_view q) { tokenizer parser(q); return Parse(parser); @@ -25,7 +24,7 @@ bool SQLParser::reachedAutocompleteToken(tokenizer &parser, const token &tok) { } token SQLParser::peekSqlToken(tokenizer &parser, int tokenType, bool toLower) { - token tok = parser.peek_token(toLower); + token tok = parser.peek_token(toLower ? 
tokenizer::flags::to_lower : tokenizer::flags::no_flags); bool eof = ((parser.getPos() + tok.text().length()) == parser.length()); if (ctx_.autocompleteMode && !tok.text().empty() && reachedAutocompleteToken(parser, tok)) { size_t tokenLen = 0; @@ -109,12 +108,16 @@ int SQLParser::selectParse(tokenizer &parser) { tok = peekSqlToken(parser, SingleSelectFieldSqlToken); if (name.text() == "count"sv) { query_.calcTotal = ModeAccurateTotal; - if (!wasSelectFilter) query_.count = 0; + if (!wasSelectFilter) { + query_.count = 0; + } tok = parser.next_token(); if (tok.text() != "*") throw Error(errParseSQL, "Expected '*', but found '%s' in query, %s", tok.text(), parser.where()); } else if (name.text() == "count_cached"sv) { query_.calcTotal = ModeCachedTotal; - if (!wasSelectFilter) query_.count = 0; + if (!wasSelectFilter) { + query_.count = 0; + } tok = parser.next_token(); if (tok.text() != "*"sv) throw Error(errParseSQL, "Expected '*', but found '%s' in query, %s", tok.text(), parser.where()); } else if (name.text() == "rank"sv) { @@ -164,7 +167,7 @@ int SQLParser::selectParse(tokenizer &parser) { break; } } - query_.aggregations_.push_back(std::move(entry)); + query_.aggregations_.emplace_back(std::move(entry)); } else { throw Error(errParams, "Unknown function name SQL - '%s', %s", name.text(), parser.where()); } @@ -180,14 +183,14 @@ int SQLParser::selectParse(tokenizer &parser) { if (!query_.CanAddSelectFilter()) { throw Error(errConflict, kAggregationWithSelectFieldsMsgError); } - query_.selectFilter_.push_back(std::string(nameWithCase.text())); - query_.count = UINT_MAX; + query_.selectFilter_.emplace_back(nameWithCase.text()); + query_.count = QueryEntry::kDefaultLimit; wasSelectFilter = true; } else if (name.text() == "*"sv) { if (!query_.CanAddSelectFilter()) { throw Error(errConflict, kAggregationWithSelectFieldsMsgError); } - query_.count = UINT_MAX; + query_.count = QueryEntry::kDefaultLimit; wasSelectFilter = true; query_.selectFilter_.clear(); } @@ 
-336,8 +339,14 @@ Variant token2kv(const token &currTok, tokenizer &parser, bool allowComposite) { return detectValueType(currTok).EvaluateOneOf( [&](KeyValueType::Int64) { return Variant(int64_t(stoll(value))); }, [&](KeyValueType::Double) { - char *p = 0; - return Variant(double(strtod(value.data(), &p))); + try { + using double_conversion::StringToDoubleConverter; + static const StringToDoubleConverter converter{StringToDoubleConverter::NO_FLAGS, NAN, NAN, nullptr, nullptr}; + int countOfCharsParsedAsDouble; + return Variant(converter.StringToDouble(value.data(), value.size(), &countOfCharsParsedAsDouble)); + } catch (...) { + throw Error(errParseSQL, "Unable to convert '%s' to double value", value); + } }, [&](KeyValueType::String) { return Variant(make_key_string(value.data(), value.length())); }, [](OneOf 0) { @@ -506,17 +516,17 @@ UpdateEntry SQLParser::parseUpdateField(tokenizer &parser) { size_t startPos = parser.getPos(); bool withArrayExpressions = false; - tok = parser.next_token(false); + tok = parser.next_token(tokenizer::flags::no_flags); if (tok.text() == "["sv) { updateField.Values().MarkArray(); for (;;) { - tok = parser.next_token(false); + tok = parser.next_token(tokenizer::flags::no_flags); if (tok.text() == "]") { if (updateField.Values().empty()) break; throw Error(errParseSQL, "Expected field value, but found ']' in query, %s", parser.where()); } addUpdateValue(tok, parser, updateField); - tok = parser.next_token(false); + tok = parser.next_token(tokenizer::flags::no_flags); if (tok.text() == "]"sv) break; if (tok.text() != ","sv) { throw Error(errParseSQL, "Expected ']' or ',', but found '%s' in query, %s", tok.text(), parser.where()); @@ -766,7 +776,7 @@ void SQLParser::parseEqualPositions(tokenizer &parser, std::vector; class SQLParser { public: - explicit SQLParser(Query &q); + explicit SQLParser(Query &q) noexcept : query_(q) {} /// Parses pure sql select query and initializes Query object data members as a result. 
/// @param q - sql query. diff --git a/cpp_src/core/query/sql/sqlsuggester.cc b/cpp_src/core/query/sql/sqlsuggester.cc index 35cc8b3b3..ffd299323 100644 --- a/cpp_src/core/query/sql/sqlsuggester.cc +++ b/cpp_src/core/query/sql/sqlsuggester.cc @@ -92,7 +92,8 @@ void SQLSuggester::getMatchingFieldsNames(const std::string &token, std::vector< auto dotPos = token.find('.'); for (auto &idx : namespaces[0].indexes) { if (idx.name_ == "#pk" || idx.name_ == "-tuple") continue; - if (isBlank(token) || checkIfStartsWith(token, idx.name_, dotPos != std::string::npos)) { + if (isBlank(token) || (dotPos != std::string::npos ? checkIfStartsWith(token, idx.name_) + : checkIfStartsWith(token, idx.name_))) { if (dotPos == std::string::npos) { variants.push_back(idx.name_); } else { diff --git a/cpp_src/core/querycache.h b/cpp_src/core/querycache.h index bef71db69..4d98bd1d2 100644 --- a/cpp_src/core/querycache.h +++ b/cpp_src/core/querycache.h @@ -10,42 +10,39 @@ namespace reindexer { struct QueryTotalCountCacheVal { QueryTotalCountCacheVal() = default; - QueryTotalCountCacheVal(const size_t& total) : total_count(total) {} + QueryTotalCountCacheVal(size_t total) noexcept : total_count(total) {} - size_t Size() const { return 0; } + size_t Size() const noexcept { return 0; } int total_count = -1; }; struct QueryCacheKey { - QueryCacheKey() {} - QueryCacheKey(const QueryCacheKey& other) : buf(other.buf) {} - QueryCacheKey& operator=(const QueryCacheKey& other) { - if (this != &other) { - buf = other.buf; - } - return *this; - } + QueryCacheKey() = default; + QueryCacheKey(QueryCacheKey&& other) = default; + QueryCacheKey(const QueryCacheKey& other) = default; + QueryCacheKey& operator=(QueryCacheKey&& other) = default; + QueryCacheKey& operator=(const QueryCacheKey& other) = delete; QueryCacheKey(const Query& q) { WrSerializer ser; q.Serialize(ser, (SkipJoinQueries | SkipMergeQueries | SkipLimitOffset)); buf.reserve(ser.Len()); buf.assign(ser.Buf(), ser.Buf() + ser.Len()); } - size_t 
Size() const { return sizeof(QueryCacheKey) + (buf.is_hdata() ? 0 : buf.size()); } + size_t Size() const noexcept { return sizeof(QueryCacheKey) + (buf.is_hdata() ? 0 : buf.size()); } QueryCacheKey(WrSerializer& ser) : buf(ser.Buf(), ser.Buf() + ser.Len()) {} h_vector buf; }; struct EqQueryCacheKey { - bool operator()(const QueryCacheKey& lhs, const QueryCacheKey& rhs) const { + bool operator()(const QueryCacheKey& lhs, const QueryCacheKey& rhs) const noexcept { return (lhs.buf.size() == rhs.buf.size()) && (memcmp(lhs.buf.data(), rhs.buf.data(), lhs.buf.size()) == 0); } }; struct HashQueryCacheKey { - size_t operator()(const QueryCacheKey& q) const { + size_t operator()(const QueryCacheKey& q) const noexcept { uint64_t hash[2]; MurmurHash3_x64_128(q.buf.data(), q.buf.size(), 0, &hash); return hash[0]; diff --git a/cpp_src/core/queryresults/additionaldatasource.h b/cpp_src/core/queryresults/additionaldatasource.h index 14ef43c24..288ed0937 100644 --- a/cpp_src/core/queryresults/additionaldatasource.h +++ b/cpp_src/core/queryresults/additionaldatasource.h @@ -29,4 +29,14 @@ class AdditionalDatasourceShardId : public IAdditionalDatasource { int shardId_; }; +class AdditionalDatasourceCSV : public IAdditionalDatasource { +public: + AdditionalDatasourceCSV(IEncoderDatasourceWithJoins *jds) : joinsDs_(jds) {} + void PutAdditionalFields(CsvBuilder &) const final {} + IEncoderDatasourceWithJoins *GetJoinsDatasource() final { return joinsDs_; } + +private: + IEncoderDatasourceWithJoins *joinsDs_; +}; + } // namespace reindexer diff --git a/cpp_src/core/queryresults/aggregationresult.h b/cpp_src/core/queryresults/aggregationresult.h index d8b669c7d..0f8259a82 100644 --- a/cpp_src/core/queryresults/aggregationresult.h +++ b/cpp_src/core/queryresults/aggregationresult.h @@ -50,8 +50,9 @@ class ParametersFields { }; struct FacetResult { - FacetResult(const h_vector &v, int c) : values(v), count(c) {} - FacetResult() : count(0) {} + FacetResult(const h_vector &v, int c) noexcept 
: values(v), count(c) {} + FacetResult() noexcept : count(0) {} + h_vector values; int count; }; @@ -131,6 +132,20 @@ struct AggregationResult { for (auto &v : fields) fieldsArray.Put(0, v); fieldsArray.End(); } + template + S &DumpFields(S &os) { + os << '['; + bool first = true; + for (const auto &f : fields) { + if (!first) { + os << ", "; + } + first = false; + os << f; + } + os << ']'; + return os; + } private: std::optional value_ = std::nullopt; diff --git a/cpp_src/core/queryresults/localqueryresults.cc b/cpp_src/core/queryresults/localqueryresults.cc index 9e508a3d6..d568bacf0 100644 --- a/cpp_src/core/queryresults/localqueryresults.cc +++ b/cpp_src/core/queryresults/localqueryresults.cc @@ -2,20 +2,21 @@ #include "additionaldatasource.h" #include "cluster/sharding/sharding.h" #include "core/cbinding/resultserializer.h" +#include "core/cjson/csvbuilder.h" #include "core/cjson/msgpackbuilder.h" #include "core/cjson/protobufbuilder.h" #include "core/itemimpl.h" #include "core/namespace/namespace.h" #include "joinresults.h" +#include "tools/catch_and_return.h" namespace reindexer { -void LocalQueryResults::AddNamespace(std::shared_ptr ns, bool noLock, const RdxContext &ctx) { +void LocalQueryResults::AddNamespace(NamespaceImplPtr ns, [[maybe_unused]] bool noLock) { assertrx(noLock); const NamespaceImpl *nsPtr = ns.get(); - auto strHolder = ns->StrHolder(noLock, ctx); - const auto it = - std::find_if(nsData_.cbegin(), nsData_.cend(), [nsPtr](const NsDataHolder &nsData) { return nsData.ns.get() == nsPtr; }); + auto strHolder = ns->strHolder(); + const auto it = std::find_if(nsData_.cbegin(), nsData_.cend(), [nsPtr](const NsDataHolder &nsData) { return nsData.ns == nsPtr; }); if (it != nsData_.cend()) { assertrx(it->strHolder.get() == strHolder.get()); return; @@ -23,8 +24,19 @@ void LocalQueryResults::AddNamespace(std::shared_ptr ns, bool noL nsData_.emplace_back(std::move(ns), std::move(strHolder)); } +void LocalQueryResults::AddNamespace(NamespaceImpl *ns, 
[[maybe_unused]] bool noLock) { + assertrx(noLock); + auto strHolder = ns->strHolder(); + const auto it = std::find_if(nsData_.cbegin(), nsData_.cend(), [ns](const NsDataHolder &nsData) { return nsData.ns == ns; }); + if (it != nsData_.cend()) { + assertrx(it->strHolder.get() == strHolder.get()); + return; + } + nsData_.emplace_back(ns, std::move(strHolder)); +} + void LocalQueryResults::RemoveNamespace(const NamespaceImpl *ns) { - const auto it = std::find_if(nsData_.begin(), nsData_.end(), [ns](const NsDataHolder &nsData) { return nsData.ns.get() == ns; }); + const auto it = std::find_if(nsData_.begin(), nsData_.end(), [ns](const NsDataHolder &nsData) { return nsData.ns == ns; }); assertrx(it != nsData_.end()); nsData_.erase(it); } @@ -245,6 +257,70 @@ Error LocalQueryResults::Iterator::GetJSON(WrSerializer &ser, bool withHdrLen) { return errOK; } +CsvOrdering LocalQueryResults::MakeCSVTagOrdering(unsigned limit, unsigned offset) const { + if (!ctxs[0].fieldsFilter_.empty()) { + std::vector ordering; + ordering.reserve(ctxs[0].fieldsFilter_.size()); + for (const auto &tag : ctxs[0].fieldsFilter_) { + ordering.emplace_back(tag); + } + return ordering; + } + + std::vector ordering; + ordering.reserve(128); + fast_hash_set fieldsTmIds; + WrSerializer ser; + const auto &tm = getTagsMatcher(0); + for (size_t i = offset; i < items_.size() && i < offset + limit; ++i) { + ser.Reset(); + encodeJSON(i, ser); + + gason::JsonParser parser; + auto jsonNode = parser.Parse(giftStr(ser.Slice())); + + for (const auto &child : jsonNode) { + auto [it, inserted] = fieldsTmIds.insert(tm.name2tag(child.key)); + if (inserted && *it > 0) { + ordering.emplace_back(*it); + } + } + } + return ordering; +} + +[[nodiscard]] Error LocalQueryResults::Iterator::GetCSV(WrSerializer &ser, CsvOrdering &ordering) noexcept { + try { + auto &itemRef = qr_->items_[idx_]; + assertrx(qr_->ctxs.size() > itemRef.Nsid()); + auto &ctx = qr_->ctxs[itemRef.Nsid()]; + + if (itemRef.Value().IsFree()) { + return 
Error(errNotFound, "Item not found"); + } + + ConstPayload pl(ctx.type_, itemRef.Value()); + CsvBuilder builder(ser, ordering); + CsvEncoder encoder(&ctx.tagsMatcher_, &ctx.fieldsFilter_); + + if (!qr_->joined_.empty()) { + joins::ItemIterator itemIt = (qr_->begin() + idx_).GetJoined(); + if (itemIt.getJoinedItemsCount() > 0) { + EncoderDatasourceWithJoins joinsDs(itemIt, qr_->ctxs, qr_->GetJoinedNsCtxIndex(itemRef.Nsid())); + h_vector *, 2> dss; + AdditionalDatasourceCSV ds(&joinsDs); + dss.push_back(&ds); + encoder.Encode(pl, builder, dss); + return errOK; + } + } + + encoder.Encode(pl, builder); + } + CATCH_AND_RETURN + return errOK; +} + Error LocalQueryResults::Iterator::GetCJSON(WrSerializer &ser, bool withHdrLen) { try { auto &itemRef = qr_->items_[idx_]; @@ -333,38 +409,19 @@ void LocalQueryResults::AddItem(Item &item, bool withData, bool enableHold) { } } -const TagsMatcher &LocalQueryResults::getTagsMatcher(int nsid) const noexcept { - assertrx(nsid < int(ctxs.size())); - return ctxs[nsid].tagsMatcher_; -} +const TagsMatcher &LocalQueryResults::getTagsMatcher(int nsid) const noexcept { return ctxs[nsid].tagsMatcher_; } -const PayloadType &LocalQueryResults::getPayloadType(int nsid) const noexcept { - assertrx(nsid < int(ctxs.size())); - return ctxs[nsid].type_; -} +const PayloadType &LocalQueryResults::getPayloadType(int nsid) const noexcept { return ctxs[nsid].type_; } -const FieldsSet &LocalQueryResults::getFieldsFilter(int nsid) const noexcept { - assertrx(nsid < int(ctxs.size())); - return ctxs[nsid].fieldsFilter_; -} +const FieldsSet &LocalQueryResults::getFieldsFilter(int nsid) const noexcept { return ctxs[nsid].fieldsFilter_; } -TagsMatcher &LocalQueryResults::getTagsMatcher(int nsid) noexcept { - assertrx(nsid < int(ctxs.size())); - return ctxs[nsid].tagsMatcher_; -} +TagsMatcher &LocalQueryResults::getTagsMatcher(int nsid) noexcept { return ctxs[nsid].tagsMatcher_; } -PayloadType &LocalQueryResults::getPayloadType(int nsid) noexcept { - 
assertrx(nsid < int(ctxs.size())); - return ctxs[nsid].type_; -} +PayloadType &LocalQueryResults::getPayloadType(int nsid) noexcept { return ctxs[nsid].type_; } -std::shared_ptr LocalQueryResults::getSchema(int nsid) const noexcept { - assertrx(nsid < int(ctxs.size())); - return ctxs[nsid].schema_; -} +std::shared_ptr LocalQueryResults::getSchema(int nsid) const noexcept { return ctxs[nsid].schema_; } int LocalQueryResults::getNsNumber(int nsid) const noexcept { - assertrx(nsid < int(ctxs.size())); assertrx(ctxs[nsid].schema_); return ctxs[nsid].schema_->GetProtobufNsNumber(); } @@ -378,4 +435,10 @@ void LocalQueryResults::addNSContext(const PayloadType &type, const TagsMatcher ctxs.push_back(Context(type, tagsMatcher, filter, std::move(schema))); } +LocalQueryResults::NsDataHolder::NsDataHolder(LocalQueryResults::NamespaceImplPtr &&_ns, StringsHolderPtr &&strHldr) noexcept + : nsPtr_{std::move(_ns)}, ns(nsPtr_.get()), strHolder{std::move(strHldr)} {} + +LocalQueryResults::NsDataHolder::NsDataHolder(NamespaceImpl *_ns, StringsHolderPtr &&strHldr) noexcept + : ns(_ns), strHolder(std::move(strHldr)) {} + } // namespace reindexer diff --git a/cpp_src/core/queryresults/localqueryresults.h b/cpp_src/core/queryresults/localqueryresults.h index 3000b8443..a64994ff6 100644 --- a/cpp_src/core/queryresults/localqueryresults.h +++ b/cpp_src/core/queryresults/localqueryresults.h @@ -19,6 +19,7 @@ struct ResultFetchOpts; struct ItemImplRawData; class SelectFunctionsHolder; class NamespaceImpl; +struct CsvOrdering; namespace joins { class NamespaceResults; @@ -32,6 +33,8 @@ class ItemIterator; class LocalQueryResults { public: + using NamespaceImplPtr = intrusive_ptr; + LocalQueryResults(); LocalQueryResults(const ItemRefVector::const_iterator &b, const ItemRefVector::const_iterator &e); LocalQueryResults(std::initializer_list l); @@ -55,6 +58,7 @@ class LocalQueryResults { h_vector GetNamespaces() const; bool IsCacheEnabled() const { return !nonCacheableData; } void 
SetOutputShardId(int shardId) noexcept { outputShardId = shardId; } + CsvOrdering MakeCSVTagOrdering(unsigned limit, unsigned offset) const; class Iterator { public: @@ -62,6 +66,8 @@ class LocalQueryResults { Error GetCJSON(WrSerializer &wrser, bool withHdrLen = true); Error GetMsgPack(WrSerializer &wrser, bool withHdrLen = true); Error GetProtobuf(WrSerializer &wrser, bool withHdrLen = true); + [[nodiscard]] Error GetCSV(WrSerializer &wrser, CsvOrdering &ordering) noexcept; + // use enableHold = false only if you are sure that the item will be destroyed before the LocalQueryResults Item GetItem(bool enableHold = true); joins::ItemIterator GetJoined(); @@ -95,7 +101,7 @@ class LocalQueryResults { struct Context; // precalc context size - static constexpr int kSizeofContext = 208; // sizeof(PayloadType) + sizeof(TagsMatcher) + sizeof(FieldsSet) + sizeof(shared_ptr); + static constexpr int kSizeofContext = 264; // sizeof(PayloadType) + sizeof(TagsMatcher) + sizeof(FieldsSet) + sizeof(shared_ptr); // Order of storing contexts for namespaces: // [0] - main NS context @@ -120,10 +126,15 @@ class LocalQueryResults { void SaveRawData(ItemImplRawData &&); - void AddNamespace(std::shared_ptr ns, bool noLock, const RdxContext &); + // Add owning ns pointer + // noLock has always to be 'true' (i.e. this method can only be called unders Namespace's lock) + void AddNamespace(NamespaceImplPtr, bool noLock); + // Add non-owning ns pointer + // noLock has always to be 'true' (i.e. 
this method can only be called unders Namespace's lock) + void AddNamespace(NamespaceImpl *, bool noLock); void RemoveNamespace(const NamespaceImpl *ns); bool IsNamespaceAdded(const NamespaceImpl *ns) const noexcept { - return std::find_if(nsData_.cbegin(), nsData_.cend(), [ns](const NsDataHolder &nsData) { return nsData.ns.get() == ns; }) != + return std::find_if(nsData_.cbegin(), nsData_.cend(), [ns](const NsDataHolder &nsData) { return nsData.ns == ns; }) != nsData_.cend(); } @@ -138,17 +149,23 @@ class LocalQueryResults { ItemRefVector items_; std::vector rawDataHolder_; friend SelectFunctionsHolder; - struct NsDataHolder { - NsDataHolder(std::shared_ptr &&ns_, StringsHolderPtr &&strHldr) noexcept - : ns{std::move(ns_)}, strHolder{std::move(strHldr)} {} + class NsDataHolder { + public: + NsDataHolder(NamespaceImplPtr &&_ns, StringsHolderPtr &&strHldr) noexcept; + NsDataHolder(NamespaceImpl *_ns, StringsHolderPtr &&strHldr) noexcept; NsDataHolder(const NsDataHolder &) = delete; NsDataHolder(NsDataHolder &&) noexcept = default; NsDataHolder &operator=(const NsDataHolder &) = delete; NsDataHolder &operator=(NsDataHolder &&) = default; - std::shared_ptr ns; + private: + NamespaceImplPtr nsPtr_; + + public: + NamespaceImpl *ns; StringsHolderPtr strHolder; }; + h_vector nsData_; std::vector stringsHolder_; }; diff --git a/cpp_src/core/queryresults/queryresults.cc b/cpp_src/core/queryresults/queryresults.cc index d8c6327f2..eb10d928a 100644 --- a/cpp_src/core/queryresults/queryresults.cc +++ b/cpp_src/core/queryresults/queryresults.cc @@ -7,6 +7,7 @@ #include "core/type_consts.h" #include "estl/overloaded.h" #include "joinresults.h" +#include "tools/catch_and_return.h" namespace reindexer { @@ -507,13 +508,13 @@ Error QueryResults::Iterator::GetCJSON(WrSerializer& wrser, bool withHdrLen) { break; } - Error err = std::visit(overloaded{[&](LocalQueryResults::Iterator&& it) { + Error err = std::visit(overloaded{[&](LocalQueryResults::Iterator it) { if 
(qr_->local_->hasCompatibleTm) { return it.GetCJSON(wrser, withHdrLen); } return getCJSONviaJSON(wrser, withHdrLen, it); }, - [&](client::QueryResults::Iterator&& it) { + [&](client::QueryResults::Iterator it) { if (qr_->type_ == Type::SingleRemote || qr_->remote_[size_t(qr_->curQrId_)].hasCompatibleTm) { return it.GetCJSON(wrser, withHdrLen); } @@ -536,8 +537,8 @@ Error QueryResults::Iterator::GetMsgPack(WrSerializer& wrser, bool withHdrLen) { Error QueryResults::Iterator::GetProtobuf(WrSerializer& wrser, bool withHdrLen) { try { - return std::visit(overloaded{[&wrser, withHdrLen](LocalQueryResults::Iterator&& it) { return it.GetProtobuf(wrser, withHdrLen); }, - [](client::QueryResults::Iterator&&) { + return std::visit(overloaded{[&wrser, withHdrLen](LocalQueryResults::Iterator it) { return it.GetProtobuf(wrser, withHdrLen); }, + [](const client::QueryResults::Iterator&) { return Error(errParams, "Protobuf is not supported for distributed and proxied queries"); // return it.GetProtobuf(wrser, withHdrLen); }}, @@ -606,6 +607,13 @@ void QueryResults::QrMetaData::ResetJoinStorage(int64_t idx) const { } } +[[nodiscard]] Error QueryResults::Iterator::GetCSV(WrSerializer& ser, CsvOrdering& ordering) noexcept { + try { + return std::visit(overloaded{[&ser, &ordering](auto it) { return it.GetCSV(ser, ordering); }}, getVariantIt()); + } + CATCH_AND_RETURN +} + joins::ItemIterator QueryResults::Iterator::GetJoined(std::vector* storage) { if (qr_->type_ == Type::Local) { // NOLINTNEXTLINE(bugprone-unchecked-optional-access) diff --git a/cpp_src/core/queryresults/queryresults.h b/cpp_src/core/queryresults/queryresults.h index 007ba1ee0..42ce467c5 100644 --- a/cpp_src/core/queryresults/queryresults.h +++ b/cpp_src/core/queryresults/queryresults.h @@ -83,6 +83,8 @@ class QueryResults { } public: + using NamespaceImplPtr = intrusive_ptr; + QueryResults(int flags = 0); ~QueryResults(); QueryResults(QueryResults &&); @@ -220,6 +222,8 @@ class QueryResults { Error 
GetCJSON(WrSerializer &wrser, bool withHdrLen = true); Error GetMsgPack(WrSerializer &wrser, bool withHdrLen = true); Error GetProtobuf(WrSerializer &wrser, bool withHdrLen = true); + [[nodiscard]] Error GetCSV(WrSerializer &wrser, CsvOrdering &ordering) noexcept; + // use enableHold = false only if you are sure that the item will be destroyed before the LocalQueryResults Item GetItem(bool enableHold = true); joins::ItemIterator GetJoined(std::vector *storage = nullptr); diff --git a/cpp_src/core/querystat.h b/cpp_src/core/querystat.h index de3bbfdec..4d80138c2 100644 --- a/cpp_src/core/querystat.h +++ b/cpp_src/core/querystat.h @@ -1,6 +1,7 @@ #pragma once #include +#include "core/nsselecter/explaincalc.h" #include "estl/fast_hash_map.h" #include "namespace/namespacestat.h" #include "perfstatcounter.h" @@ -57,7 +58,7 @@ class QueryStatCalculator { if (enable_) tmStart = std::chrono::high_resolution_clock::now(); } - QueryStatCalculator(Logger logger) : enable_(true), logger_(std::move(logger)) { + QueryStatCalculator(Logger logger, bool enable = true) : enable_(enable), logger_(std::move(logger)) { if (enable_) tmStart = std::chrono::high_resolution_clock::now(); } ~QueryStatCalculator() { @@ -77,12 +78,57 @@ class QueryStatCalculator { } } + template + auto LogDuration(Type& var, Method method, Args&&... args) { + return exec([&var, &method](Args&&... aa) { return (var.*method)(std::forward(aa)...); }, std::forward(args)...); + } + + template + auto LogFlushDuration(Type& var, Method method, Args&&... args) { + return LogDuration(var, method, std::forward(args)...); + } + + template + auto CreateLock(Type& var, Method method, Args&&... args) { + return LogDuration(args)...))::MutexType::mark)>( + var, method, std::forward(args)...); + } + + template