diff --git a/.clang-format b/.clang-format index a148e5429..130ce61da 100644 --- a/.clang-format +++ b/.clang-format @@ -3,27 +3,71 @@ Language: Cpp # BasedOnStyle: Google AccessModifierOffset: -4 AlignAfterOpenBracket: Align -AlignConsecutiveAssignments: false -AlignConsecutiveDeclarations: false -AlignEscapedNewlinesLeft: true -AlignOperands: true -AlignTrailingComments: true +AlignArrayOfStructures: None +AlignConsecutiveAssignments: + Enabled: false + AcrossEmptyLines: false + AcrossComments: false + AlignCompound: false + AlignFunctionPointers: false + PadOperators: true +AlignConsecutiveBitFields: + Enabled: false + AcrossEmptyLines: false + AcrossComments: false + AlignCompound: false + AlignFunctionPointers: false + PadOperators: false +AlignConsecutiveDeclarations: + Enabled: false + AcrossEmptyLines: false + AcrossComments: false + AlignCompound: false + AlignFunctionPointers: false + PadOperators: true +AlignConsecutiveMacros: + Enabled: false + AcrossEmptyLines: false + AcrossComments: false + AlignCompound: false + AlignFunctionPointers: false + PadOperators: false +AlignConsecutiveShortCaseStatements: + Enabled: false + AcrossEmptyLines: false + AcrossComments: false + AlignCaseColons: false +AlignEscapedNewlines: Left +AlignOperands: Align +AlignTrailingComments: + Kind: Always + OverEmptyLines: 0 +AllowAllArgumentsOnNextLine: true AllowAllParametersOfDeclarationOnNextLine: true -AllowShortBlocksOnASingleLine: false +AllowBreakBeforeNoexceptSpecifier: Never +AllowShortBlocksOnASingleLine: Never AllowShortCaseLabelsOnASingleLine: false +AllowShortCompoundRequirementOnASingleLine: true +AllowShortEnumsOnASingleLine: true AllowShortFunctionsOnASingleLine: All -AllowShortIfStatementsOnASingleLine: true +AllowShortIfStatementsOnASingleLine: WithoutElse +AllowShortLambdasOnASingleLine: All AllowShortLoopsOnASingleLine: true AlwaysBreakAfterDefinitionReturnType: None AlwaysBreakAfterReturnType: None AlwaysBreakBeforeMultilineStrings: true -AlwaysBreakTemplateDeclarations: true +AlwaysBreakTemplateDeclarations: Yes +AttributeMacros: + - __capability BinPackArguments: true BinPackParameters: true +BitFieldColonSpacing: Both BraceWrapping: + AfterCaseLabel: false AfterClass: false - AfterControlStatement: false + AfterControlStatement: Never AfterEnum: false + AfterExternBlock: false AfterFunction: false AfterNamespace: false AfterObjCDeclaration: false @@ -31,68 +75,172 @@ BraceWrapping: AfterUnion: false BeforeCatch: false BeforeElse: false + BeforeLambdaBody: false + BeforeWhile: false IndentBraces: false + SplitEmptyFunction: true + SplitEmptyRecord: true + SplitEmptyNamespace: true +BreakAdjacentStringLiterals: true +BreakAfterAttributes: Leave +BreakAfterJavaFieldAnnotations: false +BreakArrays: true BreakBeforeBinaryOperators: None +BreakBeforeConceptDeclarations: Always BreakBeforeBraces: Attach +BreakBeforeInlineASMColon: OnlyMultiline BreakBeforeTernaryOperators: true -BreakConstructorInitializersBeforeComma: false -BreakAfterJavaFieldAnnotations: false +BreakConstructorInitializers: BeforeColon +BreakInheritanceList: BeforeColon BreakStringLiterals: true ColumnLimit: 140 CommentPragmas: '^ IWYU pragma:' -ConstructorInitializerAllOnOneLineOrOnePerLine: true +CompactNamespaces: false ConstructorInitializerIndentWidth: 4 ContinuationIndentWidth: 4 Cpp11BracedListStyle: true DerivePointerAlignment: true DisableFormat: false +EmptyLineAfterAccessModifier: Never +EmptyLineBeforeAccessModifier: LogicalBlock ExperimentalAutoDetectBinPacking: false -ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ] FixNamespaceComments: true -IncludeCategories: +ForEachMacros: + - foreach + - Q_FOREACH + - BOOST_FOREACH +IfMacros: + - KJ_IF_MAYBE +IncludeBlocks: Preserve +IncludeCategories: - Regex: '^<.*\.h>' Priority: 1 + SortPriority: 0 + CaseSensitive: false - Regex: '^<.*' Priority: 2 + SortPriority: 0 + CaseSensitive: false - Regex: '.*' Priority: 3 + SortPriority: 0 + CaseSensitive: false IncludeIsMainRegex: '([-_](test|unittest))?$' +IncludeIsMainSourceRegex: '' +IndentAccessModifiers: false +IndentCaseBlocks: false IndentCaseLabels: true +IndentExternBlock: AfterExternBlock +IndentGotoLabels: true +IndentPPDirectives: None +IndentRequiresClause: true IndentWidth: 4 IndentWrappedFunctionNames: false +InsertBraces: false +InsertNewlineAtEOF: false +InsertTrailingCommas: None +IntegerLiteralSeparator: + Binary: 0 + BinaryMinDigits: 0 + Decimal: 0 + DecimalMinDigits: 0 + Hex: 0 + HexMinDigits: 0 JavaScriptQuotes: Leave JavaScriptWrapImports: true KeepEmptyLinesAtTheStartOfBlocks: false +KeepEmptyLinesAtEOF: false +LambdaBodyIndentation: Signature +LineEnding: DeriveLF MacroBlockBegin: '' MacroBlockEnd: '' MaxEmptyLinesToKeep: 1 NamespaceIndentation: None +ObjCBinPackProtocolList: Auto ObjCBlockIndentWidth: 2 +ObjCBreakBeforeNestedBlockParam: true ObjCSpaceAfterProperty: false ObjCSpaceBeforeProtocolList: false +PackConstructorInitializers: NextLine +PenaltyBreakAssignment: 2 PenaltyBreakBeforeFirstCallParameter: 1 PenaltyBreakComment: 300 PenaltyBreakFirstLessLess: 120 +PenaltyBreakOpenParenthesis: 0 +PenaltyBreakScopeResolution: 500 PenaltyBreakString: 1000 +PenaltyBreakTemplateDeclaration: 10 PenaltyExcessCharacter: 1000000 +PenaltyIndentedWhitespace: 0 PenaltyReturnTypeOnItsOwnLine: 200 PointerAlignment: Left +PPIndentWidth: -1 +QualifierAlignment: Leave +ReferenceAlignment: Pointer ReflowComments: true -SortIncludes: true +RemoveBracesLLVM: false +RemoveParentheses: Leave +RemoveSemicolon: false +RequiresClausePosition: OwnLine +RequiresExpressionIndentation: OuterScope +SeparateDefinitionBlocks: Leave +ShortNamespaceLines: 1 +SkipMacroDefinitionBody: false +SortIncludes: CaseSensitive +SortJavaStaticImport: Before +SortUsingDeclarations: Never SpaceAfterCStyleCast: false +SpaceAfterLogicalNot: false SpaceAfterTemplateKeyword: true +SpaceAroundPointerQualifiers: Default SpaceBeforeAssignmentOperators: true +SpaceBeforeCaseColon: false +SpaceBeforeCpp11BracedList: false +SpaceBeforeCtorInitializerColon: true +SpaceBeforeInheritanceColon: true +SpaceBeforeJsonColon: false SpaceBeforeParens: ControlStatements -SpaceInEmptyParentheses: false +SpaceBeforeParensOptions: + AfterControlStatements: true + AfterForeachMacros: true + AfterFunctionDefinitionName: false + AfterFunctionDeclarationName: false + AfterIfMacros: true + AfterOverloadedOperator: false + AfterPlacementOperator: true + AfterRequiresInClause: false + AfterRequiresInExpression: false + BeforeNonEmptyParentheses: false +SpaceBeforeRangeBasedForLoopColon: true +SpaceBeforeSquareBrackets: false +SpaceInEmptyBlock: false SpacesBeforeTrailingComments: 2 -SpacesInAngles: false +SpacesInAngles: Never SpacesInContainerLiterals: true -SpacesInCStyleCastParentheses: false -SpacesInParentheses: false +SpacesInLineCommentPrefix: + Minimum: 1 + Maximum: -1 +SpacesInParens: Never +SpacesInParensOptions: + InCStyleCasts: false + InConditionalStatements: false + InEmptyParentheses: false + Other: false SpacesInSquareBrackets: false -SortUsingDeclarations: false Standard: Auto +StatementAttributeLikeMacros: + - Q_EMIT +StatementMacros: + - Q_UNUSED + - QT_REQUIRE_VERSION TabWidth: 4 UseTab: Always +VerilogBreakBetweenInstancePorts: true +WhitespaceSensitiveMacros: + - BOOST_PP_STRINGIZE + - CF_SWIFT_NAME + - NS_SWIFT_NAME + - PP_STRINGIZE + - STRINGIZE ... diff --git a/bindings/consts.go b/bindings/consts.go index e6736fdcc..9f5f03202 100644 --- a/bindings/consts.go +++ b/bindings/consts.go @@ -2,7 +2,7 @@ package bindings const CInt32Max = int(^uint32(0) >> 1) -const ReindexerVersion = "v3.26.0" +const ReindexerVersion = "v3.27.0" // public go consts from type_consts.h and reindexer_ctypes.h const ( diff --git a/changelog.md b/changelog.md index dfc1864f2..679ac8167 100644 --- a/changelog.md +++ b/changelog.md @@ -1,3 +1,31 @@ +# Version 3.27.0 (09.07.2024) +## Core +- [fea] Decreased heap allocations count in general selection pipeline +- [fea] Optimized allocations in fulltext query preparation phase +- [fea] Added heuristic for [copying transactions](readme.md#transactions-commit-strategies), which allows to avoid extra namespace copying if there are no select queries expected. May be disabled via `REINDEXER_NOTXHEURISTIC` env variable +- [fea] Added automatic default values setting on new index creation (except UUID and rtree indexes) +- [fea] Improved comparators performance for `IN()`-conditions +- [fix] Fixed potential deadlock on data reading at server startup +- [fix] Fixed double fulltext index building after copying transactions +- [fix] Fixed incorrect composite indexes detection in some of `UPDATE`-queries (it could lead to random crashes during updates) +- [ref] Deprecated fulltext index option `warmup_on_ns_copy`. In current implementation reindexer will always build fulltext index after copying transaction +- [ref] Removed some of the [default RU stop-words](https://github.com/Restream/reindexer/blob/v3.27.0/cpp_src/core/ft/stopwords/stop_ru.cc) + +## Replication +- [fix] Fixed `updated_unix_nano` in `#memstats` table +- [fix] Disabled `statement-based update replication` - in some cases it could lead to crashes and data loss due to logic conflicts in WAL + +## Reindexer server +- [fea] Added `SO_LINGER`-option in cases when client drops connection first. This allows to prevent `TIME_WAIT` socket state on the client's side + +## Deploy +- [fea] Added build for Ubuntu 24.04 +- [ref] CentOS 7 reindexer repo is no longer supported due to CentOS 7 EOL + +## Face +- [fea] Added RU language in UI +- [fix] Fixed input width in settings section + # Version 3.26.0 (21.06.2024) ## Core diff --git a/clang-tidy/.clang-tidy b/clang-tidy/.clang-tidy deleted file mode 100644 index 063df74f1..000000000 --- a/clang-tidy/.clang-tidy +++ /dev/null @@ -1,31 +0,0 @@ -Checks: 'clang-diagnostic-*, - clang-analyzer-*, - performance-*, - bugprone-*, - -bugprone-exception-escape, - -bugprone-branch-clone, - -bugprone-easily-swappable-parameters, - -bugprone-macro-parentheses, - -bugprone-signed-char-misuse, - -bugprone-narrowing-conversions, - -bugprone-reserved-identifier, - -bugprone-implicit-widening-of-multiplication-result, - -bugprone-assignment-in-if-condition, - -bugprone-parent-virtual-call, - -bugprone-integer-division, - -bugprone-unhandled-self-assignment, - -bugprone-inc-dec-in-conditions, - -clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling, - -performance-no-int-to-ptr, - -performance-enum-size, - -performance-avoid-endl' -# clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling - too many unnecessary warning in vendored code -# performance-no-int-to-ptr - consider how to fix this -# bugprone-macro-parentheses - consider fixing -WarningsAsErrors: '*' -HeaderFilterRegex: '.*(?= 4.0.0 are given under - # the top level key 'Diagnostics' in the output yaml files - mergekey = "Diagnostics" - merged=[] - for replacefile in glob.iglob(os.path.join(tmpdir, '*.yaml')): - content = yaml.safe_load(open(replacefile, 'r')) - if not content: - continue # Skip empty files. - merged.extend(content.get(mergekey, [])) - - if merged: - # MainSourceFile: The key is required by the definition inside - # include/clang/Tooling/ReplacementsYaml.h, but the value - # is actually never used inside clang-apply-replacements, - # so we set it to '' here. - output = {'MainSourceFile': '', mergekey: merged} - with open(mergefile, 'w') as out: - yaml.safe_dump(output, out) - else: - # Empty the file: - open(mergefile, 'w').close() - - -def find_binary(arg, name, build_path): - """Get the path for a binary or exit""" - if arg: - if shutil.which(arg): - return arg - else: - raise SystemExit( - "error: passed binary '{}' was not found or is not executable" - .format(arg)) - - built_path = os.path.join(build_path, "bin", name) - binary = shutil.which(name) or shutil.which(built_path) - if binary: - return binary - else: - raise SystemExit( - "error: failed to find {} in $PATH or at {}" - .format(name, built_path)) - - -def apply_fixes(args, clang_apply_replacements_binary, tmpdir): - """Calls clang-apply-fixes on a given directory.""" - invocation = [clang_apply_replacements_binary] - invocation.append('-ignore-insert-conflict') - if args.format: - invocation.append('-format') - if args.style: - invocation.append('-style=' + args.style) - invocation.append(tmpdir) - subprocess.call(invocation) - - -def run_tidy(args, clang_tidy_binary, tmpdir, build_path, queue, lock, - failed_files): - """Takes filenames out of queue and runs clang-tidy on them.""" - while True: - name = queue.get() - invocation = get_tidy_invocation(name, clang_tidy_binary, args.checks, - tmpdir, build_path, args.header_filter, - args.allow_enabling_alpha_checkers, - args.extra_arg, args.extra_arg_before, - args.quiet, args.config_file, args.config, - args.line_filter, args.use_color, - args.plugins) - - proc = subprocess.Popen(invocation, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - output, err = proc.communicate() - if proc.returncode != 0: - if proc.returncode < 0: - msg = "%s: terminated by signal %d\n" % (name, -proc.returncode) - err += msg.encode('utf-8') - failed_files.append(name) - with lock: - sys.stdout.write(' '.join(invocation) + '\n' + output.decode('utf-8')) - if len(err) > 0: - sys.stdout.flush() - sys.stderr.write(err.decode('utf-8')) - queue.task_done() - - -def main(): - parser = argparse.ArgumentParser(description='Runs clang-tidy over all files ' - 'in a compilation database. Requires ' - 'clang-tidy and clang-apply-replacements in ' - '$PATH or in your build directory.') - parser.add_argument('-allow-enabling-alpha-checkers', - action='store_true', help='allow alpha checkers from ' - 'clang-analyzer.') - parser.add_argument('-clang-tidy-binary', metavar='PATH', - default='clang-tidy-18', - help='path to clang-tidy binary') - parser.add_argument('-clang-apply-replacements-binary', metavar='PATH', - default='clang-apply-replacements-18', - help='path to clang-apply-replacements binary') - parser.add_argument('-checks', default=None, - help='checks filter, when not specified, use clang-tidy ' - 'default') - config_group = parser.add_mutually_exclusive_group() - config_group.add_argument('-config', default=None, - help='Specifies a configuration in YAML/JSON format: ' - ' -config="{Checks: \'*\', ' - ' CheckOptions: {x: y}}" ' - 'When the value is empty, clang-tidy will ' - 'attempt to find a file named .clang-tidy for ' - 'each source file in its parent directories.') - config_group.add_argument('-config-file', default=None, - help='Specify the path of .clang-tidy or custom config ' - 'file: e.g. -config-file=/some/path/myTidyConfigFile. ' - 'This option internally works exactly the same way as ' - '-config option after reading specified config file. ' - 'Use either -config-file or -config, not both.') - parser.add_argument('-header-filter', default=None, - help='regular expression matching the names of the ' - 'headers to output diagnostics from. Diagnostics from ' - 'the main file of each translation unit are always ' - 'displayed.') - parser.add_argument('-line-filter', default=None, - help='List of files with line ranges to filter the' - 'warnings.') - if yaml: - parser.add_argument('-export-fixes', metavar='filename', dest='export_fixes', - help='Create a yaml file to store suggested fixes in, ' - 'which can be applied with clang-apply-replacements.') - parser.add_argument('-j', type=int, default=0, - help='number of tidy instances to be run in parallel.') - parser.add_argument('files', nargs='*', default=['.*'], - help='files to be processed (regex on path)') - parser.add_argument('-fix', action='store_true', help='apply fix-its') - parser.add_argument('-format', action='store_true', help='Reformat code ' - 'after applying fixes') - parser.add_argument('-style', default='file', help='The style of reformat ' - 'code after applying fixes') - parser.add_argument('-use-color', type=strtobool, nargs='?', const=True, - help='Use colors in diagnostics, overriding clang-tidy\'s' - ' default behavior. This option overrides the \'UseColor' - '\' option in .clang-tidy file, if any.') - parser.add_argument('-p', dest='build_path', - help='Path used to read a compile command database.') - parser.add_argument('-extra-arg', dest='extra_arg', - action='append', default=[], - help='Additional argument to append to the compiler ' - 'command line.') - parser.add_argument('-extra-arg-before', dest='extra_arg_before', - action='append', default=[], - help='Additional argument to prepend to the compiler ' - 'command line.') - parser.add_argument('-ignore', default=DEFAULT_CLANG_TIDY_IGNORE, - help='File path to clang-tidy-ignore') - parser.add_argument('-quiet', action='store_true', - help='Run clang-tidy in quiet mode') - parser.add_argument('-load', dest='plugins', - action='append', default=[], - help='Load the specified plugin in clang-tidy.') - args = parser.parse_args() - - db_path = 'compile_commands.json' - - if args.build_path is not None: - build_path = args.build_path - else: - # Find our database - build_path = find_compilation_database(db_path) - - clang_tidy_binary = find_binary(args.clang_tidy_binary, "clang-tidy", - build_path) - - tmpdir = None - if args.fix or (yaml and args.export_fixes): - clang_apply_replacements_binary = find_binary( - args.clang_apply_replacements_binary, "clang-apply-replacements", - build_path) - tmpdir = tempfile.mkdtemp() - - try: - invocation = get_tidy_invocation("", clang_tidy_binary, args.checks, - None, build_path, args.header_filter, - args.allow_enabling_alpha_checkers, - args.extra_arg, args.extra_arg_before, - args.quiet, args.config_file, args.config, - args.line_filter, args.use_color, - args.plugins) - invocation.append('-list-checks') - invocation.append('-') - if args.quiet: - # Even with -quiet we still want to check if we can call clang-tidy. - with open(os.devnull, 'w') as dev_null: - subprocess.check_call(invocation, stdout=dev_null) - else: - subprocess.check_call(invocation) - except: - print("Unable to run clang-tidy.", file=sys.stderr) - sys.exit(1) - - # Load the database and extract all files. - database = json.load(open(os.path.join(build_path, db_path))) - files = set([make_absolute(entry['file'], entry['directory']) - for entry in database]) - files, excluded = filter_files(args.ignore, files) - if excluded: - print("Excluding the following files:\n" + "\n".join(excluded) + "\n") - - max_task = args.j - if max_task == 0: - max_task = multiprocessing.cpu_count() - - # Build up a big regexy filter from all command line arguments. - file_name_re = re.compile('|'.join(args.files)) - - return_code = 0 - try: - # Spin up a bunch of tidy-launching threads. - task_queue = queue.Queue(max_task) - # List of files with a non-zero return code. - failed_files = [] - lock = threading.Lock() - for _ in range(max_task): - t = threading.Thread(target=run_tidy, - args=(args, clang_tidy_binary, tmpdir, build_path, - task_queue, lock, failed_files)) - t.daemon = True - t.start() - - # Fill the queue with files. - for name in files: - if file_name_re.search(name): - task_queue.put(name) - - # Wait for all threads to be done. - task_queue.join() - if len(failed_files): - return_code = 1 - - except KeyboardInterrupt: - # This is a sad hack. Unfortunately subprocess goes - # bonkers with ctrl-c and we start forking merrily. - print('\nCtrl-C detected, goodbye.') - if tmpdir: - shutil.rmtree(tmpdir) - os.kill(0, 9) - - if yaml and args.export_fixes: - print('Writing fixes to ' + args.export_fixes + ' ...') - try: - merge_replacement_files(tmpdir, args.export_fixes) - except: - print('Error exporting fixes.\n', file=sys.stderr) - traceback.print_exc() - return_code=1 - - if args.fix: - print('Applying fixes ...') - try: - apply_fixes(args, clang_apply_replacements_binary, tmpdir) - except: - print('Error applying fixes.\n', file=sys.stderr) - traceback.print_exc() - return_code = 1 - - if tmpdir: - shutil.rmtree(tmpdir) - sys.exit(return_code) - - -if __name__ == '__main__': - main() diff --git a/cpp_src/CMakeLists.txt b/cpp_src/CMakeLists.txt index d5e8b4712..d51ab3165 100644 --- a/cpp_src/CMakeLists.txt +++ b/cpp_src/CMakeLists.txt @@ -44,7 +44,7 @@ else() option(LINK_RESOURCES "Link web resources as binary data" ON) endif() -set(REINDEXER_VERSION_DEFAULT "3.26.0") +set(REINDEXER_VERSION_DEFAULT "3.27.0") if(NOT CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE "RelWithDebInfo") diff --git a/cpp_src/cmd/reindexer_server/test/test_storage_compatibility.sh b/cpp_src/cmd/reindexer_server/test/test_storage_compatibility.sh new file mode 100755 index 000000000..d189d3841 --- /dev/null +++ b/cpp_src/cmd/reindexer_server/test/test_storage_compatibility.sh @@ -0,0 +1,195 @@ +#!/bin/bash +# Task: https://github.com/restream/reindexer/-/issues/1188 +set -e + +function KillAndRemoveServer { + local pid=$1 + kill $pid + wait $pid + yum remove -y 'reindexer*' > /dev/null +} + +function WaitForDB { + # wait until DB is loaded + set +e # disable "exit on error" so the script won't stop when DB's not loaded yet + is_connected=$(reindexer_tool --dsn $ADDRESS --command '\databases list'); + while [[ $is_connected != "test" ]] + do + sleep 2 + is_connected=$(reindexer_tool --dsn $ADDRESS --command '\databases list'); + done + set -e +} + +function CompareNamespacesLists { + local ns_list_actual=$1 + local ns_list_expected=$2 + local pid=$3 + + diff=$(echo ${ns_list_actual[@]} ${ns_list_expected[@]} | tr ' ' '\n' | sort | uniq -u) # compare in any order + if [ "$diff" == "" ]; then + echo "## PASS: namespaces list not changed" + else + echo "##### FAIL: namespaces list was changed" + echo "expected: $ns_list_expected" + echo "actual: $ns_list_actual" + KillAndRemoveServer $pid; + exit 1 + fi +} + +function CompareMemstats { + local actual=$1 + local expected=$2 + local pid=$3 + diff=$(echo ${actual[@]} ${expected[@]} | tr ' ' '\n' | sed 's/\(.*\),$/\1/' | sort | uniq -u) # compare in any order + if [ "$diff" == "" ]; then + echo "## PASS: memstats not changed" + else + echo "##### FAIL: memstats was changed" + echo "expected: $expected" + echo "actual: $actual" + KillAndRemoveServer $pid; + exit 1 + fi +} + + +RX_SERVER_CURRENT_VERSION_RPM="$(basename build/reindexer-*server*.rpm)" +VERSION_FROM_RPM=$(echo "$RX_SERVER_CURRENT_VERSION_RPM" | grep -o '.*server-..') +VERSION=$(echo ${VERSION_FROM_RPM: -2:1}) # one-digit version + +echo "## choose latest release rpm file" +if [ $VERSION == 3 ]; then + LATEST_RELEASE=$(python3 cpp_src/cmd/reindexer_server/test/get_last_rx_version.py -v 3) + namespaces_list_expected=$'purchase_options_ext_dict\nchild_account_recommendations\n#config\n#activitystats\nradio_channels\ncollections\n#namespaces\nwp_imports_tasks\nepg_genres\nrecom_media_items_personal\nrecom_epg_archive_default\n#perfstats\nrecom_epg_live_default\nmedia_view_templates\nasset_video_servers\nwp_tasks_schedule\nadmin_roles\n#clientsstats\nrecom_epg_archive_personal\nrecom_media_items_similars\nmenu_items\naccount_recommendations\nkaraoke_items\nmedia_items\nbanners\n#queriesperfstats\nrecom_media_items_default\nrecom_epg_live_personal\nservices\n#memstats\nchannels\nmedia_item_recommendations\nwp_tasks_tasks\nepg' +elif [ $VERSION == 4 ]; then + LATEST_RELEASE=$(python3 cpp_src/cmd/reindexer_server/test/get_last_rx_version.py -v 4) + # replicationstats ns added for v4 + namespaces_list_expected=$'purchase_options_ext_dict\nchild_account_recommendations\n#config\n#activitystats\n#replicationstats\nradio_channels\ncollections\n#namespaces\nwp_imports_tasks\nepg_genres\nrecom_media_items_personal\nrecom_epg_archive_default\n#perfstats\nrecom_epg_live_default\nmedia_view_templates\nasset_video_servers\nwp_tasks_schedule\nadmin_roles\n#clientsstats\nrecom_epg_archive_personal\nrecom_media_items_similars\nmenu_items\naccount_recommendations\nkaraoke_items\nmedia_items\nbanners\n#queriesperfstats\nrecom_media_items_default\nrecom_epg_live_personal\nservices\n#memstats\nchannels\nmedia_item_recommendations\nwp_tasks_tasks\nepg' +else + echo "Unknown version" + exit 1 +fi + +echo "## downloading latest release rpm file: $LATEST_RELEASE" +curl "http://repo.itv.restr.im/itv-api-ng/7/x86_64/$LATEST_RELEASE" --output $LATEST_RELEASE; +echo "## downloading example DB" +curl "https://git.restream.ru/MaksimKravchuk/reindexer_testdata/-/raw/master/big.zip" --output big.zip; +unzip -o big.zip # unzips into mydb_big.rxdump; + +ADDRESS="cproto://127.0.0.1:6534/" +DB_NAME="test" + +memstats_expected=$'[ +{"replication":{"data_hash":24651210926,"data_count":3}}, +{"replication":{"data_hash":6252344969,"data_count":1}}, +{"replication":{"data_hash":37734732881,"data_count":28}}, +{"replication":{"data_hash":0,"data_count":0}}, +{"replication":{"data_hash":1024095024522,"data_count":1145}}, +{"replication":{"data_hash":8373644068,"data_count":1315}}, +{"replication":{"data_hash":0,"data_count":0}}, +{"replication":{"data_hash":0,"data_count":0}}, +{"replication":{"data_hash":0,"data_count":0}}, +{"replication":{"data_hash":0,"data_count":0}}, +{"replication":{"data_hash":7404222244,"data_count":97}}, +{"replication":{"data_hash":94132837196,"data_count":4}}, +{"replication":{"data_hash":1896088071,"data_count":2}}, +{"replication":{"data_hash":0,"data_count":0}}, +{"replication":{"data_hash":-672103903,"data_count":33538}}, +{"replication":{"data_hash":0,"data_count":0}}, +{"replication":{"data_hash":6833710705,"data_count":1}}, +{"replication":{"data_hash":5858155773472,"data_count":4500}}, +{"replication":{"data_hash":-473221280268823592,"data_count":65448}}, +{"replication":{"data_hash":0,"data_count":0}}, +{"replication":{"data_hash":8288213744,"data_count":3}}, +{"replication":{"data_hash":0,"data_count":0}}, +{"replication":{"data_hash":0,"data_count":0}}, +{"replication":{"data_hash":354171024786967,"data_count":3941}}, +{"replication":{"data_hash":-6520334670,"data_count":35886}}, +{"replication":{"data_hash":112772074632,"data_count":281}}, +{"replication":{"data_hash":-12679568198538,"data_count":1623116}} +] +Returned 27 rows' + +echo "##### Forward compatibility test #####" + +DB_PATH=$(pwd)"/rx_db" + +echo "Database: "$DB_PATH + +echo "## installing latest release: $LATEST_RELEASE" +yum install -y $LATEST_RELEASE > /dev/null; +# run RX server with disabled logging +reindexer_server -l warning --httplog=none --rpclog=none --db $DB_PATH & +server_pid=$! +sleep 2; + +reindexer_tool --dsn $ADDRESS$DB_NAME -f mydb_big.rxdump --createdb; +sleep 1; + +namespaces_1=$(reindexer_tool --dsn $ADDRESS$DB_NAME --command '\namespaces list'); +echo $namespaces_1; +CompareNamespacesLists "${namespaces_1[@]}" "${namespaces_list_expected[@]}" $server_pid; + +memstats_1=$(reindexer_tool --dsn $ADDRESS$DB_NAME --command 'select replication.data_hash, replication.data_count from #memstats'); +CompareMemstats "${memstats_1[@]}" "${memstats_expected[@]}" $server_pid; + +KillAndRemoveServer $server_pid; + +echo "## installing current version: $RX_SERVER_CURRENT_VERSION_RPM" +yum install -y build/*.rpm > /dev/null; +reindexer_server -l0 --corelog=none --httplog=none --rpclog=none --db $DB_PATH & +server_pid=$! +sleep 2; + +WaitForDB + +namespaces_2=$(reindexer_tool --dsn $ADDRESS$DB_NAME --command '\namespaces list'); +echo $namespaces_2; +CompareNamespacesLists "${namespaces_2[@]}" "${namespaces_1[@]}" $server_pid; + +memstats_2=$(reindexer_tool --dsn $ADDRESS$DB_NAME --command 'select replication.data_hash, replication.data_count from #memstats'); +CompareMemstats "${memstats_2[@]}" "${memstats_1[@]}" $server_pid; + +KillAndRemoveServer $server_pid; +rm -rf $DB_PATH; +sleep 1; + +echo "##### Backward compatibility test #####" + +echo "## installing current version: $RX_SERVER_CURRENT_VERSION_RPM" +yum install -y build/*.rpm > /dev/null; +reindexer_server -l warning --httplog=none --rpclog=none --db $DB_PATH & +server_pid=$! +sleep 2; + +reindexer_tool --dsn $ADDRESS$DB_NAME -f mydb_big.rxdump --createdb; +sleep 1; + +namespaces_3=$(reindexer_tool --dsn $ADDRESS$DB_NAME --command '\namespaces list'); +echo $namespaces_3; +CompareNamespacesLists "${namespaces_3[@]}" "${namespaces_list_expected[@]}" $server_pid; + +memstats_3=$(reindexer_tool --dsn $ADDRESS$DB_NAME --command 'select replication.data_hash, replication.data_count from #memstats'); +CompareMemstats "${memstats_3[@]}" "${memstats_expected[@]}" $server_pid; + +KillAndRemoveServer $server_pid; + +echo "## installing latest release: $LATEST_RELEASE" +yum install -y $LATEST_RELEASE > /dev/null; +reindexer_server -l warning --httplog=none --rpclog=none --db $DB_PATH & +server_pid=$! +sleep 2; + +WaitForDB + +namespaces_4=$(reindexer_tool --dsn $ADDRESS$DB_NAME --command '\namespaces list'); +echo $namespaces_4; +CompareNamespacesLists "${namespaces_4[@]}" "${namespaces_3[@]}" $server_pid; + +memstats_4=$(reindexer_tool --dsn $ADDRESS$DB_NAME --command 'select replication.data_hash, replication.data_count from #memstats'); +CompareMemstats "${memstats_4[@]}" "${memstats_3[@]}" $server_pid; + +KillAndRemoveServer $server_pid; +rm -rf $DB_PATH; diff --git a/cpp_src/cmd/reindexer_tool/CMakeLists.txt b/cpp_src/cmd/reindexer_tool/CMakeLists.txt index 0d6bd7117..4b39f2425 100644 --- a/cpp_src/cmd/reindexer_tool/CMakeLists.txt +++ b/cpp_src/cmd/reindexer_tool/CMakeLists.txt @@ -40,7 +40,7 @@ endif() file(GLOB_RECURSE SRCS *.h *.cc) add_executable(${TARGET} ${SRCS}) -# Enable export to provide readble stacktraces +# Enable export to provide readable stacktraces set_property(TARGET ${TARGET} PROPERTY ENABLE_EXPORTS 1) if (NOT MSVC AND NOT WITH_STDLIB_DEBUG) diff --git a/cpp_src/core/cjson/cjsonbuilder.cc b/cpp_src/core/cjson/cjsonbuilder.cc index bd70aaee2..9c32c5c4d 100644 --- a/cpp_src/core/cjson/cjsonbuilder.cc +++ b/cpp_src/core/cjson/cjsonbuilder.cc @@ -31,7 +31,7 @@ CJsonBuilder CJsonBuilder::Array(int tagName, ObjType type) { return CJsonBuilder(*ser_, type, tm_, tagName); } -void CJsonBuilder::Array(int tagName, span data, int /*offset*/) { +void CJsonBuilder::Array(int tagName, span data, int /*offset*/) { ser_->PutCTag(ctag{TAG_ARRAY, tagName}); ser_->PutCArrayTag(carraytag(data.size(), TAG_UUID)); for (auto d : data) { diff --git a/cpp_src/core/cjson/cjsonbuilder.h b/cpp_src/core/cjson/cjsonbuilder.h index a64494a34..ed3d79c8d 100644 --- a/cpp_src/core/cjson/cjsonbuilder.h +++ b/cpp_src/core/cjson/cjsonbuilder.h @@ -34,28 +34,28 @@ class CJsonBuilder { } CJsonBuilder Object(std::nullptr_t) { return Object(0); } - void Array(int tagName, span data, int /*offset*/ = 0) { + void Array(int tagName, span data, int /*offset*/ = 0) { ser_->PutCTag(ctag{TAG_ARRAY, tagName}); ser_->PutCArrayTag(carraytag(data.size(), TAG_STRING)); for (auto d : data) ser_->PutVString(d); } - void Array(int tagName, span data, int offset = 0); - void Array(int tagName, span data, int /*offset*/ = 0) { + void Array(int tagName, span data, int offset = 0); + void Array(int tagName, span data, int /*offset*/ = 0) { ser_->PutCTag(ctag{TAG_ARRAY, tagName}); ser_->PutCArrayTag(carraytag(data.size(), TAG_VARINT)); for (auto d : data) ser_->PutVarint(d); } - void Array(int tagName, span data, int /*offset*/ = 0) { + void Array(int tagName, span data, int /*offset*/ = 0) { ser_->PutCTag(ctag{TAG_ARRAY, tagName}); ser_->PutCArrayTag(carraytag(data.size(), TAG_VARINT)); for (auto d : data) ser_->PutVarint(d); } - void Array(int tagName, span data, int /*offset*/ = 0) { + void Array(int tagName, span data, int /*offset*/ = 0) { ser_->PutCTag(ctag{TAG_ARRAY, tagName}); ser_->PutCArrayTag(carraytag(data.size(), TAG_BOOL)); for (auto d : data) ser_->PutBool(d); } - void Array(int tagName, span data, int /*offset*/ = 0) { + void Array(int tagName, span data, int /*offset*/ = 0) { ser_->PutCTag(ctag{TAG_ARRAY, tagName}); ser_->PutCArrayTag(carraytag(data.size(), TAG_DOUBLE)); for (auto d : data) ser_->PutDouble(d); diff --git a/cpp_src/core/cjson/cjsondecoder.cc b/cpp_src/core/cjson/cjsondecoder.cc index a2022694e..10bd1a7f8 100644 --- a/cpp_src/core/cjson/cjsondecoder.cc +++ b/cpp_src/core/cjson/cjsondecoder.cc @@ -11,6 +11,7 @@ bool CJsonDecoder::decodeCJson(Payload &pl, Serializer &rdser, WrSerializer &wrs const ctag tag = rdser.GetCTag(); TagType tagType = tag.Type(); if (tagType == TAG_END) { + recoder.Serialize(wrser); wrser.PutCTag(kCTagEnd); return false; } @@ -70,7 +71,7 @@ bool CJsonDecoder::decodeCJson(Payload &pl, Serializer &rdser, WrSerializer &wrs [&](OneOf) { wrser.PutCTag(ctag{fieldType.ToTagType(), tagName, field}); }, - [&](OneOf) { assertrx(0); }); + [&](OneOf) { assertrx(false); }); } } } else { diff --git a/cpp_src/core/cjson/cjsondecoder.h b/cpp_src/core/cjson/cjsondecoder.h index 290edb4e7..69221c7b6 100644 --- a/cpp_src/core/cjson/cjsondecoder.h +++ b/cpp_src/core/cjson/cjsondecoder.h @@ -1,8 +1,8 @@ #pragma once +#include #include "core/cjson/tagspath.h" #include "core/payload/fieldsset.h" -#include #include "core/payload/payloadiface.h" #include "core/type_consts.h" @@ -17,8 +17,10 @@ class Recoder { [[nodiscard]] virtual TagType Type(TagType oldTagType) = 0; virtual void Recode(Serializer &, WrSerializer &) const = 0; virtual void Recode(Serializer &, Payload &, int tagName, WrSerializer &) = 0; - [[nodiscard]] virtual bool Match(int field) const noexcept = 0; - [[nodiscard]] virtual bool Match(const TagsPath &) const noexcept = 0; + [[nodiscard]] virtual bool Match(int field) noexcept = 0; + [[nodiscard]] virtual bool Match(TagType, const TagsPath &) = 0; + virtual void Serialize(WrSerializer &wrser) = 0; + virtual bool Reset() = 0; virtual ~Recoder() = default; }; @@ -91,9 +93,10 @@ class CJsonDecoder { public: RX_ALWAYS_INLINE DummyRecoder MakeCleanCopy() const noexcept { return DummyRecoder(); } RX_ALWAYS_INLINE bool Recode(Serializer &, WrSerializer &) const noexcept { return false; } - RX_ALWAYS_INLINE bool Recode(Serializer &, Payload &, [[maybe_unused]] int tagName, WrSerializer &) const noexcept { return false; } - RX_ALWAYS_INLINE TagType RegisterTagType(TagType oldTagType, [[maybe_unused]] int field) const noexcept { return oldTagType; } - RX_ALWAYS_INLINE TagType RegisterTagType(TagType oldTagType, const TagsPath &) const noexcept { return oldTagType; } + RX_ALWAYS_INLINE bool Recode(Serializer &, Payload &, int, WrSerializer &) const noexcept { return false; } + RX_ALWAYS_INLINE TagType RegisterTagType(TagType tagType, int) const noexcept { return tagType; } + RX_ALWAYS_INLINE TagType RegisterTagType(TagType tagType, const TagsPath &) const noexcept { return tagType; } + RX_ALWAYS_INLINE void Serialize(WrSerializer &) const {} }; class DefaultRecoder { public: @@ -107,20 +110,21 @@ class CJsonDecoder { } return needToRecode_; } - RX_ALWAYS_INLINE bool Recode(Serializer &s, Payload &p, int tagName, WrSerializer &wser) const { + RX_ALWAYS_INLINE bool Recode(Serializer &ser, Payload &pl, int tagName, WrSerializer &wser) const { if (needToRecode_) { - r_->Recode(s, p, tagName, wser); + r_->Recode(ser, pl, tagName, wser); } return needToRecode_; } - RX_ALWAYS_INLINE TagType RegisterTagType(TagType oldTagType, int field) { + RX_ALWAYS_INLINE TagType RegisterTagType(TagType tagType, int field) { needToRecode_ = r_->Match(field); - return needToRecode_ ? r_->Type(oldTagType) : oldTagType; + return needToRecode_ ? r_->Type(tagType) : tagType; } - RX_ALWAYS_INLINE TagType RegisterTagType(TagType oldTagType, const TagsPath &tagsPath) { - needToRecode_ = r_->Match(tagsPath); - return needToRecode_ ? r_->Type(oldTagType) : oldTagType; + RX_ALWAYS_INLINE TagType RegisterTagType(TagType tagType, const TagsPath &tagsPath) { + needToRecode_ = r_->Match(tagType, tagsPath); + return needToRecode_ ? r_->Type(tagType) : tagType; } + RX_ALWAYS_INLINE void Serialize(WrSerializer &wser) const { r_->Serialize(wser); } private: Recoder *r_; @@ -143,9 +147,9 @@ class CJsonDecoder { #ifdef RX_WITH_STDLIB_DEBUG std::abort(); #else - // Search of the indexed fields inside the object arrays is not imlpemented - // Possible implementation has noticable negative effect on 'FromCJSONPKOnly' benchmark. - // Currently we are using filter for PKs only, and PKs can not be arrays, so this code actually will never be called at the + // Search of the indexed fields inside the object arrays is not implemented + // Possible implementation has noticeable negative impact on 'FromCJSONPKOnly' benchmark. + // Currently, we are using filter for PKs only, and PKs can not be arrays, so this code actually will never be called at the // current moment decodeCJson(pl, rdSer, wrSer, DummyFilter(), recoder, NamelessTagOpt{}); #endif // RX_WITH_STDLIB_DEBUG diff --git a/cpp_src/core/cjson/cjsonmodifier.cc b/cpp_src/core/cjson/cjsonmodifier.cc index 72e970bd6..ab6506079 100644 --- a/cpp_src/core/cjson/cjsonmodifier.cc +++ b/cpp_src/core/cjson/cjsonmodifier.cc @@ -25,8 +25,7 @@ class CJsonModifier::Context { throw Error(errParams, "Array item should not be an empty value"); } } - - std::fill(fieldsArrayOffsets.begin(), fieldsArrayOffsets.end(), 0); + fieldsArrayOffsets.fill(0); } [[nodiscard]] bool IsForAllItems() const noexcept { return isForAllItems_; } @@ -187,7 +186,7 @@ void CJsonModifier::setArray(Context &ctx) const { const bool isObjsArr = (type == TAG_OBJECT); for (const auto &item : ctx.value) { if (isObjsArr) { - type = kvType2Tag(item.Type()); + type = item.Type().ToTagType(); ctx.wrser.PutCTag(ctag{type}); } copyCJsonValue(type, item, ctx.wrser); @@ -243,7 +242,7 @@ void CJsonModifier::writeCTag(const ctag &tag, Context &ctx) { } void CJsonModifier::updateArray(TagType atagType, uint32_t count, int tagName, Context &ctx) { - assertrx_throw(!ctx.value.IsArrayValue()); // Unable to update array's element with array-value + assertrx_throw(!ctx.value.IsArrayValue()); // Unable to update array's element with array-value Variant value; if (!ctx.value.empty()) { @@ -252,7 +251,7 @@ void CJsonModifier::updateArray(TagType atagType, uint32_t count, int tagName, C // situation is possible when array was homogeneous, and new element of different type is added // in this case array must change type and become heterogeneous - const auto valueType = kvType2Tag(value.Type()); + const auto valueType = value.Type().ToTagType(); assertrx((atagType != valueType) || (atagType != TAG_OBJECT)); ctx.wrser.PutCArrayTag(carraytag{count, TAG_OBJECT}); @@ -318,7 +317,7 @@ void CJsonModifier::copyArray(int tagName, Context &ctx) { value = ctx.value.front(); } // situation is possible when array was homogeneous, and new element of different type is added - const auto valueType = kvType2Tag(value.Type()); + const auto valueType = value.Type().ToTagType(); if ((atagType != valueType) && (atagType != TAG_OBJECT)) { // back to beginning of array and rewrite as an array of objects ctx.rdser.SetPos(rdserPos); @@ -396,7 +395,7 @@ bool CJsonModifier::updateFieldInTuple(Context &ctx) { throw Error(errLogic, "Update value for field [%s] cannot be empty", tagsMatcher_.tag2name(tagName)); } else if (ctx.value.size() == 1) { const auto item = ctx.value.front(); - copyCJsonValue(kvType2Tag(item.Type()), item, ctx.wrser); + copyCJsonValue(item.Type().ToTagType(), item, ctx.wrser); } else { throw Error(errParams, "Unexpected value to update"); } @@ -540,7 +539,7 @@ bool CJsonModifier::buildCJSON(Context &ctx) { } if (tagType == TAG_ARRAY) { - const carraytag atag{isIndexed(field) ? carraytag(ctx.rdser.GetVarUint(), kvType2Tag(pt_.Field(tag.Field()).Type())) + const carraytag atag{isIndexed(field) ? carraytag(ctx.rdser.GetVarUint(), pt_.Field(tag.Field()).Type().ToTagType()) : ctx.rdser.GetCArrayTag()}; ctx.wrser.PutCArrayTag(atag); const auto arrSize = atag.Count(); diff --git a/cpp_src/core/cjson/cjsontools.cc b/cpp_src/core/cjson/cjsontools.cc index d8d3dc40d..4b95f3761 100644 --- a/cpp_src/core/cjson/cjsontools.cc +++ b/cpp_src/core/cjson/cjsontools.cc @@ -4,29 +4,17 @@ namespace reindexer { -TagType kvType2Tag(KeyValueType kvType) { - return kvType.EvaluateOneOf([](OneOf) noexcept { return TAG_VARINT; }, - [](KeyValueType::Bool) noexcept { return TAG_BOOL; }, - [](KeyValueType::Double) noexcept { return TAG_DOUBLE; }, - [](KeyValueType::String) noexcept { return TAG_STRING; }, - [](OneOf) noexcept { return TAG_NULL; }, - [](KeyValueType::Uuid) noexcept { return TAG_UUID; }, - [kvType](OneOf) -> TagType { - throw Error(errLogic, "Unexpected value type: '%s'", kvType.Name()); - }); -} - TagType arrayKvType2Tag(const VariantArray &values) { if (values.empty()) { return TAG_NULL; } auto it = values.begin(); - const auto type = kvType2Tag(it->Type()); + const auto type = it->Type().ToTagType(); ++it; for (auto end = values.end(); it != end; ++it) { - if (type != kvType2Tag(it->Type())) { + if (type != it->Type().ToTagType()) { return TAG_OBJECT; // heterogeneously array detected } } @@ -83,7 +71,7 @@ void putCJsonValue(TagType tagType, int tagName, const VariantArray &values, WrS wrser.PutCArrayTag(carraytag{values.size(), elemType}); if (elemType == TAG_OBJECT) { for (const Variant &value : values) { - auto itemType = kvType2Tag(value.Type()); + auto itemType = value.Type().ToTagType(); wrser.PutCTag(ctag{itemType}); copyCJsonValue(itemType, value, wrser); } diff --git a/cpp_src/core/cjson/cjsontools.h b/cpp_src/core/cjson/cjsontools.h index 48448ef40..dddfbf2a5 100644 --- a/cpp_src/core/cjson/cjsontools.h +++ b/cpp_src/core/cjson/cjsontools.h @@ -12,7 +12,6 @@ void copyCJsonValue(TagType tagType, const Variant &value, WrSerializer &wrser); void putCJsonRef(TagType tagType, int tagName, int tagField, const VariantArray &values, WrSerializer &wrser); void putCJsonValue(TagType tagType, int tagName, const VariantArray &values, WrSerializer &wrser); -[[nodiscard]] TagType kvType2Tag(KeyValueType kvType); [[nodiscard]] TagType arrayKvType2Tag(const VariantArray &values); void skipCjsonTag(ctag tag, Serializer &rdser, std::array *fieldsArrayOffsets = nullptr); [[nodiscard]] Variant cjsonValueToVariant(TagType tag, Serializer &rdser, KeyValueType dstType); diff --git a/cpp_src/core/cjson/defaultvaluecoder.cc b/cpp_src/core/cjson/defaultvaluecoder.cc new file mode 100644 index 000000000..6c6b5eb96 --- /dev/null +++ b/cpp_src/core/cjson/defaultvaluecoder.cc @@ -0,0 +1,161 @@ +#include "defaultvaluecoder.h" + +namespace reindexer { + +DefaultValueCoder::DefaultValueCoder(std::string_view ns, const PayloadFieldType &fld, std::vector &&tps, int16_t fieldIdx) + : ns_(ns), + field_(fld.Name()), + tags_(std::move(tps)), + fieldIdx_(fieldIdx), + type_(fld.Type().ToTagType()), + array_(fld.IsArray()), + basePath_(&tags_.front()) {} + +bool DefaultValueCoder::Match(int field) noexcept { + // non-nested field present in tuple + if ((field == fieldIdx_) && ready()) { + state_ = State::found; + } + return false; // returned result is always same +} + +bool DefaultValueCoder::Match(TagType tt, const TagsPath &tp) { + static const bool result = false; // returned result is always same + + // nothing to look for (start tuple global object) + if (tp.empty()) { + state_ = State::wait; + inArray_ = false; + arrField_ = 0; + return result; + } + + // found\recorded earlier + if ((state_ == State::found) || ((state_ == State::write) && !inArray_)) { + return result; + } + + // check if active array has been processed + const bool arrayTag = (tt == TAG_ARRAY); + if (inArray_) { + inArray_ = ((tt == TAG_OBJECT) || arrayTag) ? (tp.back() == arrField_) : (tp[tp.size() - 2] == arrField_); // -2 pre-last item + // recorded earlier - stop it + if (!inArray_ && (state_ == State::write)) { + return result; + } + } + + // try match nested field + if (tt == TAG_OBJECT) { + assertrx(state_ != State::found); + match(tp); + return result; + } + + // may be end element of adjacent nested field + if (arrayTag) { + inArray_ = (tp.front() == basePath_->front()); + arrField_ = tp.back(); + } + + // not nested + if (copyPos_ == 0) { + return result; + } + + // detect array insertion into array (not supported) + if (arrayTag && array_) { + state_ = State::found; // do nothing + } else if ((tp.front() == basePath_->front()) && (tp.size() > basePath_->size())) { + ++nestingLevel_; + } + + return result; +} + +void DefaultValueCoder::Serialize(WrSerializer &wrser) { + if (blocked()) { + return; // skip processing + } + + // skip nested levels + if ((basePath_->size() > 1) || (nestingLevel_ > 1)) { + assertrx(nestingLevel_ > 0); + --nestingLevel_; + + // new field - move to valid level + if (nestingLevel_ > copyPos_) { + return; + } + } + + write(wrser); + Reset(); + state_ = State::write; +} + +bool DefaultValueCoder::Reset() noexcept { + nestingLevel_ = 1; + copyPos_ = 0; + // NOTE: return true when updating tuple + return (state_ == State::write); +} + +void DefaultValueCoder::match(const TagsPath &tp) { + ++nestingLevel_; + + for (auto &path : tags_) { + if (path.front() != tp.front()) { + continue; + } + + copyPos_ = 1; + auto pathSize = path.size(); + auto sz = std::min(pathSize, tp.size()); + for (size_t idx = 1; idx < sz; ++idx) { + if (path[idx] != tp[idx]) { + break; + } + ++copyPos_; + + // we are trying to add field with non-nested paths, but an intersection was found in additional nested paths. + // Stop, throw an error + if (tags_.front().size() == 1) { + throw Error(errLogic, "Cannot add field with name '%s' to namespace '%s'. One of nested json paths is already in use", + field_, ns_); + } + } + state_ = State::match; + basePath_ = &path; + break; + } +} + +void DefaultValueCoder::write(WrSerializer &wrser) const { + int32_t nestedObjects = 0; + for (size_t idx = copyPos_, sz = basePath_->size(); idx < sz; ++idx) { + auto tagName = (*basePath_)[idx]; + // real index field in last tag + const bool finalTag = (idx == (sz - 1)); + if (finalTag) { + if (array_) { + wrser.PutCTag(ctag{TAG_ARRAY, tagName, fieldIdx_}); + wrser.PutVarUint(0); + } else { + wrser.PutCTag(ctag{type_, tagName, fieldIdx_}); + } + break; + } + + // start nested object + wrser.PutCTag(ctag{TAG_OBJECT, tagName}); + ++nestedObjects; + } + + // add end tags to all objects + while (nestedObjects-- > 0) { + wrser.PutCTag(kCTagEnd); + } +} + +} // namespace reindexer diff --git a/cpp_src/core/cjson/defaultvaluecoder.h b/cpp_src/core/cjson/defaultvaluecoder.h new file mode 100644 index 000000000..3cf9a5dac --- /dev/null +++ b/cpp_src/core/cjson/defaultvaluecoder.h @@ -0,0 +1,41 @@ +#pragma once + +#include "cjsondecoder.h" + +namespace reindexer { + +class DefaultValueCoder : public Recoder { +public: + DefaultValueCoder(std::string_view ns, const PayloadFieldType &fld, std::vector &&tps, int16_t fieldIdx); + RX_ALWAYS_INLINE TagType Type(TagType tt) noexcept override final { return tt; } + [[nodiscard]] bool Match(int f) noexcept override final; + [[nodiscard]] bool Match(TagType tt, const TagsPath &tp) override final; + RX_ALWAYS_INLINE void Recode(Serializer &, WrSerializer &) const noexcept override final { assertrx(false); } + RX_ALWAYS_INLINE void Recode(Serializer &, Payload &, int, WrSerializer &) noexcept override final { assertrx(false); } + void Serialize(WrSerializer &wrser) override final; + bool Reset() noexcept override final; + +private: + void match(const TagsPath &tp); + void write(WrSerializer &wrser) const; + [[nodiscard]] RX_ALWAYS_INLINE bool blocked() const noexcept { return ((state_ == State::found) || (state_ == State::write)); } + [[nodiscard]] RX_ALWAYS_INLINE bool ready() const noexcept { return ((state_ == State::wait) || (state_ == State::match)); } + +private: + const std::string ns_; + const std::string field_; + const std::vector tags_; + const int16_t fieldIdx_{0}; + const TagType type_; + const bool array_{false}; + + const TagsPath *basePath_{nullptr}; + enum class State { wait, found, match, write } state_{State::wait}; + uint32_t nestingLevel_{1}; + uint32_t copyPos_{0}; + + bool inArray_{false}; + int16_t arrField_{0}; +}; + +} // namespace reindexer diff --git a/cpp_src/core/cjson/jsonbuilder.h b/cpp_src/core/cjson/jsonbuilder.h index ea8b82e65..813a2fd42 100644 --- a/cpp_src/core/cjson/jsonbuilder.h +++ b/cpp_src/core/cjson/jsonbuilder.h @@ -31,12 +31,12 @@ class JsonBuilder { JsonBuilder Array(int tagName, int size = KUnknownFieldSize) { return Array(getNameByTag(tagName), size); } template - void Array(int tagName, span data, int /*offset*/ = 0) { + void Array(int tagName, span data, int /*offset*/ = 0) { JsonBuilder node = Array(tagName); for (const auto &d : data) node.Put({}, d); } template - void Array(std::string_view n, span data, int /*offset*/ = 0) { + void Array(std::string_view n, span data, int /*offset*/ = 0) { JsonBuilder node = Array(n); for (const auto &d : data) node.Put({}, d); } diff --git a/cpp_src/core/cjson/jsondecoder.cc b/cpp_src/core/cjson/jsondecoder.cc index 5a5ebf3da..aa153a828 100644 --- a/cpp_src/core/cjson/jsondecoder.cc +++ b/cpp_src/core/cjson/jsondecoder.cc @@ -70,9 +70,9 @@ void JsonDecoder::decodeJsonObject(Payload &pl, CJsonBuilder &builder, const gas case gason::JSON_FALSE: { validateNonArrayFieldRestrictions(objectScalarIndexes_, pl, f, field, isInArray(), "json"); objectScalarIndexes_.set(field); - Variant v = jsonValue2Variant(elem.value, f.Type(), f.Name()); - builder.Ref(tagName, v, field); - pl.Set(field, std::move(v), true); + Variant value = jsonValue2Variant(elem.value, f.Type(), f.Name()); + builder.Ref(tagName, value, field); + pl.Set(field, std::move(value), true); } break; default: throw Error(errLogic, "Unexpected '%d' tag", elem.value.getTag()); diff --git a/cpp_src/core/cjson/protobufbuilder.h b/cpp_src/core/cjson/protobufbuilder.h index 0a75fa5db..22ac1e7c0 100644 --- a/cpp_src/core/cjson/protobufbuilder.h +++ b/cpp_src/core/cjson/protobufbuilder.h @@ -70,7 +70,7 @@ class ProtobufBuilder { template ::value || std::is_floating_point::value || std::is_same::value>::type* = nullptr> - void Array(int fieldIdx, span data, int /*offset*/ = 0) { + void Array(int fieldIdx, span data, int /*offset*/ = 0) { auto array = ArrayPacked(fieldIdx); for (const T& item : data) { array.put(0, item); @@ -78,13 +78,13 @@ class ProtobufBuilder { } template ::value>::type* = nullptr> - void Array(int fieldIdx, span data, int /*offset*/ = 0) { + void Array(int fieldIdx, span data, int /*offset*/ = 0) { auto array = ArrayNotPacked(fieldIdx); for (const T& item : data) { array.put(fieldIdx, std::string_view(item)); } } - void Array(int fieldIdx, span data, int /*offset*/ = 0) { + void Array(int fieldIdx, span data, int /*offset*/ = 0) { auto array = ArrayNotPacked(fieldIdx); for (Uuid item : data) { array.put(fieldIdx, item); diff --git a/cpp_src/core/cjson/tagsmatcher.h b/cpp_src/core/cjson/tagsmatcher.h index c410a0065..a69a31e71 100644 --- a/cpp_src/core/cjson/tagsmatcher.h +++ b/cpp_src/core/cjson/tagsmatcher.h @@ -28,15 +28,15 @@ class TagsMatcher { auto res = path2tag(jsonPath); return res.empty() && canAdd ? impl_.clone()->path2tag(jsonPath, canAdd, updated_) : res; } - IndexedTagsPath path2indexedtag(std::string_view jsonPath, const IndexExpressionEvaluator& ev) const { - IndexedTagsPath tagsPath = impl_->path2indexedtag(jsonPath, ev); + IndexedTagsPath path2indexedtag(std::string_view jsonPath) const { + IndexedTagsPath tagsPath = impl_->path2indexedtag(jsonPath); assertrx(!updated_); return tagsPath; } - IndexedTagsPath path2indexedtag(std::string_view jsonPath, const IndexExpressionEvaluator& ev, bool canAdd) { + IndexedTagsPath path2indexedtag(std::string_view jsonPath, bool canAdd) { if (jsonPath.empty()) return IndexedTagsPath(); - auto res = impl_->path2indexedtag(jsonPath, ev); - return res.empty() && canAdd ? impl_.clone()->path2indexedtag(jsonPath, ev, canAdd, updated_) : res; + auto res = impl_->path2indexedtag(jsonPath); + return res.empty() && canAdd ? impl_.clone()->path2indexedtag(jsonPath, canAdd, updated_) : res; } int version() const noexcept { return impl_->version(); } size_t size() const noexcept { return impl_->size(); } diff --git a/cpp_src/core/cjson/tagsmatcherimpl.h b/cpp_src/core/cjson/tagsmatcherimpl.h index 8d50e90f2..c23c757e3 100644 --- a/cpp_src/core/cjson/tagsmatcherimpl.h +++ b/cpp_src/core/cjson/tagsmatcherimpl.h @@ -49,12 +49,12 @@ class TagsMatcherImpl { return fieldTags; } - IndexedTagsPath path2indexedtag(std::string_view jsonPath, const IndexExpressionEvaluator &ev) const { + IndexedTagsPath path2indexedtag(std::string_view jsonPath) const { bool updated = false; - return const_cast(this)->path2indexedtag(jsonPath, ev, false, updated); + return const_cast(this)->path2indexedtag(jsonPath, false, updated); } - IndexedTagsPath path2indexedtag(std::string_view jsonPath, const IndexExpressionEvaluator &ev, bool canAdd, bool &updated) { + IndexedTagsPath path2indexedtag(std::string_view jsonPath, bool canAdd, bool &updated) { using namespace std::string_view_literals; IndexedTagsPath fieldTags; for (size_t pos = 0, lastPos = 0; pos != jsonPath.length(); lastPos = pos + 1) { @@ -80,22 +80,7 @@ class TagsMatcherImpl { } else { auto index = try_stoi(content); if (!index) { - if (ev) { - VariantArray values = ev(content); - if (values.size() != 1) { - throw Error(errParams, "Index expression_ has wrong syntax: '%s'", content); - } - values.front().Type().EvaluateOneOf( - [](OneOf) noexcept {}, - [&](OneOf) { - throw Error(errParams, "Wrong type of index: '%s'", content); - }); - node.SetExpression(content); - index = values.front().As(); - } else { - throw Error(errParams, "Can't convert '%s' to number", content); - } + throw Error(errParams, "Can't convert '%s' to number", content); } if (index < 0) { throw Error(errLogic, "Array index value cannot be negative"); diff --git a/cpp_src/core/cjson/tagspath.h b/cpp_src/core/cjson/tagspath.h index 01faf64d0..390c2d9db 100644 --- a/cpp_src/core/cjson/tagspath.h +++ b/cpp_src/core/cjson/tagspath.h @@ -106,17 +106,28 @@ class IndexedTagsPathImpl : public h_vector { return true; } bool Compare(const TagsPath &obj) const noexcept { - if (obj.size() != this->size()) return false; - for (size_t i = 0; i < this->size(); ++i) { - if (this->operator[](i).NameTag() != obj[i]) return false; + const auto sz = this->size(); + if (obj.size() != sz) { + return false; + } + for (size_t i = 0; i < sz; ++i) { + if ((*this)[i].NameTag() != obj[i]) return false; + } + return true; + } + bool IsNestedOrEqualTo(const TagsPath &obj) const noexcept { + const auto sz = this->size(); + if (sz > obj.size()) { + return false; + } + for (size_t i = 0; i < sz; ++i) { + if ((*this)[i].NameTag() != obj[i]) return false; } return true; } }; using IndexedTagsPath = IndexedTagsPathImpl<6>; -using IndexExpressionEvaluator = std::function; - template class TagsPathScope { public: diff --git a/cpp_src/core/cjson/uuid_recoders.h b/cpp_src/core/cjson/uuid_recoders.h index fbc0d60fe..c74af8e48 100644 --- a/cpp_src/core/cjson/uuid_recoders.h +++ b/cpp_src/core/cjson/uuid_recoders.h @@ -7,7 +7,7 @@ namespace reindexer { template class RecoderUuidToString : public Recoder { public: - RecoderUuidToString(TagsPath tp) noexcept : tagsPath_{std::move(tp)} {} + explicit RecoderUuidToString(TagsPath tp) noexcept : tagsPath_{std::move(tp)} {} [[nodiscard]] TagType Type([[maybe_unused]] TagType oldTagType) noexcept override final { if constexpr (Array) { assertrx(oldTagType == TAG_ARRAY); @@ -18,9 +18,11 @@ class RecoderUuidToString : public Recoder { } } void Recode(Serializer &, WrSerializer &) const override final; - void Recode(Serializer &, Payload &, int /*tagName*/, WrSerializer &) override final { assertrx(0); } - [[nodiscard]] bool Match(int) const noexcept override final { return false; } - [[nodiscard]] bool Match(const TagsPath &tp) const noexcept override final { return tagsPath_ == tp; } + void Recode(Serializer &, Payload &, int, WrSerializer &) override final { assertrx(false); } + [[nodiscard]] bool Match(int) noexcept override final { return false; } + [[nodiscard]] bool Match(TagType, const TagsPath &tp) noexcept override final { return tagsPath_ == tp; } + void Serialize(WrSerializer &) override final {} + bool Reset() override final { return false; } private: TagsPath tagsPath_; @@ -44,7 +46,7 @@ inline void RecoderUuidToString::Recode(Serializer &rdser, WrSerializer &w class RecoderStringToUuidArray : public Recoder { public: - RecoderStringToUuidArray(int f) noexcept : field_{f} {} + explicit RecoderStringToUuidArray(int f) noexcept : field_{f} {} [[nodiscard]] TagType Type(TagType oldTagType) override final { fromNotArrayField_ = oldTagType != TAG_ARRAY; if (fromNotArrayField_ && oldTagType != TAG_STRING) { @@ -52,9 +54,9 @@ class RecoderStringToUuidArray : public Recoder { } return TAG_ARRAY; } - [[nodiscard]] bool Match(int f) const noexcept override final { return f == field_; } - [[nodiscard]] bool Match(const TagsPath &) const noexcept override final { return false; } - void Recode(Serializer &, WrSerializer &) const override final { assertrx(0); } + [[nodiscard]] bool Match(int f) noexcept override final { return f == field_; } + [[nodiscard]] bool Match(TagType, const TagsPath &) noexcept override final { return false; } + void Recode(Serializer &, WrSerializer &) const override final { assertrx(false); } void Recode(Serializer &rdser, Payload &pl, int tagName, WrSerializer &wrser) override final { if (fromNotArrayField_) { pl.Set(field_, Variant{rdser.GetStrUuid()}, true); @@ -76,16 +78,18 @@ class RecoderStringToUuidArray : public Recoder { wrser.PutVarUint(count); } } + void Serialize(WrSerializer &) override final {} + bool Reset() override final { return false; } private: + const int field_{std::numeric_limits::max()}; VariantArray varBuf_; - int field_; bool fromNotArrayField_{false}; }; class RecoderStringToUuid : public Recoder { public: - RecoderStringToUuid(int f) noexcept : field_{f} {} + explicit RecoderStringToUuid(int f) noexcept : field_{f} {} [[nodiscard]] TagType Type(TagType oldTagType) override final { if (oldTagType == TAG_ARRAY) { throw Error(errLogic, "Cannot convert array field to not array UUID"); @@ -94,16 +98,18 @@ class RecoderStringToUuid : public Recoder { } return TAG_UUID; } - [[nodiscard]] bool Match(int f) const noexcept override final { return f == field_; } - [[nodiscard]] bool Match(const TagsPath &) const noexcept override final { return false; } - void Recode(Serializer &, WrSerializer &) const override final { assertrx(0); } + [[nodiscard]] bool Match(int f) noexcept override final { return f == field_; } + [[nodiscard]] bool Match(TagType, const TagsPath &) noexcept override final { return false; } + void Recode(Serializer &, WrSerializer &) const override final { assertrx(false); } void Recode(Serializer &rdser, Payload &pl, int tagName, WrSerializer &wrser) override final { pl.Set(field_, Variant{rdser.GetStrUuid()}, true); wrser.PutCTag(ctag{TAG_UUID, tagName, field_}); } + void Serialize(WrSerializer &) override final {} + bool Reset() override final { return false; } private: - int field_; + const int field_{std::numeric_limits::max()}; }; } // namespace reindexer diff --git a/cpp_src/core/ft/config/baseftconfig.cc b/cpp_src/core/ft/config/baseftconfig.cc index 162b6cf1b..1a1046b91 100644 --- a/cpp_src/core/ft/config/baseftconfig.cc +++ b/cpp_src/core/ft/config/baseftconfig.cc @@ -15,7 +15,6 @@ void BaseFTConfig::parseBase(const gason::JsonNode &root) { enableTranslit = root["enable_translit"].As<>(enableTranslit); enableNumbersSearch = root["enable_numbers_search"].As<>(enableNumbersSearch); enableKbLayout = root["enable_kb_layout"].As<>(enableKbLayout); - enableWarmupOnNsCopy = root["enable_warmup_on_ns_copy"].As<>(enableWarmupOnNsCopy); mergeLimit = root["merge_limit"].As<>(mergeLimit, kMinMergeLimitValue, kMaxMergeLimitValue); logLevel = root["log_level"].As<>(logLevel, 0, 5); extraWordSymbols = root["extra_word_symbols"].As<>(extraWordSymbols); @@ -74,7 +73,6 @@ void BaseFTConfig::getJson(JsonBuilder &jsonBuilder) const { jsonBuilder.Put("enable_translit", enableTranslit); jsonBuilder.Put("enable_numbers_search", enableNumbersSearch); jsonBuilder.Put("enable_kb_layout", enableKbLayout); - jsonBuilder.Put("enable_warmup_on_ns_copy", enableWarmupOnNsCopy); jsonBuilder.Put("merge_limit", mergeLimit); jsonBuilder.Put("log_level", logLevel); jsonBuilder.Put("extra_word_symbols", extraWordSymbols); diff --git a/cpp_src/core/ft/config/baseftconfig.h b/cpp_src/core/ft/config/baseftconfig.h index 7c41ba349..ec0ec07f9 100644 --- a/cpp_src/core/ft/config/baseftconfig.h +++ b/cpp_src/core/ft/config/baseftconfig.h @@ -33,7 +33,6 @@ class BaseFTConfig { bool enableTranslit = true; bool enableKbLayout = true; bool enableNumbersSearch = false; - bool enableWarmupOnNsCopy = false; StopWordsSetT stopWords; std::vector synonyms; diff --git a/cpp_src/core/ft/filters/translit.cc b/cpp_src/core/ft/filters/translit.cc index 7a882d309..114db5807 100644 --- a/cpp_src/core/ft/filters/translit.cc +++ b/cpp_src/core/ft/filters/translit.cc @@ -1,6 +1,7 @@ #include "translit.h" #include #include +#include "estl/span.h" namespace reindexer { @@ -12,6 +13,11 @@ Translit::Translit() { void Translit::GetVariants(const std::wstring &data, std::vector &result, int proc) { std::wstring strings[maxTranslitVariants]; Context ctx; + if (data.length()) { + for (int j = 0; j < maxTranslitVariants; ++j) { + strings[j].reserve(data.length()); + } + } for (size_t i = 0; i < data.length(); ++i) { wchar_t symbol = data[i]; @@ -43,18 +49,18 @@ void Translit::GetVariants(const std::wstring &data, std::vector & ctx.Clear(); } } - - std::wstring result_string; - + int64_t lastResultIdx = -1; for (int i = 0; i < maxTranslitVariants; ++i) { - auto &curent = strings[i]; - bool skip = false; + auto ¤t = strings[i]; for (int j = i + 1; j < maxTranslitVariants; ++j) { - if (curent == strings[j]) skip = true; + if (current == strings[j]) { + current.clear(); + break; + } } - if (!skip && curent != result_string && curent.length()) { - result_string = curent; - result.emplace_back(std::move(curent), proc); + if (current.length() && (lastResultIdx < 0 || current != result[lastResultIdx].pattern)) { + lastResultIdx = result.size(); + result.emplace_back(std::move(current), proc); } } } diff --git a/cpp_src/core/ft/ft_fast/selecter.cc b/cpp_src/core/ft/ft_fast/selecter.cc index fd5469ab8..56ba3787e 100644 --- a/cpp_src/core/ft/ft_fast/selecter.cc +++ b/cpp_src/core/ft/ft_fast/selecter.cc @@ -33,6 +33,7 @@ void Selecter::prepareVariants(std::vector& variants, RV variants.clear(); std::vector variantsUtf16{{term.pattern, holder_.cfg_->rankingConfig.fullMatch}}; + variantsUtf16.reserve(256); if (synonymsDsl && (!holder_.cfg_->enableNumbersSearch || !term.opts.number)) { // Make translit and kblayout variants @@ -132,34 +133,7 @@ RX_NO_INLINE MergeData Selecter::Process(FtDSLQuery&& dsl, bool inTransa } if rx_unlikely (holder_.cfg_->logLevel >= LogInfo) { - WrSerializer wrSer; - wrSer << "variants: ["; - for (auto& variant : ctx.variants) { - if (&variant != &*ctx.variants.begin()) wrSer << ", "; - wrSer << variant.pattern; - } - wrSer << "], variants_with_low_relevancy: ["; - for (auto& variant : ctx.lowRelVariants) { - if (&variant != &*ctx.lowRelVariants.begin()) wrSer << ", "; - wrSer << variant.pattern; - } - wrSer << "], typos: ["; - if (res.term.opts.typos) { - typos_context tctx[kMaxTyposInWord]; - mktypos(tctx, res.term.pattern, holder_.cfg_->MaxTyposInWord(), holder_.cfg_->maxTypoLen, - [&wrSer](std::string_view typo, int, const typos_context::TyposVec& positions) { - wrSer << typo; - wrSer << ":("; - for (unsigned j = 0, sz = positions.size(); j < sz; ++j) { - if (j) { - wrSer << ','; - } - wrSer << positions[j]; - } - wrSer << "), "; - }); - } - logPrintf(LogInfo, "Variants: [%s]", wrSer.Slice()); + printVariants(ctx, res); } processVariants(ctx, mergeStatuses); @@ -1028,7 +1002,7 @@ size_t Selecter::TyposHandler::Process(std::vector& r const FtDSLEntry& term) { TextSearchResults& res = rawResults.back(); const unsigned curRawResultIdx = rawResults.size() - 1; - const size_t patternSize = utf16_to_utf8(term.pattern).size(); + const size_t patternSize = utf16_to_utf8_size(term.pattern); size_t totalVids = 0; for (auto& step : holder.steps) { typos_context tctx[kMaxTyposInWord]; @@ -1375,6 +1349,38 @@ MergeData Selecter::mergeResults(std::vector&& rawRes return merged; } +template +void Selecter::printVariants(const FtSelectContext& ctx, const TextSearchResults& res) { + WrSerializer wrSer; + wrSer << "variants: ["; + for (auto& variant : ctx.variants) { + if (&variant != &*ctx.variants.begin()) wrSer << ", "; + wrSer << variant.pattern; + } + wrSer << "], variants_with_low_relevancy: ["; + for (auto& variant : ctx.lowRelVariants) { + if (&variant != &*ctx.lowRelVariants.begin()) wrSer << ", "; + wrSer << variant.pattern; + } + wrSer << "], typos: ["; + if (res.term.opts.typos) { + typos_context tctx[kMaxTyposInWord]; + mktypos(tctx, res.term.pattern, holder_.cfg_->MaxTyposInWord(), holder_.cfg_->maxTypoLen, + [&wrSer](std::string_view typo, int, const typos_context::TyposVec& positions) { + wrSer << typo; + wrSer << ":("; + for (unsigned j = 0, sz = positions.size(); j < sz; ++j) { + if (j) { + wrSer << ','; + } + wrSer << positions[j]; + } + wrSer << "), "; + }); + } + logPrintf(LogInfo, "Variants: [%s]", wrSer.Slice()); +} + template class Selecter; template MergeData Selecter::Process(FtDSLQuery&&, bool, FtMergeStatuses::Statuses&&, const RdxContext&); diff --git a/cpp_src/core/ft/ft_fast/selecter.h b/cpp_src/core/ft/ft_fast/selecter.h index 0a6537fd6..9cb546d53 100644 --- a/cpp_src/core/ft/ft_fast/selecter.h +++ b/cpp_src/core/ft/ft_fast/selecter.h @@ -57,7 +57,7 @@ class Selecter { private: struct TextSearchResult { - const IdCont* vids; // indexes of documents (vdoc) containing the given word + position + field + const IdCont* vids; // indexes of documents (vdoc) containing the given word + position + field std::string_view pattern; // word,translit,..... int proc; int16_t wordLen; @@ -256,6 +256,7 @@ class Selecter { template void processStepVariants(FtSelectContext& ctx, typename DataHolder::CommitStep& step, const FtVariantEntry& variant, unsigned curRawResultIdx, const FtMergeStatuses::Statuses& mergeStatuses, int vidsLimit); + RX_NO_INLINE void printVariants(const FtSelectContext& ctx, const TextSearchResults& res); DataHolder& holder_; size_t fieldSize_; diff --git a/cpp_src/core/ft/ft_fuzzy/baseseacher.cc b/cpp_src/core/ft/ft_fuzzy/baseseacher.cc index b1207bd9b..169367607 100644 --- a/cpp_src/core/ft/ft_fuzzy/baseseacher.cc +++ b/cpp_src/core/ft/ft_fuzzy/baseseacher.cc @@ -75,29 +75,29 @@ SearchResult BaseSearcher::Compare(const BaseHolder::Ptr &holder, const FtDSLQue std::pair pos; pos.first = 0; - std::vector rusults; + std::vector results; int max_id = 0; int min_id = INT32_MAX; if (!inTransaction) ThrowOnCancel(rdxCtx); for (auto &term : dsl) { - data_size += ParseData(holder, term.pattern, max_id, min_id, rusults, term.opts, 1); + data_size += ParseData(holder, term.pattern, max_id, min_id, results, term.opts, 1); if (holder->cfg_.enableTranslit) { searchers_[0]->GetVariants(term.pattern, data, holder->cfg_.rankingConfig.translit); - ParseData(holder, data[0].pattern, max_id, min_id, rusults, term.opts, holder->cfg_.startDefaultDecreese); + ParseData(holder, data[0].pattern, max_id, min_id, results, term.opts, holder->cfg_.startDefaultDecreese); } if (holder->cfg_.enableKbLayout) { data.clear(); searchers_[1]->GetVariants(term.pattern, data, holder->cfg_.rankingConfig.kblayout); - ParseData(holder, data[0].pattern, max_id, min_id, rusults, term.opts, holder->cfg_.startDefaultDecreese); + ParseData(holder, data[0].pattern, max_id, min_id, results, term.opts, holder->cfg_.startDefaultDecreese); } } BaseMerger mrg(max_id, min_id); - MergeCtx ctx{&rusults, &holder->cfg_, data_size, &holder->words_}; + MergeCtx ctx{&results, &holder->cfg_, data_size, &holder->words_}; auto res = mrg.Merge(ctx, inTransaction, rdxCtx); #ifdef FULL_LOG_FT diff --git a/cpp_src/core/ft/ftdsl.cc b/cpp_src/core/ft/ftdsl.cc index 92f606c1a..c14246bcd 100644 --- a/cpp_src/core/ft/ftdsl.cc +++ b/cpp_src/core/ft/ftdsl.cc @@ -21,7 +21,7 @@ static bool is_dslbegin(int ch, const std::string &extraWordSymbols) noexcept { ch == '\\'; } -void FtDSLQuery::parse(const std::string &q) { +void FtDSLQuery::parse(std::string_view q) { std::wstring utf16str; utf8_to_utf16(q, utf16str); parse(utf16str); diff --git a/cpp_src/core/ft/ftdsl.h b/cpp_src/core/ft/ftdsl.h index f4104d359..3f39f3c65 100644 --- a/cpp_src/core/ft/ftdsl.h +++ b/cpp_src/core/ft/ftdsl.h @@ -53,7 +53,7 @@ class FtDSLQuery : public RVector { FtDSLQuery(const RHashMap &fields, const StopWordsSetT &stopWords, const std::string &extraWordSymbols) noexcept : fields_(fields), stopWords_(stopWords), extraWordSymbols_(extraWordSymbols) {} void parse(std::wstring &utf16str); - void parse(const std::string &q); + void parse(std::string_view q); FtDSLQuery CopyCtx() const noexcept { return {fields_, stopWords_, extraWordSymbols_}; } protected: diff --git a/cpp_src/core/ft/idrelset.cc b/cpp_src/core/ft/idrelset.cc index d2c475b12..73aeb76eb 100644 --- a/cpp_src/core/ft/idrelset.cc +++ b/cpp_src/core/ft/idrelset.cc @@ -19,14 +19,14 @@ size_t IdRelType::pack(uint8_t* buf) const { size_t IdRelType::unpack(const uint8_t* buf, unsigned len) { auto p = buf; - assertrx(len != 0); + assertrx_dbg(len != 0); auto l = scan_varint(len, p); - assertrx(l != 0); + assertrx_dbg(l != 0); id_ = parse_uint32(l, p); p += l, len -= l; l = scan_varint(len, p); - assertrx(l != 0); + assertrx_dbg(l != 0); int sz = parse_uint32(l, p); p += l, len -= l; @@ -35,7 +35,7 @@ size_t IdRelType::unpack(const uint8_t* buf, unsigned len) { uint32_t last = 0; for (int i = 0; i < sz; i++) { l = scan_varint(len, p); - assertrx(l != 0); + assertrx_dbg(l != 0); pos_[i].fpos = parse_uint32(l, p) + last; last = pos_[i].fpos; addField(pos_[i].field()); @@ -72,6 +72,4 @@ int IdRelType::MinPositionInField(int field) const noexcept { return res; } - - } // namespace reindexer diff --git a/cpp_src/core/ft/stopwords/stop_ru.cc b/cpp_src/core/ft/stopwords/stop_ru.cc index 4de9a0f74..5b1b26d98 100644 --- a/cpp_src/core/ft/stopwords/stop_ru.cc +++ b/cpp_src/core/ft/stopwords/stop_ru.cc @@ -1,6 +1,6 @@ namespace reindexer { const char *stop_words_ru[] = { - // + // clang-format off "а", "е", "и", @@ -27,7 +27,6 @@ const char *stop_words_ru[] = { "могут", "можно", "может", - "мор", "моя", "моё", "мочь", @@ -39,7 +38,6 @@ const char *stop_words_ru[] = { "нами", "ними", "мимо", - "немного", "одной", "одного", "менее", @@ -55,16 +53,13 @@ const char *stop_words_ru[] = { "мало", "надо", "назад", - "наиболее", "недавно", "миллионов", "недалеко", "между", "низко", - "меля", "нельзя", "нибудь", - "непрерывно", "наконец", "никогда", "никуда", @@ -74,14 +69,11 @@ const char *stop_words_ru[] = { "нею", "неё", "них", - "мира", "наша", "наше", "наши", "ничего", - "начала", "нередко", - "несколько", "обычно", "опять", "около", @@ -89,8 +81,6 @@ const char *stop_words_ru[] = { "ну", "нх", "от", - "отовсюду", - "особенно", "нужно", "очень", "отсюда", @@ -121,11 +111,8 @@ const char *stop_words_ru[] = { "вдруг", "вы", "все", - "второй", "всем", "всеми", - "времени", - "время", "всему", "всего", "всегда", @@ -136,8 +123,6 @@ const char *stop_words_ru[] = { "всё", "всюду", "год", - "говорил", - "говорит", "года", "году", "где", @@ -184,7 +169,6 @@ const char *stop_words_ru[] = { "занята", "занято", "заняты", - "действительно", "давно", "даже", "алло", @@ -197,7 +181,6 @@ const char *stop_words_ru[] = { "лет", "зато", "даром", - "первый", "перед", "затем", "зачем", @@ -210,7 +193,6 @@ const char *stop_words_ru[] = { "при", "был", "про", - "процентов", "против", "просто", "бывает", @@ -224,7 +206,6 @@ const char *stop_words_ru[] = { "будет", "будете", "будешь", - "прекрасно", "буду", "будь", "будто", @@ -322,17 +303,13 @@ const char *stop_words_ru[] = { "самих", "саму", "чему", - "раньше", - "сейчас", "чего", - "сегодня", "себе", "тебе", "разве", "теперь", "себя", "тебя", - "седьмой", "спасибо", "слишком", "так", @@ -347,9 +324,6 @@ const char *stop_words_ru[] = { "через", "часто", "сколько", - "сказал", - "сказала", - "сказать", "ту", "ты", "эта", @@ -369,11 +343,11 @@ const char *stop_words_ru[] = { "этими", "рядом", "этих", - "третий", "тут", "эту", "суть", "чуть", "тысяч", nullptr}; -} +// clang-format on +} // namespace reindexer diff --git a/cpp_src/core/idset.h b/cpp_src/core/idset.h index e2e501c00..df16e24d3 100644 --- a/cpp_src/core/idset.h +++ b/cpp_src/core/idset.h @@ -227,5 +227,6 @@ class IdSet : public IdSetPlain { }; using IdSetRef = span; +using IdSetCRef = span; } // namespace reindexer diff --git a/cpp_src/core/index/index.h b/cpp_src/core/index/index.h index e6254dab3..5b6aac3e5 100644 --- a/cpp_src/core/index/index.h +++ b/cpp_src/core/index/index.h @@ -78,7 +78,6 @@ class Index { virtual IndexMemStat GetMemStat(const RdxContext&) = 0; virtual int64_t GetTTLValue() const noexcept { return 0; } virtual IndexIterator::Ptr CreateIterator() const { return nullptr; } - virtual bool RequireWarmupOnNsCopy() const noexcept { return false; } virtual bool IsDestroyPartSupported() const noexcept { return false; } virtual void AddDestroyTask(tsl::detail_sparse_hash::ThreadTaskQueue&) {} diff --git a/cpp_src/core/index/indextext/fastindextext.cc b/cpp_src/core/index/indextext/fastindextext.cc index 8de162fb3..c98c4ae40 100644 --- a/cpp_src/core/index/indextext/fastindextext.cc +++ b/cpp_src/core/index/indextext/fastindextext.cc @@ -70,7 +70,7 @@ Variant FastIndexText::Upsert(const Variant &key, IdType id, bool &clearCache template void FastIndexText::Delete(const Variant &key, IdType id, StringsHolder &strHolder, bool &clearCache) { if rx_unlikely (key.Type().Is()) { - this->empty_ids_.Unsorted().Erase(id); // ignore result + this->empty_ids_.Unsorted().Erase(id); // ignore result this->isBuilt_ = false; return; } @@ -182,13 +182,13 @@ IdSet::Ptr FastIndexText::Select(FtCtx::Ptr fctx, FtDSLQuery &&dsl, bool inTr if (!fctx->NeedArea()) { if (useExternSt == FtUseExternStatuses::No) { appendMergedIds(mergeData, releventDocs, - [&fctx, &mergedIds](IdSetRef::iterator ebegin, IdSetRef::iterator eend, const MergeInfo &vid) { + [&fctx, &mergedIds](IdSetCRef::iterator ebegin, IdSetCRef::iterator eend, const MergeInfo &vid) { fctx->Add(ebegin, eend, vid.proc); mergedIds->Append(ebegin, eend, IdSet::Unordered); }); } else { appendMergedIds(mergeData, releventDocs, - [&fctx, &mergedIds, &statuses](IdSetRef::iterator ebegin, IdSetRef::iterator eend, const MergeInfo &vid) { + [&fctx, &mergedIds, &statuses](IdSetCRef::iterator ebegin, IdSetCRef::iterator eend, const MergeInfo &vid) { fctx->Add(ebegin, eend, vid.proc, statuses.rowIds); mergedIds->Append(ebegin, eend, statuses.rowIds, IdSet::Unordered); }); @@ -196,7 +196,7 @@ IdSet::Ptr FastIndexText::Select(FtCtx::Ptr fctx, FtDSLQuery &&dsl, bool inTr } else { if (useExternSt == FtUseExternStatuses::No) { appendMergedIds(mergeData, releventDocs, - [&fctx, &mergedIds, &mergeData](IdSetRef::iterator ebegin, IdSetRef::iterator eend, const MergeInfo &vid) { + [&fctx, &mergedIds, &mergeData](IdSetCRef::iterator ebegin, IdSetCRef::iterator eend, const MergeInfo &vid) { assertrx_throw(vid.areaIndex != std::numeric_limits::max()); fctx->Add(ebegin, eend, vid.proc, std::move(mergeData.vectorAreas[vid.areaIndex])); mergedIds->Append(ebegin, eend, IdSet::Unordered); @@ -204,7 +204,7 @@ IdSet::Ptr FastIndexText::Select(FtCtx::Ptr fctx, FtDSLQuery &&dsl, bool inTr } else { appendMergedIds( mergeData, releventDocs, - [&fctx, &mergedIds, &mergeData, &statuses](IdSetRef::iterator ebegin, IdSetRef::iterator eend, const MergeInfo &vid) { + [&fctx, &mergedIds, &mergeData, &statuses](IdSetCRef::iterator ebegin, IdSetCRef::iterator eend, const MergeInfo &vid) { assertrx_throw(vid.areaIndex != std::numeric_limits::max()); fctx->Add(ebegin, eend, vid.proc, statuses.rowIds, std::move(mergeData.vectorAreas[vid.areaIndex])); mergedIds->Append(ebegin, eend, statuses.rowIds, IdSet::Unordered); @@ -215,10 +215,9 @@ IdSet::Ptr FastIndexText::Select(FtCtx::Ptr fctx, FtDSLQuery &&dsl, bool inTr logPrintf(LogInfo, "Total merge out: %d ids", mergedIds->size()); std::string str; - for (size_t i = 0; i < fctx->GetSize();) { + for (size_t i = 0; i < fctx->Size();) { size_t j = i; - for (; j < fctx->GetSize() && fctx->Proc(i) == fctx->Proc(j); j++) - ; + for (; j < fctx->Size() && fctx->Proc(i) == fctx->Proc(j); j++); str += std::to_string(fctx->Proc(i)) + "%"; if (j - i > 1) { str += "("; @@ -228,9 +227,9 @@ IdSet::Ptr FastIndexText::Select(FtCtx::Ptr fctx, FtDSLQuery &&dsl, bool inTr str += " "; i = j; } - logPrintf(LogInfo, "Relevancy(%d): %s", fctx->GetSize(), str); + logPrintf(LogInfo, "Relevancy(%d): %s", fctx->Size(), str); } - assertrx_throw(mergedIds->size() == fctx->GetSize()); + assertrx_throw(mergedIds->size() == fctx->Size()); return mergedIds; } template diff --git a/cpp_src/core/index/indextext/fastindextext.h b/cpp_src/core/index/indextext/fastindextext.h index c394195e9..ae6f74d5b 100644 --- a/cpp_src/core/index/indextext/fastindextext.h +++ b/cpp_src/core/index/indextext/fastindextext.h @@ -19,14 +19,16 @@ class FastIndexText : public IndexText { FastIndexText(const FastIndexText& other) : Base(other) { initConfig(other.getConfig()); for (auto& idx : this->idx_map) idx.second.SetVDocID(FtKeyEntryData::ndoc); - this->CommitFulltext(); } FastIndexText(const IndexDef& idef, PayloadType&& payloadType, FieldsSet&& fields, const NamespaceCacheConfigData& cacheCfg) : Base(idef, std::move(payloadType), std::move(fields), cacheCfg) { initConfig(); } - std::unique_ptr Clone() const override { return std::make_unique>(*this); } + std::unique_ptr Clone() const override { + // Creates uncommited copy + return std::make_unique>(*this); + } IdSet::Ptr Select(FtCtx::Ptr fctx, FtDSLQuery&& dsl, bool inTransaction, FtMergeStatuses&&, FtUseExternStatuses, const RdxContext&) override final; IndexMemStat GetMemStat(const RdxContext&) override final; diff --git a/cpp_src/core/index/indextext/indextext.cc b/cpp_src/core/index/indextext/indextext.cc index f144bdf6e..dbf99e8d1 100644 --- a/cpp_src/core/index/indextext/indextext.cc +++ b/cpp_src/core/index/indextext/indextext.cc @@ -73,7 +73,7 @@ void IndexText::ReconfigureCache(const NamespaceCacheConfigData &cacheCfg) { template FtCtx::Ptr IndexText::prepareFtCtx(const BaseFunctionCtx::Ptr &ctx) { - FtCtx::Ptr ftctx = reindexer::reinterpret_pointer_cast(ctx); + FtCtx::Ptr ftctx = reindexer::static_ctx_pointer_cast(ctx); if rx_unlikely (!ftctx) { throw Error(errParams, "Full text index (%s) may not be used without context", Index::Name()); } @@ -111,7 +111,7 @@ SelectKeyResults IndexText::SelectKey(const VariantArray &keys, CondType cond if (cache_ft.valid) { if (!cache_ft.val.ids) { needPutCache = true; - } else if (ftctx->NeedArea() && (!cache_ft.val.ctx || !cache_ft.val.ctx->need_area_)) { + } else if (ftctx->NeedArea() && (!cache_ft.val.ctx || !cache_ft.val.ctx->NeedArea())) { needPutCache = true; } else { return resultFromCache(keys, std::move(cache_ft), ftctx); @@ -143,7 +143,7 @@ SelectKeyResults IndexText::doSelectKey(const VariantArray &keys, const std:: // STEP 1: Parse search query dsl FtDSLQuery dsl(this->ftFields_, this->cfg_->stopWords, this->cfg_->extraWordSymbols); - dsl.parse(keys[0].As()); + dsl.parse(keys[0].As()); IdSet::Ptr mergedIds = Select(ftctx, std::move(dsl), inTransaction, std::move(mergeStatuses), useExternSt, rdxCtx); SelectKeyResult res; @@ -152,10 +152,11 @@ SelectKeyResults IndexText::doSelectKey(const VariantArray &keys, const std:: if (ftctx->NeedArea() && need_put && mergedIds->size()) { auto config = dynamic_cast(cfg_.get()); if (config && config->maxTotalAreasToCache >= 0) { - auto d = ftctx->GetData(); + auto &d = *ftctx->GetData(); size_t totalAreas = 0; - for (auto &area : d->holders_) { - totalAreas += d->area_[area.second].GetAreasCount(); + assertrx_throw(d.holders_.has_value()); + for (auto &area : d.holders_.value()) { + totalAreas += d.area_[area.second].GetAreasCount(); } if (totalAreas > unsigned(config->maxTotalAreasToCache)) { need_put = false; @@ -164,13 +165,16 @@ SelectKeyResults IndexText::doSelectKey(const VariantArray &keys, const std:: } if (need_put && mergedIds->size()) { // This areas will be shared via cache, so lazy commit may race - auto d = ftctx->GetData(); - for (auto &area : d->holders_) { - if (!d->area_[area.second].IsCommited()) { - d->area_[area.second].Commit(); + auto dPtr = ftctx->GetData(); + auto &d = *dPtr; + if (d.holders_.has_value()) { + for (auto &area : d.holders_.value()) { + if (auto &aData = d.area_[area.second]; !aData.IsCommited()) { + aData.Commit(); + } } } - cache_ft_->Put(*ckey, FtIdSetCacheVal{IdSet::Ptr(mergedIds), std::move(d)}); + cache_ft_->Put(*ckey, FtIdSetCacheVal{IdSet::Ptr(mergedIds), std::move(dPtr)}); } res.emplace_back(std::move(mergedIds)); diff --git a/cpp_src/core/index/indextext/indextext.h b/cpp_src/core/index/indextext/indextext.h index d28038922..9580d50c3 100644 --- a/cpp_src/core/index/indextext/indextext.h +++ b/cpp_src/core/index/indextext/indextext.h @@ -46,7 +46,6 @@ class IndexText : public IndexUnordered { this->isBuilt_ = true; } void SetSortedIdxCount(int) override final {} - bool RequireWarmupOnNsCopy() const noexcept override final { return cfg_ && cfg_->enableWarmupOnNsCopy; } void DestroyCache() override { Base::DestroyCache(); cache_ft_.reset(); diff --git a/cpp_src/core/index/indexunordered.cc b/cpp_src/core/index/indexunordered.cc index af7fb2f59..bfd90c2dd 100644 --- a/cpp_src/core/index/indexunordered.cc +++ b/cpp_src/core/index/indexunordered.cc @@ -205,7 +205,7 @@ Variant IndexUnordered::Upsert(const Variant &key, IdType id, bool &clearCach template void IndexUnordered::Delete(const Variant &key, IdType id, StringsHolder &strHolder, bool &clearCache) { if (key.Type().Is()) { - this->empty_ids_.Unsorted().Erase(id); // ignore result + this->empty_ids_.Unsorted().Erase(id); // ignore result this->isBuilt_ = false; cache_.reset(); clearCache = true; @@ -213,18 +213,20 @@ void IndexUnordered::Delete(const Variant &key, IdType id, StringsHolder &str } typename T::iterator keyIt = this->idx_map.find(static_cast(key)); - if (keyIt == idx_map.end()) return; - - delMemStat(keyIt); - int delcnt = keyIt->second.Unsorted().Erase(id); - (void)delcnt; - this->isBuilt_ = false; - cache_.reset(); - clearCache = true; - // TODO: we have to implement removal of composite indexes (doesn't work right now) - assertf(this->opts_.IsArray() || this->Opts().IsSparse() || delcnt, "Delete unexists id from index '%s' id=%d,key=%s (%s)", this->name_, - id, key.As(this->payloadType_, this->Fields()), + [[maybe_unused]] int delcnt = 0; + if (keyIt != idx_map.end()) { + delMemStat(keyIt); + delcnt = keyIt->second.Unsorted().Erase(id); + this->isBuilt_ = false; + cache_.reset(); + clearCache = true; + } + assertf(delcnt || this->opts_.IsArray() || this->Opts().IsSparse(), "Delete non-existing id from index '%s' id=%d,key=%s (%s)", + this->name_, id, key.As(this->payloadType_, this->Fields()), Variant(keyIt->first).As(this->payloadType_, this->Fields())); + if (keyIt == idx_map.end()) { + return; + } if (keyIt->second.Unsorted().IsEmpty()) { this->tracker_.markDeleted(keyIt); @@ -302,11 +304,11 @@ SelectKeyResults IndexUnordered::SelectKey(const VariantArray &keys, CondType const VariantArray &keys; SortType sortId; Index::SelectOpts opts; - } ctx = {&this->idx_map, keys, sortId, opts}; + bool isSparse; + } ctx = {&this->idx_map, keys, sortId, opts, this->opts_.IsSparse()}; bool selectorWasSkipped = false; - bool isSparse = this->opts_.IsSparse(); // should return true, if fallback to comparator required - auto selector = [&ctx, &selectorWasSkipped, isSparse](SelectKeyResult &res, size_t &idsCount) -> bool { + auto selector = [&ctx, &selectorWasSkipped](SelectKeyResult &res, size_t &idsCount) -> bool { idsCount = 0; // Skip this index if there are some other indexes with potentially higher selectivity if (!ctx.opts.distinct && ctx.keys.size() > 1 && 8 * ctx.keys.size() > size_t(ctx.opts.maxIterations) && @@ -326,7 +328,7 @@ SelectKeyResults IndexUnordered::SelectKey(const VariantArray &keys, CondType res.deferedExplicitSort = SelectKeyResult::IsGenericSortRecommended(res.size(), idsCount, idsCount); // avoid comparator for sparse index - if (isSparse || !ctx.opts.itemsCountInNamespace) return false; + if (ctx.isSparse || !ctx.opts.itemsCountInNamespace) return false; // Check selectivity: // if ids count too much (more than maxSelectivityPercentForIdset() of namespace), // and index not optimized, or we have >4 other conditions diff --git a/cpp_src/core/index/keyentry.h b/cpp_src/core/index/keyentry.h index 9c500dba1..169e233c1 100644 --- a/cpp_src/core/index/keyentry.h +++ b/cpp_src/core/index/keyentry.h @@ -21,11 +21,16 @@ class KeyEntry { public: IdSetT& Unsorted() noexcept { return ids_; } const IdSetT& Unsorted() const noexcept { return ids_; } - IdSetRef Sorted(unsigned sortId) const noexcept { + IdSetRef Sorted(unsigned sortId) noexcept { assertf(ids_.capacity() >= (sortId + 1) * ids_.size(), "error ids_.capacity()=%d,sortId=%d,ids_.size()=%d", ids_.capacity(), sortId, ids_.size()); return IdSetRef(ids_.data() + sortId * ids_.size(), ids_.size()); } + IdSetCRef Sorted(unsigned sortId) const noexcept { + assertf(ids_.capacity() >= (sortId + 1) * ids_.size(), "error ids_.capacity()=%d,sortId=%d,ids_.size()=%d", ids_.capacity(), sortId, + ids_.size()); + return IdSetCRef(ids_.data() + sortId * ids_.size(), ids_.size()); + } void UpdateSortedIds(const UpdateSortedContext& ctx) { ids_.reserve((ctx.getSortedIdxCount() + 1) * ids_.size()); assertrx(ctx.getCurSortId()); diff --git a/cpp_src/core/item.cc b/cpp_src/core/item.cc index c4b2c846e..4454c7778 100644 --- a/cpp_src/core/item.cc +++ b/cpp_src/core/item.cc @@ -69,7 +69,7 @@ Item::FieldRef &Item::FieldRef::operator=(Variant kr) { if (field_ >= 0) { itemImpl_->SetField(field_, VariantArray{std::move(kr)}); } else { - itemImpl_->SetField(jsonPath_, VariantArray{std::move(kr)}, nullptr); + itemImpl_->SetField(jsonPath_, VariantArray{std::move(kr)}); } return *this; @@ -82,13 +82,13 @@ Item::FieldRef &Item::FieldRef::operator=(const VariantArray &krs) { if (field_ >= 0) { itemImpl_->SetField(field_, krs); } else { - itemImpl_->SetField(jsonPath_, krs, nullptr); + itemImpl_->SetField(jsonPath_, krs); } return *this; } template -Item::FieldRef &Item::FieldRef::operator=(span arr) { +Item::FieldRef &Item::FieldRef::operator=(span arr) { constexpr static bool kIsStr = std::is_same_v || std::is_same_v || std::is_same_v || std::is_same_v || std::is_same_v; if (field_ < 0) { @@ -96,7 +96,7 @@ Item::FieldRef &Item::FieldRef::operator=(span arr) { krs.MarkArray(); krs.reserve(arr.size()); std::transform(arr.begin(), arr.end(), std::back_inserter(krs), [](const T &t) { return Variant(t); }); - itemImpl_->SetField(jsonPath_, krs, nullptr); + itemImpl_->SetField(jsonPath_, krs); return *this; } @@ -192,10 +192,10 @@ Item &Item::Unsafe(bool enable) & noexcept { int64_t Item::GetLSN() { return impl_->Value().GetLSN(); } void Item::setLSN(int64_t lsn) { impl_->Value().SetLSN(lsn); } -template Item::FieldRef &Item::FieldRef::operator=(span arr); -template Item::FieldRef &Item::FieldRef::operator=(span arr); -template Item::FieldRef &Item::FieldRef::operator=(span arr); -template Item::FieldRef &Item::FieldRef::operator=(span); -template Item::FieldRef &Item::FieldRef::operator=(span); +template Item::FieldRef &Item::FieldRef::operator=(span arr); +template Item::FieldRef &Item::FieldRef::operator=(span arr); +template Item::FieldRef &Item::FieldRef::operator=(span arr); +template Item::FieldRef &Item::FieldRef::operator=(span); +template Item::FieldRef &Item::FieldRef::operator=(span); } // namespace reindexer diff --git a/cpp_src/core/item.h b/cpp_src/core/item.h index 4a9b1aca9..6234a2f60 100644 --- a/cpp_src/core/item.h +++ b/cpp_src/core/item.h @@ -67,20 +67,20 @@ class Item { /// @param p - point value, which will be setted to field FieldRef &operator=(Point p) { double arr[]{p.X(), p.Y()}; - return operator=(span(arr, 2)); + return operator=(span(arr, 2)); } /// Set array of values to field /// @tparam T - type. Must be one of: int, int64_t, double /// @param arr - std::vector of T values, which will be setted to field template - FieldRef &operator=(span arr); + FieldRef &operator=(span arr); /// Set array of values to field /// @tparam T - type. Must be one of: int, int64_t, double /// @param arr - std::vector of T values, which will be setted to field template FieldRef &operator=(const std::vector &arr) { - return operator=(span(arr)); + return operator=(span>(arr)); } /// Set string value to field /// If Item is in Unsafe Mode, then Item will not store str, but just keep pointer to str, diff --git a/cpp_src/core/itemimpl.cc b/cpp_src/core/itemimpl.cc index bb62837b1..e96d2c75f 100644 --- a/cpp_src/core/itemimpl.cc +++ b/cpp_src/core/itemimpl.cc @@ -31,8 +31,8 @@ void ItemImpl::SetField(int field, const VariantArray &krs) { } } -void ItemImpl::ModifyField(std::string_view jsonPath, const VariantArray &keys, const IndexExpressionEvaluator &ev, FieldModifyMode mode) { - ModifyField(tagsMatcher_.path2indexedtag(jsonPath, ev, mode != FieldModeDrop), keys, mode); +void ItemImpl::ModifyField(std::string_view jsonPath, const VariantArray &keys, FieldModifyMode mode) { + ModifyField(tagsMatcher_.path2indexedtag(jsonPath, mode != FieldModeDrop), keys, mode); } void ItemImpl::ModifyField(const IndexedTagsPath &tagsPath, const VariantArray &keys, FieldModifyMode mode) { @@ -76,10 +76,8 @@ void ItemImpl::ModifyField(const IndexedTagsPath &tagsPath, const VariantArray & pl.Set(0, Variant(p_string(reinterpret_cast(tupleData_.get())), Variant::no_hold_t{})); } -void ItemImpl::SetField(std::string_view jsonPath, const VariantArray &keys, const IndexExpressionEvaluator &ev) { - ModifyField(jsonPath, keys, ev, FieldModeSet); -} -void ItemImpl::DropField(std::string_view jsonPath, const IndexExpressionEvaluator &ev) { ModifyField(jsonPath, {}, ev, FieldModeDrop); } +void ItemImpl::SetField(std::string_view jsonPath, const VariantArray &keys) { ModifyField(jsonPath, keys, FieldModeSet); } +void ItemImpl::DropField(std::string_view jsonPath) { ModifyField(jsonPath, {}, FieldModeDrop); } Variant ItemImpl::GetField(int field) { return GetPayload().Get(field, 0); } void ItemImpl::GetField(int field, VariantArray &values) { GetPayload().Get(field, values); } @@ -221,7 +219,7 @@ Error ItemImpl::FromJSON(std::string_view slice, char **endp, bool pkOnly) { } } - size_t len; + size_t len = 0; gason::JsonNode node; gason::JsonParser parser(&largeJSONStrings_); try { @@ -249,9 +247,9 @@ Error ItemImpl::FromJSON(std::string_view slice, char **endp, bool pkOnly) { return err; } -void ItemImpl::FromCJSON(ItemImpl *other, Recoder *recoder) { - FromCJSON(other->GetCJSON(), false, recoder); - cjson_ = std::string_view(); +void ItemImpl::FromCJSON(ItemImpl &other, Recoder *recoder) { + FromCJSON(other.GetCJSON(), false, recoder); + cjson_ = {}; } std::string_view ItemImpl::GetJSON() { diff --git a/cpp_src/core/itemimpl.h b/cpp_src/core/itemimpl.h index cbf49779f..18fe391db 100644 --- a/cpp_src/core/itemimpl.h +++ b/cpp_src/core/itemimpl.h @@ -14,7 +14,7 @@ namespace reindexer { struct ItemImplRawData { ItemImplRawData() = default; - ItemImplRawData(PayloadValue v) : payloadValue_(std::move(v)) {} + explicit ItemImplRawData(PayloadValue v) : payloadValue_(std::move(v)) {} ItemImplRawData(const ItemImplRawData &) = delete; ItemImplRawData(ItemImplRawData &&) = default; ItemImplRawData &operator=(const ItemImplRawData &) = delete; @@ -39,7 +39,7 @@ class ItemImpl : public ItemImplRawData { // Construct empty item ItemImpl(PayloadType type, const TagsMatcher &tagsMatcher, const FieldsSet &pkFields = {}, std::shared_ptr schema = {}) - : ItemImplRawData(PayloadValue(type.TotalSize(), 0, type.TotalSize() + 0x100)), + : ItemImplRawData(PayloadValue(type.TotalSize(), nullptr, type.TotalSize() + 0x100)), payloadType_(std::move(type)), tagsMatcher_(tagsMatcher), pkFields_(pkFields), @@ -66,11 +66,11 @@ class ItemImpl : public ItemImplRawData { ItemImpl &operator=(ItemImpl &&) = default; ItemImpl &operator=(const ItemImpl &) = delete; - void ModifyField(std::string_view jsonPath, const VariantArray &keys, const IndexExpressionEvaluator &ev, FieldModifyMode mode); + void ModifyField(std::string_view jsonPath, const VariantArray &keys, FieldModifyMode mode); void ModifyField(const IndexedTagsPath &tagsPath, const VariantArray &keys, FieldModifyMode mode); void SetField(int field, const VariantArray &krs); - void SetField(std::string_view jsonPath, const VariantArray &keys, const IndexExpressionEvaluator &ev); - void DropField(std::string_view jsonPath, const IndexExpressionEvaluator &ev); + void SetField(std::string_view jsonPath, const VariantArray &keys); + void DropField(std::string_view jsonPath); Variant GetField(int field); void GetField(int field, VariantArray &); FieldsSet PkFields() const { return pkFields_; } @@ -80,7 +80,7 @@ class ItemImpl : public ItemImplRawData { std::string_view GetJSON(); Error FromJSON(std::string_view slice, char **endp = nullptr, bool pkOnly = false); - void FromCJSON(ItemImpl *other, Recoder *); + void FromCJSON(ItemImpl &other, Recoder *); std::string_view GetCJSON(bool withTagsMatcher = false); std::string_view GetCJSON(WrSerializer &ser, bool withTagsMatcher = false); diff --git a/cpp_src/core/itemmodifier.cc b/cpp_src/core/itemmodifier.cc index d1ca8e7bd..3676bff56 100644 --- a/cpp_src/core/itemmodifier.cc +++ b/cpp_src/core/itemmodifier.cc @@ -7,7 +7,65 @@ namespace reindexer { -const std::string &ItemModifier::FieldData::name() const noexcept { return entry_.Column(); } +std::string_view ItemModifier::FieldData::Name() const noexcept { return entry_.Column(); } + +void ItemModifier::FieldData::appendAffectedIndexes(const NamespaceImpl &ns, CompositeFlags &affectedComposites) const { + const auto firstCompositePos = ns.indexes_.firstCompositePos(); + const auto firstSparsePos = ns.indexes_.firstSparsePos(); + const auto totalIndexes = ns.indexes_.totalSize(); + const bool isRegularIndex = IsIndex() && Index() < firstSparsePos; + const bool isSparseIndex = IsIndex() && Index() >= firstSparsePos && Index() < firstCompositePos; + if (isSparseIndex) { + // Composite indexes can not be created over sparse indexes, so just skipping rest of the checks for them + return; + } + const bool isCompositeIndex = IsIndex() && Index() >= firstCompositePos; + if (isCompositeIndex) { + // Composite indexes can not be created over another composite indexes, so just skipping rest of the checks for them + return; + } + std::bitset affected; + if (isRegularIndex) { + affected.set(Index()); + } else { + for (int i = 0; i < firstSparsePos; ++i) { + const auto &ptField = ns.payloadType_.Field(i); + for (const auto &jpath : ptField.JsonPaths()) { + auto tp = ns.tagsMatcher_.path2tag(jpath); + if (Tagspath().IsNestedOrEqualTo(tp)) { + affected.set(i); + break; + } + } + } + } + + for (int i = firstCompositePos; i < totalIndexes; ++i) { + const auto &fields = ns.indexes_[i]->Fields(); + const auto idxId = i - firstCompositePos; + + for (const auto f : fields) { + if (f == IndexValueType::SetByJsonPath) continue; + if (affected.test(f)) { + affectedComposites[idxId] = true; + break; + } + } + if (affectedComposites[idxId]) { + continue; + } + + if (!IsIndex()) { + // Fulltext composites may be created over non-index fields + for (size_t tp = 0, end = fields.getTagsPathsLength(); tp < end; ++tp) { + if (Tagspath().IsNestedOrEqualTo(fields.getTagsPath(tp))) { + affectedComposites[idxId] = true; + break; + } + } + } + } +} class ItemModifier::RollBack_ModifiedPayload final : private RollBackBase { public: @@ -112,51 +170,49 @@ class ItemModifier::RollBack_ModifiedPayload final : private RollBackBase { IdType itemId_; }; -ItemModifier::FieldData::FieldData(const UpdateEntry &entry, NamespaceImpl &ns) +ItemModifier::FieldData::FieldData(const UpdateEntry &entry, NamespaceImpl &ns, CompositeFlags &affectedComposites) : entry_(entry), tagsPathWithLastIndex_{std::nullopt}, arrayIndex_(IndexValueType::NotSet), isIndex_(false) { if (ns.tryGetIndexByName(entry_.Column(), fieldIndex_)) { isIndex_ = true; - auto jsonPathsSize = (ns.indexes_[fieldIndex_]->Opts().IsSparse() || static_cast(fieldIndex_) >= ns.payloadType_.NumFields()) - ? ns.indexes_[fieldIndex_]->Fields().size() + const auto &idx = *ns.indexes_[fieldIndex_]; + auto jsonPathsSize = (idx.Opts().IsSparse() || static_cast(fieldIndex_) >= ns.payloadType_.NumFields()) + ? idx.Fields().size() : ns.payloadType_.Field(fieldIndex_).JsonPaths().size(); if (jsonPathsSize != 1) { throw Error(errParams, "Ambiguity when updating field with several json paths by index name: '%s'", entry_.Column()); } - if (!entry.IsExpression()) { - const auto &fields{ns.indexes_[fieldIndex_]->Fields()}; - if (fields.size() != 1) { - throw Error(errParams, "Cannot update composite index: '%s'", entry_.Column()); - } - if (fields[0] == IndexValueType::SetByJsonPath) { - if (fields.isTagsPathIndexed(0)) { - tagsPath_ = fields.getIndexedTagsPath(0); - } else { - tagsPath_ = IndexedTagsPath{fields.getTagsPath(0)}; - } + const auto &fields{idx.Fields()}; + if (fields.size() != 1) { + throw Error(errParams, "Cannot update composite index: '%s'", entry_.Column()); + } + if (fields[0] == IndexValueType::SetByJsonPath) { + if (fields.isTagsPathIndexed(0)) { + tagsPath_ = fields.getIndexedTagsPath(0); } else { - tagsPath_ = ns.tagsMatcher_.path2indexedtag(ns.payloadType_.Field(fieldIndex_).JsonPaths()[0], nullptr, true); - } - if (tagsPath_.empty()) { - throw Error(errParams, "Cannot find field by json: '%s'", entry_.Column()); - } - if (tagsPath_.back().IsWithIndex()) { - arrayIndex_ = tagsPath_.back().Index(); - tagsPath_.back().SetIndex(IndexValueType::NotSet); + tagsPath_ = IndexedTagsPath{fields.getTagsPath(0)}; } + } else { + fieldIndex_ = fields[0]; // 'Composite' index with single subindex + tagsPath_ = ns.tagsMatcher_.path2indexedtag(ns.payloadType_.Field(fieldIndex_).JsonPaths()[0], true); + } + if (tagsPath_.empty()) { + throw Error(errParams, "Cannot find field by json: '%s'", entry_.Column()); + } + if (tagsPath_.back().IsWithIndex()) { + arrayIndex_ = tagsPath_.back().Index(); + tagsPath_.back().SetIndex(IndexValueType::NotSet); } } else if (fieldIndex_ = ns.payloadType_.FieldByJsonPath(entry_.Column()); fieldIndex_ > 0) { isIndex_ = true; - if (!entry.IsExpression()) { - tagsPath_ = ns.tagsMatcher_.path2indexedtag(entry_.Column(), nullptr, true); - if (tagsPath_.empty()) { - throw Error(errParams, "Cannot find field by json: '%s'", entry_.Column()); - } + tagsPath_ = ns.tagsMatcher_.path2indexedtag(entry_.Column(), true); + if (tagsPath_.empty()) { + throw Error(errParams, "Cannot find field by json: '%s'", entry_.Column()); } } else { TagsPath tp; - IndexedTagsPath tagsPath = ns.tagsMatcher_.path2indexedtag(entry_.Column(), nullptr, true); + IndexedTagsPath tagsPath = ns.tagsMatcher_.path2indexedtag(entry_.Column(), true); std::string jsonPath; for (size_t i = 0; i < tagsPath.size(); ++i) { if (i) jsonPath += '.'; @@ -171,54 +227,27 @@ ItemModifier::FieldData::FieldData(const UpdateEntry &entry, NamespaceImpl &ns) fieldIndex_ = 0; isIndex_ = ns.getIndexByNameOrJsonPath(jsonPath, fieldIndex_) || ns.getSparseIndexByJsonPath(jsonPath, fieldIndex_); } - if (!entry.IsExpression()) { - tagsPath_ = std::move(tagsPath); - if (tagsPath_.empty()) { - throw Error(errParams, "Cannot find field by json: '%s'", entry_.Column()); - } - if (isIndex_) { - auto &lastTag = tagsPath_.back(); - if (lastTag.IsWithIndex()) { - tagsPathWithLastIndex_ = tagsPath_; - arrayIndex_ = lastTag.Index(); - lastTag.SetIndex(IndexValueType::NotSet); - } - } + tagsPath_ = std::move(tagsPath); + if (tagsPath_.empty()) { + throw Error(errParams, "Cannot find field by json: '%s'", entry_.Column()); } - } -} - -void ItemModifier::FieldData::updateTagsPath(TagsMatcher &tm, const IndexExpressionEvaluator &ev) { - if (tagsPath_.empty()) { - tagsPath_ = tm.path2indexedtag(entry_.Column(), ev, true); - } - for (size_t i = 0; i < tagsPath_.size(); ++i) { - if (tagsPath_[i].IsWithExpression()) { - IndexedPathNode &node = tagsPath_[i]; - VariantArray vals = ev(node.Expression()); - if (vals.size() != 1) { - throw Error(errParams, "Index expression has wrong syntax: '%s'", node.Expression()); + if (isIndex_) { + auto &lastTag = tagsPath_.back(); + if (lastTag.IsWithIndex()) { + tagsPathWithLastIndex_ = tagsPath_; + arrayIndex_ = lastTag.Index(); + lastTag.SetIndex(IndexValueType::NotSet); } - vals.front().Type().EvaluateOneOf([](OneOf) noexcept {}, - [&](OneOf) { - throw Error(errParams, "Wrong type of index: '%s'", node.Expression()); - }); - node.SetIndex(vals.front().As()); - } - } - if (tagsPath_.size()) { - auto &lastTag = tagsPath_.back(); - if (lastTag.IsWithIndex()) { - arrayIndex_ = lastTag.Index(); - tagsPathWithLastIndex_ = tagsPath_; - lastTag.SetIndex(IndexValueType::NotSet); } } + appendAffectedIndexes(ns, affectedComposites); } ItemModifier::ItemModifier(const std::vector &updateEntries, NamespaceImpl &ns) - : ns_(ns), updateEntries_(updateEntries), rollBackIndexData_(ns_.indexes_.totalSize()) { + : ns_(ns), + updateEntries_(updateEntries), + rollBackIndexData_(ns_.indexes_.totalSize()), + affectedComposites_(ns_.indexes_.totalSize() - ns_.indexes_.firstCompositePos(), false) { for (const UpdateEntry &updateField : updateEntries_) { for (const auto &v : updateField.Values()) { v.Type().EvaluateOneOf([](OneOf &updateEntries, Namesp "Probably 'object'/'json' type was not explicitly set in the query"); }); } - fieldsToModify_.emplace_back(updateField, ns_); + fieldsToModify_.emplace_back(updateField, ns_, affectedComposites_); } } @@ -249,48 +278,39 @@ ItemModifier::ItemModifier(const std::vector &updateEntries, Namesp FunctionExecutor funcExecutor(ns_); ExpressionEvaluator ev(ns_.payloadType_, ns_.tagsMatcher_, funcExecutor); - h_vector needUpdateCompIndexes(unsigned(ns_.indexes_.compositeIndexesSize()), false); - for (FieldData &field : fieldsToModify_) { - deleteDataFromComposite(itemId, field, needUpdateCompIndexes); - } - - const auto firstCompositePos = ns_.indexes_.firstCompositePos(); - const auto totalIndexes = ns_.indexes_.totalSize(); - + deleteItemFromComposite(itemId); try { VariantArray values; for (FieldData &field : fieldsToModify_) { // values must be assigned a value in if else below - if (field.details().IsExpression()) { - assertrx(field.details().Values().size() > 0); - values = ev.Evaluate(static_cast(field.details().Values().front()), pv, field.name()); - field.updateTagsPath(ns_.tagsMatcher_, - [&ev, &pv, &field](std::string_view expression) { return ev.Evaluate(expression, pv, field.name()); }); + if (field.Details().IsExpression()) { + assertrx(field.Details().Values().size() > 0); + values = ev.Evaluate(static_cast(field.Details().Values().front()), pv, field.Name()); } else { - values = field.details().Values(); + values = field.Details().Values(); } - if (values.IsArrayValue() && field.tagspathWithLastIndex().back().IsArrayNode()) { + if (values.IsArrayValue() && field.TagspathWithLastIndex().back().IsArrayNode()) { throw Error(errParams, "Array items are supposed to be updated with a single value, not an array"); } - if (field.details().Mode() == FieldModeSetJson || !field.isIndex()) { + if (field.Details().Mode() == FieldModeSetJson || !field.IsIndex()) { modifyCJSON(itemId, field, values); } else { modifyField(itemId, field, pl, values); } } } catch (...) { - insertItemIntoCompositeIndexes(itemId, firstCompositePos, totalIndexes, needUpdateCompIndexes); + insertItemIntoComposite(itemId); throw; } - insertItemIntoCompositeIndexes(itemId, firstCompositePos, totalIndexes, needUpdateCompIndexes); + insertItemIntoComposite(itemId); if (rollBackIndexData_.IsPkModified()) { ns_.checkUniquePK(ConstPayload(ns_.payloadType_, pv), ctx.inTransaction, ctx.rdxContext); } rollBack.Disable(); - ns_.markUpdated(false); + ns_.markUpdated(IndexOptimization::Partial); return rollBackIndexData_.IsPkModified(); } @@ -308,7 +328,7 @@ void ItemModifier::modifyCJSON(IdType id, FieldData &field, VariantArray &values } ItemImpl itemimpl(ns_.payloadType_, plData, ns_.tagsMatcher_); - itemimpl.ModifyField(field.tagspath(), values, field.details().Mode()); + itemimpl.ModifyField(field.Tagspath(), values, field.Details().Mode()); Item item = ns_.newItem(); Error err = item.Unsafe().FromCJSON(itemimpl.GetCJSON(true)); @@ -390,61 +410,44 @@ void ItemModifier::modifyCJSON(IdType id, FieldData &field, VariantArray &values impl->RealValue() = plData; } -void ItemModifier::deleteDataFromComposite(IdType itemId, FieldData &field, h_vector &needUpdateCompIndexes) { +void ItemModifier::deleteItemFromComposite(IdType itemId) { auto strHolder = ns_.strHolder(); auto indexesCacheCleaner{ns_.GetIndexesCacheCleaner()}; const auto firstCompositePos = ns_.indexes_.firstCompositePos(); const auto totalIndexes = ns_.indexes_.totalSize(); for (int i = firstCompositePos; i < totalIndexes; ++i) { - auto &compositeIdx = ns_.indexes_[i]; - const auto &fields = compositeIdx->Fields(); - const auto idxId = i - firstCompositePos; - if (needUpdateCompIndexes[idxId]) { - continue; - } - if (field.isIndex()) { - for (const auto f : fields) { - if (f == IndexValueType::SetByJsonPath) continue; - if (f == field.index()) { - needUpdateCompIndexes[idxId] = true; - break; - } - } - } - if (!needUpdateCompIndexes[idxId]) { - for (size_t tp = 0, end = fields.getTagsPathsLength(); tp < end; ++tp) { - if (field.tagspath().Compare(fields.getTagsPath(tp))) { - needUpdateCompIndexes[idxId] = true; - break; - } + if (affectedComposites_[i - firstCompositePos]) { + bool needClearCache{false}; + const auto &compositeIdx = ns_.indexes_[i]; + rollBackIndexData_.IndexAndCJsonChanged(i, compositeIdx->Opts().IsPK()); + compositeIdx->Delete(Variant(ns_.items_[itemId]), itemId, *strHolder, needClearCache); + if (needClearCache && compositeIdx->IsOrdered()) { + indexesCacheCleaner.Add(compositeIdx->SortId()); } - if (!needUpdateCompIndexes[idxId]) continue; } - bool needClearCache{false}; - rollBackIndexData_.IndexAndCJsonChanged(i, compositeIdx->Opts().IsPK()); - compositeIdx->Delete(Variant(ns_.items_[itemId]), itemId, *strHolder, needClearCache); - if (needClearCache && compositeIdx->IsOrdered()) indexesCacheCleaner.Add(compositeIdx->SortId()); } } -void ItemModifier::insertItemIntoCompositeIndexes(IdType itemId, int firstCompositePos, int totalIndexes, - const h_vector &needUpdateCompIndexes) { +void ItemModifier::insertItemIntoComposite(IdType itemId) { + const auto totalIndexes = ns_.indexes_.totalSize(); + const auto firstCompositePos = ns_.indexes_.firstCompositePos(); for (int i = firstCompositePos; i < totalIndexes; ++i) { - if (!needUpdateCompIndexes[i - firstCompositePos]) continue; - bool needClearCache{false}; - auto &compositeIdx = ns_.indexes_[i]; - rollBackIndexData_.IndexChanged(i, compositeIdx->Opts().IsPK()); - compositeIdx->Upsert(Variant(ns_.items_[itemId]), itemId, needClearCache); - if (needClearCache && compositeIdx->IsOrdered()) { - ns_.GetIndexesCacheCleaner().Add(compositeIdx->SortId()); + if (affectedComposites_[i - firstCompositePos]) { + bool needClearCache{false}; + auto &compositeIdx = ns_.indexes_[i]; + rollBackIndexData_.IndexChanged(i, compositeIdx->Opts().IsPK()); + compositeIdx->Upsert(Variant(ns_.items_[itemId]), itemId, needClearCache); + if (needClearCache && compositeIdx->IsOrdered()) { + ns_.GetIndexesCacheCleaner().Add(compositeIdx->SortId()); + } } } -}; +} void ItemModifier::modifyField(IdType itemId, FieldData &field, Payload &pl, VariantArray &values) { - assertrx_throw(field.isIndex()); - Index &index = *(ns_.indexes_[field.index()]); - if (!index.Opts().IsSparse() && field.details().Mode() == FieldModeDrop /*&& + assertrx_throw(field.IsIndex()); + Index &index = *(ns_.indexes_[field.Index()]); + if (!index.Opts().IsSparse() && field.Details().Mode() == FieldModeDrop /*&& !(field.arrayIndex() != IndexValueType::NotSet || field.tagspath().back().IsArrayNode())*/) { // TODO #1218 allow to drop array fields throw Error(errLogic, "It's only possible to drop sparse or non-index fields via UPDATE statement!"); } @@ -461,7 +464,7 @@ void ItemModifier::modifyField(IdType itemId, FieldData &field, Payload &pl, Var auto strHolder = ns_.strHolder(); auto indexesCacheCleaner{ns_.GetIndexesCacheCleaner()}; - if (field.isIndex()) { + if (field.IsIndex()) { modifyIndexValues(itemId, field, values, pl); } @@ -475,7 +478,7 @@ void ItemModifier::modifyField(IdType itemId, FieldData &field, Payload &pl, Var Variant tupleValue; std::exception_ptr exception; try { - item.ModifyField(field.tagspathWithLastIndex(), values, field.details().Mode()); + item.ModifyField(field.TagspathWithLastIndex(), values, field.Details().Mode()); } catch (...) { exception = std::current_exception(); } @@ -489,15 +492,15 @@ void ItemModifier::modifyField(IdType itemId, FieldData &field, Payload &pl, Var } void ItemModifier::modifyIndexValues(IdType itemId, const FieldData &field, VariantArray &values, Payload &pl) { - Index &index = *(ns_.indexes_[field.index()]); + Index &index = *(ns_.indexes_[field.Index()]); if (values.IsNullValue() && !index.Opts().IsArray()) { throw Error(errParams, "Non-array index fields cannot be set to null!"); } auto strHolder = ns_.strHolder(); auto indexesCacheCleaner{ns_.GetIndexesCacheCleaner()}; - bool updateArrayPart = field.arrayIndex() >= 0; + bool updateArrayPart = field.ArrayIndex() >= 0; bool isForAllItems = false; - for (const auto &tag : field.tagspath()) { + for (const auto &tag : field.Tagspath()) { if (tag.IsArrayNode()) { updateArrayPart = true; } @@ -521,7 +524,7 @@ void ItemModifier::modifyIndexValues(IdType itemId, const FieldData &field, Vari } int offset = -1, length = -1; isForAllItems = false; - for (const auto &tag : field.tagspath()) { // TODO: Move to FieldEntry? + for (const auto &tag : field.Tagspath()) { // TODO: Move to FieldEntry? if (tag.IsForAllItems()) { isForAllItems = true; continue; @@ -531,9 +534,9 @@ void ItemModifier::modifyIndexValues(IdType itemId, const FieldData &field, Vari } } - ns_.skrefs = pl.GetIndexedArrayData(field.tagspathWithLastIndex(), field.index(), offset, length); + ns_.skrefs = pl.GetIndexedArrayData(field.TagspathWithLastIndex(), field.Index(), offset, length); if (offset < 0 || length < 0) { - const auto &path = field.tagspathWithLastIndex(); + const auto &path = field.TagspathWithLastIndex(); std::string indexesStr; for (auto &p : path) { if (p.Index() >= 0) { @@ -545,48 +548,48 @@ void ItemModifier::modifyIndexValues(IdType itemId, const FieldData &field, Vari } throw Error(errParams, "Requested array's index was not found: [%s]", indexesStr); } - if (field.arrayIndex() != IndexValueType::NotSet && field.arrayIndex() >= length) { - throw Error(errLogic, "Array index is out of range: [%d/%d]", field.arrayIndex(), length); + if (field.ArrayIndex() != IndexValueType::NotSet && field.ArrayIndex() >= length) { + throw Error(errLogic, "Array index is out of range: [%d/%d]", field.ArrayIndex(), length); } if (!ns_.skrefs.empty()) { bool needClearCache{false}; - rollBackIndexData_.IndexChanged(field.index(), index.Opts().IsPK()); + rollBackIndexData_.IndexChanged(field.Index(), index.Opts().IsPK()); index.Delete(ns_.skrefs.front(), itemId, *strHolder, needClearCache); if (needClearCache && index.IsOrdered()) indexesCacheCleaner.Add(index.SortId()); } bool needClearCache{false}; - rollBackIndexData_.IndexChanged(field.index(), index.Opts().IsPK()); + rollBackIndexData_.IndexChanged(field.Index(), index.Opts().IsPK()); index.Upsert(ns_.krefs, values, itemId, needClearCache); if (needClearCache && index.IsOrdered()) indexesCacheCleaner.Add(index.SortId()); if (isForAllItems) { for (int i = offset, end = offset + length; i < end; ++i) { - pl.Set(field.index(), i, ns_.krefs.front()); + pl.Set(field.Index(), i, ns_.krefs.front()); } - } else if (field.arrayIndex() == IndexValueType::NotSet) { + } else if (field.ArrayIndex() == IndexValueType::NotSet) { // Array may be resized VariantArray v; - pl.Get(field.index(), v); + pl.Get(field.Index(), v); v.erase(v.begin() + offset, v.begin() + offset + length); v.insert(v.begin() + offset, ns_.krefs.begin(), ns_.krefs.end()); - pl.Set(field.index(), v); + pl.Set(field.Index(), v); } else { // Exactly one value was changed - pl.Set(field.index(), offset, ns_.krefs.front()); + pl.Set(field.Index(), offset, ns_.krefs.front()); } } else { if (index.Opts().IsSparse()) { - pl.GetByJsonPath(field.tagspathWithLastIndex(), ns_.skrefs, index.KeyType()); + pl.GetByJsonPath(field.TagspathWithLastIndex(), ns_.skrefs, index.KeyType()); } else { - pl.Get(field.index(), ns_.skrefs, Variant::hold_t{}); + pl.Get(field.Index(), ns_.skrefs, Variant::hold_t{}); } // Required when updating index array field with several tagpaths VariantArray concatValues; int offset = -1, length = -1; - pl.GetIndexedArrayData(field.tagspathWithLastIndex(), field.index(), offset, length); + pl.GetIndexedArrayData(field.TagspathWithLastIndex(), field.Index(), offset, length); const bool kConcatIndexValues = index.Opts().IsArray() && !updateArrayPart && (length < int(ns_.skrefs.size())); // (length < int(ns_.skrefs.size()) - condition to avoid coping @@ -604,17 +607,17 @@ void ItemModifier::modifyIndexValues(IdType itemId, const FieldData &field, Vari if (!ns_.skrefs.empty()) { bool needClearCache{false}; - rollBackIndexData_.IndexChanged(field.index(), index.Opts().IsPK()); + rollBackIndexData_.IndexChanged(field.Index(), index.Opts().IsPK()); index.Delete(ns_.skrefs, itemId, *strHolder, needClearCache); if (needClearCache && index.IsOrdered()) indexesCacheCleaner.Add(index.SortId()); } bool needClearCache{false}; - rollBackIndexData_.IndexChanged(field.index(), index.Opts().IsPK()); + rollBackIndexData_.IndexChanged(field.Index(), index.Opts().IsPK()); index.Upsert(ns_.krefs, kConcatIndexValues ? concatValues : values, itemId, needClearCache); if (needClearCache && index.IsOrdered()) indexesCacheCleaner.Add(index.SortId()); if (!index.Opts().IsSparse()) { - pl.Set(field.index(), ns_.krefs); + pl.Set(field.Index(), ns_.krefs); } } } diff --git a/cpp_src/core/itemmodifier.h b/cpp_src/core/itemmodifier.h index 2682c059b..ea14afbd3 100644 --- a/cpp_src/core/itemmodifier.h +++ b/cpp_src/core/itemmodifier.h @@ -22,20 +22,23 @@ class ItemModifier { PayloadValue &GetPayloadValueBackup() { return rollBackIndexData_.GetPayloadValueBackup(); } private: - struct FieldData { - FieldData(const UpdateEntry &entry, NamespaceImpl &ns); - void updateTagsPath(TagsMatcher &tm, const IndexExpressionEvaluator &ev); - const UpdateEntry &details() const noexcept { return entry_; } - const IndexedTagsPath &tagspath() const noexcept { return tagsPath_; } - const IndexedTagsPath &tagspathWithLastIndex() const noexcept { + using CompositeFlags = h_vector; + class FieldData { + public: + FieldData(const UpdateEntry &entry, NamespaceImpl &ns, CompositeFlags &affectedComposites); + const UpdateEntry &Details() const noexcept { return entry_; } + const IndexedTagsPath &Tagspath() const noexcept { return tagsPath_; } + const IndexedTagsPath &TagspathWithLastIndex() const noexcept { return tagsPathWithLastIndex_ ? *tagsPathWithLastIndex_ : tagsPath_; } - int arrayIndex() const noexcept { return arrayIndex_; } - int index() const noexcept { return fieldIndex_; } - bool isIndex() const noexcept { return isIndex_; } - const std::string &name() const noexcept; + int ArrayIndex() const noexcept { return arrayIndex_; } + int Index() const noexcept { return fieldIndex_; } + bool IsIndex() const noexcept { return isIndex_; } + std::string_view Name() const noexcept; private: + void appendAffectedIndexes(const NamespaceImpl &ns, CompositeFlags &affectedComposites) const; + const UpdateEntry &entry_; IndexedTagsPath tagsPath_; std::optional tagsPathWithLastIndex_; @@ -66,9 +69,8 @@ class ItemModifier { void modifyCJSON(IdType itemId, FieldData &field, VariantArray &values); void modifyIndexValues(IdType itemId, const FieldData &field, VariantArray &values, Payload &pl); - void deleteDataFromComposite(IdType itemId, FieldData &field, h_vector &needUpdateCompIndexes); - void insertItemIntoCompositeIndexes(IdType itemId, int firstCompositePos, int totalIndexes, - const h_vector &needUpdateCompIndexes); + void deleteItemFromComposite(IdType itemId); + void insertItemIntoComposite(IdType itemId); NamespaceImpl &ns_; const std::vector &updateEntries_; @@ -110,6 +112,7 @@ class ItemModifier { }; IndexRollBack rollBackIndexData_; + CompositeFlags affectedComposites_; }; } // namespace reindexer diff --git a/cpp_src/core/key_value_type.cc b/cpp_src/core/key_value_type.cc new file mode 100644 index 000000000..587c9e9a3 --- /dev/null +++ b/cpp_src/core/key_value_type.cc @@ -0,0 +1,41 @@ +#include "key_value_type.h" + +namespace reindexer { + +std::string_view KeyValueType::Name() const noexcept { + using namespace std::string_view_literals; + switch (value_) { + case KVT::Int64: + return "int64"sv; + case KVT::Double: + return "double"sv; + case KVT::String: + return "string"sv; + case KVT::Bool: + return "bool"sv; + case KVT::Null: + return "null"sv; + case KVT::Int: + return "int"sv; + case KVT::Undefined: + return "undefined"sv; + case KVT::Composite: + return "composite"sv; + case KVT::Tuple: + return "tuple"sv; + case KVT::Uuid: + return "uuid"sv; + } + assertrx(0); + std::abort(); +} + +template +[[noreturn]] void throwKVTExceptionImpl(std::string_view msg, const T& v) { + throw Error(errParams, fmt::format("{}: '{}'", msg, v)); +} +void KeyValueType::throwKVTException(std::string_view msg, std::string_view v) { throwKVTExceptionImpl(msg, v); } +void KeyValueType::throwKVTException(std::string_view msg, int v) { throwKVTExceptionImpl(msg, v); } +void KeyValueType::throwKVTException(std::string_view msg, TagType t) { throwKVTExceptionImpl(msg, TagTypeToStr(t)); } + +} // namespace reindexer diff --git a/cpp_src/core/key_value_type.h b/cpp_src/core/key_value_type.h index 796959fcf..6c8dfc056 100644 --- a/cpp_src/core/key_value_type.h +++ b/cpp_src/core/key_value_type.h @@ -100,7 +100,7 @@ class KeyValueType { case static_cast(KVT::Uuid): return KeyValueType{static_cast(n)}; default: - throw Error(errParams, "Invalid int value for KeyValueType: " + std::to_string(n)); + throwKVTException("Invalid int value for KeyValueType", n); } } [[nodiscard]] RX_ALWAYS_INLINE int toNumber() const noexcept { return static_cast(value_); } @@ -145,7 +145,7 @@ class KeyValueType { case TAG_END: break; } - throw Error(errParams, "Invalid tag type value for KeyValueType: " + std::string{TagTypeToStr(t)}); + throwKVTException("Invalid tag type value for KeyValueType", t); } template @@ -229,7 +229,7 @@ class KeyValueType { return v.value_ == value_; } [[nodiscard]] RX_ALWAYS_INLINE bool IsSame(KeyValueType other) const noexcept { return value_ == other.value_; } - [[nodiscard]] RX_ALWAYS_INLINE TagType ToTagType() const noexcept { + [[nodiscard]] RX_ALWAYS_INLINE TagType ToTagType() const { switch (value_) { case KVT::Int64: case KVT::Int: @@ -241,16 +241,15 @@ class KeyValueType { case KVT::Bool: return TAG_BOOL; case KVT::Null: + case KVT::Undefined: return TAG_NULL; case KVT::Uuid: return TAG_UUID; - case KVT::Undefined: case KVT::Composite: case KVT::Tuple: break; } - assertrx(0); - std::abort(); + throwKVTException("Unexpected value type", Name()); } [[nodiscard]] RX_ALWAYS_INLINE bool IsNumeric() const noexcept { switch (value_) { @@ -270,36 +269,15 @@ class KeyValueType { assertrx(0); std::abort(); } - [[nodiscard]] std::string_view Name() const noexcept { - using namespace std::string_view_literals; - switch (value_) { - case KVT::Int64: - return "int64"sv; - case KVT::Double: - return "double"sv; - case KVT::String: - return "string"sv; - case KVT::Bool: - return "bool"sv; - case KVT::Null: - return "null"sv; - case KVT::Int: - return "int"sv; - case KVT::Undefined: - return "undefined"sv; - case KVT::Composite: - return "composite"sv; - case KVT::Tuple: - return "tuple"sv; - case KVT::Uuid: - return "uuid"sv; - } - assertrx(0); - std::abort(); - } + [[nodiscard]] std::string_view Name() const noexcept; template static KeyValueType From(); + +private: + [[noreturn]] static void throwKVTException(std::string_view msg, std::string_view param); + [[noreturn]] static void throwKVTException(std::string_view msg, TagType); + [[noreturn]] static void throwKVTException(std::string_view msg, int); }; class key_string; diff --git a/cpp_src/core/keyvalue/p_string.h b/cpp_src/core/keyvalue/p_string.h index b6e128010..d4c76bebe 100644 --- a/cpp_src/core/keyvalue/p_string.h +++ b/cpp_src/core/keyvalue/p_string.h @@ -1,6 +1,7 @@ #pragma once #include +#include "estl/span.h" #include "key_string.h" #include "tools/customhash.h" #include "tools/jsonstring.h" @@ -176,6 +177,17 @@ struct p_string { uint64_t v; }; +inline span giftStr(p_string s) noexcept { +#ifndef _GLIBCXX_USE_CXX11_ABI + if (s.type() == p_string::tagCxxstr) { + // Trying to avoid COW-string problems + auto strPtr = s.getCxxstr(); + return span(const_cast(strPtr)->data(), strPtr->size()); + } +#endif // _GLIBCXX_USE_CXX11_ABI + return span(const_cast(s.data()), s.size()); +} + } // namespace reindexer namespace std { template <> diff --git a/cpp_src/core/keyvalue/variant.cc b/cpp_src/core/keyvalue/variant.cc index ceaf968eb..81616bbc0 100644 --- a/cpp_src/core/keyvalue/variant.cc +++ b/cpp_src/core/keyvalue/variant.cc @@ -258,7 +258,7 @@ T parseAs(std::string_view str) { template <> int Variant::As() const { if (isUuid()) { - throw Error(errParams, "Can't convert '%s' to number", std::string{Uuid{*this}}.data()); + throw Error(errParams, "Can't convert '%s' to number", std::string{Uuid{*this}}); } return variant_.type.EvaluateOneOf( [&](KeyValueType::Bool) noexcept -> int { return variant_.value_bool; }, @@ -269,7 +269,7 @@ int Variant::As() const { [this](OneOf) -> int { throw Error(errParams, "Can't convert '%s'-value to number", Type().Name()); }, - [&](KeyValueType::Uuid) -> int { throw Error(errParams, "Can't convert '%s' to number", std::string{Uuid{*this}}.data()); }); + [&](KeyValueType::Uuid) -> int { throw Error(errParams, "Can't convert '%s' to number", std::string{Uuid{*this}}); }); } static std::optional tryConvertToBool(const p_string &str) { @@ -297,7 +297,7 @@ template <> bool Variant::As() const { using namespace std::string_view_literals; if (isUuid()) { - throw Error(errParams, "Can't convert '%s' to bool", std::string{Uuid{*this}}.data()); + throw Error(errParams, "Can't convert '%s' to bool", std::string{Uuid{*this}}); } return variant_.type.EvaluateOneOf( [&](KeyValueType::Bool) noexcept { return variant_.value_bool; }, @@ -316,13 +316,13 @@ bool Variant::As() const { [this](OneOf) -> bool { throw Error(errParams, "Can't convert '%s'-value to bool", Type().Name()); }, - [&](KeyValueType::Uuid) -> bool { throw Error(errParams, "Can't convert '%s' to bool", std::string{Uuid{*this}}.data()); }); + [&](KeyValueType::Uuid) -> bool { throw Error(errParams, "Can't convert '%s' to bool", std::string{Uuid{*this}}); }); } template <> int64_t Variant::As() const { if (isUuid()) { - throw Error(errParams, "Can't convert '%s' to number", std::string{Uuid{*this}}.data()); + throw Error(errParams, "Can't convert '%s' to number", std::string{Uuid{*this}}); } return variant_.type.EvaluateOneOf( [&](KeyValueType::Bool) noexcept -> int64_t { return variant_.value_bool; }, @@ -333,13 +333,13 @@ int64_t Variant::As() const { [this](OneOf) -> int64_t { throw Error(errParams, "Can't convert '%s'-value to number", Type().Name()); }, - [&](KeyValueType::Uuid) -> int64_t { throw Error(errParams, "Can't convert '%s' to number", std::string{Uuid{*this}}.data()); }); + [&](KeyValueType::Uuid) -> int64_t { throw Error(errParams, "Can't convert '%s' to number", std::string{Uuid{*this}}); }); } template <> double Variant::As() const { if (isUuid()) { - throw Error(errParams, "Can't convert '%s' to number", std::string{Uuid{*this}}.data()); + throw Error(errParams, "Can't convert '%s' to number", std::string{Uuid{*this}}); } return variant_.type.EvaluateOneOf( [&](KeyValueType::Bool) noexcept -> double { return variant_.value_bool; }, @@ -350,7 +350,7 @@ double Variant::As() const { [this](OneOf) -> double { throw Error(errParams, "Can't convert '%s'-value to number", Type().Name()); }, - [&](KeyValueType::Uuid) -> double { throw Error(errParams, "Can't convert '%s' to number", std::string{Uuid{*this}}.data()); }); + [&](KeyValueType::Uuid) -> double { throw Error(errParams, "Can't convert '%s' to number", std::string{Uuid{*this}}); }); } template diff --git a/cpp_src/core/namespace/itemsloader.cc b/cpp_src/core/namespace/itemsloader.cc index 90efb611c..ec97275b9 100644 --- a/cpp_src/core/namespace/itemsloader.cc +++ b/cpp_src/core/namespace/itemsloader.cc @@ -120,7 +120,6 @@ void ItemsLoader::reading() { lck.lock(); const bool wasEmpty = items_.HasNoWrittenItems(); items_.WritePlaced(); - lck.unlock(); if (wasEmpty) { cv_.notify_all(); @@ -276,7 +275,7 @@ void IndexInserters::Stop() { if (threads_.size()) { std::lock_guard lck(mtx_); shared_.terminate = true; - cv_.notify_all(); + cvReady_.notify_all(); } for (auto &th : threads_) { th.join(); @@ -285,62 +284,47 @@ void IndexInserters::Stop() { } void IndexInserters::AwaitIndexesBuild() { - if (readyThreads_.load(std::memory_order_acquire) != threads_.size()) { - std::unique_lock lck(mtx_); - cv_.wait(lck, [this] { return readyThreads_.load(std::memory_order_acquire) == threads_.size(); }); - if (!status_.ok()) { - throw status_; - } - assertrx(shared_.threadsWithNewData.empty()); + std::unique_lock lck(mtx_); + cvDone_.wait(lck, [this] { return readyThreads_ == threads_.size(); }); + if (!status_.ok()) { + throw status_; } } void IndexInserters::BuildSimpleIndexesAsync(unsigned startId, span newItems, span nsItems) { - { - std::lock_guard lck(mtx_); - shared_.newItems = newItems; - shared_.nsItems = nsItems; - shared_.startId = startId; - assertrx(shared_.threadsWithNewData.empty()); - for (unsigned tid = 0; tid < threads_.size(); ++tid) { - shared_.threadsWithNewData.emplace_back(tid + kTIDOffset); - } - shared_.composite = false; - readyThreads_.store(0, std::memory_order_relaxed); - } - cv_.notify_all(); + std::lock_guard lck(mtx_); + shared_.newItems = newItems; + shared_.nsItems = nsItems; + shared_.startId = startId; + shared_.composite = false; + readyThreads_ = 0; + ++iteration_; + cvReady_.notify_all(); } void IndexInserters::BuildCompositeIndexesAsync() { - { - std::lock_guard lck(mtx_); - assertrx(shared_.threadsWithNewData.empty()); - for (unsigned tid = 0; tid < threads_.size(); ++tid) { - shared_.threadsWithNewData.emplace_back(tid + kTIDOffset); - } - shared_.composite = true; - readyThreads_.store(0, std::memory_order_relaxed); - } - cv_.notify_all(); + std::lock_guard lck(mtx_); + shared_.composite = true; + readyThreads_ = 0; + ++iteration_; + cvReady_.notify_all(); } void IndexInserters::insertionLoop(unsigned threadId) noexcept { VariantArray krefs, skrefs; const unsigned firstCompositeIndex = indexes_.firstCompositePos(); const unsigned totalIndexes = indexes_.totalSize(); + unsigned thisLoopIteration{0}; while (true) { try { std::unique_lock lck(mtx_); - cv_.wait(lck, [this, threadId] { - return shared_.terminate || std::find(shared_.threadsWithNewData.begin(), shared_.threadsWithNewData.end(), threadId) != - shared_.threadsWithNewData.end(); - }); + cvReady_.wait(lck, [this, thisLoopIteration] { return shared_.terminate || iteration_ > thisLoopIteration; }); if (shared_.terminate) { return; } - shared_.threadsWithNewData.erase(std::find(shared_.threadsWithNewData.begin(), shared_.threadsWithNewData.end(), threadId)); lck.unlock(); + ++thisLoopIteration; const unsigned startId = shared_.startId; const unsigned threadsCnt = threads_.size(); @@ -348,7 +332,7 @@ void IndexInserters::insertionLoop(unsigned threadId) noexcept { if (shared_.composite) { for (unsigned i = 0; i < shared_.newItems.size(); ++i) { const auto id = startId + i; - auto &plData = shared_.nsItems[i]; + const auto &plData = shared_.nsItems[i]; for (unsigned field = firstCompositeIndex + threadId - kTIDOffset; field < totalIndexes; field += threadsCnt) { bool needClearCache{false}; indexes_[field]->Upsert(Variant{plData}, id, needClearCache); @@ -362,7 +346,7 @@ void IndexInserters::insertionLoop(unsigned threadId) noexcept { auto &plData = shared_.nsItems[i]; Payload pl(pt_, plData); Payload plNew = item.GetPayload(); - for (unsigned field = threadId; field < firstCompositeIndex; field += threadsCnt) { + for (unsigned field = threadId - kTIDOffset + 1; field < firstCompositeIndex; field += threadsCnt) { ItemsLoader::doInsertField(indexes_, field, id, pl, plNew, krefs, skrefs, plArrayMtxs_[id % plArrayMtxs_.size()]); } @@ -375,7 +359,7 @@ void IndexInserters::insertionLoop(unsigned threadId) noexcept { auto &plData = shared_.nsItems[i]; Payload pl(pt_, plData); Payload plNew = item.GetPayload(); - for (unsigned field = threadId; field < firstCompositeIndex; field += threadsCnt) { + for (unsigned field = threadId - kTIDOffset + 1; field < firstCompositeIndex; field += threadsCnt) { ItemsLoader::doInsertField(indexes_, field, id, pl, plNew, krefs, skrefs, dummyMtx); } } diff --git a/cpp_src/core/namespace/itemsloader.h b/cpp_src/core/namespace/itemsloader.h index 3d39c27c8..f7bf79cf3 100644 --- a/cpp_src/core/namespace/itemsloader.h +++ b/cpp_src/core/namespace/itemsloader.h @@ -125,32 +125,33 @@ class IndexInserters { span newItems; span nsItems; unsigned startId = 0; - h_vector threadsWithNewData; bool terminate = false; bool composite = false; }; void insertionLoop(unsigned threadId) noexcept; void onItemsHandled() noexcept { - if ((readyThreads_.fetch_add(1, std::memory_order_acq_rel) + 1) == threads_.size()) { - std::lock_guard lck(mtx_); - cv_.notify_all(); + std::lock_guard lck(mtx_); + if (++readyThreads_ == threads_.size()) { + cvDone_.notify_one(); } } void onException(Error e) { std::lock_guard lck(mtx_); status_ = std::move(e); - if ((readyThreads_.fetch_add(1, std::memory_order_acq_rel) + 1) == threads_.size()) { - cv_.notify_all(); + if (++readyThreads_ == threads_.size()) { + cvDone_.notify_one(); } } std::mutex mtx_; - std::condition_variable cv_; + std::condition_variable cvReady_; + std::condition_variable cvDone_; + unsigned iteration_{0}; NamespaceImpl::IndexesStorage& indexes_; const PayloadType pt_; SharedData shared_; - std::atomic readyThreads_ = {0}; + unsigned readyThreads_ = {0}; std::vector threads_; Error status_; bool hasArrayIndexes_ = false; diff --git a/cpp_src/core/namespace/namespace.cc b/cpp_src/core/namespace/namespace.cc index 33d4eba88..379716585 100644 --- a/cpp_src/core/namespace/namespace.cc +++ b/cpp_src/core/namespace/namespace.cc @@ -23,7 +23,9 @@ void Namespace::CommitTransaction(Transaction& tx, QueryResults& result, const R auto lck = statCalculator.CreateLock(clonerMtx_, ctx); nsl = ns_; - if (needNamespaceCopy(nsl, tx)) { + if (needNamespaceCopy(nsl, tx) && + (tx.GetSteps().size() >= static_cast(txSizeToAlwaysCopy_.load(std::memory_order_relaxed)) || + isExpectingSelectsOnNamespace(nsl, ctx))) { PerfStatCalculatorMT nsCopyCalc(copyStatsCounter_, enablePerfCounters); calc.SetCounter(nsl->updatePerfCounter_); calc.LockHit(); @@ -101,6 +103,22 @@ bool Namespace::needNamespaceCopy(const NamespaceImpl::Ptr& ns, const Transactio (stepsCount >= txSizeToAlwaysCopy); } +bool Namespace::isExpectingSelectsOnNamespace(const NamespaceImpl::Ptr& ns, const RdxContext& ctx) { + // Some kind of heuristic: if there were no selects on this namespace yet and no one awaits read lock for it, probably we do not have to + // copy it. Improves scenarios, when user wants to fill namespace before any selections. + // It would be more optimal to acquire lock here and pass it further to the transaction, but this case is rare, so trying to not make it + // complicated. + if (ns->hadSelects() || !ns->isNotLocked(ctx)) { + return true; + } + std::this_thread::yield(); + if (!ns->hadSelects()) { + const bool enableTxHeuristic = !std::getenv("REINDEXER_NOTXHEURISTIC"); + return enableTxHeuristic; + } + return false; +} + void Namespace::doRename(const Namespace::Ptr& dst, const std::string& newName, const std::string& storagePath, const RdxContext& ctx) { std::string dbpath; const auto flushOpts = StorageFlushOpts().WithImmediateReopen(); diff --git a/cpp_src/core/namespace/namespace.h b/cpp_src/core/namespace/namespace.h index 267b0edac..c7e4eeb90 100644 --- a/cpp_src/core/namespace/namespace.h +++ b/cpp_src/core/namespace/namespace.h @@ -235,6 +235,7 @@ class Namespace { private: bool needNamespaceCopy(const NamespaceImpl::Ptr &ns, const Transaction &tx) const noexcept; + bool isExpectingSelectsOnNamespace(const NamespaceImpl::Ptr &ns, const RdxContext &ctx); void doRename(const Namespace::Ptr &dst, const std::string &newName, const std::string &storagePath, const RdxContext &ctx); NamespaceImpl::Ptr atomicLoadMainNs() const { std::lock_guard lck(nsPtrSpinlock_); diff --git a/cpp_src/core/namespace/namespaceimpl.cc b/cpp_src/core/namespace/namespaceimpl.cc index 0e60b57a8..e71e2f1a3 100644 --- a/cpp_src/core/namespace/namespaceimpl.cc +++ b/cpp_src/core/namespace/namespaceimpl.cc @@ -3,6 +3,7 @@ #include #include #include "core/cjson/cjsondecoder.h" +#include "core/cjson/defaultvaluecoder.h" #include "core/cjson/jsonbuilder.h" #include "core/cjson/uuid_recoders.h" #include "core/index/index.h" @@ -45,7 +46,7 @@ constexpr int kWALStatementItemsThreshold = 5; namespace reindexer { -std::atomic rxAllowNamespaceLeak = {false}; +std::atomic_bool rxAllowNamespaceLeak = {false}; constexpr int64_t kStorageSerialInitial = 1; constexpr uint8_t kSysRecordsBackupCount = 8; @@ -99,7 +100,7 @@ NamespaceImpl::NamespaceImpl(const NamespaceImpl& src, AsyncStorage::FullLockT& dbDestroyed_(false) { for (auto& idxIt : src.indexes_) indexes_.push_back(idxIt->Clone()); - markUpdated(true); + markUpdated(IndexOptimization::Full); logPrintf(LogInfo, "Namespace::CopyContentsFrom (%s).Workers: %d, timeout: %d, tm: { state_token: 0x%08X, version: %d }", name_, config_.optimizationSortWorkers, config_.optimizationTimeout, tagsMatcher_.stateToken(), tagsMatcher_.version()); } @@ -132,7 +133,6 @@ NamespaceImpl::NamespaceImpl(const std::string& name, UpdatesObservers& observer // Add index and payload field for tuple of non indexed fields IndexDef tupleIndexDef(kTupleName, {}, IndexStrStore, IndexOpts()); addIndex(tupleIndexDef); - updateSelectTime(); logPrintf(LogInfo, "Namespace::Construct (%s).Workers: %d, timeout: %d, tm: { state_token: 0x%08X, version: %d }", name_, config_.optimizationSortWorkers, config_.optimizationTimeout, tagsMatcher_.stateToken(), tagsMatcher_.version()); @@ -470,6 +470,28 @@ class NamespaceImpl::RollBack_updateItems final : private RollBackBase { std::unique_ptr tuple_; }; +std::vector NamespaceImpl::pickJsonPath(const PayloadFieldType& fld) { + const auto& paths = fld.JsonPaths(); + if (fld.IsArray()) { + std::vector result; + result.reserve(paths.size()); + for (const auto& path : paths) { + auto tags = tagsMatcher_.path2tag(path, false); + result.push_back(std::move(tags)); + // first without nested path - always (any, now last one found) + if ((result.size() > 1) && (result.back().size() == 1)) { + std::swap(result.front(), result.back()); + } + } + + return result; + } + + assertrx_throw(paths.size() == 1); + auto tags = tagsMatcher_.path2tag(paths.front(), false); + return {std::move(tags)}; +} + template <> class NamespaceImpl::RollBack_updateItems { public: @@ -486,64 +508,76 @@ class NamespaceImpl::RollBack_updateItems { RollBack_updateItems& operator=(RollBack_updateItems&&) = delete; }; -template -NamespaceImpl::RollBack_updateItems NamespaceImpl::updateItems(const PayloadType& oldPlType, const FieldsSet& changedFields, - int deltaFields) { - logPrintf(LogTrace, "Namespace::updateItems(%s) delta=%d", name_, deltaFields); +template +NamespaceImpl::RollBack_updateItems NamespaceImpl::updateItems(const PayloadType& oldPlType, int changedField) { + logPrintf(LogTrace, "Namespace::updateItems(%s) changeType=%s", name_, fieldChangeType == FieldChangeType::Add ? "Add" : "Delete"); - assertrx(oldPlType->NumFields() + deltaFields == payloadType_->NumFields()); + assertrx(oldPlType->NumFields() + int(fieldChangeType) == payloadType_->NumFields()); - const int compositeStartIdx = - (deltaFields >= 0) ? indexes_.firstCompositePos() : indexes_.firstCompositePos(oldPlType, sparseIndexesCount_); + const int compositeStartIdx = (fieldChangeType == FieldChangeType::Add) ? indexes_.firstCompositePos() + : indexes_.firstCompositePos(oldPlType, sparseIndexesCount_); const int compositeEndIdx = indexes_.totalSize(); - // All the composite indexes must be recreated, because those indexes are holding pointers to the old Payloads + // all composite indexes must be recreated, because those indexes are holding pointers to old Payloads RollBack_updateItems rollbacker{*this, recreateCompositeIndexes(compositeStartIdx, compositeEndIdx), repl_.dataHash, itemsDataSize_}; - for (auto& idx : indexes_) { idx->UpdatePayloadType(PayloadType{payloadType_}); } - VariantArray skrefsDel, skrefsUps; - ItemImpl newItem(payloadType_, tagsMatcher_); - newItem.Unsafe(true); - int errCount = 0; - Error lastErr = errOK; - repl_.dataHash = 0; - itemsDataSize_ = 0; - auto indexesCacheCleaner{GetIndexesCacheCleaner()}; + // no items, work done, stop processing + if (items_.empty()) { + return rollbacker; + } + std::unique_ptr recoder; - if (deltaFields < 0) { - assertrx(deltaFields == -1); - for (auto fieldIdx : changedFields) { - const auto& fld = oldPlType.Field(fieldIdx); - if (fieldIdx != 0 && fld.Type().Is()) { - const auto& jsonPaths = fld.JsonPaths(); - assertrx(jsonPaths.size() == 1); - if (fld.IsArray()) { - recoder.reset(new RecoderUuidToString{tagsMatcher_.path2tag(jsonPaths[0])}); - } else { - recoder.reset(new RecoderUuidToString{tagsMatcher_.path2tag(jsonPaths[0])}); - } + if constexpr (fieldChangeType == FieldChangeType::Delete) { + assertrx_throw(changedField > 0); + const auto& fld = oldPlType.Field(changedField); + if (fld.Type().Is()) { + const auto& jsonPaths = fld.JsonPaths(); + assertrx(jsonPaths.size() == 1); + const auto tags = tagsMatcher_.path2tag(jsonPaths[0]); + if (fld.IsArray()) { + recoder = std::make_unique>(tags); + } else { + recoder = std::make_unique>(tags); } } - } else if (deltaFields > 0) { - assertrx(deltaFields == 1); - for (auto fieldIdx : changedFields) { - const auto& fld = payloadType_.Field(fieldIdx); - if (fieldIdx != 0 && fld.Type().Is()) { - if (fld.IsArray()) { - recoder.reset(new RecoderStringToUuidArray{fieldIdx}); - } else { - recoder.reset(new RecoderStringToUuid{fieldIdx}); + + } else { + static_assert(fieldChangeType == FieldChangeType::Add); + assertrx_throw(changedField > 0); + const auto& fld = payloadType_.Field(changedField); + if (fld.Type().Is()) { + if (fld.IsArray()) { + recoder = std::make_unique(changedField); + } else { + recoder = std::make_unique(changedField); + } + } else { + const auto& indexToUpdate = indexes_[changedField]; + if (!IsComposite(indexToUpdate->Type()) && !indexToUpdate->Opts().IsSparse()) { + auto tagsNames = pickJsonPath(fld); + if (!tagsNames.empty()) { + recoder = std::make_unique(name_, fld, std::move(tagsNames), changedField); } } } } - if (!items_.empty()) { - rollbacker.SaveTuple(); - } - for (size_t rowId = 0; rowId < items_.size(); rowId++) { + rollbacker.SaveTuple(); + + VariantArray skrefsDel, skrefsUps; + ItemImpl newItem(payloadType_, tagsMatcher_); + newItem.Unsafe(true); + repl_.dataHash = 0; + itemsDataSize_ = 0; + auto indexesCacheCleaner{GetIndexesCacheCleaner()}; + + auto &tuple = *indexes_[0]; + auto &index = *indexes_[changedField]; + + WrSerializer pk, data; + for (size_t rowId = 0; rowId < items_.size(); ++rowId) { if (items_[rowId].IsFree()) { continue; } @@ -551,46 +585,73 @@ NamespaceImpl::RollBack_updateItems NamespaceImpl::updateItems(con Payload oldValue(oldPlType, plCurr); ItemImpl oldItem(oldPlType, plCurr, tagsMatcher_); oldItem.Unsafe(true); - newItem.FromCJSON(&oldItem, recoder.get()); + newItem.FromCJSON(oldItem, recoder.get()); + const bool itemTupleUpdated = recoder && recoder->Reset(); - PayloadValue plNew = oldValue.CopyTo(payloadType_, deltaFields >= 0); + PayloadValue plNew = oldValue.CopyTo(payloadType_, fieldChangeType == FieldChangeType::Add); plNew.SetLSN(plCurr.GetLSN()); - Payload newValue(payloadType_, plNew); - for (auto fieldIdx : changedFields) { - auto& index = *indexes_[fieldIdx]; - if ((fieldIdx == 0) || deltaFields <= 0) { - oldValue.Get(fieldIdx, skrefsDel, Variant::hold_t{}); - bool needClearCache{false}; - index.Delete(skrefsDel, rowId, *strHolder_, needClearCache); - if (needClearCache && index.IsOrdered()) indexesCacheCleaner.Add(index.SortId()); - } + // update tuple + oldValue.Get(0, skrefsDel, Variant::hold_t{}); + bool needClearCache{false}; + tuple.Delete(skrefsDel, rowId, *strHolder_, needClearCache); + newItem.GetPayload().Get(0, skrefsUps); + krefs.resize(0); + tuple.Upsert(krefs, skrefsUps, rowId, needClearCache); + if (needClearCache && tuple.IsOrdered()) { + indexesCacheCleaner.Add(tuple.SortId()); + } - if ((fieldIdx == 0) || deltaFields >= 0) { - newItem.GetPayload().Get(fieldIdx, skrefsUps); - krefs.resize(0); - bool needClearCache{false}; - index.Upsert(krefs, skrefsUps, rowId, needClearCache); - if (needClearCache && index.IsOrdered()) indexesCacheCleaner.Add(index.SortId()); - newValue.Set(fieldIdx, krefs); + // update index + Payload newValue(payloadType_, plNew); + newValue.Set(0, krefs); + + if constexpr (fieldChangeType == FieldChangeType::Delete) { + oldValue.Get(changedField, skrefsDel, Variant::hold_t{}); + needClearCache = false; + index.Delete(skrefsDel, rowId, *strHolder_, needClearCache); + if (needClearCache && index.IsOrdered()) { + indexesCacheCleaner.Add(index.SortId()); + } + } else { + static_assert(fieldChangeType == FieldChangeType::Add); + newItem.GetPayload().Get(changedField, skrefsUps); + krefs.resize(0); + needClearCache = false; + index.Upsert(krefs, skrefsUps, rowId, needClearCache); + if (needClearCache && index.IsOrdered()) { + indexesCacheCleaner.Add(index.SortId()); } + newValue.Set(changedField, krefs); } for (int fieldIdx = compositeStartIdx; fieldIdx < compositeEndIdx; ++fieldIdx) { - bool needClearCache{false}; - indexes_[fieldIdx]->Upsert(Variant(plNew), rowId, needClearCache); - if (needClearCache && indexes_[fieldIdx]->IsOrdered()) indexesCacheCleaner.Add(indexes_[fieldIdx]->SortId()); + needClearCache = false; + auto &fieldIndex = *indexes_[fieldIdx]; + fieldIndex.Upsert(Variant(plNew), rowId, needClearCache); + if (needClearCache && fieldIndex.IsOrdered()) { + indexesCacheCleaner.Add(fieldIndex.SortId()); + } } rollbacker.SaveItem(rowId, std::move(plCurr)); plCurr = std::move(plNew); repl_.dataHash ^= Payload(payloadType_, plCurr).GetHash(); itemsDataSize_ += plCurr.GetCapacity() + sizeof(PayloadValue::dataHeader); + + // update data in storage + if (itemTupleUpdated && storage_.IsValid()) { + pk.Reset(); + data.Reset(); + pk << kRxStorageItemPrefix; + Payload(payloadType_, plCurr).SerializeFields(pk, pkFields()); + data.PutUInt64(plCurr.GetLSN()); + newItem.GetCJSON(data); + storage_.Write(pk.Slice(), data.Slice()); + } } - markUpdated(false); - if (errCount != 0) { - logPrintf(LogError, "Can't update indexes of %d items in namespace %s: %s", errCount, name_, lastErr.what()); - } + + markUpdated(IndexOptimization::Partial); return rollbacker; } @@ -776,8 +837,7 @@ void NamespaceImpl::dropIndex(const IndexDef& index) { PayloadType oldPlType = payloadType_; payloadType_.Drop(index.name_); tagsMatcher_.UpdatePayloadType(payloadType_); - FieldsSet changedFields{0, fieldIdx}; - auto rollbacker{updateItems(oldPlType, changedFields, -1)}; + auto rollbacker{updateItems(oldPlType, fieldIdx)}; rollbacker.Disable(); } @@ -1038,7 +1098,7 @@ class NamespaceImpl::RollBack_addIndex final : private RollBackBase { } void NeedDecreaseSparseIndexCount() noexcept { needDecreaseSparseIndexCount_ = true; } void SetOldPayloadType(PayloadType&& oldPt) noexcept { oldPayloadType_.emplace(std::move(oldPt)); } - const PayloadType& GetOldPayloadType() const noexcept { + [[nodiscard]] const PayloadType& GetOldPayloadType() const noexcept { // NOLINTNEXTLINE(bugprone-unchecked-optional-access) return *oldPayloadType_; } @@ -1118,9 +1178,8 @@ bool NamespaceImpl::addIndex(const IndexDef& indexDef) { newIndex->SetFields(FieldsSet(idxNo)); newIndex->UpdatePayloadType(PayloadType{payloadType_}); - FieldsSet changedFields{0, idxNo}; rollbacker.RollBacker_insertIndex(insertIndex(std::move(newIndex), idxNo, indexName)); - rollbacker.RollBacker_updateItems(updateItems(rollbacker.GetOldPayloadType(), changedFields, 1)); + rollbacker.RollBacker_updateItems(updateItems(rollbacker.GetOldPayloadType(), idxNo)); } updateSortedIdxCount(); rollbacker.Disable(); @@ -1139,7 +1198,7 @@ void NamespaceImpl::fillSparseIndex(Index& index, std::string_view jsonPath) { index.Upsert(krefs, skrefs, int(rowId), needClearCache); if (needClearCache && index.IsOrdered()) indexesCacheCleaner.Add(index.SortId()); } - markUpdated(false); + scheduleIndexOptimization(IndexOptimization::Partial); } bool NamespaceImpl::updateIndex(const IndexDef& indexDef) { @@ -1342,8 +1401,9 @@ void NamespaceImpl::doUpdate(const Query& query, QueryResults& result, const NsC if (withExpressions && updateWithJson) break; } - if (ctx.rdxContext.fromReplication_ && withExpressions) + if (ctx.rdxContext.fromReplication_ && withExpressions) { throw Error(errLogic, "Can't apply update query with expression to slave ns '%s'", name_); + } if (!ctx.inTransaction) { ThrowOnCancel(ctx.rdxContext); @@ -1353,8 +1413,19 @@ void NamespaceImpl::doUpdate(const Query& query, QueryResults& result, const NsC // row-based replication (to preserve data inconsistency), otherwise we update // it via 'WalUpdateQuery' (statement-based replication). If Update statement // contains update of entire object (via JSON) then statement replication is not possible. - bool statementReplication = - (!updateWithJson && !withExpressions && !query.HasLimit() && !query.HasOffset() && (result.Count() >= kWALStatementItemsThreshold)); + // TODO: Statement-based replication is disabled due to logic conflicts in WAL. + // There are 2 buggy situations: + // 1. WAL [ ..., itemRef1, SQLUpdate(updates itemRef1), ... ] - in this case itemRef1 already contains updates data and follower will + // apply SQLUpdate twice. Not all of the possible updates are may be applied twice with the same result (especially updates for multiple + // fields); + // 2. WAL [ ..., itemRef1, empty (was ItemRef2), SQLUpdate(updates itemRef1+itemRef2), ..., SQLDelete(itemRef2) ] - in this case + // itemRef2 has migrated further after some kind of another Update call. When this WAL sequence will be reproduced on follower-node, it + // will cause data missmatch, because SQLUpdate and SQLDelete may depend on each other. So, we have to choose between + // statement-replication for Updates and statement replication for Deletes here. + // issue #1771 + // bool statementReplication = + // (!updateWithJson && !withExpressions && !query.HasLimit() && !query.HasOffset() && (result.Count() >= kWALStatementItemsThreshold)); + constexpr bool statementReplication = false; AsyncStorage::AdviceGuardT storageAdvice; if (result.Items().size() >= AsyncStorage::kLimitToAdviceBatching) { @@ -1370,7 +1441,7 @@ void NamespaceImpl::doUpdate(const Query& query, QueryResults& result, const NsC const bool isPKModified = itemModifier.Modify(item.Id(), ctx); std::optional modifyData; if (isPKModified) { - statementReplication = false; + // statementReplication = false; modifyData.emplace(itemModifier.GetPayloadValueBackup(), lsn_t(item.Value().GetLSN())); } replicateItem(item.Id(), ctx, statementReplication, oldPlHash, oldItemCapacity, std::move(modifyData)); @@ -1379,18 +1450,19 @@ void NamespaceImpl::doUpdate(const Query& query, QueryResults& result, const NsC result.getTagsMatcher(0) = tagsMatcher_; assertrx(result.IsNamespaceAdded(this)); - if (statementReplication) { - WrSerializer ser; - WALRecord wrec(WalUpdateQuery, query.GetSQL(ser, QueryUpdate).Slice(), ctx.inTransaction); - lsn_t lsn(wal_.Add(wrec), serverId_); - if (!ctx.rdxContext.fromReplication_) repl_.lastSelfLSN = lsn; - for (ItemRef& item : result.Items()) { - item.Value().SetLSN(int64_t(lsn)); - } - if (!repl_.temporary) - observers_->OnWALUpdate(LSNPair(lsn, ctx.rdxContext.fromReplication_ ? ctx.rdxContext.LSNs_.originLSN_ : lsn), name_, wrec); - if (!ctx.rdxContext.fromReplication_) setReplLSNs(LSNPair(lsn_t(), lsn)); - } + // Disabled due to statement base replication logic conflicts + // if (statementReplication) { + // WrSerializer ser; + // WALRecord wrec(WalUpdateQuery, query.GetSQL(ser, QueryUpdate).Slice(), ctx.inTransaction); + // lsn_t lsn(wal_.Add(wrec), serverId_); + // if (!ctx.rdxContext.fromReplication_) repl_.lastSelfLSN = lsn; + // for (ItemRef& item : result.Items()) { + // item.Value().SetLSN(int64_t(lsn)); + // } + // if (!repl_.temporary) + // observers_->OnWALUpdate(LSNPair(lsn, ctx.rdxContext.fromReplication_ ? ctx.rdxContext.LSNs_.originLSN_ : lsn), name_, wrec); + // if (!ctx.rdxContext.fromReplication_) setReplLSNs(LSNPair(lsn_t(), lsn)); + // } if (query.GetDebugLevel() >= LogInfo) { logPrintf(LogInfo, "Updated %d items in %d µs", result.Count(), @@ -1404,7 +1476,7 @@ void NamespaceImpl::replicateItem(IdType itemId, const NsContext& ctx, bool stat Payload pl(payloadType_, pv); auto sendWalUpdate = [this, itemId, &ctx, &pv](ItemModifyMode mode) { - lsn_t lsn(wal_.Add(WALRecord(WalItemUpdate, itemId, ctx.inTransaction), lsn_t()), serverId_); + lsn_t lsn(wal_.Add(WALRecord(WalItemUpdate, itemId, ctx.inTransaction), lsn_t(pv.GetLSN())), serverId_); if (!ctx.rdxContext.fromReplication_) repl_.lastSelfLSN = lsn; pv.SetLSN(int64_t(lsn)); ItemImpl item(payloadType_, pv, tagsMatcher_); @@ -1533,7 +1605,7 @@ void NamespaceImpl::doDelete(IdType id) { free_.resize(0); items_.resize(0); } - markUpdated(true); + markUpdated(IndexOptimization::Full); } void NamespaceImpl::doDelete(const Query& query, QueryResults& result, const NsContext& ctx) { @@ -1627,7 +1699,7 @@ void NamespaceImpl::doTruncate(const NsContext& ctx) { lsn_t lsn(wal_.Add(wrec), serverId_); if (!ctx.rdxContext.fromReplication_) repl_.lastSelfLSN = lsn; - markUpdated(true); + markUpdated(IndexOptimization::Full); if (!repl_.temporary) observers_->OnWALUpdate(LSNPair(lsn, ctx.rdxContext.fromReplication_ ? ctx.rdxContext.LSNs_.originLSN_ : lsn), name_, wrec); if (!ctx.rdxContext.fromReplication_) setReplLSNs(LSNPair(lsn_t(), lsn)); @@ -1715,9 +1787,6 @@ void NamespaceImpl::setReplLSNs(LSNPair LSNs) { repl_.originLSN = LSNs.originLSN_; repl_.lastUpstreamLSN = LSNs.upstreamLSN_; replStateUpdates_.fetch_add(1, std::memory_order_release); - if (!isSystem()) { - logPrintf(LogTrace, "[repl:%s]:%d setReplLSNs originLSN = %s upstreamLSN=%s", name_, serverId_, LSNs.originLSN_, LSNs.upstreamLSN_); - } } void NamespaceImpl::setSlaveMode(const RdxContext& ctx) { @@ -2100,7 +2169,7 @@ void NamespaceImpl::doModifyItem(Item& item, ItemModifyMode mode, const NsContex } if (!ctx.rdxContext.fromReplication_) setReplLSNs(LSNPair(lsn_t(), lsn)); - markUpdated(!exists); + markUpdated(exists ? IndexOptimization::Partial : IndexOptimization::Full); } RX_ALWAYS_INLINE VariantArray NamespaceImpl::getPkKeys(const ConstPayload& cpl, Index* pkIndex, int fieldNum) { @@ -2243,17 +2312,12 @@ void NamespaceImpl::optimizeIndexes(const NsContext& ctx) { } } -void NamespaceImpl::markUpdated(bool forceOptimizeAllIndexes) { +void NamespaceImpl::markUpdated(IndexOptimization requestedOptimization) { using namespace std::string_view_literals; using namespace std::chrono; itemsCount_.store(items_.size(), std::memory_order_relaxed); itemsCapacity_.store(items_.capacity(), std::memory_order_relaxed); - if (forceOptimizeAllIndexes) { - optimizationState_.store(NotOptimized); - } else { - int expected{OptimizationCompleted}; - optimizationState_.compare_exchange_strong(expected, OptimizedPartially); - } + scheduleIndexOptimization(requestedOptimization); clearNamespaceCaches(); lastUpdateTime_.store(duration_cast(system_clock_w::now().time_since_epoch()).count(), std::memory_order_release); if (!nsIsLoading_) { @@ -2261,6 +2325,19 @@ void NamespaceImpl::markUpdated(bool forceOptimizeAllIndexes) { } } +void NamespaceImpl::scheduleIndexOptimization(IndexOptimization requestedOptimization) { + switch (requestedOptimization) { + case IndexOptimization::Full: + optimizationState_.store(NotOptimized); + break; + case IndexOptimization::Partial: { + int expected{OptimizationCompleted}; + optimizationState_.compare_exchange_strong(expected, OptimizedPartially); + break; + } + } +} + template void NamespaceImpl::Select(QueryResults& result, SelectCtxWithJoinPreSelect& params, const RdxContext& ctx) { if (!params.query.IsWALQuery()) { @@ -2754,7 +2831,7 @@ void NamespaceImpl::LoadFromStorage(unsigned threadsCount, const RdxContext& ctx replStateUpdates_.fetch_add(1, std::memory_order_release); } - markUpdated(true); + markUpdated(IndexOptimization::Full); } void NamespaceImpl::initWAL(int64_t minLSN, int64_t maxLSN) { @@ -2983,28 +3060,10 @@ void NamespaceImpl::deleteMeta(const std::string& key, const RdxContext& ctx) { } void NamespaceImpl::warmupFtIndexes() { - h_vector warmupThreads; - h_vector warmupIndexes; for (auto& idx : indexes_) { - if (idx->RequireWarmupOnNsCopy()) { - warmupIndexes.emplace_back(idx.get()); - } - } - auto threadsCnt = config_.optimizationSortWorkers > 0 ? std::min(unsigned(config_.optimizationSortWorkers), warmupIndexes.size()) - : std::min(4u, warmupIndexes.size()); - warmupThreads.resize(threadsCnt); - std::atomic next = {0}; - for (unsigned i = 0; i < warmupThreads.size(); ++i) { - warmupThreads[i] = std::thread([&warmupIndexes, &next] { - unsigned num = next.fetch_add(1); - while (num < warmupIndexes.size()) { - warmupIndexes[num]->CommitFulltext(); - num = next.fetch_add(1); - } - }); - } - for (auto& th : warmupThreads) { - th.join(); + if (idx->IsFulltext()) { + idx->CommitFulltext(); + } } } @@ -3019,7 +3078,7 @@ int NamespaceImpl::getSortedIdxCount() const noexcept { void NamespaceImpl::updateSortedIdxCount() { int sortedIdxCount = getSortedIdxCount(); for (auto& idx : indexes_) idx->SetSortedIdxCount(sortedIdxCount); - markUpdated(true); + scheduleIndexOptimization(IndexOptimization::Full); } IdType NamespaceImpl::createItem(size_t realSize) { diff --git a/cpp_src/core/namespace/namespaceimpl.h b/cpp_src/core/namespace/namespaceimpl.h index db5b5d784..0a1e41a0a 100644 --- a/cpp_src/core/namespace/namespaceimpl.h +++ b/cpp_src/core/namespace/namespaceimpl.h @@ -48,14 +48,10 @@ class QueryPreprocessor; class RdxContext; class RdxActivityContext; class SortExpression; -class QueryResults; namespace long_actions { template struct Logger; - -template -struct QueryEnum2Type; } // namespace long_actions template class> @@ -77,8 +73,8 @@ struct NsContext { } const RdxContext &rdxContext; - bool isCopiedNsRequest = false; - bool inTransaction = false; + bool isCopiedNsRequest{false}; + bool inTransaction{false}; }; namespace composite_substitution_helpers { @@ -92,7 +88,10 @@ enum class StoredValuesOptimizationStatus : int8_t { Enabled }; -class NamespaceImpl : public intrusive_atomic_rc_base { // NOLINT(*performance.Padding) Padding does not matter for this class +enum class IndexOptimization : int8_t { Partial, Full }; + +class NamespaceImpl final : public intrusive_atomic_rc_base { // NOLINT(*performance.Padding) Padding does not + // matter for this class class RollBack_insertIndex; class RollBack_addIndex; template @@ -117,7 +116,7 @@ class NamespaceImpl : public intrusive_atomic_rc_base { // NOLINT(*performance. ~IndexesCacheCleaner(); private: - NamespaceImpl &ns_; + const NamespaceImpl &ns_; std::bitset sorts_; }; @@ -157,17 +156,17 @@ class NamespaceImpl : public intrusive_atomic_rc_base { // NOLINT(*performance. private: const NamespaceImpl &ns_; - const int sorted_indexes_; - const IdType curSortId_; + const int sorted_indexes_{0}; + const IdType curSortId_{-1}; std::vector ids2Sorts_; - int64_t ids2SortsMemSize_ = 0; + int64_t ids2SortsMemSize_{0}; }; - class IndexesStorage : public std::vector> { + class IndexesStorage final : public std::vector> { public: using Base = std::vector>; - IndexesStorage(const NamespaceImpl &ns); + explicit IndexesStorage(const NamespaceImpl &ns); IndexesStorage(const IndexesStorage &src) = delete; IndexesStorage &operator=(const IndexesStorage &src) = delete; @@ -189,20 +188,20 @@ class NamespaceImpl : public intrusive_atomic_rc_base { // NOLINT(*performance. const NamespaceImpl &ns_; }; - class Items : public std::vector { + class Items final : public std::vector { public: bool exists(IdType id) const { return id < IdType(size()) && !at(id).IsFree(); } }; public: enum OptimizationState : int { NotOptimized, OptimizedPartially, OptimizationCompleted }; - + enum class FieldChangeType { Add = 1, Delete = -1 }; using Ptr = intrusive_ptr; using Mutex = MarkedMutex; NamespaceImpl(const std::string &_name, UpdatesObservers &observers); NamespaceImpl &operator=(const NamespaceImpl &) = delete; - ~NamespaceImpl(); + ~NamespaceImpl() override; std::string GetName(const RdxContext &ctx) const { auto rlck = rLock(ctx); @@ -319,12 +318,13 @@ class NamespaceImpl : public intrusive_atomic_rc_base { // NOLINT(*performance. } return lck; } + bool IsNotLocked(const RdxContext &ctx) const { return WLockT(mtx_, std::try_to_lock_t{}, ctx).owns_lock(); } void MarkReadOnly() { readonly_.store(true, std::memory_order_release); } std::atomic_bool &IsReadOnly() { return readonly_; } private: mutable Mutex mtx_; - std::atomic readonly_ = {false}; + std::atomic_bool readonly_{false}; }; struct PKModifyRevertData { @@ -348,7 +348,8 @@ class NamespaceImpl : public intrusive_atomic_rc_base { // NOLINT(*performance. void initWAL(int64_t minLSN, int64_t maxLSN); - void markUpdated(bool forceOptimizeAllIndexes); + void markUpdated(IndexOptimization requestedOptimization); + void scheduleIndexOptimization(IndexOptimization requestedOptimization); void doUpdate(const Query &query, QueryResults &result, const NsContext &); void doDelete(const Query &query, QueryResults &result, const NsContext &); void doTruncate(const NsContext &ctx); @@ -357,9 +358,8 @@ class NamespaceImpl : public intrusive_atomic_rc_base { // NOLINT(*performance. void doModifyItem(Item &item, ItemModifyMode mode, const NsContext &ctx); void deleteItem(Item &item, const NsContext &ctx); void updateTagsMatcherFromItem(ItemImpl *ritem); - template - [[nodiscard]] RollBack_updateItems updateItems(const PayloadType &oldPlType, const FieldsSet &changedFields, - int deltaFields); + template + [[nodiscard]] RollBack_updateItems updateItems(const PayloadType &oldPlType, int changedField); void fillSparseIndex(Index &, std::string_view jsonPath); void doDelete(IdType id); void optimizeIndexes(const NsContext &); @@ -421,8 +421,10 @@ class NamespaceImpl : public intrusive_atomic_rc_base { // NOLINT(*performance. using namespace std::chrono; lastSelectTime_ = duration_cast(system_clock_w::now().time_since_epoch()).count(); } + bool hadSelects() const noexcept { return lastSelectTime_.load(std::memory_order_relaxed) != 0; } void markReadOnly() { locker_.MarkReadOnly(); } Locker::WLockT wLock(const RdxContext &ctx) const { return locker_.WLock(ctx); } + bool isNotLocked(const RdxContext &ctx) const { return locker_.IsNotLocked(ctx); } Locker::RLockT rLock(const RdxContext &ctx) const { return locker_.RLock(ctx); } bool SortOrdersBuilt() const noexcept { return optimizationState_.load(std::memory_order_acquire) == OptimizationCompleted; } @@ -448,7 +450,7 @@ class NamespaceImpl : public intrusive_atomic_rc_base { // NOLINT(*performance. std::unordered_map meta_; - int sparseIndexesCount_ = 0; + int sparseIndexesCount_{0}; VariantArray krefs, skrefs; SysRecordsVersions sysRecordsVersions_; @@ -484,9 +486,10 @@ class NamespaceImpl : public intrusive_atomic_rc_base { // NOLINT(*performance. } size_t getWalSize(const NamespaceConfigData &cfg) const noexcept { return isSystem() ? int64_t(1) : std::max(cfg.walSize, int64_t(1)); } void clearNamespaceCaches(); + std::vector pickJsonPath(const PayloadFieldType &fld); PerfStatCounterMT updatePerfCounter_, selectPerfCounter_; - std::atomic enablePerfCounters_{false}; + std::atomic_bool enablePerfCounters_{false}; NamespaceConfigData config_; std::unique_ptr queryCountCache_; @@ -497,25 +500,25 @@ class NamespaceImpl : public intrusive_atomic_rc_base { // NOLINT(*performance. UpdatesObservers *observers_; StorageOpts storageOpts_; - std::atomic lastSelectTime_{0}; + std::atomic_int64_t lastSelectTime_{0}; sync_pool pool_; - std::atomic cancelCommitCnt_{0}; - std::atomic lastUpdateTime_{0}; + std::atomic_int32_t cancelCommitCnt_{0}; + std::atomic_int64_t lastUpdateTime_{0}; - std::atomic itemsCount_ = {0}; - std::atomic itemsCapacity_ = {0}; - bool nsIsLoading_ = false; + std::atomic_uint32_t itemsCount_{0}; + std::atomic_uint32_t itemsCapacity_{0}; + bool nsIsLoading_{false}; - int serverId_ = 0; - std::atomic serverIdChanged_{false}; - size_t itemsDataSize_ = 0; + int serverId_{0}; + std::atomic_bool serverIdChanged_{false}; + size_t itemsDataSize_{0}; - std::atomic optimizationState_{OptimizationState::NotOptimized}; + std::atomic_int optimizationState_{OptimizationState::NotOptimized}; StringsHolderPtr strHolder_; std::deque strHoldersWaitingToBeDeleted_; std::chrono::seconds lastExpirationCheckTs_{0}; - mutable std::atomic nsUpdateSortedContextMemory_{0}; - std::atomic dbDestroyed_{false}; + mutable std::atomic_int64_t nsUpdateSortedContextMemory_{0}; + std::atomic_bool dbDestroyed_{false}; }; } // namespace reindexer diff --git a/cpp_src/core/nsselecter/comparator/comparator_indexed.h b/cpp_src/core/nsselecter/comparator/comparator_indexed.h index fbd7baa9f..86979e73c 100644 --- a/cpp_src/core/nsselecter/comparator/comparator_indexed.h +++ b/cpp_src/core/nsselecter/comparator/comparator_indexed.h @@ -11,6 +11,8 @@ #include "core/payload/payloadfieldvalue.h" #include "core/payload/payloadtype.h" #include "core/payload/payloadvalue.h" +#include "estl/fast_hash_map.h" +#include "estl/fast_hash_set.h" #include "helpers.h" #include "tools/string_regexp_functions.h" @@ -63,7 +65,7 @@ struct ValuesHolder { template struct ValuesHolder { - using Type = std::unordered_set; + using Type = fast_hash_set; }; template <> @@ -79,8 +81,8 @@ struct ValuesHolder { template struct ValuesHolder { struct Type { - std::unordered_map values_; - std::unordered_set allSetValues_; + fast_hash_map values_; + fast_hash_set allSetValues_; }; }; @@ -88,7 +90,7 @@ template <> struct ValuesHolder { struct Type { key_string_map values_; - std::unordered_set allSetValues_; + fast_hash_set allSetValues_; }; }; @@ -96,7 +98,7 @@ template <> struct ValuesHolder { struct Type { unordered_payload_map values_; - std::unordered_set allSetValues_; + fast_hash_set allSetValues_; }; }; @@ -1729,57 +1731,71 @@ class ComparatorIndexed { switch (impl_.index()) { case 0: res = std::get_if<0>(&impl_)->Compare(item, rowId); - break; + matchedCount_ += res; + return res; case 1: res = std::get_if<1>(&impl_)->Compare(item, rowId); - break; + matchedCount_ += res; + return res; case 2: res = std::get_if<2>(&impl_)->Compare(item, rowId); - break; + matchedCount_ += res; + return res; case 3: res = std::get_if<3>(&impl_)->Compare(item, rowId); - break; + matchedCount_ += res; + return res; case 4: res = std::get_if<4>(&impl_)->Compare(item, rowId); - break; + matchedCount_ += res; + return res; case 5: res = std::get_if<5>(&impl_)->Compare(item, rowId); - break; + matchedCount_ += res; + return res; case 6: res = std::get_if<6>(&impl_)->Compare(item, rowId); - break; + matchedCount_ += res; + return res; case 7: res = std::get_if<7>(&impl_)->Compare(item, rowId); - break; + matchedCount_ += res; + return res; case 8: res = std::get_if<8>(&impl_)->Compare(item, rowId); - break; + matchedCount_ += res; + return res; case 9: res = std::get_if<9>(&impl_)->Compare(item, rowId); - break; + matchedCount_ += res; + return res; case 10: res = std::get_if<10>(&impl_)->Compare(item, rowId); - break; + matchedCount_ += res; + return res; case 11: res = std::get_if<11>(&impl_)->Compare(item, rowId); - break; + matchedCount_ += res; + return res; case 12: res = std::get_if<12>(&impl_)->Compare(item, rowId); - break; + matchedCount_ += res; + return res; case 13: res = std::get_if<13>(&impl_)->Compare(item, rowId); - break; + matchedCount_ += res; + return res; case 14: res = std::get_if<14>(&impl_)->Compare(item, rowId); - break; + matchedCount_ += res; + return res; case 15: res = std::get_if<15>(&impl_)->Compare(item, rowId); - break; + matchedCount_ += res; + return res; default: abort(); } - matchedCount_ += res; - return res; } void ClearDistinctValues() noexcept { std::visit([](auto& impl) { impl.ClearDistinctValues(); }, impl_); @@ -1809,54 +1825,67 @@ template <> switch (impl_.index()) { case 0: res = std::get_if<0>(&impl_)->Compare(item, rowId); - break; + matchedCount_ += res; + return res; case 1: res = std::get_if<1>(&impl_)->Compare(item, rowId); - break; + matchedCount_ += res; + return res; case 2: res = std::get_if<2>(&impl_)->Compare(item, rowId); - break; + matchedCount_ += res; + return res; case 3: res = std::get_if<3>(&impl_)->Compare(item, rowId); - break; + matchedCount_ += res; + return res; case 4: res = std::get_if<4>(&impl_)->Compare(item, rowId); - break; + matchedCount_ += res; + return res; case 5: res = std::get_if<5>(&impl_)->Compare(item, rowId); - break; + matchedCount_ += res; + return res; case 6: res = std::get_if<6>(&impl_)->Compare(item, rowId); - break; + matchedCount_ += res; + return res; case 7: res = std::get_if<7>(&impl_)->Compare(item, rowId); - break; + matchedCount_ += res; + return res; case 8: res = std::get_if<8>(&impl_)->Compare(item, rowId); - break; + matchedCount_ += res; + return res; case 9: res = std::get_if<9>(&impl_)->Compare(item, rowId); - break; + matchedCount_ += res; + return res; case 10: res = std::get_if<10>(&impl_)->Compare(item, rowId); - break; + matchedCount_ += res; + return res; case 11: res = std::get_if<11>(&impl_)->Compare(item, rowId); - break; + matchedCount_ += res; + return res; case 12: res = std::get_if<12>(&impl_)->Compare(item, rowId); - break; + matchedCount_ += res; + return res; case 13: res = std::get_if<13>(&impl_)->Compare(item, rowId); - break; + matchedCount_ += res; + return res; case 14: res = std::get_if<14>(&impl_)->Compare(item, rowId); - break; + matchedCount_ += res; + return res; default: abort(); } - matchedCount_ += res; - return res; } template <> @@ -1874,24 +1903,27 @@ template <> switch (impl_.index()) { case 0: res = std::get_if<0>(&impl_)->Compare(item, rowId); - break; + matchedCount_ += res; + return res; case 1: res = std::get_if<1>(&impl_)->Compare(item, rowId); - break; + matchedCount_ += res; + return res; case 2: res = std::get_if<2>(&impl_)->Compare(item, rowId); - break; + matchedCount_ += res; + return res; case 3: res = std::get_if<3>(&impl_)->Compare(item, rowId); - break; + matchedCount_ += res; + return res; case 4: res = std::get_if<4>(&impl_)->Compare(item, rowId); - break; + matchedCount_ += res; + return res; default: abort(); } - matchedCount_ += res; - return res; } extern template std::string ComparatorIndexed::ConditionStr() const; diff --git a/cpp_src/core/nsselecter/querypreprocessor.cc b/cpp_src/core/nsselecter/querypreprocessor.cc index 20f62ce9a..519ae66c4 100644 --- a/cpp_src/core/nsselecter/querypreprocessor.cc +++ b/cpp_src/core/nsselecter/querypreprocessor.cc @@ -514,7 +514,7 @@ const std::vector *QueryPreprocessor::getCompositeIndex(int field) const no return nullptr; } -static void createCompositeKeyValues(const span> &values, Payload &pl, VariantArray &ret, +static void createCompositeKeyValues(span> values, Payload &pl, VariantArray &ret, uint32_t resultSetSize, uint32_t n) { const auto &v = values[n]; for (auto it = v.second.cbegin(), end = v.second.cend(); it != end; ++it) { @@ -531,7 +531,7 @@ static void createCompositeKeyValues(const span> &v } } -static VariantArray createCompositeKeyValues(const span> &values, const PayloadType &plType, +static VariantArray createCompositeKeyValues(span> values, const PayloadType &plType, uint32_t resultSetSize) { PayloadValue d(plType.TotalSize()); Payload pl(plType, d); @@ -616,7 +616,7 @@ size_t QueryPreprocessor::substituteCompositeIndexes(const size_t from, const si setQueryIndex(fld, res.idx, ns_); container_[first].Emplace(std::move(fld), qValues.size() == 1 ? CondEq : CondSet, std::move(qValues)); } - deleteRanges.Add(span(res.entries.data() + 1, res.entries.size() - 1)); + deleteRanges.Add(span(res.entries.data() + 1, res.entries.size() - 1)); resIdx = searcher.RemoveUsedAndGetNext(resIdx); } for (auto rit = deleteRanges.rbegin(); rit != deleteRanges.rend(); ++rit) { @@ -1962,7 +1962,7 @@ class JoinOnExplainEnabled { void QueryPreprocessor::setQueryIndex(QueryField &qField, int idxNo, const NamespaceImpl &ns) { const auto &idx = *ns.indexes_[idxNo]; - std::vector compositeFieldsTypes; + QueryField::CompositeTypesVecT compositeFieldsTypes; if (idxNo >= ns.indexes_.firstCompositePos()) { #ifndef NDEBUG const bool ftIdx = IsFullText(idx.Type()); diff --git a/cpp_src/core/nsselecter/selectiteratorcontainer.cc b/cpp_src/core/nsselecter/selectiteratorcontainer.cc index fa9a2648e..e3fa0ec14 100644 --- a/cpp_src/core/nsselecter/selectiteratorcontainer.cc +++ b/cpp_src/core/nsselecter/selectiteratorcontainer.cc @@ -60,7 +60,34 @@ void SelectIteratorContainer::sortByCost(span indexes, span co costs[indexes[j]] = cst; } } - std::stable_sort(indexes.begin() + from, indexes.begin() + to, [&costs](unsigned i1, unsigned i2) { return costs[i1] < costs[i2]; }); + // GCC's std::stable_sort performs allocations even in the simpliest scenarios, so handling some of them explicitly + switch (to - from) { + case 0: + case 1: + break; + case 2: { + auto it = indexes.begin() + from; + auto &a = *(it++); + auto &b = *(it); + if (costs[a] > costs[b]) { + std::swap(a, b); + } + break; + } + case 3: { + auto it = indexes.begin() + from; + auto &a = *(it++); + auto &b = *(it++); + auto &c = *(it); + if (costs[a] > costs[b]) std::swap(a, b); + if (costs[b] > costs[c]) std::swap(b, c); + if (costs[a] > costs[b]) std::swap(a, b); + break; + } + default: + std::stable_sort(indexes.begin() + from, indexes.begin() + to, + [&costs](unsigned i1, unsigned i2) { return costs[i1] < costs[i2]; }); + } moveJoinsToTheBeginingOfORs(indexes, from, to); } @@ -266,7 +293,7 @@ SelectKeyResults SelectIteratorContainer::processQueryEntry(const QueryEntry &qe opts.inTransaction = ctx_->inTransaction; auto ctx = selectFnc ? selectFnc->CreateCtx(qe.IndexNo()) : BaseFunctionCtx::Ptr{}; - if (ctx && ctx->type == BaseFunctionCtx::kFtCtx) ftCtx = reindexer::reinterpret_pointer_cast(ctx); + if (ctx && ctx->type == BaseFunctionCtx::kFtCtx) ftCtx = reindexer::static_ctx_pointer_cast(ctx); if (index->Opts().GetCollateMode() == CollateUTF8 || isIndexFt) { for (auto &key : qe.Values()) key.EnsureUTF8(); diff --git a/cpp_src/core/nsselecter/selectiteratorcontainer.h b/cpp_src/core/nsselecter/selectiteratorcontainer.h index 00ecd51d2..e53eb8501 100644 --- a/cpp_src/core/nsselecter/selectiteratorcontainer.h +++ b/cpp_src/core/nsselecter/selectiteratorcontainer.h @@ -94,7 +94,7 @@ class SelectIteratorContainer private: bool prepareIteratorsForSelectLoop(QueryPreprocessor &, size_t begin, size_t end, unsigned sortId, bool isFt, const NamespaceImpl &, SelectFunction::Ptr &, FtCtx::Ptr &, const RdxContext &); - void sortByCost(span indexes, span costs, unsigned from, unsigned to, int expectedIterations); + void sortByCost(span indexes, span costs, unsigned from, unsigned to, int expectedIterations); double fullCost(span indexes, unsigned i, unsigned from, unsigned to, int expectedIterations) const noexcept; double cost(span indexes, unsigned cur, int expectedIterations) const noexcept; double cost(span indexes, unsigned from, unsigned to, int expectedIterations) const noexcept; diff --git a/cpp_src/core/nsselecter/substitutionhelpers.h b/cpp_src/core/nsselecter/substitutionhelpers.h index f53fee54e..8ebc54929 100644 --- a/cpp_src/core/nsselecter/substitutionhelpers.h +++ b/cpp_src/core/nsselecter/substitutionhelpers.h @@ -172,7 +172,7 @@ class EntriesRanges : h_vector { Base::const_reverse_iterator rbegin() const noexcept { return Base::rbegin(); } Base::const_reverse_iterator rend() const noexcept { return Base::rend(); } - void Add(span entries) { + void Add(span entries) { for (auto entry : entries) { auto insertionPos = Base::end(); bool wasMerged = false; diff --git a/cpp_src/core/payload/payloadiface.cc b/cpp_src/core/payload/payloadiface.cc index c7849f148..b2a5877a0 100644 --- a/cpp_src/core/payload/payloadiface.cc +++ b/cpp_src/core/payload/payloadiface.cc @@ -88,7 +88,7 @@ void PayloadIface::GetByJsonPath(std::string_view jsonPath, TagsMatcher &tags return; } if (t_.Field(fieldIdx).IsArray()) { - IndexedTagsPath tagsPath = tagsMatcher.path2indexedtag(jsonPath, nullptr, false); + IndexedTagsPath tagsPath = tagsMatcher.path2indexedtag(jsonPath, false); if (tagsPath.back().IsWithIndex()) { kvs.clear(); kvs.emplace_back(Get(fieldIdx, tagsPath.back().Index())); @@ -97,7 +97,7 @@ void PayloadIface::GetByJsonPath(std::string_view jsonPath, TagsMatcher &tags } return Get(fieldIdx, kvs); } - GetByJsonPath(tagsMatcher.path2indexedtag(jsonPath, nullptr, false), kvs, expectedType); + GetByJsonPath(tagsMatcher.path2indexedtag(jsonPath, false), kvs, expectedType); } template @@ -126,7 +126,7 @@ void PayloadIface::GetByJsonPath(const IndexedTagsPath &tagsPath, VariantArra template void PayloadIface::GetByFieldsSet(const FieldsSet &fields, VariantArray &kvs, KeyValueType expectedType, - const std::vector &expectedCompositeTypes) const { + const h_vector &expectedCompositeTypes) const { if (expectedType.Is()) { kvs.Clear(); kvs.emplace_back(GetComposite(fields, expectedCompositeTypes)); @@ -146,7 +146,7 @@ void PayloadIface::GetByFieldsSet(const FieldsSet &fields, VariantArray &kvs, } template -Variant PayloadIface::GetComposite(const FieldsSet &fields, const std::vector &expectedTypes) const { +Variant PayloadIface::GetComposite(const FieldsSet &fields, const h_vector &expectedTypes) const { thread_local VariantArray buffer; buffer.clear(); assertrx_throw(fields.size() == expectedTypes.size()); diff --git a/cpp_src/core/payload/payloadiface.h b/cpp_src/core/payload/payloadiface.h index 8e66df6f4..c9fad0e4c 100644 --- a/cpp_src/core/payload/payloadiface.h +++ b/cpp_src/core/payload/payloadiface.h @@ -35,11 +35,11 @@ class PayloadIface { // Get array as span of typed elements template - span GetArray(int field) & { + span GetArray(int field) const & { assertrx(field < Type().NumFields()); assertrx(Type().Field(field).IsArray()); auto *arr = reinterpret_cast(Field(field).p_); - return span(reinterpret_cast(v_->Ptr() + arr->offset), arr->len); + return span(reinterpret_cast(v_->Ptr() + arr->offset), arr->len); } // Get array len int GetArrayLen(int field) const { @@ -116,8 +116,8 @@ class PayloadIface { void GetByJsonPath(const TagsPath &jsonPath, VariantArray &, KeyValueType expectedType) const; void GetByJsonPath(const IndexedTagsPath &jsonPath, VariantArray &, KeyValueType expectedType) const; void GetByFieldsSet(const FieldsSet &, VariantArray &, KeyValueType expectedType, - const std::vector &expectedCompositeTypes) const; - [[nodiscard]] Variant GetComposite(const FieldsSet &, const std::vector &expectedTypes) const; + const h_vector &expectedCompositeTypes) const; + [[nodiscard]] Variant GetComposite(const FieldsSet &, const h_vector &expectedTypes) const; VariantArray GetIndexedArrayData(const IndexedTagsPath &jsonPath, int field, int &offset, int &size) const; // Get fields count diff --git a/cpp_src/core/payload/payloadtype.cc b/cpp_src/core/payload/payloadtype.cc index a44cbf4f1..4aa4350f2 100644 --- a/cpp_src/core/payload/payloadtype.cc +++ b/cpp_src/core/payload/payloadtype.cc @@ -1,7 +1,6 @@ #include "payloadtype.h" #include #include "core/keyvalue/key_string.h" -#include "core/keyvalue/variant.h" #include "payloadtypeimpl.h" #include "tools/serializer.h" @@ -61,6 +60,8 @@ void PayloadTypeImpl::Add(PayloadFieldType f) { throw Error(errLogic, "Cannot add field with name '%s' to namespace '%s'. Json path '%s' already used in field '%s'", f.Name(), Name(), jp, Field(res.first->second).Name()); } + + checkNewJsonPathBeforeAdd(f, jp); } fieldsByName_.emplace(f.Name(), int(fields_.size())); if (f.Type().Is()) { @@ -71,10 +72,10 @@ void PayloadTypeImpl::Add(PayloadFieldType f) { } bool PayloadTypeImpl::Drop(std::string_view field) { - auto it = fieldsByName_.find(field); - if (it == fieldsByName_.end()) return false; + auto itField = fieldsByName_.find(field); + if (itField == fieldsByName_.end()) return false; - int fieldIdx = it->second; + const auto fieldIdx = itField->second; for (auto &f : fieldsByName_) { if (f.second > fieldIdx) --f.second; } @@ -153,16 +154,16 @@ void PayloadTypeImpl::deserialize(Serializer &ser) { ser.GetVarUint(); - int count = ser.GetVarUint(); + auto count = ser.GetVarUint(); - for (int i = 0; i < count; i++) { + for (uint64_t i = 0; i < count; i++) { const auto t = ser.GetKeyValueType(); std::string name(ser.GetVString()); std::vector jsonPaths; - int offset = ser.GetVarUint(); - int elemSizeof = ser.GetVarUint(); + uint64_t offset = ser.GetVarUint(); + uint64_t elemSizeof = ser.GetVarUint(); bool isArray = ser.GetVarUint(); - int jsonPathsCount = ser.GetVarUint(); + uint64_t jsonPathsCount = ser.GetVarUint(); while (jsonPathsCount--) jsonPaths.push_back(std::string(ser.GetVString())); @@ -205,4 +206,21 @@ void PayloadType::Dump(std::ostream &os, std::string_view step, std::string_view os << '\n' << offset << '}'; } +void PayloadTypeImpl::checkNewJsonPathBeforeAdd(const PayloadFieldType &f, const std::string &jsonPath) const { + const auto pos = jsonPath.find('.'); + if (pos < jsonPath.length() - 1) { + for (auto &fld : fields_) { + for (auto &jpfld : fld.JsonPaths()) { + // new field total overwrites existing one + if ((jsonPath.rfind(jpfld, 0) == 0) && (jsonPath[jpfld.length()] == '.')) { + throw Error(errLogic, + "Cannot add field with name '%s' (jsonpath '%s') and type '%s' to namespace '%s'." + " Already exists json path '%s' with type '%s' in field '%s'. Rewriting is impossible", + f.Name(), jsonPath, f.Type().Name(), Name(), jpfld, fld.Type().Name(), fld.Name()); + } + } + } + } +} + } // namespace reindexer diff --git a/cpp_src/core/payload/payloadtype.h b/cpp_src/core/payload/payloadtype.h index e0940e6df..07e512eb3 100644 --- a/cpp_src/core/payload/payloadtype.h +++ b/cpp_src/core/payload/payloadtype.h @@ -17,7 +17,7 @@ class PayloadType : public shared_cow_ptr { PayloadType(const PayloadType &) = default; PayloadType &operator=(PayloadType &&) noexcept = default; PayloadType &operator=(const PayloadType &) = default; - PayloadType(const std::string &name, std::initializer_list fields = {}); + explicit PayloadType(const std::string &name, std::initializer_list fields = {}); explicit PayloadType(const PayloadTypeImpl &impl); ~PayloadType(); const PayloadFieldType &Field(int field) const; diff --git a/cpp_src/core/payload/payloadtypeimpl.h b/cpp_src/core/payload/payloadtypeimpl.h index 882a66deb..64db9568c 100644 --- a/cpp_src/core/payload/payloadtypeimpl.h +++ b/cpp_src/core/payload/payloadtypeimpl.h @@ -47,6 +47,8 @@ class PayloadTypeImpl { void Dump(std::ostream &, std::string_view step, std::string_view offset) const; private: + void checkNewJsonPathBeforeAdd(const PayloadFieldType &f, const std::string &jsonPath) const; + std::vector fields_; FieldMap fieldsByName_; JsonPathMap fieldsByJsonPath_; diff --git a/cpp_src/core/query/queryentry.cc b/cpp_src/core/query/queryentry.cc index f8ba8d747..8a32d917b 100644 --- a/cpp_src/core/query/queryentry.cc +++ b/cpp_src/core/query/queryentry.cc @@ -56,12 +56,9 @@ bool QueryField::operator==(const QueryField &other) const noexcept { compositeFieldsTypes_.size() != other.compositeFieldsTypes_.size()) { return false; } - for (size_t i = 0, s = compositeFieldsTypes_.size(); i < s; ++i) { - if (!compositeFieldsTypes_[i].IsSame(other.compositeFieldsTypes_[i])) { - return false; - } - } - return true; + return std::equal( + compositeFieldsTypes_.begin(), compositeFieldsTypes_.end(), other.compositeFieldsTypes_.begin(), + [](const CompositeTypesVecT::value_type &l, const CompositeTypesVecT::value_type &r) noexcept { return l.IsSame(r); }); } void QueryField::SetField(FieldsSet &&fields) & { @@ -73,7 +70,7 @@ void QueryField::SetField(FieldsSet &&fields) & { } static void checkIndexData([[maybe_unused]] int idxNo, [[maybe_unused]] const FieldsSet &fields, KeyValueType fieldType, - [[maybe_unused]] const std::vector &compositeFieldsTypes) { + [[maybe_unused]] const QueryField::CompositeTypesVecT &compositeFieldsTypes) { assertrx_throw(idxNo >= 0); if (fieldType.Is()) { assertrx_throw(fields.size() == compositeFieldsTypes.size()); @@ -84,7 +81,7 @@ static void checkIndexData([[maybe_unused]] int idxNo, [[maybe_unused]] const Fi } void QueryField::SetIndexData(int idxNo, FieldsSet &&fields, KeyValueType fieldType, KeyValueType selectType, - std::vector &&compositeFieldsTypes) & { + QueryField::CompositeTypesVecT &&compositeFieldsTypes) & { checkIndexData(idxNo, fields, fieldType, compositeFieldsTypes); idxNo_ = idxNo; fieldsSet_ = std::move(fields); diff --git a/cpp_src/core/query/queryentry.h b/cpp_src/core/query/queryentry.h index 2002fd9ca..a18b4dc1e 100644 --- a/cpp_src/core/query/queryentry.h +++ b/cpp_src/core/query/queryentry.h @@ -34,6 +34,8 @@ struct JoinQueryEntry { class QueryField { public: + using CompositeTypesVecT = h_vector; + template explicit QueryField(Str &&fieldName) noexcept : fieldName_{std::forward(fieldName)} {} QueryField(std::string &&fieldName, int idxNo, FieldsSet fields, KeyValueType fieldType, @@ -52,11 +54,11 @@ class QueryField { [[nodiscard]] const std::string &FieldName() const & noexcept { return fieldName_; } [[nodiscard]] KeyValueType FieldType() const noexcept { return fieldType_; } [[nodiscard]] KeyValueType SelectType() const noexcept { return selectType_; } - [[nodiscard]] const std::vector &CompositeFieldsTypes() const & noexcept { return compositeFieldsTypes_; } + [[nodiscard]] const CompositeTypesVecT &CompositeFieldsTypes() const & noexcept { return compositeFieldsTypes_; } [[nodiscard]] bool HaveEmptyField() const noexcept; void SetField(FieldsSet &&fields) &; void SetIndexData(int idxNo, FieldsSet &&fields, KeyValueType fieldType, KeyValueType selectType, - std::vector &&compositeFieldsTypes) &; + CompositeTypesVecT &&compositeFieldsTypes) &; QueryField &operator=(const QueryField &) = delete; auto Fields() const && = delete; @@ -69,7 +71,7 @@ class QueryField { FieldsSet fieldsSet_; KeyValueType fieldType_{KeyValueType::Undefined{}}; KeyValueType selectType_{KeyValueType::Undefined{}}; - std::vector compositeFieldsTypes_; + CompositeTypesVecT compositeFieldsTypes_; }; enum class VerifyQueryEntryFlags : unsigned { null = 0u, ignoreEmptyValues = 1u }; @@ -181,7 +183,7 @@ class BetweenFieldsQueryEntry { BetweenFieldsQueryEntry(StrL &&fstIdx, CondType cond, StrR &&sndIdx) : leftField_{std::forward(fstIdx)}, rightField_{std::forward(sndIdx)}, condition_{cond} { if (condition_ == CondAny || condition_ == CondEmpty || condition_ == CondDWithin) { - throw Error{errLogic, "Condition '%s' is inapplicable between two fields", std::string{CondTypeToStr(condition_)}}; + throw Error{errLogic, "Condition '%s' is inapplicable between two fields", CondTypeToStr(condition_)}; } } @@ -197,8 +199,10 @@ class BetweenFieldsQueryEntry { [[nodiscard]] const FieldsSet &RightFields() const & noexcept { return rightField_.Fields(); } [[nodiscard]] KeyValueType LeftFieldType() const noexcept { return leftField_.FieldType(); } [[nodiscard]] KeyValueType RightFieldType() const noexcept { return rightField_.FieldType(); } - [[nodiscard]] const std::vector &LeftCompositeFieldsTypes() const & noexcept { return leftField_.CompositeFieldsTypes(); } - [[nodiscard]] const std::vector &RightCompositeFieldsTypes() const & noexcept { + [[nodiscard]] const QueryField::CompositeTypesVecT &LeftCompositeFieldsTypes() const & noexcept { + return leftField_.CompositeFieldsTypes(); + } + [[nodiscard]] const QueryField::CompositeTypesVecT &RightCompositeFieldsTypes() const & noexcept { return rightField_.CompositeFieldsTypes(); } [[nodiscard]] const QueryField &LeftFieldData() const & noexcept { return leftField_; } @@ -304,7 +308,7 @@ class UpdateEntry { } bool operator==(const UpdateEntry &) const noexcept; bool operator!=(const UpdateEntry &obj) const noexcept { return !operator==(obj); } - std::string const &Column() const noexcept { return column_; } + std::string_view Column() const noexcept { return column_; } VariantArray const &Values() const noexcept { return values_; } VariantArray &Values() noexcept { return values_; } FieldModifyMode Mode() const noexcept { return mode_; } @@ -333,8 +337,10 @@ class QueryJoinEntry { [[nodiscard]] const FieldsSet &RightFields() const & noexcept { return rightField_.Fields(); } [[nodiscard]] KeyValueType LeftFieldType() const noexcept { return leftField_.FieldType(); } [[nodiscard]] KeyValueType RightFieldType() const noexcept { return rightField_.FieldType(); } - [[nodiscard]] const std::vector &LeftCompositeFieldsTypes() const & noexcept { return leftField_.CompositeFieldsTypes(); } - [[nodiscard]] const std::vector &RightCompositeFieldsTypes() const & noexcept { + [[nodiscard]] const QueryField::CompositeTypesVecT &LeftCompositeFieldsTypes() const & noexcept { + return leftField_.CompositeFieldsTypes(); + } + [[nodiscard]] const QueryField::CompositeTypesVecT &RightCompositeFieldsTypes() const & noexcept { return rightField_.CompositeFieldsTypes(); } [[nodiscard]] OpType Operation() const noexcept { return op_; } @@ -347,11 +353,11 @@ class QueryJoinEntry { [[nodiscard]] const QueryField &RightFieldData() const & noexcept { return rightField_; } [[nodiscard]] QueryField &RightFieldData() & noexcept { return rightField_; } void SetLeftIndexData(int idxNo, FieldsSet &&fields, KeyValueType fieldType, KeyValueType selectType, - std::vector &&compositeFieldsTypes) & { + QueryField::CompositeTypesVecT &&compositeFieldsTypes) & { leftField_.SetIndexData(idxNo, std::move(fields), fieldType, selectType, std::move(compositeFieldsTypes)); } void SetRightIndexData(int idxNo, FieldsSet &&fields, KeyValueType fieldType, KeyValueType selectType, - std::vector &&compositeFieldsTypes) & { + QueryField::CompositeTypesVecT &&compositeFieldsTypes) & { rightField_.SetIndexData(idxNo, std::move(fields), fieldType, selectType, std::move(compositeFieldsTypes)); } void SetLeftField(FieldsSet &&fields) & { leftField_.SetField(std::move(fields)); } diff --git a/cpp_src/core/reindexer_impl/reindexerimpl.cc b/cpp_src/core/reindexer_impl/reindexerimpl.cc index 6c51e1356..6913236c0 100644 --- a/cpp_src/core/reindexer_impl/reindexerimpl.cc +++ b/cpp_src/core/reindexer_impl/reindexerimpl.cc @@ -452,15 +452,13 @@ Error ReindexerImpl::closeNamespace(std::string_view nsName, const RdxContext& c if (dropStorage) { ns->DeleteStorage(ctx); - } else { - ns->CloseStorage(ctx); - } - if (dropStorage) { + if (!nsIt->second->GetDefinition(ctx).isTemporary) { observers_.OnWALUpdate(LSNPair(), nsName, WALRecord(WalNamespaceDrop)); } + } else { + ns->CloseStorage(ctx); } - } catch (const Error& e) { err = e; } @@ -873,13 +871,12 @@ Error ReindexerImpl::Select(const Query& q, QueryResults& result, const Internal const QueriesStatTracer::QuerySQL sql{normalizedSQL.Slice(), nonNormalizedSQL.Slice()}; auto hitter = queriesPerfStatsEnabled - ? [&sql, &tracker](bool lockHit, std::chrono::microseconds time) { - if (lockHit) - tracker.LockHit(sql, time); - else - tracker.Hit(sql, time); - } - : std::function{}; + ? [&sql, &tracker](bool lockHit, std::chrono::microseconds time) { + if (lockHit) + tracker.LockHit(sql, time); + else + tracker.Hit(sql, time); + } : std::function{}; const bool isSystemNsRequest = isSystemNamespaceNameFast(q.NsName()); QueryStatCalculator statCalculator( @@ -895,11 +892,15 @@ Error ReindexerImpl::Select(const Query& q, QueryResults& result, const Internal // Lookup and lock namespaces_ mainNs->updateSelectTime(); locks.Add(std::move(mainNs)); - q.WalkNested(false, true, true, [this, &locks, &rdxCtx](const Query& q) { - auto nsWrp = getNamespace(q.NsName(), rdxCtx); - auto ns = q.IsWALQuery() ? nsWrp->awaitMainNs(rdxCtx) : nsWrp->getMainNs(); + struct { + RxSelector::NsLocker& locks; + const RdxContext& ctx; + } refs{locks, rdxCtx}; + q.WalkNested(false, true, true, [this, &refs](const Query& q) { + auto nsWrp = getNamespace(q.NsName(), refs.ctx); + auto ns = q.IsWALQuery() ? nsWrp->awaitMainNs(refs.ctx) : nsWrp->getMainNs(); ns->updateSelectTime(); - locks.Add(std::move(ns)); + refs.locks.Add(std::move(ns)); }); locks.Lock(); @@ -1226,8 +1227,7 @@ Error ReindexerImpl::InitSystemNamespaces() { } if (!hasReplicatorConfig) { - auto err = tryLoadReplicatorConfFromFile(); - (void)err; // ignore + err = tryLoadReplicatorConfFromFile(); } return errOK; diff --git a/cpp_src/core/reindexer_impl/rx_selector.cc b/cpp_src/core/reindexer_impl/rx_selector.cc index c6eb63a6f..ac49dd50f 100644 --- a/cpp_src/core/reindexer_impl/rx_selector.cc +++ b/cpp_src/core/reindexer_impl/rx_selector.cc @@ -324,7 +324,7 @@ VariantArray RxSelector::selectSubQuery(const Query& subQuery, const Query& main } } else { const auto fields = ns->indexes_[idxNo]->Fields(); - std::vector fieldsTypes; + QueryField::CompositeTypesVecT fieldsTypes; #ifndef NDEBUG const bool ftIdx = IsFullText(ns->indexes_[idxNo]->Type()); #endif @@ -332,10 +332,10 @@ VariantArray RxSelector::selectSubQuery(const Query& subQuery, const Query& main if (f == IndexValueType::SetByJsonPath) { // not indexed fields allowed only in ft composite indexes assertrx_throw(ftIdx); - fieldsTypes.push_back(KeyValueType::String{}); + fieldsTypes.emplace_back(KeyValueType::String{}); } else { assertrx_throw(f <= ns->indexes_.firstCompositePos()); - fieldsTypes.push_back(ns->indexes_[f]->SelectKeyType()); + fieldsTypes.emplace_back(ns->indexes_[f]->SelectKeyType()); } } for (const auto& it : qr) { diff --git a/cpp_src/core/selectfunc/ctx/basefunctionctx.h b/cpp_src/core/selectfunc/ctx/basefunctionctx.h index bb8f8e5f2..cebba4e26 100644 --- a/cpp_src/core/selectfunc/ctx/basefunctionctx.h +++ b/cpp_src/core/selectfunc/ctx/basefunctionctx.h @@ -1,39 +1,75 @@ #pragma once -#include -#include "core/selectfunc/selectfuncparser.h" -#include "estl/fast_hash_map.h" -#include "estl/fast_hash_set.h" +#include "core/selectfunc/functions/highlight.h" +#include "core/selectfunc/functions/snippet.h" namespace reindexer { template -std::shared_ptr reinterpret_pointer_cast(const std::shared_ptr& r) noexcept { - auto p = reinterpret_cast::element_type*>(r.get()); - return std::shared_ptr(r, p); +intrusive_ptr static_ctx_pointer_cast(const intrusive_ptr& r) noexcept { + assertrx_dbg(dynamic_cast(r.get()) != nullptr); + return intrusive_ptr(static_cast(r.get())); } -class BaseFunctionCtx { +class FuncNone { public: - typedef std::shared_ptr Ptr; + bool Process(ItemRef&, PayloadType&, const SelectFuncStruct&, std::vector&) noexcept { return false; } +}; + +template +constexpr std::size_t variant_index() { + static_assert(std::variant_size_v > index, "Type not found in variant"); + if constexpr (std::is_same_v, T>) { + return index; + } else { + return variant_index(); + } +} + +using SelectFuncVariant = std::variant; +enum class SelectFuncType { + None = variant_index(), + Snippet = variant_index(), + Highlight = variant_index(), + SnippetN = variant_index(), + + Max // Max possible value +}; + +class BaseFunctionCtx : public intrusive_atomic_rc_base { +public: + typedef intrusive_ptr Ptr; enum CtxType { kFtCtx = 0 }; virtual ~BaseFunctionCtx() {} - void AddFunction(const std::string& name, SelectFuncStruct::SelectFuncType functionIndx) { functions_[name].insert(functionIndx); } - bool CheckFunction(const std::string& name, std::initializer_list types) { - auto it = functions_.find(name); - - if (it == functions_.end()) return false; - for (auto t : types) { - auto fit = it->second.find(t); - if (fit != it->second.end()) return true; + void AddFunction(const std::string& name, SelectFuncType functionIndx) { + auto it = std::find_if(functions_.begin(), functions_.end(), [&name](const FuncData& data) { return data.name == name; }); + auto& ref = (it == functions_.end()) ? functions_.emplace_back(std::string(name)) : *it; + ref.types[static_cast(functionIndx)] = true; + } + bool CheckFunction(const std::string& name, std::initializer_list types) { + auto it = std::find_if(functions_.begin(), functions_.end(), [&name](const FuncData& data) { return data.name == name; }); + if (it != functions_.end()) { + for (auto t : types) { + if (it->types[static_cast(t)]) { + return true; + } + } } return false; } CtxType type; -protected: - fast_hash_map>> functions_; +private: + struct FuncData { + using TypesArrayT = std::array(SelectFuncType::Max)>; + + FuncData(std::string&& _name) noexcept : name(std::move(_name)) {} + + std::string name; + TypesArrayT types{}; + }; + h_vector functions_; }; } // namespace reindexer diff --git a/cpp_src/core/selectfunc/ctx/ftctx.cc b/cpp_src/core/selectfunc/ctx/ftctx.cc index 304acaabb..c185dc124 100644 --- a/cpp_src/core/selectfunc/ctx/ftctx.cc +++ b/cpp_src/core/selectfunc/ctx/ftctx.cc @@ -2,83 +2,79 @@ namespace reindexer { -FtCtx::FtCtx() { - data_ = std::make_shared(); - this->type = BaseFunctionCtx::kFtCtx; -} - -int16_t FtCtx::Proc(size_t pos) { - if (pos >= data_->proc_.size()) return 0; - return data_->proc_[pos]; -} -void FtCtx::Reserve(size_t size) { data_->proc_.reserve(size); } - -size_t FtCtx::Size() const noexcept { return data_->proc_.size(); } - -bool FtCtx::NeedArea() const noexcept { return data_->need_area_; } - bool FtCtx::PrepareAreas(const RHashMap &fields, const std::string &name) { - if (!fields.empty()) data_->is_composite_ = true; + assertrx_dbg(!NeedArea()); + auto &data = *data_; + if (!fields.empty()) { + data.isComposite_ = true; + } - if (data_->is_composite_) { + bool needArea = false; + if (data.isComposite_) { for (auto &field : fields) { - data_->need_area_ = - CheckFunction(field.first, {SelectFuncStruct::SelectFuncType::Snippet, SelectFuncStruct::SelectFuncType::SnippetN, - SelectFuncStruct::SelectFuncType::Highlight}); - if (data_->need_area_) return true; + needArea = CheckFunction(field.first, {SelectFuncType::Snippet, SelectFuncType::SnippetN, SelectFuncType::Highlight}); + if (needArea) { + break; + } } } - data_->need_area_ = CheckFunction(name, {SelectFuncStruct::SelectFuncType::Snippet, SelectFuncStruct::SelectFuncType::SnippetN, - SelectFuncStruct::SelectFuncType::Highlight}); - return data_->need_area_; + needArea = needArea || CheckFunction(name, {SelectFuncType::Snippet, SelectFuncType::SnippetN, SelectFuncType::Highlight}); + if (needArea) { + data.InitHolders(); + } + return needArea; } template void FtCtx::Add(InputIterator begin, InputIterator end, int16_t proc, AreaHolder &&holder) { - data_->area_.emplace_back(std::move(holder)); + auto &data = *data_; + data.area_.emplace_back(std::move(holder)); for (; begin != end; ++begin) { - data_->proc_.push_back(proc); - if (data_->need_area_) { - data_->holders_.emplace(*begin, data_->area_.size() - 1); + data.proc_.emplace_back(proc); + if (data.holders_.has_value()) { + data.holders_->emplace(*begin, data_->area_.size() - 1); } } } template void FtCtx::Add(InputIterator begin, InputIterator end, int16_t proc) { + auto &data = *data_; for (; begin != end; ++begin) { - data_->proc_.push_back(proc); + data.proc_.emplace_back(proc); } } template void FtCtx::Add(InputIterator begin, InputIterator end, int16_t proc, const std::vector &mask, AreaHolder &&holder) { - data_->area_.emplace_back(std::move(holder)); + auto &data = *data_; + data.area_.emplace_back(std::move(holder)); for (; begin != end; ++begin) { assertrx(static_cast(*begin) < mask.size()); if (!mask[*begin]) continue; - data_->proc_.push_back(proc); - if (data_->need_area_) { - data_->holders_.emplace(*begin, data_->area_.size() - 1); + data.proc_.emplace_back(proc); + if (data.holders_.has_value()) { + data.holders_->emplace(*begin, data.area_.size() - 1); } } } template void FtCtx::Add(InputIterator begin, InputIterator end, int16_t proc, const std::vector &mask) { + auto &data = *data_; for (; begin != end; ++begin) { assertrx(static_cast(*begin) < mask.size()); if (!mask[*begin]) continue; - data_->proc_.push_back(proc); + data.proc_.emplace_back(proc); } } -template void FtCtx::Add::iterator>(span::iterator begin, span::iterator end, int16_t proc, - AreaHolder &&holder); -template void FtCtx::Add::iterator>(span::iterator begin, span::iterator end, int16_t proc, - const std::vector &, AreaHolder &&holder); -template void FtCtx::Add::iterator>(span::iterator begin, span::iterator end, int16_t proc); -template void FtCtx::Add::iterator>(span::iterator begin, span::iterator end, int16_t proc, - const std::vector &); +template void FtCtx::Add::iterator>(span::iterator begin, span::iterator end, int16_t proc, + AreaHolder &&holder); +template void FtCtx::Add::iterator>(span::iterator begin, span::iterator end, int16_t proc, + const std::vector &, AreaHolder &&holder); +template void FtCtx::Add::iterator>(span::iterator begin, span::iterator end, int16_t proc); +template void FtCtx::Add::iterator>(span::iterator begin, span::iterator end, int16_t proc, + const std::vector &); } // namespace reindexer diff --git a/cpp_src/core/selectfunc/ctx/ftctx.h b/cpp_src/core/selectfunc/ctx/ftctx.h index 225b21b3d..bc85c3218 100644 --- a/cpp_src/core/selectfunc/ctx/ftctx.h +++ b/cpp_src/core/selectfunc/ctx/ftctx.h @@ -10,22 +10,25 @@ namespace reindexer { class FtCtx : public BaseFunctionCtx { public: - typedef std::shared_ptr Ptr; - struct Data { - typedef std::shared_ptr Ptr; + typedef intrusive_ptr Ptr; + struct Data : public BaseFunctionCtx { + bool NeedArea() const noexcept { return holders_.has_value(); } + void InitHolders() { + assertrx_dbg(!holders_.has_value()); + holders_.emplace(); + } + + typedef intrusive_ptr Ptr; std::vector proc_; - fast_hash_map holders_; + std::optional> holders_; std::vector area_; - bool need_area_ = false; - bool is_composite_ = false; + bool isComposite_ = false; bool isWordPositions_ = false; std::string extraWordSymbols_; }; - FtCtx(); - int16_t Proc(size_t pos); - bool isComposite() const noexcept { return data_->is_composite_; } - size_t GetSize() const noexcept { return data_->proc_.size(); } + FtCtx() : data_(make_intrusive()) { this->type = BaseFunctionCtx::kFtCtx; } + int16_t Proc(size_t pos) const noexcept { return (pos < data_->proc_.size()) ? data_->proc_[pos] : 0; } template void Add(InputIterator begin, InputIterator end, int16_t proc, AreaHolder &&holder); @@ -37,9 +40,9 @@ class FtCtx : public BaseFunctionCtx { template void Add(InputIterator begin, InputIterator end, int16_t proc, const std::vector &mask); - void Reserve(size_t size); - size_t Size() const noexcept; - bool NeedArea() const noexcept; + void Reserve(size_t size) { data_->proc_.reserve(size); } + size_t Size() const noexcept { return data_->proc_.size(); } + bool NeedArea() const noexcept { return data_->NeedArea(); } bool PrepareAreas(const RHashMap &fields, const std::string &name); void SetData(Data::Ptr data) noexcept { data_ = std::move(data); } diff --git a/cpp_src/core/selectfunc/functions/highlight.cc b/cpp_src/core/selectfunc/functions/highlight.cc index 4e4177843..508b51532 100644 --- a/cpp_src/core/selectfunc/functions/highlight.cc +++ b/cpp_src/core/selectfunc/functions/highlight.cc @@ -3,6 +3,8 @@ #include "core/keyvalue/p_string.h" #include "core/payload/payloadiface.h" #include "core/selectfunc/ctx/ftctx.h" +#include "core/selectfunc/selectfuncparser.h" + namespace reindexer { bool Highlight::Process(ItemRef &res, PayloadType &pl_type, const SelectFuncStruct &func, std::vector &stringsHolder) { @@ -10,10 +12,13 @@ bool Highlight::Process(ItemRef &res, PayloadType &pl_type, const SelectFuncStru if (!func.ctx || func.ctx->type != BaseFunctionCtx::kFtCtx) return false; - FtCtx::Ptr ftctx = reindexer::reinterpret_pointer_cast(func.ctx); - auto dataFtCtx = ftctx->GetData(); - auto it = dataFtCtx->holders_.find(res.Id()); - if (it == dataFtCtx->holders_.end()) { + FtCtx::Ptr ftctx = reindexer::static_ctx_pointer_cast(func.ctx); + auto &dataFtCtx = *ftctx->GetData(); + if (!dataFtCtx.holders_.has_value()) { + return false; + } + auto it = dataFtCtx.holders_->find(res.Id()); + if (it == dataFtCtx.holders_->end()) { return false; } @@ -31,7 +36,7 @@ bool Highlight::Process(ItemRef &res, PayloadType &pl_type, const SelectFuncStru } const std::string *data = p_string(kr[0]).getCxxstr(); - auto pva = dataFtCtx->area_[it->second].GetAreas(func.fieldNo); + auto pva = dataFtCtx.area_[it->second].GetAreas(func.fieldNo); if (!pva || pva->Empty()) return false; auto &va = *pva; diff --git a/cpp_src/core/selectfunc/functions/snippet.cc b/cpp_src/core/selectfunc/functions/snippet.cc index c1c2de718..0f3bc28db 100644 --- a/cpp_src/core/selectfunc/functions/snippet.cc +++ b/cpp_src/core/selectfunc/functions/snippet.cc @@ -3,6 +3,7 @@ #include "core/keyvalue/p_string.h" #include "core/payload/payloadiface.h" #include "core/selectfunc/ctx/ftctx.h" +#include "core/selectfunc/selectfuncparser.h" #include "highlight.h" #include "tools/errors.h" #include "utf8cpp/utf8.h" @@ -256,16 +257,19 @@ bool Snippet::Process(ItemRef &res, PayloadType &pl_type, const SelectFuncStruct if (!func.ctx) return false; init(func); - FtCtx::Ptr ftctx = reindexer::reinterpret_pointer_cast(func.ctx); - auto dataFtCtx = ftctx->GetData(); - if (!dataFtCtx->isWordPositions_) { + FtCtx::Ptr ftctx = reindexer::static_ctx_pointer_cast(func.ctx); + auto &dataFtCtx = *ftctx->GetData(); + if (!dataFtCtx.isWordPositions_) { throw Error(errParams, "Snippet function does not work with ft_fuzzy index."); } if (!func.tagsPath.empty()) { throw Error(errConflict, "SetByJsonPath is not implemented yet!"); } - auto it = dataFtCtx->holders_.find(res.Id()); - if (it == dataFtCtx->holders_.end()) { + if (!dataFtCtx.holders_.has_value()) { + return false; + } + auto it = dataFtCtx.holders_->find(res.Id()); + if (it == dataFtCtx.holders_->end()) { return false; } Payload pl(pl_type, res.Value()); @@ -277,7 +281,7 @@ bool Snippet::Process(ItemRef &res, PayloadType &pl_type, const SelectFuncStruct } const std::string *data = p_string(kr[0]).getCxxstr(); - auto pva = dataFtCtx->area_[it->second].GetAreas(func.fieldNo); + auto pva = dataFtCtx.area_[it->second].GetAreas(func.fieldNo); if (!pva || pva->Empty()) return false; std::string resultString; diff --git a/cpp_src/core/selectfunc/selectfunc.cc b/cpp_src/core/selectfunc/selectfunc.cc index d8d4bab24..2c4bc50f2 100644 --- a/cpp_src/core/selectfunc/selectfunc.cc +++ b/cpp_src/core/selectfunc/selectfunc.cc @@ -33,19 +33,18 @@ SelectFunction::Ptr SelectFunctionsHolder::AddNamespace(const Query &q, const Na if (queries_.size() <= nsid) { queries_.resize(nsid + 1); } - queries_[nsid] = std::make_shared(q, NsSelectFuncInterface(nm)); + queries_[nsid] = make_intrusive(q, NsSelectFuncInterface(nm)); return queries_[nsid]; } SelectFunction::SelectFunction(const Query &q, NsSelectFuncInterface &&nm) : nm_(std::move(nm)), currCjsonFieldIdx_(nm_.getIndexesCount()) { - functions_.reserve(q.selectFunctions_.size()); for (auto &func : q.selectFunctions_) { SelectFuncParser parser; SelectFuncStruct &result = parser.Parse(func); if (!result.isFunction) continue; createFunc(result); } -}; +} void SelectFunction::createFunc(SelectFuncStruct &data) { int indexNo = IndexValueType::NotSet; @@ -60,8 +59,6 @@ void SelectFunction::createFunc(SelectFuncStruct &data) { // if index is composite then create function for inner use only if (IsComposite(nm_.getIndexType(indexNo))) { - std::vector subIndexes; - int fieldNo = 0; const FieldsSet &fields = nm_.getIndexFields(indexNo); @@ -205,12 +202,12 @@ bool SelectFunction::ProcessItem(ItemRef &res, PayloadType &pl_type, std::vector BaseFunctionCtx::Ptr SelectFunction::createCtx(SelectFuncStruct &data, BaseFunctionCtx::Ptr ctx, IndexType index_type) { if (IsFullText(index_type)) { if (!ctx) { - data.ctx = std::make_shared(); + data.ctx = make_intrusive(); } else { data.ctx = std::move(ctx); } const std::string &indexName = (data.indexNo >= nm_.getIndexesCount()) ? data.field : nm_.getIndexName(data.indexNo); - data.ctx->AddFunction(indexName, SelectFuncStruct::SelectFuncType(data.func.index())); + data.ctx->AddFunction(indexName, SelectFuncType(data.func.index())); } return data.ctx; } diff --git a/cpp_src/core/selectfunc/selectfunc.h b/cpp_src/core/selectfunc/selectfunc.h index 6b5686589..c263f54d1 100644 --- a/cpp_src/core/selectfunc/selectfunc.h +++ b/cpp_src/core/selectfunc/selectfunc.h @@ -1,17 +1,17 @@ #pragma once #include "core/query/query.h" #include "core/queryresults/queryresults.h" -#include "ctx/basefunctionctx.h" #include "nsselectfuncinterface.h" +#include "selectfuncparser.h" namespace reindexer { class NamespaceImpl; /// Represents sql function in a query /// (like avg(x) or sum(x)). -class SelectFunction { +class SelectFunction : public intrusive_atomic_rc_base { public: - typedef std::shared_ptr Ptr; + typedef intrusive_ptr Ptr; SelectFunction(const Query& q, NsSelectFuncInterface&& nm); /// Processes selected item to apply sql function. diff --git a/cpp_src/core/selectfunc/selectfuncparser.h b/cpp_src/core/selectfunc/selectfuncparser.h index 91a4b045a..6e2427239 100644 --- a/cpp_src/core/selectfunc/selectfuncparser.h +++ b/cpp_src/core/selectfunc/selectfuncparser.h @@ -5,46 +5,22 @@ #include #include #include +#include "ctx/basefunctionctx.h" #include "estl/tokenizer.h" -#include "functions/highlight.h" -#include "functions/snippet.h" namespace reindexer { class BaseFunctionCtx; -class FuncNone { -public: - bool Process(ItemRef &, PayloadType &, const SelectFuncStruct &, std::vector &) noexcept { return false; } -}; - -template -constexpr std::size_t variant_index() { - static_assert(std::variant_size_v > index, "Type not found in variant"); - if constexpr (std::is_same_v, T>) { - return index; - } else { - return variant_index(); - } -} - struct SelectFuncStruct { - using FuncVariant = std::variant; - enum class SelectFuncType { - None = variant_index(), - Snippet = variant_index(), - Highlight = variant_index(), - SnippetN = variant_index() - }; - - FuncVariant func; + SelectFuncVariant func; bool isFunction = false; std::string field; std::string value; std::string funcName; std::vector funcArgs; std::unordered_map namedArgs; - std::shared_ptr ctx; + BaseFunctionCtx::Ptr ctx; TagsPath tagsPath; int indexNo = -1; int fieldNo = 0; diff --git a/cpp_src/core/selectkeyresult.h b/cpp_src/core/selectkeyresult.h index 03f0cf14f..665f1b8af 100644 --- a/cpp_src/core/selectkeyresult.h +++ b/cpp_src/core/selectkeyresult.h @@ -36,7 +36,7 @@ class SingleSelectKeyResult { } } explicit SingleSelectKeyResult(IdSet::Ptr &&ids) noexcept : tempIds_(std::move(ids)), ids_(*tempIds_) {} - explicit SingleSelectKeyResult(const IdSetRef &ids) noexcept : ids_(ids) {} + explicit SingleSelectKeyResult(IdSetCRef ids) noexcept : ids_(ids) {} explicit SingleSelectKeyResult(IdType rBegin, IdType rEnd) noexcept : rBegin_(rBegin), rEnd_(rEnd), isRange_(true) {} SingleSelectKeyResult(const SingleSelectKeyResult &other) noexcept : tempIds_(other.tempIds_), @@ -97,14 +97,14 @@ class SingleSelectKeyResult { } IdSet::Ptr tempIds_; - IdSetRef ids_; + IdSetCRef ids_; protected: const base_idsetset *set_ = nullptr; union { - IdSetRef::const_iterator begin_; - IdSetRef::const_reverse_iterator rbegin_; + IdSetCRef::const_iterator begin_; + IdSetCRef::const_reverse_iterator rbegin_; base_idsetset::const_iterator setbegin_; base_idsetset::const_reverse_iterator setrbegin_; int rBegin_ = 0; @@ -112,8 +112,8 @@ class SingleSelectKeyResult { }; union { - IdSetRef::const_iterator end_; - IdSetRef::const_reverse_iterator rend_; + IdSetCRef::const_iterator end_; + IdSetCRef::const_reverse_iterator rend_; base_idsetset::const_iterator setend_; base_idsetset::const_reverse_iterator setrend_; int rEnd_ = 0; @@ -121,8 +121,8 @@ class SingleSelectKeyResult { }; union { - IdSetRef::const_iterator it_; - IdSetRef::const_reverse_iterator rit_; + IdSetCRef::const_iterator it_; + IdSetCRef::const_reverse_iterator rit_; base_idsetset::const_iterator itset_; base_idsetset::const_reverse_iterator ritset_; int rIt_ = 0; diff --git a/cpp_src/estl/span.h b/cpp_src/estl/span.h index 7f8fafd6d..3896d3aed 100644 --- a/cpp_src/estl/span.h +++ b/cpp_src/estl/span.h @@ -1,7 +1,6 @@ #pragma once #include -#include #include #include "tools/assertrx.h" #include "trivial_reverse_iterator.h" @@ -37,16 +36,27 @@ class span { return *this; } - // FIXME: const override (implicit const cast should not be possible for any type) - // Requires explicit giftStr for the 'string' types, which could be COW + template