From 30b838e7eda042a3e34204f22b093a11f46c2566 Mon Sep 17 00:00:00 2001 From: Roshan Khatri Date: Wed, 13 Nov 2024 11:53:19 +0000 Subject: [PATCH 1/8] Initial commit Signed-off-by: Roshan Khatri --- .gitignore | 31 + CMakeLists.txt | 166 + README.md | 40 + build.sh | 63 + requirements.txt | 3 + src/CMakeLists.txt | 40 + src/json/CPPLINT.cfg | 2 + src/json/alloc.cc | 59 + src/json/alloc.h | 29 + src/json/dom.cc | 1624 ++++++++ src/json/dom.h | 545 +++ src/json/json.cc | 3232 +++++++++++++++ src/json/json.h | 22 + src/json/json_api.cc | 120 + src/json/json_api.h | 70 + src/json/keytable.cc | 659 +++ src/json/keytable.h | 386 ++ src/json/memory.cc | 352 ++ src/json/memory.h | 144 + src/json/rapidjson_includes.h | 23 + src/json/selector.cc | 2418 +++++++++++ src/json/selector.h | 369 ++ src/json/stats.cc | 291 ++ src/json/stats.h | 138 + src/json/util.cc | 142 + src/json/util.h | 122 + src/rapidjson/CPPLINT.cfg | 1 + src/rapidjson/README.md | 14 + src/rapidjson/document.h | 3599 ++++++++++++++++ src/rapidjson/license.txt | 57 + src/rapidjson/prettywriter.h | 393 ++ src/rapidjson/reader.h | 2281 +++++++++++ src/rapidjson/stringbuffer.h | 119 + src/rapidjson/writer.h | 730 ++++ tst/CMakeLists.txt | 19 + tst/integration/data/store.json | 80 + tst/integration/data/wikipedia.json | 26 + tst/integration/data/wikipedia_compact.json | 1 + tst/integration/error_handlers.py | 31 + tst/integration/json_test_case.py | 61 + tst/integration/run.sh | 34 + tst/integration/test_json_basic.py | 4104 +++++++++++++++++++ tst/integration/test_rdb.py | 44 + tst/integration/utils_json.py | 28 + tst/unit/CMakeLists.txt | 89 + tst/unit/CPPLINT.cfg | 6 + tst/unit/dom_test.cc | 2304 +++++++++++ tst/unit/hashtable_test.cc | 209 + tst/unit/json_test.cc | 249 ++ tst/unit/keytable_test.cc | 393 ++ tst/unit/module_sim.cc | 101 + tst/unit/module_sim.h | 19 + tst/unit/selector_test.cc | 1344 ++++++ tst/unit/stats_test.cc | 32 + tst/unit/traps_test.cc | 180 + tst/unit/util_test.cc | 213 + 56 files changed, 27851 insertions(+) create mode 100644 .gitignore create mode 100644 CMakeLists.txt create mode 100644 README.md create mode 100755 build.sh create mode 100644 requirements.txt create mode 100644 src/CMakeLists.txt create mode 100644 src/json/CPPLINT.cfg create mode 100644 src/json/alloc.cc create mode 100644 src/json/alloc.h create mode 100644 src/json/dom.cc create mode 100644 src/json/dom.h create mode 100644 src/json/json.cc create mode 100644 src/json/json.h create mode 100644 src/json/json_api.cc create mode 100644 src/json/json_api.h create mode 100644 src/json/keytable.cc create mode 100644 src/json/keytable.h create mode 100644 src/json/memory.cc create mode 100644 src/json/memory.h create mode 100644 src/json/rapidjson_includes.h create mode 100644 src/json/selector.cc create mode 100644 src/json/selector.h create mode 100644 src/json/stats.cc create mode 100644 src/json/stats.h create mode 100644 src/json/util.cc create mode 100644 src/json/util.h create mode 100644 src/rapidjson/CPPLINT.cfg create mode 100644 src/rapidjson/README.md create mode 100644 src/rapidjson/document.h create mode 100644 src/rapidjson/license.txt create mode 100644 src/rapidjson/prettywriter.h create mode 100644 src/rapidjson/reader.h create mode 100644 src/rapidjson/stringbuffer.h create mode 100644 src/rapidjson/writer.h create mode 100644 tst/CMakeLists.txt create mode 100644 tst/integration/data/store.json create mode 100644 tst/integration/data/wikipedia.json create mode 100644 tst/integration/data/wikipedia_compact.json create mode 
100644 tst/integration/error_handlers.py
create mode 100644 tst/integration/json_test_case.py
create mode 100755 tst/integration/run.sh
create mode 100644 tst/integration/test_json_basic.py
create mode 100644 tst/integration/test_rdb.py
create mode 100644 tst/integration/utils_json.py
create mode 100644 tst/unit/CMakeLists.txt
create mode 100644 tst/unit/CPPLINT.cfg
create mode 100644 tst/unit/dom_test.cc
create mode 100644 tst/unit/hashtable_test.cc
create mode 100644 tst/unit/json_test.cc
create mode 100644 tst/unit/keytable_test.cc
create mode 100644 tst/unit/module_sim.cc
create mode 100644 tst/unit/module_sim.h
create mode 100644 tst/unit/selector_test.cc
create mode 100644 tst/unit/stats_test.cc
create mode 100644 tst/unit/traps_test.cc
create mode 100644 tst/unit/util_test.cc
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..48c5da1
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,31 @@
+# IDE files
+.idea/
+*.vscode
+.vscode/*
+
+# Build temp files
+*build
+cmake-build-*/
+
+# Auto-generated files
+**/__pycache__/*
+test-data
+*.pyc
+*.bin
+*.o
+*.xo
+*.so
+*.d
+*.a
+*.log
+*.out
+
+# Others
+.DS_Store
+.attach_pid*
+venv/
+core.*
+valkeytests
+**/include
+**/report.html
+**/assets
\ No newline at end of file
diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 0000000..85061ee
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,166 @@
+cmake_minimum_required(VERSION 3.17)
+
+include(FetchContent)
+include(ExternalProject)
+
+# Detect the system architecture
+EXECUTE_PROCESS(
+    COMMAND uname -m
+    COMMAND tr -d '\n'
+    OUTPUT_VARIABLE ARCHITECTURE
+)
+
+if("${ARCHITECTURE}" STREQUAL "x86_64")
+    message("Building JSON for x86_64")
+elseif("${ARCHITECTURE}" STREQUAL "aarch64")
+    message("Building JSON for aarch64")
+else()
+    message(FATAL_ERROR "Unsupported architecture. JSON is only supported on x86_64 and aarch64.")
+endif()
+
+# Project definition
+project(ValkeyJSONModule VERSION 1.0 LANGUAGES C CXX)
+
+# Set the name of the JSON shared library
+set(JSON_MODULE_LIB json)
+
+# Define the Valkey directories
+set(VALKEY_DOWNLOAD_DIR "${CMAKE_BINARY_DIR}/_deps/valkey-src")
+set(VALKEY_BIN_DIR "${CMAKE_BINARY_DIR}/_deps/valkey-src/src/valkey/bin")
+
+# Download and build Valkey
+ExternalProject_Add(
+    valkey
+    GIT_REPOSITORY https://github.com/valkey-io/valkey.git
+    GIT_TAG ${VALKEY_VERSION}
+    PREFIX ${VALKEY_DOWNLOAD_DIR}
+    BUILD_COMMAND make -j
+    INSTALL_COMMAND ""
+    BUILD_IN_SOURCE 1
+)
+
+# Define the paths for the copied files
+set(VALKEY_INCLUDE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/src/include")
+set(VALKEY_BINARY_DEST "${CMAKE_CURRENT_SOURCE_DIR}/tst/integration/.build/binaries/${VALKEY_VERSION}")
+
+ExternalProject_Add_Step(
+    valkey
+    copy_header_files
+    COMMENT "Copying header files to include/ directory"
+    DEPENDEES download
+    DEPENDERS configure
+    COMMAND ${CMAKE_COMMAND} -E make_directory ${VALKEY_INCLUDE_DIR}
+    COMMAND ${CMAKE_COMMAND} -E copy ${VALKEY_DOWNLOAD_DIR}/src/valkey/src/valkeymodule.h ${VALKEY_INCLUDE_DIR}/valkeymodule.h
+    ALWAYS 1
+)
+
+# Copy the valkey-server binary after the Valkey build
+add_custom_command(TARGET valkey
+    POST_BUILD
+    COMMAND ${CMAKE_COMMAND} -E make_directory ${VALKEY_BINARY_DEST}
+    COMMAND ${CMAKE_COMMAND} -E copy ${VALKEY_BIN_DIR}/valkey-server ${VALKEY_BINARY_DEST}/valkey-server
+    COMMENT "Copying valkey-server to the integration test binaries directory"
+)
+
+# Define valkey-bloom branch
+set(VALKEY_BLOOM_BRANCH "unstable" CACHE STRING "Valkey-bloom branch to use")
+
+# Set the download directory for Valkey-bloom
+set(VALKEY_BLOOM_DOWNLOAD_DIR "${CMAKE_CURRENT_BINARY_DIR}/_deps/valkey-bloom-src")
+
+# Download valkey-bloom
+ExternalProject_Add(
+    valkey-bloom
+    GIT_REPOSITORY https://github.com/valkey-io/valkey-bloom.git
+    GIT_TAG ${VALKEY_BLOOM_BRANCH}
+    GIT_SHALLOW TRUE
+    PREFIX "${VALKEY_BLOOM_DOWNLOAD_DIR}"
+    CONFIGURE_COMMAND ""
+    BUILD_COMMAND ""
+    INSTALL_COMMAND ""
+)
+
+# Step to copy pytest files
+ExternalProject_Add_Step(
+    valkey-bloom
+    copy_pytest_files
+    COMMENT "Copying pytest files to tst/integration directory"
+    DEPENDEES build
+    COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_SOURCE_DIR}/tst/integration
+    COMMAND ${CMAKE_COMMAND} -E copy_directory ${VALKEY_BLOOM_DOWNLOAD_DIR}/src/valkey-bloom/tests/valkeytests ${CMAKE_CURRENT_SOURCE_DIR}/tst/integration/valkeytests
+)
+
+# Enable instrumentation options if requested
+if("$ENV{INSTRUMENT_V2PATH}" STREQUAL "yes")
+    add_compile_definitions(INSTRUMENT_V2PATH)
+    message("Enabled INSTRUMENT_V2PATH")
+endif()
+
+# Disable Doxygen documentation generation
+set(BUILD_DOCUMENTATION OFF)
+# When CODE_COVERAGE is ON, the package is built twice, once for debug and once for release.
+# To avoid this, code coverage is disabled.
+set(CODE_COVERAGE OFF)
+
+# Fix for linking error when code coverage is enabled on ARM
+if(CODE_COVERAGE AND CMAKE_BUILD_TYPE STREQUAL "Debug")
+    add_link_options("--coverage")
+endif()
+
+# Set C & C++ standard versions
+set(CMAKE_C_STANDARD 11)
+set(CMAKE_C_STANDARD_REQUIRED True)
+set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_STANDARD_REQUIRED True)
+
+# Always include debug symbols and optimize the code
+set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O2 -g")
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O2 -g")
+
+# RapidJSON SIMD optimization
+if("${ARCHITECTURE}" STREQUAL "x86_64")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=nehalem")
+elseif("${ARCHITECTURE}" STREQUAL "aarch64")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=armv8-a")
+else()
+    message(FATAL_ERROR "Unsupported architecture. JSON is only supported on x86_64 and aarch64.")
+endif()
+
+# Additional flags for all architectures
+set(ADDITIONAL_FLAGS "-fPIC")
+
+# Compiler warning flags
+set(C_WARNING "-Wall -Werror -Wextra")
+set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${ADDITIONAL_FLAGS} ${C_WARNING}")
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${ADDITIONAL_FLAGS} ${C_WARNING}")
+
+# Fetch RapidJSON
+FetchContent_Declare(
+    rapidjson
+    GIT_REPOSITORY https://github.com/Tencent/rapidjson.git
+    GIT_TAG 0d4517f15a8d7167ba9ae67f3f22a559ca841e3b
+)
+
+# Disable RapidJSON tests and examples
+set(RAPIDJSON_BUILD_TESTS OFF CACHE BOOL "Build rapidjson tests" FORCE)
+set(RAPIDJSON_BUILD_EXAMPLES OFF CACHE BOOL "Build rapidjson examples" FORCE)
+set(RAPIDJSON_BUILD_DOC OFF CACHE BOOL "Build rapidjson documentation" FORCE)
+
+# Make RapidJSON available
+FetchContent_MakeAvailable(rapidjson)
+
+# Add the src subdirectory for building
+add_subdirectory(src)
+
+# Add the tst subdirectory for building
+add_subdirectory(tst)
+
+add_custom_target(test
+    COMMENT "Run JSON integration tests."
+    USES_TERMINAL
+    COMMAND rm -rf ${CMAKE_BINARY_DIR}/tst/integration
+    COMMAND mkdir -p ${CMAKE_BINARY_DIR}/tst/integration
+    COMMAND cp -rp ${CMAKE_SOURCE_DIR}/tst/integration/. ${CMAKE_BINARY_DIR}/tst/integration/
+    COMMAND echo "[TARGET] begin integration tests"
+    COMMAND ${CMAKE_SOURCE_DIR}/tst/integration/run.sh "test" ${CMAKE_SOURCE_DIR}
+    COMMAND echo "[TARGET] end integration tests")
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..858b7f9
--- /dev/null
+++ b/README.md
@@ -0,0 +1,40 @@
+# ValkeyJSON
+
+ValkeyJSON introduces a native JSON data type to open source Valkey, built on the JSON data interchange standard. With this feature, users can store, query, and modify JSON data structures in Valkey using a comprehensive JSONPath query language. The feature is compatible with the API and RDB formats supported by Valkey.
+
+## Prerequisites
+Python 3.9
+pytest 4
+
+## Building the ValkeyJSON module and running tests
+
+To build the module and run all tests:
+```text
+./build.sh
+```
+
+## Building the ValkeyJSON module only
+
+To build just the module:
+```text
+mkdir build
+cd build
+cmake .. -DVALKEY_VERSION=unstable
+make
+```
+
+## Unit Tests
+
+To run all unit tests:
+```text
+cd build
+make -j unit
+```
+
+## Integration Tests
+
+To run all integration tests:
+```text
+cd build
+make -j test
+```
diff --git a/build.sh b/build.sh
new file mode 100755
index 0000000..674e879
--- /dev/null
+++ b/build.sh
@@ -0,0 +1,63 @@
+#!/bin/bash
+
+# Script to build the ValkeyJSON module (libjson.so) and run the unit and integration tests.
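+#
+# Usage (illustrative):
+#   ./build.sh                        # builds against the default SERVER_VERSION ("unstable")
+#   SERVER_VERSION=8.0.0 ./build.sh   # builds against a specific supported server version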
+
+# Exit the script if any command fails
+set -e
+
+SCRIPT_DIR=$(pwd)
+echo "Script Directory: $SCRIPT_DIR"
+
+# Ensure SERVER_VERSION environment variable is set
+if [ -z "$SERVER_VERSION" ]; then
+    echo "WARNING: SERVER_VERSION environment variable is not set. Defaulting to unstable."
+    export SERVER_VERSION="unstable"
+fi
+
+if [ "$SERVER_VERSION" != "unstable" ] && [ "$SERVER_VERSION" != "8.0.0" ] ; then
+    echo "ERROR: Unsupported version - $SERVER_VERSION"
+    exit 1
+fi
+
+# Variables
+BUILD_DIR="$SCRIPT_DIR/build"
+
+# Build the ValkeyJSON module using CMake
+echo "Building ValkeyJSON module..."
+if [ ! -d "$BUILD_DIR" ]; then
+    mkdir $BUILD_DIR
+fi
+cd $BUILD_DIR
+cmake .. -DVALKEY_VERSION=$SERVER_VERSION
+make
+
+# Run the ValkeyJSON unit tests.
+echo "Running Unit Tests..."
+make -j unit
+
+cd $SCRIPT_DIR
+
+REQUIREMENTS_FILE="requirements.txt"
+
+# Check if pip is available
+if command -v pip > /dev/null 2>&1; then
+    echo "Using pip to install packages..."
+    pip install -r "$SCRIPT_DIR/$REQUIREMENTS_FILE"
+# Check if pip3 is available
+elif command -v pip3 > /dev/null 2>&1; then
+    echo "Using pip3 to install packages..."
+    pip3 install -r "$SCRIPT_DIR/$REQUIREMENTS_FILE"
+
+else
+    echo "Error: Neither pip nor pip3 is available. Please install a Python package installer."
+    exit 1
+fi
+
+export MODULE_PATH="$SCRIPT_DIR/build/src/libjson.so"
+
+# Run the ValkeyJSON integration tests.
+echo "Running the integration tests..."
+cd $BUILD_DIR
+make -j test
+
+echo "Build and Integration Tests succeeded"
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..9e95d79
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,3 @@
+valkey
+pytest==4
+pytest-html
\ No newline at end of file
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
new file mode 100644
index 0000000..a812131
--- /dev/null
+++ b/src/CMakeLists.txt
@@ -0,0 +1,40 @@
+message("src/CMakeLists.txt: Build JSON")
+
+set(OBJECT_TARGET json-objects CACHE INTERNAL "Object target for json module")
+add_library(${OBJECT_TARGET} OBJECT "")
+
+# Build with C11 & C++17
+set_target_properties(
+    ${OBJECT_TARGET}
+    PROPERTIES
+    C_STANDARD 11
+    C_STANDARD_REQUIRED ON
+    CXX_STANDARD 17
+    CXX_STANDARD_REQUIRED ON
+    POSITION_INDEPENDENT_CODE ON
+)
+
+target_include_directories(${OBJECT_TARGET}
+
+    # Make the include directories public within CMake
+    # so that they are also used when building the tests.
+    PUBLIC
+        ${CMAKE_CURRENT_SOURCE_DIR}
+        ${rapidjson_SOURCE_DIR}/include
+)
+
+# Add source files for the JSON module
+target_sources(${OBJECT_TARGET}
+    PRIVATE
+        json/json.cc
+        json/dom.cc
+        json/alloc.cc
+        json/util.cc
+        json/stats.cc
+        json/selector.cc
+        json/keytable.cc
+        json/memory.cc
+        json/json_api.cc
+)
+
+add_library(${JSON_MODULE_LIB} SHARED $<TARGET_OBJECTS:${OBJECT_TARGET}>)
diff --git a/src/json/CPPLINT.cfg b/src/json/CPPLINT.cfg
new file mode 100644
index 0000000..672708d
--- /dev/null
+++ b/src/json/CPPLINT.cfg
@@ -0,0 +1,2 @@
+filter=-build/include_order,-legal/copyright,-whitespace/braces,-build/c++11,-runtime/references,-build/include_what_you_use,-readability/casting,-build/header_guard,-runtime/int,-build/namespaces,-runtime/explicit
+linelength=120
diff --git a/src/json/alloc.cc b/src/json/alloc.cc
new file mode 100644
index 0000000..a1ea342
--- /dev/null
+++ b/src/json/alloc.cc
@@ -0,0 +1,59 @@
+#include "json/memory.h"
+#include "json/alloc.h"
+#include "json/stats.h"
+#include <cstring>  // strlen, strncpy
+#include <cstddef>  // size_t
+
+extern "C" {
+#define VALKEYMODULE_EXPERIMENTAL_API
+#include <./include/valkeymodule.h>
+}
+
+void *dom_alloc(size_t size) {
+    void *ptr = memory_alloc(size);
+    // the actually allocated size may not be the same as the requested size
+    size_t real_size = memory_allocsize(ptr);
+    jsonstats_increment_used_mem(real_size);
+    return ptr;
+}
+
+void dom_free(void *ptr) {
+    size_t size = memory_allocsize(ptr);
+    memory_free(ptr);
+    jsonstats_decrement_used_mem(size);
+}
+
+void *dom_realloc(void *orig_ptr, size_t new_size) {
+    // We need to handle the following two edge cases first. Otherwise, the following
+    // calculation of the incremented/decremented amount will fail.
+    if (new_size == 0 && orig_ptr != nullptr) {
+        dom_free(orig_ptr);
+        return nullptr;
+    }
+    if (orig_ptr == nullptr) return dom_alloc(new_size);
+
+    size_t orig_size = memory_allocsize(orig_ptr);
+    void *new_ptr = memory_realloc(orig_ptr, new_size);
+    // the actually allocated size may not be the same as the requested size
+    size_t real_new_size = memory_allocsize(new_ptr);
+    if (real_new_size > orig_size)
+        jsonstats_increment_used_mem(real_new_size - orig_size);
+    else if (real_new_size < orig_size)
+        jsonstats_decrement_used_mem(orig_size - real_new_size);
+
+    return new_ptr;
+}
+
+char *dom_strdup(const char *s) {
+    size_t size = strlen(s) + 1;
+    char *dup = static_cast<char *>(dom_alloc(size));
+    strncpy(dup, s, size);
+    return dup;
+}
+
+char *dom_strndup(const char *s, const size_t n) {
+    char *dup = static_cast<char *>(dom_alloc(n + 1));
+    strncpy(dup, s, n);
+    dup[n] = '\0';
+    return dup;
+}
diff --git a/src/json/alloc.h b/src/json/alloc.h
new file mode 100644
index 0000000..5b0bf09
--- /dev/null
+++ b/src/json/alloc.h
@@ -0,0 +1,29 @@
+/**
+ * This module is the JSON memory allocator (also called the DOM allocator), which wraps Valkey's built-in
+ * allocation functions - ValkeyModule_Alloc, ValkeyModule_Free and ValkeyModule_Realloc. All memory allocations,
+ * permanent or transient, should be done through this interface, so that allocated memory is correctly
+ * reported to the Valkey engine (MEMORY STATS).
+ *
+ * Besides correctly reporting memory usage to Valkey, it also provides a facility to track the memory usage of
+ * JSON objects, so that we can achieve the following:
+ * 1. To track the total memory allocated to JSON objects. This is done through an atomic global counter. Note that
+ *    the Valkey engine only reports total memory for all keys, not per key type. This JSON memory allocator
+ *    overcomes that deficiency.
+ * 2. To track each JSON document object's memory size. This is done through a thread-local counter. With the
+ *    ability to track an individual document's footprint, we can maintain a few interesting histograms that
+ *    provide insights into data distribution and API access patterns.
+ */
+#ifndef VALKEYJSONMODULE_ALLOC_H_
+#define VALKEYJSONMODULE_ALLOC_H_
+
+#include <cstddef>  // size_t
+
+#include "json/memory.h"
+
+void *dom_alloc(size_t size);
+void dom_free(void *ptr);
+void *dom_realloc(void *orig_ptr, size_t new_size);
+char *dom_strdup(const char *s);
+char *dom_strndup(const char *s, const size_t n);
+
+#endif  // VALKEYJSONMODULE_ALLOC_H_
diff --git a/src/json/dom.cc b/src/json/dom.cc
new file mode 100644
index 0000000..674c4b4
--- /dev/null
+++ b/src/json/dom.cc
@@ -0,0 +1,1624 @@
+#include "json/dom.h"
+#include "json/json.h"
+#include "json/stats.h"
+#include "json/selector.h"
+#include <cmath>        // std::nan
+#include <cstring>      // strlen
+#include <cstdint>      // int64_t, INT64_MAX, SIZE_MAX
+#include <algorithm>    // std::max
+#include <string_view>  // std::string_view
+#include "json/rapidjson_includes.h"
+
+#define STATIC /* decorator for static functions, remove so that backtrace symbols include these */
+
+#define CHECK_DOCUMENT_SIZE_LIMIT(ctx, curr_doc_size, input_json_val_size) \
+{ \
+    if (ctx != nullptr && !(ValkeyModule_GetContextFlags(ctx) & VALKEYMODULE_CTX_FLAGS_REPLICATED) && \
+        json_get_max_document_size() > 0 && (curr_doc_size + input_json_val_size > json_get_max_document_size())) { \
+        ValkeyModule_Log(ctx, "debug", \
+            "Document size limit is exceeded. The attempted operation will result in a document with %lu bytes of " \
+            "memory size.", curr_doc_size + input_json_val_size); \
+        return JSONUTIL_DOCUMENT_SIZE_LIMIT_EXCEEDED; \
+    } \
+}
+
+#define CHECK_DOCUMENT_PATH_LIMIT(ctx, selector, new_val) \
+{ \
+    size_t __depth_would_be = selector.getMaxPathDepth() + new_val.GetMaxDepth(); \
+    if (__depth_would_be > json_get_max_path_limit()) { \
+        ValkeyModule_Log(ctx, "debug", \
+            "Document path limit is exceeded. The attempted operation will result in a document with %lu nesting" \
+            " levels.", __depth_would_be); \
+        return JSONUTIL_DOCUMENT_PATH_LIMIT_EXCEEDED; \
+    } else { \
+        jsonstats_update_max_depth_ever_seen(__depth_would_be); \
+    } \
+}
+
+// the one true allocator
+RapidJsonAllocator allocator;
+
+/**
+ * We want to avoid all redundant creations of an allocator -- for performance reasons.
+ * So we use the constructor to detect that situation. It's free after startup. If you trip
+ * this trap, a rapidjson allocator instance was default-constructed somewhere in your code.
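+ * In practice this means: always pass the one global `allocator` explicitly when constructing or
+ * copying JValues (e.g. `JValue copy(v, allocator)`), rather than letting one be default-constructed.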
+ */
+RapidJsonAllocator::RapidJsonAllocator() {
+    ValkeyModule_Assert(this == &allocator);  // Only this one is allowed :)
+}
+
+JValue& dom_get_value(JDocument &doc) {
+    return doc.GetJValue();
+}
+
+JParser& JParser::Parse(const char *json, size_t len) {
+    int64_t begin_val = jsonstats_begin_track_mem();
+    RJParser::Parse(json, len);
+    int64_t delta = jsonstats_end_track_mem(begin_val);
+    ValkeyModule_Assert(delta >= 0);
+    allocated_size = static_cast<size_t>(delta);
+    return *this;
+}
+
+JParser& JParser::Parse(const std::string_view &sv) {
+    return Parse(sv.data(), sv.length());
+}
+
+jsn::string validate(const JDocument *doc) {
+    std::string s = doc->GetJValue().Validate();
+    return jsn::string(s.c_str(), s.length());
+}
+
+STATIC JDocument *create_doc() {
+    return new JDocument();
+}
+
+void dom_free_doc(JDocument *doc) {
+    ValkeyModule_Assert(doc != nullptr);
+    delete doc;
+}
+
+size_t dom_get_doc_size(const JDocument *doc) {
+    return doc->size;
+}
+
+void dom_set_doc_size(JDocument *doc, const size_t size) {
+    doc->size = size;
+}
+
+size_t dom_get_bucket_id(const JDocument *doc) {
+    return doc->bucket_id;
+}
+
+void dom_set_bucket_id(JDocument *doc, const uint32_t bucket_id) {
+    doc->bucket_id = bucket_id;
+}
+
+JsonUtilCode dom_parse(ValkeyModuleCtx *ctx, const char *json_buf, const size_t buf_len, JDocument **doc) {
+    *doc = nullptr;
+    JParser parser;
+    if (parser.Parse(json_buf, buf_len).HasParseError()) {
+        return parser.GetParseErrorCode();
+    }
+    CHECK_DOCUMENT_SIZE_LIMIT(ctx, size_t(0), parser.GetJValueSize())
+    *doc = create_doc();
+    (*doc)->SetJValue(parser.GetJValue());
+    jsonstats_update_max_depth_ever_seen(parser.GetMaxDepth());
+    return JSONUTIL_SUCCESS;
+}
+
+STATIC bool has_custom_format(const PrintFormat *format) {
+    return (format != nullptr && (format->indent != nullptr || format->space != nullptr || format->newline != nullptr));
+}
+
+/**
+ * Serialize a value.
+ * @param oss OUTPUT param; the serialized string is appended to it.
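+ * @param initialLevel indentation level that the writer starts at; 0 for a top-level value.
+ * @param format custom print format (indent/space/newline); nullptr produces compact output.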
+ */
+STATIC void serialize_value(const JValue &val, size_t initialLevel, const PrintFormat *format,
+                            rapidjson::StringBuffer &oss) {
+    size_t max_depth = 0;
+    rapidjson::PrettyWriter<rapidjson::StringBuffer> writer(oss);
+    if (has_custom_format(format)) {
+        if (format && format->newline) writer.SetNewline(std::string_view(format->newline, strlen(format->newline)));
+        if (format && format->indent) writer.SetIndent(std::string_view(format->indent, strlen(format->indent)));
+        if (format && format->space) writer.SetSpace(std::string_view(format->space, strlen(format->space)));
+        writer.SetInitialLevel(initialLevel);
+        val.Accept(writer);
+        jsonstats_update_max_depth_ever_seen(writer.GetMaxDepth());
+    } else {
+        writer.FastWrite(val, &max_depth);
+        jsonstats_update_max_depth_ever_seen(max_depth);
+    }
+}
+
+STATIC void serialize_value(const JValue &val, size_t initialLevel, const PrintFormat *format, ReplyBuffer& oss) {
+    size_t max_depth = 0;
+    rapidjson::PrettyWriter<ReplyBuffer> writer(oss);
+    if (has_custom_format(format)) {
+        if (format && format->newline) writer.SetNewline(std::string_view(format->newline, strlen(format->newline)));
+        if (format && format->indent) writer.SetIndent(std::string_view(format->indent, strlen(format->indent)));
+        if (format && format->space) writer.SetSpace(std::string_view(format->space, strlen(format->space)));
+        writer.SetInitialLevel(initialLevel);
+        val.Accept(writer);
+        jsonstats_update_max_depth_ever_seen(writer.GetMaxDepth());
+    } else {
+        writer.FastWrite(val, &max_depth);
+        jsonstats_update_max_depth_ever_seen(max_depth);
+    }
+}
+
+void dom_serialize(JDocument *doc, const PrintFormat *format, rapidjson::StringBuffer &oss) {
+    serialize_value(*(doc), 0, format, oss);
+}
+
+void dom_serialize_value(const JValue &val, const PrintFormat *format, rapidjson::StringBuffer &oss) {
+    serialize_value(val, 0, format, oss);
+}
+
+JsonUtilCode dom_set_value(ValkeyModuleCtx *ctx, JDocument *doc, const char *json_path, const char *new_val_json,
+                           size_t new_val_size, const bool is_create_only, const bool is_update_only) {
+    if (is_create_only && is_update_only) return JSONUTIL_NX_XX_SHOULD_BE_MUTUALLY_EXCLUSIVE;
+
+    Selector selector;
+    JsonUtilCode rc = selector.prepareSetValues(doc->GetJValue(), json_path);
+    if (rc != JSONUTIL_SUCCESS) return rc;
+
+    if (is_create_only && selector.hasUpdates()) return JSONUTIL_NX_XX_CONDITION_NOT_SATISFIED;
+    if (is_update_only && selector.hasInserts()) return JSONUTIL_NX_XX_CONDITION_NOT_SATISFIED;
+
+    JParser new_val;
+    if (new_val.Parse(new_val_json, new_val_size).HasParseError()) {
+        return new_val.GetParseErrorCode();
+    }
+
+    CHECK_DOCUMENT_PATH_LIMIT(ctx, selector, new_val)
+    CHECK_DOCUMENT_SIZE_LIMIT(ctx, doc->size, new_val.GetJValueSize())
+
+    selector.commit(new_val);
+    return JSONUTIL_SUCCESS;
+}
+
+template <typename OutputBuffer>
+STATIC void PutString(OutputBuffer& oss, const char *str) {
+    while (*str) oss.Put(*str++);
+}
+
+template <typename OutputBuffer>
+STATIC void PutEscapedString(OutputBuffer& oss, const char *str) {
+    JValue tmp;
+    tmp.SetString(str, strlen(str));
+    serialize_value(tmp, 0, nullptr, oss);
+}
+
+// Build stringified JSON array directly from a vector of values.
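+// For example (illustrative): three selected values 1, "a" and true serialize to [1,"a",true] with the
+// default compact format, or to one element per line when format->indent/newline are set.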
+template <typename T>
+STATIC void build_json_array(const jsn::vector<JValue*> &values, const PrintFormat *format, T &oss) {
+    bool has_format = has_custom_format(format);
+    oss.Put('[');
+    if (has_format && format->newline) PutString(oss, format->newline);
+    for (size_t i=0; i < values.size(); i++) {
+        if (has_format && format->indent) PutString(oss, format->indent);
+        serialize_value(*values[i], 1, format, oss);
+        if (i < values.size() - 1) oss.Put(',');
+        if (has_format && format->newline) PutString(oss, format->newline);
+    }
+    oss.Put(']');
+}
+
+template STATIC void build_json_array(const jsn::vector<JValue*> &values, const PrintFormat *format, ReplyBuffer &oss);
+template STATIC void build_json_array(const jsn::vector<JValue*> &values, const PrintFormat *format,
+                                      rapidjson::StringBuffer &oss);
+
+template <typename T>
+JsonUtilCode dom_get_value_as_str(JDocument *doc, const char *json_path, const PrintFormat *format,
+                                  T &oss, const bool update_stats) {
+    Selector selector;
+    JsonUtilCode rc = selector.getValues(*doc, json_path);
+    if (rc != JSONUTIL_SUCCESS) {
+        if (selector.isLegacyJsonPathSyntax()) return rc;
+        // For v2 path, return error code only if it's a syntax error.
+        if (selector.isSyntaxError(rc)) return rc;
+    }
+
+    jsn::vector<JValue*> values;
+    selector.getSelectedValues(values);
+
+    // If legacy path, return either the first value, or NONEXISTENT error if no value is found.
+    if (selector.isLegacyJsonPathSyntax()) {
+        if (values.empty()) {
+            return JSONUTIL_JSON_PATH_NOT_EXIST;
+        } else {
+            serialize_value(*values[0], 0, format, oss);
+            // update stats
+            if (update_stats) jsonstats_update_stats_on_read(oss.GetLength());
+            return JSONUTIL_SUCCESS;
+        }
+    }
+
+    // v2 path: return an array of values.
+    if (values.empty()) {
+        // return an empty array
+        oss.Put('[');
+        oss.Put(']');
+    } else {
+        // Multiple values are returned to the client as a JSON array.
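+        // e.g. (illustrative): JSON.GET doc $..price with two matched values 1 and 2 replies with [1,2].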
+ build_json_array(values, format, oss); + } + + // update stats + if (update_stats) jsonstats_update_stats_on_read(oss.GetLength()); + return JSONUTIL_SUCCESS; +} + +template JsonUtilCode dom_get_value_as_str(JDocument *doc, const char *json_path, const PrintFormat *format, + ReplyBuffer &oss, const bool update_stats); +template JsonUtilCode dom_get_value_as_str(JDocument *doc, const char *json_path, const PrintFormat *format, + rapidjson::StringBuffer &oss, const bool update_stats); + +STATIC void appendPathAndValue(const char *key, const JValue &val, const bool isLastPath, + const bool has_format, const PrintFormat *format, ReplyBuffer &oss) { + if (has_format && format->indent) PutString(oss, format->indent); + PutEscapedString(oss, key); + oss.Put(':'); + if (has_format && format->space) PutString(oss, format->space); + serialize_value(val, 1, format, oss); + if (!isLastPath) oss.Put(','); + if (has_format && format->newline) PutString(oss, format->newline); +} + +STATIC void appendPathAndValues(const char *key, const jsn::vector &values, const bool isLastPath, + const bool has_format, const PrintFormat *format, ReplyBuffer &oss) { + if (has_format && format->indent) PutString(oss, format->indent); + PutEscapedString(oss, key); + oss.Put(':'); + if (has_format && format->space) PutString(oss, format->space); + oss.Put('['); + if (has_format && format->newline) PutString(oss, format->newline); + + for (size_t i=0; i < values.size(); i++) { + if (has_format && format->indent) { + PutString(oss, format->indent); + PutString(oss, format->indent); + } + serialize_value(*values[i], 2, format, oss); + if (i < values.size() - 1) oss.Put(','); + if (has_format && format->newline) PutString(oss, format->newline); + } + + if (has_format && format->indent) PutString(oss, format->indent); + oss.Put(']'); + if (!isLastPath) oss.Put(','); + if (has_format && format->newline) PutString(oss, format->newline); +} + +STATIC JsonUtilCode buildJsonForMultiPaths(JDocument *doc, const char **paths, const int num_paths, + const bool is_v2path, const PrintFormat *format, + ReplyBuffer &oss) { + bool has_format = has_custom_format(format); + Selector selector(is_v2path); + JsonUtilCode rc; + oss.Put('{'); + if (has_format && format->newline) PutString(oss, format->newline); + for (int i = 0; i < num_paths; i++) { + rc = selector.getValues(*doc, paths[i]); + if (rc != JSONUTIL_SUCCESS) { + if (!is_v2path) return rc; + // For v2 path, return error code only if it's a syntax error. + if (selector.isSyntaxError(rc)) return rc; + } + + jsn::vector values; + selector.getSelectedValues(values); + + if (!is_v2path) { // legacy path + if (values.empty()) { + return JSONUTIL_JSON_PATH_NOT_EXIST; + } else { + appendPathAndValue(paths[i], *values[0], (i == num_paths - 1), has_format, format, oss); + } + } else { + appendPathAndValues(paths[i], values, (i == num_paths - 1), has_format, format, oss); + } + } + oss.Put('}'); + return JSONUTIL_SUCCESS; +} + +JsonUtilCode dom_get_values_as_str(JDocument *doc, const char **paths, const int num_paths, + PrintFormat *format, ReplyBuffer &oss, const bool update_stats) { + // If there are multiple paths mixed with both v1 and v2 syntax, the returned value should conform to the V2 + // behavior (returning an array of values). + // We can't start processing the first element until we know if we should conform to V1 or V2 behavior. 
+ // Example: + // cmd1: json.get wikipedia .foo .address + // cmd2: json.get wikipedia .foo $.address + // The expected behavior is: Cmd1 should fail because .foo does not exist, while cmd2 should succeed because + // overall the command should conform to V2 behavior (as the 2nd path is V2 path). Cmd2 should return the + // following result: + // 127.0.0.1:6379> json.get wikipedia .foo $.address + // {"$.address":[{"street":"21 2nd Street","city":"New York","state":"NY","zipcode":"10021-3100"}],".foo":[]} + // + // Without the pre-knowledge of V1 vs V2, both commands would fail, because when the selector first runs ".foo", + // it would think it is V1 and returns an error. The loop below would then exit without attempting the 2nd path. + bool is_v2path = Selector::has_at_least_one_v2path(paths, num_paths); + + // Values at multiple paths are combined to form a serialized JSON object string, in which each path is a key. + JsonUtilCode rc = buildJsonForMultiPaths(doc, paths, num_paths, is_v2path, format, oss); + if (rc != JSONUTIL_SUCCESS) return rc; + + // update stats + if (update_stats) jsonstats_update_stats_on_read(oss.GetLength()); + return JSONUTIL_SUCCESS; +} + +JsonUtilCode dom_delete_value(JDocument *doc, const char *json_path, size_t &num_vals_deleted) { + Selector selector; + return selector.deleteValues(doc->GetJValue(), json_path, num_vals_deleted); +} + +// check if there is at least one number value +STATIC bool has_number_value(jsn::vector &values) { + for (auto &v : values) { + if (v->IsNumber()) return true; + } + return false; +} + +JsonUtilCode dom_increment_by(JDocument *doc, const char *json_path, const JValue *incr_by, + jsn::vector &out_vals, bool &is_v2_path) { + out_vals.clear(); + Selector selector; + JsonUtilCode rc = selector.getValues(doc->GetJValue(), json_path); + is_v2_path = selector.isV2Path; + if (rc != JSONUTIL_SUCCESS) return rc; + + jsn::vector values; + selector.getSelectedValues(values); + + // Legacy path: + if (!is_v2_path) { + // return NONEXISTENT error if no value is selected + if (values.empty()) return JSONUTIL_JSON_PATH_NOT_EXIST; + // return WRONGTYPE error if no number value is selected + if (!has_number_value(values)) return JSONUTIL_JSON_ELEMENT_NOT_NUMBER; + } + + for (auto &val : selector.getUniqueResultSet()) { + if (val.first->IsNumber()) { + if (val.first->IsInt64() && incr_by->IsInt64()) { + // All are integers + int64_t res; + rc = jsonutil_add_int64(val.first->GetInt64(), incr_by->GetInt64(), &res); + if (rc == JSONUTIL_SUCCESS) { + val.first->SetInt64(res); + out_vals.push_back(res); + continue; + } + } + + double res; + rc = jsonutil_add_double(val.first->GetDouble(), incr_by->GetDouble(), &res); + if (rc != JSONUTIL_SUCCESS) return rc; + char double_string[BUF_SIZE_DOUBLE_JSON]; + size_t len = jsonutil_double_to_string(res, double_string, sizeof(double_string)); + val.first->SetDouble(double_string, len, allocator); + + out_vals.push_back(res); + } else { + out_vals.push_back(std::nan("NaN")); // indicates the value is not number + } + } + + return JSONUTIL_SUCCESS; +} + +JsonUtilCode dom_multiply_by(JDocument *doc, const char *json_path, const JValue *mult_by, + jsn::vector &out_vals, bool &is_v2_path) { + out_vals.clear(); + Selector selector; + JsonUtilCode rc = selector.getValues(doc->GetJValue(), json_path); + is_v2_path = selector.isV2Path; + if (rc != JSONUTIL_SUCCESS) return rc; + + jsn::vector values; + selector.getSelectedValues(values); + + // Legacy path: + if (!is_v2_path) { + // return NONEXISTENT error if no 
value is selected + if (values.empty()) return JSONUTIL_JSON_PATH_NOT_EXIST; + // return WRONGTYPE error if no number value is selected + if (!has_number_value(values)) return JSONUTIL_JSON_ELEMENT_NOT_NUMBER; + } + + for (auto &val : selector.getUniqueResultSet()) { + if (val.first->IsNumber()) { + double res; + rc = jsonutil_multiply_double(val.first->GetDouble(), mult_by->GetDouble(), &res); + if (rc != JSONUTIL_SUCCESS) return rc; + + if (jsonutil_is_int64(res)) { + val.first->SetInt64(static_cast(res)); + } else { + char double_string[BUF_SIZE_DOUBLE_JSON]; + size_t len = jsonutil_double_to_string(res, double_string, sizeof(double_string)); + val.first->SetDouble(double_string, len, allocator); + } + + out_vals.push_back(res); + } else { + out_vals.push_back(std::nan("NaN")); // indicates the value is not number + } + } + return JSONUTIL_SUCCESS; +} + +// check if there is at least one boolean value +STATIC bool has_boolean_value(jsn::vector &values) { + for (auto &v : values) { + if (v->IsBool()) return true; + } + return false; +} + +JsonUtilCode dom_toggle(JDocument *doc, const char *path, jsn::vector &vec, bool &is_v2_path) { + vec.clear(); + Selector selector; + JsonUtilCode rc = selector.getValues(doc->GetJValue(), path); + is_v2_path = selector.isV2Path; + if (rc != JSONUTIL_SUCCESS) { + if (selector.isLegacyJsonPathSyntax()) return rc; + // For v2 path, return error code only if it's a syntax error. + if (selector.isSyntaxError(rc)) return rc; + } + + jsn::vector values; + selector.getSelectedValues(values); + + // Legacy path + if (!is_v2_path) { + // return NONEXISTENT error if no value is selected + if (values.empty()) return JSONUTIL_JSON_PATH_NOT_EXIST; + // return WRONGTYPE error if no boolean value is selected + if (!has_boolean_value(values)) return JSONUTIL_JSON_ELEMENT_NOT_BOOL; + } + + for (auto &v : selector.getUniqueResultSet()) { + if (v.first->IsBool()) { + bool res = v.first->GetBool(); + res = !res; + v.first->SetBool(res); + vec.push_back(res? 1 : 0); + } else { + vec.push_back(-1); // -1 means the source value is not boolean + } + } + return JSONUTIL_SUCCESS; +} + +// check if there is at least one string value +STATIC bool has_string_value(jsn::vector &values) { + for (auto &v : values) { + if (v->IsString()) return true; + } + return false; +} + +JsonUtilCode dom_string_length(JDocument *doc, const char *path, jsn::vector &vec, bool &is_v2_path) { + vec.clear(); + Selector selector; + JsonUtilCode rc = selector.getValues(doc->GetJValue(), path); + is_v2_path = selector.isV2Path; + if (rc != JSONUTIL_SUCCESS) { + if (selector.isLegacyJsonPathSyntax()) return rc; + // For v2 path, return error code only if it's a syntax error. 
+ if (selector.isSyntaxError(rc)) return rc; + } + + jsn::vector values; + selector.getSelectedValues(values); + + // Legacy path + if (!is_v2_path) { + // return NONEXISTENT error if no value is selected + if (values.empty()) return JSONUTIL_JSON_PATH_NOT_EXIST; + // return WRONGTYPE error if no string value is selected + if (!has_string_value(values)) return JSONUTIL_JSON_ELEMENT_NOT_STRING; + } + + for (auto &v : values) { + if (v->IsString()) { + vec.push_back(v->GetStringLength()); + } else { + vec.push_back(SIZE_MAX); // indicates non-string value + } + } + return JSONUTIL_SUCCESS; +} + +JsonUtilCode dom_string_append(JDocument *doc, const char *path, const char *json, const size_t json_len, + jsn::vector &vec, bool &is_v2_path) { + vec.clear(); + Selector selector; + JsonUtilCode rc = selector.getValues(doc->GetJValue(), path); + is_v2_path = selector.isV2Path; + if (rc != JSONUTIL_SUCCESS) return rc; + + jsn::vector values; + selector.getSelectedValues(values); + + // Legacy path: + if (!is_v2_path) { + // return NONEXISTENT error if no value is selected + if (values.empty()) return JSONUTIL_JSON_PATH_NOT_EXIST; + // return WRONGTYPE error if no string value is selected + if (!has_string_value(values)) return JSONUTIL_JSON_ELEMENT_NOT_STRING; + } + + // verify the input json string is a valid + JParser appendVal; + if (appendVal.Parse(json, json_len).HasParseError()) return appendVal.GetParseErrorCode(); + if (!appendVal.GetJValue().IsString()) return JSONUTIL_VALUE_NOT_STRING; + + jsn::string str_append = jsn::string(appendVal.GetString()); + for (auto &v : selector.getUniqueResultSet()) { + if (v.first->IsString()) { + jsn::string new_string = jsn::string(v.first->GetString()) + str_append; + v.first->SetString(new_string.c_str(), new_string.length(), allocator); + vec.push_back(new_string.length()); + } else { + vec.push_back(SIZE_MAX); // indicates non-string value + } + } + return JSONUTIL_SUCCESS; +} + +// check if there is at least one object value +STATIC bool has_object_value(jsn::vector &values) { + for (auto &v : values) { + if (v->IsObject()) return true; + } + return false; +} + +JsonUtilCode dom_object_length(JDocument *doc, const char *path, jsn::vector &vec, bool &is_v2_path) { + vec.clear(); + Selector selector; + JsonUtilCode rc = selector.getValues(doc->GetJValue(), path); + is_v2_path = selector.isV2Path; + if (rc != JSONUTIL_SUCCESS) { + if (selector.isLegacyJsonPathSyntax()) return rc; + // For v2 path, return error code only if it's a syntax error. + if (selector.isSyntaxError(rc)) return rc; + } + + jsn::vector values; + selector.getSelectedValues(values); + + // Legacy path + if (!is_v2_path) { + // return NONEXISTENT error if no value is selected + if (values.empty()) return JSONUTIL_JSON_PATH_NOT_EXIST; + // return WRONGTYPE error if no object value is selected + if (!has_object_value(values)) return JSONUTIL_JSON_ELEMENT_NOT_OBJECT; + } + + for (auto &v : values) { + if (v->IsObject()) { + vec.push_back(v->MemberCount()); + } else { + vec.push_back(SIZE_MAX); // indicates non-object value + } + } + return JSONUTIL_SUCCESS; +} + +JsonUtilCode dom_object_keys(JDocument *doc, const char *path, + jsn::vector> &vec, bool &is_v2_path) { + vec.clear(); + Selector selector; + JsonUtilCode rc = selector.getValues(doc->GetJValue(), path); + is_v2_path = selector.isV2Path; + if (rc != JSONUTIL_SUCCESS) { + if (selector.isLegacyJsonPathSyntax()) return rc; + // For v2 path, return error code only if it's a syntax error. 
+        if (selector.isSyntaxError(rc)) return rc;
+    }
+
+    jsn::vector<JValue*> values;
+    selector.getSelectedValues(values);
+
+    // Legacy path
+    if (!is_v2_path) {
+        // return NONEXISTENT error if no value is selected
+        if (values.empty()) return JSONUTIL_JSON_PATH_NOT_EXIST;
+        // return WRONGTYPE error if no object value is selected
+        if (!has_object_value(values)) return JSONUTIL_JSON_ELEMENT_NOT_OBJECT;
+    }
+
+    for (auto &v : values) {
+        jsn::vector<jsn::string> keys;
+        if (v->IsObject()) {
+            for (auto &m : v->GetObject()) {
+                keys.push_back(std::move(jsn::string(m.name.GetString(), m.name.GetStringLength())));
+            }
+        }
+        vec.push_back(keys);
+    }
+    return JSONUTIL_SUCCESS;
+}
+
+// check if there is at least one array value
+STATIC bool has_array_value(jsn::vector<JValue*> &values) {
+    for (auto &v : values) {
+        if (v->IsArray()) return true;
+    }
+    return false;
+}
+
+JsonUtilCode dom_array_length(JDocument *doc, const char *path, jsn::vector<size_t> &vec, bool &is_v2_path) {
+    vec.clear();
+    Selector selector;
+    JsonUtilCode rc = selector.getValues(doc->GetJValue(), path);
+    is_v2_path = selector.isV2Path;
+    if (rc != JSONUTIL_SUCCESS) {
+        if (selector.isLegacyJsonPathSyntax()) return rc;
+        // For v2 path, return error code only if it's a syntax error.
+        if (selector.isSyntaxError(rc)) return rc;
+    }
+
+    jsn::vector<JValue*> values;
+    selector.getSelectedValues(values);
+
+    // Legacy path
+    if (!is_v2_path) {
+        // return NONEXISTENT error if no value is selected
+        if (values.empty()) return JSONUTIL_JSON_PATH_NOT_EXIST;
+        // return WRONGTYPE error if no array value is selected
+        if (!has_array_value(values)) return JSONUTIL_JSON_ELEMENT_NOT_ARRAY;
+    }
+
+    for (auto &v : values) {
+        if (v->IsArray()) {
+            vec.push_back(v->Size());
+        } else {
+            vec.push_back(SIZE_MAX);  // indicates non-array value
+        }
+    }
+    return JSONUTIL_SUCCESS;
+}
+
+JsonUtilCode dom_array_append(ValkeyModuleCtx *ctx, JDocument *doc, const char *path,
+                              const char **jsons, size_t *json_lens, const size_t num_values,
+                              jsn::vector<size_t> &vec, bool &is_v2_path) {
+    vec.clear();
+    Selector selector;
+    JsonUtilCode rc = selector.getValues(doc->GetJValue(), path);
+    is_v2_path = selector.isV2Path;
+    if (rc != JSONUTIL_SUCCESS) return rc;
+
+    jsn::vector<JValue*> values;
+    selector.getSelectedValues(values);
+
+    // Legacy path:
+    if (!is_v2_path) {
+        // return NONEXISTENT error if no value is selected
+        if (values.empty()) return JSONUTIL_JSON_PATH_NOT_EXIST;
+        // return WRONGTYPE error if no array value is selected
+        if (!has_array_value(values)) return JSONUTIL_JSON_ELEMENT_NOT_ARRAY;
+    }
+
+    // parse json values
+    jsn::vector<JParser> appendVals(num_values);
+    size_t totalJValueSize = 0;
+    for (size_t i=0; i < num_values; i++) {
+        if (appendVals[i].Parse(jsons[i], json_lens[i]).HasParseError()) {
+            return appendVals[i].GetParseErrorCode();
+        }
+        CHECK_DOCUMENT_PATH_LIMIT(ctx, selector, appendVals[i])
+        totalJValueSize += appendVals[i].GetJValueSize();
+    }
+    CHECK_DOCUMENT_SIZE_LIMIT(ctx, doc->size, totalJValueSize)
+
+    for (auto &v : selector.getUniqueResultSet()) {
+        if (v.first->IsArray()) {
+            for (size_t i=0; i < num_values; i++) {
+                // Need to make a copy of the value because after the first call of JValue::PushBack,
+                // the object is moved and can no longer be pushed into another array.
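+                // (Illustrative: without the copy, appending one parsed value to two matched arrays
+                // would leave a moved-from null element in the second array.)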
+                JValue copy(appendVals[i], allocator);
+                v.first->PushBack(copy, allocator);
+            }
+            vec.push_back(v.first->Size());
+        } else {
+            vec.push_back(SIZE_MAX);  // indicates non-array value
+        }
+    }
+    return JSONUTIL_SUCCESS;
+}
+
+STATIC void internal_array_pop(JValue &arrVal, int64_t index, jsn::vector<rapidjson::StringBuffer> &vec,
+                               rapidjson::StringBuffer &oss) {
+    // Convert negative index to positive
+    int64_t size = arrVal.Size();
+    if (index < 0) index = (arrVal.Size() == 0 ? 0 : size + index);
+
+    // Out-of-bound index is rounded to respective array bounds
+    if (index >= size) index = size - 1;
+    if (index < 0) index = 0;
+
+    serialize_value(arrVal[index], 0, nullptr, oss);
+    arrVal.Erase(arrVal.Begin() + index);
+    vec.push_back(std::move(oss));
+}
+
+JsonUtilCode dom_array_pop(JDocument *doc, const char *path, int64_t index,
+                           jsn::vector<rapidjson::StringBuffer> &vec, bool &is_v2_path) {
+    vec.clear();
+    Selector selector;
+    JsonUtilCode rc = selector.getValues(doc->GetJValue(), path);
+    is_v2_path = selector.isV2Path;
+    if (rc != JSONUTIL_SUCCESS) return rc;
+
+    jsn::vector<JValue*> values;
+    selector.getSelectedValues(values);
+
+    // Legacy path:
+    if (!is_v2_path) {
+        // return NONEXISTENT error if no value is selected
+        if (values.empty()) return JSONUTIL_JSON_PATH_NOT_EXIST;
+        // return WRONGTYPE error if no array value is selected
+        if (!has_array_value(values)) return JSONUTIL_JSON_ELEMENT_NOT_ARRAY;
+    }
+
+    for (auto &v : selector.getUniqueResultSet()) {
+        rapidjson::StringBuffer oss;
+        if (v.first->IsArray()) {
+            if (v.first->Empty()) {
+                vec.push_back(std::move(oss));  // empty array, oss is empty
+            } else {
+                internal_array_pop(*v.first, index, vec, oss);
+            }
+        } else {
+            vec.push_back(std::move(oss));  // non-array value, oss is empty
+        }
+    }
+
+    return JSONUTIL_SUCCESS;
+}
+
+STATIC JsonUtilCode internal_array_insert(JValue &arrVal, jsn::vector<JParser> &insertVals,
+                                          const size_t num_values, int64_t index, jsn::vector<size_t> &vec) {
+    size_t size = arrVal.Size();
+
+    // Negative index values are interpreted as starting from the end.
+    if (index < 0) index = (arrVal.Size() == 0 ? 0 : size + index);
+
+    // Return error if the index is out of bounds.
+    // If index is size-1, we are inserting before the last element.
+    // If index is size, we are appending to the array.
+    if (index < 0 || index > static_cast<int64_t>(size)) return JSONUTIL_INDEX_OUT_OF_ARRAY_BOUNDARIES;
+
+    // append num_values empty values
+    for (size_t i=0; i < num_values; i++) {
+        JValue empty;
+        arrVal.PushBack(empty, allocator);
+    }
+
+    // shift the original values [index..size-1] to the right by num_values positions
+    for (int64_t i = arrVal.Size() - 1; i >= static_cast<int64_t>(num_values) + index; i--) {
+        arrVal[i] = arrVal[i - num_values];
+    }
+
+    // overwrite values [index..index+num_values-1]
+    for (int64_t i=index; i < index + static_cast<int64_t>(num_values); i++) {
+        // Need to make a copy of the value to insert because after the value is assigned,
+        // it is moved and can no longer be assigned to another value.
+        JValue copy(insertVals[i - index].GetJValue(), allocator);
+        arrVal[i] = copy;
+    }
+
+    vec.push_back(arrVal.Size());
+    return JSONUTIL_SUCCESS;
+}
+
+JsonUtilCode dom_array_insert(ValkeyModuleCtx *ctx, JDocument *doc, const char *path, int64_t index,
+                              const char **jsons, size_t *json_lens, const size_t num_values,
+                              jsn::vector<size_t> &vec, bool &is_v2_path) {
+    vec.clear();
+    Selector selector;
+    JsonUtilCode rc = selector.getValues(doc->GetJValue(), path);
+    is_v2_path = selector.isV2Path;
+    if (rc != JSONUTIL_SUCCESS) return rc;
+
+    jsn::vector<JValue*> values;
+    selector.getSelectedValues(values);
+
+    // Legacy path:
+    if (!is_v2_path) {
+        // return NONEXISTENT error if no value is selected
+        if (values.empty()) return JSONUTIL_JSON_PATH_NOT_EXIST;
+        // return WRONGTYPE error if no array value is selected
+        if (!has_array_value(values)) return JSONUTIL_JSON_ELEMENT_NOT_ARRAY;
+    }
+
+    // parse json values
+    jsn::vector<JParser> insertVals(num_values);
+    size_t totalJValueSize = 0;
+    for (size_t i=0; i < num_values; i++) {
+        if (insertVals[i].Parse(jsons[i], json_lens[i]).HasParseError()) {
+            return insertVals[i].GetParseErrorCode();
+        }
+        CHECK_DOCUMENT_PATH_LIMIT(ctx, selector, insertVals[i])
+        totalJValueSize += insertVals[i].GetJValueSize();
+    }
+    CHECK_DOCUMENT_SIZE_LIMIT(ctx, doc->size, totalJValueSize)
+
+    for (auto &v : selector.getUniqueResultSet()) {
+        if (v.first->IsArray()) {
+            rc = internal_array_insert(*v.first, insertVals, num_values, index, vec);
+            if (rc != JSONUTIL_SUCCESS) return rc;
+        } else {
+            vec.push_back(SIZE_MAX);  // indicates non-array value
+        }
+    }
+    return JSONUTIL_SUCCESS;
+}
+
+STATIC void internal_array_trim(JValue &arrVal, int64_t start, int64_t stop, jsn::vector<size_t> &vec) {
+    int64_t size = static_cast<int64_t>(arrVal.Size());
+    if (size == 0) {
+        vec.push_back(0);
+        return;
+    }
+
+    // if start < 0, set it to 0.
+    if (start < 0) start = 0;
+
+    // if stop >= size, set it to size-1
+    if (stop >= size) stop = size - 1;
+
+    if (start >= size || start > stop) {
+        // If start >= size or start > stop, empty the array and record the new length as 0.
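+        // e.g. (illustrative): start=4, stop=1 on [0,1,2,3,4] satisfies start > stop, so the array is emptied.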
+ arrVal.Erase(arrVal.Begin(), arrVal.End()); + vec.push_back(0); + return; + } + + if (stop < size-1) + arrVal.Erase(arrVal.Begin() + stop + 1, arrVal.Begin() + size); + if (start > 0) + arrVal.Erase(arrVal.Begin(), arrVal.Begin() + start); + + vec.push_back(arrVal.Size()); +} + +JsonUtilCode dom_array_trim(JDocument *doc, const char *path, int64_t start, int64_t stop, + jsn::vector &vec, bool &is_v2_path) { + vec.clear(); + Selector selector; + JsonUtilCode rc = selector.getValues(doc->GetJValue(), path); + is_v2_path = selector.isV2Path; + if (rc != JSONUTIL_SUCCESS) return rc; + + jsn::vector values; + selector.getSelectedValues(values); + + // Legacy path: + if (!is_v2_path) { + // return NONEXISTENT error if no value is selected + if (values.empty()) return JSONUTIL_JSON_PATH_NOT_EXIST; + // return WRONGTYPE error if no array value is selected + if (!has_array_value(values)) return JSONUTIL_JSON_ELEMENT_NOT_ARRAY; + } + + for (auto &v : selector.getUniqueResultSet()) { + if (v.first->IsArray()) { + internal_array_trim(*v.first, start, stop, vec); + } else { + vec.push_back(SIZE_MAX); // indicates non-array value + } + } + return JSONUTIL_SUCCESS; +} + +JsonUtilCode dom_clear(JDocument *doc, const char *path, size_t &elements_cleared) { + elements_cleared = 0; + Selector selector; + JsonUtilCode rc = selector.getValues(doc->GetJValue(), path); + if (rc != JSONUTIL_SUCCESS) return rc; + + for (auto &v : selector.getUniqueResultSet()) { + if (v.first->IsArray()) { + if (!v.first->Empty()) { + v.first->Erase(v.first->Begin(), v.first->End()); + elements_cleared++; + } + } else if (v.first->IsObject()) { + if (!v.first->ObjectEmpty()) { + v.first->RemoveAllMembers(); + elements_cleared++; + } + } else if (v.first->IsBool()) { + if (v.first->IsTrue()) { + v.first->SetBool(false); + elements_cleared++; + } + } else if (v.first->IsString()) { + if (v.first->GetStringLength() > 0) { + v.first->SetString(""); + elements_cleared++; + } + } else if (v.first->IsInt()) { + if (v.first->GetInt() != 0) { + v.first->SetInt(0); + elements_cleared++; + } + } else if (v.first->IsInt64()) { + if (v.first->GetInt64() !=0) { + v.first->SetInt64(0); + elements_cleared++; + } + } else if (v.first->IsUint()) { + if (v.first->GetUint() != 0) { + v.first->SetUint(0); + elements_cleared++; + } + } else if (v.first->IsUint64()) { + if (v.first->GetUint64() != 0) { + v.first->SetUint64(0); + elements_cleared++; + } + } else if (v.first->IsDouble()) { + if (v.first->GetDouble() < 0.0 || v.first->GetDouble() > 0.0) { + v.first->SetDouble("0.0", 3, allocator); + elements_cleared++; + } + } + } + return JSONUTIL_SUCCESS; +} + +STATIC void internal_array_index_of(const JValue &arrVal, const JValue &inputVal, int64_t start, int64_t stop, + jsn::vector &vec) { + int64_t size = static_cast(arrVal.Size()); + if (size == 0) { + vec.push_back(-1); + return; + } + + // if stop == 0 or -1, the last element is included. + if (stop == 0 || stop == -1) stop = size; + + // if stop > size, set it to size. 
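+    // e.g. (illustrative): stop=100 on a 3-element array is clamped to 3, so the whole array is scanned.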
+ if (stop > size) stop = size; + + if (start > stop) { + vec.push_back(-1); + return; + } + + for (int64_t i=start; i < stop; i++) { + if (arrVal[i] == inputVal) { + vec.push_back(i); + return; + } + } + + vec.push_back(-1); // not found +} + +JsonUtilCode dom_array_index_of(JDocument *doc, const char *path, const char *scalar_val, + const size_t scalar_val_len, int64_t start, int64_t stop, + jsn::vector &vec, bool &is_v2_path) { + vec.clear(); + Selector selector; + JsonUtilCode rc = selector.getValues(doc->GetJValue(), path); + is_v2_path = selector.isV2Path; + if (rc != JSONUTIL_SUCCESS) { + if (selector.isLegacyJsonPathSyntax()) return rc; + // For v2 path, return error code only if it's a syntax error. + if (selector.isSyntaxError(rc)) return rc; + } + + jsn::vector values; + selector.getSelectedValues(values); + + // Legacy path + if (!is_v2_path) { + // return NONEXISTENT error if no value is selected + if (values.empty()) return JSONUTIL_JSON_PATH_NOT_EXIST; + // return WRONGTYPE error if no array value is selected + if (!has_array_value(values)) return JSONUTIL_JSON_ELEMENT_NOT_ARRAY; + } + + // if start < 0, set it to 0. + if (start < 0) start = 0; + + // verify the input value is valid JSON + JParser inputVal; + if (inputVal.Parse(scalar_val, scalar_val_len).HasParseError()) return inputVal.GetParseErrorCode(); + + for (auto &v : values) { + if (v->IsArray()) { + internal_array_index_of(*v, inputVal.GetJValue(), start, stop, vec); + } else { + vec.push_back(INT64_MAX); // indicates non-array value + } + } + return JSONUTIL_SUCCESS; +} + +JsonUtilCode dom_value_type(JDocument *doc, const char *path, jsn::vector &vec, bool &is_v2_path) { + vec.clear(); + Selector selector; + JsonUtilCode rc = selector.getValues(doc->GetJValue(), path); + is_v2_path = selector.isV2Path; + if (rc != JSONUTIL_SUCCESS) { + if (selector.isLegacyJsonPathSyntax()) return rc; + // For v2 path, return error code only if it's a syntax error. 
+ if (selector.isSyntaxError(rc)) return rc; + } + + // Legacy path + if (!is_v2_path) { + // return NONEXISTENT error if no value is selected + if (selector.getResultSet().empty()) return JSONUTIL_JSON_PATH_NOT_EXIST; + } + + // JSON type name returned to client for command JSON.TYPE + static const char *TYPE_NAMES[] = {"null", "boolean", "string", "number", "integer", "object", "array"}; + + for (auto &v : selector.getResultSet()) { + switch (v.first->GetType()) { + case rapidjson::kNullType: + vec.push_back(std::move(jsn::string(TYPE_NAMES[0]))); + break; + case rapidjson::kTrueType: + case rapidjson::kFalseType: + vec.push_back(std::move(jsn::string(TYPE_NAMES[1]))); + break; + case rapidjson::kStringType: + vec.push_back(std::move(jsn::string(TYPE_NAMES[2]))); + break; + case rapidjson::kNumberType: { + if (v.first->IsDouble()) + vec.push_back(std::move(jsn::string(TYPE_NAMES[3]))); + else + vec.push_back(std::move(jsn::string(TYPE_NAMES[4]))); + break; + } + case rapidjson::kObjectType: + vec.push_back(std::move(jsn::string(TYPE_NAMES[5]))); + break; + case rapidjson::kArrayType: + vec.push_back(std::move(jsn::string(TYPE_NAMES[6]))); + break; + default: + ValkeyModule_Assert(false); + break; + } + } + return JSONUTIL_SUCCESS; +} + +STATIC void dom_reply_with_resp_internal(ValkeyModuleCtx *ctx, const JValue& val) { + switch (val.GetType()) { + case rapidjson::kObjectType: { + ValkeyModule_ReplyWithArray(ctx, VALKEYMODULE_POSTPONED_ARRAY_LEN); + ValkeyModule_ReplyWithSimpleString(ctx, "{"); + long len = 1; + for (auto &m : val.GetObject()) { + ValkeyModule_ReplyWithArray(ctx, 2); + ValkeyModule_ReplyWithStringBuffer(ctx, m.name.GetString(), m.name.GetStringLength()); + dom_reply_with_resp_internal(ctx, m.value); + len++; + } + ValkeyModule_ReplySetArrayLength(ctx, len); + break; + } + case rapidjson::kArrayType: { + ValkeyModule_ReplyWithArray(ctx, VALKEYMODULE_POSTPONED_ARRAY_LEN); + ValkeyModule_ReplyWithSimpleString(ctx, "["); + for (auto &m : val.GetArray()) { + dom_reply_with_resp_internal(ctx, m); + } + ValkeyModule_ReplySetArrayLength(ctx, val.Size() + 1); + break; + } + case rapidjson::kNullType: + ValkeyModule_ReplyWithNull(ctx); + break; + case rapidjson::kTrueType: + ValkeyModule_ReplyWithSimpleString(ctx, "true"); + break; + case rapidjson::kFalseType: + ValkeyModule_ReplyWithSimpleString(ctx, "false"); + break; + case rapidjson::kNumberType: { + if (val.IsInt()) { + ValkeyModule_ReplyWithLongLong(ctx, val.GetInt()); + } else if (val.IsInt64()) { + ValkeyModule_ReplyWithLongLong(ctx, val.GetInt64()); + } else if (val.IsUint()) { + ValkeyModule_ReplyWithLongLong(ctx, val.GetUint()); + } else if (val.IsUint64()) { + ValkeyModule_ReplyWithLongLong(ctx, val.GetUint64()); + } else { + ValkeyModule_Assert(val.IsDouble()); + char str[BUF_SIZE_DOUBLE_RAPID_JSON]; + size_t len = jsonutil_double_to_string_rapidjson(val.GetDouble(), str, sizeof(str)); + ValkeyModule_ReplyWithStringBuffer(ctx, str, len); + } + break; + } + case rapidjson::kStringType: + ValkeyModule_ReplyWithStringBuffer(ctx, val.GetString(), val.GetStringLength()); + break; + default: + ValkeyModule_Assert(false); + break; + } +} + +JsonUtilCode dom_reply_with_resp(ValkeyModuleCtx *ctx, JDocument *doc, const char *path) { + Selector selector; + JsonUtilCode rc = selector.getValues(doc->GetJValue(), path); + if (rc != JSONUTIL_SUCCESS) { + if (selector.isLegacyJsonPathSyntax()) return rc; + // For v2 path, return error code only if it's a syntax error. 
+ if (selector.isSyntaxError(rc)) return rc; + } + + if (selector.getResultSet().empty()) { + if (!selector.isV2Path) { + // Legacy path, return NONEXISTENT + return JSONUTIL_JSON_PATH_NOT_EXIST; + } else { + // JSONPath, return empty array + ValkeyModule_ReplyWithEmptyArray(ctx); + return JSONUTIL_SUCCESS; + } + } + + if (selector.isV2Path) ValkeyModule_ReplyWithArray(ctx, selector.getResultSet().size()); + + for (auto &v : selector.getResultSet()) { + dom_reply_with_resp_internal(ctx, *v.first); + } + return JSONUTIL_SUCCESS; +} + +STATIC size_t mem_size_internal(const JValue& v) { + size_t size = sizeof(v); // data structure size + if (v.IsString()) { + size += v.IsShortString() ? 0 : v.GetStringLength(); // add scalar string value's length + } else if (v.IsDouble()) { + size += v.IsShortDouble() ? 0 : v.GetDoubleStringLength(); + } else if (v.IsObject()) { + for (auto m = v.MemberBegin(); m != v.MemberEnd(); ++m) { + size += m.NodeSize() - sizeof(m->value); // Overhead (not including the value, which gets added below) + size += m->name.GetStringLength(); // add key's length + size += mem_size_internal(m->value); // add value's size + } + } else if (v.IsArray()) { + for (auto &m : v.GetArray()) + size += mem_size_internal(m); // add member's size + } + return size; +} + +JsonUtilCode dom_mem_size(JDocument *doc, const char *path, jsn::vector &vec, bool &is_v2_path, + bool default_path) { + vec.clear(); + // Optimization: + // The size of the whole document should be obtained from the meta data attached to document object. + if (jsonutil_is_root_path(path) && default_path) { + vec.push_back(dom_get_doc_size(doc)); + is_v2_path = !strcmp(path, "$"); + return JSONUTIL_SUCCESS; + } + + Selector selector; + JsonUtilCode rc = selector.getValues(doc->GetJValue(), path); + is_v2_path = selector.isV2Path; + if (rc != JSONUTIL_SUCCESS) { + if (selector.isLegacyJsonPathSyntax()) return rc; + // For v2 path, return error code only if it's a syntax error. + if (selector.isSyntaxError(rc)) return rc; + } + + // Legacy path + if (!is_v2_path) { + // return NONEXISTENT error if no value is selected + if (selector.getResultSet().empty()) return JSONUTIL_JSON_PATH_NOT_EXIST; + } + + for (auto &v : selector.getResultSet()) { + vec.push_back(mem_size_internal(*v.first)); + } + return JSONUTIL_SUCCESS; +} + +STATIC size_t num_fields_internal(JValue& v) { + size_t num_fields = 1; + if (v.IsObject()) { + for (auto &m : v.GetObject()) + num_fields += num_fields_internal(m.value); + } else if (v.IsArray()) { + for (auto &m : v.GetArray()) + num_fields += num_fields_internal(m); + } + return num_fields; +} + +/* + * If the top-level JSON value is a container (object or array), we want to return number of fields IN the container, + * not to count the container itself. For a nested container object, we want to count the container itself. + * e.g., { "address": { "street": "21 2nd Street", "city": "New York", "state": "NY", "zipcode": "10021-3100" } }. + * If we are counting number of fields in the root doc, the address field is counted. So, there are 5 fields. + * But If we are counting number of fields for the "address" value, the answer is 4, excluding address field itself. 
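+ * E.g. (illustrative): with the document above, calling dom_num_fields with path "$" fills vec with [5],
+ * while path "$.address" fills it with [4].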
+ */
+JsonUtilCode dom_num_fields(JDocument *doc, const char *path, jsn::vector<size_t> &vec, bool &is_v2_path) {
+ vec.clear();
+ Selector selector;
+ JsonUtilCode rc = selector.getValues(doc->GetJValue(), path);
+ is_v2_path = selector.isV2Path;
+ if (rc != JSONUTIL_SUCCESS) {
+ if (selector.isLegacyJsonPathSyntax()) return rc;
+ // For v2 path, return error code only if it's a syntax error.
+ if (selector.isSyntaxError(rc)) return rc;
+ }
+
+ // Legacy path
+ if (!is_v2_path) {
+ // return NONEXISTENT error if no value is selected
+ if (selector.getResultSet().empty()) return JSONUTIL_JSON_PATH_NOT_EXIST;
+ }
+
+ for (auto &v : selector.getResultSet()) {
+ size_t count = num_fields_internal(*v.first);
+ if (v.first->IsObject() || v.first->IsArray())
+ count--; // exclude the container itself
+ vec.push_back(count);
+ }
+ return JSONUTIL_SUCCESS;
+}
+
+STATIC void find_path_depth_internal(JValue& v, size_t d, size_t *max_depth) {
+ *max_depth = std::max(d, *max_depth);
+ if (v.IsObject()) {
+ for (auto &m : v.GetObject())
+ find_path_depth_internal(m.value, d+1, max_depth);
+ } else if (v.IsArray()) {
+ for (auto &m : v.GetArray())
+ find_path_depth_internal(m, d+1, max_depth);
+ }
+}
+
+void dom_path_depth(JDocument *doc, size_t *depth) {
+ *depth = 0;
+ find_path_depth_internal(doc->GetJValue(), 0, depth);
+}
+
+/*
+ * Make a copy of this document
+ */
+JDocument *dom_copy(const JDocument *src) {
+ int64_t begin_val = jsonstats_begin_track_mem();
+
+ JDocument *dst = create_doc();
+ dst->CopyFrom(*src, allocator);
+
+ int64_t delta = jsonstats_end_track_mem(begin_val);
+ ValkeyModule_Assert(delta > 0);
+ dom_set_doc_size(dst, static_cast<size_t>(delta));
+
+ return dst;
+}
+
+/*
+ * RDB File Format.
+ *
+ * Each JValue in RDB file format has a type code followed by type-specific data
+ */
+enum meta_codes {
+ JSON_METACODE_NULL = 0x01, // Nothing follows
+ JSON_METACODE_STRING = 0x02, // Followed by the string
+ JSON_METACODE_DOUBLE = 0x04, // Followed by the double
+ JSON_METACODE_INTEGER = 0x08, // Coded as a 64-bit Signed Integer
+ JSON_METACODE_BOOLEAN = 0x10, // Coded as the string '1' or '0'
+ JSON_METACODE_OBJECT = 0x20, // Followed by a member count, and then N "pairs"
+ JSON_METACODE_ARRAY = 0x40, // Followed by an element count and then N JValue elements
+ JSON_METACODE_PAIR = 0x80 // Codes an object member: a string (member name) and a JValue
+};
+
+//
+// save a JValue, recurse as required for object and array
+//
+STATIC void store_JValue(ValkeyModuleIO *rdb, const JValue *val) {
+ if (val->IsNull()) {
+ ValkeyModule_SaveUnsigned(rdb, JSON_METACODE_NULL);
+ } else if (val->IsString()) {
+ ValkeyModule_SaveUnsigned(rdb, JSON_METACODE_STRING);
+ ValkeyModule_SaveStringBuffer(rdb, val->GetString(), val->GetStringLength());
+ } else if (val->IsNumber()) {
+ if (val->IsDouble()) {
+ ValkeyModule_SaveUnsigned(rdb, JSON_METACODE_DOUBLE);
+ ValkeyModule_SaveDouble(rdb, val->GetDouble());
+ } else {
+ ValkeyModule_SaveUnsigned(rdb, JSON_METACODE_INTEGER);
+ if (val->IsInt64() || val->IsInt()) {
+ ValkeyModule_SaveSigned(rdb, val->GetInt64());
+ } else {
+ // rdb format doesn't understand unsigned, fail on numbers that aren't handled correctly
+ ValkeyModule_Assert(val->GetUint64() < static_cast<uint64_t>(1ULL << 63));
+ ValkeyModule_SaveUnsigned(rdb, val->GetUint64());
+ }
+ }
+ } else if (val->IsFalse()) {
+ ValkeyModule_SaveUnsigned(rdb, JSON_METACODE_BOOLEAN);
+ ValkeyModule_SaveStringBuffer(rdb, "0", 1);
+ } else if (val->IsTrue()) {
+ ValkeyModule_SaveUnsigned(rdb, JSON_METACODE_BOOLEAN);
+ ValkeyModule_SaveStringBuffer(rdb, "1", 1);
+ } else if (val->IsObject()) {
+ ValkeyModule_SaveUnsigned(rdb, JSON_METACODE_OBJECT);
+ ValkeyModule_SaveUnsigned(rdb, val->MemberCount());
+ for (auto m = val->MemberBegin(); m != val->MemberEnd(); ++m) {
+ ValkeyModule_SaveUnsigned(rdb, JSON_METACODE_PAIR);
+ ValkeyModule_SaveStringBuffer(rdb, m->name.GetString(), m->name.GetStringLength());
+ store_JValue(rdb, &m->value);
+ }
+ } else if (val->IsArray()) {
+ ValkeyModule_SaveUnsigned(rdb, JSON_METACODE_ARRAY);
+ ValkeyModule_SaveUnsigned(rdb, val->Size());
+ for (size_t i = 0; i < val->Size(); ++i) {
+ store_JValue(rdb, &(*val)[i]);
+ }
+ } else {
+ ValkeyModule_Assert(false);
+ }
+}
+
+void dom_save(const JDocument *doc, ValkeyModuleIO *rdb, int encver) {
+ switch (encver) {
+ case 3: {
+ rapidjson::StringBuffer oss;
+ serialize_value(*(doc), 0, nullptr, oss);
+ ValkeyModule_SaveStringBuffer(rdb, oss.GetString(), oss.GetLength());
+ break;
+ }
+ case 0:
+ store_JValue(rdb, doc);
+ break;
+ default:
+ ValkeyModule_Assert(0);
+ break;
+ }
+}
+
+// Helper function, read string into a JValue
+STATIC JValue readStringAsJValue(ValkeyModuleIO *rdb) {
+ // Note: in the modern format, doubles are also stored as strings
+ size_t str_len;
+ char *str = ValkeyModule_LoadStringBuffer(rdb, &str_len);
+ if (str) {
+ JValue v(str, str_len, allocator);
+ ValkeyModule_Free(str);
+ return v;
+ } else {
+ ValkeyModule_LogIOError(rdb, "error", "Unable to read string or double");
+ return JValue();
+ }
+}
+
+// Helper function, read legacy double into a new string double JValue
+STATIC JValue readLegacyDoubleAsJValue(ValkeyModuleIO *rdb) {
+ double d = ValkeyModule_LoadDouble(rdb);
+ char str[BUF_SIZE_DOUBLE_JSON];
+ size_t str_len = jsonutil_double_to_string(d, str, sizeof(str));
+ if (str_len > 0) { // str is an array and is never null; test the conversion result instead
+ JValue v(str, str_len, allocator, false, /* isdouble */ true);
+ return v;
+ } else {
+ ValkeyModule_LogIOError(rdb, "error", "Unable to read legacy double");
+ return JValue();
+ }
+}
+
+/*
+ * One instance of this is passed to all recursive invocations of rdbLoadJValue
+ */
+typedef struct load_params {
+ ValkeyModuleIO *rdb;
+ unsigned nestLevel;
+ JsonUtilCode status;
+} load_params;
+
+JValue rdbLoadJValue(load_params *params) {
+ uint64_t code = ValkeyModule_LoadUnsigned(params->rdb);
+ switch (code) {
+ case JSON_METACODE_NULL:
+ return JValue();
+ case JSON_METACODE_STRING:
+ return readStringAsJValue(params->rdb);
+ case JSON_METACODE_DOUBLE:
+ return readLegacyDoubleAsJValue(params->rdb);
+ case JSON_METACODE_INTEGER:
+ return JValue(ValkeyModule_LoadSigned(params->rdb));
+ case JSON_METACODE_BOOLEAN: {
+ size_t strlen;
+ char *s = ValkeyModule_LoadStringBuffer(params->rdb, &strlen);
+ char c = (s && strlen == 1) ?
*s : 0; + ValkeyModule_Free(s); + switch (c) { + case '1': return JValue(true); + case '0': return JValue(false); + default: + params->status = JSONUTIL_INVALID_RDB_FORMAT; + ValkeyModule_LogIOError(params->rdb, "error", "invalid boolean format"); + return JValue(); + } + } + case JSON_METACODE_OBJECT: { + uint64_t members = ValkeyModule_LoadUnsigned(params->rdb); + JValue obj; + obj.SetObject(); + if (params->nestLevel >= json_get_max_path_limit()) { + ValkeyModule_LogIOError(params->rdb, "error", "document path limit exceeded"); + params->status = JSONUTIL_DOCUMENT_PATH_LIMIT_EXCEEDED; + return JValue(); + } + params->nestLevel++; + while (members--) { + uint64_t paircode = ValkeyModule_LoadUnsigned(params->rdb); + if (paircode != JSON_METACODE_PAIR) { + params->status = JSONUTIL_INVALID_RDB_FORMAT; + ValkeyModule_LogIOError(params->rdb, "error", "Invalid pair code"); + params->nestLevel--; + return JValue(); + } + JValue key = readStringAsJValue(params->rdb); + JValue value = rdbLoadJValue(params); + obj.AddMember(key, value, allocator); + } + params->nestLevel--; + return obj; + } + case JSON_METACODE_ARRAY: { + uint64_t length = ValkeyModule_LoadUnsigned(params->rdb); + JValue array; + array.SetArray(); + array.Reserve(length, allocator); + if (params->nestLevel >= json_get_max_path_limit()) { + params->status = JSONUTIL_DOCUMENT_PATH_LIMIT_EXCEEDED; + ValkeyModule_LogIOError(params->rdb, "error", "document path limit exceeded"); + return JValue(); + } + params->nestLevel++; + while (length--) { + array.PushBack(rdbLoadJValue(params), allocator); + } + params->nestLevel--; + return array; + } + default: + ValkeyModule_LogIOError(params->rdb, "error", "Invalid metadata code %lx", code); + params->status = JSONUTIL_INVALID_RDB_FORMAT; + return JValue(); + } +} + +JsonUtilCode dom_load(JDocument **doc, ValkeyModuleIO *ctx, int encver) { + *doc = nullptr; + ValkeyModule_Log(nullptr, "debug", "Begin dom_load, encver:%d", encver); + switch (encver) { + case 3: { + // + // New encoding, data is stored as wire-format JSON + // + size_t json_len; + char *json = ValkeyModule_LoadStringBuffer(ctx, &json_len); + if (!json) return JSONUTIL_INVALID_RDB_FORMAT; + JsonUtilCode rc = dom_parse(nullptr, json, json_len, doc); + ValkeyModule_Free(json); + return rc; + } + case 0: { + // + // Encoding Version 0, Data is stored JSON node by node. 
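+ // Illustrative example of this layout (per the meta_codes table earlier in this file):
+ // the document {"a":1} is stored as OBJECT(0x20), member count 1, PAIR(0x80),
+ // the string "a", then INTEGER(0x08) with the signed value 1.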
+ //
+ load_params params;
+ params.rdb = ctx;
+ params.nestLevel = 0;
+ params.status = JSONUTIL_SUCCESS;
+ JValue loadedValue = rdbLoadJValue(&params);
+ if (params.status == JSONUTIL_SUCCESS) {
+ *doc = create_doc();
+ (*doc)->SetJValue(loadedValue);
+ }
+ return params.status;
+ }
+ default:
+ ValkeyModule_Log(nullptr, "warning", "JSON: Unrecognized rdb encoding level %d", encver);
+ return JSONUTIL_INVALID_RDB_FORMAT;
+ }
+}
+
+//
+// Compute Digest
+//
+STATIC void compute_digest(ValkeyModuleDigest *ctx, const JValue& v) {
+ switch (v.GetType()) {
+ case rapidjson::Type::kNullType:
+ ValkeyModule_DigestAddLongLong(ctx, -1);
+ ValkeyModule_DigestEndSequence(ctx);
+ break;
+ case rapidjson::Type::kFalseType:
+ ValkeyModule_DigestAddLongLong(ctx, 0);
+ ValkeyModule_DigestEndSequence(ctx);
+ break;
+ case rapidjson::Type::kTrueType:
+ ValkeyModule_DigestAddLongLong(ctx, 1);
+ ValkeyModule_DigestEndSequence(ctx);
+ break;
+ case rapidjson::Type::kArrayType:
+ ValkeyModule_DigestAddLongLong(ctx, v.Size());
+ ValkeyModule_DigestEndSequence(ctx);
+ for (size_t i = 0; i < v.Size(); ++i) {
+ compute_digest(ctx, v[i]);
+ }
+ break;
+ case rapidjson::Type::kNumberType:
+ if (v.IsDouble()) {
+ double d = v.GetDouble();
+ int64_t bits;
+ memcpy(&bits, &d, sizeof(bits));
+ ValkeyModule_DigestAddLongLong(ctx, bits);
+ } else if (v.IsUint64()) {
+ uint64_t ui = v.GetUint64();
+ int64_t bits;
+ memcpy(&bits, &ui, sizeof(bits));
+ ValkeyModule_DigestAddLongLong(ctx, bits);
+ } else {
+ ValkeyModule_DigestAddLongLong(ctx, v.GetInt64());
+ }
+ ValkeyModule_DigestEndSequence(ctx);
+ break;
+ case rapidjson::Type::kObjectType:
+ ValkeyModule_DigestAddLongLong(ctx, v.MemberCount());
+ ValkeyModule_DigestEndSequence(ctx);
+ for (auto m = v.MemberBegin(); m != v.MemberEnd(); ++m) {
+ const char *b = m->name.GetString();
+ ValkeyModule_DigestAddStringBuffer(ctx, b, m->name.GetStringLength());
+ compute_digest(ctx, m->value);
+ }
+ break;
+ case rapidjson::Type::kStringType:
+ ValkeyModule_DigestAddStringBuffer(ctx, v.GetString(), v.GetStringLength());
+ ValkeyModule_DigestEndSequence(ctx);
+ break;
+ default:
+ ValkeyModule_Assert(false);
+ break;
+ }
+}
+
+void dom_compute_digest(ValkeyModuleDigest *ctx, const JDocument *doc) {
+ compute_digest(ctx, doc->GetJValue());
+}
+
+void dom_dump_value(JValue &v) {
+ (void)v;
+ rapidjson::StringBuffer sb;
+ rapidjson::Writer<rapidjson::StringBuffer> writer(sb);
+ v.Accept(writer);
+ std::cout << "DEBUG DOM\tvalue: " << sb.GetString() << std::endl;
+}
+
+/* ========================= functions consumed by unit tests ======================== */
+
+jsn::string dom_get_string(JDocument *d) {
+ return d->GetString();
+}
+
diff --git a/src/json/dom.h b/src/json/dom.h
new file mode 100644
index 0000000..63023e3
--- /dev/null
+++ b/src/json/dom.h
@@ -0,0 +1,545 @@
+/**
+ * DOM (Document Object Model) interface for JSON.
+ * The DOM module provides the following functions:
+ * 1. Parsing and validating an input JSON string buffer
+ * 2. Deserializing a JSON string into a document object
+ * 3. Serializing a document object into a JSON string
+ * 4. JSON CRUD operations: search, insert, update and delete
+ *
+ * Design Considerations:
+ * 1. Memory management: All memory management must be handled by the JSON allocator.
+ * - For memory allocated by our own code:
+ * All allocations and de-allocations must be done through:
+ * dom_alloc, dom_free, dom_realloc, dom_strdup, and dom_strndup
+ * - For objects allocated by the RapidJSON library:
+ * Our solution is to pass a custom memory allocator class as a template parameter, which directs
+ * RapidJSON to use the JSON allocator. The custom allocator works under the hood and is not exposed
+ * through this interface.
+ * 2. If a method returns a heap-allocated object to the caller, it must be documented.
+ * The caller is responsible for releasing the memory after consuming it.
+ * 3. Generally speaking, interface methods should not have Valkey module types such as ValkeyModuleCtx or
+ * ValkeyModuleString, because that would make unit tests hard to write, unless gmock classes have been developed.
+ *
+ * Coding Conventions & Best Practices:
+ * 1. Error handling: If a method may fail, the return type should be enum JsonUtilCode.
+ * 2. Output parameters: Output parameters should be placed at the end, and should be initialized at the
+ * beginning of the method. The caller should not be required to do any initialization before invoking the method.
+ * 3. Every public interface method declared in this file should be prefixed with "dom_".
+ */
+
+#ifndef VALKEYJSONMODULE_JSON_DOM_H_
+#define VALKEYJSONMODULE_JSON_DOM_H_
+
+#include <cstring>
+#include <iostream>
+#include "json/util.h"
+#include "json/alloc.h"
+#include "json/rapidjson_includes.h"
+
+class ReplyBuffer : public rapidjson::StringBuffer {
+ public:
+ ReplyBuffer(ValkeyModuleCtx *_ctx, bool) : rapidjson::StringBuffer(), ctx(_ctx) {}
+ ReplyBuffer() : rapidjson::StringBuffer(), ctx(nullptr) {}
+ void Initialize(ValkeyModuleCtx *_ctx, bool) { ctx = _ctx; }
+ void Reply() { ValkeyModule_ReplyWithStringBuffer(ctx, GetString(), GetLength()); }
+
+ private:
+ ValkeyModuleCtx *ctx;
+};
+
+extern "C" {
+#define VALKEYMODULE_EXPERIMENTAL_API
+#include <./include/valkeymodule.h>
+}
+
+/**
+ * This is a custom allocator for RapidJSON. It delegates memory management to the JSON allocator, so that
+ * memory allocated by the underlying RapidJSON library can be correctly reported to the Valkey engine. The class
+ * is passed into rapidjson::GenericDocument and rapidjson::GenericValue as a template parameter, which is the way
+ * to tell RapidJSON to use a custom allocator.
+ */
+class RapidJsonAllocator {
+ public:
+ RapidJsonAllocator();
+
+ void *Malloc(size_t size) {
+ return dom_alloc(size);
+ }
+
+ void *Realloc(void *originalPtr, size_t /*originalSize*/, size_t newSize) {
+ return dom_realloc(originalPtr, newSize);
+ }
+
+ static void Free(void *ptr) RAPIDJSON_NOEXCEPT {
+ dom_free(ptr);
+ }
+
+ bool operator==(const RapidJsonAllocator&) const RAPIDJSON_NOEXCEPT {
+ return true;
+ }
+
+ bool operator!=(const RapidJsonAllocator&) const RAPIDJSON_NOEXCEPT {
+ return false;
+ }
+
+ static const bool kNeedFree = true;
+};
+
+/**
+ * Now, wrap the RapidJSON objects (RJxxxxx) with our own objects (Jxxxxx). We wrap them to hide
+ * various RapidJSON oddities, simplify the syntax, and more explicitly express the semantics.
+ * For example, the details of allocators are largely hidden in the wrapped objects.
+ *
+ * We use four objects for all of our work. All of these objects descend from one of the three
+ * basic RapidJSON object types.
+ *
+ * RJValue (JValue): A JSON value. This is implemented as a node of a tree. This object doesn't
+ * differentiate between being the root of a tree or the root of a sub-tree.
+ * The full suite of RapidJSON value manipulation functions is available. Many
+ * of the RapidJSON value functions require an allocator. You must use the
+ * global "allocator".
+ *
+ * RJParser (JParser): This object contains a JValue into which you can deserialize a stream via the
+ * Parse/ParseStream member functions. The JValue created by the parsing routines
+ * is allocated using the dom_alloc/dom_free accounting. Typically, a JParser
+ * object is created on the run-time stack, filled with some serialized data,
+ * and then the created JValue is moved into a destination location.
+ *
+ * JDocument This is the only object visible outside the dom layer. Externally, a
+ * JDocument is the Valkey data type for this module, i.e., the Valkey dictionary
+ * contains this pointer. Externally, this is an opaque data structure.
+ * Internally, it's implemented as a JValue plus a size and bucket number. The
+ * size is maintained as the memory size of the entire tree of JValues
+ * contained by the JDocument.
+ */
+
+typedef rapidjson::GenericValue<rapidjson::UTF8<>, RapidJsonAllocator> RJValue;
+// A JValue is an RJValue without any local augmentation or change.
+typedef RJValue JValue;
+
+extern RapidJsonAllocator allocator;
+
+/**
+ * A JDocument privately inherits from JValue. You must use the GetJValue() member
+ * to access the underlying JValue. This improves readability at the usage point.
+ */
+struct JDocument : JValue {
+ JDocument() : JValue(), size(0), bucket_id(0) {}
+ JValue& GetJValue() { return *this; }
+ const JValue& GetJValue() const { return *this; }
+ void SetJValue(JValue& rhs) { *static_cast<JValue *>(this) = rhs; }
+ size_t size:56; // Size of this document, maintained by the JSON layer, not here.
+ size_t bucket_id:8; // document histogram's bucket id, maintained by the JSON layer, not here
+ void *operator new(size_t size) { return dom_alloc(size); }
+ void operator delete(void *ptr) { return dom_free(ptr); }
+
+ private:
+ //
+ // Since JDocument objects are 1:1 with Valkey Keys, you can't ever have an array of them.
+ //
+ void *operator new[](size_t); // Not defined anywhere, causes link error if used
+ void operator delete[](void *); // Not defined anywhere, causes link error if used
+};
+
+//
+// typedef the RapidJSON objects we care about, name them RJxxxxx for clarity
+//
+typedef rapidjson::GenericDocument<rapidjson::UTF8<>, RapidJsonAllocator> RJParser;
+
+/**
+ * A JParser privately inherits from RJParser, which inherits from RJValue. You must use the
+ * GetJValue() member to access the post-Parse value.
+ *
+ */
+struct JParser : RJParser {
+ JParser() : RJParser(&allocator), allocated_size(0) {}
+ //
+ // Make these inner routines publicly visible
+ //
+ using RJParser::ParseStream;
+ using RJParser::HasParseError;
+ using RJParser::GetMaxDepth;
+ // Access the contained JValue
+ JValue& GetJValue() { return *this; }
+ //
+ // Translate rapidJSON parse error code into JsonUtilCode.
+ //
+ JsonUtilCode GetParseErrorCode() {
+ switch (GetParseError()) {
+ case rapidjson::kParseErrorTermination:
+ return JSONUTIL_DOCUMENT_PATH_LIMIT_EXCEEDED;
+ case rapidjson::kParseErrorNone:
+ ValkeyModule_Assert(false);
+ /* Fall Through, but not really */
+ default:
+ return JSONUTIL_JSON_PARSE_ERROR;
+ }
+ }
+ //
+ // When we parse an incoming string, we want to know how much memory this will consume.
+ // So track it and retain it.
+ //
+ JParser& Parse(const char *json, size_t len);
+ JParser& Parse(const std::string_view &sv);
+ //
+ // This object holds a JValue which is the root of the parsed tree.
The dom_alloc/dom_free + // machinery will track all memory allocations outside of this object, but the root JValue + // won't be covered by that. Because the JParser objects aren't created via new, they are + // created as stack variables. So we manually add that in, since it'll be charged to the + // destination when we actually move the value out. + // + size_t GetJValueSize() const { return allocated_size + sizeof(RJValue); } + + private: + size_t allocated_size; +}; + +/* Parse input JSON string, validate syntax, and return a document object. + * This method can handle an input string that is not NULL terminated. One use case is that + * we call ValkeyModule_LoadStringBuffer() to load JSON data from RDB, which returns a string that + * is not automatically NULL terminated. + * Also note that if the input string has NULL character '\0' in the middle, the string + * terminates at the NULL character. + * + * @param json_buf - pointer to binary string buffer, which may not be NULL terminated. + * @param buf_len - length of the input string buffer, which may not be NULL terminated. + * @param doc - OUTPUT param, pointer to document pointer. The caller is responsible for calling + * dom_free_doc(JDocument*) to free the memory after it's consumed. + * @return JSONUTIL_SUCCESS for success, other code for failure. + */ +JsonUtilCode dom_parse(ValkeyModuleCtx *ctx, const char *json_buf, const size_t buf_len, JDocument **doc); + +/* Free a document object */ +void dom_free_doc(JDocument *doc); + +/* Get document size */ +size_t dom_get_doc_size(const JDocument *doc); + +/* Set document size */ +void dom_set_doc_size(JDocument *doc, const size_t size); + +/* Get the document histogram's bucket ID */ +size_t dom_get_bucket_id(const JDocument *doc); + +/* Set the document histogram's bucket ID */ +void dom_set_bucket_id(JDocument *doc, const uint32_t bucket_id); + +/** + * Serialize a document into the given string stream. + * @param format - controls format of returned JSON string. + * if NULL, return JSON in compact format (no space, no indent, no newline). + * @param oss - output stream + */ +void dom_serialize(JDocument *doc, const PrintFormat *format, rapidjson::StringBuffer &oss); + +/** + * Serialize a value into the given string stream. + * @param format - controls format of returned JSON string. + * if NULL, return JSON in compact format (no space, no indent, no newline). + * @param oss - output stream + * @param json_len - OUTPUT param, *json_len is length of JSON string. + */ +void dom_serialize_value(const JValue &val, const PrintFormat *format, rapidjson::StringBuffer &oss); + +/** + * Get the root value of the document. + */ +JValue& dom_get_value(JDocument &doc); + +/* Set value at the path. + * @param json_path: path that is compliant to the JSON Path syntax. + * @param is_create_only - indicates to create a new value. + * @param is_update_only - indicates to update an existing value. + * @return JSONUTIL_SUCCESS for success, other code for failure. 
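+ * Example (illustrative): dom_set_value(ctx, doc, "$.address.city", "\"Boston\"", 8)
+ * updates an existing city value; with is_create_only=true the same call would be
+ * rejected because the path already exists.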
+ */
+JsonUtilCode dom_set_value(ValkeyModuleCtx *ctx, JDocument *doc, const char *json_path, const char *new_val_json,
+ size_t new_val_len, const bool is_create_only = false, const bool is_update_only = false);
+
+
+inline JsonUtilCode dom_set_value(ValkeyModuleCtx *ctx, JDocument *doc, const char *json_path,
+ const char *new_val_json, const bool is_create_only = false, const bool is_update_only = false) {
+ return dom_set_value(ctx, doc, json_path, new_val_json, strlen(new_val_json), is_create_only, is_update_only);
+}
+
+
+
+/* Get JSON value at the path.
+ * If the path is invalid, the method will return error code JSONUTIL_INVALID_JSON_PATH.
+ * If the path does not exist, the method will return error code JSONUTIL_JSON_PATH_NOT_EXIST.
+ *
+ * @param format - controls format of returned JSON string.
+ * if NULL, return JSON in compact format (no space, no indent, no newline).
+ * @param oss - output stream
+ * @return JSONUTIL_SUCCESS for success, other code for failure.
+ */
+template<typename T>
+JsonUtilCode dom_get_value_as_str(JDocument *doc, const char *json_path, const PrintFormat *format,
+ T &oss, const bool update_stats = true);
+
+/* Get JSON values at multiple paths. Values at multiple paths will be aggregated into a JSON object,
+ * in which each path is a key.
+ * If the path is invalid, the method will return error code JSONUTIL_INVALID_JSON_PATH.
+ * If the path does not exist, the method will return error code JSONUTIL_JSON_PATH_NOT_EXIST.
+ *
+ * @param format - controls format of returned JSON string.
+ * if NULL, return JSON in compact format (no space, no indent, no newline).
+ * @param oss - output stream, the string represents an aggregated JSON object in which each path is a key.
+ * @return JSONUTIL_SUCCESS if success. Other codes indicate failure.
+ */
+JsonUtilCode dom_get_values_as_str(JDocument *doc, const char **paths, const int num_paths,
+ PrintFormat *format, ReplyBuffer &oss, const bool update_stats = true);
+
+/**
+ * Delete JSON values at the given path.
+ * @param num_vals_deleted number of values deleted
+ * @return JSONUTIL_SUCCESS if success. Other codes indicate failure.
+ */
+JsonUtilCode dom_delete_value(JDocument *doc, const char *json_path, size_t &num_vals_deleted);
+
+/* Increment the JSON value by a given number.
+ * @param out_vals OUTPUT parameter, a vector of new values
+ * @return JSONUTIL_SUCCESS if success. Other codes indicate failure.
+ */
+JsonUtilCode dom_increment_by(JDocument *doc, const char *json_path, const JValue *incr_by,
+ jsn::vector<double> &out_vals, bool &is_v2_path);
+
+/* Multiply the JSON value by a given number.
+ * @param out_vals OUTPUT parameter, a vector of new values
+ * @return JSONUTIL_SUCCESS if success. Other codes indicate failure.
+ */
+JsonUtilCode dom_multiply_by(JDocument *doc, const char *json_path, const JValue *mult_by,
+ jsn::vector<double> &out_vals, bool &is_v2_path);
+
+/* Toggle a JSON boolean between true and false.
+ * @param vec OUTPUT parameter, a vector of integers. 0: false, 1: true, -1: N/A - the source value is not boolean.
+ * @return JSONUTIL_SUCCESS if success. Other codes indicate failure.
+ */
+JsonUtilCode dom_toggle(JDocument *doc, const char *path, jsn::vector<int> &vec, bool &is_v2_path);
+
+
+/* Get the length of a JSON string value.
+ * @param vec OUTPUT parameter, a vector of string lengths
+ * @return JSONUTIL_SUCCESS if success. Other codes indicate failure.
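+ * Example (illustrative): for the document {"a":"hello"} and path "$.a", vec becomes [5]
+ * and is_v2_path is set to true.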
+ */
+JsonUtilCode dom_string_length(JDocument *doc, const char *path, jsn::vector<size_t> &vec, bool &is_v2_path);
+
+/* Append a string to an existing JSON string value.
+ * @param vec OUTPUT parameter, a vector of new string lengths
+ * @return JSONUTIL_SUCCESS if success. Other codes indicate failure.
+ */
+JsonUtilCode dom_string_append(JDocument *doc, const char *path, const char *json, const size_t json_len,
+ jsn::vector<size_t> &vec, bool &is_v2_path);
+
+/**
+ * Get number of keys in the object at the given path.
+ * @param vec OUTPUT parameter, a vector of object lengths
+ * @return JSONUTIL_SUCCESS if success. Other codes indicate failure.
+ */
+JsonUtilCode dom_object_length(JDocument *doc, const char *path, jsn::vector<size_t> &vec, bool &is_v2_path);
+
+/**
+ * Get keys in the object at the given path.
+ * @param vec OUTPUT parameter, a vector of vectors of strings. In the first-level vector, the number of items is
+ * the number of objects. In the second-level vector, the number of items is the number of keys in the object.
+ * @return JSONUTIL_SUCCESS if success. Other codes indicate failure.
+ */
+JsonUtilCode dom_object_keys(JDocument *doc, const char *path,
+ jsn::vector<jsn::vector<jsn::string>> &vec, bool &is_v2_path);
+
+/**
+ * Get number of elements in the array at the given path.
+ * @param vec OUTPUT parameter, a vector of array lengths
+ * @return JSONUTIL_SUCCESS if success. Other codes indicate failure.
+ */
+JsonUtilCode dom_array_length(JDocument *doc, const char *path, jsn::vector<size_t> &vec, bool &is_v2_path);
+
+/**
+ * Append a list of values to the array at the given path.
+ * @param vec OUTPUT parameter, a vector of new array lengths
+ * @return JSONUTIL_SUCCESS if success. Other codes indicate failure.
+ */
+JsonUtilCode dom_array_append(ValkeyModuleCtx *ctx, JDocument *doc, const char *path,
+ const char **jsons, size_t *json_lens, const size_t num_values,
+ jsn::vector<size_t> &vec, bool &is_v2_path);
+
+/**
+ * Remove and return an element from the given index in the array.
+ * An out-of-range index is rounded to the respective array boundary.
+ *
+ * @param index - position in the array to start popping from, defaults to -1, which means the last element.
+ * A negative value means position from the end.
+ * @param vec - OUTPUT parameter, a vector of string streams, each containing the JSON string of a popped element
+ * @return JSONUTIL_SUCCESS if success. Other codes indicate failure.
+ */
+JsonUtilCode dom_array_pop(JDocument *doc, const char *path, int64_t index,
+ jsn::vector<rapidjson::StringBuffer> &vec, bool &is_v2_path);
+
+/**
+ * Insert one or more JSON values into the array at the path, before the index.
+ * Inserting at index 0 prepends to the array.
+ * A negative index value is interpreted as starting from the end.
+ * The index must be in the array's range.
+ *
+ * @param vec OUTPUT parameter, a vector of new array lengths
+ * @return JSONUTIL_SUCCESS if success. Other codes indicate failure.
+ */
+JsonUtilCode dom_array_insert(ValkeyModuleCtx *ctx, JDocument *doc, const char *path, int64_t index,
+ const char **jsons, size_t *json_lens, const size_t num_values,
+ jsn::vector<size_t> &vec, bool &is_v2_path);
+
+/**
+ * Clear all the elements in an array or object.
+ * Return number of containers cleared.
+ *
+ * @param elements_cleared OUTPUT parameter, number of elements cleared
+ * @return JSONUTIL_SUCCESS if success. Other codes indicate failure.
+ */
+JsonUtilCode dom_clear(JDocument *doc, const char *path, size_t &elements_cleared);
+
+/*
+ * Trim an array so that it becomes the subarray [start, stop], both inclusive.
+ * If the array is empty, do nothing, return 0. + * If start < 0, set it to 0. + * If stop >= size, set it to size-1 + * If start >= size or start > stop, empty the array and return 0. + * + * @param start - start index, inclusive + * @param stop - stop index, inclusive + * @param vec, OUTPUT parameter, a vector of new array lengths + * @return JSONUTIL_SUCCESS if success. Other codes indicate failure. + */ +JsonUtilCode dom_array_trim(JDocument *doc, const char *path, int64_t start, int64_t stop, + jsn::vector &vec, bool &is_v2_path); + + +/** + * Search for the first occurrence of a scalar JSON value in an array. + * Out of range errors are treated by rounding the index to the array's start and end. + * If start > stop, return -1 (not found). + * + * @param scalar_val - scalar value to search for + * @param start - start index, inclusive + * @param stop - stop index, exclusive. 0 or -1 means the last element is included. + * @param vec OUTPUT parameter, a vector of matching indexes. -1 means value not found. + * @return JSONUTIL_SUCCESS if success. Other codes indicate failure. + */ +JsonUtilCode dom_array_index_of(JDocument *doc, const char *path, const char *scalar_val, + const size_t scalar_val_len, int64_t start, int64_t stop, + jsn::vector &vec, bool &is_v2_path); + +/* Get type of a JSON value. + * @param vec, OUTPUT parameter, a vector of value types. + * @return JSONUTIL_SUCCESS if success. Other codes indicate failure. + */ +JsonUtilCode dom_value_type(JDocument *doc, const char *path, jsn::vector &vec, bool &is_v2_path); + +/* + * Return a JSON value in Valkey Serialization Protocol (RESP). + * If the value is container, the response is RESP array or nested array. + * + * JSON null is mapped to the RESP Null Bulk String. + * JSON boolean values are mapped to the respective RESP Simple Strings. + * JSON integer numbers are mapped to RESP Integers. + * JSON float or double numbers are mapped to RESP Bulk Strings. + * JSON Strings are mapped to RESP Bulk Strings. + * JSON Arrays are represented as RESP Arrays, where the first element is the simple string [, + * followed by the array's elements. + * JSON Objects are represented as RESP Arrays, where the first element is the simple string {, + * followed by key-value pairs, each of which is a RESP bulk string. + */ +JsonUtilCode dom_reply_with_resp(ValkeyModuleCtx *ctx, JDocument *doc, const char *path); + +/* Get memory size of a JSON value. + * @param vec, OUTPUT parameter, vector of memory size. + * @return JSONUTIL_SUCCESS if success. Other codes indicate failure. + */ +JsonUtilCode dom_mem_size(JDocument *doc, const char *path, jsn::vector &vec, bool &is_v2_path, + bool default_path); + +/* Get number of fields in a JSON value. + * @param vec, OUTPUT parameter, vector of number of fields. + * @return JSONUTIL_SUCCESS if success. Other codes indicate failure. + */ +JsonUtilCode dom_num_fields(JDocument *doc, const char *path, jsn::vector &vec, bool &is_v2_path); + +/** + * Get max path depth of a document. + */ +void dom_path_depth(JDocument *doc, size_t *depth); + +/* Duplicate a JSON value. */ +JDocument *dom_copy(const JDocument *source); + +/* + * The dom_save and dom_load support the ability to save and load a single JSON document + * as a sequence of chunks of data. The advantage of chunking is that you never need a single + * buffer that's the size of the entire serialized object. Without chunking for a large JSON + * object you would need to reserve sufficient memory to serialize it en masse. 
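+ * (Illustrative: with chunking, a multi-gigabyte document can be saved using only
+ * chunk-sized buffers at any one time, rather than one buffer holding the entire
+ * serialized document.)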
+ * + * dom_save synchronously serializes the object into a sequence of chunks, each chunk is + * delivered to a callback function for disposal. + * + * dom_load synchronously deserializes a series of chunks of data into a JSON object. + * It calls a callback which returns a chunk of data. That data is "owned" by dom_load until + * it passes the ownership of that chunk back to the caller via a callback. End of input can + * be signalled by returning a nullptr or a 0-length chunk of data. + * + * The usage of callbacks insulates the dom layer from any knowledge of the RDB format. That's + * exclusively done by the callback functions. + */ + +/* + * save a document into rdb format. + * @param source JSON document to be saved + * @param rdb rdb file context + */ +void dom_save(const JDocument *source, ValkeyModuleIO *rdb, int encver); + +/* + * load a document from rdb format + * @param dest output document pointer + * @param rdb rdb file context + * @param encver encoding version + */ +JsonUtilCode dom_load(JDocument **dest, ValkeyModuleIO *rdb, int encver); + +/* + * Implement DEBUG DIGEST + */ +void dom_compute_digest(ValkeyModuleDigest *ctx, const JDocument *doc); + +void dom_dump_value(JValue &v); + +// Unit test +jsn::string validate(const JDocument *); + +// +// JSON Validation functions +// + +// +// Validates that all pointers contained within this JValue are valid. +// +// true => All good. +// false => Not all good. +// +bool ValidateJValue(JValue &v); + +// +// This function dumps a JValue to an output stream (like an ostringstream) +// +// The structure of the object is dumped but no actual customer data is dumped. +// It's totally legal to call this function on a corrupted JValue, it'll avoid the bad mallocs +// +// Typical usage: +// +// std::ostringstream os; +// DumpRedactedJValue(os, v); // Don't specify level or index parameters, let them default +// ValkeyModule_Log(...., os.str()); +// +void DumpRedactedJValue(std::ostream& os, const JValue &v, size_t level = 0, int index = -1); +// +// Same as above, except targets the Valkey Log +// +void DumpRedactedJValue(const JValue &v, ValkeyModuleCtx *ctx = nullptr, const char *level = "debug"); + +#endif // VALKEYJSONMODULE_JSON_DOM_H_ \ No newline at end of file diff --git a/src/json/json.cc b/src/json/json.cc new file mode 100644 index 0000000..a8d7cca --- /dev/null +++ b/src/json/json.cc @@ -0,0 +1,3232 @@ +/** + * This file implements the Valkey Module interfaces. + * + * When the module is loaded, it does the following: + * 1. register the JSON module. + * 2. register callback methods such as rdb_load, rdb_save, free, etc. + * 3. register JSON data type. + * 4. register commands that are all prefixed with "JSON.". + * + * Design Considerations: + * 1. Command API: see API.md. + * 2. All JSON CRUD operations should be delegated to the DOM module. + * 3. Shared utility/helper code should reside in the UTIL module. + * 4. When invoking a DOM or UTIL method tha returns a heap-allocated object, the caller must release the memory + * after consuming it. + * 5. The first line of every command handler should be: "ValkeyModule_AutoMemory(ctx);". This is for enabling + * auto memory management for the command. + * 6. Every write command must support replication. Call "ValkeyModule_ReplicateVerbatim(ctx)" to tell Valkey to + * replicate the command. + * 7. Any write command that increases total memory utilization, should be created using "write deny-oom" flags. 
+ * e.g., JSON.SET should be defined as "write deny-oom", while JSON.DEL does not need "deny-oom" as it can't + * increase the total memory. + * + * Coding Conventions & Best Practices: + * 1. Every command handler is named as Command_JsonXXX, where XXX is command name. + * 2. Every callback method is named as DocumentType_XXX, where XXX indicates callback interface method. + * 3. Majority of the code are command handler methods. Command arguments processing code are separated out into + * helper structs named as XXXCmdsArgs, and helper methods named as parseXXXCmdArgs, where XXX is command name. + */ + +#include "json/json.h" +#include "json/dom.h" +#include "json/rapidjson_includes.h" +#include "json/alloc.h" +#include "json/stats.h" +#include "json/memory.h" +#include "./include/valkeymodule.h" +#include +#include +#include + +#define MODULE_VERSION 10201 +#define MODULE_NAME "json" +#define DOCUMENT_TYPE_NAME "ReJSON-RL" +#define DOCUMENT_TYPE_ENCODING_VERSION 3 /* Currently support 1 or 3 */ + +#define ERRMSG_JSON_DOCUMENT_NOT_FOUND "NONEXISTENT JSON document is not found" +#define ERRMSG_NEW_VALKEY_KEY_PATH_NOT_ROOT "SYNTAXERR A new Valkey key's path must be root" +#define ERRMSG_CANNOT_DISABLE_MODULE_DUE_TO_OUTSTADING_DATA \ + "Cannot disable the module because there are outstanding document keys" + +#define STATIC /* decorator for static functions, remove so that backtrace symbols include these */ + +ValkeyModuleType *DocumentType; // Module type + +#define DEFAULT_MAX_DOCUMENT_SIZE (0) // Infinite +#define DEFAULT_DEFRAG_THRESHOLD (64 * 1024 * 1024) // 64MB +static size_t config_max_document_size = DEFAULT_MAX_DOCUMENT_SIZE; +static size_t config_defrag_threshold = DEFAULT_DEFRAG_THRESHOLD; + +#define DEFAULT_MAX_PATH_LIMIT 128 +static size_t config_max_path_limit = DEFAULT_MAX_PATH_LIMIT; + +#define DEFAULT_MAX_PARSER_RECURSION_DEPTH 200 +static size_t config_max_parser_recursion_depth = DEFAULT_MAX_PARSER_RECURSION_DEPTH; + +#define DEFAULT_MAX_RECURSIVE_DESCENT_TOKENS 20 +static size_t config_max_recursive_descent_tokens = DEFAULT_MAX_RECURSIVE_DESCENT_TOKENS; + +#define DEFAULT_MAX_QUERY_STRING_SIZE (128 * 1024) // 128KB +static size_t config_max_query_string_size = DEFAULT_MAX_QUERY_STRING_SIZE; + +KeyTable *keyTable = nullptr; +rapidjson::HashTableFactors rapidjson::hashTableFactors; +rapidjson::HashTableStats rapidjson::hashTableStats; + +bool enforce_rdb_version_check = false; + +extern size_t hash_function(const char *text, size_t length); + +size_t json_get_max_document_size() { + return config_max_document_size; +} + +size_t json_get_defrag_threshold() { + return config_defrag_threshold; +} + +size_t json_get_max_path_limit() { + return config_max_path_limit; +} + +size_t json_get_max_parser_recursion_depth() { + return config_max_parser_recursion_depth; +} + +size_t json_get_max_recursive_descent_tokens() { + return config_max_recursive_descent_tokens; +} + +size_t json_get_max_query_string_size() { + return config_max_query_string_size; +} + +#define CHECK_DOCUMENT_SIZE_LIMIT(ctx, new_doc_size) \ +if (!(ValkeyModule_GetContextFlags(ctx) & VALKEYMODULE_CTX_FLAGS_REPLICATED) && \ + json_get_max_document_size() > 0 && (new_doc_size > json_get_max_document_size())) { \ + return ValkeyModule_ReplyWithError(ctx, jsonutil_code_to_message(JSONUTIL_DOCUMENT_SIZE_LIMIT_EXCEEDED)); \ +} + +// module config params +// NOTE: We save a copy of the value for each config param instead of pointer address, because the compiler does +// not allow casting const pointer to pointer. 
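+// An illustrative sketch of that copy-based pattern (hypothetical callback names; the
+// actual registrations use the REGISTER_*_CONFIG macros defined just below):
+//
+// static long long getMaxPathLimitConfig(const char *name, void *privdata) {
+//     return config_max_path_limit;              // return the saved copy
+// }
+// static int setMaxPathLimitConfig(const char *name, long long val, void *privdata,
+//                                  ValkeyModuleString **err) {
+//     config_max_path_limit = val;               // store a copy of the new value
+//     return VALKEYMODULE_OK;
+// }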
+ +extern "C" { + #define VALKEYMODULE_EXPERIMENTAL_API +} + +// instrumentation configs +static int instrument_enabled_insert = 0; +static int instrument_enabled_update = 0; +static int instrument_enabled_delete = 0; +static int instrument_enabled_dump_doc_before = 0; +static int instrument_enabled_dump_doc_after = 0; +static int instrument_enabled_dump_value_before_delete = 0; + +bool json_is_instrument_enabled_insert() { + return instrument_enabled_insert == 1; +} +bool json_is_instrument_enabled_update() { + return instrument_enabled_update == 1; +} +bool json_is_instrument_enabled_delete() { + return instrument_enabled_delete == 1; +} +bool json_is_instrument_enabled_dump_doc_before() { + return instrument_enabled_dump_doc_before == 1; +} +bool json_is_instrument_enabled_dump_doc_after() { + return instrument_enabled_dump_doc_after == 1; +} +bool json_is_instrument_enabled_dump_value_before_delete() { + return instrument_enabled_dump_value_before_delete == 1; +} + +#define REGISTER_BOOL_CONFIG(ctx, name, default_val, privdata, getfn, setfn) { \ + if (ValkeyModule_RegisterBoolConfig(ctx, name, default_val, VALKEYMODULE_CONFIG_DEFAULT, \ + getfn, setfn, nullptr, privdata) == VALKEYMODULE_ERR) { \ + ValkeyModule_Log(ctx, "warning", "Failed to register module config \"%s\".", name); \ + return VALKEYMODULE_ERR; \ + } \ +} + +#define REGISTER_NUMERIC_CONFIG(ctx, name, default_val, flag, min, max, privdata, getfn, setfn) { \ + if (ValkeyModule_RegisterNumericConfig(ctx, name, default_val, flag, min, max, \ + getfn, setfn, nullptr, privdata) == VALKEYMODULE_ERR ) { \ + ValkeyModule_Log(ctx, "warning", "Failed to register module config \"%s\".", name); \ + return VALKEYMODULE_ERR; \ + } \ +} + +/* ============================== Helper Methods ============================== */ + +/* Verify that the document key exists and is a document key. + * @param key - OUTPUT parameter, pointer to ValkeyModuleKey pointer. + */ +STATIC JsonUtilCode verify_doc_key(ValkeyModuleCtx *ctx, ValkeyModuleString *rmKey, ValkeyModuleKey **key, + bool readOnly = false) { + *key = static_cast(ValkeyModule_OpenKey(ctx, rmKey, + readOnly? VALKEYMODULE_READ : VALKEYMODULE_READ | VALKEYMODULE_WRITE)); + if (ValkeyModule_KeyType(*key) == VALKEYMODULE_KEYTYPE_EMPTY) return JSONUTIL_DOCUMENT_KEY_NOT_FOUND; + if (ValkeyModule_ModuleTypeGetType(*key) != DocumentType) return JSONUTIL_NOT_A_DOCUMENT_KEY; + return JSONUTIL_SUCCESS; +} + +/* Fetch JSON at a single path. + * If the document key does not exist, the command will return null without an error. + * If the key is not a document key, the command will return error code JSONUTIL_NOT_A_DOCUMENT_KEY. + * If the JSON path is invalid or does not exist, the method will return error code JSONUTIL_INVALID_JSON_PATH. + * + * @param format - controls format of returned JSON string. + * if nullptr, return JSON in compact format (no space, no indent, no newline). + * @param oss - output stream + * @return JSONUTIL_SUCCESS if success. Other codes indicate failure. + */ +STATIC JsonUtilCode fetch_json(ValkeyModuleCtx *ctx, ValkeyModuleString *rmKey, const char *path, + PrintFormat *format, ReplyBuffer &oss) { + ValkeyModuleKey *key; + JsonUtilCode rc = verify_doc_key(ctx, rmKey, &key, true); + if (rc != JSONUTIL_SUCCESS) return rc; + + // fetch doc object from Valkey dict + JDocument *doc = static_cast(ValkeyModule_ModuleTypeGetValue(key)); + + // fetch value at the path + return dom_get_value_as_str(doc, path, format, oss); +} + +/* Fetch JSON at multiple paths. 
Values at multiple paths will be aggregated into a JSON object, + * in which each path is a key. + * If the document key does not exist, the command will return null without an error. + * If the key is not a document key, the command will return error code JSONUTIL_NOT_A_DOCUMENT_KEY. + * If the JSON path is invalid or does not exist, the path's corresponding value will be JSON null. + * + * @param format - controls format of returned JSON string. + * if nullptr, return JSON in compact format (no space, no indent, no newline). + * @param oss - output stream, the string represents an aggregated JSON object in which each path is a key. + * @return JSONUTIL_SUCCESS if success. Other codes indicate failure. + */ +STATIC JsonUtilCode fetch_json_multi_paths(ValkeyModuleCtx *ctx, ValkeyModuleString *rmKey, const char **paths, + const int num_paths, PrintFormat *format, ReplyBuffer &oss) { + ValkeyModuleKey *key; + JsonUtilCode rc = verify_doc_key(ctx, rmKey, &key, true); + if (rc != JSONUTIL_SUCCESS) return rc; + + // fetch doc object from Valkey dict + JDocument *doc = static_cast(ValkeyModule_ModuleTypeGetValue(key)); + + // fetch values at the path + return dom_get_values_as_str(doc, paths, num_paths, format, oss); +} + +/* ================= Helper Methods: Parsing Command Args ================== */ + +typedef struct { + ValkeyModuleString *key; // required + const char *path; // required + const char *json; // required + size_t json_len; + + // The following two booleans map to the optional arg "NX | XX". + // NX - set the key only if it does not exist. XX - set the key only if it exists. + bool is_create_only; // NX: set the key only if it does not exist + bool is_update_only; // XX: set the key only if it exists. +} SetCmdArgs; + +STATIC JsonUtilCode parseSetCmdArgs(ValkeyModuleString **argv, const int argc, SetCmdArgs *args) { + memset(args, 0, sizeof(SetCmdArgs)); + + // we need 4 or 5 arguments + if (argc != 4 && argc != 5) { + return JSONUTIL_WRONG_NUM_ARGS; + } + + args->key = argv[1]; + args->path = ValkeyModule_StringPtrLen(argv[2], nullptr); + args->json = ValkeyModule_StringPtrLen(argv[3], &args->json_len); + + if (argc == 5) { + const char *cond = ValkeyModule_StringPtrLen(argv[4], nullptr); + if (!strcasecmp(cond, "NX")) { + args->is_create_only = true; + } else if (!strcasecmp(cond, "XX")) { + args->is_update_only = true; + } else { + return JSONUTIL_COMMAND_SYNTAX_ERROR; + } + } + return JSONUTIL_SUCCESS; +} + +STATIC JsonUtilCode parseGetCmdArgs(ValkeyModuleString **argv, const int argc, ValkeyModuleString **key, + PrintFormat *format, ValkeyModuleString ***paths, int *num_paths) { + *key = nullptr; + memset(format, 0, sizeof(PrintFormat)); + *paths = nullptr; + *num_paths = 0; + + // we need at least 2 arguments + if (argc < 2) return JSONUTIL_WRONG_NUM_ARGS; + + *key = argv[1]; + int i = 2; // index of the next arg to process + int path_count = 0; + ValkeyModuleString **first_path = nullptr; + + // Process the remaining arguments and verify that all path arguments are positioned at the end. + // If an arg is not one of 4 options (NEWLINE/SPACE/INDENT/NOESCAPE), treat it as path argument, + // increment the path count, and continue. Whenever one of the 4 options is found, check the path count. + // If it is > 0, which means there is at least one path argument in the middle (not at the end), then + // exit the loop and return an error code. 
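+ // Example (illustrative): in "JSON.GET k1 NEWLINE \n INDENT \t $.a $.b", the two
+ // trailing arguments are counted as paths; NEWLINE/INDENT and their values are
+ // consumed as formatting options.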
+ // + // If the argument is one of NEWLINE/SPACE/INDENT but it is the last argument, return with error, because + // the argument requires a following argument. + while (i < argc) { + const char *token = ValkeyModule_StringPtrLen(argv[i], nullptr); + if (!strcasecmp(token, "NEWLINE")) { + if (i == argc - 1) return JSONUTIL_COMMAND_SYNTAX_ERROR; + format->newline = ValkeyModule_StringPtrLen(argv[++i], nullptr); + } else if (!strcasecmp(token, "SPACE")) { + if (i == argc - 1) return JSONUTIL_COMMAND_SYNTAX_ERROR; + format->space = ValkeyModule_StringPtrLen(argv[++i], nullptr); + } else if (!strcasecmp(token, "INDENT")) { + if (i == argc - 1) return JSONUTIL_COMMAND_SYNTAX_ERROR; + format->indent = ValkeyModule_StringPtrLen(argv[++i], nullptr); + } else if (!strcasecmp(token, "NOESCAPE")) { + // NOESCAPE is only for legacy compatibility and is noop. + } else { + // treat it as a path argument + path_count++; + if (first_path == nullptr) first_path = &argv[i]; + } + ++i; + } + + *paths = first_path; + *num_paths = path_count; + return JSONUTIL_SUCCESS; +} + +/* A helper method to parse a simple command, which has two arguments: + * key: required + * path: optional, defaults to root path + */ +STATIC JsonUtilCode parseSimpleCmdArgs(ValkeyModuleString **argv, const int argc, + ValkeyModuleString **key, const char **path) { + *key = nullptr; + *path = nullptr; + + // there should be either 2 or 3 arguments + if (argc != 2 && argc != 3) return JSONUTIL_WRONG_NUM_ARGS; + + *key = argv[1]; + if (argc == 3) { + *path = ValkeyModule_StringPtrLen(argv[2], nullptr); + } + if (*path == nullptr) *path = "."; // default to root path + return JSONUTIL_SUCCESS; +} + +STATIC JsonUtilCode parseNumIncrOrMultByCmdArgs(ValkeyModuleString **argv, const int argc, + ValkeyModuleString **key, const char **path, JValue *jvalue) { + *key = nullptr; + *path = nullptr; + + // we need exactly 4 arguments + if (argc != 4) return JSONUTIL_WRONG_NUM_ARGS; + + *key = argv[1]; + *path = ValkeyModule_StringPtrLen(argv[2], nullptr); + + JParser parser; + size_t arg_length; + const char *arg = ValkeyModule_StringPtrLen(argv[3], &arg_length); + if (parser.Parse(arg, arg_length).HasParseError() || !parser.GetJValue().IsNumber()) { + return JSONUTIL_VALUE_NOT_NUMBER; + } + *jvalue = parser.GetJValue(); + return JSONUTIL_SUCCESS; +} + +STATIC JsonUtilCode parseStrAppendCmdArgs(ValkeyModuleString **argv, const int argc, + ValkeyModuleString **key, const char **path, + const char**json, size_t *json_len) { + *key = nullptr; + *path = "."; // defaults to root path + *json = nullptr; + *json_len = 0; + + // we need exactly 3 or 4 arguments + if (argc != 3 && argc != 4) return JSONUTIL_WRONG_NUM_ARGS; + + *key = argv[1]; + if (argc == 3) { + *json = ValkeyModule_StringPtrLen(argv[2], json_len); + } else { + *path = ValkeyModule_StringPtrLen(argv[2], nullptr); + *json = ValkeyModule_StringPtrLen(argv[3], json_len); + } + return JSONUTIL_SUCCESS; +} + +typedef struct { + ValkeyModuleString *key; // required + const char *path; // required + long num_values; // number of values to append + const char **jsons; + size_t *json_len_arr; + size_t total_json_len; +} ArrAppendCmdArgs; + +STATIC JsonUtilCode parseArrAppendCmdArgs(ValkeyModuleCtx *ctx, ValkeyModuleString **argv, const int argc, + ArrAppendCmdArgs *args) { + memset(args, 0, sizeof(ArrAppendCmdArgs)); + + // we need at least 4 arguments + if (argc < 4) return JSONUTIL_WRONG_NUM_ARGS; + + args->key = argv[1]; + args->path = ValkeyModule_StringPtrLen(argv[2], nullptr); + 
args->num_values = argc - 3; + args->jsons = static_cast(ValkeyModule_PoolAlloc(ctx, args->num_values * sizeof(const char *))); + args->json_len_arr = static_cast(ValkeyModule_PoolAlloc(ctx, args->num_values * sizeof(size_t))); + for (int i=0; i < args->num_values; i++) { + args->jsons[i] = ValkeyModule_StringPtrLen(argv[i+3], &(args->json_len_arr[i])); + args->total_json_len += args->json_len_arr[i]; + } + + return JSONUTIL_SUCCESS; +} + +STATIC JsonUtilCode parseArrPopCmdArgs(ValkeyModuleString **argv, const int argc, + ValkeyModuleString **key, const char **path, int64_t *index) { + *key = nullptr; + *path = "."; // defaults to the root path if not provided + *index = -1; // defaults to -1 if not provided, which means the last element. + + // there should be 2 or 3 or 4 arguments + if (argc != 2 && argc != 3 && argc != 4) return JSONUTIL_WRONG_NUM_ARGS; + + *key = argv[1]; + if (argc > 2) *path = ValkeyModule_StringPtrLen(argv[2], nullptr); + if (argc > 3) { + long long idx = 0; + if (ValkeyModule_StringToLongLong(argv[3], &idx) == VALKEYMODULE_ERR) return JSONUTIL_VALUE_NOT_INTEGER; + *index = idx; + } + return JSONUTIL_SUCCESS; +} + +typedef struct { + ValkeyModuleString *key; // required + const char *path; // required + int64_t index; // required + long num_values; // number of values to insert + const char **jsons; + size_t *json_len_arr; + size_t total_json_len; +} ArrInsertCmdArgs; + +STATIC JsonUtilCode parseArrInsertCmdArgs(ValkeyModuleCtx *ctx, ValkeyModuleString **argv, const int argc, + ArrInsertCmdArgs *args) { + memset(args, 0, sizeof(ArrInsertCmdArgs)); + + // we need at least 5 arguments + if (argc < 5) return JSONUTIL_WRONG_NUM_ARGS; + + args->key = argv[1]; + args->path = ValkeyModule_StringPtrLen(argv[2], nullptr); + + long long index = 0; + if (ValkeyModule_StringToLongLong(argv[3], &index) == VALKEYMODULE_ERR) return JSONUTIL_VALUE_NOT_INTEGER; + args->index = index; + + args->num_values = argc - 4; + args->jsons = static_cast(ValkeyModule_PoolAlloc(ctx, args->num_values * sizeof(const char *))); + args->json_len_arr = static_cast(ValkeyModule_PoolAlloc(ctx, args->num_values * sizeof(size_t))); + for (int i=0; i < args->num_values; i++) { + args->jsons[i] = ValkeyModule_StringPtrLen(argv[i+4], &(args->json_len_arr[i])); + args->total_json_len += args->json_len_arr[i]; + } + return JSONUTIL_SUCCESS; +} + +/* + * A helper method to parse arguments for ArrayTrim command. 
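+ * Example (illustrative): JSON.ARRTRIM k $.arr 1 3 parses to start=1, stop=3.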
+ * @param start - start index, inclusive + * @param stop - stop index, inclusive + * @return + */ +STATIC JsonUtilCode parseArrTrimCmdArgs(ValkeyModuleString **argv, const int argc, + ValkeyModuleString **key, const char **path, int64_t *start, int64_t *stop) { + *key = nullptr; + *path = nullptr; + *start = 0; + *stop = 0; + + // we need exactly 5 arguments + if (argc != 5) return JSONUTIL_WRONG_NUM_ARGS; + + *key = argv[1]; + *path = ValkeyModule_StringPtrLen(argv[2], nullptr); + + long long start_idx = 0; + if (ValkeyModule_StringToLongLong(argv[3], &start_idx) == VALKEYMODULE_ERR) return JSONUTIL_VALUE_NOT_INTEGER; + *start = start_idx; + + long long stop_idx = 0; + if (ValkeyModule_StringToLongLong(argv[4], &stop_idx) == VALKEYMODULE_ERR) return JSONUTIL_VALUE_NOT_INTEGER; + *stop = stop_idx; + return JSONUTIL_SUCCESS; +} + +typedef struct { + ValkeyModuleString *key; // required + const char *path; // required + const char *scalar_val; // required, scalar json value + size_t scalar_val_len; + int64_t start; // optional, start index, inclusive, defaults to 0 + int64_t stop; // optional, stop index, exclusive, defaults to 0 +} ArrIndexCmdArgs; + +STATIC JsonUtilCode parseArrIndexCmdArgs(ValkeyModuleString **argv, const int argc, ArrIndexCmdArgs *args) { + memset(args, 0, sizeof(ArrIndexCmdArgs)); + + // there should be 4 or 5 or 6 arguments + if (argc != 4 && argc != 5 && argc != 6) return JSONUTIL_WRONG_NUM_ARGS; + + args->key = argv[1]; + args->path = ValkeyModule_StringPtrLen(argv[2], nullptr); + args->scalar_val = ValkeyModule_StringPtrLen(argv[3], &args->scalar_val_len); + + if (argc > 4) { + long long start = 0; + if (ValkeyModule_StringToLongLong(argv[4], &start) == VALKEYMODULE_ERR) return JSONUTIL_VALUE_NOT_INTEGER; + args->start = start; + } + + if (argc > 5) { + long long stop = 0; + if (ValkeyModule_StringToLongLong(argv[5], &stop) == VALKEYMODULE_ERR) return JSONUTIL_VALUE_NOT_INTEGER; + args->stop = stop; + } + return JSONUTIL_SUCCESS; +} + +STATIC JsonUtilCode parseMemoryOrFieldsSubCmdArgs(ValkeyModuleCtx *ctx, ValkeyModuleString **argv, const int argc, + ValkeyModuleKey **key, const char **path, bool& default_path) { + *key = nullptr; + *path = "."; // defaults to the root path + default_path = true; + + // there should be either 3 or 4 arguments + if (argc != 3 && argc != 4) return JSONUTIL_WRONG_NUM_ARGS; + + JsonUtilCode rc = verify_doc_key(ctx, argv[2], key, true); + if (rc != JSONUTIL_SUCCESS) return rc; + + if (argc > 3) { + *path = ValkeyModule_StringPtrLen(argv[3], nullptr); + default_path = false; + } + return JSONUTIL_SUCCESS; +} + +/* ============================= Command Handlers =========================== */ + +int Command_JsonSet(ValkeyModuleCtx *ctx, ValkeyModuleString **argv, int argc) { + ValkeyModule_AutoMemory(ctx); + + SetCmdArgs args; + JsonUtilCode rc = parseSetCmdArgs(argv, argc, &args); + if (rc != JSONUTIL_SUCCESS) { + if (rc == JSONUTIL_WRONG_NUM_ARGS) + return ValkeyModule_WrongArity(ctx); + else + return ValkeyModule_ReplyWithError(ctx, jsonutil_code_to_message(rc)); + } + + // verify valkey keys + ValkeyModuleKey *key = static_cast(ValkeyModule_OpenKey(ctx, args.key, + VALKEYMODULE_READ | VALKEYMODULE_WRITE)); + int type = ValkeyModule_KeyType(key); + if (type != VALKEYMODULE_KEYTYPE_EMPTY && ValkeyModule_ModuleTypeGetType(key) != DocumentType) { + return ValkeyModule_ReplyWithError(ctx, jsonutil_code_to_message(JSONUTIL_NOT_A_DOCUMENT_KEY)); + } + + bool is_new_valkey_key = (type == VALKEYMODULE_KEYTYPE_EMPTY); + bool is_root_path = 
jsonutil_is_root_path(args.path); + + if (is_new_valkey_key) { + if (!is_root_path) + return ValkeyModule_ReplyWithError(ctx, ERRMSG_NEW_VALKEY_KEY_PATH_NOT_ROOT); + if (args.is_update_only) + return ValkeyModule_ReplyWithNull(ctx); + } else { + if (is_root_path && args.is_create_only) + return ValkeyModule_ReplyWithNull(ctx); + } + + // begin tracking memory + int64_t begin_val = jsonstats_begin_track_mem(); + + if (is_root_path) { // root doc + // parse incoming JSON string + JDocument *doc; + rc = dom_parse(ctx, args.json, args.json_len, &doc); + if (rc != JSONUTIL_SUCCESS) return ValkeyModule_ReplyWithError(ctx, jsonutil_code_to_message(rc)); + + // end tracking memory + int64_t delta = jsonstats_end_track_mem(begin_val); + size_t doc_size = dom_get_doc_size(doc) + delta; + dom_set_doc_size(doc, doc_size); + + if (json_is_instrument_enabled_insert() || json_is_instrument_enabled_update()) { + size_t len; + const char* key_cstr = ValkeyModule_StringPtrLen(args.key, &len); + std::size_t key_hash = std::hash{}(std::string_view(key_cstr, len)); + ValkeyModule_Log(ctx, "warning", + "Dump document structure before setting JSON key (hashed) %zu whole doc %p:", + key_hash, static_cast(doc)); + DumpRedactedJValue(doc->GetJValue(), nullptr, "warning"); + } + + // set Valkey key + ValkeyModule_ModuleTypeSetValue(key, DocumentType, doc); + + // update stats + jsonstats_update_stats_on_insert(doc, true, 0, doc_size, doc_size); + } else { + // fetch doc object from Valkey dict + JDocument *doc = static_cast(ValkeyModule_ModuleTypeGetValue(key)); + if (doc == nullptr) return ValkeyModule_ReplyWithError(ctx, ERRMSG_JSON_DOCUMENT_NOT_FOUND); + + size_t orig_doc_size = dom_get_doc_size(doc); + rc = dom_set_value(ctx, doc, args.path, args.json, args.json_len, args.is_create_only, args.is_update_only); + if (rc != JSONUTIL_SUCCESS) { + if (rc == JSONUTIL_NX_XX_CONDITION_NOT_SATISFIED) + return ValkeyModule_ReplyWithNull(ctx); + return ValkeyModule_ReplyWithError(ctx, jsonutil_code_to_message(rc)); + } + + // end tracking memory + int64_t delta = jsonstats_end_track_mem(begin_val); + size_t new_doc_size = dom_get_doc_size(doc) + delta; + dom_set_doc_size(doc, new_doc_size); + + // update stats + jsonstats_update_stats_on_update(doc, orig_doc_size, new_doc_size, args.json_len); + } + + // replicate the command + ValkeyModule_ReplicateVerbatim(ctx); + ValkeyModule_NotifyKeyspaceEvent(ctx, VALKEYMODULE_NOTIFY_GENERIC, "json.set", args.key); + return ValkeyModule_ReplyWithSimpleString(ctx, "OK"); +} + +int Command_JsonGet(ValkeyModuleCtx *ctx, ValkeyModuleString **argv, int argc) { + ValkeyModule_AutoMemory(ctx); + + ValkeyModuleString *key_str; + PrintFormat format; + ValkeyModuleString **paths; + int num_paths; + JsonUtilCode rc = parseGetCmdArgs(argv, argc, &key_str, &format, &paths, &num_paths); + if (rc != JSONUTIL_SUCCESS) { + if (rc == JSONUTIL_WRONG_NUM_ARGS) + return ValkeyModule_WrongArity(ctx); + else + return ValkeyModule_ReplyWithError(ctx, jsonutil_code_to_message(rc)); + } + + // fetch json + ReplyBuffer oss(ctx, true); + if (num_paths == 0) { + // default to the root path + rc = fetch_json(ctx, key_str, ".", &format, oss); + } else if (num_paths == 1) { + const char *cstr_path = ValkeyModule_StringPtrLen(paths[0], nullptr); + rc = fetch_json(ctx, key_str, cstr_path, &format, oss); + } else { + const char **cstr_paths = static_cast(ValkeyModule_PoolAlloc(ctx, + num_paths * sizeof(const char*))); + int format_args_offset = 0; + for (int i = 0; i < num_paths; i++) { + const char *token = 
ValkeyModule_StringPtrLen(paths[i+format_args_offset], nullptr); + // no need to check on the first one, we already know it's pointing to the right place + bool look_for_formatting = i > 0; + + // we already know from parseGetCmdArgs that we're going to find another path eventually + while (look_for_formatting) { + look_for_formatting = false; + if (!strcasecmp(token, "NEWLINE") || !strcasecmp(token, "SPACE") || !strcasecmp(token, "INDENT")) { + format_args_offset += 2; + look_for_formatting = true; + } else if (!strcasecmp(token, "NOESCAPE")) { + format_args_offset++; + look_for_formatting = true; + } + if (look_for_formatting) { + token = ValkeyModule_StringPtrLen(paths[i+format_args_offset], nullptr); + } + } + cstr_paths[i] = ValkeyModule_StringPtrLen(paths[i+format_args_offset], nullptr); + } + rc = fetch_json_multi_paths(ctx, key_str, cstr_paths, num_paths, &format, oss); + } + + if (rc != JSONUTIL_SUCCESS) { + if (rc == JSONUTIL_DOCUMENT_KEY_NOT_FOUND) + return ValkeyModule_ReplyWithNull(ctx); + else + return ValkeyModule_ReplyWithError(ctx, jsonutil_code_to_message(rc)); + } + + // return JSON to client + oss.Reply(); + return VALKEYMODULE_OK; +} + +int Command_JsonMGet(ValkeyModuleCtx *ctx, ValkeyModuleString **argv, int argc) { + ValkeyModule_AutoMemory(ctx); + + // we need at least 3 arguments + if (argc < 3) return ValkeyModule_WrongArity(ctx); + + int num_keys = argc - 2; + const char *path = ValkeyModule_StringPtrLen(argv[argc-1], nullptr); + + // create a vector of string streams to store JSON per key + jsn::vector vec(num_keys); + for (int i=0; i < num_keys; i++) { + vec[i].Initialize(ctx, false); + JsonUtilCode rc = fetch_json(ctx, argv[i + 1], path, nullptr, vec[i]); + if (rc != JSONUTIL_SUCCESS && rc != JSONUTIL_DOCUMENT_KEY_NOT_FOUND && + rc != JSONUTIL_INVALID_JSON_PATH && rc != JSONUTIL_JSON_PATH_NOT_EXIST) { + return ValkeyModule_ReplyWithError(ctx, jsonutil_code_to_message(rc)); + } + } + + // return array of bulk strings to client + ValkeyModule_ReplyWithArray(ctx, num_keys); + for (int i=0; i < num_keys; i++) { + if (vec[i].GetLength() == 0) { + ValkeyModule_ReplyWithNull(ctx); + } else { + vec[i].Reply(); + } + } + return VALKEYMODULE_OK; +} + +int Command_JsonDel(ValkeyModuleCtx *ctx, ValkeyModuleString **argv, int argc) { + ValkeyModule_AutoMemory(ctx); + + ValkeyModuleString *key_str; + const char *path; + JsonUtilCode rc = parseSimpleCmdArgs(argv, argc, &key_str, &path); + if (rc != JSONUTIL_SUCCESS) { + if (rc == JSONUTIL_WRONG_NUM_ARGS) + return ValkeyModule_WrongArity(ctx); + else + return ValkeyModule_ReplyWithError(ctx, jsonutil_code_to_message(rc)); + } + + ValkeyModuleKey *key; + rc = verify_doc_key(ctx, key_str, &key); + if (rc != JSONUTIL_SUCCESS) { + if (rc == JSONUTIL_DOCUMENT_KEY_NOT_FOUND) { + // ignore non-existing keys + return ValkeyModule_ReplyWithLongLong(ctx, 0); + } else { + return ValkeyModule_ReplyWithError(ctx, jsonutil_code_to_message(rc)); + } + } + + if (jsonutil_is_root_path(path)) { + if (json_is_instrument_enabled_delete()) { + size_t len; + const char* key_cstr = ValkeyModule_StringPtrLen(key_str, &len); + std::size_t key_hash = std::hash{}(std::string_view(key_cstr, len)); + ValkeyModule_Log(ctx, "warning", "deleting whole JSON key (hashed) %zu", key_hash); + } + + // delete the key from Valkey Dict + ValkeyModule_DeleteKey(key); + // replicate the command + ValkeyModule_ReplicateVerbatim(ctx); + + ValkeyModule_NotifyKeyspaceEvent(ctx, VALKEYMODULE_NOTIFY_GENERIC, "json.del", key_str); + + return 
ValkeyModule_ReplyWithLongLong(ctx, 1); + } + + // fetch doc object from Valkey dict + JDocument *doc = static_cast(ValkeyModule_ModuleTypeGetValue(key)); + size_t orig_doc_size = dom_get_doc_size(doc); + + // begin tracking memory + int64_t begin_val = jsonstats_begin_track_mem(); + + // delete value at path + size_t num_vals_deleted; + rc = dom_delete_value(doc, path, num_vals_deleted); + + if (rc != JSONUTIL_SUCCESS) { + if (rc == JSONUTIL_INVALID_JSON_PATH || rc == JSONUTIL_JSON_PATH_NOT_EXIST) { + // ignore invalid or non-existent path + return ValkeyModule_ReplyWithLongLong(ctx, 0); + } else { + return ValkeyModule_ReplyWithError(ctx, jsonutil_code_to_message(rc)); + } + } + + // end tracking memory + int64_t delta = jsonstats_end_track_mem(begin_val); + size_t new_doc_size = dom_get_doc_size(doc) + delta; + dom_set_doc_size(doc, new_doc_size); + + // update stats + jsonstats_update_stats_on_delete(doc, false, orig_doc_size, new_doc_size, abs(delta)); + + // replicate the command + ValkeyModule_ReplicateVerbatim(ctx); + + ValkeyModule_NotifyKeyspaceEvent(ctx, VALKEYMODULE_NOTIFY_GENERIC, "json.del", key_str); + return ValkeyModule_ReplyWithLongLong(ctx, num_vals_deleted); +} + +/** + * A helper method to send a reply to the client for JSON.NUMINCRBY or JSON.NUMMULTBY. + */ +STATIC void reply_numincrby_nummultby(jsn::vector &vec, const bool is_v2_path, ValkeyModuleCtx *ctx) { + if (!is_v2_path) { + // Legacy path: return a single value, which is the last updated number value. + for (auto it = vec.rbegin(); it != vec.rend(); it++) { + if (!std::isnan(*it)) { // NaN indicates wrong object type + char buf[BUF_SIZE_DOUBLE_JSON]; + size_t len = jsonutil_double_to_string(*it, buf, sizeof(buf)); + ValkeyModule_ReplyWithStringBuffer(ctx, buf, len); + return; + } + } + // It's impossible to reach here, because the upstream method has verified there is at least one number value. + ValkeyModule_Assert(false); + } else { + // JSONPath: return serialized string of an array of values. + // If a value is NaN, its corresponding returned element is JSON null. 
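+ // Illustrative example (hypothetical key and document, not from this module's tests): given + // {"a":2,"b":true,"c":4} at key k, JSON.NUMINCRBY k $.* 1 replies with the bulk string + // "[3,null,5]" (null for the non-number), while the legacy form JSON.NUMINCRBY k .a 1 + // replies with the single bulk string "3".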
+ jsn::string s = "["; + for (uint i=0; i < vec.size(); i++) { + if (i > 0) s.append(","); + if (std::isnan(vec[i])) { + s.append("null"); + } else { + char double_to_string_buf[BUF_SIZE_DOUBLE_JSON]; + jsonutil_double_to_string(vec[i], double_to_string_buf, sizeof(double_to_string_buf)); + s.append(double_to_string_buf); + } + } + s.append("]"); + ValkeyModule_ReplyWithStringBuffer(ctx, s.c_str(), s.length()); + } +} + +int Command_JsonNumIncrBy(ValkeyModuleCtx *ctx, ValkeyModuleString **argv, int argc) { + ValkeyModule_AutoMemory(ctx); + + ValkeyModuleString *key_str; + const char *path; + JValue jvalue; + JsonUtilCode rc = parseNumIncrOrMultByCmdArgs(argv, argc, &key_str, &path, &jvalue); + if (rc != JSONUTIL_SUCCESS) { + if (rc == JSONUTIL_WRONG_NUM_ARGS) return ValkeyModule_WrongArity(ctx); + return ValkeyModule_ReplyWithError(ctx, jsonutil_code_to_message(rc)); + } + + ValkeyModuleKey *key; + rc = verify_doc_key(ctx, key_str, &key); + if (rc != JSONUTIL_SUCCESS) return ValkeyModule_ReplyWithError(ctx, jsonutil_code_to_message(rc)); + + // fetch doc object from Valkey dict + JDocument *doc = static_cast(ValkeyModule_ModuleTypeGetValue(key)); + + // increment the value at path + jsn::vector vec; + bool is_v2_path; + rc = dom_increment_by(doc, path, &jvalue, vec, is_v2_path); + if (rc != JSONUTIL_SUCCESS) return ValkeyModule_ReplyWithError(ctx, jsonutil_code_to_message(rc)); + + // replicate the command + ValkeyModule_ReplicateVerbatim(ctx); + + ValkeyModule_NotifyKeyspaceEvent(ctx, VALKEYMODULE_NOTIFY_GENERIC, "json.numincrby", key_str); + + // convert the result to bulk string and send the reply to the client + reply_numincrby_nummultby(vec, is_v2_path, ctx); + return VALKEYMODULE_OK; +} + +int Command_JsonNumMultBy(ValkeyModuleCtx *ctx, ValkeyModuleString **argv, int argc) { + ValkeyModule_AutoMemory(ctx); + + ValkeyModuleString *key_str; + const char *path; + JValue jvalue; + JsonUtilCode rc = parseNumIncrOrMultByCmdArgs(argv, argc, &key_str, &path, &jvalue); + if (rc != JSONUTIL_SUCCESS) { + if (rc == JSONUTIL_WRONG_NUM_ARGS) return ValkeyModule_WrongArity(ctx); + return ValkeyModule_ReplyWithError(ctx, jsonutil_code_to_message(rc)); + } + + ValkeyModuleKey *key; + rc = verify_doc_key(ctx, key_str, &key); + if (rc != JSONUTIL_SUCCESS) return ValkeyModule_ReplyWithError(ctx, jsonutil_code_to_message(rc)); + + // fetch doc object from Valkey dict + JDocument *doc = static_cast(ValkeyModule_ModuleTypeGetValue(key)); + + // multiply the value at path + jsn::vector vec; + bool is_v2_path; + rc = dom_multiply_by(doc, path, &jvalue, vec, is_v2_path); + if (rc != JSONUTIL_SUCCESS) return ValkeyModule_ReplyWithError(ctx, jsonutil_code_to_message(rc)); + + // replicate the command + ValkeyModule_ReplicateVerbatim(ctx); + + ValkeyModule_NotifyKeyspaceEvent(ctx, VALKEYMODULE_NOTIFY_GENERIC, "json.nummultby", key_str); + + // convert the result to bulk string and send the reply to the client + reply_numincrby_nummultby(vec, is_v2_path, ctx); + return VALKEYMODULE_OK; +} + +/** + * A helper method to send a reply to the client for JSON.STRLEN and JSON.OBJLEN. + */ +STATIC void reply_strlen_objlen(jsn::vector &vec, const bool is_v2_path, ValkeyModuleCtx *ctx) { + if (!is_v2_path) { + // Legacy path: return a single value, which is the first value. 
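+ // Illustrative example (hypothetical document): for {"s":"hello","n":3} at key k, the legacy + // form JSON.STRLEN k .s replies 5, while JSON.STRLEN k $.* replies [5,null], with null + // marking the non-string match.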
+ for (auto it = vec.begin(); it != vec.end(); it++) { + if (*it != SIZE_MAX) { // SIZE_MAX indicates wrong object type + ValkeyModule_ReplyWithLongLong(ctx, static_cast(*it)); + return; + } + } + // It's impossible to reach here, because the upstream method has verified there is at least + // one string/object value. + ValkeyModule_Assert(false); + } else { + // JSONPath: return an array of lengths. + // If a value is SIZE_MAX, its corresponding element is null. + ValkeyModule_ReplyWithArray(ctx, vec.size()); + for (auto it = vec.begin(); it != vec.end(); it++) { + if ((*it) == SIZE_MAX) { // SIZE_MAX indicates wrong object type + ValkeyModule_ReplyWithNull(ctx); + } else { + ValkeyModule_ReplyWithLongLong(ctx, static_cast(*it)); + } + } + } +} + +int Command_JsonStrLen(ValkeyModuleCtx *ctx, ValkeyModuleString **argv, int argc) { + ValkeyModule_AutoMemory(ctx); + + ValkeyModuleString *key_str; + const char *path; + JsonUtilCode rc = parseSimpleCmdArgs(argv, argc, &key_str, &path); + if (rc != JSONUTIL_SUCCESS) { + if (rc == JSONUTIL_WRONG_NUM_ARGS) return ValkeyModule_WrongArity(ctx); + return ValkeyModule_ReplyWithError(ctx, jsonutil_code_to_message(rc)); + } + + ValkeyModuleKey *key; + rc = verify_doc_key(ctx, key_str, &key, true); + if (rc != JSONUTIL_SUCCESS) { + if (rc == JSONUTIL_DOCUMENT_KEY_NOT_FOUND) return ValkeyModule_ReplyWithNull(ctx); + return ValkeyModule_ReplyWithError(ctx, jsonutil_code_to_message(rc)); + } + + // fetch doc object from Valkey dict + JDocument *doc = static_cast(ValkeyModule_ModuleTypeGetValue(key)); + + // get string lengths + jsn::vector vec; + bool is_v2_path; + rc = dom_string_length(doc, path, vec, is_v2_path); + if (rc != JSONUTIL_SUCCESS) return ValkeyModule_ReplyWithError(ctx, jsonutil_code_to_message(rc)); + + reply_strlen_objlen(vec, is_v2_path, ctx); + return VALKEYMODULE_OK; +} + +/** + * A helper method to send a reply to the client for JSON.STRAPPEND. + */ +STATIC void reply_strappend(jsn::vector &vec, const bool is_v2_path, ValkeyModuleCtx *ctx) { + if (!is_v2_path) { + // Legacy path: return a single value, which is the last updated string's length. + for (auto it = vec.rbegin(); it != vec.rend(); it++) { + if (*it != SIZE_MAX) { // SIZE_MAX indicates wrong object type + ValkeyModule_ReplyWithLongLong(ctx, static_cast(*it)); + return; + } + } + // It's impossible to reach here, because the upstream method has verified there is at least + // one string value. + ValkeyModule_Assert(false); + } else { + // JSONPath: return an array of lengths. + // If a value is SIZE_MAX, its corresponding element is null. 
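+ // Illustrative example (hypothetical document): for {"a":"hi","b":1} at key k, + // JSON.STRAPPEND k $.* '"!"' replies [3,null] (new length 3 for "hi!", null for the + // non-string), while the legacy form replies only the last updated length, 3.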
+ ValkeyModule_ReplyWithArray(ctx, vec.size()); + for (auto it = vec.begin(); it != vec.end(); it++) { + if ((*it) == SIZE_MAX) { // SIZE_MAX indicates wrong object type + ValkeyModule_ReplyWithNull(ctx); + } else { + ValkeyModule_ReplyWithLongLong(ctx, static_cast(*it)); + } + } + } +} + +int Command_JsonStrAppend(ValkeyModuleCtx *ctx, ValkeyModuleString **argv, int argc) { + ValkeyModule_AutoMemory(ctx); + + ValkeyModuleString *key_str; + const char *path; + const char *json; + size_t json_len; + JsonUtilCode rc = parseStrAppendCmdArgs(argv, argc, &key_str, &path, &json, &json_len); + if (rc != JSONUTIL_SUCCESS) { + if (rc == JSONUTIL_WRONG_NUM_ARGS) return ValkeyModule_WrongArity(ctx); + return ValkeyModule_ReplyWithError(ctx, jsonutil_code_to_message(rc)); + } + + ValkeyModuleKey *key; + rc = verify_doc_key(ctx, key_str, &key); + if (rc != JSONUTIL_SUCCESS) return ValkeyModule_ReplyWithError(ctx, jsonutil_code_to_message(rc)); + + // fetch doc object from Valkey dict + JDocument *doc = static_cast(ValkeyModule_ModuleTypeGetValue(key)); + size_t orig_doc_size = dom_get_doc_size(doc); + CHECK_DOCUMENT_SIZE_LIMIT(ctx, orig_doc_size + json_len) + + // begin tracking memory + int64_t begin_val = jsonstats_begin_track_mem(); + + // do string append + jsn::vector vec; + bool is_v2_path; + rc = dom_string_append(doc, path, json, json_len, vec, is_v2_path); + if (rc != JSONUTIL_SUCCESS) return ValkeyModule_ReplyWithError(ctx, jsonutil_code_to_message(rc)); + + // end tracking memory + int64_t delta = jsonstats_end_track_mem(begin_val); + size_t new_doc_size = dom_get_doc_size(doc) + delta; + dom_set_doc_size(doc, new_doc_size); + + // update stats + jsonstats_update_stats_on_update(doc, orig_doc_size, new_doc_size, json_len); + + // replicate the command + ValkeyModule_ReplicateVerbatim(ctx); + + ValkeyModule_NotifyKeyspaceEvent(ctx, VALKEYMODULE_NOTIFY_GENERIC, "json.strappend", key_str); + + reply_strappend(vec, is_v2_path, ctx); + return VALKEYMODULE_OK; +} + +/** + * A helper method to send a reply to the client for JSON.TOGGLE. + */ +STATIC void reply_toggle(jsn::vector &vec, const bool is_v2_path, ValkeyModuleCtx *ctx) { + if (!is_v2_path) { + // Legacy path: return a single value, which is the first value. + for (auto it = vec.begin(); it != vec.end(); it++) { + if (*it != -1) { // -1 means the value is not boolean + // convert the result to string + const char *buf = (*it == 1? "true" : "false"); + ValkeyModule_ReplyWithStringBuffer(ctx, buf, strlen(buf)); + return; + } + } + // It's impossible to reach here, because the upstream method has verified there is at least + // one boolean value. + ValkeyModule_Assert(false); + } else { + // JSONPath: return an array of new values. + // 0 - false + // 1 - true + // -1 - the value is not boolean, corresponding return value is null. 
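+ // Illustrative example (hypothetical document): for {"on":true,"n":7} at key k, + // JSON.TOGGLE k $.* replies [0,null] (true toggled to false, null for the non-boolean), + // while the legacy form replies the string "false".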
+ ValkeyModule_ReplyWithArray(ctx, vec.size()); + for (auto it = vec.begin(); it != vec.end(); it++) { + if ((*it) == -1) { + ValkeyModule_ReplyWithNull(ctx); + } else { + ValkeyModule_ReplyWithLongLong(ctx, static_cast<long long>(*it)); + } + } + } +} + +int Command_JsonToggle(ValkeyModuleCtx *ctx, ValkeyModuleString **argv, int argc) { + ValkeyModule_AutoMemory(ctx); + + ValkeyModuleString *key_str; + const char *path; + + JsonUtilCode rc = parseSimpleCmdArgs(argv, argc, &key_str, &path); + if (rc != JSONUTIL_SUCCESS) { + if (rc == JSONUTIL_WRONG_NUM_ARGS) + return ValkeyModule_WrongArity(ctx); + else + return ValkeyModule_ReplyWithError(ctx, jsonutil_code_to_message(rc)); + } + + ValkeyModuleKey *key; + rc = verify_doc_key(ctx, key_str, &key); + if (rc != JSONUTIL_SUCCESS) return ValkeyModule_ReplyWithError(ctx, jsonutil_code_to_message(rc)); + + // fetch doc object from Valkey dict + JDocument *doc = static_cast<JDocument*>(ValkeyModule_ModuleTypeGetValue(key)); + + // toggle the boolean value at this path + jsn::vector<int> vec; + bool is_v2_path; + rc = dom_toggle(doc, path, vec, is_v2_path); + if (rc != JSONUTIL_SUCCESS) return ValkeyModule_ReplyWithError(ctx, jsonutil_code_to_message(rc)); + + // replicate the command + ValkeyModule_ReplicateVerbatim(ctx); + + ValkeyModule_NotifyKeyspaceEvent(ctx, VALKEYMODULE_NOTIFY_GENERIC, "json.toggle", key_str); + + reply_toggle(vec, is_v2_path, ctx); + return VALKEYMODULE_OK; +} + +int Command_JsonObjLen(ValkeyModuleCtx *ctx, ValkeyModuleString **argv, int argc) { + ValkeyModule_AutoMemory(ctx); + + ValkeyModuleString *key_str; + const char *path; + JsonUtilCode rc = parseSimpleCmdArgs(argv, argc, &key_str, &path); + if (rc != JSONUTIL_SUCCESS) { + if (rc == JSONUTIL_WRONG_NUM_ARGS) return ValkeyModule_WrongArity(ctx); + return ValkeyModule_ReplyWithError(ctx, jsonutil_code_to_message(rc)); + } + + ValkeyModuleKey *key; + rc = verify_doc_key(ctx, key_str, &key, true); + if (rc != JSONUTIL_SUCCESS) { + if (rc == JSONUTIL_DOCUMENT_KEY_NOT_FOUND) return ValkeyModule_ReplyWithNull(ctx); + return ValkeyModule_ReplyWithError(ctx, jsonutil_code_to_message(rc)); + } + + // fetch doc object from Valkey dict + JDocument *doc = static_cast<JDocument*>(ValkeyModule_ModuleTypeGetValue(key)); + + // get object length + jsn::vector<size_t> vec; + bool is_v2_path; + rc = dom_object_length(doc, path, vec, is_v2_path); + if (rc != JSONUTIL_SUCCESS) return ValkeyModule_ReplyWithError(ctx, jsonutil_code_to_message(rc)); + + reply_strlen_objlen(vec, is_v2_path, ctx); + return VALKEYMODULE_OK; +} + +/** + * A helper method to send a reply to the client for JSON.OBJKEYS. + */ +STATIC void reply_objkeys(jsn::vector<jsn::vector<jsn::string>> &vec, const bool is_v2_path, ValkeyModuleCtx *ctx) { + if (!is_v2_path) { + // Legacy path: return an array of keys. + // If there are multiple objects, return the keys of the first non-empty object. + if (vec.empty()) { + ValkeyModule_ReplyWithEmptyArray(ctx); + } else { + for (auto it = vec.begin(); it != vec.end(); it++) { + if (!it->empty()) { + ValkeyModule_ReplyWithArray(ctx, it->size()); + for (jsn::string &key : *it) { + ValkeyModule_ReplyWithStringBuffer(ctx, key.c_str(), key.length()); + } + return; + } + } + ValkeyModule_ReplyWithEmptyArray(ctx); + } + } else { + // JSONPath: return an array of array of keys. + // In the first level vector, number of items is the number of objects. In the second level vector, number of + // items is the number of keys in the object. If an object has no keys, its corresponding return value is an empty array.
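+ // Illustrative example (hypothetical document): for {"o1":{"a":1},"o2":{}} at key k, + // JSON.OBJKEYS k $.* replies [["a"],[]], while the legacy form replies ["a"], the keys of + // the first non-empty object.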
+ ValkeyModule_ReplyWithArray(ctx, vec.size()); + for (auto it = vec.begin(); it != vec.end(); it++) { + if (it->empty()) { + ValkeyModule_ReplyWithEmptyArray(ctx); + } else { + ValkeyModule_ReplyWithArray(ctx, it->size()); + for (jsn::string &key : *it) { + ValkeyModule_ReplyWithStringBuffer(ctx, key.c_str(), key.length()); + } + } + } + } +} + +int Command_JsonObjKeys(ValkeyModuleCtx *ctx, ValkeyModuleString **argv, int argc) { + ValkeyModule_AutoMemory(ctx); + + ValkeyModuleString *key_str; + const char *path; + JsonUtilCode rc = parseSimpleCmdArgs(argv, argc, &key_str, &path); + if (rc != JSONUTIL_SUCCESS) { + if (rc == JSONUTIL_WRONG_NUM_ARGS) return ValkeyModule_WrongArity(ctx); + return ValkeyModule_ReplyWithError(ctx, jsonutil_code_to_message(rc)); + } + + ValkeyModuleKey *key; + rc = verify_doc_key(ctx, key_str, &key, true); + if (rc != JSONUTIL_SUCCESS) { + if (rc == JSONUTIL_DOCUMENT_KEY_NOT_FOUND) return ValkeyModule_ReplyWithNull(ctx); + return ValkeyModule_ReplyWithError(ctx, jsonutil_code_to_message(rc)); + } + + // fetch doc object from Valkey dict + JDocument *doc = static_cast(ValkeyModule_ModuleTypeGetValue(key)); + + // get object keys + jsn::vector> vec; + bool is_v2_path; + rc = dom_object_keys(doc, path, vec, is_v2_path); + if (rc != JSONUTIL_SUCCESS) { + if (rc == JSONUTIL_INVALID_JSON_PATH || rc == JSONUTIL_JSON_PATH_NOT_EXIST) + return ValkeyModule_ReplyWithNull(ctx); + return ValkeyModule_ReplyWithError(ctx, jsonutil_code_to_message(rc)); + } + + reply_objkeys(vec, is_v2_path, ctx); + return VALKEYMODULE_OK; +} + +/** + * A helper method to send a reply to the client for some array commands. + */ +STATIC void reply_array_command(jsn::vector &vec, const bool is_v2_path, ValkeyModuleCtx *ctx) { + if (!is_v2_path) { + // Legacy path: return a single value, which is the first value. + for (auto it = vec.begin(); it != vec.end(); it++) { + if (*it != SIZE_MAX) { // SIZE_MAX indicates wrong type + ValkeyModule_ReplyWithLongLong(ctx, static_cast(*it)); + return; + } + } + // It's impossible to reach here, because the upstream method has verified there is at least + // one array value. + ValkeyModule_Assert(false); + } else { + // JSONPath: return an array of lengths. + // If a value is SIZE_MAX, its corresponding element is null. 
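+ // Illustrative example (hypothetical document): for {"arr":[1,2,3],"s":"x"} at key k, + // JSON.ARRLEN k $.* replies [3,null] (null for the non-array), while the legacy form + // replies the single value 3.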
+ ValkeyModule_ReplyWithArray(ctx, vec.size()); + for (auto it = vec.begin(); it != vec.end(); it++) { + if ((*it) == SIZE_MAX) { // SIZE_MAX indicates wrong type + ValkeyModule_ReplyWithNull(ctx); + } else { + ValkeyModule_ReplyWithLongLong(ctx, static_cast(*it)); + } + } + } +} + +int Command_JsonArrLen(ValkeyModuleCtx *ctx, ValkeyModuleString **argv, int argc) { + ValkeyModule_AutoMemory(ctx); + + ValkeyModuleString *key_str; + const char *path; + JsonUtilCode rc = parseSimpleCmdArgs(argv, argc, &key_str, &path); + if (rc != JSONUTIL_SUCCESS) { + if (rc == JSONUTIL_WRONG_NUM_ARGS) return ValkeyModule_WrongArity(ctx); + return ValkeyModule_ReplyWithError(ctx, jsonutil_code_to_message(rc)); + } + + ValkeyModuleKey *key; + rc = verify_doc_key(ctx, key_str, &key, true); + if (rc != JSONUTIL_SUCCESS) { + if (rc == JSONUTIL_DOCUMENT_KEY_NOT_FOUND) return ValkeyModule_ReplyWithNull(ctx); + return ValkeyModule_ReplyWithError(ctx, jsonutil_code_to_message(rc)); + } + + // fetch doc object from Valkey dict + JDocument *doc = static_cast(ValkeyModule_ModuleTypeGetValue(key)); + + // get array length + jsn::vector vec; + bool is_v2_path; + rc = dom_array_length(doc, path, vec, is_v2_path); + if (rc != JSONUTIL_SUCCESS) return ValkeyModule_ReplyWithError(ctx, jsonutil_code_to_message(rc)); + + reply_array_command(vec, is_v2_path, ctx); + return VALKEYMODULE_OK; +} + +int Command_JsonArrAppend(ValkeyModuleCtx *ctx, ValkeyModuleString **argv, int argc) { + ValkeyModule_AutoMemory(ctx); + + ArrAppendCmdArgs args; + JsonUtilCode rc = parseArrAppendCmdArgs(ctx, argv, argc, &args); + if (rc != JSONUTIL_SUCCESS) { + if (rc == JSONUTIL_WRONG_NUM_ARGS) return ValkeyModule_WrongArity(ctx); + return ValkeyModule_ReplyWithError(ctx, jsonutil_code_to_message(rc)); + } + + ValkeyModuleKey *key; + rc = verify_doc_key(ctx, args.key, &key); + if (rc != JSONUTIL_SUCCESS) { + return ValkeyModule_ReplyWithError(ctx, jsonutil_code_to_message(rc)); + } + + // fetch doc object from Valkey dict + JDocument *doc = static_cast(ValkeyModule_ModuleTypeGetValue(key)); + size_t orig_doc_size = dom_get_doc_size(doc); + + // begin tracking memory + int64_t begin_val = jsonstats_begin_track_mem(); + + // do array append + jsn::vector vec; + bool is_v2_path; + rc = dom_array_append(ctx, doc, args.path, args.jsons, args.json_len_arr, args.num_values, vec, is_v2_path); + if (rc != JSONUTIL_SUCCESS) return ValkeyModule_ReplyWithError(ctx, jsonutil_code_to_message(rc)); + + // end tracking memory + int64_t delta = jsonstats_end_track_mem(begin_val); + size_t new_doc_size = dom_get_doc_size(doc) + delta; + dom_set_doc_size(doc, new_doc_size); + + // update stats + jsonstats_update_stats_on_update(doc, orig_doc_size, new_doc_size, args.total_json_len); + + // replicate the command + ValkeyModule_ReplicateVerbatim(ctx); + + ValkeyModule_NotifyKeyspaceEvent(ctx, VALKEYMODULE_NOTIFY_GENERIC, "json.arrappend", args.key); + + reply_array_command(vec, is_v2_path, ctx); + return VALKEYMODULE_OK; +} + +/** + * A helper method to send a reply to the client for JSON.ARRPOP. + */ +STATIC void reply_arrpop(jsn::vector &vec, const bool is_v2_path, ValkeyModuleCtx *ctx) { + if (!is_v2_path) { + // Legacy path: return a single value, which is the first value. 
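+ // Illustrative example (hypothetical document): for {"a":[1,2],"s":"x"} at key k, + // JSON.ARRPOP k $.* replies ["2",null] (the popped element serialized as JSON, null for + // the non-array), while the legacy form JSON.ARRPOP k .a replies the single bulk string "2".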
+ for (auto it = vec.begin(); it != vec.end(); it++) { + if (it->GetLength() != 0) { // empty indicates empty array or wrong type + ValkeyModule_ReplyWithStringBuffer(ctx, it->GetString(), it->GetLength()); + return; + } + } + ValkeyModule_ReplyWithNull(ctx); + } else { + // JSONPath: return an array of popped values serialized as JSON; a null element indicates + // an empty array or wrong type. + ValkeyModule_ReplyWithArray(ctx, vec.size()); + for (auto it = vec.begin(); it != vec.end(); it++) { + if (it->GetLength() == 0) { // empty indicates empty array or wrong type + ValkeyModule_ReplyWithNull(ctx); + } else { + ValkeyModule_ReplyWithStringBuffer(ctx, it->GetString(), it->GetLength()); + } + } + } +} + +int Command_JsonArrPop(ValkeyModuleCtx *ctx, ValkeyModuleString **argv, int argc) { + ValkeyModule_AutoMemory(ctx); + + ValkeyModuleString *key_str; + const char *path; + int64_t index; + JsonUtilCode rc = parseArrPopCmdArgs(argv, argc, &key_str, &path, &index); + if (rc != JSONUTIL_SUCCESS) { + if (rc == JSONUTIL_WRONG_NUM_ARGS) return ValkeyModule_WrongArity(ctx); + return ValkeyModule_ReplyWithError(ctx, jsonutil_code_to_message(rc)); + } + + ValkeyModuleKey *key; + rc = verify_doc_key(ctx, key_str, &key); + if (rc != JSONUTIL_SUCCESS) { + return ValkeyModule_ReplyWithError(ctx, jsonutil_code_to_message(rc)); + } + + // fetch doc object from Valkey dict + JDocument *doc = static_cast<JDocument*>(ValkeyModule_ModuleTypeGetValue(key)); + size_t orig_doc_size = dom_get_doc_size(doc); + + // begin tracking memory + int64_t begin_val = jsonstats_begin_track_mem(); + + // do array pop + jsn::vector<rapidjson::StringBuffer> vec; + bool is_v2_path; + rc = dom_array_pop(doc, path, index, vec, is_v2_path); + if (rc != JSONUTIL_SUCCESS) { + if (rc == JSONUTIL_EMPTY_JSON_ARRAY) return ValkeyModule_ReplyWithNull(ctx); + return ValkeyModule_ReplyWithError(ctx, jsonutil_code_to_message(rc)); + } + + // end tracking memory + int64_t delta = jsonstats_end_track_mem(begin_val); + size_t new_doc_size = dom_get_doc_size(doc) + delta; + dom_set_doc_size(doc, new_doc_size); + + // update stats + jsonstats_update_stats_on_delete(doc, false, orig_doc_size, new_doc_size, abs(delta)); + + // replicate the command + ValkeyModule_ReplicateVerbatim(ctx); + + ValkeyModule_NotifyKeyspaceEvent(ctx, VALKEYMODULE_NOTIFY_GENERIC, "json.arrpop", key_str); + + reply_arrpop(vec, is_v2_path, ctx); + return VALKEYMODULE_OK; +} + +int Command_JsonArrInsert(ValkeyModuleCtx *ctx, ValkeyModuleString **argv, int argc) { + ValkeyModule_AutoMemory(ctx); + + ArrInsertCmdArgs args; + JsonUtilCode rc = parseArrInsertCmdArgs(ctx, argv, argc, &args); + if (rc != JSONUTIL_SUCCESS) { + if (rc == JSONUTIL_WRONG_NUM_ARGS) return ValkeyModule_WrongArity(ctx); + return ValkeyModule_ReplyWithError(ctx, jsonutil_code_to_message(rc)); + } + + ValkeyModuleKey *key; + rc = verify_doc_key(ctx, args.key, &key); + if (rc != JSONUTIL_SUCCESS) { + return ValkeyModule_ReplyWithError(ctx, jsonutil_code_to_message(rc)); + } + + // fetch doc object from Valkey dict + JDocument *doc = static_cast<JDocument*>(ValkeyModule_ModuleTypeGetValue(key)); + size_t orig_doc_size = dom_get_doc_size(doc); + + // begin tracking memory + int64_t begin_val = jsonstats_begin_track_mem(); + + // do array insert + jsn::vector<size_t> vec; + bool is_v2_path; + rc = dom_array_insert(ctx, doc, args.path, args.index, args.jsons, args.json_len_arr, args.num_values, + vec, is_v2_path); + if (rc != JSONUTIL_SUCCESS) return ValkeyModule_ReplyWithError(ctx, jsonutil_code_to_message(rc)); + + // end tracking memory + int64_t delta = jsonstats_end_track_mem(begin_val); + size_t new_doc_size = dom_get_doc_size(doc) + delta; +
dom_set_doc_size(doc, new_doc_size); + + // update stats + jsonstats_update_stats_on_insert(doc, false, orig_doc_size, new_doc_size, args.total_json_len); + + // replicate the command + ValkeyModule_ReplicateVerbatim(ctx); + + ValkeyModule_NotifyKeyspaceEvent(ctx, VALKEYMODULE_NOTIFY_GENERIC, "json.arrinsert", args.key); + + reply_array_command(vec, is_v2_path, ctx); + return VALKEYMODULE_OK; +} + +int Command_JsonArrTrim(ValkeyModuleCtx *ctx, ValkeyModuleString **argv, int argc) { + ValkeyModule_AutoMemory(ctx); + + ValkeyModuleString *key_str; + const char *path; + int64_t start; + int64_t stop; + JsonUtilCode rc = parseArrTrimCmdArgs(argv, argc, &key_str, &path, &start, &stop); + if (rc != JSONUTIL_SUCCESS) { + if (rc == JSONUTIL_WRONG_NUM_ARGS) return ValkeyModule_WrongArity(ctx); + return ValkeyModule_ReplyWithError(ctx, jsonutil_code_to_message(rc)); + } + + ValkeyModuleKey *key; + rc = verify_doc_key(ctx, key_str, &key); + if (rc != JSONUTIL_SUCCESS) { + return ValkeyModule_ReplyWithError(ctx, jsonutil_code_to_message(rc)); + } + + // fetch doc object from Valkey dict + JDocument *doc = static_cast(ValkeyModule_ModuleTypeGetValue(key)); + size_t orig_doc_size = dom_get_doc_size(doc); + + // begin tracking memory + int64_t begin_val = jsonstats_begin_track_mem(); + + // do array trim + jsn::vector vec; + bool is_v2_path; + rc = dom_array_trim(doc, path, start, stop, vec, is_v2_path); + if (rc != JSONUTIL_SUCCESS) return ValkeyModule_ReplyWithError(ctx, jsonutil_code_to_message(rc)); + + // end tracking memory + int64_t delta = jsonstats_end_track_mem(begin_val); + size_t new_doc_size = dom_get_doc_size(doc) + delta; + dom_set_doc_size(doc, new_doc_size); + + // update stats + jsonstats_update_stats_on_delete(doc, false, orig_doc_size, new_doc_size, abs(delta)); + + // replicate the command + ValkeyModule_ReplicateVerbatim(ctx); + + ValkeyModule_NotifyKeyspaceEvent(ctx, VALKEYMODULE_NOTIFY_GENERIC, "json.arrtrim", key_str); + + reply_array_command(vec, is_v2_path, ctx); + return VALKEYMODULE_OK; +} + +int Command_JsonClear(ValkeyModuleCtx *ctx, ValkeyModuleString **argv, int argc) { + ValkeyModule_AutoMemory(ctx); + + ValkeyModuleString *key_str; + const char *path; + + JsonUtilCode rc = parseSimpleCmdArgs(argv, argc, &key_str, &path); + if (rc != JSONUTIL_SUCCESS) { + if (rc == JSONUTIL_WRONG_NUM_ARGS) + return ValkeyModule_WrongArity(ctx); + else + return ValkeyModule_ReplyWithError(ctx, jsonutil_code_to_message(rc)); + } + + ValkeyModuleKey *key; + rc = verify_doc_key(ctx, key_str, &key); + if (rc != JSONUTIL_SUCCESS) return ValkeyModule_ReplyWithError(ctx, jsonutil_code_to_message(rc)); + + // fetch doc object from Valkey dict + JDocument *doc = static_cast(ValkeyModule_ModuleTypeGetValue(key)); + size_t orig_doc_size = dom_get_doc_size(doc); + + // begin tracking memory + int64_t begin_val = jsonstats_begin_track_mem(); + + // do element clear + size_t elements_cleared; + rc = dom_clear(doc, path, elements_cleared); + if (rc != JSONUTIL_SUCCESS) return ValkeyModule_ReplyWithError(ctx, jsonutil_code_to_message(rc)); + + // end tracking memory + int64_t delta = jsonstats_end_track_mem(begin_val); + size_t new_doc_size = dom_get_doc_size(doc) + delta; + dom_set_doc_size(doc, new_doc_size); + + // update stats + jsonstats_update_stats_on_delete(doc, false, orig_doc_size, new_doc_size, abs(delta)); + + // replicate the command + ValkeyModule_ReplicateVerbatim(ctx); + + ValkeyModule_NotifyKeyspaceEvent(ctx, VALKEYMODULE_NOTIFY_GENERIC, "json.clear", key_str); + return 
ValkeyModule_ReplyWithLongLong(ctx, static_cast<long long>(elements_cleared)); +} + +/** + * A helper method to send a reply to the client for JSON.ARRINDEX. + */ +STATIC void reply_arrindex(jsn::vector<int64_t> &vec, const bool is_v2_path, ValkeyModuleCtx *ctx) { + if (!is_v2_path) { + // Legacy path: return a single value, which is the first value. + for (auto it = vec.begin(); it != vec.end(); it++) { + if (*it != INT64_MAX) { // INT64_MAX indicates wrong type + ValkeyModule_ReplyWithLongLong(ctx, static_cast<long long>(*it)); + return; + } + } + // It's impossible to reach here, because the upstream method has verified there is at least + // one array value. + ValkeyModule_Assert(false); + } else { + // JSONPath: return an array of indexes. + // If a value is INT64_MAX, its corresponding element is null. + ValkeyModule_ReplyWithArray(ctx, vec.size()); + for (auto it = vec.begin(); it != vec.end(); it++) { + if ((*it) == INT64_MAX) { // INT64_MAX indicates wrong type + ValkeyModule_ReplyWithNull(ctx); + } else { + ValkeyModule_ReplyWithLongLong(ctx, static_cast<long long>(*it)); + } + } + } +} + +int Command_JsonArrIndex(ValkeyModuleCtx *ctx, ValkeyModuleString **argv, int argc) { + ValkeyModule_AutoMemory(ctx); + + ArrIndexCmdArgs args; + JsonUtilCode rc = parseArrIndexCmdArgs(argv, argc, &args); + if (rc != JSONUTIL_SUCCESS) { + if (rc == JSONUTIL_WRONG_NUM_ARGS) return ValkeyModule_WrongArity(ctx); + return ValkeyModule_ReplyWithError(ctx, jsonutil_code_to_message(rc)); + } + + ValkeyModuleKey *key; + rc = verify_doc_key(ctx, args.key, &key, true); + if (rc != JSONUTIL_SUCCESS) { + return ValkeyModule_ReplyWithError(ctx, jsonutil_code_to_message(rc)); + } + + // fetch doc object from Valkey dict + JDocument *doc = static_cast<JDocument*>(ValkeyModule_ModuleTypeGetValue(key)); + + // do array index of + jsn::vector<int64_t> vec; + bool is_v2_path; + rc = dom_array_index_of(doc, args.path, args.scalar_val, args.scalar_val_len, + args.start, args.stop, vec, is_v2_path); + if (rc != JSONUTIL_SUCCESS) return ValkeyModule_ReplyWithError(ctx, jsonutil_code_to_message(rc)); + + reply_arrindex(vec, is_v2_path, ctx); + return VALKEYMODULE_OK; +} + +/** + * A helper method to send a reply to the client for JSON.TYPE. + */ +STATIC int reply_type(jsn::vector<jsn::string> &vec, const bool is_v2_path, ValkeyModuleCtx *ctx) { + if (!is_v2_path) { + // Legacy path: return a single value, which is the first value. + if (vec.empty()) { + // It's impossible to reach here, because the upstream method has verified there is at least one value. + ValkeyModule_Assert(false); + } else { + auto it = vec.begin(); + return ValkeyModule_ReplyWithSimpleString(ctx, it->c_str()); + } + } else { + // JSONPath: return an array of types.
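+ // Illustrative example (hypothetical document): for {"a":[],"s":"x"} at key k, + // JSON.TYPE k $.* replies ["array","string"], while the legacy form replies only the + // first type.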
+ ValkeyModule_ReplyWithArray(ctx, vec.size()); + for (auto it = vec.begin(); it != vec.end(); it++) { + ValkeyModule_ReplyWithSimpleString(ctx, it->c_str()); + } + return VALKEYMODULE_OK; + } +} + +int Command_JsonType(ValkeyModuleCtx *ctx, ValkeyModuleString **argv, int argc) { + ValkeyModule_AutoMemory(ctx); + + ValkeyModuleString *key_str; + const char *path; + JsonUtilCode rc = parseSimpleCmdArgs(argv, argc, &key_str, &path); + if (rc != JSONUTIL_SUCCESS) { + if (rc == JSONUTIL_WRONG_NUM_ARGS) return ValkeyModule_WrongArity(ctx); + return ValkeyModule_ReplyWithError(ctx, jsonutil_code_to_message(rc)); + } + + ValkeyModuleKey *key; + rc = verify_doc_key(ctx, key_str, &key, true); + if (rc != JSONUTIL_SUCCESS) { + if (rc == JSONUTIL_DOCUMENT_KEY_NOT_FOUND) return ValkeyModule_ReplyWithNull(ctx); + return ValkeyModule_ReplyWithError(ctx, jsonutil_code_to_message(rc)); + } + + // fetch doc object from Valkey dict + JDocument *doc = static_cast(ValkeyModule_ModuleTypeGetValue(key)); + + // get type of the value + jsn::vector vec; + bool is_v2_path; + rc = dom_value_type(doc, path, vec, is_v2_path); + if (rc != JSONUTIL_SUCCESS) { + if (rc == JSONUTIL_INVALID_JSON_PATH || rc == JSONUTIL_JSON_PATH_NOT_EXIST) + return ValkeyModule_ReplyWithNull(ctx); + return ValkeyModule_ReplyWithError(ctx, jsonutil_code_to_message(rc)); + } + + return reply_type(vec, is_v2_path, ctx); +} + +int Command_JsonResp(ValkeyModuleCtx *ctx, ValkeyModuleString **argv, int argc) { + ValkeyModule_AutoMemory(ctx); + + ValkeyModuleString *key_str; + const char *path; + JsonUtilCode rc = parseSimpleCmdArgs(argv, argc, &key_str, &path); + if (rc != JSONUTIL_SUCCESS) { + if (rc == JSONUTIL_WRONG_NUM_ARGS) return ValkeyModule_WrongArity(ctx); + return ValkeyModule_ReplyWithError(ctx, jsonutil_code_to_message(rc)); + } + + ValkeyModuleKey *key; + rc = verify_doc_key(ctx, key_str, &key, true); + if (rc != JSONUTIL_SUCCESS) { + if (rc == JSONUTIL_DOCUMENT_KEY_NOT_FOUND) return ValkeyModule_ReplyWithNull(ctx); + return ValkeyModule_ReplyWithError(ctx, jsonutil_code_to_message(rc)); + } + + // fetch doc object from Valkey dict + JDocument *doc = static_cast(ValkeyModule_ModuleTypeGetValue(key)); + + // reply with RESP protocol + rc = dom_reply_with_resp(ctx, doc, path); + if (rc != JSONUTIL_SUCCESS) return ValkeyModule_ReplyWithError(ctx, jsonutil_code_to_message(rc)); + + return VALKEYMODULE_OK; +} + +STATIC JsonUtilCode processMemorySubCmd(ValkeyModuleCtx *ctx, ValkeyModuleString **argv, const int argc, + jsn::vector &vec, bool &is_v2_path) { + ValkeyModuleKey *key; + const char *path; + bool default_path; + JsonUtilCode rc = parseMemoryOrFieldsSubCmdArgs(ctx, argv, argc, &key, &path, default_path); + if (rc != JSONUTIL_SUCCESS) return rc; + + // fetch doc object from Valkey dict + JDocument *doc = static_cast(ValkeyModule_ModuleTypeGetValue(key)); + + // compute memory size of the JSON element + return dom_mem_size(doc, path, vec, is_v2_path, default_path); +} + +STATIC JsonUtilCode processFieldsSubCmd(ValkeyModuleCtx *ctx, ValkeyModuleString **argv, const int argc, + jsn::vector &vec, bool &is_v2_path) { + ValkeyModuleKey *key; + const char *path; + bool default_path; + JsonUtilCode rc = parseMemoryOrFieldsSubCmdArgs(ctx, argv, argc, &key, &path, default_path); + if (rc != JSONUTIL_SUCCESS) return rc; + + // fetch doc object from Valkey dict + JDocument *doc = static_cast(ValkeyModule_ModuleTypeGetValue(key)); + + // get number of fields for the value + return dom_num_fields(doc, path, vec, is_v2_path); +} + +STATIC 
JsonUtilCode processDepthSubCmd(ValkeyModuleCtx *ctx, ValkeyModuleString **argv, const int argc, size_t *depth) { + // there should be exactly 3 arguments + if (argc != 3) return JSONUTIL_WRONG_NUM_ARGS; + + ValkeyModuleKey *key; + JsonUtilCode rc = verify_doc_key(ctx, argv[2], &key, true); + if (rc != JSONUTIL_SUCCESS) return rc; + + // fetch doc object from Valkey dict + JDocument *doc = static_cast(ValkeyModule_ModuleTypeGetValue(key)); + + // get max path depth of the doc + dom_path_depth(doc, depth); + return JSONUTIL_SUCCESS; +} + +struct MaxDepthKey { + MaxDepthKey() : max_depth(0), keyname() {} + size_t max_depth; + jsn::string keyname; +}; + +STATIC void scan_max_depth_key_callback(ValkeyModuleCtx *ctx, ValkeyModuleString *keyname, ValkeyModuleKey *key, + void *privdata) { + VALKEYMODULE_NOT_USED(ctx); + if (ValkeyModule_ModuleTypeGetType(key) == DocumentType) { + JDocument *doc = static_cast(ValkeyModule_ModuleTypeGetValue(key)); + size_t depth = 0; + dom_path_depth(doc, &depth); + MaxDepthKey *mdk = static_cast(privdata); + if (depth > mdk->max_depth) { + mdk->max_depth = depth; + const char *s = ValkeyModule_StringPtrLen(keyname, nullptr); + mdk->keyname = jsn::string(s); + } + } +} + +STATIC void processMaxDepthKeySubCmd(ValkeyModuleCtx *ctx, MaxDepthKey *mdk) { + // scan keys + ValkeyModuleScanCursor *cursor = ValkeyModule_ScanCursorCreate(); + while (ValkeyModule_Scan(ctx, cursor, scan_max_depth_key_callback, mdk)) {} + ValkeyModule_ScanCursorDestroy(cursor); +} + +struct MaxSizeKey { + MaxSizeKey() : max_size(0), keyname() {} + size_t max_size; + jsn::string keyname; +}; + +STATIC void scan_max_size_key_callback(ValkeyModuleCtx *ctx, ValkeyModuleString *keyname, ValkeyModuleKey *key, + void *privdata) { + VALKEYMODULE_NOT_USED(ctx); + if (ValkeyModule_ModuleTypeGetType(key) == DocumentType) { + JDocument *doc = static_cast(ValkeyModule_ModuleTypeGetValue(key)); + size_t size = dom_get_doc_size(doc); + MaxSizeKey *msk = static_cast(privdata); + if (size > msk->max_size) { + msk->max_size = size; + const char *s = ValkeyModule_StringPtrLen(keyname, nullptr); + msk->keyname = jsn::string(s); + } + } +} + +STATIC void processMaxSizeKeySubCmd(ValkeyModuleCtx *ctx, MaxSizeKey *msk) { + // scan keys + ValkeyModuleScanCursor *cursor = ValkeyModule_ScanCursorCreate(); + while (ValkeyModule_Scan(ctx, cursor, scan_max_size_key_callback, msk)) {} + ValkeyModule_ScanCursorDestroy(cursor); +} + +struct KeyTableValidate { + std::unordered_map counts; + size_t handles = 0; + void walk_json(JValue &v) { + if (v.IsObject()) { + ValkeyModule_Log(nullptr, "debug", "Found Object"); + for (JValue::MemberIterator m = v.MemberBegin(); m != v.MemberEnd(); ++m) { + ValkeyModule_Log(nullptr, "debug", "Found Member : %s", m->name->getText()); + counts[&*(m->name)]++; + handles++; + walk_json(m->value); + } + } else if (v.IsArray()) { + for (size_t i = 0; i < v.Size(); ++i) { + walk_json(v[i]); + } + } + } +}; + +STATIC void keytable_validate(ValkeyModuleCtx *ctx, ValkeyModuleString *keyname, ValkeyModuleKey *key, void *privdata) { + VALKEYMODULE_NOT_USED(ctx); + VALKEYMODULE_NOT_USED(keyname); + auto ktv = reinterpret_cast(privdata); + if (ValkeyModule_ModuleTypeGetType(key) == DocumentType) { + JDocument *doc = static_cast(ValkeyModule_ModuleTypeGetValue(key)); + ktv->walk_json(doc->GetJValue()); + } +} + +STATIC std::string processKeytableCheckCmd(ValkeyModuleCtx *ctx, size_t *handles, size_t *keys) { + KeyTableValidate validate; + // + // Step 1, walk all of the keys in all of the databases, 
gathering the current reference counts for each key + // + int OriginalDb = ValkeyModule_GetSelectedDb(ctx); + int dbnum = 0; + while (ValkeyModule_SelectDb(ctx, dbnum) == VALKEYMODULE_OK) { + ValkeyModuleScanCursor *cursor = ValkeyModule_ScanCursorCreate(); + while (ValkeyModule_Scan(ctx, cursor, keytable_validate, &validate)) {} + ValkeyModule_ScanCursorDestroy(cursor); + dbnum++; + } + ValkeyModule_Assert(ValkeyModule_SelectDb(ctx, OriginalDb) == VALKEYMODULE_OK); + *handles = validate.handles; + *keys = validate.counts.size(); + // + // See if we agree on the overall totals + // + KeyTable::Stats stats = keyTable->getStats(); + if (stats.handles != validate.handles || stats.size != validate.counts.size()) { + std::ostringstream os; + os << "Mismatch on totals: Found: Handles:" << validate.handles << " & " << validate.counts.size() + << " Expected: " << stats.handles << " & " << stats.size; + return os.str(); + } + // + // Step 2, for each key, check the reference count against the KeyTable + // + return keyTable->validate_counts(validate.counts); +} + +/** + * A helper method to send a reply to the client for JSON.DEBUG MEMORY | FIELDS. + */ +STATIC int reply_debug_memory_fields(jsn::vector &vec, const bool is_v2_path, ValkeyModuleCtx *ctx) { + if (!is_v2_path) { + // Legacy path: return a single value, which is the first value. + if (vec.empty()) { + // It's impossible to reach here, because the upstream method has verified there is at least one value. + ValkeyModule_Assert(false); + } else { + auto it = vec.begin(); + return ValkeyModule_ReplyWithLongLong(ctx, *it); + } + } else { + // JSONPath: return an array of integers. + ValkeyModule_ReplyWithArray(ctx, vec.size()); + for (auto it = vec.begin(); it != vec.end(); it++) { + ValkeyModule_ReplyWithLongLong(ctx, *it); + } + return VALKEYMODULE_OK; + } +} + +int Command_JsonDebug(ValkeyModuleCtx *ctx, ValkeyModuleString **argv, int argc) { + ValkeyModule_AutoMemory(ctx); + + if (argc < 2) { + if (ValkeyModule_IsKeysPositionRequest(ctx)) { + return VALKEYMODULE_ERR; + } + return ValkeyModule_WrongArity(ctx); + } + + const char *subcmd = ValkeyModule_StringPtrLen(argv[1], nullptr); + if (!strcasecmp(subcmd, "MEMORY")) { + if (ValkeyModule_IsKeysPositionRequest(ctx)) { + if (argc < 3) { + return VALKEYMODULE_ERR; + } else { + ValkeyModule_KeyAtPos(ctx, 2); + return VALKEYMODULE_OK; + } + } + jsn::vector vec; + bool is_v2_path; + JsonUtilCode rc = processMemorySubCmd(ctx, argv, argc, vec, is_v2_path); + if (rc != JSONUTIL_SUCCESS) { + if (rc == JSONUTIL_WRONG_NUM_ARGS) return ValkeyModule_WrongArity(ctx); + if (rc == JSONUTIL_DOCUMENT_KEY_NOT_FOUND) return ValkeyModule_ReplyWithNull(ctx); + return ValkeyModule_ReplyWithError(ctx, jsonutil_code_to_message(rc)); + } + return reply_debug_memory_fields(vec, is_v2_path, ctx); + } else if (!strcasecmp(subcmd, "FIELDS")) { + if (ValkeyModule_IsKeysPositionRequest(ctx)) { + if (argc < 3) { + return VALKEYMODULE_ERR; + } else { + ValkeyModule_KeyAtPos(ctx, 2); + return VALKEYMODULE_OK; + } + } + jsn::vector vec; + bool is_v2_path; + JsonUtilCode rc = processFieldsSubCmd(ctx, argv, argc, vec, is_v2_path); + if (rc != JSONUTIL_SUCCESS) { + if (rc == JSONUTIL_WRONG_NUM_ARGS) return ValkeyModule_WrongArity(ctx); + if (rc == JSONUTIL_DOCUMENT_KEY_NOT_FOUND) return ValkeyModule_ReplyWithNull(ctx); + return ValkeyModule_ReplyWithError(ctx, jsonutil_code_to_message(rc)); + } + return reply_debug_memory_fields(vec, is_v2_path, ctx); + } else if (!strcasecmp(subcmd, "DEPTH")) { + if 
(ValkeyModule_IsKeysPositionRequest(ctx)) { + if (argc < 3) { + return VALKEYMODULE_ERR; + } else { + ValkeyModule_KeyAtPos(ctx, 2); + return VALKEYMODULE_OK; + } + } + size_t depth = 0; + JsonUtilCode rc = processDepthSubCmd(ctx, argv, argc, &depth); + if (rc != JSONUTIL_SUCCESS) { + if (rc == JSONUTIL_WRONG_NUM_ARGS) return ValkeyModule_WrongArity(ctx); + if (rc == JSONUTIL_DOCUMENT_KEY_NOT_FOUND) return ValkeyModule_ReplyWithNull(ctx); + return ValkeyModule_ReplyWithError(ctx, jsonutil_code_to_message(rc)); + } + return ValkeyModule_ReplyWithLongLong(ctx, depth); + } else if (!strcasecmp(subcmd, "MAX-DEPTH-KEY")) { + if (ValkeyModule_IsKeysPositionRequest(ctx)) { + return VALKEYMODULE_ERR; + } + + // ATTENTION: + // THIS IS AN UNDOCUMENTED SUBCOMMAND, TO BE USED FOR DEV TEST ONLY. DON'T RUN IT ON A PRODUCTION SYSTEM. + // KEY SCAN MAY RUN FOR A LONG TIME LOCKING OUT ALL OTHER CLIENTS. + + // there should be exactly 2 arguments + if (argc != 2) return ValkeyModule_WrongArity(ctx); + + MaxDepthKey mdk; + processMaxDepthKeySubCmd(ctx, &mdk); + ValkeyModule_ReplyWithArray(ctx, 2); + ValkeyModule_ReplyWithLongLong(ctx, mdk.max_depth); + ValkeyModule_ReplyWithSimpleString(ctx, mdk.keyname.c_str()); + return VALKEYMODULE_OK; + } else if (!strcasecmp(subcmd, "MAX-SIZE-KEY")) { + if (ValkeyModule_IsKeysPositionRequest(ctx)) { + return VALKEYMODULE_ERR; + } + + // ATTENTION: + // THIS IS AN UNDOCUMENTED SUBCOMMAND, TO BE USED FOR DEV TEST ONLY. DON'T RUN IT ON A PRODUCTION SYSTEM. + // KEY SCAN MAY RUN FOR A LONG TIME LOCKING OUT ALL OTHER CLIENTS. + + // there should be exactly 2 arguments + if (argc != 2) return ValkeyModule_WrongArity(ctx); + + MaxSizeKey msk; + processMaxSizeKeySubCmd(ctx, &msk); + ValkeyModule_ReplyWithArray(ctx, 2); + ValkeyModule_ReplyWithLongLong(ctx, msk.max_size); + ValkeyModule_ReplyWithSimpleString(ctx, msk.keyname.c_str()); + return VALKEYMODULE_OK; + } else if (!strcasecmp(subcmd, "KEYTABLE-CHECK")) { + // Validate that all use-counts of KeyTable are correct + if (ValkeyModule_IsKeysPositionRequest(ctx)) { + return VALKEYMODULE_ERR; + } + + // ATTENTION: + // THIS IS AN UNDOCUMENTED SUBCOMMAND, TO BE USED FOR DEV TEST ONLY. DON'T RUN IT ON A PRODUCTION SYSTEM. + // KEY SCAN MAY RUN FOR A LONG TIME LOCKING OUT ALL OTHER CLIENTS. + + // there should be exactly 2 arguments + if (argc != 2) return ValkeyModule_WrongArity(ctx); + + size_t handles, keys; + std::string error_message = processKeytableCheckCmd(ctx, &handles, &keys); + if (error_message.length() > 0) { + return ValkeyModule_ReplyWithError(ctx, error_message.c_str()); + } else { + ValkeyModule_Log(ctx, "info", "KeyTableCheck completed ok, Handles:%zu, Keys:%zu", handles, keys); + ValkeyModule_ReplyWithArray(ctx, 2); + ValkeyModule_ReplyWithLongLong(ctx, handles); + ValkeyModule_ReplyWithLongLong(ctx, keys); + return VALKEYMODULE_OK; + } + } else if (!strcasecmp(subcmd, "KEYTABLE-CORRUPT")) { + // Intentionally corrupt a KeyTable handle count (dev/test only) + if (ValkeyModule_IsKeysPositionRequest(ctx)) { + return VALKEYMODULE_ERR; + } + + // ATTENTION: + // THIS IS AN UNDOCUMENTED SUBCOMMAND, TO BE USED FOR DEV TEST ONLY. DON'T RUN IT ON A PRODUCTION SYSTEM.
+ // + + + + // there should be exactly 3 arguments + if (argc != 3) return ValkeyModule_WrongArity(ctx); + + size_t len; + const char *str = ValkeyModule_StringPtrLen(argv[2], &len); + + KeyTable_Handle h = keyTable->makeHandle(str, len); + ValkeyModule_Log(ctx, "warning", "*** Handle %s count is now %zd", str, h->getRefCount()); + return ValkeyModule_ReplyWithSimpleString(ctx, "OK"); + } else if (!strcasecmp(subcmd, "KEYTABLE-DISTRIBUTION")) { + // compute longest runs of non-empty hashtable entries, a direct measure of key distribution and + // worst-case run-time for lookup/insert/delete + if (ValkeyModule_IsKeysPositionRequest(ctx)) { + return VALKEYMODULE_ERR; + } + + // ATTENTION: + // THIS IS AN UNDOCUMENTED SUBCOMMAND, DON'T RUN IT ON A PRODUCTION SYSTEM + // UNLESS YOU KNOW WHAT YOU'RE DOING -- IT CAN LOCK THE MAINTHREAD FOR SEVERAL SECONDS + // + + + + // there should be exactly 3 arguments + if (argc != 3) return ValkeyModule_WrongArity(ctx); + size_t topN = atol(ValkeyModule_StringPtrLen(argv[2], nullptr)); + + KeyTable::LongStats ls = keyTable->getLongStats(topN); + ValkeyModule_ReplyWithArray(ctx, 2 * ls.runs.size()); + for (auto it = ls.runs.rbegin(); it != ls.runs.rend(); ++it) { + ValkeyModule_ReplyWithLongLong(ctx, it->first); + ValkeyModule_ReplyWithLongLong(ctx, it->second); + } + return VALKEYMODULE_OK; + } else if (!strcasecmp(subcmd, "HELP")) { + if (ValkeyModule_IsKeysPositionRequest(ctx)) { + return VALKEYMODULE_ERR; + } + + // there should be exactly 2 arguments + if (argc != 2) return ValkeyModule_WrongArity(ctx); + + std::vector<std::string> cmds; + cmds.push_back("JSON.DEBUG MEMORY <key> [path] - report memory size (bytes). " + "Without path reports document size without keys. " + "With path reports size including keys"); + cmds.push_back("JSON.DEBUG DEPTH <key> - report the maximum path depth of the JSON document."); + cmds.push_back("JSON.DEBUG FIELDS <key> [path] - report number of fields in the " + "JSON element. Path defaults to root if not provided."); + cmds.push_back("JSON.DEBUG HELP - print help message."); + cmds.push_back("------- DANGER, LONG RUNNING COMMANDS, DON'T USE ON PRODUCTION SYSTEM --------"); + cmds.push_back("JSON.DEBUG MAX-DEPTH-KEY - Find JSON key with maximum depth"); + cmds.push_back("JSON.DEBUG MAX-SIZE-KEY - Find JSON key with largest memory size"); + cmds.push_back("JSON.DEBUG KEYTABLE-CHECK - Extended KeyTable integrity check"); + cmds.push_back("JSON.DEBUG KEYTABLE-CORRUPT <key> - Intentionally corrupt KeyTable handle counts"); + cmds.push_back("JSON.DEBUG KEYTABLE-DISTRIBUTION <topN> - Find and count topN longest runs in KeyTable"); + + ValkeyModule_ReplyWithArray(ctx, cmds.size()); + for (auto& s : cmds) ValkeyModule_ReplyWithSimpleString(ctx, s.c_str()); + return VALKEYMODULE_OK; + } + + return ValkeyModule_ReplyWithError(ctx, jsonutil_code_to_message(JSONUTIL_UNKNOWN_SUBCOMMAND)); +} + +/* =========================== Callback Methods =========================== */ + +/* + * Load an OBJECT using the IO machinery.
+ */ +void *DocumentType_RdbLoad(ValkeyModuleIO *rdb, int encver) { + if (encver > DOCUMENT_TYPE_ENCODING_VERSION) { + ValkeyModule_LogIOError(rdb, "warning", + "Cannot load document type version %d, because current module's document version is %d.", + encver, DOCUMENT_TYPE_ENCODING_VERSION); + return nullptr; + } + + // begin tracking memory + JDocument *doc; + int64_t begin_val = jsonstats_begin_track_mem(); + JsonUtilCode rc = dom_load(&doc, rdb, encver); + int64_t delta = jsonstats_end_track_mem(begin_val); + if (rc != JSONUTIL_SUCCESS) { + ValkeyModule_Assert(delta == 0); + return nullptr; + } + // end tracking memory + size_t doc_size = dom_get_doc_size(doc) + delta; + dom_set_doc_size(doc, doc_size); + + // update stats + jsonstats_update_stats_on_insert(doc, true, 0, doc_size, doc_size); + return doc; +} + +/* + * Save the Document. Convert it into chunks, write them and then write an EOF marker + */ +void DocumentType_RdbSave(ValkeyModuleIO *rdb, void *value) { + JDocument *doc = static_cast(value); + dom_save(doc, rdb, DOCUMENT_TYPE_ENCODING_VERSION); + // + // Let's make sure any I/O error generates a log entry + // + if (ValkeyModule_IsIOError(rdb)) { + ValkeyModule_LogIOError(rdb, "warning", "Unable to save JSON object, I/O error"); + } +} + +void *DocumentType_Copy(ValkeyModuleString *from_key_name, ValkeyModuleString *to_key_name, + const void *value) { + VALKEYMODULE_NOT_USED(from_key_name); // We don't care about the from/to key names. + VALKEYMODULE_NOT_USED(to_key_name); + const JDocument *source = static_cast(value); + JDocument *doc = dom_copy(source); + // Treat this the same as JSON.SET . + size_t doc_size = dom_get_doc_size(doc); + jsonstats_update_stats_on_insert(doc, true, 0, doc_size, doc_size); + return doc; +} + +/* + * Defrag callback. + * If the JSON object size is less than or equal to the defrag threshold, the JSON object is + * re-allocated. The re-allocation is done by copying the original object into a new one, + * swapping them, and deleting the original one. Note that the current implementation does not + * support defrag stop and resume, which is needed for very large JSON objects. + */ +int DocumentType_Defrag(ValkeyModuleDefragCtx *ctx, ValkeyModuleString *key, void **value) { + VALKEYMODULE_NOT_USED(ctx); + VALKEYMODULE_NOT_USED(key); + ValkeyModule_Assert(*value != nullptr); + JDocument *orig = static_cast(*value); + size_t doc_size = dom_get_doc_size(orig); + // We do not want to defrag a key larger than the default max document size. + // If there is a need to do that, increase the defrag-threshold config value. 
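+ // Illustrative (threshold value hypothetical): with a 64MB defrag threshold, a 2MB document + // is copied into freshly allocated memory, swapped in, and the old copy freed, while a + // 100MB document is skipped entirely rather than blocking the defrag cycle.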
+ if (doc_size <= json_get_defrag_threshold()) { + JDocument *new_doc = dom_copy(orig); + dom_set_bucket_id(new_doc, dom_get_bucket_id(orig)); + *value = new_doc; + dom_free_doc(orig); // free the original value + jsonstats_increment_defrag_count(); + jsonstats_increment_defrag_bytes(doc_size); + } + return 0; +} + +void DocumentType_AofRewrite(ValkeyModuleIO *aof, ValkeyModuleString *key, void *value) { + JDocument *doc = static_cast(value); + rapidjson::StringBuffer oss; + dom_serialize(doc, nullptr, oss); + ValkeyModule_EmitAOF(aof, "JSON.SET", "scc", key, ".", oss.GetString()); +} + +void DocumentType_Free(void *value) { + JDocument *doc = static_cast(value); + size_t orig_doc_size = dom_get_doc_size(doc); + + // update stats + jsonstats_update_stats_on_delete(doc, true, orig_doc_size, 0, orig_doc_size); + + dom_free_doc(doc); +} + +size_t DocumentType_MemUsage(const void *value) { + const JDocument *doc = static_cast(value); + return dom_get_doc_size(doc); +} + +// NOTE: Valkey will prefix every section and field name with the module name. +void Module_Info(ValkeyModuleInfoCtx *ctx, int for_crash_report) { + VALKEYMODULE_NOT_USED(for_crash_report); + + // section: core metrics +#define beginSection(name) \ + if (ValkeyModule_InfoAddSection(ctx, const_cast(name)) != VALKEYMODULE_ERR) { +#define endSection() } + +#define addULongLong(name, value) { \ + if (ValkeyModule_InfoAddFieldULongLong(ctx, const_cast(name), value) == VALKEYMODULE_ERR) { \ + ValkeyModule_Log(nullptr, "warning", "Can't add info variable %s", name); \ + } \ +} + +#define addDouble(name, value) { \ + if (ValkeyModule_InfoAddFieldDouble(ctx, const_cast(name), value) == VALKEYMODULE_ERR) { \ + ValkeyModule_Log(nullptr, "warning", "Can't add info variable %s", name); \ + } \ +} + + + // + // User visible metrics + // + beginSection("core_metrics") + addULongLong("total_memory_bytes", jsonstats_get_used_mem() + keyTable->getStats().bytes); + addULongLong("num_documents", jsonstats_get_num_doc_keys()); + endSection(); + + beginSection("ext_metrics") + addULongLong("max_path_depth_ever_seen", jsonstats_get_max_depth_ever_seen()); + addULongLong("max_document_size_ever_seen", jsonstats_get_max_size_ever_seen()); + addULongLong("total_malloc_bytes_used", memory_usage()); + addULongLong("memory_traps_enabled", memory_traps_enabled()); + addULongLong("defrag_count", jsonstats_get_defrag_count()); + addULongLong("defrag_bytes", jsonstats_get_defrag_bytes()); + endSection(); + + beginSection("document_composition") + addULongLong("boolean_count", logical_stats.boolean_count); + addULongLong("number_count", logical_stats.number_count); + addULongLong("sum_extra_numeric_chars", logical_stats.sum_extra_numeric_chars); + addULongLong("string_count", logical_stats.string_count); + addULongLong("sum_string_chars", logical_stats.sum_string_chars); + addULongLong("null_count", logical_stats.null_count); + addULongLong("array_count", logical_stats.array_count); + addULongLong("sum_array_elements", logical_stats.sum_array_elements); + addULongLong("object_count", logical_stats.object_count); + addULongLong("sum_object_members", logical_stats.sum_object_members); + addULongLong("sum_object_key_chars", logical_stats.sum_object_key_chars); + endSection(); + + // section: histograms + beginSection("histograms") + char name[128]; + char buf[1024]; + snprintf(name, sizeof(name), "doc_histogram"); + jsonstats_sprint_doc_hist(buf, sizeof(buf)); + ValkeyModule_InfoAddFieldCString(ctx, name, buf); + + snprintf(name, sizeof(name), "read_histogram"); 
+
+//
+// Change a KeyTable parameter. Validate the change first.
+//
+int handleFactor(float KeyTable::Factors::*f, const void *v, const char *name) {
+    float value = *reinterpret_cast<const long long *>(v) / 100.0f;
+    KeyTable::Factors factors;
+    factors = keyTable->getFactors();
+    float oldvalue = factors.*f;
+    factors.*f = value;
+    const char *validity = KeyTable::isValidFactors(factors);
+    if (validity == nullptr) {
+        keyTable->setFactors(factors);
+        ValkeyModule_Log(nullptr, "debug", "Set %s to %f (was %f)", name, double(value), double(oldvalue));
+        return VALKEYMODULE_OK;
+    } else {
+        ValkeyModule_Log(nullptr, "warning", "Error setting parameter %s to %g",
+                         validity, double(value));
+        return VALKEYMODULE_ERR;
+    }
+}
+
+//
+// Change a HashTable parameter. Validate the change first.
+//
+template<typename T>
+int handleHashTableFactor(T rapidjson::HashTableFactors::*f, const void *v, T scale_factor) {
+    int unscaled = *reinterpret_cast<const long long *>(v);
+    T value = unscaled / scale_factor;
+    rapidjson::HashTableFactors h = rapidjson::hashTableFactors;
+    h.*f = value;
+    const char *validity = h.isValid();
+    if (validity == nullptr) {
+        rapidjson::hashTableFactors = h;
+        return VALKEYMODULE_OK;
+    } else {
+        ValkeyModule_Log(nullptr, "warning", "Error setting parameter %s from (unscaled: %d)",
+                         validity, unscaled);
+        return VALKEYMODULE_ERR;
+    }
+}
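+// A minimal sketch (illustrative only; the helper name is invented and not used
+// elsewhere in this patch) of the convention the two handlers above share:
+// factor configs travel through the config API as integers scaled by 100, and a
+// pointer-to-member selects which factor field to touch.
+static inline void exampleSetFactor(KeyTable::Factors &factors,
+                                    float KeyTable::Factors::*member,
+                                    long long configValue) {
+    factors.*member = configValue / 100.0f;  // e.g. "85" from CONFIG SET becomes 0.85
+}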
+
+//
+// Resize the number of shards in the keyTable. This isn't multi-thread safe. But the current AppConfig architecture
+// doesn't provide a good way to solve this problem. Also, we only do it when the table is empty. As long as there
+// are no background operations in progress (slot migration, threaded save) we're good. Sadly, there's no easy way for
+// a module to detect that. Once we have RM_ApplyConfig, we'll restrict this to only happen at initialization time
+// and close this small timing hole.
+//
+
+KeyTable::Factors destroyKeyTable() {
+    KeyTable::Factors factors;
+    factors = keyTable->getFactors();
+    keyTable->~KeyTable();
+    memory_free(keyTable);
+    keyTable = nullptr;
+    return factors;
+}
+
+void initKeyTable(unsigned numShards, KeyTable::Factors factors) {
+    ValkeyModule_Assert(keyTable == nullptr);
+    ValkeyModule_Log(nullptr, "debug", "Setting shards to %d", numShards);
+    KeyTable::Config config;
+    config.malloc = memory_alloc;
+    config.free = memory_free;
+    config.hash = hash_function;
+    config.numShards = numShards;
+    keyTable = new (memory_alloc(sizeof(KeyTable))) KeyTable(config);
+    keyTable->setFactors(factors);
+}
+
+//
+// Handle "config set json.key-table-num-shards"
+//
+int handleSetNumShards(const void *v) {
+    int value = *reinterpret_cast<const long long *>(v);
+    auto s = keyTable->getStats();
+    if (s.handles != 0) {
+        ValkeyModule_Log(nullptr, "warning", "Can't change numShards after initialization");
+        return VALKEYMODULE_ERR;
+    }
+    if (value < KeyTable::MIN_SHARDS || value > KeyTable::MAX_SHARDS) {
+        ValkeyModule_Log(nullptr, "warning", "numShards value out of range");
+        return VALKEYMODULE_ERR;
+    }
+    initKeyTable(value, destroyKeyTable());
+    return VALKEYMODULE_OK;
+}
+
+int Config_GetInstrumentEnabled(const char *name, void *privdata) {
+    VALKEYMODULE_NOT_USED(name);
+    return *static_cast<int *>(privdata);
+}
+
+int Config_SetInstrumentEnabled(const char *name, int val, void *privdata, ValkeyModuleString **err) {
+    VALKEYMODULE_NOT_USED(name);
+    VALKEYMODULE_NOT_USED(err);
+    *static_cast<int *>(privdata) = val;
+    return VALKEYMODULE_OK;
+}
+
+//
+// Handle "config set json.enable-memory-traps"
+//
+int Config_GetMemoryTrapsEnable(const char *name, void *privdata) {
+    VALKEYMODULE_NOT_USED(name);
+    VALKEYMODULE_NOT_USED(privdata);
+    return memory_traps_enabled();
+}
+
+int Config_SetMemoryTrapsEnable(const char *name, int value, void *privdata, ValkeyModuleString **err) {
+    VALKEYMODULE_NOT_USED(name);
+    VALKEYMODULE_NOT_USED(err);
+    VALKEYMODULE_NOT_USED(privdata);
+    ValkeyModule_Log(nullptr, "warning", "Changing memory traps to %d", value);
+    size_t num_json_keys = jsonstats_get_num_doc_keys();
+    auto s = keyTable->getStats();
+    if (num_json_keys > 0 || s.handles != 0) {
+        static char errmsg[] = "Can't change memory traps with JSON data present";
+        *err = ValkeyModule_CreateString(nullptr, errmsg, strlen(errmsg));
+        ValkeyModule_Log(nullptr, "warning", "Can't change memory traps with %zu JSON keys present", num_json_keys);
+        return VALKEYMODULE_ERR;
+    }
+    auto shards = keyTable->getNumShards();
+    auto factors = destroyKeyTable();
+    ValkeyModule_Assert(memory_usage() == 0);
+    ValkeyModule_Assert(memory_traps_control(value));
+    initKeyTable(shards, factors);
+    return VALKEYMODULE_OK;
+}
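+// Illustrative sketch (hypothetical helper, not referenced elsewhere in this
+// patch) of the destroy/re-create pattern used by handleSetNumShards() and
+// Config_SetMemoryTrapsEnable() above: the KeyTable lives in module-allocated
+// memory, so teardown is an explicit destructor call plus memory_free(), and
+// construction is a placement new with the saved factors carried across.
+static inline void exampleRecreateKeyTable(unsigned numShards) {
+    KeyTable::Factors saved = destroyKeyTable();  // ~KeyTable() + memory_free()
+    initKeyTable(numShards, saved);               // placement new into memory_alloc()
+}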
+
+int Config_GetEnforceRdbVersionCheck(const char *name, void *privdata) {
+    VALKEYMODULE_NOT_USED(name);
+    return *static_cast<bool *>(privdata) ? 1 : 0;
+}
+
+int Config_SetEnforceRdbVersionCheck(const char *name, int val, void *privdata, ValkeyModuleString **err) {
+    VALKEYMODULE_NOT_USED(name);
+    VALKEYMODULE_NOT_USED(err);
+    *static_cast<bool *>(privdata) = (val == 1);
+    return VALKEYMODULE_OK;
+}
+
+long long Config_GetSizeConfig(const char *name, void *privdata) {
+    VALKEYMODULE_NOT_USED(name);
+    return *static_cast<size_t *>(privdata);
+}
+
+int Config_SetSizeConfig(const char *name, long long val, void *privdata, ValkeyModuleString **err) {
+    VALKEYMODULE_NOT_USED(name);
+    VALKEYMODULE_NOT_USED(err);
+    *static_cast<size_t *>(privdata) = val;
+    return VALKEYMODULE_OK;
+}
+
+long long Config_GetKeyTableGrowFactor(const char *name, void *privdata) {
+    VALKEYMODULE_NOT_USED(name);
+    VALKEYMODULE_NOT_USED(privdata);
+    return keyTable->getStats().factors.grow * 100;
+}
+
+int Config_SetKeyTableGrowFactor(const char *name, long long val, void *privdata, ValkeyModuleString **err) {
+    VALKEYMODULE_NOT_USED(privdata);
+    VALKEYMODULE_NOT_USED(err);
+    return handleFactor(&KeyTable::Factors::grow, &val, name);
+}
+
+long long Config_GetKeyTableShrinkFactor(const char *name, void *privdata) {
+    VALKEYMODULE_NOT_USED(name);
+    VALKEYMODULE_NOT_USED(privdata);
+    return keyTable->getStats().factors.shrink * 100;
+}
+
+int Config_SetKeyTableShrinkFactor(const char *name, long long val, void *privdata, ValkeyModuleString **err) {
+    VALKEYMODULE_NOT_USED(privdata);
+    VALKEYMODULE_NOT_USED(err);
+    return handleFactor(&KeyTable::Factors::shrink, &val, name);
+}
+
+long long Config_GetKeyTableMinLoadFactor(const char *name, void *privdata) {
+    VALKEYMODULE_NOT_USED(name);
+    VALKEYMODULE_NOT_USED(privdata);
+    return keyTable->getStats().factors.minLoad * 100;
+}
+
+int Config_SetKeyTableMinLoadFactor(const char *name, long long val, void *privdata, ValkeyModuleString **err) {
+    VALKEYMODULE_NOT_USED(privdata);
+    VALKEYMODULE_NOT_USED(err);
+    return handleFactor(&KeyTable::Factors::minLoad, &val, name);
+}
+
+long long Config_GetKeyTableMaxLoadFactor(const char *name, void *privdata) {
+    VALKEYMODULE_NOT_USED(name);
+    VALKEYMODULE_NOT_USED(privdata);
+    return keyTable->getStats().factors.maxLoad * 100;
+}
+
+int Config_SetKeyTableMaxLoadFactor(const char *name, long long val, void *privdata, ValkeyModuleString **err) {
+    VALKEYMODULE_NOT_USED(privdata);
+    VALKEYMODULE_NOT_USED(err);
+    return handleFactor(&KeyTable::Factors::maxLoad, &val, name);
+}
+
+long long Config_GetKeyTableNumShards(const char *name, void *privdata) {
+    VALKEYMODULE_NOT_USED(name);
+    VALKEYMODULE_NOT_USED(privdata);
+    return keyTable->getNumShards();
+}
+
+int Config_SetKeyTableNumShards(const char *name, long long val, void *privdata, ValkeyModuleString **err) {
+    VALKEYMODULE_NOT_USED(name);
+    VALKEYMODULE_NOT_USED(privdata);
+    VALKEYMODULE_NOT_USED(err);
+    return handleSetNumShards(&val);
+}
+
+long long Config_GetHashTableGrowFactor(const char *name, void *privdata) {
+    VALKEYMODULE_NOT_USED(name);
+    VALKEYMODULE_NOT_USED(privdata);
+    return rapidjson::hashTableFactors.grow * 100;
+}
+
+int Config_SetHashTableGrowFactor(const char *name, long long val, void *privdata, ValkeyModuleString **err) {
+    VALKEYMODULE_NOT_USED(name);
+    VALKEYMODULE_NOT_USED(privdata);
+    VALKEYMODULE_NOT_USED(err);
+    return handleHashTableFactor(&rapidjson::HashTableFactors::grow, &val, 100.f);
+}
+
+long long Config_GetHashTableShrinkFactor(const char *name, void *privdata) {
+    VALKEYMODULE_NOT_USED(name);
+    VALKEYMODULE_NOT_USED(privdata);
+    return rapidjson::hashTableFactors.shrink * 100;
+}
+
+int
Config_SetHashTableShrinkFactor(const char *name, long long val, void *privdata, ValkeyModuleString **err) {
+    VALKEYMODULE_NOT_USED(name);
+    VALKEYMODULE_NOT_USED(privdata);
+    VALKEYMODULE_NOT_USED(err);
+    return handleHashTableFactor(&rapidjson::HashTableFactors::shrink, &val, 100.f);
+}
+
+long long Config_GetHashTableMinLoadFactor(const char *name, void *privdata) {
+    VALKEYMODULE_NOT_USED(name);
+    VALKEYMODULE_NOT_USED(privdata);
+    return rapidjson::hashTableFactors.minLoad * 100;
+}
+
+int Config_SetHashTableMinLoadFactor(const char *name, long long val, void *privdata, ValkeyModuleString **err) {
+    VALKEYMODULE_NOT_USED(name);
+    VALKEYMODULE_NOT_USED(privdata);
+    VALKEYMODULE_NOT_USED(err);
+    return handleHashTableFactor(&rapidjson::HashTableFactors::minLoad, &val, 100.f);
+}
+
+long long Config_GetHashTableMaxLoadFactor(const char *name, void *privdata) {
+    VALKEYMODULE_NOT_USED(name);
+    VALKEYMODULE_NOT_USED(privdata);
+    return rapidjson::hashTableFactors.maxLoad * 100;
+}
+
+int Config_SetHashTableMaxLoadFactor(const char *name, long long val, void *privdata, ValkeyModuleString **err) {
+    VALKEYMODULE_NOT_USED(name);
+    VALKEYMODULE_NOT_USED(privdata);
+    VALKEYMODULE_NOT_USED(err);
+    return handleHashTableFactor(&rapidjson::HashTableFactors::maxLoad, &val, 100.f);
+}
+
+long long Config_GetHashTableMinSize(const char *name, void *privdata) {
+    VALKEYMODULE_NOT_USED(name);
+    VALKEYMODULE_NOT_USED(privdata);
+    return rapidjson::hashTableFactors.minHTSize;
+}
+
+int Config_SetHashTableMinSize(const char *name, long long val, void *privdata, ValkeyModuleString **err) {
+    VALKEYMODULE_NOT_USED(name);
+    VALKEYMODULE_NOT_USED(privdata);
+    VALKEYMODULE_NOT_USED(err);
+    return handleHashTableFactor(&rapidjson::HashTableFactors::minHTSize, &val, size_t(1));
+}
+
+int registerModuleConfigs(ValkeyModuleCtx *ctx) {
+    REGISTER_BOOL_CONFIG(ctx, "enable-memory-traps", 0, nullptr,
+                         Config_GetMemoryTrapsEnable, Config_SetMemoryTrapsEnable)
+    REGISTER_BOOL_CONFIG(ctx, "enable-instrument-insert", 0, &instrument_enabled_insert,
+                         Config_GetInstrumentEnabled, Config_SetInstrumentEnabled)
+    REGISTER_BOOL_CONFIG(ctx, "enable-instrument-update", 0, &instrument_enabled_update,
+                         Config_GetInstrumentEnabled, Config_SetInstrumentEnabled)
+    REGISTER_BOOL_CONFIG(ctx, "enable-instrument-delete", 0, &instrument_enabled_delete,
+                         Config_GetInstrumentEnabled, Config_SetInstrumentEnabled)
+    REGISTER_BOOL_CONFIG(ctx, "enable-instrument-dump-doc-before", 0,
+                         &instrument_enabled_dump_doc_before,
+                         Config_GetInstrumentEnabled, Config_SetInstrumentEnabled)
+    REGISTER_BOOL_CONFIG(ctx, "enable-instrument-dump-doc-after", 0,
+                         &instrument_enabled_dump_doc_after,
+                         Config_GetInstrumentEnabled, Config_SetInstrumentEnabled)
+    REGISTER_BOOL_CONFIG(ctx, "enable-instrument-dump-value-before-delete", 0,
+                         &instrument_enabled_dump_value_before_delete,
+                         Config_GetInstrumentEnabled, Config_SetInstrumentEnabled)
+    REGISTER_BOOL_CONFIG(ctx, "enforce-rdb-version-check", 0, &enforce_rdb_version_check,
+                         Config_GetEnforceRdbVersionCheck, Config_SetEnforceRdbVersionCheck)
+
+    REGISTER_NUMERIC_CONFIG(ctx, "max-document-size", DEFAULT_MAX_DOCUMENT_SIZE, VALKEYMODULE_CONFIG_MEMORY, 0,
+                            LLONG_MAX, &config_max_document_size, Config_GetSizeConfig, Config_SetSizeConfig)
+    REGISTER_NUMERIC_CONFIG(ctx, "defrag-threshold", DEFAULT_DEFRAG_THRESHOLD, VALKEYMODULE_CONFIG_MEMORY, 0,
+                            LLONG_MAX, &config_defrag_threshold, Config_GetSizeConfig, Config_SetSizeConfig)
+    REGISTER_NUMERIC_CONFIG(ctx, "max-path-limit", 128, VALKEYMODULE_CONFIG_DEFAULT, 0, INT_MAX,
+                            &config_max_path_limit, Config_GetSizeConfig, Config_SetSizeConfig)
+    REGISTER_NUMERIC_CONFIG(ctx, "max-parser-recursion-depth", 200, VALKEYMODULE_CONFIG_DEFAULT, 0,
+                            INT_MAX, &config_max_parser_recursion_depth, Config_GetSizeConfig,
+                            Config_SetSizeConfig)
+    REGISTER_NUMERIC_CONFIG(ctx, "max-recursive-descent-tokens", 20, VALKEYMODULE_CONFIG_DEFAULT, 0,
+                            INT_MAX, &config_max_recursive_descent_tokens, Config_GetSizeConfig,
+                            Config_SetSizeConfig)
+    REGISTER_NUMERIC_CONFIG(ctx, "max-query-string-size", 128*1024, VALKEYMODULE_CONFIG_DEFAULT, 0,
+                            INT_MAX, &config_max_query_string_size, Config_GetSizeConfig, Config_SetSizeConfig)
+
+    REGISTER_NUMERIC_CONFIG(ctx, "key-table-grow-factor", 100, VALKEYMODULE_CONFIG_DEFAULT, 0,
+                            INT_MAX, nullptr, Config_GetKeyTableGrowFactor, Config_SetKeyTableGrowFactor)
+    REGISTER_NUMERIC_CONFIG(ctx, "key-table-shrink-factor", 50, VALKEYMODULE_CONFIG_DEFAULT, 0,
+                            INT_MAX, nullptr, Config_GetKeyTableShrinkFactor, Config_SetKeyTableShrinkFactor)
+    REGISTER_NUMERIC_CONFIG(ctx, "key-table-min-load-factor", 25, VALKEYMODULE_CONFIG_DEFAULT, 0,
+                            INT_MAX, nullptr, Config_GetKeyTableMinLoadFactor, Config_SetKeyTableMinLoadFactor)
+    REGISTER_NUMERIC_CONFIG(ctx, "key-table-max-load-factor", 85, VALKEYMODULE_CONFIG_DEFAULT, 0,
+                            INT_MAX, nullptr, Config_GetKeyTableMaxLoadFactor, Config_SetKeyTableMaxLoadFactor)
+    REGISTER_NUMERIC_CONFIG(ctx, "key-table-num-shards", 32768, VALKEYMODULE_CONFIG_DEFAULT, KeyTable::MIN_SHARDS,
+                            KeyTable::MAX_SHARDS, nullptr, Config_GetKeyTableNumShards, Config_SetKeyTableNumShards)
+
+    REGISTER_NUMERIC_CONFIG(ctx, "hash-table-grow-factor", 100, VALKEYMODULE_CONFIG_DEFAULT, 0,
+                            INT_MAX, nullptr, Config_GetHashTableGrowFactor, Config_SetHashTableGrowFactor)
+    REGISTER_NUMERIC_CONFIG(ctx, "hash-table-shrink-factor", 50, VALKEYMODULE_CONFIG_DEFAULT, 0,
+                            INT_MAX, nullptr, Config_GetHashTableShrinkFactor, Config_SetHashTableShrinkFactor)
+    REGISTER_NUMERIC_CONFIG(ctx, "hash-table-min-load-factor", 25, VALKEYMODULE_CONFIG_DEFAULT, 0,
+                            INT_MAX, nullptr, Config_GetHashTableMinLoadFactor, Config_SetHashTableMinLoadFactor)
+    REGISTER_NUMERIC_CONFIG(ctx, "hash-table-max-load-factor", 85, VALKEYMODULE_CONFIG_DEFAULT, 0,
+                            INT_MAX, nullptr, Config_GetHashTableMaxLoadFactor, Config_SetHashTableMaxLoadFactor)
+    REGISTER_NUMERIC_CONFIG(ctx, "hash-table-min-size", 64, VALKEYMODULE_CONFIG_DEFAULT, 0, INT_MAX,
+                            nullptr, Config_GetHashTableMinSize, Config_SetHashTableMinSize)
+
+    ValkeyModule_LoadConfigs(ctx);
+    return VALKEYMODULE_OK;
+}
+
+/*
+ * Install stub datatype callback for aux_load.
+ */
+bool install_stub(ValkeyModuleCtx *ctx,
+                  const char *type_name,
+                  int encver,
+                  int (*aux_load)(ValkeyModuleIO *, int encver, int when)) {
+    ValkeyModuleTypeMethods type_methods;
+    memset(&type_methods, 0, sizeof(ValkeyModuleTypeMethods));
+    type_methods.version = VALKEYMODULE_TYPE_METHOD_VERSION;
+    type_methods.aux_load = aux_load;
+    if (ValkeyModule_CreateDataType(ctx, type_name, encver, &type_methods) == nullptr) {
+        ValkeyModule_Log(ctx, "warning", "Failed to create data type %s", type_name);
+        return false;
+    }
+    ValkeyModule_Log(ctx, "debug", "Successfully installed stub data type %s", type_name);
+    return true;
+}
+
+/*
+ * Check a string value, fail if the expected value isn't present.
+ */
+bool checkString(ValkeyModuleIO *ctx, const char *value, const char *caller) {
+    size_t str_len;
+    std::unique_ptr<char, void (*)(void *)> str(ValkeyModule_LoadStringBuffer(ctx, &str_len),
+                                                ValkeyModule_Free);
+    if (strncmp(value, str.get(), str_len)) {
+        ValkeyModule_Log(nullptr, "warning", "%s: Unexpected value in RDB. Expected %s Received %s",
+                         caller, value, str.get());
+        return false;
+    }
+    return true;
+}
+
+/*
+ * Check an integer value, fail if the expected value isn't present.
+ */
+bool checkInt(ValkeyModuleIO *ctx, uint64_t value, const char *caller) {
+    uint64_t val = ValkeyModule_LoadUnsigned(ctx);
+    if (value != val) {
+        ValkeyModule_Log(nullptr, "warning", "%s: Unexpected value in RDB Expected: %lx Received: %lx",
+                         caller, value, val);
+        return false;
+    }
+    return true;
+}
+
+/*
+ * Check the encoding version. For unsupported versions we ALWAYS put a message in the log,
+ * but we only fail the RDB load if the config tells us to.
+ */
+bool checkVersion(const char *type_name, int encver, int expected_encver) {
+    if (encver != expected_encver) {
+        if (enforce_rdb_version_check) {
+            ValkeyModule_Log(nullptr, "warning", "Unsupported Encoding Version %d for type:%s expected %d",
+                             encver, type_name, expected_encver);
+            return false;
+        } else {
+            ValkeyModule_Log(nullptr, "warning",
+                             "Unsupported Encoding Version %d for type:%s expected %d, WILL ATTEMPT LOADING ANYWAYS",
+                             encver, type_name, expected_encver);
+        }
+    }
+    return true;
+}
+
+bool checkVersionRange(const char *type_name, int encver, int ver_low, int ver_high) {
+    if (encver < ver_low || encver > ver_high) {
+        if (enforce_rdb_version_check) {
+            ValkeyModule_Log(nullptr, "warning", "Unsupported Encoding Version %d for type:%s expected [%d:%d]",
+                             encver, type_name, ver_low, ver_high);
+            return false;
+        } else {
+            ValkeyModule_Log(nullptr, "warning",
+                             "Unsupported Encoding Version %d for type:%s expected [%d:%d],"
+                             " WILL ATTEMPT TO LOAD ANYWAYS",
+                             encver, type_name, ver_low, ver_high);
+        }
+    }
+    return true;
+}
+
+/*
+ * Stub for scdtype0 data type.
+ */
+#define SCDTYPE_ENCVER 1
+int scdtype_aux_load(ValkeyModuleIO *ctx, int encver, int when) {
+    if (!checkVersion("scdtype0", encver, SCDTYPE_ENCVER)) return VALKEYMODULE_ERR;
+    if (when == VALKEYMODULE_AUX_AFTER_RDB) {
+        if (!checkInt(ctx, 0, "scdtype")) return VALKEYMODULE_ERR;
+    }
+    return VALKEYMODULE_OK;
+}
+
+#define GEARSDT_ENCVER 3
+int gearsdt_aux_load(ValkeyModuleIO *ctx, int encver, int when) {
+    if (!checkVersion("gearsdt", encver, GEARSDT_ENCVER)) return VALKEYMODULE_ERR;
+    if (when == VALKEYMODULE_AUX_AFTER_RDB) {
+        if (!checkString(ctx, "StreamReader", "gears-dt")) return VALKEYMODULE_ERR;
+        if (!checkInt(ctx, 0, "gears-dt")) return VALKEYMODULE_ERR;
+        if (!checkString(ctx, "CommandReader", "gears-dt")) return VALKEYMODULE_ERR;
+        if (!checkInt(ctx, 0, "gears-dt")) return VALKEYMODULE_ERR;
+        if (!checkString(ctx, "KeysReader", "gears-dt")) return VALKEYMODULE_ERR;
+        if (!checkInt(ctx, 0, "gears-dt")) return VALKEYMODULE_ERR;
+        if (!checkString(ctx, "", "gears-dt")) return VALKEYMODULE_ERR;
+    }
+    return VALKEYMODULE_OK;
+}
+
+#define GEARSRQ_ENCVER 1
+int gearsrq_aux_load(ValkeyModuleIO *ctx, int encver, int when) {
+    if (!checkVersion("gearsrq", encver, GEARSRQ_ENCVER)) return VALKEYMODULE_ERR;
+    if (when == VALKEYMODULE_AUX_BEFORE_RDB) {
+        if (!checkInt(ctx, 0, "gearsrq")) return VALKEYMODULE_ERR;
+    }
+    return VALKEYMODULE_OK;
+}
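+// A small worked sketch (illustrative helper, not used elsewhere in this patch)
+// of the XOR folding performed by hash_function() below: the 64-bit FNV-1a state
+// is folded so its high 26 bits perturb the low bits, and bucket selection then
+// depends only on the low 38 bits of the result.
+static inline size_t exampleFold(size_t h64) {
+    return h64 ^ (h64 >> 38);  // same fold as the final step of hash_function()
+}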
+
+/*
+ * The hash function is FNV-1a (See https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function)
+ * We are looking for a 38-bit hash function. As recommended, we use the 64-bit FNV-1a constants and then
+ * use XOR-folding to reduce the hash to 38 bits (as well as improving the randomness of the low order bits)
+ */
+size_t hash_function(const char *text, size_t length) {
+    const unsigned char *t = reinterpret_cast<const unsigned char *>(text);
+    size_t hsh = 14695981039346656037ull;
+    for (size_t i = 0; i < length; ++i) {
+        hsh = (hsh ^ t[i]) * 1099511628211ull;
+    }
+    //
+    // Now reduce to 38-bits
+    //
+    return hsh ^ (hsh >> 38);
+}
+
+void DocumentType_Digest(ValkeyModuleDigest *ctx, void *vdoc) {
+    JDocument *doc = reinterpret_cast<JDocument *>(vdoc);
+    dom_compute_digest(ctx, doc);
+}
+
+bool set_command_info(ValkeyModuleCtx *ctx, const char *name, int32_t arity) {
+    // Get command
+    ValkeyModuleCommand *command = ValkeyModule_GetCommand(ctx, name);
+    if (command == nullptr) {
+        ValkeyModule_Log(ctx, "warning", "Failed to get command '%s'", name);
+        return false;
+    }
+    ValkeyModuleCommandInfo info;
+    memset(&info, 0, sizeof(info));
+    info.version = VALKEYMODULE_COMMAND_INFO_VERSION;
+    info.arity = arity;
+
+    if (ValkeyModule_SetCommandInfo(command, &info) == VALKEYMODULE_ERR) {
+        ValkeyModule_Log(ctx, "warning", "Failed to set command info for %s", name);
+        return false;
+    }
+    return true;
+}
+
+// Overloaded method for setting key permissions
+bool set_command_info(ValkeyModuleCtx *ctx, const char *name, int32_t arity, uint64_t keyspec_flags,
+                      int bs_index, std::tuple<int, int, int> key_range) {
+    // Get command
+    ValkeyModuleCommand *command = ValkeyModule_GetCommand(ctx, name);
+    if (command == nullptr) {
+        ValkeyModule_Log(ctx, "warning", "Failed to get command '%s'", name);
+        return false;
+    }
+    ValkeyModuleCommandInfo info;
+    memset(&info, 0, sizeof(info));
+    info.version = VALKEYMODULE_COMMAND_INFO_VERSION;
+    info.arity = arity;
+
+    // We only need one key_spec entry, but key_specs are sent as a null-entry terminated array,
+    // so we leave a second value filled with 0s
+    ValkeyModuleCommandKeySpec cmdKeySpec[2];
+    memset(cmdKeySpec, 0, sizeof(cmdKeySpec));
+
+    cmdKeySpec[0].flags = keyspec_flags;
+    cmdKeySpec[0].begin_search_type = VALKEYMODULE_KSPEC_BS_INDEX;
+    cmdKeySpec[0].bs.index = {bs_index};
+    cmdKeySpec[0].find_keys_type = VALKEYMODULE_KSPEC_FK_RANGE;
+    cmdKeySpec[0].fk.range = {std::get<0>(key_range), std::get<1>(key_range), std::get<2>(key_range)};
+
+    info.key_specs = &cmdKeySpec[0];
+
+    if (ValkeyModule_SetCommandInfo(command, &info) == VALKEYMODULE_ERR) {
+        ValkeyModule_Log(ctx, "warning", "Failed to set command info for %s", name);
+        return false;
+    }
+    return true;
+}
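+// Illustrative note on the key_range tuple above: it feeds the
+// VALKEYMODULE_KSPEC_FK_RANGE spec as {lastkey, keystep, limit}. For example,
+// JSON.MGET is registered further below with bs_index 1 and
+// std::make_tuple(-2, 1, 0), which is read roughly as: keys start at argument 1
+// and end at the second-to-last argument (every argument except the trailing
+// path is a key), stepping one argument at a time.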
+
+/* ================================ Module OnLoad ============================= */
+
+extern "C" int ValkeyModule_OnLoad(ValkeyModuleCtx *ctx) {
+    // Register the module
+    if (ValkeyModule_Init(ctx, MODULE_NAME, MODULE_VERSION, VALKEYMODULE_APIVER_1) == VALKEYMODULE_ERR) {
+        ValkeyModule_Log(ctx, "warning", "Failed to initialize module %s version %d", MODULE_NAME, MODULE_VERSION);
+        return VALKEYMODULE_ERR;
+    }
+
+    // Register module type callbacks
+    ValkeyModuleTypeMethods type_methods;
+    memset(&type_methods, 0, sizeof(ValkeyModuleTypeMethods));
+    type_methods.version = VALKEYMODULE_TYPE_METHOD_VERSION;
+    type_methods.rdb_load = DocumentType_RdbLoad;
+    type_methods.rdb_save = DocumentType_RdbSave;
+    type_methods.copy = DocumentType_Copy;
+    type_methods.aof_rewrite = DocumentType_AofRewrite;
+    type_methods.mem_usage = DocumentType_MemUsage;
+    type_methods.free = DocumentType_Free;
+    type_methods.digest = DocumentType_Digest;
+    type_methods.defrag = DocumentType_Defrag;
+
+    // Create module type
+    DocumentType = ValkeyModule_CreateDataType(ctx, DOCUMENT_TYPE_NAME,
+                                               DOCUMENT_TYPE_ENCODING_VERSION, &type_methods);
+    if (DocumentType == nullptr) {
+        ValkeyModule_Log(ctx, "warning", "Failed to create data type %s encver %d",
+                         DOCUMENT_TYPE_NAME, DOCUMENT_TYPE_ENCODING_VERSION);
+        return VALKEYMODULE_ERR;
+    }
+
+    /*
+     * Now create the stub datatypes for search
+     */
+    if (!install_stub(ctx, "scdtype00", SCDTYPE_ENCVER, scdtype_aux_load)) return VALKEYMODULE_ERR;
+    if (!install_stub(ctx, "GEARS_DT0", GEARSDT_ENCVER, gearsdt_aux_load)) return VALKEYMODULE_ERR;
+    if (!install_stub(ctx, "GEAR_REQ0", GEARSRQ_ENCVER, gearsrq_aux_load)) return VALKEYMODULE_ERR;
+
+    // Indicate that we can handle I/O errors ourselves.
+    ValkeyModule_SetModuleOptions(ctx, VALKEYMODULE_OPTIONS_HANDLE_IO_ERRORS);
+
+    // Initialize metrics
+    JsonUtilCode rc = jsonstats_init();
+    if (rc != JSONUTIL_SUCCESS) {
+        ValkeyModule_Log(ctx, "warning", "%s", jsonutil_code_to_message(rc));
+        return VALKEYMODULE_ERR;
+    }
+
+    // Register info callback
+    if (ValkeyModule_RegisterInfoFunc(ctx, Module_Info) == VALKEYMODULE_ERR) {
+        ValkeyModule_Log(ctx, "warning", "Failed to register module info callback.");
+        return VALKEYMODULE_ERR;
+    }
+
+    const char *cmdflg_readonly = "fast readonly";
+    const char *cmdflg_slow_write_deny = "write deny-oom";
+    const char *cmdflg_fast_write = "fast write";
+    const char *cmdflg_fast_write_deny = "fast write deny-oom";
+    const char *cmdflg_debug = "readonly getkeys-api";
+    char json_category[] = "json";
+
+    if (ValkeyModule_AddACLCategory(ctx, json_category) == VALKEYMODULE_ERR)
+        return VALKEYMODULE_ERR;
+
+    const char *cat_readonly = "json read fast";
+    const char *cat_slow_write_deny = "json write slow";
+    const char *cat_fast_write = "json write fast";
+    const char *cat_fast_write_deny = "json write fast";
+    const char *cat_debug = "json read slow";
+
+    // Register commands
+    if (ValkeyModule_CreateCommand(ctx, "JSON.SET", Command_JsonSet, cmdflg_slow_write_deny, 1, 1, 1)
+        == VALKEYMODULE_ERR) {
+        ValkeyModule_Log(ctx, "warning", "Failed to create command JSON.SET.");
+        return VALKEYMODULE_ERR;
+    }
+    if (ValkeyModule_SetCommandACLCategories(ValkeyModule_GetCommand(ctx,"JSON.SET"), cat_slow_write_deny) == VALKEYMODULE_ERR) {
+        ValkeyModule_Log(ctx, "warning", "Failed to set command category for JSON.SET.");
+        return VALKEYMODULE_ERR;
+    }
+
+    if (ValkeyModule_CreateCommand(ctx, "JSON.GET", Command_JsonGet, cmdflg_readonly, 1, 1, 1) == VALKEYMODULE_ERR) {
+        ValkeyModule_Log(ctx, "warning", "Failed to create command JSON.GET.");
+        return VALKEYMODULE_ERR;
+    }
+    if (ValkeyModule_SetCommandACLCategories(ValkeyModule_GetCommand(ctx,"JSON.GET"), cat_readonly) == VALKEYMODULE_ERR) {
+        ValkeyModule_Log(ctx, "warning", "Failed to set command category for JSON.GET.");
+        return VALKEYMODULE_ERR;
+    }
+
+    if (ValkeyModule_CreateCommand(ctx, "JSON.MGET", Command_JsonMGet, cmdflg_readonly, 1, -2, 1) == VALKEYMODULE_ERR) {
+        ValkeyModule_Log(ctx, "warning", "Failed to create command JSON.MGET.");
+        return VALKEYMODULE_ERR;
+    }
+    if (ValkeyModule_SetCommandACLCategories(ValkeyModule_GetCommand(ctx,"JSON.MGET"), cat_readonly) == VALKEYMODULE_ERR) {
+        ValkeyModule_Log(ctx, "warning", "Failed to set command category for JSON.MGET.");
+        return VALKEYMODULE_ERR;
+    }
+
+    if (ValkeyModule_CreateCommand(ctx, "JSON.DEL", Command_JsonDel, cmdflg_fast_write, 1, 1, 1) == VALKEYMODULE_ERR) {
+        ValkeyModule_Log(ctx, "warning", "Failed to create command JSON.DEL.");
+        return VALKEYMODULE_ERR;
+    }
+    if (ValkeyModule_SetCommandACLCategories(ValkeyModule_GetCommand(ctx,"JSON.DEL"), cat_fast_write) == VALKEYMODULE_ERR) {
+        ValkeyModule_Log(ctx, "warning", "Failed to set command category for JSON.DEL.");
+        return VALKEYMODULE_ERR;
+    }
+
+    if (ValkeyModule_CreateCommand(ctx, "JSON.FORGET", Command_JsonDel, cmdflg_fast_write, 1, 1, 1) == VALKEYMODULE_ERR) {
+        ValkeyModule_Log(ctx, "warning", "Failed to create command JSON.FORGET.");
+        return VALKEYMODULE_ERR;
+    }
+    if (ValkeyModule_SetCommandACLCategories(ValkeyModule_GetCommand(ctx,"JSON.FORGET"), cat_fast_write) == VALKEYMODULE_ERR) {
+        ValkeyModule_Log(ctx, "warning", "Failed to set command category for JSON.FORGET.");
+        return VALKEYMODULE_ERR;
+    }
+
+    if (ValkeyModule_CreateCommand(ctx, "JSON.NUMINCRBY", Command_JsonNumIncrBy,
+                                   cmdflg_fast_write, 1, 1, 1) == VALKEYMODULE_ERR) {
+        ValkeyModule_Log(ctx, "warning", "Failed to create command JSON.NUMINCRBY.");
+        return VALKEYMODULE_ERR;
+    }
+    if (ValkeyModule_SetCommandACLCategories(ValkeyModule_GetCommand(ctx,"JSON.NUMINCRBY"), cat_fast_write) == VALKEYMODULE_ERR) {
+        ValkeyModule_Log(ctx, "warning", "Failed to set command category for JSON.NUMINCRBY.");
+        return VALKEYMODULE_ERR;
+    }
+
+    if (ValkeyModule_CreateCommand(ctx, "JSON.NUMMULTBY", Command_JsonNumMultBy,
+                                   cmdflg_fast_write, 1, 1, 1) == VALKEYMODULE_ERR) {
+        ValkeyModule_Log(ctx, "warning", "Failed to create command JSON.NUMMULTBY.");
+        return VALKEYMODULE_ERR;
+    }
+    if (ValkeyModule_SetCommandACLCategories(ValkeyModule_GetCommand(ctx,"JSON.NUMMULTBY"), cat_fast_write) == VALKEYMODULE_ERR) {
+        ValkeyModule_Log(ctx, "warning", "Failed to set command category for JSON.NUMMULTBY.");
+        return VALKEYMODULE_ERR;
+    }
+
+    if (ValkeyModule_CreateCommand(ctx, "JSON.STRLEN", Command_JsonStrLen, cmdflg_readonly, 1, 1, 1) == VALKEYMODULE_ERR) {
+        ValkeyModule_Log(ctx, "warning", "Failed to create command JSON.STRLEN.");
+        return VALKEYMODULE_ERR;
+    }
+    if (ValkeyModule_SetCommandACLCategories(ValkeyModule_GetCommand(ctx,"JSON.STRLEN"), cat_readonly) == VALKEYMODULE_ERR) {
+        ValkeyModule_Log(ctx, "warning", "Failed to set command category for JSON.STRLEN.");
+        return VALKEYMODULE_ERR;
+    }
+
+    if (ValkeyModule_CreateCommand(ctx, "JSON.STRAPPEND", Command_JsonStrAppend,
+                                   cmdflg_fast_write_deny, 1, 1, 1) == VALKEYMODULE_ERR) {
+        ValkeyModule_Log(ctx, "warning", "Failed to create command JSON.STRAPPEND.");
+        return VALKEYMODULE_ERR;
+    }
+    if (ValkeyModule_SetCommandACLCategories(ValkeyModule_GetCommand(ctx,"JSON.STRAPPEND"), cat_fast_write_deny) == VALKEYMODULE_ERR) {
+        ValkeyModule_Log(ctx, "warning", "Failed to set command category for JSON.STRAPPEND.");
+        return VALKEYMODULE_ERR;
+    }
+
+    if (ValkeyModule_CreateCommand(ctx, "JSON.TOGGLE", Command_JsonToggle,
+                                   cmdflg_fast_write_deny, 1, 1, 1) == VALKEYMODULE_ERR) {
+        ValkeyModule_Log(ctx, "warning", "Failed to create command JSON.TOGGLE.");
+        return VALKEYMODULE_ERR;
+    }
+    if (ValkeyModule_SetCommandACLCategories(ValkeyModule_GetCommand(ctx,"JSON.TOGGLE"), cat_fast_write_deny) == VALKEYMODULE_ERR) {
+        ValkeyModule_Log(ctx, "warning", "Failed to set command category for JSON.TOGGLE.");
+        return VALKEYMODULE_ERR;
+    }
"warning", "Failed to set command category for JSON.OBJLEN."); + return VALKEYMODULE_ERR; + } + + if (ValkeyModule_CreateCommand(ctx, "JSON.OBJKEYS", Command_JsonObjKeys, cmdflg_readonly, 1, 1, 1) == + VALKEYMODULE_ERR) { + ValkeyModule_Log(ctx, "warning", "Failed to create command JSON.OBJKEYS."); + return VALKEYMODULE_ERR; + } + if (ValkeyModule_SetCommandACLCategories(ValkeyModule_GetCommand(ctx,"JSON.OBJKEYS"), cat_readonly) == VALKEYMODULE_ERR) { + ValkeyModule_Log(ctx, "warning", "Failed to set command category for JSON.OBJKEYS."); + return VALKEYMODULE_ERR; + } + + if (ValkeyModule_CreateCommand(ctx, "JSON.ARRLEN", Command_JsonArrLen, cmdflg_readonly, 1, 1, 1) == VALKEYMODULE_ERR) { + ValkeyModule_Log(ctx, "warning", "Failed to create command JSON.ARRLEN."); + return VALKEYMODULE_ERR; + } + if (ValkeyModule_SetCommandACLCategories(ValkeyModule_GetCommand(ctx,"JSON.ARRLEN"), cat_readonly) == VALKEYMODULE_ERR) { + ValkeyModule_Log(ctx, "warning", "Failed to set command category for JSON.ARRLEN."); + return VALKEYMODULE_ERR; + } + + if (ValkeyModule_CreateCommand(ctx, "JSON.ARRAPPEND", Command_JsonArrAppend, + cmdflg_fast_write_deny, 1, 1, 1) == VALKEYMODULE_ERR) { + ValkeyModule_Log(ctx, "warning", "Failed to create command JSON.ARRAPPEND."); + return VALKEYMODULE_ERR; + } + if (ValkeyModule_SetCommandACLCategories(ValkeyModule_GetCommand(ctx,"JSON.ARRAPPEND"), cat_fast_write_deny) == VALKEYMODULE_ERR) { + ValkeyModule_Log(ctx, "warning", "Failed to set command category for JSON.ARRAPPEND."); + return VALKEYMODULE_ERR; + } + if (ValkeyModule_CreateCommand(ctx, "JSON.ARRPOP", Command_JsonArrPop, cmdflg_fast_write, 1, 1, 1) == + VALKEYMODULE_ERR) { + ValkeyModule_Log(ctx, "warning", "Failed to create command JSON.ARRPOP."); + return VALKEYMODULE_ERR; + } + if (ValkeyModule_SetCommandACLCategories(ValkeyModule_GetCommand(ctx,"JSON.ARRPOP"), cat_fast_write) == VALKEYMODULE_ERR) { + ValkeyModule_Log(ctx, "warning", "Failed to set command category for JSON.ARRPOP."); + return VALKEYMODULE_ERR; + } + + + if (ValkeyModule_CreateCommand(ctx, "JSON.ARRINSERT", Command_JsonArrInsert, + cmdflg_fast_write_deny, 1, 1, 1) == VALKEYMODULE_ERR) { + ValkeyModule_Log(ctx, "warning", "Failed to create command JSON.ARRINSERT."); + return VALKEYMODULE_ERR; + } + if (ValkeyModule_SetCommandACLCategories(ValkeyModule_GetCommand(ctx,"JSON.ARRINSERT"), cat_fast_write_deny) == VALKEYMODULE_ERR) { + ValkeyModule_Log(ctx, "warning", "Failed to set command category for JSON.ARRINSERT."); + return VALKEYMODULE_ERR; + } + + if (ValkeyModule_CreateCommand(ctx, "JSON.ARRTRIM", Command_JsonArrTrim, cmdflg_fast_write, + 1, 1, 1) == VALKEYMODULE_ERR) { + ValkeyModule_Log(ctx, "warning", "Failed to create command JSON.ARRTRIM."); + return VALKEYMODULE_ERR; + } + if (ValkeyModule_SetCommandACLCategories(ValkeyModule_GetCommand(ctx,"JSON.ARRTRIM"), cat_fast_write) == VALKEYMODULE_ERR) { + ValkeyModule_Log(ctx, "warning", "Failed to set command category for JSON.ARRTRIM."); + return VALKEYMODULE_ERR; + } + + if (ValkeyModule_CreateCommand(ctx, "JSON.CLEAR", Command_JsonClear, cmdflg_fast_write, + 1, 1, 1) == VALKEYMODULE_ERR) { + ValkeyModule_Log(ctx, "warning", "Failed to create command JSON.CLEAR."); + return VALKEYMODULE_ERR; + } + if (ValkeyModule_SetCommandACLCategories(ValkeyModule_GetCommand(ctx,"JSON.CLEAR"), cat_fast_write) == VALKEYMODULE_ERR) { + ValkeyModule_Log(ctx, "warning", "Failed to set command category for JSON.CLEAR."); + return VALKEYMODULE_ERR; + } + + if (ValkeyModule_CreateCommand(ctx, 
"JSON.ARRINDEX", Command_JsonArrIndex, cmdflg_readonly, + 1, 1, 1) == VALKEYMODULE_ERR) { + ValkeyModule_Log(ctx, "warning", "Failed to create command JSON.ARRINDEX."); + return VALKEYMODULE_ERR; + } + if (ValkeyModule_SetCommandACLCategories(ValkeyModule_GetCommand(ctx,"JSON.ARRINDEX"), cat_readonly) == VALKEYMODULE_ERR) { + ValkeyModule_Log(ctx, "warning", "Failed to set command category for JSON.ARRINDEX."); + return VALKEYMODULE_ERR; + } + + if (ValkeyModule_CreateCommand(ctx, "JSON.TYPE", Command_JsonType, cmdflg_readonly, 1, 1, 1) == VALKEYMODULE_ERR) { + ValkeyModule_Log(ctx, "warning", "Failed to create command JSON.TYPE."); + return VALKEYMODULE_ERR; + } + if (ValkeyModule_SetCommandACLCategories(ValkeyModule_GetCommand(ctx,"JSON.TYPE"), cat_readonly) == VALKEYMODULE_ERR) { + ValkeyModule_Log(ctx, "warning", "Failed to set command category for JSON.TYPE."); + return VALKEYMODULE_ERR; + } + + if (ValkeyModule_CreateCommand(ctx, "JSON.RESP", Command_JsonResp, cmdflg_readonly, 1, 1, 1) == VALKEYMODULE_ERR) { + ValkeyModule_Log(ctx, "warning", "Failed to create command JSON.RESP."); + return VALKEYMODULE_ERR; + } + if (ValkeyModule_SetCommandACLCategories(ValkeyModule_GetCommand(ctx,"JSON.RESP"), cat_readonly) == VALKEYMODULE_ERR) { + ValkeyModule_Log(ctx, "warning", "Failed to set command category for JSON.RESP."); + return VALKEYMODULE_ERR; + } + + if (ValkeyModule_CreateCommand(ctx, "JSON.DEBUG", NULL, cmdflg_debug, 0, 0, 0) == VALKEYMODULE_ERR) { + ValkeyModule_Log(ctx, "warning", "Failed to create command JSON.DEBUG."); + return VALKEYMODULE_ERR; + } + if (ValkeyModule_SetCommandACLCategories(ValkeyModule_GetCommand(ctx,"JSON.DEBUG"), cat_debug) == VALKEYMODULE_ERR) { + ValkeyModule_Log(ctx, "warning", "Failed to set command category for JSON.DEBUG."); + return VALKEYMODULE_ERR; + } + + + // Register JSON.DEBUG subcommands + ValkeyModuleCommand *parent = ValkeyModule_GetCommand(ctx, "JSON.DEBUG"); + if (ValkeyModule_CreateSubcommand(parent, "MEMORY", Command_JsonDebug, "", 2, 2, 1) == VALKEYMODULE_ERR) { + ValkeyModule_Log(ctx, "warning", "Failed to create subcommand MEMORY for command JSON.DEBUG."); + return VALKEYMODULE_ERR; + } + if (ValkeyModule_CreateSubcommand(parent, "DEPTH", Command_JsonDebug, "", 2, 2, 1) == VALKEYMODULE_ERR) { + ValkeyModule_Log(ctx, "warning", "Failed to create subcommand DEPTH for command JSON.DEBUG."); + return VALKEYMODULE_ERR; + } + if (ValkeyModule_CreateSubcommand(parent, "FIELDS", Command_JsonDebug, "", 2, 2, 1) == VALKEYMODULE_ERR) { + ValkeyModule_Log(ctx, "warning", "Failed to create subcommand FIELDS for command JSON.DEBUG."); + return VALKEYMODULE_ERR; + } + if (ValkeyModule_CreateSubcommand(parent, "HELP", Command_JsonDebug, "", 0, 0, 0) == VALKEYMODULE_ERR) { + ValkeyModule_Log(ctx, "warning", "Failed to create subcommand HELP for command JSON.DEBUG."); + return VALKEYMODULE_ERR; + } + if (ValkeyModule_CreateSubcommand(parent, "MAX-DEPTH-KEY", Command_JsonDebug, "", 0, 0, 0) == VALKEYMODULE_ERR) { + ValkeyModule_Log(ctx, "warning", "Failed to create subcommand MAX-DEPTH-KEY for command JSON.DEBUG."); + return VALKEYMODULE_ERR; + } + if (ValkeyModule_CreateSubcommand(parent, "MAX-SIZE-KEY", Command_JsonDebug, "", 0, 0, 0) == VALKEYMODULE_ERR) { + ValkeyModule_Log(ctx, "warning", "Failed to create subcommand MAX-SIZE-KEY for command JSON.DEBUG."); + return VALKEYMODULE_ERR; + } + if (ValkeyModule_CreateSubcommand(parent, "KEYTABLE-CHECK", Command_JsonDebug, "", 0, 0, 0) == VALKEYMODULE_ERR) { + ValkeyModule_Log(ctx, "warning", "Failed to 
+    if (ValkeyModule_CreateSubcommand(parent, "KEYTABLE-CHECK", Command_JsonDebug, "", 0, 0, 0) == VALKEYMODULE_ERR) {
+        ValkeyModule_Log(ctx, "warning", "Failed to create subcommand KEYTABLE-CHECK for command JSON.DEBUG.");
+        return VALKEYMODULE_ERR;
+    }
+    if (ValkeyModule_CreateSubcommand(parent, "KEYTABLE-CORRUPT", Command_JsonDebug, "", 2, 2, 1) == VALKEYMODULE_ERR) {
+        ValkeyModule_Log(ctx, "warning", "Failed to create subcommand KEYTABLE-CORRUPT for command JSON.DEBUG.");
+        return VALKEYMODULE_ERR;
+    }
+    if (ValkeyModule_CreateSubcommand(parent, "KEYTABLE-DISTRIBUTION", Command_JsonDebug, "", 0, 0, 0)
+        == VALKEYMODULE_ERR) {
+        ValkeyModule_Log(ctx, "warning", "Failed to create subcommand KEYTABLE-DISTRIBUTION for command JSON.DEBUG.");
+        return VALKEYMODULE_ERR;
+    }
+
+    // key-spec flags categories
+    const uint64_t ks_read_write_update = VALKEYMODULE_CMD_KEY_RW | VALKEYMODULE_CMD_KEY_UPDATE;
+    const uint64_t ks_read_write_insert = VALKEYMODULE_CMD_KEY_RW | VALKEYMODULE_CMD_KEY_INSERT;
+    const uint64_t ks_read_write_delete = VALKEYMODULE_CMD_KEY_RW | VALKEYMODULE_CMD_KEY_DELETE;
+
+    const uint64_t ks_read_write_access_update = ks_read_write_update | VALKEYMODULE_CMD_KEY_ACCESS;
+    const uint64_t ks_read_write_access_delete = ks_read_write_delete | VALKEYMODULE_CMD_KEY_ACCESS;
+
+    const uint64_t ks_read_only = VALKEYMODULE_CMD_KEY_RO;
+    const uint64_t ks_read_only_access = VALKEYMODULE_CMD_KEY_RO | VALKEYMODULE_CMD_KEY_ACCESS;
+
+    // Commands under RW + Update
+    if (!set_command_info(ctx, "JSON.SET", -4, ks_read_write_update, 1, std::make_tuple(0, 1, 0))) {
+        return VALKEYMODULE_ERR;
+    }
+    // Commands under RW + Insert
+    if (!set_command_info(ctx, "JSON.ARRAPPEND", -4, ks_read_write_insert, 1, std::make_tuple(0, 1, 0))) {
+        return VALKEYMODULE_ERR;
+    }
+    if (!set_command_info(ctx, "JSON.STRAPPEND", -3, ks_read_write_insert, 1, std::make_tuple(0, 1, 0))) {
+        return VALKEYMODULE_ERR;
+    }
+    if (!set_command_info(ctx, "JSON.ARRINSERT", -5, ks_read_write_insert, 1, std::make_tuple(0, 1, 0))) {
+        return VALKEYMODULE_ERR;
+    }
+    // Commands under RW + Delete
+    if (!set_command_info(ctx, "JSON.DEL", -2, ks_read_write_delete, 1, std::make_tuple(0, 1, 0))) {
+        return VALKEYMODULE_ERR;
+    }
+    if (!set_command_info(ctx, "JSON.FORGET", -2, ks_read_write_delete, 1, std::make_tuple(0, 1, 0))) {
+        return VALKEYMODULE_ERR;
+    }
+    if (!set_command_info(ctx, "JSON.ARRTRIM", 5, ks_read_write_delete, 1, std::make_tuple(0, 1, 0))) {
+        return VALKEYMODULE_ERR;
+    }
+    if (!set_command_info(ctx, "JSON.CLEAR", -2, ks_read_write_delete, 1, std::make_tuple(0, 1, 0))) {
+        return VALKEYMODULE_ERR;
+    }
+    // Commands under RW + Access + Update
+    if (!set_command_info(ctx, "JSON.NUMINCRBY", 4, ks_read_write_access_update, 1, std::make_tuple(0, 1, 0))) {
+        return VALKEYMODULE_ERR;
+    }
+    if (!set_command_info(ctx, "JSON.NUMMULTBY", 4, ks_read_write_access_update, 1, std::make_tuple(0, 1, 0))) {
+        return VALKEYMODULE_ERR;
+    }
+    if (!set_command_info(ctx, "JSON.TOGGLE", -2, ks_read_write_access_update, 1, std::make_tuple(0, 1, 0))) {
+        return VALKEYMODULE_ERR;
+    }
+    // Commands under RW + Access + Delete
+    if (!set_command_info(ctx, "JSON.ARRPOP", -2, ks_read_write_access_delete, 1, std::make_tuple(0, 1, 0))) {
+        return VALKEYMODULE_ERR;
+    }
+
+    // Commands under RO + Access
+    if (!set_command_info(ctx, "JSON.GET", -2, ks_read_only_access, 1, std::make_tuple(0, 1, 0))) {
+        return VALKEYMODULE_ERR;
+    }
+    if (!set_command_info(ctx, "JSON.MGET", -3, ks_read_only_access, 1, std::make_tuple(-2, 1, 0))) {
+        return VALKEYMODULE_ERR;
+    }
+    if (!set_command_info(ctx, "JSON.OBJKEYS", -2, ks_read_only_access, 1, std::make_tuple(0, 1, 0))) {
+        return VALKEYMODULE_ERR;
+    }
+    if (!set_command_info(ctx, "JSON.ARRINDEX", -4, ks_read_only_access, 1, std::make_tuple(0, 1, 0))) {
+        return VALKEYMODULE_ERR;
+    }
+    if (!set_command_info(ctx, "JSON.TYPE", -2, ks_read_only_access, 1, std::make_tuple(0, 1, 0))) {
+        return VALKEYMODULE_ERR;
+    }
+    if (!set_command_info(ctx, "JSON.RESP", -2, ks_read_only_access, 1, std::make_tuple(0, 1, 0))) {
+        return VALKEYMODULE_ERR;
+    }
+
+    // Commands under RO
+    if (!set_command_info(ctx, "JSON.STRLEN", -2, ks_read_only, 1, std::make_tuple(0, 1, 0))) {
+        return VALKEYMODULE_ERR;
+    }
+    if (!set_command_info(ctx, "JSON.OBJLEN", -2, ks_read_only, 1, std::make_tuple(0, 1, 0))) {
+        return VALKEYMODULE_ERR;
+    }
+    if (!set_command_info(ctx, "JSON.ARRLEN", -2, ks_read_only, 1, std::make_tuple(0, 1, 0))) {
+        return VALKEYMODULE_ERR;
+    }
+
+    // JSON.DEBUG and its sub-commands
+    if (!set_command_info(ctx, "JSON.DEBUG", -2)) return VALKEYMODULE_ERR;
+    if (!set_command_info(ctx, "JSON.DEBUG|MEMORY", -3, ks_read_only_access, 2, std::make_tuple(0, 1, 0))) {
+        return VALKEYMODULE_ERR;
+    }
+    if (!set_command_info(ctx, "JSON.DEBUG|FIELDS", -3, ks_read_only_access, 2, std::make_tuple(0, 1, 0))) {
+        return VALKEYMODULE_ERR;
+    }
+    if (!set_command_info(ctx, "JSON.DEBUG|DEPTH", 3, ks_read_only_access, 2, std::make_tuple(0, 1, 0))) {
+        return VALKEYMODULE_ERR;
+    }
+    if (!set_command_info(ctx, "JSON.DEBUG|HELP", 2)) return VALKEYMODULE_ERR;
+    // admin commands
+    if (!set_command_info(ctx, "JSON.DEBUG|MAX-DEPTH-KEY", 2)) return VALKEYMODULE_ERR;
+    if (!set_command_info(ctx, "JSON.DEBUG|MAX-SIZE-KEY", 2)) return VALKEYMODULE_ERR;
+    if (!set_command_info(ctx, "JSON.DEBUG|KEYTABLE-CHECK", 2)) return VALKEYMODULE_ERR;
+    if (!set_command_info(ctx, "JSON.DEBUG|KEYTABLE-CORRUPT", 3)) return VALKEYMODULE_ERR;
+    if (!set_command_info(ctx, "JSON.DEBUG|KEYTABLE-DISTRIBUTION", 3)) return VALKEYMODULE_ERR;
+
+    if (!memory_traps_control(false)) {
+        ValkeyModule_Log(ctx, "warning", "Failed to setup memory trap control");
+        return VALKEYMODULE_ERR;
+    }
+
+    //
+    // Setup the global string table
+    //
+    initKeyTable(KeyTable::MAX_SHARDS, KeyTable::Factors());
+    if (registerModuleConfigs(ctx) == VALKEYMODULE_ERR) return VALKEYMODULE_ERR;
+
+    return VALKEYMODULE_OK;
+}
diff --git a/src/json/json.h b/src/json/json.h
new file mode 100644
index 0000000..fa46123
--- /dev/null
+++ b/src/json/json.h
@@ -0,0 +1,22 @@
+#ifndef VALKEYJSONMODULE_JSON_H_
+#define VALKEYJSONMODULE_JSON_H_
+
+#include <cstddef>
+
+size_t json_get_max_document_size();
+size_t json_get_defrag_threshold();
+size_t json_get_max_path_limit();
+size_t json_get_max_parser_recursion_depth();
+size_t json_get_max_recursive_descent_tokens();
+size_t json_get_max_query_string_size();
+
+bool json_is_instrument_enabled_insert();
+bool json_is_instrument_enabled_update();
+bool json_is_instrument_enabled_delete();
+bool json_is_instrument_enabled_dump_doc_before();
+bool json_is_instrument_enabled_dump_doc_after();
+bool json_is_instrument_enabled_dump_value_before_delete();
+
+#define DOUBLE_CHARS_CUTOFF 24
+
+#endif  // VALKEYJSONMODULE_JSON_H_
diff --git a/src/json/json_api.cc b/src/json/json_api.cc
new file mode 100644
index 0000000..dc88b56
--- /dev/null
+++ b/src/json/json_api.cc
@@ -0,0 +1,120 @@
+#include <cstring>
+#include <cstdio>
+#include <string>
+#include <vector>
+#include <memory>
+#include "json/json_api.h"
+#include "json/dom.h"
+#include "json/memory.h"
+
+extern ValkeyModuleType* DocumentType;
+
+int is_json_key(ValkeyModuleCtx *ctx, ValkeyModuleKey *key) {
+    VALKEYMODULE_NOT_USED(ctx);
+    if (key == nullptr || ValkeyModule_KeyType(key) == VALKEYMODULE_KEYTYPE_EMPTY) return 0;
+    return (ValkeyModule_ModuleTypeGetType(key) == DocumentType ? 1 : 0);
+}
+
+int is_json_key2(ValkeyModuleCtx *ctx, ValkeyModuleString *keystr) {
+    ValkeyModuleKey *key = static_cast<ValkeyModuleKey *>(ValkeyModule_OpenKey(ctx, keystr, VALKEYMODULE_READ));
+    int is_json = is_json_key(ctx, key);
+    ValkeyModule_CloseKey(key);
+    return is_json;
+}
+
+static JDocument* get_json_document(ValkeyModuleCtx *ctx, const char *keyname, const size_t key_len) {
+    ValkeyModuleString *keystr = ValkeyModule_CreateString(ctx, keyname, key_len);
+    ValkeyModuleKey *key = static_cast<ValkeyModuleKey *>(ValkeyModule_OpenKey(ctx, keystr, VALKEYMODULE_READ));
+    if (!is_json_key(ctx, key)) {
+        ValkeyModule_CloseKey(key);
+        ValkeyModule_FreeString(ctx, keystr);
+        return nullptr;
+    }
+    JDocument *doc = static_cast<JDocument *>(ValkeyModule_ModuleTypeGetValue(key));
+    ValkeyModule_CloseKey(key);
+    ValkeyModule_FreeString(ctx, keystr);
+    return doc;
+}
+
+int get_json_value_type(ValkeyModuleCtx *ctx, const char *keyname, const size_t key_len, const char *path,
+                        char **type, size_t *len) {
+    *type = nullptr;
+    *len = 0;
+    JDocument *doc = get_json_document(ctx, keyname, key_len);
+    if (doc == nullptr) return -1;
+
+    jsn::vector<jsn::string> vec;
+    bool is_v2_path;
+    JsonUtilCode rc = dom_value_type(doc, path, vec, is_v2_path);
+    if (rc != JSONUTIL_SUCCESS || vec.empty()) return -1;
+    *type = static_cast<char *>(ValkeyModule_Alloc(vec[0].length() + 1));
+    *len = vec[0].length();
+    snprintf(*type, *len + 1, "%s", vec[0].c_str());
+    return 0;
+}
+
+int get_json_value(ValkeyModuleCtx *ctx, const char *keyname, const size_t key_len, const char *path,
+                   char **value, size_t *len) {
+    *value = nullptr;
+    *len = 0;
+    JDocument *doc = get_json_document(ctx, keyname, key_len);
+    if (doc == nullptr) return -1;
+
+    rapidjson::StringBuffer buf;
+    JsonUtilCode rc = dom_get_value_as_str(doc, path, nullptr, buf, false);
+    if (rc != JSONUTIL_SUCCESS) return -1;
+    *len = buf.GetLength();
+    *value = static_cast<char *>(ValkeyModule_Alloc(*len + 1));
+    snprintf(*value, *len + 1, "%s", buf.GetString());
+    return 0;
+}
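+/*
+ * Illustrative sketch (hypothetical caller, not part of this file): a consumer
+ * module reading one serialized value through get_json_value() above. Ownership
+ * of the returned buffer passes to the caller, which must release it with
+ * ValkeyModule_Free() since it was allocated with ValkeyModule_Alloc().
+ *
+ *     char *value; size_t len;
+ *     if (get_json_value(ctx, "doc", 3, ".a.b", &value, &len) == 0) {
+ *         // ... use value[0..len) ...
+ *         ValkeyModule_Free(value);
+ *     }
+ */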
+int get_json_values_and_types(ValkeyModuleCtx *ctx, const char *keyname, const size_t key_len, const char **paths,
+                              const int num_paths, char ***values, size_t **lengths, char ***types, size_t **type_lengths) {
+    ValkeyModule_Assert(values != nullptr);
+    ValkeyModule_Assert(lengths != nullptr);
+    *values = nullptr;
+    *lengths = nullptr;
+    if (types != nullptr) *types = nullptr;
+    if (type_lengths != nullptr) *type_lengths = nullptr;
+    JDocument *doc = get_json_document(ctx, keyname, key_len);
+    if (doc == nullptr) return -1;
+
+    *values = static_cast<char **>(ValkeyModule_Alloc(num_paths * sizeof(char *)));
+    *lengths = static_cast<size_t *>(ValkeyModule_Alloc(num_paths * sizeof(size_t)));
+    memset(*values, 0, num_paths * sizeof(char *));
+    memset(*lengths, 0, num_paths * sizeof(size_t));
+    for (int i = 0; i < num_paths; i++) {
+        rapidjson::StringBuffer buf;
+        JsonUtilCode rc = dom_get_value_as_str(doc, paths[i], nullptr, buf, false);
+        if (rc == JSONUTIL_SUCCESS) {
+            (*lengths)[i] = buf.GetLength();
+            (*values)[i] = static_cast<char *>(ValkeyModule_Alloc((*lengths)[i] + 1));
+            snprintf((*values)[i], (*lengths)[i] + 1, "%s", buf.GetString());
+        } else {
+            (*values)[i] = nullptr;
+        }
+    }
+
+    if (types != nullptr) {
+        ValkeyModule_Assert(type_lengths != nullptr);
+
+        *types = static_cast<char **>(ValkeyModule_Alloc(num_paths * sizeof(char *)));
+        *type_lengths = static_cast<size_t *>(ValkeyModule_Alloc(num_paths * sizeof(size_t)));
+        memset(*types, 0, num_paths * sizeof(char *));
+        memset(*type_lengths, 0, num_paths * sizeof(size_t));
+        for (int i = 0; i < num_paths; i++) {
+            jsn::vector<jsn::string> vec;
+            bool is_v2_path;
+            JsonUtilCode rc = dom_value_type(doc, paths[i], vec, is_v2_path);
+            if (rc == JSONUTIL_SUCCESS && !vec.empty()) {
+                (*type_lengths)[i] = vec[0].length();
+                (*types)[i] = static_cast<char *>(ValkeyModule_Alloc((*type_lengths)[i] + 1));
+                snprintf((*types)[i], (*type_lengths)[i] + 1, "%s", vec[0].c_str());
+            } else {
+                (*types)[i] = nullptr;
+            }
+        }
+    }
+    return 0;
+}
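+/*
+ * Illustrative cleanup sketch (hypothetical caller): everything handed back by
+ * get_json_values_and_types() above is allocated with ValkeyModule_Alloc(), so
+ * the caller releases each element and then each array:
+ *
+ *     for (int i = 0; i < num_paths; i++) {
+ *         if (values[i]) ValkeyModule_Free(values[i]);
+ *         if (types && types[i]) ValkeyModule_Free(types[i]);
+ *     }
+ *     ValkeyModule_Free(values);
+ *     ValkeyModule_Free(lengths);
+ *     if (types) { ValkeyModule_Free(types); ValkeyModule_Free(type_lengths); }
+ */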
diff --git a/src/json/json_api.h b/src/json/json_api.h
new file mode 100644
index 0000000..6ac3630
--- /dev/null
+++ b/src/json/json_api.h
@@ -0,0 +1,70 @@
+/**
+ * JSON C API for Search Module
+ */
+#ifndef VALKEYJSONMODULE_JSON_API_H_
+#define VALKEYJSONMODULE_JSON_API_H_
+
+#include <stddef.h>
+
+typedef struct ValkeyModuleCtx ValkeyModuleCtx;
+typedef struct ValkeyModuleKey ValkeyModuleKey;
+typedef struct ValkeyModuleString ValkeyModuleString;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Is it a JSON key?
+ */
+int is_json_key(ValkeyModuleCtx *ctx, ValkeyModuleKey *key);
+
+/**
+ * Another version of is_json_key, given the key name as a ValkeyModuleString.
+ */
+int is_json_key2(ValkeyModuleCtx *ctx, ValkeyModuleString *keystr);
+
+/**
+ * Get the type of the JSON value at the path. The path is expected to point to a single value.
+ * If multiple values match the path, only the type of the first one is returned.
+ *
+ * @type output param, JSON type. The caller is responsible for freeing the memory.
+ * @len output param, length of JSON type.
+ * @return 0 - success, -1 - error
+ */
+int get_json_value_type(ValkeyModuleCtx *ctx, const char *keyname, const size_t key_len, const char *path,
+                        char **type, size_t *len);
+
+/**
+ * Get serialized JSON value at the path. The path is expected to point to a single value.
+ * If multiple values match the path, only the first one is returned.
+ *
+ * @value output param, serialized JSON string. The caller is responsible for freeing the memory.
+ * @len output param, length of JSON string.
+ * @return 0 - success, -1 - error
+ */
+int get_json_value(ValkeyModuleCtx *ctx, const char *keyname, const size_t key_len, const char *path,
+                   char **value, size_t *len);
+
+/**
+ * Get serialized JSON values and JSON types at multiple paths. Each path is expected to point to a single value.
+ * If multiple values match a path, only the first one is returned.
+ *
+ * @values Output param, array of JSON strings.
+ *         The caller is responsible for freeing the memory: the array '*values' as well as all the strings '(*values)[i]'.
+ * @lengths Output param, array of lengths of each JSON string.
+ *          The caller is responsible for freeing the memory: the array '*lengths'.
+ * @types Output param, array of types as strings.
+ *        The caller is responsible for freeing the memory: the array '*types' as well as all the strings '(*types)[i]'.
+ * @type_lengths Output param, array of lengths of each type string.
+ *               The caller is responsible for freeing the memory: the array '*type_lengths'.
+ * @return 0 - success, -1 - error
+ */
+int get_json_values_and_types(ValkeyModuleCtx *ctx, const char *keyname, const size_t key_len, const char **paths,
+                              const int num_paths, char ***values, size_t **lengths, char ***types, size_t **type_lengths);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/json/keytable.cc b/src/json/keytable.cc
new file mode 100644
index 0000000..4fddd3d
--- /dev/null
+++ b/src/json/keytable.cc
@@ -0,0 +1,659 @@
+#include <mutex>
+#include <algorithm>
+#include <cstring>
+#include <cstdint>
+#include <new>
+#include <string>
+#include <sstream>
+#include <unordered_map>
+
+extern "C" {
+#include <./include/valkeymodule.h>
+}
+
+#define KEYTABLE_ASSERT ValkeyModule_Assert
+#include <json/keytable.h>
+
+/***************************************************************************************************
+ *
+ * The shard implements a hashtable of entries. Each entry consists of a pointer to a unique string.
+ *
+ * We implement open addressing using linear probing (see https://en.wikipedia.org/wiki/Linear_probing)
+ * See the text for details of the search, insert and deletion algorithms.
+ *
+ * A hash table is a vector of pointers to KeyTable_Layout objects. Thus for insertion, searching
+ * and deletion only a single hashing of the key passed in as a parameter is required; no subsequent
+ * hash computations are done.
+ *
+ * The rehash algorithm needs the hash value for each key in order to insert it into the hash table.
+ * For simplicity, rehashing is done synchronously.
+ *
+ * If the new table size is less than 2^19, we store the low order 19 bits of the original hash value in
+ * the hash-table entry itself (since it's a pointer) and just use that. Thus only the memory associated
+ * with the new and old hash tables needs to be accessed, reducing the cache footprint.
+ *
+ * If the new table size is greater than 2^19, we're out of bits in the pointer so we have to use the
+ * full value of the hash. That hash value is not recomputed, because we saved it when we generated
+ * the entry originally (see KeyTable_Layout), so it's just fetched. But fetching the hash value will
+ * cause an extra cache miss, further increasing the cost of a rehash for large tables.
+ *
+ * There's a trade-off between the number of shards and the size of each shard hashtable. We really want
+ * to keep the shard hashtable below 2^19 so that rehashes are fast. Thus when a table size grows to
+ * be larger than 2^19, we put a warning into the log that performance would suffer and we should
+ * increase the number of shards. Someday we could hook this up to an alarm.
+ *
+ */
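+// A compact sketch (illustrative, simplified from insert() and destroyHandle()
+// below) of the linear-probe loop the comment above describes: start at
+// hash % capacity and walk forward, wrapping at the end, until the key or an
+// empty slot is found. matches() stands in for the metadata/length/memcmp test.
+//
+//     size_t ix = hsh % capacity;
+//     while (entries[ix] && !matches(entries[ix], key)) {
+//         if (++ix >= capacity) ix = 0;   // wrap around
+//     }
+//     // entries[ix] is now the key, or the empty slot to claim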
+
+// non-constant so unit tests can control it
+size_t MAX_FAST_TABLE_SIZE = PtrWithMetaData<KeyTable_Layout>::METADATA_MASK + 1;
+
+struct KeyTable_Shard {
+    typedef PtrWithMetaData<KeyTable_Layout> EntryType;
+    size_t capacity;        // Number of entries in table
+    size_t size;            // Number of current entries
+    size_t bytes;           // number of bytes of all current entries
+    size_t handles;         // number of handles outstanding
+    size_t maxSearch;       // Max length of a search, since last read
+    EntryType *entries;     // Array of String Entries
+    std::mutex mutex;       // lock for this shard, mutable for "validate"
+    uint32_t rehashes;      // number of rehashes, since last read
+    static constexpr size_t MIN_TABLE_SIZE = 4;
+
+    //
+    // This logic implements the optimization that for Fast tables, we just get the low 19 bits of
+    // the original hash value, thereby avoiding an extra cache miss to fetch it from the key itself.
+    //
+    size_t getHashValueFromEntry(const EntryType& e) const {
+        size_t hashValue;
+        if (capacity < MAX_FAST_TABLE_SIZE) {
+            hashValue = e.getMetaData();
+        } else {
+            hashValue = e->getOriginalHash();
+            KEYTABLE_ASSERT((hashValue & EntryType::METADATA_MASK) == e.getMetaData());
+        }
+        return hashValue;
+    }
+
+    void makeTable(const KeyTable& t, size_t newCapacity) {
+        newCapacity = std::max(newCapacity, MIN_TABLE_SIZE);
+        KEYTABLE_ASSERT(newCapacity != capacity);  // oops full or empty.
+        capacity = newCapacity;
+        entries = new (t.malloc(capacity * sizeof(EntryType))) EntryType[capacity];
+    }
+
+    KeyTable_Shard() : mutex() {
+        capacity = 0;
+        size = 0;
+        bytes = 0;
+        handles = 0;
+        entries = nullptr;
+        rehashes = 0;
+        maxSearch = 0;
+    }
+
+    //
+    // The only real use for this is in the unit tests.
+    // We scan the table to make sure all keys are gone.
+    //
+    void destroy(KeyTable& t) {
+        MEMORY_VALIDATE(entries);
+        for (size_t i = 0; i < capacity; ++i) {
+            if (entries[i]) {
+                KEYTABLE_ASSERT(entries[i]->IsStuck());
+                t.free(&*entries[i]);
+            }
+            entries[i].clear();
+        }
+        t.free(entries);
+        entries = nullptr;
+    }
+
+    ~KeyTable_Shard() {
+        KEYTABLE_ASSERT(entries == nullptr);
+    }
+
+    float loadFactor() { return float(size) / float(capacity); }
+
+    size_t hashIndex(size_t hash) const { return hash % capacity; }
+
+    KeyTable_Layout *insert(KeyTable& t, size_t hsh, const char *ptr, size_t len, bool noescape) {
+        std::scoped_lock lck(mutex);
+        while (loadFactor() > t.getFactors().maxLoad) {
+            //
+            // Oops, table too full, resize it larger.
+            //
+            size_t newSize = capacity + std::max(size_t(capacity * t.getFactors().grow), size_t(1));
+            resizeTable(t, newSize);
+            if (newSize >= MAX_FAST_TABLE_SIZE) {
+                ValkeyModule_Log(nullptr, "warning",
+                                 "Fast KeyTable Shard size exceeded, increase "
+                                 "json.key-table-num-shards to improve performance");
+            }
+        }
+        size_t ix = hashIndex(hsh);
+        size_t metadata = hsh & EntryType::METADATA_MASK;
+        MEMORY_VALIDATE(entries);
+        for (size_t searches = 0; searches < capacity; ++searches) {
+            EntryType &entry = entries[ix];
+            if (!entry) {
+                //
+                // Empty, insert it here.
+                //
+                handles++;
+                size++;
+                bytes += len;
+                maxSearch = std::max(searches, maxSearch);
+
+                KeyTable_Layout *p = KeyTable_Layout::makeLayout(t.malloc, ptr, len, hsh, noescape);
+                entry = EntryType(p, metadata);
+                return p;
+            } else if (entry.getMetaData() == metadata &&  // Early out, don't hit the cache line....
+                       len == entry->getLength() &&
+                       0 == std::memcmp(ptr, entry->getText(), len)) {
+                //
+                // easy case. String already present, just bump the refcount and we're done.
+                // Use saturating arithmetic so it never fails. If you manage to legitimately
+                // have a reuse count > 2^29 then you'll never be able to recover the memory
+                // from that string. But who cares?
+                //
+                maxSearch = std::max(searches, maxSearch);
+                handles++;
+                if (entry->incrRefCount()) {
+                    t.stuckKeys++;
+                }
+                return &*entry;
+            }
+            if (++ix >= capacity) {
+                ix = 0;
+            }
+        }
+        KEYTABLE_ASSERT(false);
+        return nullptr;
+    }
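+    // Worked illustration (editorial, matching the arithmetic in insert() above):
+    // with grow = 1.0 (config value 100), a shard that hits maxLoad at capacity
+    // 1024 resizes to 1024 + max(1024 * 1.0, 1) = 2048, i.e. it doubles; for a
+    // tiny table the max(..., 1) term guarantees the capacity still advances.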
Accounting for wrap-around + // + size_t forward_distance(size_t from, size_t to) { + size_t result; + if (from <= to) { + result = to - from; + } else { + result = (to + capacity) - from; + } + KEYTABLE_ASSERT(result < capacity); + return result; + } + + KeyTable_Layout *clone(KeyTable& t, const KeyTable_Handle& h) { + std::scoped_lock lck(mutex); + handles++; + if (h->incrRefCount()) { + t.stuckKeys++; + } + return const_cast(&*h); + } + + void destroyHandle(const KeyTable& t, KeyTable_Handle& h, size_t hsh) { + std::scoped_lock lck(mutex); + handles--; + if (h->decrRefCount() > 0) { + h.clear(); // Kill the handle + return; // Easy case, still referenced. + } + // + // Ok, we need to remove this string from the hashtable. + // + size_t ix = hashIndex(hsh); + MEMORY_VALIDATE(entries); + for (size_t searches = 0; searches < capacity; ++searches) { + if (&*entries[ix] == &*h) { + // + // Found it!!! + // Update stats, nuke the handle and recover the space. + // + KEYTABLE_ASSERT(entries[ix].getMetaData() == (hsh & EntryType::METADATA_MASK)); + KEYTABLE_ASSERT(entries[ix]->getRefCount() == 0); + KEYTABLE_ASSERT(size > 0); + KEYTABLE_ASSERT(bytes >= h->getLength()); + bytes -= h->getLength(); + size--; + h.theHandle->poisonOriginalHash(); + t.free(&*h.theHandle); + h.clear(); // Kill the handle + entries[ix].clear(); + // + // Now reestablish the invariant of the algorithm by scanning forward until + // we hit another empty cell. While we're scanning we may have to move keys down + // into the newly freed slot. + // + size_t empty_ix = ix; // Remember where the empty slot is. + if (++ix >= capacity) ix = 0; // Next entry + while (entries[ix]) { + KEYTABLE_ASSERT(!entries[empty_ix]); + KEYTABLE_ASSERT(empty_ix != ix); + searches++; + // + // This non-empty key might have to be moved down to the empty slot. + // That happens if the forward_distance of the empty slot is less than + // The forward_distance of the current slot to the native slot for this key. + // + size_t nativeSlot = hashIndex(getHashValueFromEntry(entries[ix])); + if (forward_distance(nativeSlot, ix) > forward_distance(nativeSlot, empty_ix)) { + // Yes, this key can be moved. + entries[empty_ix] = entries[ix]; + entries[ix].clear(); + empty_ix = ix; + } + if (++ix >= capacity) ix = 0; + } + maxSearch = std::max(searches, maxSearch); + // + // Having removed an entry, check for rehashing + // + if (loadFactor() < t.getFactors().minLoad && capacity > MIN_TABLE_SIZE) { + size_t reduction = std::max(size_t(capacity * t.getFactors().shrink), size_t(1)); + resizeTable(t, capacity - reduction); + } + return; + } + if (++ix >= capacity) { + ix = 0; + } + } + KEYTABLE_ASSERT(false); // Not found ???? + } + + void resizeTable(const KeyTable& t, size_t newSize) { + uint64_t startTime = ValkeyModule_Milliseconds(); + if (capacity == newSize) return; // Nothing to do. + KEYTABLE_ASSERT(newSize >= size); // Otherwise it won't fit. + rehashes++; + MEMORY_VALIDATE(entries); + EntryType *oldEntries = entries; + size_t oldCapacity = capacity; + makeTable(t, newSize); + for (size_t i = 0; i < oldCapacity; ++i) { + if (oldEntries[i]) { + // + // Found valid entry, Compute hash to see where it goes. + // + EntryType& oldEntry = oldEntries[i]; + KEYTABLE_ASSERT(oldEntry->getRefCount() > 0); + size_t ix = hashIndex(getHashValueFromEntry(oldEntry)); + for (size_t searches = 0; searches < capacity; ++searches) { + if (!entries[ix]) { + // + // Empty, insert it. 
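+                        // (No duplicate check is needed during a rehash: every key in the
+                        // old table is already unique, so the first empty slot is correct.)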
+ // + entries[ix] = oldEntry; + maxSearch = std::max(searches, maxSearch); + goto nextOldEntry; + } + if (++ix >= capacity) ix = 0; + } + KEYTABLE_ASSERT(false); // can't fail if + } + nextOldEntry:{} + } + t.free(oldEntries); + uint64_t duration = ValkeyModule_Milliseconds() - startTime; + if (duration == 0) duration = 1; + uint64_t keys_per_second = (size / duration) * 1000; + ValkeyModule_Log(nullptr, "notice", + "Keytable Resize to %zu completed in %lu ms (%lu / sec)", + capacity, duration, keys_per_second); + } + + // + // Validate all of the entries and the counters. Unit test stuff. + // + std::string validate(const KeyTable& t, size_t shardNumber) const { + std::scoped_lock lck(const_cast(this->mutex)); // Cheat on the mutex + size_t this_refs = 0; + size_t this_size = 0; + size_t this_bytes = 0; + for (size_t i = 0; i < capacity; ++i) { + EntryType e = entries[i]; + if (e) { + this_size++; + this_refs += e->getRefCount(); + this_bytes += e->getLength(); + size_t orig_hash = t.hash(e->getText(), e->getLength()); + size_t correct_metadata = orig_hash & EntryType::METADATA_MASK; + size_t nativeIx = hashIndex(orig_hash); + // Validate the metadata field + if (e.getMetaData() != correct_metadata) { + std::ostringstream os; + os << "Found bad metadata in slot " << i << " Metadata:" << e.getMetaData() + << " Where it should be: " << correct_metadata << " Hash:" << orig_hash + << " TableSize:" << capacity; + return os.str(); + } + // + // Check the Invariant. If this entry isn't in its original slot (hashIndex) then + // none of the locations between the original slot and this one may be empty. + // + for (size_t ix = nativeIx; ix != i;) { + if (!entries[ix]) { + // Error + std::ostringstream os; + os << "Found invalid empty location at slot " << ix << " While validating" + << " key in slot " << i << " From NativeSlot:" << nativeIx + << " TableSize:" << capacity; + return os.str(); + } + if (++ix >= capacity) ix = 0; + } + } + } + // compare the counts. The summed refcounts only match handle counts if no stuck strings + if (this_size != size || + (t.stuckKeys == 0 ? this_refs != handles : false) || + this_bytes != bytes) { + std::ostringstream os; + os << "Count mismatch for shard: " << shardNumber << " Capacity:" << capacity + << " Handles:" << handles << " sum(refcounts):" << this_refs + << " Size:" << size << " this_size:" << this_size + << " Bytes:" << bytes << " this_bytes:" << this_bytes; + return os.str(); + } + return std::string(); // Empty means no failure. + } + std::string validate_counts(std::unordered_map& counts) const { + std::string result; + std::scoped_lock lck(const_cast(this->mutex)); // Cheat on the mutex + for (size_t i = 0; i < capacity; ++i) { + EntryType e = entries[i]; + if (e) { + if (counts[&*e] != e->getRefCount()) { + std::ostringstream os; + os + << "Found bad count for key: " << e->getText() + << " Found: " << e->getRefCount() + << " Expected:" << counts[&*e] + << "\n"; + result += os.str(); + } else { + counts.erase(&*e); + } + } + } + return result; // Empty means no failures found. + } + // Add our stats to the total so far. 
+ void updateStats(KeyTable::Stats& s) { + std::scoped_lock lck(mutex); + s.size += size; + s.bytes += bytes; + s.handles += handles; + s.maxTableSize = std::max(s.maxTableSize, capacity); + s.minTableSize = std::min(s.minTableSize, capacity); + s.totalTable += capacity; + s.rehashes += rehashes; + s.maxSearch = std::max(s.maxSearch, maxSearch); + // + // Reset the counters + // + maxSearch = 0; + rehashes = 0; + } + // Add our stats + void updateLongStats(KeyTable::LongStats& s, size_t topN) { + std::scoped_lock lck(mutex); + size_t thisRun = 0; + for (size_t i = 0; i < capacity; ++i) { + if (entries[i]) { + thisRun++; + } else if (thisRun != 0) { + s.runs[thisRun]++; + while (s.runs.size() > topN) { + s.runs.erase(s.runs.begin()); + } + thisRun = 0; + } + } + } +}; + +/* + * Setup the KeyTable itself. + */ +KeyTable::KeyTable(const Config& cfg) : + malloc(cfg.malloc), + free(cfg.free), + hash(cfg.hash), + numShards(cfg.numShards), + stuckKeys(0) +{ + KEYTABLE_ASSERT(numShards > 0); + KEYTABLE_ASSERT(malloc && free && hash); + shards = new(malloc(numShards * sizeof(KeyTable_Shard))) KeyTable_Shard[numShards]; + for (size_t i = 0; i < numShards; ++i) shards[i].makeTable(*this, 1); + KEYTABLE_ASSERT(!isValidFactors(factors)); +} + +KeyTable::~KeyTable() { + MEMORY_VALIDATE(shards); + for (size_t i = 0; i < numShards; ++i) { + shards[i].destroy(*this); + shards[i].~KeyTable_Shard(); + } + free(shards); + shards = nullptr; +} + +std::string KeyTable::validate() const { + std::string s; + for (size_t i = 0; i < numShards; ++i) { + s += shards[i].validate(*this, i); + } + return s; +} + +std::string KeyTable::validate_counts(std::unordered_map& counts) const { + std::string result; + result = validate(); + if (!result.empty()) return result; + // + // Now, we need to double compare the current keytable against the counts array. + // We scan the KeyTables and lookup each handle as we go, erasing it from the input counts map. + // If at the end, there are any counts entries left, then we definitely have a problem. + // + for (size_t i = 0; i < numShards; ++i) { + result += shards[i].validate_counts(counts); + } + if (!result.empty()) return result; + // + // Now validate we found everything + // + if (!counts.empty()) { + for (auto& c : counts) { + std::ostringstream os; + os << "Lingering Handle found: " << c.first->getText() << " Count:" << c.second << "\n"; + result += os.str(); + } + } + return result; +} + +KeyTable::Stats KeyTable::getStats() const { + + Stats s{}; + + // + // Global stats + // + s.stuckKeys = stuckKeys; + s.factors = getFactors(); + // + // Now sum up the per-shard stats + // + for (size_t i = 0; i < numShards; ++i) { + shards[i].updateStats(s); + } + return s; +} + +KeyTable::LongStats KeyTable::getLongStats(size_t topN) const { + LongStats s; + // + // Now sum up the per-shard stats + // + for (size_t i = 0; i < numShards; ++i) { + shards[i].updateLongStats(s, topN); + } + return s; +} + +/* + * Take 19 bits from hash, avoid the low end of the hash value as this is used for the per-shard index. + */ +size_t KeyTable::shardNumberFromHash(size_t hash) { + return (hash >> 40) % numShards; +} + +size_t KeyTable::hashcodeFromHash(size_t hash) { + return hash & KeyTable_Handle::MAX_HASHCODE; +} + +/* + * Upsert a string, returns a handle for this insertion. + * + * This function hashes the string and dispatches the operation to the appropriate shard. 
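+ *
+ * A minimal usage sketch (hypothetical call site; error handling elided):
+ *
+ *      KeyTable_Handle h1 = keyTable->makeHandle("name", 4);
+ *      KeyTable_Handle h2 = keyTable->makeHandle("name", 4);
+ *      ValkeyModule_Assert(h1 == h2);  // same text => same layout => equal handles
+ *      keyTable->destroyHandle(h1);    // each makeHandle needs a matching destroyHandle
+ *      keyTable->destroyHandle(h2);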
+ */ +KeyTable_Handle KeyTable::makeHandle(const char *ptr, size_t len, bool noescape) { + size_t hsh = hash(ptr, len); + size_t shardNum = shardNumberFromHash(hsh); + KeyTable_Layout *s = shards[shardNum].insert(*this, hsh, ptr, len, noescape); + return KeyTable_Handle(s, hashcodeFromHash(hsh)); +} + +/* + * Clone an existing handle + */ +KeyTable_Handle KeyTable::clone(const KeyTable_Handle& h) { + size_t hsh = hash(h.GetString(), h.GetStringLength()); + size_t shardNum = shardNumberFromHash(hsh); + KeyTable_Layout *s = shards[shardNum].clone(*this, h); + return KeyTable_Handle(s, hashcodeFromHash(hsh)); +} + +/* + * Destroy a Handle. + * + * While technically, we don't have to hash the string to determine the shard, the shard-level + * destroy operation will need the hash, so we do it here for symmetry. + */ +void KeyTable::destroyHandle(KeyTable_Handle& h) { + if (!h) return; // Empty + size_t hsh = hash(h.GetString(), h.GetStringLength()); + KEYTABLE_ASSERT(!h->isPoisoned()); + KEYTABLE_ASSERT(hsh == h->getOriginalHash()); + size_t shardNum = shardNumberFromHash(hsh); + shards[shardNum].destroyHandle(*this, h, hsh); +} + +void KeyTable::setFactors(const Factors& f) { + KEYTABLE_ASSERT(!isValidFactors(f)); + // Grab all of the locks to ensure consistency + for (size_t i = 0; i < numShards; ++i) { + shards[i].mutex.lock(); + } + factors = f; + for (size_t i = 0; i < numShards; ++i) { + shards[i].mutex.unlock(); + } +} + +const char *KeyTable::isValidFactors(const Factors& f) { + // + // first the easy ones.... + // + if (f.minLoad <= 0) return "minLoad <= 0.0"; + if (f.maxLoad > 1.0f) return "maxLoad > 1.0"; + if (f.minLoad >= f.maxLoad) return "minLoad >= maxLoad"; + if (f.grow <= 0) return "Grow <= 0.0"; + if (f.shrink <= 0) return "Shrink <= 0.0"; + // + // The shrink factor requires additional validation because we want to make sure that + // rehash down will always succeed, i.e., you can't shrink TOO much or you're toast. + // (because it won't fit ;-)) + // + if (f.shrink > (1.0f - f.minLoad)) return "Shrink too large"; + return nullptr; // We're good !!! +} + +/****************************************************************************************** + * Implement KeyTable_Layout + * + * Efficiently store three quantities in sequential memory: RefCount, Length, Text[0..Length-1] + * + * We use either 1, 2, 3 or 4 bytes to store the length. + */ + +// Maximum legal refcount. 2^29-1 +static uint32_t MAX_REF_COUNT = 0x1FFFFFFF; + +bool KeyTable_Layout::IsStuck() const { + return refCount >= MAX_REF_COUNT; +} + +bool KeyTable_Layout::incrRefCount() const { + if (IsStuck()) { + return true; // Saturated + } else { + refCount++; + return false; + } +} + +size_t KeyTable_Layout::decrRefCount() const { + KEYTABLE_ASSERT(refCount > 0); + if (!IsStuck()) refCount--; + return refCount; +} + +size_t KeyTable_Layout::getLength() const { + // Length is stored in little-endian format + size_t len = 0; + for (size_t i = 0; i <= lengthBytes; ++i) { + len |= *reinterpret_cast(bytes+i) << (i * 8); + } + return len; +} + +const char *KeyTable_Layout::getText() const { + return bytes + lengthBytes + 1; +} + +KeyTable_Layout *KeyTable_Layout::makeLayout(void *(*malloc)(size_t), const char *ptr, size_t len, + size_t hash, bool noescape) { + size_t lengthBytes = (len <= 0xFF) ? 1 : + (len <= 0xFFFF) ? 2: + (len <= 0xFFFFFF) ? 3: + (len <= 0xFFFFFFFF) ? 
4 : + 0; + KeyTable_Layout *p = reinterpret_cast(malloc( + sizeof(KeyTable_Layout) + lengthBytes + len)); + p->original_hash = hash; + p->noescapeFlag = noescape; + p->refCount = 1; + p->lengthBytes = lengthBytes - 1; + // store the length in little-endian format + for (size_t i = 0; i < lengthBytes; ++i) { + p->bytes[i] = len >> (8 * i); + } + std::memcpy(p->bytes + lengthBytes, ptr, len); + return p; +} + + +// Unit test only. +void KeyTable_Layout::setMaxRefCount(uint32_t maxRefCount) { + KEYTABLE_ASSERT(sizeof(KeyTable_Layout) == 5 + 8); + KEYTABLE_ASSERT(maxRefCount <= MAX_REF_COUNT); // can only shrink it. + MAX_REF_COUNT = maxRefCount; +} + diff --git a/src/json/keytable.h b/src/json/keytable.h new file mode 100644 index 0000000..f59efc3 --- /dev/null +++ b/src/json/keytable.h @@ -0,0 +1,386 @@ +#ifndef _KEYTABLE_H +#define _KEYTABLE_H + +/************************************************************************************************ + * + * The key table. This thread-safe object implements unique use-counted immutable strings. + * + * This object is a repository of immutable strings. You put a string into the repository + * and you get back an immutable handle (8-bytes). The handle can be cheaply de-referenced to yield the + * underlying string text (when needed). When you're done with the handle you give it back to the + * string table. So far, nothing special. + * + * The key table maintains a reference count for each string in the table AND it guarantees that + * each string in the table is unique. Further, two insertions of the same string will yield the + * same handle, meaning that once you've converted a string into a handle you can do equality + * comparisons on other strings simply by comparing the handles for equality. + * + * After initialization, there are only two operations on the global hashtable. + * + * (1) Insert string, return handle. (string is copied, the caller can discard his memory) + * (2) discard handle. + * + * Both operations are thread-safe and the handles are NOT locked to a thread. + * The handle represents a resource allocation within the table and thus every call to (1) must + * eventually have a call to (2) to release the resource (i.e., decrement the refcount) + * + * ********************************************************************************************** + */ + +/* + * IMPLEMENTATION + * + * Each unique string (with it's metadata, length & refcount) is stored in a separately malloc'ed + * chunk of memory. The handle contains a pointer to this data. Thus having obtained a handle, + * access to the underlying string is trivially cheap. Since the handle and the string itself are + * immutable, no thread locking need be done to access the data. + * + * A separate data structure (table & shards) contains a mapping table that implements the raw + * API to convert a string to a handle. That mapping table is sharded with each shard being + * protected by a mutex. A string is received and hashed. The hashcode selects the shard for that + * string. The shard is locked, the mapping table is consulted to locate a previous copy of the + * string. If the string is found, the refcount is incremented and a handle is constructed from + * the existing pointer and shard number. If the string isn't found, a new malloc string is created, + * the mapping is updated and a handle is constructed and returned. + * + * The mapping is implemented as a hashtable using linear hashing. Each hash table entry is simply + * the malloc'ed pointer and 19-bits of hash code. 
Various conditions can cause a rehashing event, + * rehashing is always done as a single operation on the entire hashtable while the mutex is held, + * i.e., there is no incremental re-hashing. This makes it very easy to ensure multi-thread correctness. + * Worst-case CPU loads due to rehashing are limited because the size of a shard hashtable is itself + * limited to 2^19 entries. You vary the number of shards to handle the worst-case number of strings + * in the table. + * + * The refcount for a string is currently fixed at 30-bits. Increment and decrement of the refcount + * is done with saturating arithmetic, meaning that if a string ever hits the maximum refcount it + * will never be deleted from the table. This isn't considered to be a problem. + * + */ + +#include +#include +#include +#include +#include +#include +#include "json/alloc.h" + +#ifndef KEYTABLE_ASSERT +#define KEYTABLE_ASSERT(x) RAPIDJSON_ASSERT(x) +#endif + +/* + * This class implements a pointer with up to 19 bits of additional metadata. On x86_64 and Aarch-64 + * There are 16 bits at the top of the pointer that are unused (guaranteed to be zero) and we assume + * that the pointer references malloc'ed memory with a minimum of 8-byte alignment, guaranteeing that + * another 3 bits of usable storage. + * + * Other versions of this class could be implemented for systems that don't meet the requirements + * above and simply store the metadata adjacent to a full pointer (i.e., 32-bit systems). + * + */ +template +class PtrWithMetaData { + public: + enum { METADATA_MASK = (1 << 19)-1 }; // Largest value that fits. + + const T& operator*() const { return *getPointer(); } + const T* operator->() const { return getPointer(); } + T& operator*() { return *getPointer(); } + T* operator->() { return getPointer(); } + + // + // It's "C"-ism, that you test pointers for null/!null by doing "if (ptr)" or "if (!ptr)" + // C++ considers that as a conversion to a boolean. Which is implemented by this operator + // To be clear, we include the Metadata in the comparison. + // + operator bool() const { return bits != 0; } // if (PtrWithMetaData) invokes this operator + + size_t getMetaData() const { return ror(bits, 48) & METADATA_MASK; } + void setMetaData(size_t metadata) { + KEYTABLE_ASSERT(0 == (metadata & ~METADATA_MASK)); + bits = (bits & PTR_MASK) | ror(metadata, 16); + } + PtrWithMetaData() : bits(0) {} + PtrWithMetaData(T *ptr, size_t metadata) { + KEYTABLE_ASSERT(0 == (~PTR_MASK & reinterpret_cast(ptr))); + KEYTABLE_ASSERT(0 == (metadata & ~METADATA_MASK)); + bits = reinterpret_cast(ptr) | ror(metadata, 16); + } + void clear() { bits = 0; } + // + // Comparison operations also include the metadata + // + bool operator==(const PtrWithMetaData& rhs) const { return bits == rhs.bits; } + bool operator!=(const PtrWithMetaData& rhs) const { return bits != rhs.bits; } + + friend std::ostream& operator<<(std::ostream& os, const PtrWithMetaData& ptr) { + return os << "Ptr:" << reinterpret_cast(&*ptr) << " MetaData:" << ptr.getMetaData(); + } + + void swap(PtrWithMetaData& rhs) { std::swap(bits, rhs.bits); } + + private: + size_t bits; + T* getPointer() const { return MEMORY_VALIDATE(reinterpret_cast(bits & PTR_MASK)); } + // Circular rotate right (count <= 64) + static constexpr size_t ror(size_t v, unsigned count) { + return (v >> count) | (v << (64-count)); + } + static const size_t PTR_MASK = ~ror(METADATA_MASK, 16); +}; + +// +// This is the struct that's accessed by dereferencing a KeyTable_Handle. 
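+// For example, makeLayout() for the 3-byte key "abc" stores, after the 8-byte original_hash
+// and the 32-bit refCount/flag word: lengthBytes=0 (meaning one length byte), bytes[0]=3,
+// bytes[1..3]="abc"; getText() then returns bytes + 1.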
+// Normal users should only look at len and text fields -- and consider them immutable.
+// For normal users, the only statement about refcount is that it will be non-zero as long as
+// any handle exists.
+//
+// Privileged unit tests look at the refcount field also...
+//
+struct KeyTable_Layout {
+    //
+    // Create a string layout; allocates some memory.
+    //
+    static KeyTable_Layout *makeLayout(void *(*malloc)(size_t), const char *ptr,
+                                       size_t len, size_t hash, bool noescape);
+    //
+    // Interrogate existing layout
+    //
+    size_t getRefCount() const { return refCount; }
+    size_t getLength() const;
+    const char *getText() const;
+    bool IsStuck() const;
+    bool getNoescape() const { return noescapeFlag != 0; }
+    enum { POISON_VALUE = 0xdeadbeeffeedfeadull };
+    size_t getOriginalHash() const { return original_hash; }
+    void poisonOriginalHash() { original_hash = POISON_VALUE; }
+    bool isPoisoned() const { return original_hash == POISON_VALUE; }
+    // Unit test
+    static void setMaxRefCount(uint32_t maxRefCount);
+
+ protected:
+    KeyTable_Layout();             // Nobody gets to create one.
+    friend class KeyTable_Shard;   // Only class allowed to manipulate reference count
+    bool incrRefCount() const;     // true => saturated
+    size_t decrRefCount() const;   // returns current count
+    size_t original_hash;          // Remember original hash
+    mutable uint32_t refCount:29;  // Ref count.
+    uint32_t noescapeFlag:1;       // String doesn't need to be escaped
+    uint32_t lengthBytes:2;        // 0, 1, 2 or 3 => 1, 2, 3 or 4 bytes of length
+    char bytes[1];                 // length bytes + text bytes
+} __attribute__((packed));         // Don't let the compiler round the size up to 8 bytes.
+
+struct KeyTable_Handle {
+    /***************************** Public Handle Interface *******************************/
+    //
+    // get a pointer to the text of the string. This pointer has the same lifetime as the
+    // string_table_handle object itself.
+    //
+    const KeyTable_Layout& operator*() const { return *theHandle; }
+    const KeyTable_Layout* operator->() const { return &*theHandle; }
+    const char *GetString() const { return theHandle->getText(); }
+    size_t GetStringLength() const { return theHandle->getLength(); }
+    const std::string_view GetStringView() const
+        { return std::string_view(theHandle->getText(), theHandle->getLength()); }
+    size_t GetHashcode() const { return theHandle.getMetaData(); }
+    bool IsNoescape() const { return theHandle->getNoescape(); }
+
+    enum { MAX_HASHCODE = PtrWithMetaData<KeyTable_Layout>::METADATA_MASK };
+    //
+    // Assignment is only allowed into an empty handle.
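+    // (The assignment steals rhs's pointer and clears rhs, so the underlying
+    // refcount is claimed exactly once.)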
+ // + KeyTable_Handle& operator=(const KeyTable_Handle& rhs) { + KEYTABLE_ASSERT(!theHandle); + theHandle = rhs.theHandle; + const_cast(rhs).theHandle.clear(); + return *this; + } + // + // Do assignment into raw storage + // + void RawAssign(const KeyTable_Handle& rhs) { + theHandle = rhs.theHandle; + const_cast(rhs).theHandle.clear(); + } + // + // move semantics are allowed + // + KeyTable_Handle(KeyTable_Handle&& rhs) { + theHandle = rhs.theHandle; + rhs.theHandle.clear(); + } + // + // Comparison + // + bool operator==(const KeyTable_Handle& rhs) const { return theHandle == rhs.theHandle; } + bool operator!=(const KeyTable_Handle& rhs) const { return theHandle != rhs.theHandle; } + + operator bool() const { return bool(theHandle); } + + friend std::ostream& operator<<(std::ostream& os, const KeyTable_Handle& h) { + return os << "Handle:" << reinterpret_cast(&*(h.theHandle)) + << " Shard:" << h.theHandle.getMetaData() + << " RefCount: " << h->getRefCount() + << " : " << h.GetStringView(); + } + + KeyTable_Handle() : theHandle() {} + ~KeyTable_Handle() { KEYTABLE_ASSERT(!theHandle); } + + void Swap(KeyTable_Handle& rhs) { + theHandle.swap(rhs.theHandle); + } + + private: + friend class KeyTable; + friend struct KeyTable_Shard; + + KeyTable_Handle(KeyTable_Layout *ptr, size_t hashCode) : theHandle(ptr, hashCode) {} + void clear() { theHandle.clear(); } + + PtrWithMetaData theHandle; // The only actual data here. +}; + +/* + * This is the core hashtable, it's invisible externally + */ +struct KeyTable_Shard; + +struct KeyTable { + /*************************** External Table Interface *********************************/ + + enum { MAX_SHARDS = KeyTable_Handle::MAX_HASHCODE, MIN_SHARDS = 1 }; + + + // + // Stuff to create a table. These get copied and can't be changed without + // recreating the entire table. + // + struct Config { + void *(*malloc)(size_t); // Use this to allocate memory + void (*free)(void*); // Use this to free memory + size_t (*hash)(const char *, size_t); // Hash function for strings + size_t numShards; // Number of shards to create + }; + // + // Construct a table. + // + explicit KeyTable(const Config& cfg); + ~KeyTable(); + // + // Make a handle for this string. The target string is copied when necessary. + // + KeyTable_Handle makeHandle(const char *ptr, size_t len, bool noescape = false); + KeyTable_Handle makeHandle(const std::string& s, bool noescape = false) { + return makeHandle(s.c_str(), s.length(), noescape); + } + KeyTable_Handle makeHandle(const std::string_view& s, bool noescape = false) { + return makeHandle(s.data(), s.length(), noescape); + } + + KeyTable_Handle clone(const KeyTable_Handle& rhs); + // + // Destroy a handle + // + void destroyHandle(KeyTable_Handle &h); + // + // Some of the configuration variables can be changed dynamically. + // + struct Factors { + float minLoad; // LoadFactor() < minLoad => rehash down + float maxLoad; // LoadFactor() > maxLoad => rehash up + float shrink; // % to shrink by + float grow; // % to grow by + Factors() : + // Default Factors for the hash table + minLoad(0.25f), // minLoad => .25 + maxLoad(0.85f), // maxLoad => targets O(8) searches [see wikipedia] + shrink(0.5f), // shrink, remove 1/2 of elements. + grow(1.0f) // Grow by 100% + {} + }; + + // + // Get the current configuration + // + const Factors& getFactors() const { return factors; } + // + // Query if this set of factors is valid. + // returns: NULL, If the factors are valid. 
Otherwise, an error string.
+    // This is used to validate a set of factors before setting them.
+    //
+    static const char *isValidFactors(const Factors& f);
+    //
+    // Change to these factors if valid. This is modestly expensive as it grabs all shard locks.
+    // This will assert if the factors are invalid.
+    //
+    void setFactors(const Factors& proposed);
+
+    /*
+     * Stats you can get at any time.
+     *
+     * Reading these is O(numShards), which can be expensive.
+     *
+     * These stats are computed by summing up across the shards. Each shard is locked and
+     * then its contribution is added to the running totals. Because of the time skew of
+     * the reading, there may be slight inaccuracies in the presence of multi-thread operations.
+     */
+    struct Stats {
+        size_t size;          // Total number of unique strings in table
+        size_t bytes;         // Total bytes of strings
+        size_t handles;       // Number of outstanding handles
+        size_t maxTableSize;  // Largest Shard table
+        size_t minTableSize;  // Smallest Shard table
+        size_t totalTable;    // sum of table sizes
+        size_t stuckKeys;     // Number of strings that have hit the refcount max.
+        //
+        // These counters are reset after being read.
+        //
+        size_t maxSearch;     // longest search sequence encountered
+        size_t rehashes;      // Number of rehashes
+        //
+        // Include a copy of current settable factors. Makes testing easier
+        //
+        Factors factors;
+    };
+    Stats getStats() const;
+
+    //
+    // Long stats are stats that are VERY expensive to compute and are generally only
+    // used for debug or unit tests. You can see these in the JSON.DEBUG command provided
+    // you are coming in from an Admin connection.
+    //
+    struct LongStats {
+        std::map<size_t, size_t> runs;  // size of runs, count of #runs
+    };
+    //
+    // The topN parameter limits the size of the result to the largest N runs.
+    // Setting N to a relatively small number will reduce the cost of generating the stats.
+    //
+    LongStats getLongStats(size_t topN) const;
+
+    KeyTable(const KeyTable& rhs) = delete;        // no copies
+    void operator=(const KeyTable& rhs) = delete;  // no assignment
+
+    std::string validate() const;  // Unit testing only
+    std::string validate_counts(std::unordered_map<const KeyTable_Layout *, size_t>& counts) const;  // Debug command
+
+    size_t getNumShards() const { return numShards; }
+
+ private:
+    friend class KeyTable_Shard;
+    size_t shardNumberFromHash(size_t hash);
+    size_t hashcodeFromHash(size_t hash);
+    KeyTable_Shard* shards;
+    void *(*malloc)(size_t);               // Use this to allocate memory
+    void (*free)(void *);                  // Use this to free memory
+    size_t (*hash)(const char *, size_t);  // Hash function for strings
+    size_t numShards;
+    std::atomic<size_t> stuckKeys;         // Stuck String count.
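+    // factors is only written while every shard mutex is held (see setFactors), so a
+    // shard may safely read it under its own lock.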
+    Factors factors;
+};
+
+extern KeyTable *keyTable;  // The singleton
+
+#endif
diff --git a/src/json/memory.cc b/src/json/memory.cc
new file mode 100644
index 0000000..10343df
--- /dev/null
+++ b/src/json/memory.cc
@@ -0,0 +1,352 @@
+#include
+#include
+
+#include
+#include
+
+#include "json/memory.h"
+#include "json/dom.h"
+
+extern "C" {
+#define VALKEYMODULE_EXPERIMENTAL_API
+#include <./include/valkeymodule.h>
+}
+
+#define STATIC /* decorator for static functions, remove so that backtrace symbols include these */
+
+void *(*memory_alloc)(size_t size);
+void (*memory_free)(void *ptr);
+void *(*memory_realloc)(void *orig_ptr, size_t new_size);
+size_t (*memory_allocsize)(void *ptr);
+
+bool memoryTrapsEnabled = true;
+
+static std::atomic<size_t> totalMemoryUsage;
+
+size_t memory_usage() {
+    return totalMemoryUsage;
+}
+
+/*
+ * When traps are disabled, the following code is used.
+ */
+
+STATIC void *memory_alloc_without_traps(size_t size) {
+    void *ptr = ValkeyModule_Alloc(size);
+    totalMemoryUsage += ValkeyModule_MallocSize(ptr);
+    return ptr;
+}
+
+STATIC void memory_free_without_traps(void *ptr) {
+    if (!ptr) return;
+    size_t sz = ValkeyModule_MallocSize(ptr);
+    ValkeyModule_Assert(sz <= totalMemoryUsage);
+    totalMemoryUsage -= sz;
+    ValkeyModule_Free(ptr);
+}
+
+STATIC void *memory_realloc_without_traps(void *ptr, size_t new_size) {
+    if (ptr) {
+        size_t old_size = ValkeyModule_MallocSize(ptr);
+        ValkeyModule_Assert(old_size <= totalMemoryUsage);
+        totalMemoryUsage -= old_size;
+    }
+    ptr = ValkeyModule_Realloc(ptr, new_size);
+    totalMemoryUsage += ValkeyModule_MallocSize(ptr);
+    return ptr;
+}
+
+#define memory_allocsize_without_traps ValkeyModule_MallocSize
+
+//
+// Implementation of traps
+//
+
+//
+// This word of data precedes the memory allocation as seen by the client.
+// The presence of the length is redundant with calling the low-level allocator's memory-size function,
+// but that function can be fairly expensive, so by duplicating it here we optimize the run-time cost.
+//
+struct trap_prefix {
+    mutable uint64_t length:40;
+    mutable uint64_t valid_prefix:24;
+    enum { VALID = 0xdeadbe, INVALID = 0xf00dad};
+    static trap_prefix *from_ptr(void *p) { return reinterpret_cast<trap_prefix *>(p) - 1; }
+    static const trap_prefix *from_ptr(const void *p) { return reinterpret_cast<const trap_prefix *>(p) - 1; }
+};
+
+//
+// Another word of data is added to the end of each allocation. It's set to a known data pattern.
+//
+struct trap_suffix {
+    mutable uint64_t valid_suffix;
+    enum { VALID = 0xdeadfeedbeeff00dull, INVALID = ~VALID };
+    static trap_suffix *from_prefix(trap_prefix *p) {
+        return reinterpret_cast<trap_suffix *>(p + 1 + (p->length >> 3));
+    }
+    static const trap_suffix *from_prefix(const trap_prefix *p) {
+        return reinterpret_cast<const trap_suffix *>(p + 1 + (p->length >> 3));
+    }
+};
+
+bool memory_validate_ptr(const void *ptr, bool crashOnError) {
+    if (!ptr) return true;  // Null pointers are valid.
+    auto prefix = trap_prefix::from_ptr(ptr);
+    if (prefix->valid_prefix != trap_prefix::VALID) {
+        if (crashOnError) {
+            ValkeyModule_Log(nullptr, "error", "Validation Failure memory Corrupted at:%p", ptr);
+            ValkeyModule_Assert(nullptr == "Validate Prefix Corrupted");
+        } else {
+            return false;
+        }
+    }
+    auto suffix = trap_suffix::from_prefix(prefix);
+    if (suffix->valid_suffix != trap_suffix::VALID) {
+        if (!crashOnError) return false;
+        // Dump the first N bytes. Hopefully this might give us a clue about what's going wrong....
+ size_t malloc_size = ValkeyModule_MallocSize(const_cast(reinterpret_cast(prefix))); + ValkeyModule_Assert(malloc_size >= (sizeof(trap_prefix) + sizeof(trap_suffix))); + size_t available_size = malloc_size - (sizeof(trap_prefix) + sizeof(trap_suffix)); + size_t dump_size = available_size > 256 ? 256 : available_size; + ValkeyModule_Log(nullptr, "error", "Validation Failure memory overrun @%p size:%zu", ptr, available_size); + auto data = static_cast(ptr); + while (dump_size > (4 * sizeof(void *))) { + ValkeyModule_Log(nullptr, "error", "Memory[%p]: %p %p %p %p", + static_cast(data), data[0], data[1], data[2], data[3]); + data += 4; + dump_size -= 4 * sizeof(void *); + } + while (dump_size) { + ValkeyModule_Log(nullptr, "error", "Memory[%p]: %p", + static_cast(data), data[0]); + data++; + dump_size -= sizeof(void *); + } + ValkeyModule_Assert(nullptr == "Validate Suffix Corrupted"); + } + return true; +} + +STATIC void *memory_alloc_with_traps(size_t size) { + size_t requested_bytes = ~7 & (size + 7); // Round up + size_t alloc_bytes = requested_bytes + sizeof(trap_prefix) + sizeof(trap_suffix); + auto prefix = reinterpret_cast(ValkeyModule_Alloc(alloc_bytes)); + totalMemoryUsage += ValkeyModule_MallocSize(prefix); + prefix->valid_prefix = trap_prefix::VALID; + prefix->length = requested_bytes; + auto suffix = trap_suffix::from_prefix(prefix); + suffix->valid_suffix = trap_suffix::VALID; + return reinterpret_cast(prefix + 1); +} + +STATIC void memory_free_with_traps(void *ptr) { + if (!ptr) return; + memory_validate_ptr(ptr); + auto prefix = trap_prefix::from_ptr(ptr); + prefix->valid_prefix = 0; + size_t sz = ValkeyModule_MallocSize(prefix); + ValkeyModule_Assert(sz <= totalMemoryUsage); + totalMemoryUsage -= sz; + ValkeyModule_Free(prefix); +} + +STATIC size_t memory_allocsize_with_traps(void *ptr) { + if (!ptr) return 0; + memory_validate_ptr(ptr); + auto prefix = trap_prefix::from_ptr(ptr); + return prefix->length; +} + +// +// Do a realloc, but this is rare, so we do it suboptimally, i.e., with a copy +// +STATIC void *memory_realloc_with_traps(void *orig_ptr, size_t new_size) { + if (!orig_ptr) return memory_alloc_with_traps(new_size); + memory_validate_ptr(orig_ptr); + auto new_ptr = memory_alloc_with_traps(new_size); + memcpy(new_ptr, orig_ptr, memory_allocsize_with_traps(orig_ptr)); + memory_free_with_traps(orig_ptr); + return new_ptr; +} + +// +// Enable/Disable traps +// +bool memory_traps_control(bool enable) { + if (totalMemoryUsage != 0) { + ValkeyModule_Log(nullptr, "warning", + "Attempt to enable/disable memory traps ignored, %zu outstanding memory.", totalMemoryUsage.load()); + return false; + } + if (enable) { + memory_alloc = memory_alloc_with_traps; + memory_free = memory_free_with_traps; + memory_realloc = memory_realloc_with_traps; + memory_allocsize = memory_allocsize_with_traps; + } else { + memory_alloc = memory_alloc_without_traps; + memory_free = memory_free_without_traps; + memory_realloc = memory_realloc_without_traps; + memory_allocsize = memory_allocsize_without_traps; + } + memoryTrapsEnabled = enable; + return true; +} + +void memory_corrupt_memory(const void *ptr, memTrapsCorruption_t corruption) { + memory_validate_ptr(ptr); + auto prefix = trap_prefix::from_ptr(ptr); + auto suffix = trap_suffix::from_prefix(prefix); + switch (corruption) { + case CORRUPT_PREFIX: + prefix->valid_prefix = trap_prefix::INVALID; + break; + case CORRUPT_LENGTH: + prefix->length--; + break; + case CORRUPT_SUFFIX: + suffix->valid_suffix = trap_suffix::INVALID; + break; + 
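+        // Unknown corruption kind: fail hard instead of silently ignoring it.
+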
default: + ValkeyModule_Assert(0); + break; + } +} + +void memory_uncorrupt_memory(const void *ptr, memTrapsCorruption_t corruption) { + auto prefix = trap_prefix::from_ptr(ptr); + auto suffix = trap_suffix::from_prefix(prefix); + switch (corruption) { + case CORRUPT_PREFIX: + ValkeyModule_Assert(prefix->valid_prefix == trap_prefix::INVALID); + prefix->valid_prefix = trap_prefix::VALID; + break; + case CORRUPT_LENGTH: + prefix->length++; + break; + case CORRUPT_SUFFIX: + ValkeyModule_Assert(suffix->valid_suffix == trap_suffix::INVALID); + suffix->valid_suffix = trap_suffix::VALID; + break; + default: + ValkeyModule_Assert(0); + break; + } + memory_validate_ptr(ptr); +} + +// +// Helper functions for JSON validation +// +// true => Valid. +// false => NOT VALID +// +bool ValidateJValue(JValue &v) { + auto p = v.trap_GetMallocPointer(false); + if (p && !memory_validate_ptr(p, false)) return false; + if (v.IsObject()) { + for (auto m = v.MemberBegin(); m != v.MemberEnd(); ++m) { + if (!ValidateJValue(m->value)) return false; + } + } else if (v.IsArray()) { + for (size_t i = 0; i < v.Size(); ++i) { + if (!ValidateJValue(v[i])) return false; + } + } + return true; +} + +// +// Dump a JValue with Redaction and memory Validation. +// +// Typical use case: +// +// std::ostringstream os; +// DumpRedactedJValue(os, ); +// +void DumpRedactedJValue(std::ostream& os, const JValue &v, size_t level, int index) { + for (size_t i = 0; i < (3 * level); ++i) os << ' '; // Indent + os << "@" << reinterpret_cast(&v) << " "; + if (index != -1) os << '[' << index << ']' << ' '; + if (v.IsDouble()) { + os << "double string of length " << v.GetDoubleStringLength(); + if (!IS_VALID_MEMORY(v.trap_GetMallocPointer(false))) { + os << " <*INVALID*>\n"; + } else if (v.trap_GetMallocPointer(false)) { + os << " @" << v.trap_GetMallocPointer(false) << "\n"; + } else { + os << "\n"; + } + } else if (v.IsString()) { + os << "String of length " << v.GetStringLength(); + if (!IS_VALID_MEMORY(v.trap_GetMallocPointer(false))) { + os << " <*INVALID*>\n"; + } else if (v.trap_GetMallocPointer(false)) { + os << " @" << v.trap_GetMallocPointer(false) << "\n"; + } else { + os << "\n"; + } + } else if (v.IsObject()) { + os << " Object with " << v.MemberCount() << " Members"; + if (!IS_VALID_MEMORY(v.trap_GetMallocPointer(false))) { + os << " *INVALID*\n"; + } else { + os << " @" << v.trap_GetMallocPointer(false) << '\n'; + index = 0; + for (auto m = v.MemberBegin(); m != v.MemberEnd(); ++m) { + DumpRedactedJValue(os, m->value, level+1, index); + index++; + } + } + } else if (v.IsArray()) { + os << "Array with " << v.Size() << " Members"; + if (!IS_VALID_MEMORY(v.trap_GetMallocPointer(false))) { + os << " *INVALID*\n"; + } else { + os << " @" << v.trap_GetMallocPointer(false) << "\n"; + for (size_t index = 0; index < v.Size(); ++index) { + DumpRedactedJValue(os, v[index], level+1, int(index)); + } + } + } else { + os << "\n"; + } +} + +// +// This class creates an ostream to the Valkey Log. 
Each line of output is a single call to the ValkeyLog function +// +class ValkeyLogStreamBuf : public std::streambuf { + std::string line; + ValkeyModuleCtx *ctx; + const char *level; + + public: + ValkeyLogStreamBuf(ValkeyModuleCtx *_ctx, const char *_level) : ctx(_ctx), level(_level) {} + ~ValkeyLogStreamBuf() { + if (!line.empty()) { + ValkeyModule_Log(ctx, level, "%s", line.c_str()); + } + } + std::streamsize xsputn(const char *p, std::streamsize n) { + for (std::streamsize i = 0; i < n; ++i) { + overflow(p[i]); + } + return n; + } + int overflow(int c) { + if (c == '\n' || c == EOF) { + ValkeyModule_Log(ctx, level, "%s", line.c_str()); + line.resize(0); + } else { + line += c; + } + return c; + } +}; + +void DumpRedactedJValue(const JValue &v, ValkeyModuleCtx *ctx, const char *level) { + ValkeyLogStreamBuf b(ctx, level); + std::ostream buf(&b); + DumpRedactedJValue(buf, v); +} diff --git a/src/json/memory.h b/src/json/memory.h new file mode 100644 index 0000000..46cf270 --- /dev/null +++ b/src/json/memory.h @@ -0,0 +1,144 @@ +/** + */ +#ifndef VALKEYJSONMODULE_MEMORY_H_ +#define VALKEYJSONMODULE_MEMORY_H_ + +#include + +#include +#include +#include +#include +#include +#include + +// +// Trap implementation +// +// Memory traps are a diagnostic tool intended to catch some categories of memory usage errors. +// +// The Trap system is conceptually a shim layer between the client application and the lower level memory allocator. +// Traps operate by adding to each memory allocation a prefix and a suffix. The prefix and suffix contain known +// data patterns and some internal trap metadata. Subsequent memory operations validate the correctness of the +// prefix and suffix. A special interface is provided to allow any client application to voluntarily request +// memory validation -- presumably before utilizing the underlying memory. +// +// This strategy should catch at least three classes of memory corruption: +// +// (1) double free of memory. +// (2) writes off the end of memory (just the prev/next word, not WAAAAY off the end of memory) +// (3) dangling pointer to previously freed memory (this relies on voluntary memory validation) +// +// Traps can be dynamically enabled/disabled, provided that there is no outstanding memory allocation. +// + +// +// All functions in the module (outsize of memory.cc) should use these to allocate memory +// instead of the ValkeyModule_xxxx functions. +// +extern void *(*memory_alloc)(size_t size); +extern void (*memory_free)(void *ptr); +extern void *(*memory_realloc)(void *orig_ptr, size_t new_size); +extern size_t (*memory_allocsize)(void *ptr); + +// +// Total memory usage. +// +// (1) Includes dom_alloc memory usage. dom_alloc tracks JSON data that's associated with a document +// (2) Includes KeyTable usage, i.e., JSON data that's shared across documents +// (3) Includes STL library allocations +// +extern size_t memory_usage(); + +// +// Are traps enabled? +// +inline bool memory_traps_enabled() { + extern bool memoryTrapsEnabled; + return memoryTrapsEnabled; +} + +// +// External Interface to traps logic +// +// Enables/Disable traps. This can fail if there's outstanding allocated memory. +// +// return true => operation was successful. 
+// return false => operation failed (there's outstanding memory)
+//
+bool memory_traps_control(bool enable);
+
+bool memory_validate_ptr(const void *ptr, bool crashOnError = true);
+//
+// This version validates memory, but crashes on an invalid pointer
+//
+template<typename t>
+static inline t *MEMORY_VALIDATE(t *ptr, bool validate = true) {
+    extern bool memoryTrapsEnabled;
+    if (memoryTrapsEnabled && validate) memory_validate_ptr(ptr, true);
+    return ptr;
+}
+
+//
+// This version validates memory, but doesn't crash
+//
+template<typename t>
+static inline bool IS_VALID_MEMORY(t *ptr) {
+    return memory_validate_ptr(ptr, false);
+}
+
+//
+// Classes for STL Containers that utilize the memory usage and trap logic.
+//
+namespace jsn
+{
+//
+// Our custom allocator
+//
+template<typename T> class stl_allocator : public std::allocator<T> {
+ public:
+    typedef T value_type;
+    stl_allocator() = default;
+    stl_allocator(std::allocator<T>&) {}
+    stl_allocator(std::allocator<T>&&) {}
+    template<typename U> constexpr stl_allocator(const stl_allocator<U>&) noexcept {}
+
+    T *allocate(std::size_t n) { return static_cast<T *>(memory_alloc(n*sizeof(T))); }
+    void deallocate(T *p, std::size_t n) { (void)n; memory_free(p); }
+};
+
+template<typename T> using vector = std::vector<T, stl_allocator<T>>;
+
+template<typename T, class Compare = std::less<T>> using set = std::set<T, Compare, stl_allocator<T>>;
+
+template<typename T, class Hash = std::hash<T>, class KeyEqual = std::equal_to<T>>
+    using unordered_set = std::unordered_set<T, Hash, KeyEqual, stl_allocator<T>>;
+
+typedef std::basic_string<char, std::char_traits<char>, stl_allocator<char>> string;
+typedef std::basic_stringstream<char, std::char_traits<char>, stl_allocator<char>> stringstream;
+
+}  // namespace jsn
+
+// custom specialization of std::hash can be injected in namespace std
+template<>
+struct std::hash<jsn::string>
+{
+    std::size_t operator()(const jsn::string& s) const noexcept {
+        return std::hash<std::string_view>{}(std::string_view(s.c_str(), s.length()));
+    }
+};
+
+//
+// Everything below this line is private to this module; it's here for usage by unit tests.
+//
+
+typedef enum MEMORY_TRAPS_CORRUPTION {
+    CORRUPT_PREFIX,
+    CORRUPT_LENGTH,
+    CORRUPT_SUFFIX
+} memTrapsCorruption_t;
+
+void memory_corrupt_memory(const void *ptr, memTrapsCorruption_t corrupt);
+void memory_uncorrupt_memory(const void *ptr, memTrapsCorruption_t corrupt);
+
+#endif  // VALKEYJSONMODULE_MEMORY_H_
diff --git a/src/json/rapidjson_includes.h b/src/json/rapidjson_includes.h
new file mode 100644
index 0000000..2e082a0
--- /dev/null
+++ b/src/json/rapidjson_includes.h
@@ -0,0 +1,23 @@
+#ifndef _RAPIDJSON_INCLUDES_H
+#define _RAPIDJSON_INCLUDES_H
+
+/*
+ * This file includes all RapidJSON Files (modified or original). Any RAPIDJSON-global #defines, etc.
belong here + */ + +#if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || \ + defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64) +#define RAPIDJSON_SSE42 1 +#endif + +#if defined(__ARM_NEON) || defined(__ARM_NEON__) +#define RAPIDJSON_NEON 1 +#endif + +#define RAPIDJSON_48BITPOINTER_OPTIMIZATION 1 + +#include "rapidjson/prettywriter.h" +#include "rapidjson/document.h" +#include + +#endif diff --git a/src/json/selector.cc b/src/json/selector.cc new file mode 100644 index 0000000..1b5e4d8 --- /dev/null +++ b/src/json/selector.cc @@ -0,0 +1,2418 @@ +#include "json/selector.h" +#include "json/util.h" +#include "json/json.h" +#include "json/rapidjson_includes.h" +#include +#include +#include +#include +#include + +#ifdef INSTRUMENT_V2PATH +#define TRACE(level, msg) \ +std::cout << level << " " << msg << std::endl; +#else +#define TRACE(level, msg) +#endif + +#define ENABLE_V2_SYNTAX 1 + +static const char DOUBLE_QUOTE = '"'; +static const char SINGLE_QUOTE = '\''; + +typedef rapidjson::GenericPointer RJPointer; + +struct JPointer : RJPointer { + explicit JPointer(const jsn::string &path) : RJPointer(&allocator) { + *static_cast(this) = RJPointer(path.c_str(), path.length(), &allocator); + if (!IsValid()) error = JSONUTIL_INVALID_JSON_PATH; + } + bool HasError() { return error != JSONUTIL_SUCCESS; } + bool PathExists(JValue& doc) { return Get(doc) != nullptr; } + + // Reexport + using RJPointer::Erase; + using RJPointer::Get; + + JsonUtilCode error = JSONUTIL_SUCCESS; +}; + +thread_local int64_t current_depth = 0; // parser's recursion depth + +class RecursionDepthTracker { + public: + RecursionDepthTracker() { + current_depth++; + } + ~RecursionDepthTracker() { + current_depth--; + } + bool isTooDeep() { return current_depth > static_cast(json_get_max_parser_recursion_depth()); } +}; + +#define CHECK_RECURSION_DEPTH() \ + RecursionDepthTracker _rdtracker; \ + if (_rdtracker.isTooDeep()) return JSONUTIL_PARSER_RECURSION_DEPTH_LIMIT_EXCEEDED; + +#define CHECK_RECURSIVE_DESCENT_TOKENS() \ + if (lex.getRecursiveDescentTokens() > json_get_max_recursive_descent_tokens()) \ + return JSONUTIL_RECURSIVE_DESCENT_TOKEN_LIMIT_EXCEEDED; + +#define CHECK_QUERY_STRING_SIZE(path) \ + if (strlen(path) > json_get_max_query_string_size()) return JSONUTIL_QUERY_STRING_SIZE_LIMIT_EXCEEDED; + +/** + * EBNF Grammar of JSONPath: + * SupportedPath ::= ["$" | "."] RelativePath + * RelativePath ::= empty | RecursivePath | DotPath | BracketPath | QualifiedPath + * RecursivePath ::= ".." SupportedPath + * DotPath ::= "." QualifiedPath + * QualifiedPath ::= QualifiedPathElement RelativePath + * QualifiedPathElement ::= Key | BracketPathElement + * Key ::= "*" [ [ "." ] WildcardFilter ] | UnquotedMemberName + * WildcardFilter ::= "[" "?" "(" FilterExpr ")" "]" + * UnquotedMemberName ::= char { char } + * BracketPath ::= BracketPathElement [ RelativePath ] + * BracketPathElement ::= "[" {SPACE} ( WildcardInBrackets | ((NameInBrackets | IndexExpr) ) {SPACE} "]") + * WildcardInBrackets ::= "*" {SPACE} "]" [ "[" {SPACE} "?" 
"(" FilterExpr ")" {SPACE} "]" ] + * NameInBrackets ::= QuotedMemberName [ ({SPACE} "," {SPACE} QuotedMemberName)+ ] + * QuotedMemberName ::= "\"" {char} "\"" | "'" {char} "'" + * IndexExpr ::= Filter | SliceStartsWithColon | SliceOrUnionOrIndex + * SliceStartsWithColon ::= {SPACE} ":" {SPACE} [ ":" {SPACE} [Step] | EndAndStep ] ] + * EndAndStep ::= End [{SPACE} ":" {SPACE} [Step]] ] + * SliceOrUnionOrIndex ::= SliceStartsWithInteger | Index | UnionOfIndexes + * SliceStartsWithInteger ::= Start {SPACE} ":" {SPACE} [ ":" {SPACE} [Step] | EndAndStep + * Index ::= Integer + * Integer ::= ["+" | "-"] digit {digit} + * Start ::= Integer + * End ::= Integer + * Step ::= Integer + * UnionOfIndexes ::= Integer ({SPACE} "," {SPACE} Integer)+ + * Filter ::= "?" "(" FilterExpr ")" + * FilterExpr ::= {SPACE} Term { {SPACE} "||" {SPACE} Term {SPACE} } + * Term ::= Factor { {SPACE} "&&" {SPACE} Factor } + * Factor ::= ( "@" ( MemberName | ( [ MemberName ] ComparisonOp ComparisonValue) ) ) | + * ( ComparisonValue ComparisonOp "@" ( MemberName | ( [ MemberName ]) ) ) | + * ( {SPACE} "(" FilterExpr ")" {SPACE} ) + * MemberName ::= ("." (UnquotedMemberName | BracketedMemberName)) | BracketedMemberName + * BracketedMemberName ::= "[" {SPACE} QuotedMemberName {SPACE} "]" + * ComparisonOp ::= {SPACE} "<" | "<="] | ">" | ">=" | "==" | "!=" {SPACE} + * ComparisonValue ::= "null" | Bool | Number | QuotedString | PartialPath + * Bool ::= "true" | "false" + * Number ::= Integer | MemberNameInFilter | ScientificNumber + * QuotedString ::= "\"" {char} "\"" + * PartialPath ::= "$" RelativePath + * SPACE ::= ' ' + */ + +void Lexer::init(const char *path) { + p = path; + this->path = path; + next.type = Token::UNKNOWN; +} + +Token::TokenType Lexer::peekToken() const { + switch (*p) { + case '\0': return Token::END; + case '$': return Token::DOLLAR; + case '.': { + if (*(p+1) == '.') + return Token::DOTDOT; + else + return Token::DOT; + } + case '*': return Token::WILDCARD; + case ':': return Token::COLON; + case ',': return Token::COMMA; + case '?': return Token::QUESTION_MARK; + case '@': return Token::AT; + case '[': return Token::LBRACKET; + case ']': return Token::RBRACKET; + case '(': return Token::LPAREN; + case ')': return Token::RPAREN; + case '\'': return Token::SINGLE_QUOTE; + case '"': return Token::DOUBLE_QUOTE; + case '+': return Token::PLUS; + case '-': return Token::MINUS; + case '/': return Token::DIV; + case '%': return Token::PCT; + case ' ': return Token::SPACE; + case '&': { + if (*(p+1) == '&') + return Token::AND; + else + return Token::SPECIAL_CHAR; + } + case '|': { + if (*(p+1) == '|') + return Token::OR; + else + return Token::SPECIAL_CHAR; + } + case '=': { + if (*(p+1) == '=') + return Token::EQ; + else + return Token::ASSIGN; + } + case '!': { + if (*(p+1) == '=') + return Token::NE; + else + return Token::NOT; + } + case '>': { + if (*(p+1) == '=') + return Token::GE; + else + return Token::GT; + } + case '<': { + if (*(p+1) == '=') + return Token::LE; + else + return Token::LT; + } + default: { + if (std::isdigit(*p)) { + return Token::DIGIT; + } else if (std::isalpha(*p)) { + return Token::ALPHA; + } else { + TRACE("DEBUG", "peekToken special char: " << *p) + return Token::SPECIAL_CHAR; + } + } + } +} + +/** + * Scan the next token. 
+ * @return next token + */ +Token Lexer::nextToken(const bool skipSpace) { + next.type = peekToken(); + switch (next.type) { + case Token::END: return next; + case Token::DOTDOT: + { + rdTokens++; + next.strVal = std::string_view(p, 2); + p++; + p++; + break; + } + case Token::NE: + case Token::GE: + case Token::LE: + case Token::EQ: + case Token::AND: + case Token::OR: + { + next.strVal = std::string_view(p, 2); + p++; + p++; + break; + } + case Token::DIGIT: + case Token::ALPHA: + case Token::SPECIAL_CHAR: + { + next.strVal = std::string_view(p, 1); + p++; + break; + } + case Token::SPACE: + { + if (skipSpace) { + while (*p == ' ') p++; + return nextToken(); + } else { + next.strVal = std::string_view(p, 1); + p++; + break; + } + } + default: + next.strVal = std::string_view(p, 1); + p++; + break; + } + return next; +} + +/** + * If current token matches the given token type, advance to the next token and return true. + * Otherwise, return false. + */ +bool Lexer::matchToken(const Token::TokenType type, const bool skipSpace) { + if (skipSpace && next.type == Token::SPACE) { + while (*p == ' ') p++; + nextToken(); + return matchToken(type); + } + + if (next.type == type) { + nextToken(skipSpace); + return true; + } + return false; +} + +/** + * Scan an integer. An integer is made of the following characters: [0-9]+-. + */ +JsonUtilCode Lexer::scanInteger(int64_t &val) { + val = 0; + if (next.type != Token::DIGIT && next.type != Token::PLUS && next.type != Token::MINUS) + return JSONUTIL_VALUE_NOT_NUMBER; + + if (next.type == Token::DIGIT) { + val = scanUnsignedInteger(); + } else { + int sign = (next.type == Token::PLUS? 1 : -1); + nextToken(); // skip the PLUS/MINUS sign symbol + if (next.type != Token::DIGIT) return JSONUTIL_VALUE_NOT_NUMBER; + val = sign * scanUnsignedInteger(); + } + nextToken(); // advance to the next token + return JSONUTIL_SUCCESS; +} + +int64_t Lexer::scanUnsignedInteger() { + ValkeyModule_Assert(next.type == Token::DIGIT); + int64_t val = *next.strVal.data() - '0'; + while (*p != '\0' && std::isdigit(*p)) { + val = val * 10 + (*p - '0'); + p++; + } + TRACE("DEBUG", "scanUnsignedInteger(): " << val) + return val; +} + +/** + * Scan unquoted object member name, which can contain any symbol except terminator characters. + */ +JsonUtilCode Lexer::scanUnquotedMemberName(StringViewHelper &member_name) { + // Check if the first character is a member name terminator char + static const char *unquotedMemberNameTerminators = ".[]()<>=!'\" |&"; + const char *p_start = next.strVal.data(); + if (strchr(unquotedMemberNameTerminators, *p_start) != nullptr) { + TRACE("ERROR", "scanUnquotedMemberName invalid first char of an expected member name: " << p_start) + return JSONUTIL_INVALID_MEMBER_NAME; + } + size_t len = 1; + + // Scan the remaining path for the first occurrence of any terminator char + size_t length = strcspn(p, unquotedMemberNameTerminators); + len += length; + p += length; + + member_name.setExternalView(std::string_view(p_start, len)); + TRACE("DEBUG", "scanUnquotedMemberName token type: " << next.type << ", token val: " + << next.strVal << ", name: " << member_name.getView()) + nextToken(); // advance to the next token + return JSONUTIL_SUCCESS; +} + +/** + * Scan number in filter expression. A number is made of the following characters: [0-9]+-.Ee. 
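+ * For example, "10", "-2.5" and "1e-10" all scan as numbers; the scan stops at the
+ * first character outside this set, such as the ')' that closes a filter expression.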
+ */ +JsonUtilCode Lexer::scanNumberInFilterExpr(StringViewHelper &number_sv) { + // Check if the first character is a valid number character + static const char *validNumberChars = "+-0123456789.Ee"; + const char *p_start = next.strVal.data(); + if (strchr(validNumberChars, *p_start) == nullptr) { + TRACE("ERROR", "scanNumberInFilterExpr invalid first char of an expected number: " << p_start) + return JSONUTIL_INVALID_NUMBER; + } + size_t len = 1; + + // Scan the remaining path for the prefix that consists entirely of valid number characters + size_t length = strspn(p, validNumberChars); + len += length; + p += length; + + number_sv.setExternalView(std::string_view(p_start, len)); + TRACE("DEBUG", "scanNumberInFilterExpr number: " << number_sv.getView()) + nextToken(); // advance to the next token + return JSONUTIL_SUCCESS; +} + +/** + * Scan an identifier that is an alphanumeric string. + */ +JsonUtilCode Lexer::scanIdentifier(StringViewHelper &sv) { + // Check if the first character is alphanumeric + const char *p_start = next.strVal.data(); + if (!std::isalnum(*p_start)) return JSONUTIL_INVALID_IDENTIFIER; + size_t len = 1; + + // Scan the remaining path for the alphanumeric characters + while (*p != '\0' && std::isalnum(*p)) { + p++; + len++; + } + sv.setExternalView(std::string_view(p_start, len)); + TRACE("DEBUG", "scanIdentifier identifier: " << sv.getView()) + nextToken(); // advance to the next token + return JSONUTIL_SUCCESS; +} + +/** + * Skip whitespaces including the current token. + */ +void Lexer::skipSpaces() { + if (next.type == Token::SPACE) { + nextToken(true); + } else { + while (*p == ' ') p++; + } +} + +/** + * Scan a path value to be fed into a selector + */ +JsonUtilCode Lexer::scanPathValue(StringViewHelper &output) { + static const char *terminators = "]()<>=!'\" |&"; + static const char *numerics = "-+0123456789"; + static const char *quotes = "\"'"; + char current_quote = '"'; + bool in_brackets = false; + bool in_quotes = false; + bool scanning = true; + + const char *p_start = next.strVal.data(); // leading $ + ValkeyModule_Assert(*p_start == '$'); + size_t len = 1; + + // We only check for terminators when we are outside of brackets (and unquoted) + // When we are inside of brackets, we check for numerics (digits or -+) or quoted values + // We track which type of quote we are using with current_quote + while (scanning && *p != '\0') { + if (!in_brackets) { // can't be in quotes without being in brackets first + if (*p == '[') { + in_brackets = true; + p++; + len++; + } else if (strchr(terminators, *p) != nullptr) { + scanning = false; + } else { + p++; + len++; + } + } else { + if (!in_quotes) { + if (strchr(quotes, *p) != nullptr) { + in_quotes = true; + current_quote = *p; + p++; + len++; + } else if (strchr(numerics, *p) != nullptr) { + p++; + len++; + } else if (*p == ']') { + p++; + len++; + in_brackets = false; + } else { + return JSONUTIL_INVALID_JSON_PATH; + } + } else { + if (*p == '\\' && *(p+1) == current_quote) { + p++; + len++; + } else if (*p == current_quote) { + in_quotes = false; + } + p++; + len++; + } + } + } + + output.setExternalView(std::string_view(p_start, len)); + + nextToken(); // advance to the next token + return JSONUTIL_SUCCESS; +} + +/** + * Scan double quoted string that may contain escaped characters. 
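+ * The returned span keeps the surrounding quotes and ends at the first unescaped quote;
+ * JParser then performs the actual unescaping, e.g. the source text "a\"b" parses to a"b.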
+*/ +JsonUtilCode Lexer::scanDoubleQuotedString(JParser& parser) { + const char *p_start = next.strVal.data(); + size_t len = 1; + ValkeyModule_Assert(*p_start == DOUBLE_QUOTE); + TRACE("DEBUG", "scanDoubleQuotedString *p_start: " << *p_start << ", p: " << p) + + const char *prev = nullptr; + while (*p != '\0') { + if (*p == DOUBLE_QUOTE && (prev == nullptr || *prev != '\\')) { + // reached the end quote + p++; + len++; + break; + } + prev = p; + p++; + len++; + } + std::string_view name = std::string_view(p_start, len); + + // unescape the content using JParser + if (parser.Parse(name).HasParseError()) { + TRACE("ERROR", "scanDoubleQuotedString failed to parse " << name) + return parser.GetParseErrorCode(); + } + TRACE("DEBUG", "scanDoubleQuotedString before unescape: " << name << ", after unescape: " + << parser.GetJValue().GetStringView()) + + nextToken(); // advance to the next token + return JSONUTIL_SUCCESS; +} + +/** + * Scan double quoted string that may contain escaped characters. +*/ +JsonUtilCode Lexer::scanDoubleQuotedString(jsn::stringstream &ss) { + JParser parser; + JsonUtilCode rc = scanDoubleQuotedString(parser); + if (rc != JSONUTIL_SUCCESS) return rc; + ss << parser.GetJValue().GetStringView(); + return JSONUTIL_SUCCESS; +} + +JsonUtilCode Lexer::scanSingleQuotedStringAndConvertToDoubleQuotedString(jsn::stringstream &ss) { + const char *p_start = p; + size_t len = 0; + + const char *prev = nullptr; + while (*p != '\0') { + if (*p == SINGLE_QUOTE && (prev == nullptr || *prev != '\\')) { + // reached the end quote + p++; + break; + } + prev = p; + p++; + len++; + } + // the string view does not include begin and end single quote + std::string_view sv = std::string_view(p_start, len); + ss << "\""; + for (jsn::string::size_type i = 0; i < sv.length(); ++i) { + switch (sv[i]) { + case '"': { + ss << "\\\""; + break; + } + case '\\': { // unescape single quotes + // Since the underlying string must end with a null terminator, we can safely access + // the next character at index i+1. + if (sv[i+1] != '\'') { + ss << sv[i]; + } + break; + } + default: + ss << sv[i]; + break; + } + } + ss << "\""; + + nextToken(); // advance to the next token + return JSONUTIL_SUCCESS; +} + +/** + * Scan single quoted string that may contain escaped characters. + */ +JsonUtilCode Lexer::scanSingleQuotedString(jsn::stringstream &ss) { + const char *p_start = p; + size_t len = 0; + + bool escaped = false; + const char *prev = nullptr; + while (*p != '\0') { + if (*p == '\\') escaped = true; + if (*p == SINGLE_QUOTE && (prev == nullptr || *prev != '\\')) { + // reached the end quote + p++; + break; + } + prev = p; + p++; + len++; + } + // the string view does not include begin and end single quote + std::string_view name = std::string_view(p_start, len); + + if (escaped) { + unescape(name, ss); + TRACE("DEBUG", "scanSingleQuotedString before unescape: " << name << ", after unescape: " + << ss.str()) + } else { + ss << name; + TRACE("DEBUG", "scanSingleQuotedString name: " << ss.str()) + } + + nextToken(); // advance to the next token + return JSONUTIL_SUCCESS; +} + +/** + * A helper function to unescape escaped control characters. It is only used for processing single + * quoted strings. The string view handed down does not contain begin and end single quote. + * + * For double quoted strings, JParser::parse is used to read escaped characters. See scanDoubleQuotedString. 
+ *
+ * @param input string view excluding begin and end quote
+ * @param ss output string stream
+ */
+void Lexer::unescape(const std::string_view &input, jsn::stringstream &ss) {
+    static const char *ctrlChar_2ndPart = "\\tbfnr'";
+    static const char *ctrlChars = "\\\t\b\f\n\r\'\0";  // internal representation of control characters
+    for (jsn::string::size_type i = 0; i < input.length(); ++i) {
+        switch (input[i]) {
+            case '\\': {
+                if (i == input.length() - 1) {
+                    // reached the end of the input
+                    ss << input[i];
+                } else {
+                    // check if the next char is an escaped control character
+                    const char *ptr = strchr(ctrlChar_2ndPart, input[i+1]);
+                    if (ptr != nullptr) {
+                        i++;  // skip the backslash, which is used to escape the next character
+                        // output the internal representation of the control character
+                        ss << ctrlChars[ptr - ctrlChar_2ndPart];
+                    } else {
+                        // This backslash does not represent an escaped control character.
+                        ss << input[i];
+                    }
+                }
+                break;
+            }
+            default:
+                ss << input[i];
+                break;
+        }
+    }
+    TRACE("DEBUG", "unescape before unescape: " << input << ", after unescape: " << ss.str())
+}
+
+JsonUtilCode Selector::getValues(JValue &root, const char *path) {
+    JsonUtilCode rc = init(root, path, READ);
+    if (rc != JSONUTIL_SUCCESS) return rc;
+    return eval();
+}
+
+struct pathCompare {
+ public:
+    bool operator() (const jsn::string& path1, const jsn::string& path2) const {
+        // compare path depth
+        JPointer ptr1 = JPointer(path1);
+        JPointer ptr2 = JPointer(path2);
+        size_t depth1 = ptr1.GetTokenCount();
+        size_t depth2 = ptr2.GetTokenCount();
+        if (depth1 != depth2) return depth1 > depth2;
+
+        const JPointer::Token* tokenArray1 = ptr1.GetTokens();
+        const JPointer::Token* tokenArray2 = ptr2.GetTokens();
+        bool areBothLeavesIndex = (tokenArray1[depth1 - 1].index != rapidjson::kPointerInvalidIndex &&
+                                   tokenArray2[depth2 - 1].index != rapidjson::kPointerInvalidIndex);
+        if (!areBothLeavesIndex) {
+            if (ptr1 == ptr2) return true;
+            return !(ptr1 < ptr2);  // operator > is not available
+        }
+
+        // compare path elements up to the parent of leaf
+        for (size_t i = 0; i < depth1 - 1; i++) {
+            if (tokenArray1[i].index != tokenArray2[i].index)
+                return tokenArray1[i].index > tokenArray2[i].index;
+
+            if (tokenArray1[i].length != tokenArray2[i].length)
+                return tokenArray1[i].length > tokenArray2[i].length;
+
+            if (int cmp = std::memcmp(tokenArray1[i].name, tokenArray2[i].name, sizeof(char) * tokenArray1[i].length))
+                return cmp > 0;
+        }
+
+        // compare leaf index
+        return tokenArray1[depth1 - 1].index > tokenArray2[depth2 - 1].index;
+    }
+};
+
+JsonUtilCode Selector::deleteValues(JValue &root, const char *path, size_t &numValsDeleted) {
+    numValsDeleted = 0;
+    JsonUtilCode rc = init(root, path, DELETE);
+    if (rc != JSONUTIL_SUCCESS) return rc;
+    rc = eval();
+    if (rc != JSONUTIL_SUCCESS) return rc;
+    if (!isV2Path && !hasValues()) return JSONUTIL_JSON_PATH_NOT_EXIST;
+    if (resultSet.empty()) return getError();
+
+    TRACE("DEBUG", "deleteValues total values to delete: " << resultSet.size());
+    if (json_is_instrument_enabled_delete()) {
+        ValkeyModule_Log(nullptr, "warning", "deleting %zu values of doc %p at path %s",
+                         resultSet.size(), static_cast<void *>(&root), path);
+        if (!ValidateJValue(root)) {
+            ValkeyModule_Log(nullptr, "warning", "ERROR: before delete, doc %p is NOT valid!",
+                             static_cast<void *>(&root));
+        }
+        if (json_is_instrument_enabled_dump_doc_before()) {
+            ValkeyModule_Log(nullptr, "warning", "Dump document structure before delete:");
+            DumpRedactedJValue(root, nullptr, "warning");
+        }
+    }
+
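+    // Note on deletion order (a reading of pathCompare above, not new behavior): for
+    // multi-value deletes, paths are collected into a set ordered deepest-first, and
+    // sibling array leaves are ordered by descending index. Erasing in that order keeps
+    // the not-yet-deleted JSON pointers valid. For example, given hypothetical paths
+    // /a/2 and /a/0, erasing /a/2 first leaves /a/0 still addressing the same element.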
+    if (resultSet.size() == 1) {
+        if (json_is_instrument_enabled_delete()) {
+            ValkeyModule_Log(nullptr, "warning", "deleting value %p of doc %p at path %s",
+                             static_cast<void *>(resultSet[0].first), static_cast<void *>(&root), path);
+            if (json_is_instrument_enabled_dump_value_before_delete()) {
+                DumpRedactedJValue(*resultSet[0].first, nullptr, "warning");
+            }
+        }
+        if (deleteValue(resultSet[0].second)) numValsDeleted++;
+    } else {
+        jsn::set<jsn::string, pathCompare> path_set;
+        for (auto &vInfo : resultSet) {
+            if (json_is_instrument_enabled_delete()) {
+                ValkeyModule_Log(nullptr, "warning", "preparing to delete value %p of doc %p at path %s",
+                                 static_cast<void *>(vInfo.first), static_cast<void *>(&root), vInfo.second.c_str());
+                if (json_is_instrument_enabled_dump_value_before_delete()) {
+                    DumpRedactedJValue(*vInfo.first, nullptr, "warning");
+                }
+            }
+            path_set.insert(std::move(vInfo.second));
+        }
+        for (auto it = path_set.begin(); it != path_set.end(); it++) {
+            if (json_is_instrument_enabled_delete()) {
+                ValkeyModule_Log(nullptr, "warning", "deleting value of doc %p at path %s",
+                                 static_cast<void *>(&root), (*it).c_str());
+            }
+            if (deleteValue(*it)) numValsDeleted++;
+        }
+    }
+
+    TRACE("DEBUG", "deleteValues deleted " << numValsDeleted << " values");
+    if (json_is_instrument_enabled_delete()) {
+        if (!ValidateJValue(root)) {
+            ValkeyModule_Log(nullptr, "warning", "ERROR: after delete, doc %p is NOT valid!!",
+                             static_cast<void *>(&root));
+        }
+        if (json_is_instrument_enabled_dump_doc_after()) {
+            ValkeyModule_Log(nullptr, "warning", "Dump document structure after delete:");
+            DumpRedactedJValue(root, nullptr, "warning");
+        }
+    }
+
+#ifdef INSTRUMENT_V2PATH
+    dom_dump_value(root);
+#endif
+    return JSONUTIL_SUCCESS;
+}
+
+bool Selector::deleteValue(const jsn::string &path) {
+    TRACE("DEBUG", "deleteValue deleting value at " << path)
+    JPointer ptr = JPointer(path);
+    if (ptr.HasError() || !ptr.PathExists(*root)) return false;
+    return ptr.Erase(*root);
+}
+
+/**
+ * Single stage insert/update, which commits the operation.
+ *
+ * The set op could result in insert or update or both.
+ * Selector::resultSet - values to be updated
+ * Selector::insertPaths - set of insert paths
+ *
+ * Note that we don't expect Selector::resultSet and Selector::insertPaths to be both non-empty.
+ */
+JsonUtilCode Selector::setValues(JValue &root, const char *path, JValue &new_val) {
+    JsonUtilCode rc = prepareSetValues(root, path);
+    if (rc != JSONUTIL_SUCCESS) return rc;
+    return commit(new_val);
+}
+
+/**
+ * Prepare for a 2-stage insert/update. The 2-stage write splits a write operation into two calls: prepareSetValues
+ * and commit, where prepareSetValues does not change the Valkey data. The purpose of having a 2-stage write is to
+ * be able to discard the write operation if certain conditions are not satisfied.
+ *
+ * Use cases:
+ * 1. JSON.SET with NX/XX option: We need to verify the NX/XX condition is satisfied before committing the
+ *    operation.
+ * 2. Document path limit check: We need to check if the max path limit is exceeded before committing the operation.
+ * 3. Document size limit check: We need to check if the document size limit is exceeded before committing the
+ *    operation.
+ */
+JsonUtilCode Selector::prepareSetValues(JValue &root, const char *path) {
+    JsonUtilCode rc = init(root, path, INSERT_OR_UPDATE);
+    if (rc != JSONUTIL_SUCCESS) return rc;
+    rc = eval();
+    if (rc != JSONUTIL_SUCCESS) return rc;
+    return JSONUTIL_SUCCESS;
+}
+
+/**
+ * Commit a 2-stage insert/update.
+ */
+JsonUtilCode Selector::commit(JValue &new_val) {
+    if (resultSet.empty() && insertPaths.empty()) return getError();
+
+    // handling update
+    auto &rs = getUniqueResultSet();
+    if (!rs.empty()) {
+        if (json_is_instrument_enabled_update()) {
+            ValkeyModule_Log(nullptr, "warning", "updating %zu values of doc %p", rs.size(),
+                             static_cast<void *>(root));
+            if (!ValidateJValue(*root)) {
+                ValkeyModule_Log(nullptr, "warning", "ERROR: before update, doc %p is NOT valid!",
+                                 static_cast<void *>(root));
+            }
+            if (json_is_instrument_enabled_dump_doc_before()) {
+                ValkeyModule_Log(nullptr, "warning", "Dump document structure before update:");
+                DumpRedactedJValue(*root, nullptr, "warning");
+            }
+        }
+
+        if (rs.size() == 1) {
+            JPointer ptr = JPointer(rs[0].second);
+            if (ptr.HasError()) return ptr.error;
+            if (json_is_instrument_enabled_update()) {
+                ValkeyModule_Log(nullptr, "warning", "updating value %p of doc %p at path %s",
+                                 static_cast<void *>(rs[0].first), static_cast<void *>(root), rs[0].second.c_str());
+            }
+            ptr.Swap(*root, new_val, allocator);
+            TRACE("DEBUG", "commit updated value at " << rs[0].second);
+        } else {
+            for (auto &vInfo : rs) {
+                // copy the new value so that it can be set at multiple paths
+                JValue new_val_copy(new_val, allocator);
+                JPointer ptr = JPointer(vInfo.second);
+                if (ptr.HasError()) return ptr.error;
+                // An existing path may not exist due to updates of other values.
+                // However, JPointer will always insert the value if the path does not exist.
+                // So, we'll do update only if the path still exists.
+                if (ptr.PathExists(*root)) {
+                    if (json_is_instrument_enabled_update()) {
+                        ValkeyModule_Log(nullptr, "warning", "updating value %p of doc %p at path %s",
+                                         static_cast<void *>(vInfo.first), static_cast<void *>(root),
+                                         vInfo.second.c_str());
+                    }
+                    ptr.Swap(*root, new_val_copy, allocator);
+                    TRACE("DEBUG", "commit updated value at " << vInfo.second);
+                }
+            }
+        }
+
+        TRACE("DEBUG", "commit updated values: " << rs.size());
+        if (json_is_instrument_enabled_update()) {
+            if (!ValidateJValue(*root)) {
+                ValkeyModule_Log(nullptr, "warning", "ERROR: after update, doc %p is NOT valid!",
+                                 static_cast<void *>(root));
+            }
+            if (json_is_instrument_enabled_dump_doc_after()) {
+                ValkeyModule_Log(nullptr, "warning", "Dump document structure after update:");
+                DumpRedactedJValue(*root, nullptr, "warning");
+            }
+        }
+    }
+
+    // handling insert
+    if (!insertPaths.empty()) {
+        if (json_is_instrument_enabled_insert()) {
+            ValkeyModule_Log(nullptr, "warning", "inserting %zu values into doc %p",
+                             insertPaths.size(), static_cast<void *>(root));
+            if (!ValidateJValue(*root)) {
+                ValkeyModule_Log(nullptr, "warning", "ERROR: before insert, doc %p is NOT valid!",
+                                 static_cast<void *>(root));
+            }
+            if (json_is_instrument_enabled_dump_doc_before()) {
+                ValkeyModule_Log(nullptr, "warning", "Dump document structure before insert:");
+                DumpRedactedJValue(*root, nullptr, "warning");
+            }
+        }
+
+        if (insertPaths.size() == 1) {
+            JPointer ptr = JPointer(*insertPaths.begin());
+            if (ptr.HasError()) return ptr.error;
+            if (json_is_instrument_enabled_insert()) {
+                ValkeyModule_Log(nullptr, "warning", "inserting value into doc %p at path %s",
+                                 static_cast<void *>(root), (*insertPaths.begin()).c_str());
+            }
+            ptr.Set(*root, new_val, allocator);
+            TRACE("DEBUG", "commit inserted value at " << *insertPaths.begin());
+        } else {
+            for (auto &path : insertPaths) {
+                // copy the new value so that it can be set at multiple paths
+                JValue new_val_copy(new_val, allocator);
+                JPointer ptr = JPointer(path);
+                if (ptr.HasError()) return ptr.error;
+                if (json_is_instrument_enabled_insert()) {
+                    ValkeyModule_Log(nullptr, "warning", "inserting value into doc %p at path %s",
+                                     static_cast<void *>(root), path.c_str());
+                }
+                ptr.Set(*root, new_val_copy, allocator);
+                TRACE("DEBUG", "commit inserted value at " << path);
+            }
+        }
+
+        TRACE("DEBUG", "commit inserted values: " << insertPaths.size());
+        if (json_is_instrument_enabled_insert()) {
+            if (!ValidateJValue(*root)) {
+                ValkeyModule_Log(nullptr, "warning", "ERROR: after insert, doc %p is NOT valid!",
+                                 static_cast<void *>(root));
+            }
+            if (json_is_instrument_enabled_dump_doc_after()) {
+                ValkeyModule_Log(nullptr, "warning", "Dump document structure after insert:");
+                DumpRedactedJValue(*root, nullptr, "warning");
+            }
+        }
+    }
+
+    return JSONUTIL_SUCCESS;
+}
+
+JsonUtilCode Selector::init(JValue &root, const char *path, const Mode mode) {
+    CHECK_QUERY_STRING_SIZE(path);
+    this->mode = mode;
+    this->root = &root;
+    node = &root;
+    nodePath = "";
+    lex.init(path);
+    resultSet.clear();
+    insertPaths.clear();
+    maxPathDepth = 0;
+    currPathDepth = 0;
+    error = JSONUTIL_SUCCESS;
+
+    lex.nextToken();  // initial pull
+    return JSONUTIL_SUCCESS;
+}
+
+JsonUtilCode Selector::eval() {
+    TRACE("DEBUG", "eval curr token: " << lex.currToken().type << ", remaining path: " << lex.p
+          << ", nodePath: " << nodePath)
+    CHECK_RECURSION_DEPTH();
+    JsonUtilCode rc = parseSupportedPath();
+    if (rc == JSONUTIL_SUCCESS && node != nullptr) {
+        // select the value
+        ValueInfo vInfo(node, nodePath);
+        resultSet.push_back(std::move(vInfo));
+    }
+    return rc;
+}
+
+/**
+ * Errors fall into two categories: JSONPath syntax error and non-syntax error. Non-syntax error examples are
+ * path does not exist, array index out of bounds, index not a number, etc. In multi-path recursive traversals
+ * (e.g., wildcard, slice, etc.), if a syntax error is detected, the entire selector process should be immediately
+ * terminated (there is no need to continue exploring other paths). If a non-syntax error is detected, only the
+ * current path search ends, and we should continue exploring unexplored paths.
+ */ +bool Selector::isSyntaxError(JsonUtilCode code) const { + return (code == JSONUTIL_INVALID_JSON_PATH || + code == JSONUTIL_INVALID_MEMBER_NAME || + code == JSONUTIL_INVALID_NUMBER || + code == JSONUTIL_INVALID_IDENTIFIER || + code == JSONUTIL_EMPTY_EXPR_TOKEN || + code == JSONUTIL_ARRAY_INDEX_NOT_NUMBER || + code == JSONUTIL_STEP_CANNOT_NOT_BE_ZERO || + code == JSONUTIL_PARENT_ELEMENT_NOT_EXIST || + code == JSONUTIL_PARSER_RECURSION_DEPTH_LIMIT_EXCEEDED || + code == JSONUTIL_RECURSIVE_DESCENT_TOKEN_LIMIT_EXCEEDED || + code == JSONUTIL_QUERY_STRING_SIZE_LIMIT_EXCEEDED); +} + +/** + * SupportedPath ::= ["$" | "."] RelativePath + */ +JsonUtilCode Selector::parseSupportedPath() { + if (node == nullptr || lex.matchToken(Token::END)) return JSONUTIL_SUCCESS; + + if (lex.matchToken(Token::DOLLAR)) { + if (!ENABLE_V2_SYNTAX) return JSONUTIL_INVALID_JSON_PATH; + if (node != root) return JSONUTIL_DOLLAR_CANNOT_APPLY_TO_NON_ROOT; + isV2Path = true; + } else if (lex.matchToken(Token::DOT)) { + } + return parseRelativePath(); +} + +/** + * RelativePath ::= empty | RecursivePath | DotPath | BracketPath | QualifiedPath + */ +JsonUtilCode Selector::parseRelativePath() { + if (node == nullptr || lex.matchToken(Token::END)) return JSONUTIL_SUCCESS; + + switch (lex.currToken().type) { + case Token::END: return JSONUTIL_SUCCESS; + case Token::DOTDOT: return parseRecursivePath(); + case Token::DOT: return parseDotPath(); + case Token::LBRACKET: return parseBracketPath(); + default: return parseQualifiedPath(); + } +} + +/** + * RecursivePath ::= ".." SupportedPath + */ +JsonUtilCode Selector::parseRecursivePath() { + isRecursiveSearch = true; + if (!lex.matchToken(Token::DOTDOT)) ValkeyModule_Assert(false); + CHECK_RECURSIVE_DESCENT_TOKENS(); + + JsonUtilCode rc = recursiveSearch(*node, lex.p); + if (rc != JSONUTIL_SUCCESS) return rc; + dedupe(); + return JSONUTIL_SUCCESS; +} + +/** + * This DFS algorithm literally embodies "recursive descent": + * 1. Run DFS on the subtree rooted from the current node (a.k.a. value). + * 2. When each node is visited, run the selector at the node with the remaining path. + * 3. Selector::resultSet serves as the global result holding all selected values. + */ +JsonUtilCode Selector::recursiveSearch(JValue &v, const char *p) { + TRACE("DEBUG", "recursiveSearch curr token " << lex.currToken().type << ", curr path: "<< lex.p + << ", nodePath: " << nodePath << ", currPathDepth: " << currPathDepth << ", maxPathDepth: " << maxPathDepth) + if (lex.currToken().type == Token::DOTDOT || lex.currToken().type == Token::DOT) { + TRACE("DEBUG", "We have an ambiguous (and therefore invalid) sequence of 3+ dots") + return JSONUTIL_INVALID_DOT_SEQUENCE; + } + if (!v.IsObject() && !v.IsArray()) { + // Null out the current node to signal termination of the current path search. + node = nullptr; + return JSONUTIL_SUCCESS; + } + + // At the current node, run the selector by calling eval(). 
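+    // A hedged note on the snapshot/restore pair below: State is assumed to capture the
+    // traversal cursor (current node, nodePath, lexer position, path depth) so the nested
+    // eval() can consume the remaining path rooted at this value and then hand the cursor
+    // back to the descent loops. resultSet is deliberately outside the snapshot: values
+    // selected by the nested eval() must survive the restore.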
+ State state; + snapshotState(state); + node = &v; // points to the current visited value + JsonUtilCode rc = eval(); // run the selector + restoreState(state); + if (isSyntaxError(rc)) return rc; + + // Descend to each child (i.e., recursive descent) + if (v.IsObject()) { + for (auto &m : v.GetObject()) { + jsn::string path_copy = nodePath; + nodePath.append("/").append(m.name.GetStringView()); + incrPathDepth(); + TRACE("DEBUG", "-> recursiveSearch descend to object member " << m.name.GetStringView() + << ", nodePath: " << nodePath << ", currPathDepth: " << currPathDepth << ", maxPathDepth: " + << maxPathDepth) + rc = recursiveSearch(m.value, p); + decrPathDepth(); + if (isSyntaxError(rc)) return rc; + nodePath = path_copy; + } + } else if (v.IsArray()) { + for (int64_t i=0; i < v.Size(); i++) { + jsn::string path_copy = nodePath; + nodePath.append("/").append(std::to_string(i)); + incrPathDepth(); + TRACE("DEBUG", "-> recursiveSearch descend to array index " << i << ", nodePath: " << nodePath + << ", currPathDepth: " << currPathDepth << ", maxPathDepth: " << maxPathDepth) + rc = recursiveSearch(v.GetArray()[i], p); + decrPathDepth(); + if (isSyntaxError(rc)) return rc; + nodePath = path_copy; + } + } + + // Null out the current node to signal we are done with the search. + node = nullptr; + return JSONUTIL_SUCCESS; +} + +/** + * DotPath ::= "." QualifiedPath + */ +JsonUtilCode Selector::parseDotPath() { + if (!lex.matchToken(Token::DOT)) ValkeyModule_Assert(false); + return parseQualifiedPath(); +} + +/** + * BracketPath ::= BracketPathElement [ RelativePath ] + */ +JsonUtilCode Selector::parseBracketPath() { + JsonUtilCode rc = parseBracketPathElement(); + if (rc != JSONUTIL_SUCCESS) return rc; + if (lex.currToken().type == Token::END) return JSONUTIL_SUCCESS; + return parseRelativePath(); +} + +/** + * BracketPathElement ::= "[" {SPACE} ( WildcardInBrackets | ((NameInBrackets | IndexExpr) ) {SPACE} "]") + * WildcardInBrackets ::= "*" {SPACE} "]" [ "[" {SPACE} "?" "(" FilterExpr ")" {SPACE} "]" ] + * NameInBrackets ::= QuotedMemberName [ ({SPACE} "," {SPACE} QuotedMemberName)+ ] + * QuotedMemberName ::= """ {char} """ | "'" {char} "'" + * IndexExpr ::= Filter | SliceStartsWithColon | SliceOrUnionOrIndex + */ +JsonUtilCode Selector::parseBracketPathElement() { + if (!lex.matchToken(Token::LBRACKET, true)) { + TRACE("ERROR", "parseBracketPathElement token [ is not seen" << ", nodePath: " << nodePath) + return JSONUTIL_INVALID_JSON_PATH; + } + + JsonUtilCode rc; + const Token &token = lex.currToken(); + if (token.type == Token::WILDCARD) { + rc = parseWildcardInBrackets(); + } else { + if (token.type == Token::SINGLE_QUOTE || token.type == Token::DOUBLE_QUOTE) { + rc = parseNameInBrackets(); + } else { + rc = parseIndexExpr(); + } + } + if (rc != JSONUTIL_SUCCESS) { + TRACE("ERROR", "parseBracketPathElement rc: " << rc << ", nodePath: " << nodePath) + return rc; + } + lex.skipSpaces(); + return JSONUTIL_SUCCESS; +} + +/** + * WildcardInBrackets ::= "*" {SPACE} "]" [ "[" {SPACE} "?" 
"(" FilterExpr ")" {SPACE} "]" ] + */ +JsonUtilCode Selector::parseWildcardInBrackets() { + if (!lex.matchToken(Token::WILDCARD, true)) return JSONUTIL_INVALID_JSON_PATH; + if (!lex.matchToken(Token::RBRACKET, true)) return JSONUTIL_INVALID_JSON_PATH; + + if (lex.currToken().type == Token::LBRACKET && lex.peekToken() == Token::QUESTION_MARK) { + if (!lex.matchToken(Token::LBRACKET, true)) return JSONUTIL_INVALID_JSON_PATH; + lex.skipSpaces(); + if (!lex.matchToken(Token::QUESTION_MARK)) return JSONUTIL_INVALID_JSON_PATH; + if (!lex.matchToken(Token::LPAREN)) return JSONUTIL_INVALID_JSON_PATH; + + jsn::vector result; // subset of indexes of the current array + JsonUtilCode rc = parseFilterExpr(result); + if (rc != JSONUTIL_SUCCESS) return rc; + + if (!lex.matchToken(Token::RPAREN, true)) return JSONUTIL_INVALID_JSON_PATH; + if (!lex.matchToken(Token::RBRACKET, true)) return JSONUTIL_INVALID_JSON_PATH; + return processFilterResult(result); + } else { + return processWildcard(); + } +} + +/** + * NameInBrackets ::= QuotedMemberName { {SPACE} "," {SPACE} QuotedMemberName } + * QuotedMemberName ::= """ {char} """ | "'" {char} "'" + */ +JsonUtilCode Selector::parseNameInBrackets() { + jsn::vector member_names; + jsn::stringstream ss; + JsonUtilCode rc = parseQuotedMemberName(ss); + if (rc != JSONUTIL_SUCCESS) return rc; + member_names.push_back(ss.str()); + TRACE("DEBUG", "parseNameInBrackets added member: " << ss.str()) + + while (lex.matchToken(Token::COMMA, true)) { + lex.skipSpaces(); + ss.str(jsn::string()); + rc = parseQuotedMemberName(ss); + if (rc != JSONUTIL_SUCCESS) return rc; + member_names.push_back(ss.str()); + TRACE("DEBUG", "parseNameInBrackets added member: " << ss.str()) + } + + if (!lex.matchToken(Token::RBRACKET, true)) return JSONUTIL_INVALID_JSON_PATH; + return processUnionOfMembers(member_names); +} + +JsonUtilCode Selector::processUnionOfMembers(const jsn::vector &member_names) { + if (member_names.size() == 1) { + StringViewHelper member_name; + member_name.setInternalString(member_names[0]); + TRACE("DEBUG", "processUnionOfMembers member: " << member_name.getView()) + return traverseToObjectMember(member_name); + } + + if (!node->IsObject()) { + if (mode != READ) return JSONUTIL_CANNOT_INSERT_MEMBER_INTO_NON_OBJECT_VALUE; + // Null out the current node to signal termination of the current path search. + node = nullptr; + return JSONUTIL_SUCCESS; + } + + for (auto &s : member_names) { + TRACE("DEBUG", "processUnionOfMembers finding member " << s) + JValue::MemberIterator it = node->FindMember(s); + if (it != node->MemberEnd()) { + StringViewHelper member_name; + member_name.setInternalString(s); + JsonUtilCode rc = evalObjectMember(member_name, it->value); + if (isSyntaxError(rc)) return rc; + } + } + + // We are done. Null out the current node to signal end of selection. 
+ node = nullptr; + return JSONUTIL_SUCCESS; +} + +/** + * QuotedMemberName ::= """ {char} """ | "'" {char} "'" + */ +JsonUtilCode Selector::parseQuotedMemberName(jsn::stringstream &ss) { + const Token &token = lex.currToken(); + if (token.type == Token::DOUBLE_QUOTE) { + JsonUtilCode rc = lex.scanDoubleQuotedString(ss); + if (rc != JSONUTIL_SUCCESS) return rc; + } else if (token.type == Token::SINGLE_QUOTE) { + JsonUtilCode rc = lex.scanSingleQuotedString(ss); + if (rc != JSONUTIL_SUCCESS) return rc; + } else { + return JSONUTIL_INVALID_JSON_PATH; + } + TRACE("DEBUG", "parseQuotedMemberName member_name: " << ss.str()) + return JSONUTIL_SUCCESS; +} + +/** + * QualifiedPath ::= QualifiedPathElement RelativePath + */ +JsonUtilCode Selector::parseQualifiedPath() { + TRACE("DEBUG", "parseQualifiedPath curr token: " << lex.currToken().type << ", nodePath: " << nodePath) + JsonUtilCode rc = parseQualifiedPathElement(); + if (rc != JSONUTIL_SUCCESS) return rc; + return parseRelativePath(); +} + +/** + * QualifiedPathElement ::= Key | BracketPathElement + */ +JsonUtilCode Selector::parseQualifiedPathElement() { + if (lex.currToken().type == Token::LBRACKET) + return parseBracketPathElement(); + else + return parseKey(); +} + +/** + * Key ::= "*" [ [ "." ] WildcardFilter ] | UnquotedMemberName + * WildcardFilter ::= "[" "?" "(" FilterExpr ")" "]" + */ +JsonUtilCode Selector::parseKey() { + if (lex.matchToken(Token::WILDCARD)) { + if (lex.currToken().type == Token::DOT) lex.nextToken(); // skip DOT + if (lex.currToken().type == Token::LBRACKET && lex.peekToken() == Token::QUESTION_MARK) { + return parseWildcardFilter(); + } else { + return processWildcard(); + } + } else { + StringViewHelper name; + JsonUtilCode rc = parseUnquotedMemberName(name); + if (rc != JSONUTIL_SUCCESS) return rc; + return traverseToObjectMember(name); + } +} + +/** + * UnquotedMemberName ::= char { char } + */ +JsonUtilCode Selector::parseUnquotedMemberName(StringViewHelper &name) { + JsonUtilCode rc = lex.scanUnquotedMemberName(name); + if (rc != JSONUTIL_SUCCESS) return rc; + TRACE("DEBUG", "parseUnquotedMemberName name: " << name.getView() << ", nodePath: " << nodePath) + return JSONUTIL_SUCCESS; +} + +/** + * WildcardFilter ::= "[" "?" "(" FilterExpr ")" "]" + */ +JsonUtilCode Selector::parseWildcardFilter() { + if (!node->IsArray()) return JSONUTIL_INVALID_JSON_PATH; + + if (!lex.matchToken(Token::LBRACKET)) return JSONUTIL_INVALID_JSON_PATH; + if (!lex.matchToken(Token::QUESTION_MARK)) return JSONUTIL_INVALID_JSON_PATH; + if (!lex.matchToken(Token::LPAREN, true)) return JSONUTIL_INVALID_JSON_PATH; + + jsn::vector result; // subset of indexes of the current array + JsonUtilCode rc = parseFilterExpr(result); + if (rc != JSONUTIL_SUCCESS) return rc; + + if (!lex.matchToken(Token::RPAREN, true)) return JSONUTIL_INVALID_JSON_PATH; + if (!lex.matchToken(Token::RBRACKET)) return JSONUTIL_INVALID_JSON_PATH; + + return processFilterResult(result); +} + +JsonUtilCode Selector::processWildcard() { + if (node->IsObject()) { + return processWildcardKey(); + } else if (node->IsArray()) { + return processWildcardIndex(); + } else { + // v1 path syntax: return json syntax error that will fail the command. + // v2 path syntax: return a non-syntax error that will not fail the command. Only the + // current path search ends, and we should continue exploring other paths. + return isV2Path ? 
JSONUTIL_INVALID_USE_OF_WILDCARD : JSONUTIL_INVALID_JSON_PATH; + } +} + +JsonUtilCode Selector::processWildcardKey() { + JsonUtilCode rc; + for (auto &m : node->GetObject()) { + TRACE("DEBUG", "processWildcardKey continue parsing object member " + << m.name.GetStringView() << ", curr token: " << lex.currToken().type << ", remaining path: " << lex.p) + State state; + snapshotState(state); + StringViewHelper member_name; + member_name.setInternalView(m.name.GetStringView()); + rc = evalObjectMember(member_name, m.value); + restoreState(state); + if (isSyntaxError(rc)) return rc; + } + // We are done. Null out the current node to signal done of value collection. + node = nullptr; + return JSONUTIL_SUCCESS; +} + +JsonUtilCode Selector::processWildcardIndex() { + JsonUtilCode rc; + for (int64_t i=0; i < node->Size(); i++) { + TRACE("DEBUG", "processWildcardIndex continue parsing array index " << i + << ", curr token: " << lex.currToken().type << ", remaining path: " << lex.p << ", nodePath: " << nodePath) + rc = evalArrayMember(i); + if (isSyntaxError(rc)) return rc; + } + // We are done. Null out the current node to signal done of value collection. + node = nullptr; + return JSONUTIL_SUCCESS; +} + +/** + * Recursively call eval to evaluate the member and continue parsing the remaining path. + * Multiple wildcards/filter expressions are handled through recursion. e.g., + * + * $.a[?(@.price < $.expensive)].[*].[?(@.x > 15] + * + * 1. The first filter expression is evaluated, resulting in a slice of the current array ($.a). + * 2. The search path forks out, one per element of the slice. We'll explore each path. + * 3. For each path, continue parsing the remaining json path. When the wildcard is processed, each + * path forks out n subpaths, where n is the length of the current array. + * 4. For each subpath, the 2nd filter expression is evaluated, which results in a slice of the current array. + * 5. Again, each subpath continues to fork out, one per element of the slice. + */ +JsonUtilCode Selector::evalMember(JValue &m, const char *path_start) { + CHECK_RECURSION_DEPTH(); + incrPathDepth(); + resetPointers(m, path_start); + return eval(); +} + +JsonUtilCode Selector::evalObjectMember(const StringViewHelper &member_name, JValue &val) { + if (!node->IsObject()) { + TRACE("DEBUG", "evalObjectMember Current node is not object. Cannot eval member " + << member_name.getView()) + return JSONUTIL_JSON_ELEMENT_NOT_OBJECT; + } + + State state; + snapshotState(state); + + nodePath.append("/").append(member_name.getView()); + TRACE("DEBUG", "evalObjectMember object member " << member_name.getView() + << ", curr token: " << lex.currToken().type << ", remaining path: " << lex.p << ", nodePath: " << nodePath) + JsonUtilCode rc = evalMember(val, lex.p); + + restoreState(state); + return rc; +} + +JsonUtilCode Selector::evalArrayMember(int64_t idx) { + if (!node->IsArray()) { + TRACE("DEBUG", "evalArrayMember Current node is not array. 
Cannot eval array index " << idx) + return JSONUTIL_JSON_ELEMENT_NOT_ARRAY; + } + if (idx < 0 || idx >= static_cast(node->Size())) return JSONUTIL_INDEX_OUT_OF_ARRAY_BOUNDARIES; + + State state; + snapshotState(state); + + nodePath.append("/").append(std::to_string(idx)); + TRACE("DEBUG", "evalArrayMember array index " << idx << ", curr token: " << lex.currToken().type + << ", remaining path: " << lex.p << ", nodePath: " << nodePath) + JsonUtilCode rc = evalMember(node->GetArray()[idx], lex.p); + + restoreState(state); + return rc; +} + +JsonUtilCode Selector::traverseToObjectMember(const StringViewHelper &member_name) { + if (!node->IsObject()) { + // We should not assert node must be an object, because this could just be a user error. + // e.g., path: $.phoneNumbers.city, where phoneNumbers is actually an array not object. An assertion would + // have crashed the server. The correct way to treat such user error is termination the current path search + // with no matching values found. Neither should we indicate a syntax error that will fail the entire search, + // because this is just a termination of one search path. Other path searches should continue. + TRACE("DEBUG", "traverseToObjectMember Current node is not object. Cannot traverse to member " + << member_name.getView() << ", nodePath: " << nodePath) + if (mode != READ) return JSONUTIL_CANNOT_INSERT_MEMBER_INTO_NON_OBJECT_VALUE; + + // Null out the current node to signal termination of the current path search. + node = nullptr; + return JSONUTIL_SUCCESS; + } + + JValue::MemberIterator it = node->FindMember(member_name.getView()); + if (it == node->MemberEnd()) { + TRACE("DEBUG", "traverseToObjectMember Member not found: " + << member_name.getView() << " len: " + << member_name.getView().length() << " mode:" << mode + << " cur node isObj? " << node->IsObject() << ", nodePath: " << nodePath) +#ifdef INSTRUMENT_V2PATH + dom_dump_value(*node); +#endif + + if ((mode == INSERT || mode == INSERT_OR_UPDATE) && !isRecursiveSearch) { + // A new key can be appended to an object if and only if it is the last child in the path + TRACE("DEBUG", "traverseToObjectMember insert mode, peek next token: " << lex.peekToken() + << ", nodePath: " << nodePath); + if (lex.peekToken() == Token::END) { + jsn::string insert_path = nodePath; + insert_path.append("/").append(member_name.getView()); + TRACE("DEBUG", "traverseToObjectMember add insert path: " << insert_path) + insertPaths.insert(std::move(insert_path)); + incrPathDepth(); + } else { + TRACE("DEBUG", "traverseToObjectMember insert mode, cannot insert because current " + "node is not the last child in the path, nodePath: " << nodePath) + setError(JSONUTIL_JSON_PATH_NOT_EXIST); + return JSONUTIL_JSON_PATH_NOT_EXIST; + } + } + + // Null out the current node to signal end of search. + node = nullptr; + return JSONUTIL_SUCCESS; + } + + nodePath.append("/").append(member_name.getView()); + TRACE("DEBUG", "traverseToObjectMember traversed to object member " + << member_name.getView() << ". remaining path: " << lex.p + << ", nodePath: " << nodePath) + node = &it->value; + incrPathDepth(); + return JSONUTIL_SUCCESS; +} + +JsonUtilCode Selector::traverseToArrayIndex(int64_t idx) { + if (!node->IsArray()) { + // We should not assert node must be an array, because this could just be a user error. + // e.g., path: $.address[0], where address is actually an object not array. An assertion would + // have crashed the server. 
The correct way to treat such user error is termination the current path + // search with no matching values found. Neither should we indicate a syntax error that will fail + // the entire search, because this is just a termination of one search path. Other path searches should + // continue. + + TRACE("DEBUG", "traverseToArrayIndex Current node is not array. Cannot traverse to index " << idx + << ", nodePath: " << nodePath) + // Null out the current node to signal termination of the current path search. + node = nullptr; + return JSONUTIL_SUCCESS; + } + + // handle negative index + if (idx < 0) idx += node->Size(); + + // check index bounds + if (idx >= static_cast(node->Size()) || idx < 0) { + return JSONUTIL_INDEX_OUT_OF_ARRAY_BOUNDARIES; + } + + nodePath.append("/").append(std::to_string(idx)); + TRACE("DEBUG", "traverseToArrayIndex traversed to array index " << idx << ", nodePath: " << nodePath) + node = &node->GetArray()[idx]; + incrPathDepth(); + return JSONUTIL_SUCCESS; +} + +/** + * parseIndexExpr() is called from parseBracketPathElement(). + * + * BracketPathElement ::= "[" {SPACE} ( WildcardInBrackets | ((NameInBrackets | IndexExpr) {SPACE} ) "]") + * WildcardInBrackets ::= "*" {SPACE} "]" [ "[" {SPACE} "?" "(" FilterExpr ")" {SPACE} "]" ] + * IndexExpr ::= Filter | SliceStartsWithColon | SliceOrUnionOrIndex + * Filter ::= "?" "(" "@" "." FilterExpr ")" + * SliceStartsWithColon ::= {SPACE} ":" {SPACE} [ ":" {SPACE} [Step] | EndAndStep ] ] + * EndAndStep ::= End [{SPACE} ":" {SPACE} [Step]] ] + * SliceOrUnionOrIndex ::= SliceStartsWithInteger | Index | UnionOfIndexes + * SliceStartsWithInteger ::= Start {SPACE} ":" {SPACE} [ ":" {SPACE} [Step] | EndAndStep + * EndAndStep ::= End [{SPACE} ":" {SPACE} [Step]] ] + * Index ::= Integer + * Integer ::= ["+" | "-"] digit {digit} + * Start ::= Integer + * End ::= Integer + * Step ::= Integer + * UnionOfIndexes ::= Integer ({SPACE} "," {SPACE} Integer)+ + */ +JsonUtilCode Selector::parseIndexExpr() { + switch (lex.currToken().type) { + case Token::END: + return JSONUTIL_EMPTY_EXPR_TOKEN; + case Token::QUESTION_MARK: + return parseFilter(); + case Token::COLON: + return parseSliceStartsWithColon(); + case Token::COMMA: + return JSONUTIL_INVALID_JSON_PATH; // union cannot start with comma + default: + return parseSliceOrUnionOrIndex(); + } +} + +/** + * SliceStartsWithColon ::= {SPACE} ":" [ {SPACE} ":" [Step] | {SPACE} EndAndStep ] ] + * EndAndStep ::= End [{SPACE} ":" {SPACE} [Step]] ] + * End ::= Integer + * Step ::= Integer + * Integer ::= ["+" | "-"] digit {digit} + */ +JsonUtilCode Selector::parseSliceStartsWithColon() { + if (!node->IsArray()) return JSONUTIL_JSON_ELEMENT_NOT_ARRAY; + + lex.nextToken(true); // skip COLON + switch (lex.currToken().type) { + case Token::RBRACKET: + return processSlice(0, node->Size()); + case Token::COLON: { + lex.nextToken(true); // skip COLON + return parseStep(0, node->Size()); + } + default: + return parseEndAndStep(0); + } +} + +/** + * SliceOrUnionOrIndex ::= SliceStartsWithInteger | UnionOfIndexes | Index + * SliceStartsWithInteger ::= Start {SPACE} ":" {SPACE} [ ":" {SPACE} [Step] | EndAndStep + * EndAndStep ::= End [{SPACE} ":" {SPACE} [Step]] ] + * UnionOfIndexes ::= Integer ({SPACE} "," {SPACE} Integer)+ + * Index ::= Integer + */ +JsonUtilCode Selector::parseSliceOrUnionOrIndex() { + int64_t start; + JsonUtilCode rc = parseIndex(start); + if (rc != JSONUTIL_SUCCESS) return rc; + + lex.skipSpaces(); + switch (lex.currToken().type) { + case Token::COLON: + return 
parseSliceStartsWithInteger(start); + case Token::COMMA: + return parseUnionOfIndexes(start); + default: + return processSubscript(start); + } +} + +/** + * SliceStartsWithInteger ::= Start {SPACE} ":" {SPACE} [ ":" {SPACE} [Step] | EndAndStep + * EndAndStep ::= End [{SPACE} ":" {SPACE} [Step]] ] + */ +JsonUtilCode Selector::parseSliceStartsWithInteger(const int64_t start) { + if (!node->IsArray()) return JSONUTIL_JSON_ELEMENT_NOT_ARRAY; + lex.nextToken(true); // skip COLON + + lex.skipSpaces(); + switch (lex.currToken().type) { + case Token::RBRACKET: + return processSlice(start, node->Size()); + case Token::COLON: { + lex.nextToken(); // skip COLON + return parseStep(start, node->Size()); + } + default: + return parseEndAndStep(start); + } +} + +/** + * EndAndStep ::= End [{SPACE} ":" {SPACE} [Step]] ] + */ +JsonUtilCode Selector::parseEndAndStep(const int64_t start) { + int64_t end; + JsonUtilCode rc = parseIndex(end); + if (rc != JSONUTIL_SUCCESS) return rc; + + lex.skipSpaces(); + if (lex.currToken().type == Token::COLON) { + lex.nextToken(); // skip COLON + return parseStep(start, end); + } else { + return processSlice(start, end); + } +} + +JsonUtilCode Selector::parseStep(const int64_t start, const int64_t end) { + lex.skipSpaces(); + if (lex.currToken().type == Token::RBRACKET) { + return processSlice(start, end); + } else { + int64_t step; + JsonUtilCode rc = parseIndex(step); + if (rc != JSONUTIL_SUCCESS) return rc; + return processSlice(start, end, step); + } +} + +JsonUtilCode Selector::parseIndex(int64_t &val) { + JsonUtilCode rc = lex.scanInteger(val); + if (rc == JSONUTIL_VALUE_NOT_NUMBER) rc = JSONUTIL_ARRAY_INDEX_NOT_NUMBER; + return rc; +} + +JsonUtilCode Selector::processSubscript(const int64_t idx) { + if (!lex.matchToken(Token::RBRACKET, true)) return JSONUTIL_INVALID_JSON_PATH; + if (!node->IsArray()) return JSONUTIL_JSON_ELEMENT_NOT_ARRAY; + return traverseToArrayIndex(idx); +} + +JsonUtilCode Selector::processSlice(int64_t start, int64_t end, const int64_t step) { + if (!node->IsArray()) return JSONUTIL_JSON_ELEMENT_NOT_ARRAY; + if (!lex.matchToken(Token::RBRACKET, true)) return JSONUTIL_INVALID_JSON_PATH; + TRACE("DEBUG", "processSlice start: " << start << " end: " << end << " step: " + << step << ", p: " << lex.p << ", nodePath: " << nodePath) + // handle negative index + if (start < 0) start += node->Size(); + if (end < 0) end += node->Size(); + // Verify step cannot be 0. + if (step == 0) { + return JSONUTIL_STEP_CANNOT_NOT_BE_ZERO; + } + + // if the index is out of bounds, round it to the respective bound. + if (start < 0) + start = 0; + else if (start > node->Size()) + start = node->Size(); + if (end < 0) + end = 0; + else if (end > node->Size()) + end = node->Size(); + + JsonUtilCode rc = JSONUTIL_SUCCESS; + if (step > 0) { + for (int i = start; i < end; i += step) { + rc = evalArrayMember(i); + if (isSyntaxError(rc)) return rc; + } + } else { + for (int i = start; i > end; i += step) { + rc = evalArrayMember(i); + if (isSyntaxError(rc)) return rc; + } + } + + // We are done. Null out the current node to signal done of value collection. + node = nullptr; + return JSONUTIL_SUCCESS; +} + +/** + * Filter ::= "?" 
"(" FilterExpr ")" + * FilterExpr ::= {SPACE} Term { {SPACE} "||" {SPACE} Term {SPACE} } + * Term ::= Factor { {SPACE} "&&" {SPACE} Factor } + * Factor ::= ( "@" ( MemberName | ( [ MemberName ] ComparisonOp ComparisonValue) ) ) | + * ( ComparisonValue ComparisonOp "@" ( MemberName | ( [ MemberName ]) ) ) | + * ( {SPACE} "(" FilterExpr ")" {SPACE} ) + * MemberName ::= ("." (UnquotedMemberName | BracketedMemberName)) | BracketedMemberName + * BracketedMemberName ::= "[" {SPACE} QuotedMemberName {SPACE} "]" + * ComparisonOp ::= {SPACE} "<" | "<="] | ">" | ">=" | "==" | "!=" {SPACE} + * ComparisonValue ::= "null" | Bool | Number | QuotedString | PartialPath + * Bool ::= "true" | "false" + * Number ::= Integer | MemberNameInFilter | ScientificNumber + * QuotedString ::= "\"" {char} "\"" + * PartialPath ::= "$" RelativePath + */ +JsonUtilCode Selector::parseFilter() { + lex.nextToken(); // skip QUESTION_MARK + if (!lex.matchToken(Token::LPAREN)) return JSONUTIL_INVALID_JSON_PATH; + jsn::vector result; // subset of indexes of the current array + JsonUtilCode rc = parseFilterExpr(result); + if (rc != JSONUTIL_SUCCESS) return rc; + if (!lex.matchToken(Token::RPAREN)) return JSONUTIL_INVALID_JSON_PATH; + + if (!lex.matchToken(Token::RBRACKET, true)) return JSONUTIL_INVALID_JSON_PATH; + return processFilterResult(result); +} + +JsonUtilCode Selector::processFilterResult(jsn::vector &result) { + JsonUtilCode rc; + if (node->IsArray()) { + for (auto idx : result) { + TRACE("DEBUG", "processFilterResult proceed to array index " << idx << ". remaining path: " + << lex.p << ", nodePath: " << nodePath) + rc = evalArrayMember(idx); + if (isSyntaxError(rc)) return rc; + } + + // We are done. Null out the current node to signal done of value selections. + node = nullptr; + return JSONUTIL_SUCCESS; + } else if (node->IsObject()) { + if (result.empty()) { + // Null out the current node to signal the node is not selected. + node = nullptr; + } + return JSONUTIL_SUCCESS; + } else { + if (!result.empty()) { + ValkeyModule_Assert(result.size() == 1); + rc = evalMember(*node, lex.p); + if (isSyntaxError(rc)) return rc; + } + node = nullptr; // We are done addressing this single element. 
+ return JSONUTIL_SUCCESS; + } +} + +/** + * FilterExpr ::= {SPACE} Term { {SPACE} "||" {SPACE} Term {SPACE} } + */ +JsonUtilCode Selector::parseFilterExpr(jsn::vector &result) { + CHECK_RECURSION_DEPTH(); + lex.skipSpaces(); + JsonUtilCode rc = parseTerm(result); + TRACE("DEBUG", "parseFilterExpr parsed first term, rc: " << rc << ", nodePath: " << nodePath) + if (rc != JSONUTIL_SUCCESS) return rc; + + jsn::unordered_set set; + bool set_initialized = false; + jsn::vector v; + while (lex.matchToken(Token::OR, true)) { + v.clear(); + rc = parseTerm(v); + TRACE("DEBUG", "parseFilterExpr parsed OR term, rc: " << rc << ", nodePath: " << nodePath) + if (rc != JSONUTIL_SUCCESS) return rc; + + if (!set_initialized) { + set.insert(result.begin(), result.end()); + set_initialized = true; + } + vectorUnion(v, result, set); + } + lex.skipSpaces(); + return JSONUTIL_SUCCESS; +} + +/** + * Term ::= Factor { {SPACE} "&&" {SPACE} Factor } + */ +JsonUtilCode Selector::parseTerm(jsn::vector &result) { + CHECK_RECURSION_DEPTH(); + JsonUtilCode rc = parseFactor(result); + if (rc != JSONUTIL_SUCCESS) return rc; + jsn::vector vec1; + jsn::vector vec2; + while (lex.matchToken(Token::AND, true)) { + vec1.clear(); + vec1.insert(vec1.end(), result.begin(), result.end()); + result.clear(); + + vec2.clear(); + rc = parseFactor(vec2); + if (rc != JSONUTIL_SUCCESS) return rc; + vectorIntersection(vec1, vec2, result); + } + return JSONUTIL_SUCCESS; +} + +/** + * Factor ::= ( "@" ( MemberName | ( [ MemberName ] ComparisonOp ComparisonValue) ) ) | + * ( ComparisonValue ComparisonOp "@" ( MemberName | ( [ MemberName ]) ) ) | + * ( {SPACE} "(" FilterExpr ")" {SPACE} ) + * MemberName ::= ("." (UnquotedMemberName | BracketedMemberName)) | BracketedMemberName + * BracketedMemberName ::= "[" {SPACE} QuotedMemberName {SPACE} "]" + * ComparisonOp ::= {SPACE} "<" | "<="] | ">" | ">=" | "==" | "!=" {SPACE} + * ComparisonValue ::= "null" | Bool | Number | QuotedString | PartialPath + * Bool ::= "true" | "false" + * Number ::= Integer | MemberNameInFilter | ScientificNumber + * QuotedString ::= "\"" {char} "\"" + * PartialPath ::= "$" RelativePath +*/ +JsonUtilCode Selector::parseFactor(jsn::vector &result) { + CHECK_RECURSION_DEPTH(); + JsonUtilCode rc; + lex.skipSpaces(); + if (lex.currToken().type == Token::LPAREN) { + lex.nextToken(true); // skip LPAREN + rc = parseFilterExpr(result); + if (rc != JSONUTIL_SUCCESS) return rc; + if (!lex.matchToken(Token::RPAREN, true)) return JSONUTIL_INVALID_JSON_PATH; + } else { + if (lex.matchToken(Token::AT)) { + if (lex.currToken().type == Token::DOT || lex.currToken().type == Token::LBRACKET) { + // The next token must be member name + StringViewHelper member_name; + rc = parseMemberName(member_name); + if (rc != JSONUTIL_SUCCESS) return rc; + + lex.skipSpaces(); + Token::TokenType tokenType = lex.currToken().type; + if (tokenType == Token::LT || tokenType == Token::LE || + tokenType == Token::GT || tokenType == Token::GE || + tokenType == Token::EQ || tokenType == Token::NE) { + Token::TokenType op = Token::UNKNOWN; + rc = parseComparisonOp(op); + if (rc != JSONUTIL_SUCCESS) return rc; + + JValue v; + rc = parseComparisonValue(v); + if (rc != JSONUTIL_SUCCESS) return rc; + + return processComparisonExpr(false, member_name, op, v, result); + } else if (tokenType == Token::LBRACKET) { + lex.nextToken(true); // skip LBRACKET + if (lex.currToken().type == Token::QUESTION_MARK) { + lex.nextToken(true); // skip QUESTIONMARK + if (!lex.matchToken(Token::LPAREN)) return JSONUTIL_INVALID_JSON_PATH; 
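+                        // We are now inside a nested filter of the form @.member[?(@ <op> value)]
+                        // or @.member[?(value <op> @)]: the member is itself an array and the test
+                        // asks whether it contains a matching element (see processArrayContains).
+                        // A hypothetical query that reaches this branch:
+                        //   $.items[?(@.tags[?(@ == "sale")])]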
+ JValue v; + Token::TokenType op = Token::UNKNOWN; + if (lex.currToken().type == Token::AT) { + lex.nextToken(true); // skip AT + + rc = parseComparisonOp(op); + if (rc != JSONUTIL_SUCCESS) return rc; + + rc = parseComparisonValue(v); + if (rc != JSONUTIL_SUCCESS) return rc; + } else { + rc = parseComparisonValue(v); + if (rc != JSONUTIL_SUCCESS) return rc; + + rc = parseComparisonOp(op); + if (rc != JSONUTIL_SUCCESS) return rc; + rc = swapComparisonOpSide(op); + if (rc != JSONUTIL_SUCCESS) return rc; + + if (!lex.matchToken(Token::AT)) return JSONUTIL_INVALID_JSON_PATH; + } + return processArrayContains(member_name, op, v, result); + } else { + int64_t index; + rc = parseIndex(index); + if (rc != JSONUTIL_SUCCESS) return rc; + + if (!lex.matchToken(Token::RBRACKET)) return JSONUTIL_INVALID_JSON_PATH; + + JValue v; + Token::TokenType op = Token::UNKNOWN; + + rc = parseComparisonOp(op); + if (rc != JSONUTIL_SUCCESS) return rc; + + rc = parseComparisonValue(v); + if (rc != JSONUTIL_SUCCESS) return rc; + + return processComparisonExprAtIndex(index, member_name, op, v, result); + } + } else { + return processAttributeFilter(member_name, result); + } + } else { + // The next token must be comparison operator + Token::TokenType op = Token::UNKNOWN; + rc = parseComparisonOp(op); + if (rc != JSONUTIL_SUCCESS) return rc; + + JValue v; + rc = parseComparisonValue(v); + if (rc != JSONUTIL_SUCCESS) return rc; + + return processComparisonExpr(true, StringViewHelper(), op, v, result); + } + } else { // see if the @.member_name is on the right, do an inverted comparison + JValue v; + rc = parseComparisonValue(v); + if (rc != JSONUTIL_SUCCESS) return rc; + + lex.skipSpaces(); + // The next token must be comparison operator + Token::TokenType op = Token::UNKNOWN; + rc = parseComparisonOp(op); + if (rc != JSONUTIL_SUCCESS) return rc; + rc = swapComparisonOpSide(op); + if (rc != JSONUTIL_SUCCESS) return rc; + + if (!lex.matchToken(Token::AT)) return JSONUTIL_INVALID_JSON_PATH; + + lex.skipSpaces(); + if (lex.currToken().type == Token::RPAREN || lex.currToken().type == Token::AND || + lex.currToken().type == Token::OR) { + return processComparisonExpr(true, StringViewHelper(), op, v, result); + } else { + // The next token must be member name + StringViewHelper member_name; + rc = parseMemberName(member_name); + if (rc != JSONUTIL_SUCCESS) return rc; + + if (lex.currToken().type == Token::LBRACKET) { + lex.nextToken(true); // skip LBRACKET + int64_t index; + rc = parseIndex(index); + if (rc != JSONUTIL_SUCCESS) return rc; + + if (!lex.matchToken(Token::RBRACKET)) return JSONUTIL_INVALID_JSON_PATH; + + return processComparisonExprAtIndex(index, member_name, op, v, result); + } else { + return processComparisonExpr(false, member_name, op, v, result); + } + } + } + } + return JSONUTIL_SUCCESS; +} + +/** + * MemberName ::= ("." 
(UnquotedMemberName | BracketedMemberName)) | BracketedMemberName + */ +JsonUtilCode Selector::parseMemberName(StringViewHelper &name) { + if (lex.matchToken(Token::DOT)) { + if (lex.matchToken(Token::LBRACKET)) + return parseBracketedMemberName(name); + else + return parseUnquotedMemberName(name); + } else if (lex.matchToken(Token::LBRACKET)) { + return parseBracketedMemberName(name); + } else { + return JSONUTIL_INVALID_JSON_PATH; + } +} + +/** + * BracketedMemberName ::= "[" {SPACE} QuotedMemberName {SPACE} "]" + */ +JsonUtilCode Selector::parseBracketedMemberName(StringViewHelper &member_name) { + lex.skipSpaces(); + jsn::stringstream ss; + JsonUtilCode rc = parseQuotedMemberName(ss); + if (rc != JSONUTIL_SUCCESS) return rc; + if (!lex.matchToken(Token::RBRACKET, true)) return JSONUTIL_INVALID_JSON_PATH; + member_name.setInternalString(ss.str()); + return JSONUTIL_SUCCESS; +} + +/** + * ComparisonValue ::= "null" | Bool | Number | QuotedString | PartialPath + * Bool ::= "true" | "false" + * Number ::= Integer | MemberNameInFilter | ScientificNumber + * QuotedString ::= "\"" {char} "\"" + * PartialPath ::= "$" RelativePath + */ +JsonUtilCode Selector::parseComparisonValue(JValue &v) { + CHECK_RECURSION_DEPTH(); + StringViewHelper sv; + const Token &token = lex.currToken(); + if (token.type == Token::DOLLAR) { // parse and process member name + Selector selector; + JsonUtilCode rc = lex.scanPathValue(sv); + if (rc != JSONUTIL_SUCCESS) return rc; + jsn::string path = {sv.getView().data(), sv.getView().length()}; + rc = selector.getValues(*root, path.c_str()); + if (rc != JSONUTIL_SUCCESS) return rc; + if (selector.resultSet.size() != 1 || selector.resultSet[0].first->IsObject() || + selector.resultSet[0].first->IsArray()) { + return JSONUTIL_INVALID_JSON_PATH; + } + v.CopyFrom(*selector.resultSet[0].first, allocator); + + return JSONUTIL_SUCCESS; + } else { // parse value directly + JParser parser; + if (token.type == Token::DOUBLE_QUOTE) { + JsonUtilCode rc = lex.scanDoubleQuotedString(parser); + if (rc != JSONUTIL_SUCCESS) return rc; + v = parser.GetJValue(); + TRACE("DEBUG", "parseComparisonValue ComparisonValue: " << v.GetString()) + return JSONUTIL_SUCCESS; + } else if (token.type == Token::SINGLE_QUOTE) { + jsn::stringstream ss; + JsonUtilCode rc = lex.scanSingleQuotedStringAndConvertToDoubleQuotedString(ss); + if (rc != JSONUTIL_SUCCESS) return rc; + sv.setInternalString(ss.str()); + } else if (token.type == Token::ALPHA && (token.strVal == "n")) { + JsonUtilCode rc = lex.scanIdentifier(sv); + if (rc != JSONUTIL_SUCCESS) return rc; + if (sv.getView() != "null") return JSONUTIL_INVALID_IDENTIFIER; + } else if (token.type == Token::ALPHA && (token.strVal == "t" || token.strVal == "f")) { + JsonUtilCode rc = lex.scanIdentifier(sv); + if (rc != JSONUTIL_SUCCESS) return rc; + if (sv.getView() != "true" && sv.getView() != "false") return JSONUTIL_INVALID_IDENTIFIER; + } else { + JsonUtilCode rc = lex.scanNumberInFilterExpr(sv); + if (rc != JSONUTIL_SUCCESS) return rc; + } + + if (parser.Parse(sv.getView()).HasParseError()) { + TRACE("DEBUG", "parseComparisonValue failed to parse " << sv.getView() << ", nodePath: " << nodePath) + return parser.GetParseErrorCode(); + } + TRACE("DEBUG", "parseComparisonValue ComparisonValue: ") +#ifdef INSTRUMENT_V2PATH + dom_dump_value(parser.GetJValue()); +#endif + v = parser.GetJValue(); + return JSONUTIL_SUCCESS; + } +} + +/** + * ComparisonOp := {SPACE} "<" | "<="] | ">" | ">=" | "==" | "!=" {SPACE} + */ +JsonUtilCode 
Selector::parseComparisonOp(Token::TokenType &op) { + lex.skipSpaces(); + Token::TokenType tokenType = lex.currToken().type; + if (tokenType != Token::EQ && tokenType != Token::NE && + tokenType != Token::LT && tokenType != Token::LE && + tokenType != Token::GT && tokenType != Token::GE) + return JSONUTIL_INVALID_JSON_PATH; + op = tokenType; + lex.skipSpaces(); + + lex.nextToken(true); // advance to the next token + TRACE("DEBUG", "parseComparisonOp op: " << op << ", curr path: " << lex.p << ", nodePath: " << nodePath) + return JSONUTIL_SUCCESS; +} + +JsonUtilCode Selector::swapComparisonOpSide(Token::TokenType &op) { + switch (op) { + case Token::EQ: + case Token::NE: + return JSONUTIL_SUCCESS; + case Token::GT: + op = Token::LT; + return JSONUTIL_SUCCESS; + case Token::LT: + op = Token::GT; + return JSONUTIL_SUCCESS; + case Token::GE: + op = Token::LE; + return JSONUTIL_SUCCESS; + case Token::LE: + op = Token::GE; + return JSONUTIL_SUCCESS; + default: + return JSONUTIL_INVALID_JSON_PATH; + } +} + +// We can enter an array and see if it contains an object that matches a condition. +// This only looks down one level. +// Further recursion can be investigated in the future. +JsonUtilCode Selector::processArrayContains(const StringViewHelper &member_name, const Token::TokenType op, + const JValue &comparison_value, jsn::vector &result) { + if (node->IsArray()) { + for (int64_t i = 0; i < node->Size(); i++) { + JValue &m = node->GetArray()[i]; + JValue *v; + if (!m.IsObject()) continue; // not object, skip + JValue::MemberIterator it = m.FindMember(member_name.getView()); + if (it == m.MemberEnd()) continue; // does not have the attribute, skip + v = &it->value; + if (v->IsArray()) { + bool found = false; + for (int64_t j = 0; j < v->Size() && !found; j++) { + JValue &n = v->GetArray()[j]; + JValue *w = &n; + if (evalOp(w, op, comparison_value)) { + // note that here we push back parent array element + result.push_back(i); + found = true; + } + } + } + } + } + if (!lex.matchToken(Token::RPAREN, true)) return JSONUTIL_INVALID_JSON_PATH; + if (!lex.matchToken(Token::RBRACKET)) return JSONUTIL_INVALID_JSON_PATH; + return JSONUTIL_SUCCESS; +} + +JsonUtilCode Selector::processComparisonExprAtIndex(const int64_t idx, const StringViewHelper &member_name, + const Token::TokenType op, const JValue &comparison_value, + jsn::vector &result) { + if (node->IsArray()) { + for (int64_t i = 0; i < node->Size(); i++) { + JValue &m = node->GetArray()[i]; + JValue *v; + if (!m.IsObject()) continue; // not object, skip + JValue::MemberIterator it = m.FindMember(member_name.getView()); + if (it == m.MemberEnd()) continue; // does not have the attribute, skip + v = &it->value; + if (v->IsArray()) { + int64_t inner_index = idx; + // handle negative index + if (inner_index < 0) inner_index += v->Size(); + // check index bounds + if (inner_index < static_cast(v->Size()) && inner_index >= 0) { + JValue &n = v->GetArray()[inner_index]; + JValue *w = &n; + if (evalOp(w, op, comparison_value)) { + // note that here we push back parent array element + result.push_back(i); + } + } + } + } + } + return JSONUTIL_SUCCESS; +} + + +JsonUtilCode Selector::processComparisonExpr(const bool is_self, const StringViewHelper &member_name, + const Token::TokenType op, const JValue &comparison_value, + jsn::vector &result) { + if (node->IsArray()) { + for (int64_t i = 0; i < node->Size(); i++) { + JValue &m = node->GetArray()[i]; + JValue *v; + if (is_self) { + v = &m; + } else { + if (!m.IsObject()) continue; // not object, skip + 
+
+JsonUtilCode Selector::processComparisonExpr(const bool is_self, const StringViewHelper &member_name,
+                                             const Token::TokenType op, const JValue &comparison_value,
+                                             jsn::vector<int64_t> &result) {
+    if (node->IsArray()) {
+        for (int64_t i = 0; i < node->Size(); i++) {
+            JValue &m = node->GetArray()[i];
+            JValue *v;
+            if (is_self) {
+                v = &m;
+            } else {
+                if (!m.IsObject()) continue;  // not an object, skip
+                JValue::MemberIterator it = m.FindMember(member_name.getView());
+                if (it == m.MemberEnd()) continue;  // does not have the attribute, skip
+                v = &it->value;
+            }
+            if (evalOp(v, op, comparison_value)) result.push_back(i);
+        }
+    } else if (node->IsObject()) {
+        JValue::MemberIterator it = node->FindMember(member_name.getView());
+        if (it != node->MemberEnd()) {
+            if (evalOp(&it->value, op, comparison_value)) result.push_back(0);
+        }
+    } else if (is_self) {
+        if (evalOp(node, op, comparison_value)) result.push_back(0);
+    }
+    return JSONUTIL_SUCCESS;
+}
+
+bool Selector::evalOp(const JValue *v, const Token::TokenType op, const JValue &comparison_value) {
+    // Return false on an LHS/RHS value type mismatch, but treat kTrueType and kFalseType as the same type.
+    if (v->GetType() != comparison_value.GetType() && !(
+        (v->GetType() == rapidjson::kTrueType || v->GetType() == rapidjson::kFalseType) &&
+        (comparison_value.GetType() == rapidjson::kTrueType || comparison_value.GetType() == rapidjson::kFalseType))
+       ) {
+        return false;
+    }
+    bool satisfied = false;
+    switch (op) {
+        case Token::EQ: {
+            switch (v->GetType()) {
+                case rapidjson::kNullType:
+                    satisfied = true;
+                    break;
+                case rapidjson::kTrueType:
+                case rapidjson::kFalseType:
+                    satisfied = (v->GetBool() == comparison_value.GetBool());
+                    break;
+                case rapidjson::kStringType:
+                    satisfied = (v->GetStringView() == comparison_value.GetStringView());
+                    break;
+                case rapidjson::kNumberType: {
+                    if (v->IsDouble() || comparison_value.IsDouble()) {
+                        // Comparing floating points with == or != trips -Wfloat-equal, which this
+                        // build treats as an error, so equality is expressed via <= and >=.
+                        satisfied = (v->GetDouble() <= comparison_value.GetDouble() &&
+                                     v->GetDouble() >= comparison_value.GetDouble());
+                    } else if (v->IsUint64() && comparison_value.IsUint64()) {
+                        satisfied = (v->GetUint64() == comparison_value.GetUint64());
+                    } else {
+                        satisfied = (v->GetInt64() == comparison_value.GetInt64());
+                    }
+                    break;
+                }
+                default:
+                    break;
+            }
+            break;
+        }
+        case Token::NE: {
+            switch (v->GetType()) {
+                case rapidjson::kTrueType:
+                case rapidjson::kFalseType:
+                    satisfied = (v->GetBool() != comparison_value.GetBool());
+                    break;
+                case rapidjson::kStringType:
+                    satisfied = (v->GetStringView() != comparison_value.GetStringView());
+                    break;
+                case rapidjson::kNumberType: {
+                    if (v->IsDouble() || comparison_value.IsDouble()) {
+                        // Same -Wfloat-equal caveat as above: inequality is expressed via < and > instead of !=.
+                        satisfied = (v->GetDouble() < comparison_value.GetDouble() ||
+                                     v->GetDouble() > comparison_value.GetDouble());
+                    } else if (v->IsUint64() && comparison_value.IsUint64()) {
+                        satisfied = (v->GetUint64() != comparison_value.GetUint64());
+                    } else {
+                        satisfied = (v->GetInt64() != comparison_value.GetInt64());
+                    }
+                    break;
+                }
+                default:
+                    break;
+            }
+            break;
+        }
+        case Token::LT: {
+            switch (v->GetType()) {
+                case rapidjson::kTrueType:
+                case rapidjson::kFalseType:
+                    satisfied = (v->GetBool() < comparison_value.GetBool());
+                    break;
+                case rapidjson::kStringType:
+                    satisfied = (v->GetStringView() < comparison_value.GetStringView());
+                    break;
+                case rapidjson::kNumberType: {
+                    if (v->IsDouble() || comparison_value.IsDouble()) {
+                        satisfied = (v->GetDouble() < comparison_value.GetDouble());
+                    } else if (v->IsUint64() && comparison_value.IsUint64()) {
+                        satisfied = (v->GetUint64() < comparison_value.GetUint64());
+                    } else {
+                        satisfied = (v->GetInt64() < comparison_value.GetInt64());
+                    }
+                    break;
+                }
+                default:
+                    break;
+            }
+            break;
+        }
+        case Token::LE: {
+            switch (v->GetType()) {
+                case rapidjson::kTrueType:
+                case rapidjson::kFalseType:
+                    satisfied = (v->GetBool() <= comparison_value.GetBool());
+                    break;
+                case rapidjson::kStringType:
+                    satisfied = (v->GetStringView() <= comparison_value.GetStringView());
+                    break;
+                case rapidjson::kNumberType: {
+                    if (v->IsDouble() || comparison_value.IsDouble()) {
+                        satisfied = (v->GetDouble() <= comparison_value.GetDouble());
+                    } else if (v->IsUint64() && comparison_value.IsUint64()) {
+                        satisfied = (v->GetUint64() <= comparison_value.GetUint64());
+                    } else {
+                        satisfied = (v->GetInt64() <= comparison_value.GetInt64());
+                    }
+                    break;
+                }
+                default:
+                    break;
+            }
+            break;
+        }
+        case Token::GT: {
+            switch (v->GetType()) {
+                case rapidjson::kTrueType:
+                case rapidjson::kFalseType:
+                    satisfied = (v->GetBool() > comparison_value.GetBool());
+                    break;
+                case rapidjson::kStringType:
+                    satisfied = (v->GetStringView() > comparison_value.GetStringView());
+                    break;
+                case rapidjson::kNumberType: {
+                    if (v->IsDouble() || comparison_value.IsDouble()) {
+                        satisfied = (v->GetDouble() > comparison_value.GetDouble());
+                    } else if (v->IsUint64() && comparison_value.IsUint64()) {
+                        satisfied = (v->GetUint64() > comparison_value.GetUint64());
+                    } else {
+                        satisfied = (v->GetInt64() > comparison_value.GetInt64());
+                    }
+                    break;
+                }
+                default:
+                    break;
+            }
+            break;
+        }
+        case Token::GE: {
+            switch (v->GetType()) {
+                case rapidjson::kTrueType:
+                case rapidjson::kFalseType:
+                    satisfied = (v->GetBool() >= comparison_value.GetBool());
+                    break;
+                case rapidjson::kStringType:
+                    satisfied = (v->GetStringView() >= comparison_value.GetStringView());
+                    break;
+                case rapidjson::kNumberType: {
+                    if (v->IsDouble() || comparison_value.IsDouble()) {
+                        satisfied = (v->GetDouble() >= comparison_value.GetDouble());
+                    } else if (v->IsUint64() && comparison_value.IsUint64()) {
+                        satisfied = (v->GetUint64() >= comparison_value.GetUint64());
+                    } else {
+                        satisfied = (v->GetInt64() >= comparison_value.GetInt64());
+                    }
+                    break;
+                }
+                default:
+                    break;
+            }
+            break;
+        }
+        default:
+            break;
+    }
+    return satisfied;
+}
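The type gate at the top of evalOp means cross-type comparisons are quietly unsatisfied rather than errors, while integers and doubles mix freely through the double branch. Illustrative expectations, written in pseudo-notation since JValue construction is elided here:

```cpp
// evalOp(1,     EQ, 1.0)    -> true   (mixed int/double compared as double)
// evalOp("1",   EQ, 1)      -> false  (string vs. number: type mismatch)
// evalOp(true,  NE, false)  -> true   (kTrueType/kFalseType count as one type)
// evalOp(null,  EQ, null)   -> true   (null equality is always satisfied)
// evalOp(2,     LT, "abc")  -> false  (type mismatch, regardless of operator)
```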
+
+JsonUtilCode Selector::processAttributeFilter(const StringViewHelper &member_name, jsn::vector<int64_t> &result) {
+    if (node->IsArray()) {
+        for (int64_t i = 0; i < node->Size(); i++) {
+            JValue &m = node->GetArray()[i];
+            if (!m.IsObject()) continue;  // skip non-object values
+            JValue::MemberIterator it = m.FindMember(member_name.getView());
+            if (it == m.MemberEnd()) continue;  // does not have the attribute, skip
+            result.push_back(i);
+        }
+    } else if (node->IsObject()) {
+        if (node->FindMember(member_name.getView()) != node->MemberEnd())
+            result.push_back(0);
+    } else {
+        return JSONUTIL_INVALID_JSON_PATH;
+    }
+    return JSONUTIL_SUCCESS;
+}
+
+/**
+ * Union vector v with result r, with element order preserved.
+ * The final result will be r. Unique elements are stored in the set.
+ *
+ * This method is optimized for being called multiple times to union n vectors.
+ * The caller is responsible for initially syncing up the set with r.
+ */
+void Selector::vectorUnion(const jsn::vector<int64_t> &v, jsn::vector<int64_t> &r,
+                           jsn::unordered_set<int64_t> &set) {
+    for (auto e : v) {
+        auto res = set.emplace(e);
+        if (res.second) r.push_back(e);
+    }
+}
+
+/**
+ * Intersect v1 with v2 and store the result in r, with v1's element order preserved.
+ */
+void Selector::vectorIntersection(const jsn::vector<int64_t> &v1, const jsn::vector<int64_t> &v2,
+                                  jsn::vector<int64_t> &r) {
+    jsn::unordered_set<int64_t> set(v2.begin(), v2.end());
+    for (auto e : v1) {
+        if (set.find(e) != set.end()) {
+            r.push_back(e);
+        }
+    }
+}
+
+/**
+ * UnionOfIndexes ::= Integer ({SPACE} "," {SPACE} Integer)+
+ */
+JsonUtilCode Selector::parseUnionOfIndexes(const int64_t start) {
+    if (!node->IsArray()) return JSONUTIL_JSON_ELEMENT_NOT_ARRAY;
+    jsn::vector<int64_t> union_indices = {start};
+    bool comma = false;
+    int64_t index;
+    JsonUtilCode rc;
+
+    lex.skipSpaces();
+    while (lex.currToken().type != Token::RBRACKET) {
+        switch (lex.currToken().type) {
+            case Token::COMMA: {
+                // cannot have multiple commas in a row
+                if (comma) return JSONUTIL_INVALID_JSON_PATH;
+                comma = true;
+                lex.nextToken(true);  // skip the comma
+                break;
+            }
+            default: {
+                // an integer must follow a comma
+                if (!comma) return JSONUTIL_INVALID_JSON_PATH;
+                comma = false;
+                lex.skipSpaces();
+                rc = parseIndex(index);
+                if (rc != JSONUTIL_SUCCESS) return rc;
+                union_indices.push_back(index);
+                break;
+            }
+        }
+        lex.skipSpaces();
+    }
+    // cannot end with a comma
+    if (comma) return JSONUTIL_INVALID_JSON_PATH;
+
+    if (!lex.matchToken(Token::RBRACKET, true)) return JSONUTIL_INVALID_JSON_PATH;
+    rc = processUnion(union_indices);
+    if (rc != JSONUTIL_SUCCESS) return rc;
+
+    return JSONUTIL_SUCCESS;
+}
+
+JsonUtilCode Selector::processUnion(jsn::vector<int64_t> union_indices) {
+    JsonUtilCode rc;
+    for (int64_t i : union_indices) {
+        // handle negative index
+        if (i < 0) i += node->Size();
+        // if the index is out of bounds, skip it
+        if (i < 0 || i > node->Size() - 1) continue;
+        rc = evalArrayMember(i);
+        if (rc != JSONUTIL_SUCCESS) return rc;
+    }
+
+    // We are done. Null out the current node to signal completion of value collection.
+    node = nullptr;
+
+    return JSONUTIL_SUCCESS;
+}
+
+/**
+ * Collect values from the result set.
+ * @param values OUTPUT parameter, stores collected values.
+ */
+void Selector::getSelectedValues(jsn::vector<JValue*> &values) const {
+    std::transform(resultSet.begin(),
+                   resultSet.end(),
+                   std::back_inserter(values),
+                   [](const std::pair<JValue*, jsn::string> &p) { return p.first; });
+}
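vectorUnion keeps first-occurrence order across repeated calls, which is what lets a union path element such as `$.a[0,2,0]` yield each element once, in query order. A self-contained sketch of the same technique (standard containers used here in place of the module's jsn:: aliases):

```cpp
#include <cstdint>
#include <iostream>
#include <unordered_set>
#include <vector>

// Order-preserving union: append only first occurrences to r.
static void vector_union(const std::vector<int64_t> &v, std::vector<int64_t> &r,
                         std::unordered_set<int64_t> &seen) {
    for (int64_t e : v) {
        if (seen.emplace(e).second) r.push_back(e);  // .second == true on first insertion
    }
}

int main() {
    std::vector<int64_t> r;
    std::unordered_set<int64_t> seen(r.begin(), r.end());  // caller syncs the set with r up front
    vector_union({0, 2}, r, seen);
    vector_union({2, 0, 4}, r, seen);
    for (int64_t e : r) std::cout << e << ' ';  // prints: 0 2 4
    std::cout << '\n';
    return 0;
}
```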
+
+/**
+ * Get the unique result set, with order preserved.
+ */
+const jsn::vector<Selector::ValueInfo>& Selector::getUniqueResultSet() {
+    if (resultSet.size() <= 1) return resultSet;
+
+    TRACE("DEBUG", "getUniqueResultSet total values: " << resultSet.size());
+    uniqueResultSet.clear();
+    jsn::unordered_set<JValue*> set;
+    for (auto &v : resultSet) {
+        auto res = set.emplace(v.first);
+        if (res.second) uniqueResultSet.push_back(v);
+    }
+    TRACE("DEBUG", "getUniqueResultSet unique values: " << uniqueResultSet.size());
+    return uniqueResultSet;
+}
+
+/**
+ * Remove duplicate values from the result set.
+ */
+void Selector::dedupe() {
+    if (resultSet.size() <= 1) return;
+
+    TRACE("DEBUG", "dedupe resultSet size before dedupe: " << resultSet.size());
+    auto &rs = getUniqueResultSet();
+    resultSet.clear();
+    resultSet.insert(resultSet.end(), rs.begin(), rs.end());
+    TRACE("DEBUG", "dedupe resultSet size after dedupe: " << resultSet.size());
+}
diff --git a/src/json/selector.h b/src/json/selector.h
new file mode 100644
index 0000000..8a2dd41
--- /dev/null
+++ b/src/json/selector.h
@@ -0,0 +1,369 @@
+#ifndef VALKEYJSONMODULE_JSON_SELECTOR_H_
+#define VALKEYJSONMODULE_JSON_SELECTOR_H_
+
+#include "json/dom.h"
+#include "json/rapidjson_includes.h"
+#include <string_view>
+
+struct Token {
+    enum TokenType {
+        UNKNOWN = 0,
+        DOLLAR, DOT, DOTDOT, WILDCARD,
+        COLON, COMMA, AT, QUESTION_MARK,
+        LBRACKET, RBRACKET, LPAREN, RPAREN,
+        SINGLE_QUOTE, DOUBLE_QUOTE,
+        PLUS, MINUS, DIV, PCT,
+        EQ, NE, GT, LT, GE, LE, NOT, ASSIGN,
+        ALPHA, DIGIT, SPACE,
+        TRUE, FALSE, AND, OR,
+        SPECIAL_CHAR,
+        END
+    };
+
+    Token()
+        : type(Token::UNKNOWN)
+        , strVal()
+    {}
+    TokenType type;
+    std::string_view strVal;
+};
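For orientation, here is a rough token stream for a small filter path using the TokenType names above (illustrative only; the exact ALPHA/DIGIT chunking is up to the scanner):

```cpp
// $.book[?(@.price < 10)]
//
// DOLLAR DOT ALPHA("book") LBRACKET QUESTION_MARK LPAREN
// AT DOT ALPHA("price") SPACE LT SPACE DIGIT("10") RPAREN RBRACKET END
```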
+
+/**
+ * A helper class that contains a string view and an optional internal string. The caller decides if the view
+ * refers to an external string or to the internal string.
+ * If StringViewHelper::str is empty, the underlying string is owned by an external resource.
+ * Otherwise, the underlying string is owned by the StringViewHelper.
+ */
+struct StringViewHelper {
+    StringViewHelper() : str(), view() {}
+    StringViewHelper(const StringViewHelper &svh) {
+        str = svh.str;
+        if (str.empty())
+            view = svh.view;
+        else
+            view = std::string_view(str.c_str(), str.length());
+    }
+    const std::string_view& getView() const { return view; }
+    void setInternalString(const jsn::string &s) {
+        str = s;
+        view = std::string_view(str.c_str(), str.length());
+    }
+    void setInternalView(const std::string_view &v) {
+        str = jsn::string(v);
+        view = std::string_view(str.c_str(), str.length());
+    }
+    void setExternalView(const std::string_view &sv) {
+        view = sv;
+    }
+
+ private:
+    jsn::string str;
+    std::string_view view;
+    StringViewHelper& operator=(const StringViewHelper&);  // disable assignment operator
+};
+
+class Lexer {
+ public:
+    Lexer()
+        : p(nullptr)
+        , next()
+        , path(nullptr)
+        , rdTokens(0)
+    {}
+    void init(const char *path);
+    Token::TokenType peekToken() const;
+    Token nextToken(const bool skipSpace = false);
+    const Token& currToken() const { return next; }
+    bool matchToken(const Token::TokenType type, const bool skipSpace = false);
+    JsonUtilCode scanInteger(int64_t &val);
+    JsonUtilCode scanUnquotedMemberName(StringViewHelper &member_name);
+    JsonUtilCode scanPathValue(StringViewHelper &output);
+    JsonUtilCode scanDoubleQuotedString(JParser& parser);
+    JsonUtilCode scanDoubleQuotedString(jsn::stringstream &ss);
+    JsonUtilCode scanSingleQuotedString(jsn::stringstream &ss);
+    JsonUtilCode scanSingleQuotedStringAndConvertToDoubleQuotedString(jsn::stringstream &ss);
+    JsonUtilCode scanNumberInFilterExpr(StringViewHelper &number_sv);
+    JsonUtilCode scanIdentifier(StringViewHelper &sv);
+    void skipSpaces();
+    void unescape(const std::string_view &input, jsn::stringstream &ss);
+    size_t getRecursiveDescentTokens() { return rdTokens; }
+    const char *p;  // current position in the path
+    Token next;
+
+ private:
+    Lexer(const Lexer &t);  // disable copy constructor
+    Lexer& operator=(const Lexer &rhs);  // disable assignment operator
+    int64_t scanUnsignedInteger();
+    const char *path;
+    size_t rdTokens;  // number of recursive descent tokens
+};
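Several of the Lexer scanners hand results back through a StringViewHelper, whose ownership convention decides whether a token survives past the lexer's buffer. A standalone sketch of that idea (std:: types in place of the jsn:: aliases; not the module's class itself):

```cpp
#include <string>
#include <string_view>

// Mirrors the StringViewHelper contract: an empty internal string means the
// view borrows external storage; otherwise the helper owns its own copy.
struct ViewOrCopy {
    std::string str;        // owned copy (empty => borrowing)
    std::string_view view;  // the usable view in either mode
    void set_external(std::string_view sv) { view = sv; }             // borrow
    void set_internal(const std::string &s) { str = s; view = str; }  // copy and own
};

int main() {
    std::string long_lived = "store";
    ViewOrCopy a, b;
    a.set_external(long_lived);          // valid only while long_lived is alive
    b.set_internal(std::string("tmp"));  // keeps its own copy, always valid
    return static_cast<int>(a.view.size() + b.view.size());
}
```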
+
+/**
+ * A JSONPath parser and evaluator that supports both v2 JSONPath and the legacy path syntax, and operates in
+ * either READ or WRITE mode. It is named Selector because:
+ *   a) For READ, evaluation means selecting a list of values that match the query.
+ *   b) For WRITE, evaluation means selecting a list of values to be updated and places to insert into.
+ *
+ * The selector is designed to work with a vector of values instead of a single value, and supports both v1
+ * and v2 path syntax.
+ *
+ * Internally, it maintains two pointers. One points to the current node (value) in the JSON tree. The other
+ * points to the current position in the path string.
+ *
+ * The selector automatically detects if the input path is v1 or v2 syntax, and sets the member isV2Path.
+ * Member mode indicates READ/INSERT/UPDATE/DELETE mode, which is set automatically based on the entry-point
+ * method invoked: getValues, setValues or deleteValues.
+ *
+ * 1. READ mode:
+ *    Selector selector;
+ *    JsonUtilCode rc = selector.getValues(doc, path);
+ *
+ *    The outcome is a result set (selector.resultSet) that matches the query. Each entry is a
+ *    (value, valuePath) pair.
+ *
+ * 2. WRITE mode:
+ * 2.1. Insert/Update:
+ *    Selector selector;
+ *    JsonUtilCode rc = selector.setValues(doc, path, new_val);
+ *
+ *    The outcome is 2 collections:
+ *    a) selector.resultSet: values to update. Each entry is a (value, valuePath) pair.
+ *    b) selector.insertPaths: set of insert paths.
+ *
+ *    Note that setValues takes care of everything (update/insert). As an option, the caller can inspect
+ *    these vectors for verification purposes.
+ *
+ * 2.2. Delete:
+ *    Selector selector;
+ *    JsonUtilCode rc = selector.deleteValues(doc, path, numValsDeleted);
+ *
+ *    The outcome is selector.resultSet, representing values to delete. Each entry is a (value, valuePath) pair.
+ *
+ * NOTE:
+ * a) Inserting into an array value is not allowed. (That's the job of JSON.ARRINSERT and JSON.ARRAPPEND.)
+ * b) A new key can be appended to an object if and only if the key is the last child in the path.
+ */
+class Selector {
+ public:
+    explicit Selector(bool force_v2_path_behavior = false)
+        : isV2Path(force_v2_path_behavior)
+        , root(nullptr)
+        , node(nullptr)
+        , nodePath()
+        , lex()
+        , maxPathDepth(0)
+        , currPathDepth(0)
+        , resultSet()
+        , insertPaths()
+        , uniqueResultSet()
+        , mode(READ)
+        , isRecursiveSearch(false)
+        , error(JSONUTIL_SUCCESS)
+    {}
+
+    // ValueInfo - (value, path) pair.
+    //   first:  JValue pointer
+    //   second: path to the value, in JSON pointer format.
+    typedef std::pair<JValue*, jsn::string> ValueInfo;
+
+    /**
+     * Entry point for a READ query.
+     * The outcome is selector.resultSet that matches the query. Each entry is a (value, valuePath) pair.
+     */
+    JsonUtilCode getValues(JValue &root, const char *path);
+
+    /**
+     * Entry point for DELETE.
+     * The outcome is selector.resultSet that matches the query. Each entry is a (value, valuePath) pair.
+     */
+    JsonUtilCode deleteValues(JValue &root, const char *path, size_t &numValsDeleted);
+
+    /**
+     * Entry point for a single-stage INSERT/UPDATE, which commits the operation.
+     * The outcome is 2 vectors:
+     *   1) selector.resultSet: values to update. Each entry is a (value, valuePath) pair.
+     *   2) selector.insertPaths: set of insert paths.
+     */
+    JsonUtilCode setValues(JValue &root, const char *path, JValue &new_val);
+    /**
+     * Prepare for a 2-stage INSERT/UPDATE. The 2-stage write splits a write operation into two calls:
+     * prepareSetValues and commit, where prepareSetValues does not change the Valkey data.
+     * The purpose of having a 2-stage write is to be able to discard the write operation if
+     * certain conditions are not satisfied.
+     */
+    JsonUtilCode prepareSetValues(JValue &root, const char *path);
+    /**
+     * Commit a 2-stage INSERT/UPDATE.
+     */
+    JsonUtilCode commit(JValue &new_val);
+    bool isLegacyJsonPathSyntax() const { return !isV2Path; }
+    bool isSyntaxError(JsonUtilCode code) const;
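A sketch of how a caller might use the 2-stage write to make an update conditional (the entry points are the ones declared above; `doc`, `new_val` and `precondition_holds` are hypothetical):

```cpp
Selector selector;
JsonUtilCode rc = selector.prepareSetValues(doc, "$.user.quota");  // stage 1: no data changed yet
if (rc == JSONUTIL_SUCCESS && precondition_holds(selector)) {
    rc = selector.commit(new_val);  // stage 2: apply the insert/update
}
// If the precondition fails, the prepared write is simply dropped and the
// document is left untouched.
```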
+
+    /**
+     * Given a list of paths, check if there is at least one path that is a v2 JSONPath.
+     */
+    static bool has_at_least_one_v2path(const char **paths, const int num_paths) {
+        for (int i = 0; i < num_paths; i++) {
+            if (*paths[i] == '$') return true;
+        }
+        return false;
+    }
+
+    bool hasValues() const { return !resultSet.empty(); }
+    bool hasUpdates() const { return !resultSet.empty(); }
+    bool hasInserts() const { return !insertPaths.empty(); }
+    size_t getMaxPathDepth() const { return maxPathDepth; }
+    const jsn::vector<ValueInfo>& getResultSet() const { return resultSet; }
+    void getSelectedValues(jsn::vector<JValue*> &values) const;
+    const jsn::vector<ValueInfo>& getUniqueResultSet();
+    void dedupe();
+
+    bool isV2Path;  // if false, it's legacy syntax
+
+ private:
+    enum Mode {
+        READ,
+        INSERT,
+        UPDATE,
+        INSERT_OR_UPDATE,  // JSON.SET could be insert or update or both
+        DELETE
+    };
+
+    struct State {
+        State()
+            : currNode(nullptr)
+            , nodePath()
+            , currPathPtr(nullptr)
+            , currToken()
+            , currPathDepth(0)
+        {}
+        JValue *currNode;
+        jsn::string nodePath;
+        const char *currPathPtr;
+        Token currToken;
+        size_t currPathDepth;
+    };
+    void snapshotState(State &state) {
+        state.currNode = node;
+        state.nodePath = nodePath;
+        state.currPathPtr = lex.p;
+        state.currToken = lex.next;
+        state.currPathDepth = currPathDepth;
+    }
+    void restoreState(const State &state) {
+        node = state.currNode;
+        nodePath = state.nodePath;
+        lex.p = state.currPathPtr;
+        lex.next = state.currToken;
+        currPathDepth = state.currPathDepth;
+    }
+
+    /***
+     * Initialize the selector.
+     */
+    JsonUtilCode init(JValue &root, const char *path, const Mode mode);
+
+    void resetPointers(JValue &currVal, const char *currPath) {
+        node = &currVal;
+        lex.p = currPath;
+    }
+
+    void incrPathDepth() {
+        currPathDepth++;
+        maxPathDepth = std::max(maxPathDepth, currPathDepth);
+    }
+
+    void decrPathDepth() {
+        ValkeyModule_Assert(currPathDepth > 0);
+        currPathDepth--;
+    }
+
+    /***
+     * Evaluate the path. Parsing and evaluation are performed together.
+     */
+    JsonUtilCode eval();
+    JsonUtilCode evalMember(JValue &m, const char *path_start);
+    JsonUtilCode evalObjectMember(const StringViewHelper &member_name, JValue &val);
+    JsonUtilCode evalArrayMember(int64_t idx);
+    JsonUtilCode traverseToObjectMember(const StringViewHelper &member_name);
+    JsonUtilCode traverseToArrayIndex(int64_t idx);
+    JsonUtilCode parseSupportedPath();
+    JsonUtilCode parseRelativePath();
+    JsonUtilCode parseRecursivePath();
+    JsonUtilCode parseDotPath();
+    JsonUtilCode parseBracketPath();
+    JsonUtilCode parseQualifiedPath();
+    JsonUtilCode parseQualifiedPathElement();
+    JsonUtilCode parseKey();
+    JsonUtilCode parseBracketPathElement();
+    JsonUtilCode parseWildcardInBrackets();
+    JsonUtilCode parseNameInBrackets();
+    JsonUtilCode parseQuotedMemberName(jsn::stringstream &ss);
+    JsonUtilCode parseUnquotedMemberName(StringViewHelper &name);
+    JsonUtilCode parseIndexExpr();
+    JsonUtilCode parseSliceStartsWithColon();
+    JsonUtilCode parseSliceStartsWithInteger(const int64_t start);
+    JsonUtilCode parseSliceOrUnionOrIndex();
+    JsonUtilCode parseEndAndStep(const int64_t start);
+    JsonUtilCode parseStep(const int64_t start, const int64_t end);
+    JsonUtilCode parseIndex(int64_t &val);
+    JsonUtilCode parseFilter();
+    JsonUtilCode parseFilterExpr(jsn::vector<int64_t> &result);
+    JsonUtilCode parseTerm(jsn::vector<int64_t> &result);
+    JsonUtilCode parseFactor(jsn::vector<int64_t> &result);
+    JsonUtilCode parseMemberName(StringViewHelper &name);
+    JsonUtilCode parseBracketedMemberName(StringViewHelper &member_name);
+    JsonUtilCode parseComparisonValue(JValue &v);
+    JsonUtilCode parseComparisonOp(Token::TokenType &op);
+    JsonUtilCode swapComparisonOpSide(Token::TokenType &op);
+    JsonUtilCode processUnionOfMembers(const jsn::vector<jsn::string> &member_names);
+    JsonUtilCode parseUnionOfIndexes(const int64_t firstIndex);
+    JsonUtilCode processWildcard();
+    JsonUtilCode processWildcardKey();
+    JsonUtilCode processWildcardIndex();
+    JsonUtilCode parseWildcardFilter();
+    JsonUtilCode processSubscript(const int64_t idx);
+    JsonUtilCode processSlice(int64_t start, int64_t end, const int64_t step = 1);
+    JsonUtilCode processUnion(jsn::vector<int64_t> union_indices);
+    JsonUtilCode processFilterResult(jsn::vector<int64_t> &result);
+    JsonUtilCode processArrayContains(const StringViewHelper &member_name, const Token::TokenType op,
+                                      const JValue &comparison_value, jsn::vector<int64_t> &result);
+    JsonUtilCode processComparisonExpr(const bool is_self, const StringViewHelper &member_name,
+                                       const Token::TokenType op, const JValue &comparison_value,
+                                       jsn::vector<int64_t> &result);
+    JsonUtilCode processComparisonExprAtIndex(const int64_t idx, const StringViewHelper &member_name,
+                                              const Token::TokenType op, const JValue &comparison_value,
+                                              jsn::vector<int64_t> &result);
+    JsonUtilCode processAttributeFilter(const StringViewHelper &member_name, jsn::vector<int64_t> &result);
+    bool evalOp(const JValue *v, const Token::TokenType op, const JValue &comparison_value);
+    bool deleteValue(const jsn::string &path);
+    void vectorUnion(const jsn::vector<int64_t> &v, jsn::vector<int64_t> &r, jsn::unordered_set<int64_t> &set);
+    void vectorIntersection(const jsn::vector<int64_t> &v1, const jsn::vector<int64_t> &v2,
+                            jsn::vector<int64_t> &result);
+    JsonUtilCode recursiveSearch(JValue &v, const char *p);
+    void setError(const JsonUtilCode error_code) { error = error_code; }
+    JsonUtilCode getError() const { return error; }
+
+    JValue *root;          // the root value, aka the document
+    JValue *node;          // current node (value) in the JSON tree
+    jsn::string nodePath;  // current node's path, in JSON pointer format
+    Lexer lex;
+    size_t maxPathDepth;
+    size_t currPathDepth;
+
+    // resultSet - selected values that match the query. In write mode, these are the source values to update
+    // or delete.
+    jsn::vector<ValueInfo> resultSet;
+
+    // insertPaths - set of insert paths, in JSON pointer path format.
+    // Only used for write operations that generate INSERTs.
+    jsn::unordered_set<jsn::string> insertPaths;
+
+    // data structure to assist dedupe
+    jsn::vector<ValueInfo> uniqueResultSet;
+
+    Mode mode;
+    bool isRecursiveSearch;  // if we are doing a recursive search, we do not wish to add new fields
+    JsonUtilCode error;      // JSONUTIL_SUCCESS indicates no error
+};
+
+#endif  // VALKEYJSONMODULE_JSON_SELECTOR_H_
diff --git a/src/json/stats.cc b/src/json/stats.cc
new file mode 100644
index 0000000..4bb2012
--- /dev/null
+++ b/src/json/stats.cc
@@ -0,0 +1,291 @@
+#include "json/stats.h"
+#include <pthread.h>
+#include <atomic>
+#include <sstream>
+#include <string>
+#include <cstring>
+#include <cstdint>
extern "C" {
+    #define VALKEYMODULE_EXPERIMENTAL_API
+    #include <./include/valkeymodule.h>
+}
+
+#define STATIC  /* decorator for static functions; defined empty so that backtrace symbols include them */
+
+LogicalStats logical_stats;  // initialize the global variable
+
+// Thread local storage (TLS) key for calculating used memory per thread.
+static pthread_key_t thread_local_mem_counter_key;
+
+/* JSON statistics struct.
+ * Atomic integers are used due to possible multi-threaded execution of rdb_load;
+ * the overhead of atomic operations is negligible.
+ */
+typedef struct {
+    std::atomic_ullong used_mem;  // global used memory counter
+    std::atomic_ullong num_doc_keys;
+    std::atomic_ullong max_depth_ever_seen;
+    std::atomic_ullong max_size_ever_seen;
+    std::atomic_ullong defrag_count;
+    std::atomic_ullong defrag_bytes;
+
+    void reset() {
+        used_mem = 0;
+        num_doc_keys = 0;
+        max_depth_ever_seen = 0;
+        max_size_ever_seen = 0;
+        defrag_count = 0;
+        defrag_bytes = 0;
+    }
+} JsonStats;
+static JsonStats jsonstats;
+
+// histograms
+#define NUM_BUCKETS (11)
+static size_t buckets[] = {
+    0, 256, 1024, 4*1024, 16*1024, 64*1024, 256*1024, 1024*1024,
+    4*1024*1024, 16*1024*1024, 64*1024*1024, SIZE_MAX
+};
+
+// static histogram showing document size distribution
+static size_t doc_hist[NUM_BUCKETS];
+// dynamic histogram for read operations (JSON.GET and JSON.MGET only)
+static size_t read_hist[NUM_BUCKETS];
+// dynamic histogram for insert operations (JSON.SET and JSON.ARRINSERT)
+static size_t insert_hist[NUM_BUCKETS];
+// dynamic histogram for update operations (JSON.SET, JSON.STRAPPEND and JSON.ARRAPPEND)
+static size_t update_hist[NUM_BUCKETS];
+// dynamic histogram for delete operations (JSON.DEL, JSON.FORGET, JSON.ARRPOP and JSON.ARRTRIM)
+static size_t delete_hist[NUM_BUCKETS];
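With these boundaries, bucket i covers the byte range [buckets[i], buckets[i+1]), and the trailing SIZE_MAX closes the last bucket at infinity. A few lookups the binary search below should produce (illustrative expectations):

```cpp
// jsonstats_find_bucket(0)        -> 0   range [0, 256)
// jsonstats_find_bucket(256)      -> 1   boundary values land in the higher bucket
// jsonstats_find_bucket(300)      -> 1   range [256, 1024)
// jsonstats_find_bucket(1 << 30)  -> 10  range [64m, INF)
```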
+
+JsonUtilCode jsonstats_init() {
+    ValkeyModule_Assert(jsonstats.used_mem == 0);  // otherwise memory accounting would be lost
+    // Create the thread local key. No destructor hook is needed, because the per-thread value is an
+    // integer stored directly in the slot, not heap-allocated data.
+    if (pthread_key_create(&thread_local_mem_counter_key, nullptr) != 0)
+        return JSONUTIL_FAILED_TO_CREATE_THREAD_SPECIFIC_DATA_KEY;
+
+    jsonstats.reset();
+    logical_stats.reset();
+    memset(doc_hist, 0, sizeof(doc_hist));
+    memset(read_hist, 0, sizeof(read_hist));
+    memset(insert_hist, 0, sizeof(insert_hist));
+    memset(update_hist, 0, sizeof(update_hist));
+    memset(delete_hist, 0, sizeof(delete_hist));
+    return JSONUTIL_SUCCESS;
+}
+
+int64_t jsonstats_begin_track_mem() {
+    return reinterpret_cast<int64_t>(pthread_getspecific(thread_local_mem_counter_key));
+}
+
+int64_t jsonstats_end_track_mem(const int64_t begin_val) {
+    int64_t end_val = reinterpret_cast<int64_t>(pthread_getspecific(thread_local_mem_counter_key));
+    return end_val - begin_val;
+}
+
+void jsonstats_increment_used_mem(size_t delta) {
+    // update the atomic global counter
+    jsonstats.used_mem += delta;
+
+    // update the thread local counter
+    int64_t curr_val = reinterpret_cast<int64_t>(pthread_getspecific(thread_local_mem_counter_key));
+    pthread_setspecific(thread_local_mem_counter_key, reinterpret_cast<void*>(curr_val + delta));
+}
+
+void jsonstats_decrement_used_mem(size_t delta) {
+    // update the atomic global counter
+    ValkeyModule_Assert(delta <= jsonstats.used_mem);
+    jsonstats.used_mem -= delta;
+
+    // update the thread local counter
+    int64_t curr_val = reinterpret_cast<int64_t>(pthread_getspecific(thread_local_mem_counter_key));
+    pthread_setspecific(thread_local_mem_counter_key, reinterpret_cast<void*>(curr_val - delta));
+}
+
+unsigned long long jsonstats_get_used_mem() {
+    return jsonstats.used_mem;
+}
+
+unsigned long long jsonstats_get_num_doc_keys() {
+    return jsonstats.num_doc_keys;
+}
+
+unsigned long long jsonstats_get_max_depth_ever_seen() {
+    return jsonstats.max_depth_ever_seen;
+}
+
+void jsonstats_update_max_depth_ever_seen(const size_t max_depth) {
+    if (max_depth > jsonstats.max_depth_ever_seen) {
+        jsonstats.max_depth_ever_seen = max_depth;
+    }
+}
+
+unsigned long long jsonstats_get_max_size_ever_seen() {
+    return jsonstats.max_size_ever_seen;
+}
+
+void jsonstats_update_max_size_ever_seen(const size_t max_size) {
+    if (max_size > jsonstats.max_size_ever_seen) {
+        jsonstats.max_size_ever_seen = max_size;
+    }
+}
+
+unsigned long long jsonstats_get_defrag_count() {
+    return jsonstats.defrag_count;
+}
+
+void jsonstats_increment_defrag_count() {
+    jsonstats.defrag_count++;
+}
+
+unsigned long long jsonstats_get_defrag_bytes() {
+    return jsonstats.defrag_bytes;
+}
+
+void jsonstats_increment_defrag_bytes(const size_t amount) {
+    jsonstats.defrag_bytes += amount;
+}
+
+/* Given a size (bytes), find histogram bucket index using binary search.
+ */ +uint32_t jsonstats_find_bucket(size_t size) { + int lo = 0; + int hi = NUM_BUCKETS; // length of buckets[] is NUM_BUCKETS + 1 + while (hi - lo > 1) { + uint32_t mid = (lo + hi) / 2; + if (size < buckets[mid]) + hi = mid; + else if (size > buckets[mid]) + lo = mid; + else + return mid; + } + return lo; +} + +/* Update the static document histogram */ +STATIC void update_doc_hist(JDocument *doc, const size_t orig_size, const size_t new_size, + const JsonCommandType cmd_type) { + switch (cmd_type) { + case JSONSTATS_INSERT: { + if (orig_size == 0) { + uint32_t new_bucket = jsonstats_find_bucket(new_size); + doc_hist[new_bucket]++; + dom_set_bucket_id(doc, new_bucket); + } else { + update_doc_hist(doc, orig_size, new_size, JSONSTATS_UPDATE); + } + break; + } + case JSONSTATS_UPDATE: { + if (orig_size != new_size) { + uint32_t orig_bucket = dom_get_bucket_id(doc); + uint32_t new_bucket = jsonstats_find_bucket(new_size); + if (orig_bucket != new_bucket) { + doc_hist[orig_bucket]--; + doc_hist[new_bucket]++; + dom_set_bucket_id(doc, new_bucket); + } + } + break; + } + case JSONSTATS_DELETE: { + uint32_t orig_bucket = dom_get_bucket_id(doc); + if (new_size == 0) { + doc_hist[orig_bucket]--; + } else { + uint32_t new_bucket = jsonstats_find_bucket(new_size); + if (new_bucket != orig_bucket) { + doc_hist[orig_bucket]--; + doc_hist[new_bucket]++; + dom_set_bucket_id(doc, new_bucket); + } + } + break; + } + default: + break; + } +} + +void jsonstats_sprint_hist_buckets(char *buf, const size_t buf_size) { + std::ostringstream oss; + oss << "["; + for (size_t i=0; i < NUM_BUCKETS; i++) { + if (i > 0) oss << ","; + oss << buckets[i]; + } + oss << ",INF]"; + std::string str = oss.str(); + ValkeyModule_Assert(str.length() <= buf_size); + memcpy(buf, str.c_str(), str.length()); + buf[str.length()] = '\0'; +} + +STATIC void sprint_hist(size_t *arr, const size_t len, char *buf, const size_t buf_size) { + std::ostringstream oss; + oss << "["; + for (size_t i=0; i < len; i++) { + if (i > 0) oss << ","; + oss << arr[i]; + } + oss << "]"; + std::string str = oss.str(); + ValkeyModule_Assert(str.length() <= buf_size); + memcpy(buf, str.c_str(), str.length()); + buf[str.length()] = '\0'; +} + +void jsonstats_sprint_doc_hist(char *buf, const size_t buf_size) { + sprint_hist(doc_hist, NUM_BUCKETS, buf, buf_size); +} + +void jsonstats_sprint_read_hist(char *buf, const size_t buf_size) { + sprint_hist(read_hist, NUM_BUCKETS, buf, buf_size); +} + +void jsonstats_sprint_insert_hist(char *buf, const size_t buf_size) { + sprint_hist(insert_hist, NUM_BUCKETS, buf, buf_size); +} + +void jsonstats_sprint_update_hist(char *buf, const size_t buf_size) { + sprint_hist(update_hist, NUM_BUCKETS, buf, buf_size); +} + +void jsonstats_sprint_delete_hist(char *buf, const size_t buf_size) { + sprint_hist(delete_hist, NUM_BUCKETS, buf, buf_size); +} + +void jsonstats_update_stats_on_read(const size_t fetched_val_size) { + uint32_t bucket = jsonstats_find_bucket(fetched_val_size); + read_hist[bucket]++; +} + +void jsonstats_update_stats_on_insert(JDocument *doc, const bool is_delete_doc_key, const size_t orig_size, + const size_t new_size, const size_t inserted_val_size) { + if (is_delete_doc_key) jsonstats.num_doc_keys++; + update_doc_hist(doc, orig_size, new_size, JSONSTATS_INSERT); + uint32_t bucket = jsonstats_find_bucket(inserted_val_size); + insert_hist[bucket]++; + jsonstats_update_max_size_ever_seen(new_size); +} + +void jsonstats_update_stats_on_update(JDocument *doc, const size_t orig_size, const size_t new_size, + const 
size_t input_json_size) {
+    update_doc_hist(doc, orig_size, new_size, JSONSTATS_UPDATE);
+    uint32_t bucket = jsonstats_find_bucket(input_json_size);
+    update_hist[bucket]++;
+    jsonstats_update_max_size_ever_seen(new_size);
+}
+
+void jsonstats_update_stats_on_delete(JDocument *doc, const bool is_delete_doc_key, const size_t orig_size,
+                                      const size_t new_size, const size_t deleted_val_size) {
+    update_doc_hist(doc, orig_size, new_size, JSONSTATS_DELETE);
+    if (is_delete_doc_key) {
+        ValkeyModule_Assert(jsonstats.num_doc_keys > 0);
+        jsonstats.num_doc_keys--;
+    }
+    uint32_t bucket = jsonstats_find_bucket(deleted_val_size);
+    delete_hist[bucket]++;
+}
+
diff --git a/src/json/stats.h b/src/json/stats.h
new file mode 100644
index 0000000..d2a6f73
--- /dev/null
+++ b/src/json/stats.h
@@ -0,0 +1,138 @@
+/**
+ * The STATS module's main responsibility is to produce the following metrics:
+ * 1. Core metrics:
+ *    json_total_memory_bytes: total memory allocated to JSON objects
+ *    json_num_documents: number of document keys in Valkey
+ *    json_num_reads: number of reads
+ *    json_num_writes: number of writes
+ * 2. Histograms:
+ *    json_doc_histogram: static histogram showing document size distribution. The value of the i-th element
+ *    is the number of documents whose size falls into bucket i.
+ *    json_read_histogram: dynamic histogram for read operations (JSON.GET and JSON.MGET). The value of the
+ *    i-th element is the number of read operations with fetched JSON size falling into bucket i.
+ *    json_insert_histogram: dynamic histogram for insert operations (JSON.SET and JSON.ARRINSERT) that either
+ *    insert new documents or insert values into existing documents. The value of the i-th element is the
+ *    number of insert operations with inserted values' size falling into bucket i.
+ *    json_update_histogram: dynamic histogram for update operations (JSON.SET, JSON.STRAPPEND and
+ *    JSON.ARRAPPEND). The value of the i-th element is the number of update operations with input JSON size
+ *    falling into bucket i.
+ *    json_delete_histogram: dynamic histogram for delete operations (JSON.DEL, JSON.FORGET, JSON.ARRPOP and
+ *    JSON.ARRTRIM). The value of the i-th element is the number of delete operations with deleted values'
+ *    size falling into bucket i.
+ *
+ * Histogram buckets:
+ *    [0,256), [256,1k), [1k,4k), [4k,16k), [16k,64k), [64k,256k), [256k,1m), [1m,4m), [4m,16m), [16m,64m),
+ *    [64m,INF). Each bucket represents a JSON size range in bytes.
+ *
+ * To query the metrics, run Valkey commands:
+ *    info modules: returns all metrics of the module
+ *    info json_core_metrics: returns core metrics
+ */
+#ifndef VALKEYJSONMODULE_JSON_STATS_H_
+#define VALKEYJSONMODULE_JSON_STATS_H_
+
+#include "json/dom.h"
+
+typedef enum {
+    JSONSTATS_READ = 0,
+    JSONSTATS_INSERT,
+    JSONSTATS_UPDATE,
+    JSONSTATS_DELETE
+} JsonCommandType;
+
+/* Initialize statistics counters and thread local storage (TLS) keys. */
+JsonUtilCode jsonstats_init();
+
+/* Begin tracking memory usage.
+ * @return value of the thread local counter.
+ */
+int64_t jsonstats_begin_track_mem();
+
+/* End tracking memory usage.
+ * @param begin_val - previously saved thread local value, as returned from jsonstats_begin_track_mem().
+ * @return delta of used memory
+ */
+int64_t jsonstats_end_track_mem(const int64_t begin_val);
+
+/* Get the total memory allocated to JSON objects. */
+unsigned long long jsonstats_get_used_mem();
+
+/* The following two methods are invoked by the DOM memory allocator upon every malloc/free/realloc.
+ * Two memory counters are updated: A global atomic counter and a thread local counter (per thread). + */ +void jsonstats_increment_used_mem(size_t delta); +void jsonstats_decrement_used_mem(size_t delta); + +// get counters +unsigned long long jsonstats_get_num_doc_keys(); + +unsigned long long jsonstats_get_max_depth_ever_seen(); +void jsonstats_update_max_depth_ever_seen(const size_t max_depth); +unsigned long long jsonstats_get_max_size_ever_seen(); + +unsigned long long jsonstats_get_defrag_count(); +void jsonstats_increment_defrag_count(); + +unsigned long long jsonstats_get_defrag_bytes(); +void jsonstats_increment_defrag_bytes(const size_t amount); + +// updating stats on read/insert/update/delete operation +void jsonstats_update_stats_on_read(const size_t fetched_val_size); +void jsonstats_update_stats_on_insert(JDocument *doc, const bool is_delete_doc_key, const size_t orig_size, + const size_t new_size, const size_t inserted_val_size); +void jsonstats_update_stats_on_update(JDocument *doc, const size_t orig_size, const size_t new_size, + const size_t input_json_size); +void jsonstats_update_stats_on_delete(JDocument *doc, const bool is_delete_doc_key, const size_t orig_size, + const size_t new_size, const size_t deleted_val_size); + +// helper methods for printing histograms into C string +void jsonstats_sprint_hist_buckets(char *buf, const size_t buf_size); +void jsonstats_sprint_doc_hist(char *buf, const size_t buf_size); +void jsonstats_sprint_read_hist(char *buf, const size_t buf_size); +void jsonstats_sprint_insert_hist(char *buf, const size_t buf_size); +void jsonstats_sprint_update_hist(char *buf, const size_t buf_size); +void jsonstats_sprint_delete_hist(char *buf, const size_t buf_size); + +/* Given a size (bytes), find the histogram bucket index using binary search. + */ +uint32_t jsonstats_find_bucket(size_t size); + + +/* JSON logical statistics. + * Used for internal tracking of elements for Skyhook Billing. + * Using a similar structure to JsonStats. + * We don't track the logical bytes themselves here as they are tracked by Skyhook Metering. + * We are using size_t to match Valkey Module API for Data Metering. 
+ */
+typedef struct {
+    std::atomic_size_t boolean_count;            // 16 bytes
+    std::atomic_size_t number_count;             // 16 bytes
+    std::atomic_size_t sum_extra_numeric_chars;  // 1 byte per char
+    std::atomic_size_t string_count;             // 16 bytes
+    std::atomic_size_t sum_string_chars;         // 1 byte per char
+    std::atomic_size_t null_count;               // 16 bytes
+    std::atomic_size_t array_count;              // 16 bytes
+    std::atomic_size_t sum_array_elements;       // internal metric
+    std::atomic_size_t object_count;             // 16 bytes
+    std::atomic_size_t sum_object_members;       // internal metric
+    std::atomic_size_t sum_object_key_chars;     // 1 byte per char
+
+    void reset() {
+        boolean_count = 0;
+        number_count = 0;
+        sum_extra_numeric_chars = 0;
+        string_count = 0;
+        sum_string_chars = 0;
+        null_count = 0;
+        array_count = 0;
+        sum_array_elements = 0;
+        object_count = 0;
+        sum_object_members = 0;
+        sum_object_key_chars = 0;
+    }
+} LogicalStats;
+extern LogicalStats logical_stats;
+
+#define DOUBLE_CHARS_CUTOFF 24
+
+#endif  // VALKEYJSONMODULE_JSON_STATS_H_
diff --git a/src/json/util.cc b/src/json/util.cc
new file mode 100644
index 0000000..ee7fc1d
--- /dev/null
+++ b/src/json/util.cc
@@ -0,0 +1,142 @@
+#include "json/util.h"
+#include "json/dom.h"
+#include "json/alloc.h"
+#include <cmath>
+#include <cstring>
+#include "json/rapidjson_includes.h"
+
+const char *jsonutil_code_to_message(JsonUtilCode code) {
+    switch (code) {
+        case JSONUTIL_SUCCESS:
+        case JSONUTIL_WRONG_NUM_ARGS:
+        case JSONUTIL_NX_XX_CONDITION_NOT_SATISFIED:
+            // only used as a code, no message needed
+            break;
+        case JSONUTIL_JSON_PARSE_ERROR: return "SYNTAXERR Failed to parse JSON string due to syntax error";
+        case JSONUTIL_NX_XX_SHOULD_BE_MUTUALLY_EXCLUSIVE:
+            return "SYNTAXERR Option NX and XX should be mutually exclusive";
+        case JSONUTIL_INVALID_JSON_PATH: return "SYNTAXERR Invalid JSON path";
+        case JSONUTIL_INVALID_MEMBER_NAME: return "SYNTAXERR Invalid object member name";
+        case JSONUTIL_INVALID_NUMBER: return "SYNTAXERR Invalid number";
+        case JSONUTIL_INVALID_IDENTIFIER: return "SYNTAXERR Invalid identifier";
+        case JSONUTIL_INVALID_DOT_SEQUENCE: return "SYNTAXERR Invalid dot sequence";
+        case JSONUTIL_EMPTY_EXPR_TOKEN: return "SYNTAXERR Expression token cannot be empty";
+        case JSONUTIL_ARRAY_INDEX_NOT_NUMBER: return "SYNTAXERR Array index is not a number";
+        case JSONUTIL_STEP_CANNOT_NOT_BE_ZERO: return "SYNTAXERR Step in the slice cannot be zero";
+        case JSONUTIL_INVALID_USE_OF_WILDCARD: return "ERR Invalid use of wildcard";
+        case JSONUTIL_JSON_PATH_NOT_EXIST: return "NONEXISTENT JSON path does not exist";
+        case JSONUTIL_PARENT_ELEMENT_NOT_EXIST: return "NONEXISTENT Parent element does not exist";
+        case JSONUTIL_DOCUMENT_KEY_NOT_FOUND: return "NONEXISTENT Document key does not exist";
+        case JSONUTIL_NOT_A_DOCUMENT_KEY: return "WRONGTYPE Not a JSON document key";
+        case JSONUTIL_FAILED_TO_DELETE_VALUE: return "OPFAIL Failed to delete JSON value";
+        case JSONUTIL_JSON_ELEMENT_NOT_NUMBER: return "WRONGTYPE JSON element is not a number";
+        case JSONUTIL_JSON_ELEMENT_NOT_BOOL: return "WRONGTYPE JSON element is not a bool";
+        case JSONUTIL_JSON_ELEMENT_NOT_STRING: return "WRONGTYPE JSON element is not a string";
+        case JSONUTIL_JSON_ELEMENT_NOT_OBJECT: return "WRONGTYPE JSON element is not an object";
+        case JSONUTIL_JSON_ELEMENT_NOT_ARRAY: return "WRONGTYPE JSON element is not an array";
+        case JSONUTIL_VALUE_NOT_NUMBER: return "WRONGTYPE Value is not a number";
+        case JSONUTIL_VALUE_NOT_STRING: return "WRONGTYPE Value is not a string";
"WRONGTYPE Value is not an integer"; + case JSONUTIL_PATH_SHOULD_BE_AT_THE_END: return "SYNTAXERR Path arguments should be positioned at the end"; + case JSONUTIL_COMMAND_SYNTAX_ERROR: return "SYNTAXERR Command syntax error"; + case JSONUTIL_MULTIPLICATION_OVERFLOW: return "OVERFLOW Multiplication would overflow"; + case JSONUTIL_ADDITION_OVERFLOW: return "OVERFLOW Addition would overflow"; + case JSONUTIL_EMPTY_JSON_OBJECT: return "EMPTYVAL Empty JSON object"; + case JSONUTIL_EMPTY_JSON_ARRAY: return "EMPTYVAL Empty JSON array"; + case JSONUTIL_INDEX_OUT_OF_ARRAY_BOUNDARIES: return "OUTOFBOUNDARIES Array index is out of bounds"; + case JSONUTIL_UNKNOWN_SUBCOMMAND: return "SYNTAXERR Unknown subcommand"; + case JSONUTIL_FAILED_TO_CREATE_THREAD_SPECIFIC_DATA_KEY: + return "PTHREADERR Failed to create thread-specific data key"; + case JSONUTIL_DOCUMENT_SIZE_LIMIT_EXCEEDED: + return "LIMIT Document size limit is exceeded"; + case JSONUTIL_DOCUMENT_PATH_LIMIT_EXCEEDED: + return "LIMIT Document path nesting limit is exceeded"; + case JSONUTIL_PARSER_RECURSION_DEPTH_LIMIT_EXCEEDED: + return "LIMIT Parser recursion depth is exceeded"; + case JSONUTIL_RECURSIVE_DESCENT_TOKEN_LIMIT_EXCEEDED: + return "LIMIT Total number of recursive descent tokens in the query string exceeds the limit"; + case JSONUTIL_QUERY_STRING_SIZE_LIMIT_EXCEEDED: + return "LIMIT Query string size limit is exceeded"; + case JSONUTIL_CANNOT_INSERT_MEMBER_INTO_NON_OBJECT_VALUE: + return "ERROR Cannot insert a member into a non-object value"; + case JSONUTIL_INVALID_RDB_FORMAT: + return "ERROR Invalid value in RDB format"; + case JSONUTIL_DOLLAR_CANNOT_APPLY_TO_NON_ROOT: return "SYNTAXERR Dollar sign cannot apply to non-root element"; + default: ValkeyModule_Assert(false); + } + return ""; +} + +size_t jsonutil_double_to_string(const double val, char *double_to_string_buf, size_t len) { + // It's safe to write a double value into double_to_string_buf, because the converted string will + // never exceed length of 1024. + ValkeyModule_Assert(len == BUF_SIZE_DOUBLE_JSON); + return snprintf(double_to_string_buf, len, "%.17g", val); +} + +/** + * Convert double to string using the same format as RapidJSON's Writer::WriteDouble does. + */ +size_t jsonutil_double_to_string_rapidjson(const double val, char *double_to_string_buf_rapidjson, size_t len) { + // RapidJSON's Writer::WriteDouble only uses a buffer of 25 bytes. 
+
+/**
+ * Convert double to string using the same format as RapidJSON's Writer::WriteDouble does.
+ */
+size_t jsonutil_double_to_string_rapidjson(const double val, char *double_to_string_buf_rapidjson, size_t len) {
+    // RapidJSON's Writer::WriteDouble only needs a buffer of 25 bytes.
+    ValkeyModule_Assert(len == BUF_SIZE_DOUBLE_RAPID_JSON);
+    char *end = rapidjson::internal::dtoa(val, double_to_string_buf_rapidjson,
+                                          rapidjson::Writer<rapidjson::StringBuffer>::kDefaultMaxDecimalPlaces);
+    *end = '\0';
+    return end - double_to_string_buf_rapidjson;
+}
+
+bool jsonutil_is_int64(const double a) {
+    // Round-trip through int64_t; equality is written via <= and >= to dodge -Wfloat-equal.
+    int64_t a_l = static_cast<int64_t>(a);
+    double b = static_cast<double>(a_l);
+    return (a <= b && a >= b);
+}
+
+JsonUtilCode jsonutil_multiply_double(const double a, const double b, double *res) {
+    double c = a * b;
+    // check overflow
+    if (std::isinf(c)) return JSONUTIL_MULTIPLICATION_OVERFLOW;
+    *res = c;
+    return JSONUTIL_SUCCESS;
+}
+
+JsonUtilCode jsonutil_multiply_int64(const int64_t a, const int64_t b, int64_t *res) {
+    if (a == 0 || b == 0) {
+        *res = 0;
+        return JSONUTIL_SUCCESS;
+    }
+    // Check overflow conditions without performing the multiplication
+    if ((a > 0 && b > 0 && a > INT64_MAX / b) ||  // positive * positive overflow
+        (a > 0 && b < 0 && b < INT64_MIN / a) ||  // positive * negative overflow
+        (a < 0 && b > 0 && a < INT64_MIN / b) ||  // negative * positive overflow
+        (a < 0 && b < 0 && a < INT64_MAX / b)) {  // negative * negative overflow
+        return JSONUTIL_MULTIPLICATION_OVERFLOW;
+    }
+
+    // If no overflow, perform the multiplication
+    *res = a * b;
+    return JSONUTIL_SUCCESS;
+}
+
+JsonUtilCode jsonutil_add_double(const double a, const double b, double *res) {
+    double c = a + b;
+    // check overflow
+    if (std::isinf(c)) return JSONUTIL_ADDITION_OVERFLOW;
+    *res = c;
+    return JSONUTIL_SUCCESS;
+}
+
+JsonUtilCode jsonutil_add_int64(const int64_t a, const int64_t b, int64_t *res) {
+    if (a >= 0) {
+        if (b > INT64_MAX - a) return JSONUTIL_ADDITION_OVERFLOW;
+    } else {
+        if (b < INT64_MIN - a) return JSONUTIL_ADDITION_OVERFLOW;
+    }
+    *res = a + b;
+    return JSONUTIL_SUCCESS;
+}
+
+bool jsonutil_is_root_path(const char *json_path) {
+    return !strcmp(json_path, ".") || !strcmp(json_path, "$");
+}
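The division-based guards are exact: for positive a and b, a*b overflows int64 precisely when a > INT64_MAX / b under truncating integer division. Some cases the helpers above should classify (illustrative expectations, not a test from this patch; 3037000499 * 3037000500 is just below 2^63):

```cpp
#include <cstdint>

// int64_t r;
// jsonutil_multiply_int64(3037000500LL, 3037000499LL, &r) -> JSONUTIL_SUCCESS
// jsonutil_multiply_int64(3037000500LL, 3037000500LL, &r) -> JSONUTIL_MULTIPLICATION_OVERFLOW
// jsonutil_add_int64(INT64_MAX, 1, &r)                    -> JSONUTIL_ADDITION_OVERFLOW
// jsonutil_add_int64(INT64_MIN, -1, &r)                   -> JSONUTIL_ADDITION_OVERFLOW
```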
diff --git a/src/json/util.h b/src/json/util.h
new file mode 100644
index 0000000..2f7117e
--- /dev/null
+++ b/src/json/util.h
@@ -0,0 +1,122 @@
+/**
+ * This is the utility module, containing shared utility and helper code.
+ *
+ * Coding Conventions & Best Practices:
+ * 1. Every public interface method declared in this file should be prefixed with "jsonutil_".
+ * 2. Generally speaking, interface methods should not have Valkey module types such as ValkeyModuleCtx
+ *    or ValkeyModuleString, because that would make unit tests hard to write unless gmock classes
+ *    have been developed.
+ */
+#ifndef VALKEYJSONMODULE_JSON_UTIL_H_
+#define VALKEYJSONMODULE_JSON_UTIL_H_
+
+#include <cstdint>
+
+extern "C" {
+#define VALKEYMODULE_EXPERIMENTAL_API
+#include <./include/valkeymodule.h>
+}
+
+typedef enum {
+    JSONUTIL_SUCCESS = 0,
+    JSONUTIL_WRONG_NUM_ARGS,
+    JSONUTIL_JSON_PARSE_ERROR,
+    JSONUTIL_NX_XX_CONDITION_NOT_SATISFIED,
+    JSONUTIL_NX_XX_SHOULD_BE_MUTUALLY_EXCLUSIVE,
+    JSONUTIL_INVALID_JSON_PATH,
+    JSONUTIL_INVALID_USE_OF_WILDCARD,
+    JSONUTIL_INVALID_MEMBER_NAME,
+    JSONUTIL_INVALID_NUMBER,
+    JSONUTIL_INVALID_IDENTIFIER,
+    JSONUTIL_INVALID_DOT_SEQUENCE,
+    JSONUTIL_EMPTY_EXPR_TOKEN,
+    JSONUTIL_ARRAY_INDEX_NOT_NUMBER,
+    JSONUTIL_STEP_CANNOT_NOT_BE_ZERO,
+    JSONUTIL_JSON_PATH_NOT_EXIST,
+    JSONUTIL_PARENT_ELEMENT_NOT_EXIST,
+    JSONUTIL_DOCUMENT_KEY_NOT_FOUND,
+    JSONUTIL_NOT_A_DOCUMENT_KEY,
+    JSONUTIL_FAILED_TO_DELETE_VALUE,
+    JSONUTIL_JSON_ELEMENT_NOT_NUMBER,
+    JSONUTIL_JSON_ELEMENT_NOT_BOOL,
+    JSONUTIL_JSON_ELEMENT_NOT_STRING,
+    JSONUTIL_JSON_ELEMENT_NOT_OBJECT,
+    JSONUTIL_JSON_ELEMENT_NOT_ARRAY,
+    JSONUTIL_VALUE_NOT_NUMBER,
+    JSONUTIL_VALUE_NOT_STRING,
+    JSONUTIL_VALUE_NOT_INTEGER,
+    JSONUTIL_PATH_SHOULD_BE_AT_THE_END,
+    JSONUTIL_COMMAND_SYNTAX_ERROR,
+    JSONUTIL_MULTIPLICATION_OVERFLOW,
+    JSONUTIL_ADDITION_OVERFLOW,
+    JSONUTIL_EMPTY_JSON_OBJECT,
+    JSONUTIL_EMPTY_JSON_ARRAY,
+    JSONUTIL_INDEX_OUT_OF_ARRAY_BOUNDARIES,
+    JSONUTIL_UNKNOWN_SUBCOMMAND,
+    JSONUTIL_FAILED_TO_CREATE_THREAD_SPECIFIC_DATA_KEY,
+    JSONUTIL_DOCUMENT_SIZE_LIMIT_EXCEEDED,
+    JSONUTIL_DOCUMENT_PATH_LIMIT_EXCEEDED,
+    JSONUTIL_PARSER_RECURSION_DEPTH_LIMIT_EXCEEDED,
+    JSONUTIL_RECURSIVE_DESCENT_TOKEN_LIMIT_EXCEEDED,
+    JSONUTIL_QUERY_STRING_SIZE_LIMIT_EXCEEDED,
+    JSONUTIL_CANNOT_INSERT_MEMBER_INTO_NON_OBJECT_VALUE,
+    JSONUTIL_INVALID_RDB_FORMAT,
+    JSONUTIL_DOLLAR_CANNOT_APPLY_TO_NON_ROOT,
+    JSONUTIL_LAST
+} JsonUtilCode;
+
+typedef struct {
+    const char *newline;
+    const char *space;
+    const char *indent;
+} PrintFormat;
+
+/* Enums for buffer sizes used in conversion of double to json or double to rapidjson */
+enum { BUF_SIZE_DOUBLE_JSON = 32, BUF_SIZE_DOUBLE_RAPID_JSON = 25 };
+
+/* Get the message for a given code. */
+const char *jsonutil_code_to_message(JsonUtilCode code);
+
+/* Convert a double value to string. This method is used to help serialize numbers to strings.
+ * Trailing zeros will be removed. For example, 135.250000 will be converted to the string 135.25.
+ */
+size_t jsonutil_double_to_string(const double val, char *double_to_string_buf, size_t len);
+
+/**
+ * Convert double to string using the same format as RapidJSON's Writer::WriteDouble does.
+ */
+size_t jsonutil_double_to_string_rapidjson(const double val, char* double_to_string_buf_rapidjson, size_t len);
+
+/* Check if a double value is an int64.
+ * If the given double does not equal an integer (int64), return false.
+ * If the given double is out of the range of int64, return false.
+ */
+bool jsonutil_is_int64(const double a);
+
+/* Multiply two double numbers with overflow check.
+ * @param res - OUTPUT parameter, *res stores the result of the multiplication.
+ * @return JSONUTIL_SUCCESS if successful, JSONUTIL_MULTIPLICATION_OVERFLOW if the result overflows.
+ */
+JsonUtilCode jsonutil_multiply_double(const double a, const double b, double *res);
+
+/* Multiply two int64 numbers with overflow check.
+ * @param res - OUTPUT parameter, *res stores the result of the multiplication.
+ * @return JSONUTIL_SUCCESS if successful, JSONUTIL_MULTIPLICATION_OVERFLOW if the result overflows.
+ */
+JsonUtilCode jsonutil_multiply_int64(const int64_t a, const int64_t b, int64_t *res);
+
+/* Add two double numbers with overflow check.
+ * @param res - OUTPUT parameter, *res stores the result of the addition.
+ * @return JSONUTIL_SUCCESS if successful, JSONUTIL_ADDITION_OVERFLOW if the result overflows.
+ */
+JsonUtilCode jsonutil_add_double(const double a, const double b, double *res);
+
+/* Add two int64 numbers with overflow check.
+ * @param res - OUTPUT parameter, *res stores the result of the addition.
+ * @return JSONUTIL_SUCCESS if successful, JSONUTIL_ADDITION_OVERFLOW if the result overflows.
+ */
+JsonUtilCode jsonutil_add_int64(const int64_t a, const int64_t b, int64_t *res);
+
+bool jsonutil_is_root_path(const char *json_path);
+
+#endif  // VALKEYJSONMODULE_JSON_UTIL_H_
diff --git a/src/rapidjson/CPPLINT.cfg b/src/rapidjson/CPPLINT.cfg
new file mode 100644
index 0000000..51ff339
--- /dev/null
+++ b/src/rapidjson/CPPLINT.cfg
@@ -0,0 +1 @@
+exclude_files=.*
diff --git a/src/rapidjson/README.md b/src/rapidjson/README.md
new file mode 100644
index 0000000..b180b36
--- /dev/null
+++ b/src/rapidjson/README.md
@@ -0,0 +1,14 @@
+# RapidJSON Source Code
+* The original RapidJSON source code is cloned at build time by CMakeLists.txt.
+* Last commit on the master branch: 0d4517f15a8d7167ba9ae67f3f22a559ca841e3b, 2021-10-31 11:07:57
+
+# Modifications
+We made a few changes to the RapidJSON source code. Until those changes are pushed to the open-source
+project, we have to carry a private copy of each modified file. The modified RapidJSON code is under
+src/rapidjson.
+
+## document.h
+We need to modify RapidJSON's document.h to support the JSON depth limit.
+
+## reader.h
+Modified reader.h to only generate integers in the int64 range.
diff --git a/src/rapidjson/document.h b/src/rapidjson/document.h
new file mode 100644
index 0000000..84ad636
--- /dev/null
+++ b/src/rapidjson/document.h
@@ -0,0 +1,3599 @@
+// Tencent is pleased to support the open source community by making RapidJSON available.
+//
+// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip.
+//
+// Licensed under the MIT License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// http://opensource.org/licenses/MIT
+//
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+#ifndef RAPIDJSON_DOCUMENT_H_
+#define RAPIDJSON_DOCUMENT_H_
+
+/*! \file document.h */
+
+#ifndef RAPIDJSON_HAS_STDSTRING
+#define RAPIDJSON_HAS_STDSTRING 1
+#endif
+
+#include "rapidjson/reader.h"
+#include "rapidjson/internal/meta.h"
+#include "rapidjson/internal/strfunc.h"
+#include "rapidjson/memorystream.h"
+#include "rapidjson/encodedstream.h"
+#include "json/keytable.h"
+#include "json/dom.h"
+#include "json/json.h"
+#include "json/util.h"
+#include <new>      // placement new
+#include <limits>
+#ifdef __cpp_lib_three_way_comparison
+#include <compare>
+#endif
+
+extern size_t json_get_max_path_limit();
+
+#include <iostream>
+#include <atomic>
+// #define trace(x) std::cerr << x << "\n"
+#define trace(x)
+
+RAPIDJSON_DIAG_PUSH
+#ifdef __clang__
+RAPIDJSON_DIAG_OFF(padded)
+RAPIDJSON_DIAG_OFF(switch-enum)
+RAPIDJSON_DIAG_OFF(c++98-compat)
+#elif defined(_MSC_VER)
+RAPIDJSON_DIAG_OFF(4127)  // conditional expression is constant
+RAPIDJSON_DIAG_OFF(4244)  // conversion from kXxxFlags to 'uint16_t', possible loss of data
+#endif
+
+#ifdef __GNUC__
+RAPIDJSON_DIAG_OFF(effc++)
+#endif  // __GNUC__
+
+#ifdef GetObject
+// see https://github.com/Tencent/rapidjson/issues/1448
+// a formerly included windows.h might have defined a macro called GetObject, which affects
+// the GetObject defined here. This ensures the macro does not get applied.
+#pragma push_macro("GetObject")
+#define RAPIDJSON_WINDOWS_GETOBJECT_WORKAROUND_APPLIED
+#undef GetObject
+#endif
+
+#ifndef RAPIDJSON_NOMEMBERITERATORCLASS
+#include <iterator>  // std::random_access_iterator_tag
+#endif
+
+RAPIDJSON_NAMESPACE_BEGIN
+
+// Forward declaration.
+template <typename Encoding, typename Allocator>
+class GenericValue;
+
+template <typename Encoding, typename Allocator, typename StackAllocator>
+class GenericDocument;
+
+/*! \def RAPIDJSON_DEFAULT_ALLOCATOR
+    \ingroup RAPIDJSON_CONFIG
+    \brief Allows to choose default allocator.
+
+    User can define this to use CrtAllocator or MemoryPoolAllocator.
+*/
+#ifndef RAPIDJSON_DEFAULT_ALLOCATOR
+#define RAPIDJSON_DEFAULT_ALLOCATOR MemoryPoolAllocator<CrtAllocator>
+#endif
+
+/*! \def RAPIDJSON_DEFAULT_STACK_ALLOCATOR
+    \ingroup RAPIDJSON_CONFIG
+    \brief Allows to choose default stack allocator for Document.
+
+    User can define this to use CrtAllocator or MemoryPoolAllocator.
+*/
+#ifndef RAPIDJSON_DEFAULT_STACK_ALLOCATOR
+#define RAPIDJSON_DEFAULT_STACK_ALLOCATOR CrtAllocator
+#endif
+
+/*! \def RAPIDJSON_VALUE_DEFAULT_OBJECT_CAPACITY
+    \ingroup RAPIDJSON_CONFIG
+    \brief User defined kDefaultObjectCapacity value.
+
+    User can define this as any natural number.
+*/
+#ifndef RAPIDJSON_VALUE_DEFAULT_OBJECT_CAPACITY
+// number of objects that rapidjson::Value allocates memory for by default
+#define RAPIDJSON_VALUE_DEFAULT_OBJECT_CAPACITY 16
+#endif
+
+/*! \def RAPIDJSON_VALUE_DEFAULT_ARRAY_CAPACITY
+    \ingroup RAPIDJSON_CONFIG
+    \brief User defined kDefaultArrayCapacity value.
+
+    User can define this as any natural number.
+*/
+#ifndef RAPIDJSON_VALUE_DEFAULT_ARRAY_CAPACITY
+// number of array elements that rapidjson::Value allocates memory for by default
+#define RAPIDJSON_VALUE_DEFAULT_ARRAY_CAPACITY 16
+#endif
+
+struct HashTableFactors {
+    enum { MIN_HT_SIZE = 4 };  // Minimum size of a hash table
+    float minLoad = 0.25;      // Keep the table at least 25% full (after a shrink)
+    float maxLoad = 0.85;      // But no more than 85% full
+    float shrink = 0.5;        // reduce by 50%
+    float grow = 1.0;          // grow by 100%
+    size_t minHTSize = 32;     // Minimum size for hashtable
+    const char *isValid() const {
+        if (minHTSize < 2) return "minHTSize < 2";
+        if (minLoad <= 0) return "minLoad <= 0.0";
+        if (maxLoad > 1.0f) return "maxLoad > 1.0";
+        if (minLoad >= maxLoad) return "minLoad >= maxLoad";
+        if (grow <= 0) return "Grow <= 0.0";
+        if (shrink <= 0) return "Shrink <= 0.0";
+        //
+        // The shrink factor requires additional validation because we want to make sure that
+        // rehashing down will always succeed, i.e., you can't shrink TOO much or you're toast
+        // (because the remaining entries won't fit).
+        //
+        if (shrink > (1.0f - minLoad)) return "Shrink too large";
+        return nullptr;  // We're good !!!
+    }  // Are the values valid? nullptr => valid, otherwise => error message
+};
+extern HashTableFactors hashTableFactors;
+
+//
+// Stats collected
+//
+struct HashTableStats {
+    std::atomic<size_t> rehashUp;     // Number of times we increased the hashtable size
+    std::atomic<size_t> rehashDown;   // Number of times we decreased the hashtable size
+    std::atomic<size_t> convertToHT;  // Number of times a vector was converted to a hashtable
+    std::atomic<size_t> reserveHT;    // Number of times a "reserve" yielded a hashtable
+    void reset() {
+        rehashUp = 0;
+        rehashDown = 0;
+        convertToHT = 0;
+        reserveHT = 0;
+    }
+};
+
+extern HashTableStats hashTableStats;
+
+//! Name-value pair in a JSON object value.
+/*!
+    This class was internal to GenericValue. It used to be an inner struct.
+    But a compiler (IBM XL C/C++ for AIX) was reported to have problems with that, so it was moved
+    out as a namespace-scope struct.
+    https://code.google.com/p/rapidjson/issues/detail?id=64
+*/
+template <typename Encoding, typename Allocator>
+class GenericMember {
+public:
+    KeyTable_Handle name;                      //!< name of member (must be a string)
+    GenericValue<Encoding, Allocator> value;   //!< value of member.
+
+    ~GenericMember() {
+        keyTable->destroyHandle(name);
+    }
+
+private:
+    //! Copy constructor is not permitted.
+    GenericMember(const GenericMember& rhs) = delete;
+    GenericMember& operator=(GenericMember& rhs) = delete;
+    GenericMember(GenericMember&& rhs) = delete;
+    GenericMember& operator=(GenericMember&& rhs) = delete;
+};
+
+//
+// When in a hashtable, this is the correct format for a member.
+//
+template <typename Encoding, typename Allocator>
+class GenericMemberHT : public GenericMember<Encoding, Allocator> {
+public:
+    SizeType prev;
+    SizeType next;
+private:
+    //! Copy constructor is not permitted.
+    GenericMemberHT(const GenericMemberHT& rhs) = delete;
+    GenericMemberHT& operator=(GenericMemberHT& rhs) = delete;
+    GenericMemberHT(GenericMemberHT&& rhs) = delete;
+    GenericMemberHT& operator=(GenericMemberHT&& rhs) = delete;
+};
+
+///////////////////////////////////////////////////////////////////////////////
+// GenericMemberIterator
+
+//! (Constant) member iterator for a JSON object value
+/*!
+    \tparam Const Is this a constant iterator?
+    \tparam Encoding Encoding of the value. (Even non-string values need to have the same encoding in a document)
+    \tparam Allocator Allocator type for allocating memory of object, array and string.
+ + This class implements a Sequential Access Iterator for GenericMember elements + of a GenericValue, see ISO/IEC 14882:2003(E) C++ standard, 24.1 [lib.iterator.requirements]. + + \note This iterator implementation is mainly intended to avoid implicit + conversions from iterator values to \c NULL, + e.g. from GenericValue::FindMember. + + \note Define \c RAPIDJSON_NOMEMBERITERATORCLASS to fall back to a + pointer-based implementation, if your platform doesn't provide + the C++ header. + + \see GenericMember, GenericValue::MemberIterator, GenericValue::ConstMemberIterator + + // + // This code layers over the "dual-implementation" of the object members + // + */ +template +class GenericMemberIterator { + + friend class GenericValue; + template friend class GenericMemberIterator; + + typedef GenericMember PlainType; + typedef GenericMemberHT PlainTypeHT; + typedef typename internal::MaybeAddConst::Type ValueType; + typedef typename internal::MaybeAddConst::Type ValueTypeHT; + + typedef GenericValue NodeType; + +public: + //! Iterator type itself + typedef GenericMemberIterator Iterator; + //! Constant iterator type + typedef GenericMemberIterator ConstIterator; + //! Non-constant iterator type + typedef GenericMemberIterator NonConstIterator; + + /** \name std::iterator_traits support */ + //@{ + typedef ValueType value_type; + typedef ValueType * pointer; + typedef ValueType & reference; + typedef std::bidirectional_iterator_tag iterator_category; + //@} + + //! Pointer to (const) GenericMember + typedef pointer Pointer; + //! Reference to (const) GenericMember + typedef reference Reference; + + //! Default constructor (singular value) + /*! Creates an iterator pointing to no element. + \note All operations, except for comparisons, are undefined on such values. + */ + GenericMemberIterator() : optr(nullptr), index(0) {} + + //! Iterator conversions to more const + /*! + \param it (Non-const) iterator to copy from + + Allows the creation of an iterator from another GenericMemberIterator + that is "less const". Especially, creating a non-constant iterator + from a constant iterator are disabled: + \li const -> non-const (not ok) + \li const -> const (ok) + \li non-const -> const (ok) + \li non-const -> non-const (ok) + + \note If the \c Const template parameter is already \c false, this + constructor effectively defines a regular copy-constructor. + Otherwise, the copy constructor is implicitly defined. + */ + GenericMemberIterator(const NonConstIterator & it) : optr(it.optr), index(it.index) {} + Iterator& operator=(const NonConstIterator & it) { optr = it.optr; index = it.index; return *this; } + + //! @name stepping + //@{ + Iterator& operator++(){ Next(); return *this; } + Iterator& operator--(){ Prev(); return *this; } + Iterator operator++(int){ Iterator old(*this); Next(); return old; } + Iterator operator--(int){ Iterator old(*this); Prev(); return old; } + //@} + + + //! @name relations + //@{ + template bool operator==(const GenericMemberIterator& that) const + { return optr == that.optr && index == that.index; } + template bool operator!=(const GenericMemberIterator& that) const + { return optr != that.optr || index != that.index; } + + //@} + + //! 
@name dereference + //@{ + Reference operator*() const { return at(); } + Pointer operator->() const { return &at(); } + //@} + + friend std::ostream& operator<<(std::ostream& os, GenericMemberIterator it) { + return os << " Itr:" << it.index; + } + + // + // For accurate size accounting, clients want to know how large the overhead for a "Member" is. + // Here we report the "Right" value for that, depending on the current type + // + SizeType NodeSize() const { return isHashTable() ? sizeof(PlainTypeHT) : sizeof(PlainType); } + +private: + //! Internal constructor from plain pointer + explicit GenericMemberIterator(const NodeType *_optr, SizeType ix) : optr(_optr), index(ix) {} + + const NodeType *optr; // Points to the GenericValue Node that contains the object + SizeType index; // Offset within that vector + + ValueType& atVec() const { return optr->GetMembersPointerVec()[index]; } + ValueTypeHT& atHT() const { return optr->GetMembersPointerHT()[index]; } + bool isHashTable() const { return optr->IsObjectHT(); } + ValueType& at() const { return isHashTable() ? atHT() : atVec(); } + void Next() { index = isHashTable() ? atHT().next : index+1; } + void Prev() { index = isHashTable() ? atHT().prev : index-1; } +}; + +/////////////////////////////////////////////////////////////////////////////// +// GenericStringRef + +//! Reference to a constant string (not taking a copy) +/*! + \tparam CharType character type of the string + + This helper class is used to automatically infer constant string + references for string literals, especially from \c const \b (!) + character arrays. + + The main use is for creating JSON string values without copying the + source string via an \ref Allocator. This requires that the referenced + string pointers have a sufficient lifetime, which exceeds the lifetime + of the associated GenericValue. + + \b Example + \code + Value v("foo"); // ok, no need to copy & calculate length + const char foo[] = "foo"; + v.SetString(foo); // ok + + const char* bar = foo; + // Value x(bar); // not ok, can't rely on bar's lifetime + Value x(StringRef(bar)); // lifetime explicitly guaranteed by user + Value y(StringRef(bar, 3)); // ok, explicitly pass length + \endcode + + \see StringRef, GenericValue::SetString +*/ +template +struct GenericStringRef { + typedef CharType Ch; //!< character type of the string + + //! Create string reference from \c const character array +#ifndef __clang__ // -Wdocumentation + /*! + This constructor implicitly creates a constant string reference from + a \c const character array. It has better performance than + \ref StringRef(const CharType*) by inferring the string \ref length + from the array length, and also supports strings containing null + characters. + + \tparam N length of the string, automatically inferred + + \param str Constant character array, lifetime assumed to be longer + than the use of the string in e.g. a GenericValue + + \post \ref s == str + + \note Constant complexity. + \note There is a hidden, private overload to disallow references to + non-const character arrays to be created via this constructor. + By this, e.g. function-scope arrays used to be filled via + \c snprintf are excluded from consideration. + In such cases, the referenced string should be \b copied to the + GenericValue instead. + */ +#endif + template + GenericStringRef(const CharType (&str)[N]) RAPIDJSON_NOEXCEPT + : s(str), length(N-1) {} + + //! Explicitly create string reference from \c const character pointer +#ifndef __clang__ // -Wdocumentation + /*! 
+ This constructor can be used to \b explicitly create a reference to + a constant string pointer. + + \see StringRef(const CharType*) + + \param str Constant character pointer, lifetime assumed to be longer + than the use of the string in e.g. a GenericValue + + \post \ref s == str + + \note There is a hidden, private overload to disallow references to + non-const character arrays to be created via this constructor. + By this, e.g. function-scope arrays used to be filled via + \c snprintf are excluded from consideration. + In such cases, the referenced string should be \b copied to the + GenericValue instead. + */ +#endif + explicit GenericStringRef(const CharType* str) + : s(str), length(NotNullStrLen(str)) {} + + //! Create constant string reference from pointer and length +#ifndef __clang__ // -Wdocumentation + /*! \param str constant string, lifetime assumed to be longer than the use of the string in e.g. a GenericValue + \param len length of the string, excluding the trailing NULL terminator + + \post \ref s == str && \ref length == len + \note Constant complexity. + */ +#endif + GenericStringRef(const CharType* str, SizeType len) + : s(RAPIDJSON_LIKELY(str) ? str : emptyString), length(len) { RAPIDJSON_ASSERT(str != 0 || len == 0u); } + + GenericStringRef(const GenericStringRef& rhs) : s(rhs.s), length(rhs.length) {} + + //! implicit conversion to plain CharType pointer + operator const Ch *() const { return s; } + + const Ch* const s; //!< plain CharType pointer + const SizeType length; //!< length of the string (excluding the trailing NULL terminator) + +private: + SizeType NotNullStrLen(const CharType* str) { + RAPIDJSON_ASSERT(str != 0); + return internal::StrLen(str); + } + + /// Empty string - used when passing in a NULL pointer + static const Ch emptyString[]; + + //! Disallow construction from non-const array + template + GenericStringRef(CharType (&str)[N]) /* = delete */; + //! Copy assignment operator not permitted - immutable type + GenericStringRef& operator=(const GenericStringRef& rhs) /* = delete */; +}; + +template +const CharType GenericStringRef::emptyString[] = { CharType() }; + +//! Mark a character pointer as constant string +/*! Mark a plain character pointer as a "string literal". This function + can be used to avoid copying a character string to be referenced as a + value in a JSON GenericValue object, if the string's lifetime is known + to be valid long enough. + \tparam CharType Character type of the string + \param str Constant string, lifetime assumed to be longer than the use of the string in e.g. a GenericValue + \return GenericStringRef string reference object + \relatesalso GenericStringRef + + \see GenericValue::GenericValue(StringRefType), GenericValue::operator=(StringRefType), GenericValue::SetString(StringRefType), GenericValue::PushBack(StringRefType, Allocator&), GenericValue::AddMember +*/ +template +inline GenericStringRef StringRef(const CharType* str) { + return GenericStringRef(str); +} + +//! Mark a character pointer as constant string +/*! Mark a plain character pointer as a "string literal". This function + can be used to avoid copying a character string to be referenced as a + value in a JSON GenericValue object, if the string's lifetime is known + to be valid long enough. + + This version has better performance with supplied length, and also + supports string containing null characters. + + \tparam CharType character type of the string + \param str Constant string, lifetime assumed to be longer than the use of the string in e.g. 
a GenericValue + \param length The length of source string. + \return GenericStringRef string reference object + \relatesalso GenericStringRef +*/ +template +inline GenericStringRef StringRef(const CharType* str, size_t length) { + return GenericStringRef(str, SizeType(length)); +} + +#if RAPIDJSON_HAS_STDSTRING +//! Mark a string object as constant string +/*! Mark a string object (e.g. \c std::string) as a "string literal". + This function can be used to avoid copying a string to be referenced as a + value in a JSON GenericValue object, if the string's lifetime is known + to be valid long enough. + + \tparam CharType character type of the string + \param str Constant string, lifetime assumed to be longer than the use of the string in e.g. a GenericValue + \return GenericStringRef string reference object + \relatesalso GenericStringRef + \note Requires the definition of the preprocessor symbol \ref RAPIDJSON_HAS_STDSTRING. +*/ +template +inline GenericStringRef StringRef(const std::basic_string& str) { + return GenericStringRef(str.data(), SizeType(str.size())); +} +#endif + +template +inline GenericStringRef StringRef(const std::basic_string_view& str) { + return GenericStringRef(str.data(), SizeType(str.size())); +} + +/////////////////////////////////////////////////////////////////////////////// +// GenericValue type traits +namespace internal { + +template +struct IsGenericValueImpl : FalseType {}; + +// select candidates according to nested encoding and allocator types +template struct IsGenericValueImpl::Type, typename Void::Type> + : IsBaseOf, T>::Type {}; + +// helper to match arbitrary GenericValue instantiations, including derived classes +template struct IsGenericValue : IsGenericValueImpl::Type {}; + +} // namespace internal + +/////////////////////////////////////////////////////////////////////////////// +// TypeHelper + +namespace internal { + +template +struct TypeHelper {}; + +template +struct TypeHelper { + static bool Is(const ValueType& v) { return v.IsBool(); } + static bool Get(const ValueType& v) { return v.GetBool(); } + static ValueType& Set(ValueType& v, bool data) { return v.SetBool(data); } + static ValueType& Set(ValueType& v, bool data, typename ValueType::AllocatorType&) { return v.SetBool(data); } +}; + +template +struct TypeHelper { + static bool Is(const ValueType& v) { return v.IsInt(); } + static int Get(const ValueType& v) { return v.GetInt(); } + static ValueType& Set(ValueType& v, int data) { return v.SetInt(data); } + static ValueType& Set(ValueType& v, int data, typename ValueType::AllocatorType&) { return v.SetInt(data); } +}; + +template +struct TypeHelper { + static bool Is(const ValueType& v) { return v.IsUint(); } + static unsigned Get(const ValueType& v) { return v.GetUint(); } + static ValueType& Set(ValueType& v, unsigned data) { return v.SetUint(data); } + static ValueType& Set(ValueType& v, unsigned data, typename ValueType::AllocatorType&) { return v.SetUint(data); } +}; + +#ifdef _MSC_VER +RAPIDJSON_STATIC_ASSERT(sizeof(long) == sizeof(int)); +template +struct TypeHelper { + static bool Is(const ValueType& v) { return v.IsInt(); } + static long Get(const ValueType& v) { return v.GetInt(); } + static ValueType& Set(ValueType& v, long data) { return v.SetInt(data); } + static ValueType& Set(ValueType& v, long data, typename ValueType::AllocatorType&) { return v.SetInt(data); } +}; + +RAPIDJSON_STATIC_ASSERT(sizeof(unsigned long) == sizeof(unsigned)); +template +struct TypeHelper { + static bool Is(const ValueType& v) { return v.IsUint(); 
} + static unsigned long Get(const ValueType& v) { return v.GetUint(); } + static ValueType& Set(ValueType& v, unsigned long data) { return v.SetUint(data); } + static ValueType& Set(ValueType& v, unsigned long data, typename ValueType::AllocatorType&) { return v.SetUint(data); } +}; +#endif + +template +struct TypeHelper { + static bool Is(const ValueType& v) { return v.IsInt64(); } + static int64_t Get(const ValueType& v) { return v.GetInt64(); } + static ValueType& Set(ValueType& v, int64_t data) { return v.SetInt64(data); } + static ValueType& Set(ValueType& v, int64_t data, typename ValueType::AllocatorType&) { return v.SetInt64(data); } +}; + +template +struct TypeHelper { + static bool Is(const ValueType& v) { return v.IsUint64(); } + static uint64_t Get(const ValueType& v) { return v.GetUint64(); } + static ValueType& Set(ValueType& v, uint64_t data) { return v.SetUint64(data); } + static ValueType& Set(ValueType& v, uint64_t data, typename ValueType::AllocatorType&) { return v.SetUint64(data); } +}; + +template +struct TypeHelper { + static bool Is(const ValueType& v) { return v.IsDouble(); } + static double Get(const ValueType& v) { return v.GetDouble(); } + static ValueType& Set(ValueType& v, double data) { return v.SetDouble(data); } + static ValueType& Set(ValueType& v, double data, typename ValueType::AllocatorType&) { return v.SetDouble(data); } +}; + +template +struct TypeHelper { + typedef const typename ValueType::Ch* StringType; + static bool Is(const ValueType& v) { return v.IsString(); } + static StringType Get(const ValueType& v) { return v.GetString(); } + static ValueType& Set(ValueType& v, const StringType data) { return v.SetString(typename ValueType::StringRefType(data)); } + static ValueType& Set(ValueType& v, const StringType data, typename ValueType::AllocatorType& a) { return v.SetString(data, a); } +}; + +#if RAPIDJSON_HAS_STDSTRING +template +struct TypeHelper > { + typedef std::basic_string StringType; + static bool Is(const ValueType& v) { return v.IsString(); } + static StringType Get(const ValueType& v) { return StringType(v.GetString(), v.GetStringLength()); } + static ValueType& Set(ValueType& v, const StringType& data, typename ValueType::AllocatorType& a) { return v.SetString(data, a); } +}; +#endif + +template +struct TypeHelper > { + typedef std::basic_string_view StringType; + static bool Is(const ValueType& v) { return v.IsString(); } + static StringType Get(const ValueType& v) { return StringType(v.GetString(), v.GetStringLength()); } + static ValueType& Set(ValueType& v, const StringType& data, typename ValueType::AllocatorType& a) { return v.SetString(data, a); } +}; + +template +struct TypeHelper { + typedef typename ValueType::Array ArrayType; + static bool Is(const ValueType& v) { return v.IsArray(); } + static ArrayType Get(ValueType& v) { return v.GetArray(); } + static ValueType& Set(ValueType& v, ArrayType data) { return v = data; } + static ValueType& Set(ValueType& v, ArrayType data, typename ValueType::AllocatorType&) { return v = data; } +}; + +template +struct TypeHelper { + typedef typename ValueType::ConstArray ArrayType; + static bool Is(const ValueType& v) { return v.IsArray(); } + static ArrayType Get(const ValueType& v) { return v.GetArray(); } +}; + +template +struct TypeHelper { + typedef typename ValueType::Object ObjectType; + static bool Is(const ValueType& v) { return v.IsObject(); } + static ObjectType Get(ValueType& v) { return v.GetObject(); } + static ValueType& Set(ValueType& v, ObjectType data) { return v = data; 
} + static ValueType& Set(ValueType& v, ObjectType data, typename ValueType::AllocatorType&) { return v = data; } +}; + +template <typename ValueType> +struct TypeHelper<ValueType, typename ValueType::ConstObject> { + typedef typename ValueType::ConstObject ObjectType; + static bool Is(const ValueType& v) { return v.IsObject(); } + static ObjectType Get(const ValueType& v) { return v.GetObject(); } +}; + +} // namespace internal + +// Forward declarations +template <bool, typename> class GenericArray; +template <bool, typename> class GenericObject; + +/////////////////////////////////////////////////////////////////////////////// +// GenericValue + +//! Represents a JSON value. Use Value for UTF8 encoding and default allocator. +/*! + A JSON value can be one of 7 types. This class is a variant type supporting + these types. + + Use \c Value if you use UTF8 encoding with the default allocator. + + \tparam Encoding Encoding of the value. (Even non-string values need to have the same encoding in a document) + \tparam Allocator Allocator type for allocating memory of object, array and string. +*/ +template <typename Encoding, typename Allocator = RAPIDJSON_DEFAULT_ALLOCATOR> +class GenericValue { +public: + //! Name-value pair in an object. + typedef GenericMember<Encoding, Allocator> Member; + typedef GenericMemberHT<Encoding, Allocator> MemberHT; + typedef Encoding EncodingType; //!< Encoding type from template parameter. + typedef Allocator AllocatorType; //!< Allocator type from template parameter. + typedef typename Encoding::Ch Ch; //!< Character type derived from Encoding. + typedef GenericStringRef<Ch> StringRefType; //!< Reference to a constant string + typedef typename GenericMemberIterator<false, Encoding, Allocator>::Iterator MemberIterator; //!< Member iterator for iterating in object. + typedef typename GenericMemberIterator<true, Encoding, Allocator>::Iterator ConstMemberIterator; //!< Constant member iterator for iterating in object. + typedef GenericValue* ValueIterator; //!< Value iterator for iterating in array. + typedef const GenericValue* ConstValueIterator; //!< Constant value iterator for iterating in array. + typedef GenericValue ValueType; //!< Value type of itself. + typedef GenericArray<false, ValueType> Array; + typedef GenericArray<true, ValueType> ConstArray; + typedef GenericObject<false, ValueType> Object; + typedef GenericObject<true, ValueType> ConstObject; + + //!@name Constructors and destructor. + //@{ + + //! Default constructor creates a null value. + GenericValue() RAPIDJSON_NOEXCEPT : data_() { + data_.f.flags = kNullFlag; + } + +#if RAPIDJSON_HAS_CXX11_RVALUE_REFS + //! Move constructor in C++11 + GenericValue(GenericValue&& rhs) RAPIDJSON_NOEXCEPT : data_(rhs.data_) { + rhs.data_.f.flags = kNullFlag; // give up contents + } +#endif + +private: + //! Copy constructor is not permitted. + GenericValue(const GenericValue& rhs); + +#if RAPIDJSON_HAS_CXX11_RVALUE_REFS + //! Moving from a GenericDocument is not permitted. + template <typename StackAllocator> + GenericValue(GenericDocument<Encoding, Allocator, StackAllocator>&& rhs); + + //! Move assignment from a GenericDocument is not permitted. + template <typename StackAllocator> + GenericValue& operator=(GenericDocument<Encoding, Allocator, StackAllocator>&& rhs); +#endif + +public: + + //! Constructor with JSON value type. + /*! This creates a Value of specified type with default content. + \param type Type of the value. + \note Default content for number is zero. + */ + explicit GenericValue(Type type) RAPIDJSON_NOEXCEPT : data_() { + static const uint16_t defaultFlags[] = { + kNullFlag, kFalseFlag, kTrueFlag, kObjectVecFlag, kArrayFlag, kShortStringFlag, + kNumberAnyFlag + }; + RAPIDJSON_NOEXCEPT_ASSERT(type >= kNullType && type <= kNumberType); + data_.f.flags = defaultFlags[type]; + SetNoescape(false); + SetMarked(false); + } + + //! Explicit copy constructor (with allocator) + /*! 
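Deep copies must be requested explicitly; the plain copy constructor is disabled (declared private above) so an expensive recursive copy never happens by accident. +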
Creates a copy of a Value by using the given Allocator + \tparam SourceAllocator allocator of \c rhs + \param rhs Value to copy from (read-only) + \param allocator Allocator for allocating copied elements and buffers. Commonly use GenericDocument::GetAllocator(). + \param copyConstStrings Force copying of constant strings (e.g. referencing an in-situ buffer) + \see CopyFrom() + */ + + template + GenericValue(const GenericValue& rhs, Allocator& allocator, bool copyConstStrings = false) { + switch (rhs.GetType()) { + case kObjectType: + DoCopyMembers(rhs, allocator, copyConstStrings); + break; + case kArrayType: { + SizeType count = rhs.data_.a.size; + GenericValue* le = reinterpret_cast(allocator.Malloc(count * sizeof(GenericValue))); + const GenericValue* re = rhs.GetElementsPointer(); + for (SizeType i = 0; i < count; i++) + new (&le[i]) GenericValue(re[i], allocator, copyConstStrings); + data_.f.flags = kArrayFlag; + data_.a.size = data_.a.capacity = count; + SetElementsPointer(le); + } + break; + case kStringType: + if (rhs.data_.f.flags == kConstStringFlag && !copyConstStrings) { + data_.f.flags = rhs.data_.f.flags; + data_ = *reinterpret_cast(&rhs.data_); + } + else + SetStringRaw(StringRef(rhs.GetString(), rhs.GetStringLength()), allocator); + SetNoescape(rhs.IsNoescape()); + break; + case kNumberType: + if ((rhs.data_.f.flags & kDoubleFlag) != 0) { + SetStringRaw(StringRef(rhs.GetDoubleString(), rhs.GetDoubleStringLength()), allocator, false, true); + RAPIDJSON_ASSERT(rhs.IsNoescape()); + SetNoescape(true); + } else { + data_.f.flags = rhs.data_.f.flags; + data_ = *reinterpret_cast(&rhs.data_); + } + break; + case kTrueType: + case kFalseType: + data_.f.flags = rhs.data_.f.flags; + data_ = *reinterpret_cast(&rhs.data_); + break; + default: + data_.f.flags = rhs.data_.f.flags; + data_ = *reinterpret_cast(&rhs.data_); + break; + } + } + + //! Constructor for boolean value. + /*! \param b Boolean value + \note This constructor is limited to \em real boolean values and rejects + implicitly converted types like arbitrary pointers. Use an explicit cast + to \c bool, if you want to construct a boolean JSON value in such cases. + */ +#ifndef RAPIDJSON_DOXYGEN_RUNNING // hide SFINAE from Doxygen + template + explicit GenericValue(T b, RAPIDJSON_ENABLEIF((internal::IsSame))) RAPIDJSON_NOEXCEPT // See #472 +#else + explicit GenericValue(bool b) RAPIDJSON_NOEXCEPT +#endif + : data_() { + // safe-guard against failing SFINAE + RAPIDJSON_STATIC_ASSERT((internal::IsSame::Value)); + data_.f.flags = b ? kTrueFlag : kFalseFlag; + } + + //! Constructor for int value. + explicit GenericValue(int i) RAPIDJSON_NOEXCEPT : data_() { + data_.n.i64 = i; + data_.f.flags = (i >= 0) ? (kNumberIntFlag | kUintFlag | kUint64Flag) : kNumberIntFlag; + } + + //! Constructor for unsigned value. + explicit GenericValue(unsigned u) RAPIDJSON_NOEXCEPT : data_() { + data_.n.u64 = u; + data_.f.flags = (u & 0x80000000) ? kNumberUintFlag : (kNumberUintFlag | kIntFlag | kInt64Flag); + } + + //! Constructor for int64_t value. + explicit GenericValue(int64_t i64) RAPIDJSON_NOEXCEPT : data_() { + data_.n.i64 = i64; + data_.f.flags = kNumberInt64Flag; + if (i64 >= 0) { + data_.f.flags |= kNumberUint64Flag; + if (!(static_cast(i64) & RAPIDJSON_UINT64_C2(0xFFFFFFFF, 0x00000000))) + data_.f.flags |= kUintFlag; + if (!(static_cast(i64) & RAPIDJSON_UINT64_C2(0xFFFFFFFF, 0x80000000))) + data_.f.flags |= kIntFlag; + } + else if (i64 >= static_cast(RAPIDJSON_UINT64_C2(0xFFFFFFFF, 0x80000000))) + data_.f.flags |= kIntFlag; + } + + //! 
Constructor for uint64_t value. + explicit GenericValue(uint64_t u64) RAPIDJSON_NOEXCEPT : data_() { + data_.n.u64 = u64; + data_.f.flags = kNumberUint64Flag; + if (!(u64 & RAPIDJSON_UINT64_C2(0x80000000, 0x00000000))) + data_.f.flags |= kInt64Flag; + if (!(u64 & RAPIDJSON_UINT64_C2(0xFFFFFFFF, 0x00000000))) + data_.f.flags |= kUintFlag; + if (!(u64 & RAPIDJSON_UINT64_C2(0xFFFFFFFF, 0x80000000))) + data_.f.flags |= kIntFlag; + } + + //! Constructor for constant string (i.e. do not make a copy of string) + GenericValue(const Ch* s, SizeType length, bool noescape = false) RAPIDJSON_NOEXCEPT : data_() { SetStringRaw(StringRef(s, length), noescape); } + + //! Constructor for constant string (i.e. do not make a copy of string) + explicit GenericValue(StringRefType s, bool noescape = false) RAPIDJSON_NOEXCEPT : data_() { SetStringRaw(s, noescape); } + + //! Constructor for copy-string (i.e. do make a copy of string) + GenericValue(const Ch* s, SizeType length, Allocator& allocator, bool noescape = false, bool isdouble = false) : data_() { + SetStringRaw(StringRef(s, length), allocator, noescape, isdouble); + } + + //! Constructor for copy-string (i.e. do make a copy of string) + GenericValue(const Ch*s, Allocator& allocator) : data_() { + StringRefType srt = StringRef(s); + SetStringRaw(srt, allocator, false); + } + +#if RAPIDJSON_HAS_STDSTRING + //! Constructor for copy-string from a string object (i.e. do make a copy of string) + /*! \note Requires the definition of the preprocessor symbol \ref RAPIDJSON_HAS_STDSTRING. + */ + GenericValue(const std::basic_string& s, Allocator& allocator) : data_() { + SetStringRaw(StringRef(s), allocator, false); + } +#endif + GenericValue(const std::basic_string_view& s, Allocator& allocator) : data_() { + SetStringRaw(StringRef(s), allocator, false); + } + + /* + * Give KeyTable_Handle to this Value + */ + GenericValue(KeyTable_Handle& h) : data_() { + SetHandleRaw(h); + } + + //! Constructor for Array. + /*! + \param a An array obtained by \c GetArray(). + \note \c Array is always pass-by-value. + \note the source array is moved into this value and the sourec array becomes empty. + */ + GenericValue(Array a) RAPIDJSON_NOEXCEPT : data_(a.value_.data_) { + a.value_.data_ = Data(); + a.value_.data_.f.flags = kArrayFlag; + } + + //! Constructor for Object. + /*! + \param o An object obtained by \c GetObject(). + \note \c Object is always pass-by-value. + \note the source object is moved into this value and the source object becomes empty. + */ + GenericValue(Object o) RAPIDJSON_NOEXCEPT : data_(o.value_.data_) { + o.value_.data_ = Data(); + o.value_.data_.f.flags = kObjectVecFlag; + } + + //! Destructor. + /*! Need to destruct elements of array, members of object, or copy-string. + */ + ~GenericValue() { + // With RAPIDJSON_USE_MEMBERSMAP, the maps need to be destroyed to release + // their Allocator if it's refcounted (e.g. MemoryPoolAllocator). 
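+ // Dispatch below is on the exact flag pattern: arrays destroy their elements recursively, + // vector- and hashtable-layout objects free their member storage (DoFreeMembersVec / DoFreeMembersHT), + // copied strings and boxed doubles free their string buffer, and kHandleFlag values + // return their KeyTable_Handle to the key table.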
+ if (Allocator::kNeedFree || (RAPIDJSON_USE_MEMBERSMAP+0 && + internal::IsRefCounted<Allocator>::Value)) { + switch(data_.f.flags) { + + case kArrayFlag: + { + GenericValue* e = GetElementsPointer(); + for (GenericValue* v = e; v != e + data_.a.size; ++v) + v->~GenericValue(); + if (Allocator::kNeedFree) { // Shortcut by Allocator's trait + Allocator::Free(e); + } + } + break; + + case kObjectVecFlag: + DoFreeMembersVec(); + break; + + case kObjectHTFlag: + DoFreeMembersHT(); + break; + + case kStringFlag: + case kShortStringFlag: + break; + + case kCopyStringFlag: + case kNumberDoubleFlag: + if (Allocator::kNeedFree) { // Shortcut by Allocator's trait + Allocator::Free(const_cast<Ch*>(GetStringPointer())); + } + break; + + case kHandleFlag: + keyTable->destroyHandle(*reinterpret_cast<KeyTable_Handle*>(&data_.h.handle)); + break; + + default: + break; // Do nothing for other types. + } + } + } + + //@} + + //!@name Assignment operators + //@{ + + //! Assignment with move semantics. + /*! \param rhs Source of the assignment. It will become a null value after assignment. + */ + GenericValue& operator=(GenericValue& rhs) RAPIDJSON_NOEXCEPT { + if (RAPIDJSON_LIKELY(this != &rhs)) { + // Can't destroy "this" before assigning "rhs", otherwise "rhs" + // could be used after free if it's a sub-Value of "this", + // hence the temporary dance. + GenericValue temp; + temp.RawAssign(rhs, false); // valid + this->~GenericValue(); + // The sequence is: + // 1. The previous value of "this" is destroyed, with its counter(s) decremented in the destructor. + // 2. The value of "rhs" is moved into "this". + // 3. "rhs" is replaced by a newly created null value without running a constructor. + RawAssign(temp, true); + } + return *this; + } + +#if RAPIDJSON_HAS_CXX11_RVALUE_REFS + //! Move assignment in C++11 + GenericValue& operator=(GenericValue&& rhs) RAPIDJSON_NOEXCEPT { + return *this = rhs.Move(); + } +#endif + + //! Assignment of constant string reference (no copy) + /*! \param str Constant string reference to be assigned + \note This overload is needed to avoid clashes with the generic primitive type assignment overload below. + \see GenericStringRef, operator=(T) + */ + GenericValue& operator=(StringRefType str) RAPIDJSON_NOEXCEPT { + GenericValue s(str); + return *this = s; + } + + //! Assignment with primitive types. + /*! \tparam T Either \ref Type, \c int, \c unsigned, \c int64_t, \c uint64_t + \param value The value to be assigned. + + \note The source type \c T explicitly disallows all pointer types, + especially (\c const) \ref Ch*. This helps avoiding implicitly + referencing character strings with insufficient lifetime, use + \ref SetString(const Ch*, Allocator&) (for copying) or + \ref StringRef() (to explicitly mark the pointer as constant) instead. + All other pointer types would implicitly convert to \c bool, + use \ref SetBool() instead. + */ + template <typename T> + RAPIDJSON_DISABLEIF_RETURN((internal::IsPointer<T>), (GenericValue&)) + operator=(T value) { + GenericValue v(value); + return *this = v; + } + + //! Deep-copy assignment from Value + /*! Assigns a \b copy of the Value to the current Value object + \tparam SourceAllocator Allocator type of \c rhs + \param rhs Value to copy from (read-only) + \param allocator Allocator to use for copying + \param copyConstStrings Force copying of constant strings (e.g. 
referencing an in-situ buffer) + */ + template + GenericValue& CopyFrom(const GenericValue& rhs, Allocator& allocator, bool copyConstStrings = false) { + RAPIDJSON_ASSERT(static_cast(this) != static_cast(&rhs)); + this->~GenericValue(); + new (this) GenericValue(rhs, allocator, copyConstStrings); + return *this; + } + + //! Exchange the contents of this value with those of other. + /*! + \param other Another value. + \note Constant complexity. + */ + GenericValue& Swap(GenericValue& other) RAPIDJSON_NOEXCEPT { + GenericValue temp; + temp.RawAssign(*this, false); + RawAssign(other, false); + other.RawAssign(temp, false); + return *this; + } + + //! free-standing swap function helper + /*! + Helper function to enable support for common swap implementation pattern based on \c std::swap: + \code + void swap(MyClass& a, MyClass& b) { + using std::swap; + swap(a.value, b.value); + // ... + } + \endcode + \see Swap() + */ + friend inline void swap(GenericValue& a, GenericValue& b) RAPIDJSON_NOEXCEPT { a.Swap(b); } + + //! Prepare Value for move semantics + /*! \return *this */ + GenericValue& Move() RAPIDJSON_NOEXCEPT { return *this; } + //@} + + //!@name Equal-to and not-equal-to operators + //@{ + //! Equal-to operator + /*! + \note If an object contains duplicated named member, comparing equality with any object is always \c false. + \note Complexity is quadratic in Object's member number and linear for the rest (number of all values in the subtree and total lengths of all strings). + */ + template + bool operator==(const GenericValue& rhs) const { + typedef GenericValue RhsType; + if (GetType() != rhs.GetType()) + return false; + + switch (GetType()) { + case kObjectType: // Warning: O(n^2) inner-loop + if (data_.o.size != rhs.data_.o.size) + return false; + for (ConstMemberIterator lhsMemberItr = MemberBegin(); lhsMemberItr != MemberEnd(); ++lhsMemberItr) { + typename RhsType::ConstMemberIterator rhsMemberItr = rhs.FindMember(lhsMemberItr->name.GetStringView()); + if (rhsMemberItr == rhs.MemberEnd() || lhsMemberItr->value != rhsMemberItr->value) + return false; + } + return true; + + case kArrayType: + if (data_.a.size != rhs.data_.a.size) + return false; + for (SizeType i = 0; i < data_.a.size; i++) + if ((*this)[i] != rhs[i]) + return false; + return true; + + case kStringType: + return StringEqual(rhs); + + case kNumberType: + if (IsDouble() || rhs.IsDouble()) { + double a = GetDouble(); // May convert from integer to double. + double b = rhs.GetDouble(); // Ditto + return a >= b && a <= b; // Prevent -Wfloat-equal + } + else + return data_.n.u64 == rhs.data_.n.u64; + + default: + return true; + } + } + + //! Equal-to operator with const C-string pointer + bool operator==(const Ch* rhs) const { return *this == GenericValue(StringRef(rhs)); } + +#if RAPIDJSON_HAS_STDSTRING + //! Equal-to operator with string object + /*! \note Requires the definition of the preprocessor symbol \ref RAPIDJSON_HAS_STDSTRING. + */ + bool operator==(const std::basic_string& rhs) const { return *this == GenericValue(StringRef(rhs)); } +#endif + bool operator==(const std::basic_string_view& rhs) const { return *this == GenericValue(StringRef(rhs)); } + + //! Equal-to operator with primitive types + /*! \tparam T Either \ref Type, \c int, \c unsigned, \c int64_t, \c uint64_t, \c double, \c true, \c false + */ + template RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr,internal::IsGenericValue >), (bool)) operator==(const T& rhs) const { return *this == GenericValue(rhs); } + + //! Not-equal-to operator + /*! 
\return !(*this == rhs) + */ + template + bool operator!=(const GenericValue& rhs) const { return !(*this == rhs); } + + //! Not-equal-to operator with const C-string pointer + bool operator!=(const Ch* rhs) const { return !(*this == rhs); } + + //! Not-equal-to operator with arbitrary types + /*! \return !(*this == rhs) + */ + template RAPIDJSON_DISABLEIF_RETURN((internal::IsGenericValue), (bool)) operator!=(const T& rhs) const { return !(*this == rhs); } + +#ifndef __cpp_lib_three_way_comparison + //! Equal-to operator with arbitrary types (symmetric version) + /*! \return (rhs == lhs) + */ + template friend RAPIDJSON_DISABLEIF_RETURN((internal::IsGenericValue), (bool)) operator==(const T& lhs, const GenericValue& rhs) { return rhs == lhs; } + + //! Not-Equal-to operator with arbitrary types (symmetric version) + /*! \return !(rhs == lhs) + */ + template friend RAPIDJSON_DISABLEIF_RETURN((internal::IsGenericValue), (bool)) operator!=(const T& lhs, const GenericValue& rhs) { return !(rhs == lhs); } + //@} +#endif + + //!@name Type + //@{ + + Type GetType() const { return static_cast(data_.f.flags & kTypeMask); } + bool IsNull() const { return data_.f.flags == kNullFlag; } + bool IsFalse() const { return data_.f.flags == kFalseFlag; } + bool IsTrue() const { return data_.f.flags == kTrueFlag; } + bool IsBool() const { return (data_.f.flags & kBoolFlag) != 0; } + bool IsObject() const { return GetType() == kObjectType; } + bool IsObjectHT() const { return (data_.f.flags & kHashTableFlag) != 0; } + bool IsArray() const { return data_.f.flags == kArrayFlag; } + bool IsNumber() const { return (data_.f.flags & kNumberFlag) != 0; } + bool IsInt() const { return (data_.f.flags & kIntFlag) != 0; } + bool IsUint() const { return (data_.f.flags & kUintFlag) != 0; } + bool IsInt64() const { return (data_.f.flags & kInt64Flag) != 0; } + bool IsUint64() const { return (data_.f.flags & kUint64Flag) != 0; } + bool IsDouble() const { return (data_.f.flags & kDoubleFlag) != 0; } + bool IsString() const { return (data_.f.flags & kStringFlag) != 0; } + bool IsShortString() const { return (data_.f.flags & kShortStringFlag) != 0;} + bool IsShortDouble() const { return (data_.f.flags & kNumberShortDoubleFlag) != 0;} + bool IsHandle() const { return data_.f.flags == kHandleFlag; } + + // Checks whether a number can be losslessly converted to a double. + bool IsLosslessDouble() const { + if (!IsNumber()) return false; + if (IsUint64()) { + uint64_t u = GetUint64(); + volatile double d = static_cast(u); + return (d >= 0.0) + && (d < static_cast((std::numeric_limits::max)())) + && (u == static_cast(d)); + } + if (IsInt64()) { + int64_t i = GetInt64(); + volatile double d = static_cast(i); + return (d >= static_cast((std::numeric_limits::min)())) + && (d < static_cast((std::numeric_limits::max)())) + && (i == static_cast(d)); + } + return true; // double, int, uint are always lossless + } + + //@} + + //!@name Null + //@{ + + GenericValue& SetNull() { this->~GenericValue(); new (this) GenericValue(); return *this; } + + //@} + + //!@name Bool + //@{ + + bool GetBool() const { RAPIDJSON_ASSERT(IsBool()); return data_.f.flags == kTrueFlag; } + //!< Set boolean value + /*! \post IsBool() == true */ + GenericValue& SetBool(bool b) { this->~GenericValue(); new (this) GenericValue(b); return *this; } + + //@} + + //!@name Object + //@{ + + //! Set this value as an empty object. + /*! 
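\note The previous value is destroyed first (its destructor runs), then an empty object is constructed in place. 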
\post IsObject() == true */ + GenericValue& SetObject() { this->~GenericValue(); new (this) GenericValue(kObjectType); return *this; } + + //! Get the number of members in the object. + SizeType MemberCount() const { RAPIDJSON_ASSERT(IsObject()); return data_.o.size; } + + //! Get the capacity of object. + SizeType MemberCapacity() const { RAPIDJSON_ASSERT(IsObject()); return data_.o.capacity; } + + //! Check whether the object is empty. + bool ObjectEmpty() const { RAPIDJSON_ASSERT(IsObject()); return data_.o.size == 0; } + + //! Get a value from an object associated with the name. + /*! \pre IsObject() == true + \tparam T Either \c Ch or \c const \c Ch (template used for disambiguation with \ref operator[](SizeType)) + \note In version 0.1x, if the member is not found, this function returns a null value. This makes issue 7. + Since 0.2, if the name is not correct, it will assert. + If user is unsure whether a member exists, user should use HasMember() first. + A better approach is to use FindMember(). + \note Linear time complexity. + */ + template + RAPIDJSON_DISABLEIF_RETURN((internal::NotExpr::Type, Ch> >),(GenericValue&)) operator[](T* name) { + GenericValue n(StringRef(name)); + return (*this)[n]; + } + template + RAPIDJSON_DISABLEIF_RETURN((internal::NotExpr::Type, Ch> >),(const GenericValue&)) operator[](T* name) const { return const_cast(*this)[name]; } + + //! Get a value from an object associated with the name. + /*! \pre IsObject() == true + \tparam SourceAllocator Allocator of the \c name value + + \note Compared to \ref operator[](T*), this version is faster because it does not need a StrLen(). + And it can also handle strings with embedded null characters. + + \note Linear time complexity. + */ + template + GenericValue& operator[](const GenericValue& name) { + MemberIterator member = FindMember(name); + if (member != MemberEnd()) + return member->value; + else { + RAPIDJSON_ASSERT(false); // see above note + + // This will generate -Wexit-time-destructors in clang + // static GenericValue NullValue; + // return NullValue; + + // Use static buffer and placement-new to prevent destruction + static char buffer[sizeof(GenericValue)]; + return *new (buffer) GenericValue(); + } + } + template + const GenericValue& operator[](const GenericValue& name) const { return const_cast(*this)[name]; } + +#if RAPIDJSON_HAS_STDSTRING + //! Get a value from an object associated with name (string object). + GenericValue& operator[](const std::basic_string& name) { return (*this)[GenericValue(StringRef(name))]; } + const GenericValue& operator[](const std::basic_string& name) const { return (*this)[GenericValue(StringRef(name))]; } +#endif + GenericValue& operator[](const std::basic_string_view& name) { return (*this)[GenericValue(StringRef(name))]; } + const GenericValue& operator[](const std::basic_string_view& name) const { return (*this)[GenericValue(StringRef(name))]; } + + ConstMemberIterator MemberBegin() const { RAPIDJSON_ASSERT(IsObject()); return IsObjectHT() ? MemberBeginHT() : MemberBeginVec(); } + ConstMemberIterator MemberEnd() const { RAPIDJSON_ASSERT(IsObject()); return IsObjectHT() ? MemberEndHT() : MemberEndVec(); } + MemberIterator MemberBegin() { RAPIDJSON_ASSERT(IsObject()); return IsObjectHT() ? MemberBeginHT() : MemberBeginVec(); } + MemberIterator MemberEnd() { RAPIDJSON_ASSERT(IsObject()); return IsObjectHT() ? MemberEndHT() : MemberEndVec(); } + + //! Request the object to have enough capacity to store members. + /*! 
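Reserving up front avoids repeated growth; a sufficiently large reservation may yield the hashtable layout immediately (see \c HashTableStats::reserveHT and \c hashTableFactors.minHTSize). + 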
\param newCapacity The minimum capacity that the object needs to have. + \param allocator Allocator for reallocating memory. It must be the same one as used before. Commonly use GenericDocument::GetAllocator(). + \return The value itself for fluent API. + \note Linear time complexity. + */ + GenericValue& MemberReserve(SizeType newCapacity, Allocator &allocator) { + RAPIDJSON_ASSERT(IsObject()); + DoReserveMembers(newCapacity, allocator); + return *this; + } + + //! Check whether a member exists in the object. + /*! + \param name Member name to be searched. + \pre IsObject() == true + \return Whether a member with that name exists. + \note It is better to use FindMember() directly if you need to obtain the value as well. + \note Linear time complexity. + */ + bool HasMember(const Ch* name) const { return FindMember(name) != MemberEnd(); } + +#if RAPIDJSON_HAS_STDSTRING + //! Check whether a member exists in the object with a string object name. + /*! + \param name Member name to be searched. + \pre IsObject() == true + \return Whether a member with that name exists. + \note It is better to use FindMember() directly if you need to obtain the value as well. + \note Linear time complexity. + */ + bool HasMember(const std::basic_string<Ch>& name) const { return FindMember(name) != MemberEnd(); } +#endif + bool HasMember(const std::basic_string_view<Ch>& name) const { return FindMember(name) != MemberEnd(); } + + //! Check whether a member exists in the object with GenericValue name. + /*! + This version is faster because it does not need a StrLen(). It can also handle strings with null characters. + \param name Member name to be searched. + \pre IsObject() == true + \return Whether a member with that name exists. + \note It is better to use FindMember() directly if you need to obtain the value as well. + \note Linear time complexity. + */ + template <typename SourceAllocator> + bool HasMember(const GenericValue<Encoding, SourceAllocator>& name) const { return FindMember(name) != MemberEnd(); } + + //! Find member by name. + /*! + \param name Member name to be searched. + \pre IsObject() == true + \return Iterator to member, if it exists. + Otherwise returns \ref MemberEnd(). + + \note Earlier versions of Rapidjson returned a \c NULL pointer, in case + the requested member doesn't exist. For consistency with e.g. + \c std::map, this has been changed to MemberEnd() now. + \note Linear time complexity. + */ + MemberIterator FindMember(const Ch* name) { + GenericValue n(StringRef(name)); + return FindMember(n); + } + + ConstMemberIterator FindMember(const Ch* name) const { return const_cast<GenericValue&>(*this).FindMember(name); } + + //! Find member by name. + /*! + This version is faster because it does not need a StrLen(). It can also handle strings with null characters. + \param name Member name to be searched. + \pre IsObject() == true + \return Iterator to member, if it exists. + Otherwise returns \ref MemberEnd(). + + \note Earlier versions of Rapidjson returned a \c NULL pointer, in case + the requested member doesn't exist. For consistency with e.g. + \c std::map, this has been changed to MemberEnd() now. + \note Linear time complexity. + */ + template <typename SourceAllocator> + MemberIterator FindMember(const GenericValue<Encoding, SourceAllocator>& name) { + RAPIDJSON_ASSERT(IsObject()); + RAPIDJSON_ASSERT(name.IsString()); + return DoFindMember(name); + } + template <typename SourceAllocator> ConstMemberIterator FindMember(const GenericValue<Encoding, SourceAllocator>& name) const { return const_cast<GenericValue&>(*this).FindMember(name); } + +#if RAPIDJSON_HAS_STDSTRING + //! Find member by string object name. + /*! + \param name Member name to be searched. 
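+ \note Requires the definition of the preprocessor symbol \ref RAPIDJSON_HAS_STDSTRING.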
+ \pre IsObject() == true + \return Iterator to member, if it exists. + Otherwise returns \ref MemberEnd(). + */ + MemberIterator FindMember(const std::basic_string& name) { return FindMember(GenericValue(StringRef(name))); } + ConstMemberIterator FindMember(const std::basic_string& name) const { return FindMember(GenericValue(StringRef(name))); } +#endif + MemberIterator FindMember(const std::basic_string_view& name) { return FindMember(GenericValue(StringRef(name))); } + ConstMemberIterator FindMember(const std::basic_string_view& name) const { return FindMember(GenericValue(StringRef(name))); } + + //! Add a member (name-value pair) to the object. + /*! \param name A string value as name of member. + \param value Value of any type. + \param allocator Allocator for reallocating memory. It must be the same one as used before. Commonly use GenericDocument::GetAllocator(). + \return The value itself for fluent API. + \note The ownership of \c name and \c value will be transferred to this object on success. + \pre IsObject() && name.IsString() + \post name.IsNull() && value.IsNull() + \note Amortized Constant time complexity. + */ + GenericValue& AddMember(GenericValue& name, GenericValue& value, Allocator& allocator) { + RAPIDJSON_ASSERT(IsObject()); + RAPIDJSON_ASSERT(name.IsString()); + DoAddMember(name, value, allocator); + return *this; + } + + //! Add a constant string value as member (name-value pair) to the object. + /*! \param name A string value as name of member. + \param value constant string reference as value of member. + \param allocator Allocator for reallocating memory. It must be the same one as used before. Commonly use GenericDocument::GetAllocator(). + \return The value itself for fluent API. + \pre IsObject() + \note This overload is needed to avoid clashes with the generic primitive type AddMember(GenericValue&,T,Allocator&) overload below. + \note Amortized Constant time complexity. + */ + GenericValue& AddMember(GenericValue& name, StringRefType value, Allocator& allocator) { + GenericValue v(value); + return AddMember(name, v, allocator); + } + +#if RAPIDJSON_HAS_STDSTRING + //! Add a string object as member (name-value pair) to the object. + /*! \param name A string value as name of member. + \param value constant string reference as value of member. + \param allocator Allocator for reallocating memory. It must be the same one as used before. Commonly use GenericDocument::GetAllocator(). + \return The value itself for fluent API. + \pre IsObject() + \note This overload is needed to avoid clashes with the generic primitive type AddMember(GenericValue&,T,Allocator&) overload below. + \note Amortized Constant time complexity. + */ + GenericValue& AddMember(GenericValue& name, std::basic_string& value, Allocator& allocator) { + GenericValue v(value, allocator); + return AddMember(name, v, allocator); + } +#endif + GenericValue& AddMember(GenericValue& name, std::basic_string_view& value, Allocator& allocator) { + GenericValue v(value, allocator); + return AddMember(name, v, allocator); + } + + //! Add any primitive value as member (name-value pair) to the object. + /*! \tparam T Either \ref Type, \c int, \c unsigned, \c int64_t, \c uint64_t + \param name A string value as name of member. + \param value Value of primitive type \c T as value of member + \param allocator Allocator for reallocating memory. Commonly use GenericDocument::GetAllocator(). + \return The value itself for fluent API. 
+ \pre IsObject() + + \note The source type \c T explicitly disallows all pointer types, + especially (\c const) \ref Ch*. This helps avoiding implicitly + referencing character strings with insufficient lifetime, use + \ref AddMember(StringRefType, GenericValue&, Allocator&) or \ref + AddMember(StringRefType, StringRefType, Allocator&). + All other pointer types would implicitly convert to \c bool, + use an explicit cast instead, if needed. + \note Amortized Constant time complexity. + */ + template + RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr, internal::IsGenericValue >), (GenericValue&)) + AddMember(GenericValue& name, T value, Allocator& allocator) { + GenericValue v(value); + return AddMember(name, v, allocator); + } + +#if RAPIDJSON_HAS_CXX11_RVALUE_REFS + GenericValue& AddMember(GenericValue&& name, GenericValue&& value, Allocator& allocator) { + return AddMember(name, value, allocator); + } + GenericValue& AddMember(GenericValue&& name, GenericValue& value, Allocator& allocator) { + return AddMember(name, value, allocator); + } + GenericValue& AddMember(GenericValue& name, GenericValue&& value, Allocator& allocator) { + return AddMember(name, value, allocator); + } + GenericValue& AddMember(StringRefType name, GenericValue&& value, Allocator& allocator) { + GenericValue n(name); + return AddMember(n, value, allocator); + } +#endif // RAPIDJSON_HAS_CXX11_RVALUE_REFS + + + //! Add a member (name-value pair) to the object. + /*! \param name A constant string reference as name of member. + \param value Value of any type. + \param allocator Allocator for reallocating memory. It must be the same one as used before. Commonly use GenericDocument::GetAllocator(). + \return The value itself for fluent API. + \note The ownership of \c value will be transferred to this object on success. + \pre IsObject() + \post value.IsNull() + \note Amortized Constant time complexity. + */ + GenericValue& AddMember(StringRefType name, GenericValue& value, Allocator& allocator) { + GenericValue n(name); + return AddMember(n, value, allocator); + } + + //! Add a constant string value as member (name-value pair) to the object. + /*! \param name A constant string reference as name of member. + \param value constant string reference as value of member. + \param allocator Allocator for reallocating memory. It must be the same one as used before. Commonly use GenericDocument::GetAllocator(). + \return The value itself for fluent API. + \pre IsObject() + \note This overload is needed to avoid clashes with the generic primitive type AddMember(StringRefType,T,Allocator&) overload below. + \note Amortized Constant time complexity. + */ + GenericValue& AddMember(StringRefType name, StringRefType value, Allocator& allocator) { + GenericValue v(value); + return AddMember(name, v, allocator); + } + + //! Add any primitive value as member (name-value pair) to the object. + /*! \tparam T Either \ref Type, \c int, \c unsigned, \c int64_t, \c uint64_t + \param name A constant string reference as name of member. + \param value Value of primitive type \c T as value of member + \param allocator Allocator for reallocating memory. Commonly use GenericDocument::GetAllocator(). + \return The value itself for fluent API. + \pre IsObject() + + \note The source type \c T explicitly disallows all pointer types, + especially (\c const) \ref Ch*. 
This helps avoiding implicitly + referencing character strings with insufficient lifetime, use + \ref AddMember(StringRefType, GenericValue&, Allocator&) or \ref + AddMember(StringRefType, StringRefType, Allocator&). + All other pointer types would implicitly convert to \c bool, + use an explicit cast instead, if needed. + \note Amortized Constant time complexity. + */ + template <typename T> + RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr<internal::IsPointer<T>, internal::IsGenericValue<T> >), (GenericValue&)) + AddMember(StringRefType name, T value, Allocator& allocator) { + GenericValue n(name); + return AddMember(n, value, allocator); + } + + //! Remove all members in the object. + /*! This function does not deallocate memory in the object, i.e. the capacity is unchanged. + \note Linear time complexity. + */ + void RemoveAllMembers() { + RAPIDJSON_ASSERT(IsObject()); + DoClearMembers(); + } + + //! Remove a member in object by its name. + /*! \param name Name of member to be removed. + \return Whether the member existed. + \note This function may reorder the object members. Use \ref + EraseMember(ConstMemberIterator) if you need to preserve the + relative order of the remaining members. + \note Linear time complexity. + */ + bool RemoveMember(const Ch* name) { + GenericValue n(StringRef(name)); + return RemoveMember(n); + } + +#if RAPIDJSON_HAS_STDSTRING + bool RemoveMember(const std::basic_string<Ch>& name) { return RemoveMember(GenericValue(StringRef(name))); } +#endif + bool RemoveMember(const std::basic_string_view<Ch>& name) { return RemoveMember(GenericValue(StringRef(name))); } + + template <typename SourceAllocator> + bool RemoveMember(const GenericValue<Encoding, SourceAllocator>& name) { + MemberIterator m = FindMember(name); + if (m != MemberEnd()) { + RemoveMember(m); + return true; + } + else + return false; + } + + //! Remove a member in object by iterator. + /*! \param m member iterator (obtained by FindMember() or MemberBegin()). + \return the new iterator after removal. + \note This function may reorder the object members. Use \ref + EraseMember(ConstMemberIterator) if you need to preserve the + relative order of the remaining members. + \note Constant time complexity. + */ + MemberIterator RemoveMember(MemberIterator m) { + RAPIDJSON_ASSERT(IsObject()); + RAPIDJSON_ASSERT(data_.o.size > 0); + return DoRemoveMember(m); + } + + //! Remove a member from an object by iterator. + /*! \param pos iterator to the member to remove + \pre IsObject() == true && \ref MemberBegin() <= \c pos < \ref MemberEnd() + \return Iterator following the removed element. + If the iterator \c pos refers to the last element, the \ref MemberEnd() iterator is returned. + \note This function preserves the relative order of the remaining object + members. If you do not need this, use the more efficient \ref RemoveMember(MemberIterator). + \note Linear time complexity. + */ + MemberIterator EraseMember(MemberIterator pos) { + MemberIterator last = pos; + ++last; + return EraseMember(pos, last); + } + + //! Remove members in the range [first, last) from an object. + /*! \param first iterator to the first member to remove + \param last iterator following the last member to remove + \pre IsObject() == true && \ref MemberBegin() <= \c first <= \c last <= \ref MemberEnd() + \return Iterator following the last removed element. + \note This function preserves the relative order of the remaining object + members. + \note Linear time complexity. 
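+ + \b Example (sketch): remove every member while keeping the object allocated: + \code + v.EraseMember(v.MemberBegin(), v.MemberEnd()); + \endcode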
+ */ + MemberIterator EraseMember(MemberIterator first, MemberIterator last) { + RAPIDJSON_ASSERT(IsObject()); + RAPIDJSON_ASSERT(data_.o.size > 0); + RAPIDJSON_ASSERT(!MembersPointerIsNull()); + return DoEraseMembers(first, last); + } + + //! Erase a member in object by its name. + /*! \param name Name of member to be removed. + \return Whether the member existed. + \note Linear time complexity. + */ + bool EraseMember(const Ch* name) { + GenericValue n(StringRef(name)); + return EraseMember(n); + } + +#if RAPIDJSON_HAS_STDSTRING + bool EraseMember(const std::basic_string<Ch>& name) { return EraseMember(GenericValue(StringRef(name))); } +#endif + bool EraseMember(const std::basic_string_view<Ch>& name) { return EraseMember(GenericValue(StringRef(name))); } + + template <typename SourceAllocator> + bool EraseMember(const GenericValue<Encoding, SourceAllocator>& name) { + MemberIterator m = FindMember(name); + if (m != MemberEnd()) { + EraseMember(m); + return true; + } + else + return false; + } + + Object GetObject() { RAPIDJSON_ASSERT(IsObject()); return Object(*this); } + Object GetObj() { RAPIDJSON_ASSERT(IsObject()); return Object(*this); } + ConstObject GetObject() const { RAPIDJSON_ASSERT(IsObject()); return ConstObject(*this); } + ConstObject GetObj() const { RAPIDJSON_ASSERT(IsObject()); return ConstObject(*this); } + + std::string Validate() const { RAPIDJSON_ASSERT(IsObject()); return DoValidateMembers(); } + + //@} + + //!@name Array + //@{ + + //! Set this value as an empty array. + /*! \post IsArray() == true */ + GenericValue& SetArray() { this->~GenericValue(); new (this) GenericValue(kArrayType); return *this; } + + //! Get the number of elements in array. + SizeType Size() const { RAPIDJSON_ASSERT(IsArray()); return data_.a.size; } + + //! Get the capacity of array. + SizeType Capacity() const { RAPIDJSON_ASSERT(IsArray()); return data_.a.capacity; } + + //! Check whether the array is empty. + bool Empty() const { RAPIDJSON_ASSERT(IsArray()); return data_.a.size == 0; } + + //! Remove all elements in the array. + /*! This function does not deallocate memory in the array, i.e. the capacity is unchanged. + \note Linear time complexity. + */ + void Clear(Allocator &allocator) { + GenericValue* e = GetElementsPointer(); + for (GenericValue* v = e; v != e + data_.a.size; ++v) + v->~GenericValue(); + data_.a.size = 0; + } + + //! Get an element from array by index. + /*! \pre IsArray() == true + \param index Zero-based index of element. + \see operator[](T*) + */ + GenericValue& operator[](SizeType index) { + RAPIDJSON_ASSERT(IsArray()); + RAPIDJSON_ASSERT(index < data_.a.size); + return GetElementsPointer()[index]; + } + const GenericValue& operator[](SizeType index) const { return const_cast<GenericValue&>(*this)[index]; } + + //! Element iterator + /*! \pre IsArray() == true */ + ValueIterator Begin() { RAPIDJSON_ASSERT(IsArray()); return GetElementsPointer(); } + //! \em Past-the-end element iterator + /*! \pre IsArray() == true */ + ValueIterator End() { RAPIDJSON_ASSERT(IsArray()); return GetElementsPointer() + data_.a.size; } + //! Constant element iterator + /*! \pre IsArray() == true */ + ConstValueIterator Begin() const { return const_cast<GenericValue&>(*this).Begin(); } + //! Constant \em past-the-end element iterator + /*! \pre IsArray() == true */ + ConstValueIterator End() const { return const_cast<GenericValue&>(*this).End(); } + + //! Request the array to have enough capacity to store elements. + /*! \param newCapacity The minimum capacity that the array needs to have. + \param allocator Allocator for reallocating memory. It must be the same one as used before. 
+            Commonly use GenericDocument::GetAllocator().
+        \return The value itself for fluent API.
+        \note Linear time complexity.
+    */
+    GenericValue& Reserve(SizeType newCapacity, Allocator &allocator) {
+        if (newCapacity > data_.a.capacity) {
+            SetElementsPointer(reinterpret_cast<GenericValue*>(allocator.Realloc(GetElementsPointer(), data_.a.capacity * sizeof(GenericValue), newCapacity * sizeof(GenericValue))));
+            data_.a.capacity = newCapacity;
+        }
+        return *this;
+    }
+
+    //! Append a GenericValue at the end of the array.
+    /*! \param value        Value to be appended.
+        \param allocator    Allocator for reallocating memory. It must be the same one as used before. Commonly use GenericDocument::GetAllocator().
+        \pre IsArray() == true
+        \post value.IsNull() == true
+        \return The value itself for fluent API.
+        \note The ownership of \c value will be transferred to this array on success.
+        \note If the number of elements to be appended is known, calling Reserve() once first may be more efficient.
+        \note Amortized constant time complexity.
+    */
+    GenericValue& PushBack(GenericValue& value, Allocator& allocator) {
+        if (data_.a.size >= data_.a.capacity)
+            Reserve(data_.a.capacity == 0 ? kDefaultArrayCapacity : (data_.a.capacity + (data_.a.capacity + 1) / 2), allocator);
+        GetElementsPointer()[data_.a.size++].RawAssign(value, true);
+        return *this;
+    }
+
+#if RAPIDJSON_HAS_CXX11_RVALUE_REFS
+    GenericValue& PushBack(GenericValue&& value, Allocator& allocator) {
+        return PushBack(value, allocator);
+    }
+#endif // RAPIDJSON_HAS_CXX11_RVALUE_REFS
+
+    //! Append a constant string reference at the end of the array.
+    /*! \param value        Constant string reference to be appended.
+        \param allocator    Allocator for reallocating memory. It must be the same one used previously. Commonly use GenericDocument::GetAllocator().
+        \pre IsArray() == true
+        \return The value itself for fluent API.
+        \note If the number of elements to be appended is known, calling Reserve() once first may be more efficient.
+        \note Amortized constant time complexity.
+        \see GenericStringRef
+    */
+    GenericValue& PushBack(StringRefType value, Allocator& allocator) {
+        return (*this).template PushBack<StringRefType>(value, allocator);
+    }
+
+    //! Append a primitive value at the end of the array.
+    /*! \tparam T Either \ref Type, \c int, \c unsigned, \c int64_t, \c uint64_t
+        \param value        Value of primitive type T to be appended.
+        \param allocator    Allocator for reallocating memory. It must be the same one as used before. Commonly use GenericDocument::GetAllocator().
+        \pre IsArray() == true
+        \return The value itself for fluent API.
+        \note If the number of elements to be appended is known, calling Reserve() once first may be more efficient.
+
+        \note The source type \c T explicitly disallows all pointer types,
+            especially (\c const) \ref Ch*. This helps avoid implicitly
+            referencing character strings with insufficient lifetime; use
+            \ref PushBack(GenericValue&, Allocator&) or \ref
+            PushBack(StringRefType, Allocator&) instead.
+            All other pointer types would implicitly convert to \c bool;
+            use an explicit cast instead, if needed.
+        \note Amortized constant time complexity.
+    */
+    template <typename T>
+    RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr<internal::IsPointer<T>, internal::IsGenericValue<T> >), (GenericValue&))
+    PushBack(T value, Allocator& allocator) {
+        GenericValue v(value);
+        return PushBack(v, allocator);
+    }
+
+    //! Remove the last element in the array.
+    /*!
+        \note Constant time complexity.
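+        \note Example (an illustrative sketch; assumes \c d is a \c Document and
+            uses its own allocator):
+        \code
+        Document::AllocatorType& a = d.GetAllocator();
+        d.SetArray().Reserve(3, a);                      // one allocation up front
+        d.PushBack(1, a).PushBack(2, a).PushBack(3, a);  // fluent API
+        d.PopBack(a);                                    // array is now [1, 2]
+        \endcode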
+    */
+    GenericValue& PopBack(Allocator& allocator) {
+        RAPIDJSON_ASSERT(!Empty());
+        GetElementsPointer()[--data_.a.size].~GenericValue();
+        return *this;
+    }
+
+    //! Remove an element of array by iterator.
+    /*!
+        \param pos iterator to the element to remove
+        \pre IsArray() == true && \ref Begin() <= \c pos < \ref End()
+        \return Iterator following the removed element. If the iterator \c pos refers to the last element, the \ref End() iterator is returned.
+        \note Linear time complexity.
+    */
+    ValueIterator Erase(ConstValueIterator pos) {
+        return Erase(pos, pos + 1);
+    }
+
+    //! Remove elements in the range [first, last) of the array.
+    /*!
+        \param first iterator to the first element to remove
+        \param last  iterator following the last element to remove
+        \pre IsArray() == true && \ref Begin() <= \c first <= \c last <= \ref End()
+        \return Iterator following the last removed element.
+        \note Linear time complexity.
+    */
+    ValueIterator Erase(ConstValueIterator first, ConstValueIterator last) {
+        RAPIDJSON_ASSERT(IsArray());
+        RAPIDJSON_ASSERT(data_.a.size > 0);
+        RAPIDJSON_ASSERT(GetElementsPointer() != 0);
+        RAPIDJSON_ASSERT(first >= Begin());
+        RAPIDJSON_ASSERT(first <= last);
+        RAPIDJSON_ASSERT(last <= End());
+        ValueIterator pos = Begin() + (first - Begin());
+        for (ValueIterator itr = pos; itr != last; ++itr) {
+            itr->~GenericValue();
+        }
+        std::memmove(static_cast<void*>(pos), last, static_cast<size_t>(End() - last) * sizeof(GenericValue));
+        data_.a.size -= static_cast<SizeType>(last - first);
+        return pos;
+    }
+
+    Array GetArray() { RAPIDJSON_ASSERT(IsArray()); return Array(*this); }
+    ConstArray GetArray() const { RAPIDJSON_ASSERT(IsArray()); return ConstArray(*this); }
+
+    //@}
+
+    //!@name Number
+    //@{
+
+    int GetInt() const          { RAPIDJSON_ASSERT(data_.f.flags & kIntFlag);    return data_.n.i.i; }
+    unsigned GetUint() const    { RAPIDJSON_ASSERT(data_.f.flags & kUintFlag);   return data_.n.u.u; }
+    int64_t GetInt64() const    { RAPIDJSON_ASSERT(data_.f.flags & kInt64Flag);  return data_.n.i64; }
+    uint64_t GetUint64() const  { RAPIDJSON_ASSERT(data_.f.flags & kUint64Flag); return data_.n.u64; }
+
+    //! Get the value as double type.
+    /*! \note If the value is a 64-bit integer type, it may lose precision. Use \c IsLosslessDouble() to check whether the conversion is lossless.
+    */
+    double GetDouble() const {
+        RAPIDJSON_ASSERT(IsNumber());
+        if ((data_.f.flags & kDoubleFlag) != 0) { return std::stod(DataString(data_)); } // convert from string to double.
+        if ((data_.f.flags & kIntFlag) != 0)    return data_.n.i.i;                        // int -> double
+        if ((data_.f.flags & kUintFlag) != 0)   return data_.n.u.u;                        // unsigned -> double
+        if ((data_.f.flags & kInt64Flag) != 0)  return static_cast<double>(data_.n.i64);   // int64_t -> double (may lose precision)
+        RAPIDJSON_ASSERT((data_.f.flags & kUint64Flag) != 0); return static_cast<double>(data_.n.u64); // uint64_t -> double (may lose precision)
+    }
+
+    GenericValue& SetInt(int i)           { this->~GenericValue(); new (this) GenericValue(i);   return *this; }
+    GenericValue& SetUint(unsigned u)     { this->~GenericValue(); new (this) GenericValue(u);   return *this; }
+    GenericValue& SetInt64(int64_t i64)   { this->~GenericValue(); new (this) GenericValue(i64); return *this; }
+    GenericValue& SetUint64(uint64_t u64) { this->~GenericValue(); new (this) GenericValue(u64); return *this; }
+    GenericValue& SetDouble(const Ch* d, SizeType length, Allocator& allocator) { this->~GenericValue(); new (this) GenericValue(d, length, allocator, true, true); return *this; }
+
+    //@}
+    void ExtractHandle(KeyTable_Handle* h) {
+        RAPIDJSON_ASSERT(IsHandle());
+        data_.f.flags = kNullType;
+        *reinterpret_cast<size_t*>(h) = data_.h.handle;
+    }
+
+    //!@name String
+    //@{
+
+    const Ch* GetString(bool validate = true) const { RAPIDJSON_ASSERT(IsString()); return DataString(data_, validate); }
+    const Ch* GetDoubleString(bool validate = true) const { RAPIDJSON_ASSERT(IsDouble()); return DataString(data_, validate); }
+    const std::basic_string_view<Ch> GetStringView() const {
+        RAPIDJSON_ASSERT(IsString());
+        return std::basic_string_view<Ch>(DataString(data_), DataStringLength(data_));
+    }
+
+    //! Get the length of string.
+    /*! Since rapidjson permits "\\u0000" in the json string, strlen(v.GetString()) may not equal v.GetStringLength().
+    */
+    SizeType GetStringLength() const { RAPIDJSON_ASSERT(IsString()); return DataStringLength(data_); }
+    SizeType GetDoubleStringLength() const { RAPIDJSON_ASSERT(IsDouble()); return DataStringLength(data_); }
+
+    //! Set this value as a string without copying source string.
+    /*! This version has better performance with supplied length, and also supports strings containing null characters.
+        \param s source string pointer.
+        \param length The length of source string, excluding the trailing null terminator.
+        \return The value itself for fluent API.
+        \post IsString() == true && GetString() == s && GetStringLength() == length
+        \see SetString(StringRefType)
+    */
+    GenericValue& SetString(const Ch* s, SizeType length) { return SetString(StringRef(s, length)); }
+
+    //! Set this value as a string without copying source string.
+    /*! \param s source string reference
+        \return The value itself for fluent API.
+        \post IsString() == true && GetString() == s && GetStringLength() == s.length
+    */
+    GenericValue& SetString(StringRefType s) { this->~GenericValue(); SetStringRaw(s); return *this; }
+
+    //! Set this value as a string by copying from source string.
+    /*! This version has better performance with supplied length, and also supports strings containing null characters.
+        \param s source string.
+        \param length The length of source string, excluding the trailing null terminator.
+        \param allocator Allocator for allocating copied buffer. Commonly use GenericDocument::GetAllocator().
+        \return The value itself for fluent API.
+        \post IsString() == true && GetString() != s && strcmp(GetString(),s) == 0 && GetStringLength() == length
+    */
+    GenericValue& SetString(const Ch* s, SizeType length, Allocator& allocator) { return SetString(StringRef(s, length), allocator); }
+
+    //! Set this value as a string by copying from source string.
+    /*! \param s source string.
+        \param allocator Allocator for allocating copied buffer. Commonly use GenericDocument::GetAllocator().
+        \return The value itself for fluent API.
+        \post IsString() == true && GetString() != s && strcmp(GetString(),s) == 0 && GetStringLength() == length
+    */
+    GenericValue& SetString(const Ch* s, Allocator& allocator) { return SetString(StringRef(s), allocator); }
+
+    //! Set this value as a string by copying from source string.
+    /*! \param s source string reference
+        \param allocator Allocator for allocating copied buffer. Commonly use GenericDocument::GetAllocator().
+        \return The value itself for fluent API.
+        \post IsString() == true && GetString() != s.s && strcmp(GetString(),s) == 0 && GetStringLength() == length
+    */
+    GenericValue& SetString(StringRefType s, Allocator& allocator) {
+        this->~GenericValue();
+        SetStringRaw(s, allocator);
+        return *this;
+    }
+
+#if RAPIDJSON_HAS_STDSTRING
+    //! Set this value as a string by copying from source string.
+    /*! \param s source string.
+        \param allocator Allocator for allocating copied buffer. Commonly use GenericDocument::GetAllocator().
+        \return The value itself for fluent API.
+        \post IsString() == true && GetString() != s.data() && strcmp(GetString(),s.data()) == 0 && GetStringLength() == s.size()
+        \note Requires the definition of the preprocessor symbol \ref RAPIDJSON_HAS_STDSTRING.
+    */
+    GenericValue& SetString(const std::basic_string<Ch>& s, Allocator& allocator) { return SetString(StringRef(s), allocator); }
+#endif
+    GenericValue& SetString(const std::basic_string_view<Ch>& s, Allocator& allocator) { return SetString(StringRef(s), allocator); }
+
+    //@}
+
+    //!@name Misc
+    //@{
+
+    //! Templated version for checking whether this value is type T.
+    /*!
+        \tparam T Either \c bool, \c int, \c unsigned, \c int64_t, \c uint64_t, \c double, \c float, \c const \c char*, \c std::basic_string<Ch>
+    */
+    template <typename T>
+    bool Is() const { return internal::TypeHelper<ValueType, T>::Is(*this); }
+
+    template <typename T>
+    T Get() const { return internal::TypeHelper<ValueType, T>::Get(*this); }
+
+    template <typename T>
+    T Get() { return internal::TypeHelper<ValueType, T>::Get(*this); }
+
+    template <typename T>
+    ValueType& Set(const T& data) { return internal::TypeHelper<ValueType, T>::Set(*this, data); }
+
+    template <typename T>
+    ValueType& Set(const T& data, AllocatorType& allocator) { return internal::TypeHelper<ValueType, T>::Set(*this, data, allocator); }
+
+    //@}
+
+    //! Generate events of this value to a Handler.
+    /*! This function adopts the GoF visitor pattern.
+        Typical usage is to output this JSON value as JSON text via Writer, which is a Handler.
+        It can also be used to deep clone this value via GenericDocument, which is also a Handler.
+        \tparam Handler type of handler.
+        \param handler An object implementing concept Handler.
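+        \note Example (a typical serialization sketch; assumes the accompanying
+            \c StringBuffer and \c Writer headers are included):
+        \code
+        StringBuffer sb;
+        Writer<StringBuffer> writer(sb);
+        d.Accept(writer);                  // walk d, emitting SAX events into writer
+        const char* json = sb.GetString();
+        \endcode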
+ */ + template + bool Accept(Handler& handler) const { + switch(GetType()) { + case kNullType: return handler.Null(); + case kFalseType: return handler.Bool(false); + case kTrueType: return handler.Bool(true); + + case kObjectType: + if (RAPIDJSON_UNLIKELY(!handler.StartObject())) + return false; + for (ConstMemberIterator m = MemberBegin(); m != MemberEnd(); ++m) { + if (RAPIDJSON_UNLIKELY(!handler.Key(m->name.GetString(), m->name.GetStringLength(), true))) + return false; + if (RAPIDJSON_UNLIKELY(!m->value.Accept(handler))) + return false; + } + return handler.EndObject(data_.o.size); + + case kArrayType: + if (RAPIDJSON_UNLIKELY(!handler.StartArray())) + return false; + for (ConstValueIterator v = Begin(); v != End(); ++v) + if (RAPIDJSON_UNLIKELY(!v->Accept(handler))) + return false; + return handler.EndArray(data_.a.size); + + case kStringType: + return handler.String(GetString(), GetStringLength(), (data_.f.flags & kCopyFlag) != 0); + + default: + RAPIDJSON_ASSERT(GetType() == kNumberType); + if (IsDouble()) return handler.RawNumber(GetDoubleString(), GetDoubleStringLength(), true); + else if (IsInt()) return handler.Int(data_.n.i.i); + else if (IsUint()) return handler.Uint(data_.n.u.u); + else if (IsInt64()) return handler.Int64(data_.n.i64); + else return handler.Uint64(data_.n.u64); + } + } + + // + // User Data Facility. + // + // One bit per JValue is explicitly reserved for client applications. Typically it's used to "mark" jValues + // for some purposes (typically duplicate detection) + // + bool IsMarked() const { return data_.f.userFlag != 0; } + bool IsNotMarked() const { return !IsMarked(); } + void SetMarked(bool v) { data_.f.userFlag = int(v); } + void SetMarked() { SetMarked(true); } + void SetNotMarked() { SetMarked(false); } + + bool IsNoescape() const { return data_.f.noescapeFlag != 0; } + void SetNoescape(bool v) { data_.f.noescapeFlag = int(v); } + +private: + template friend class GenericValue; + template friend class GenericDocument; + template friend class GenericMemberIterator; + enum { + // We are limited in the number of bits we have available here. kVectorFlag is now reusing kCopyFlag's value because of this. + // If we plan to expand the behavior of JValue further, we must revisit how this is setup. + kBoolFlag = 0x0008, + kNumberFlag = 0x0010, + kIntFlag = 0x0020, + kUintFlag = 0x0040, + kInt64Flag = 0x0080, + kUint64Flag = 0x0100, + kDoubleFlag = 0x0200, + kStringFlag = 0x0400, + kCopyFlag = 0x0800, // kCopyFlag should not be called on its own, because this bit is reused in kVectorFlag. + kInlineStrFlag = 0x1000, + kHashTableFlag = 0x2000, // Object members are a hash table + kVectorFlag = 0x0800, // Array elements are vectors. Reusing kCopyFlag bit as we have limited space and it's not heavily used. + kHandleFlag = 0x0007, // This could be a kHandleType but its only internal.... + + // Initial flags of different types. + kNullFlag = kNullType, + // These casts are added to suppress the warning on MSVC about bitwise operations between enums of different types. 
+ kTrueFlag = static_cast(kTrueType) | static_cast(kBoolFlag), + kFalseFlag = static_cast(kFalseType) | static_cast(kBoolFlag), + kNumberIntFlag = static_cast(kNumberType) | static_cast(kNumberFlag | kIntFlag | kInt64Flag), + kNumberUintFlag = static_cast(kNumberType) | static_cast(kNumberFlag | kUintFlag | kUint64Flag | kInt64Flag), + kNumberInt64Flag = static_cast(kNumberType) | static_cast(kNumberFlag | kInt64Flag), + kNumberUint64Flag = static_cast(kNumberType) | static_cast(kNumberFlag | kUint64Flag), + // kNumberDoubleFlag behaves like kCopyStringFlag; kNumberShortDoubleFlag behaves like kShortStringFlag. + // They function as doubles (printed like numbers, support numerical operations) but are stored as strings (but restricted to double values) + kNumberDoubleFlag = static_cast(kNumberType) | static_cast(kNumberFlag | kDoubleFlag), + kNumberShortDoubleFlag = static_cast(kNumberType) | static_cast(kNumberFlag | kDoubleFlag | kInlineStrFlag), + kNumberAnyFlag = static_cast(kNumberType) | static_cast(kNumberFlag | kIntFlag | kInt64Flag | kUintFlag | kUint64Flag | kDoubleFlag), + kConstStringFlag = static_cast(kStringType) | static_cast(kStringFlag), + kCopyStringFlag = static_cast(kStringType) | static_cast(kStringFlag | kCopyFlag), + kShortStringFlag = static_cast(kStringType) | static_cast(kStringFlag | kCopyFlag | kInlineStrFlag), + kObjectVecFlag = kObjectType, + kObjectHTFlag = static_cast(kObjectType) | static_cast(kHashTableFlag), + kArrayFlag = kArrayType, + + kTypeMask = 0x07 + }; + + static const SizeType kDefaultArrayCapacity = RAPIDJSON_VALUE_DEFAULT_ARRAY_CAPACITY; + static const SizeType kDefaultObjectCapacity = RAPIDJSON_VALUE_DEFAULT_OBJECT_CAPACITY; + + struct Flag { +#if RAPIDJSON_48BITPOINTER_OPTIMIZATION + char payload[sizeof(SizeType) * 2 + 6]; // 2 x SizeType + lower 48-bit pointer +#elif RAPIDJSON_64BIT + char payload[sizeof(SizeType) * 2 + sizeof(void*) + 6]; // 6 padding bytes +#else + char payload[sizeof(SizeType) * 2 + sizeof(void*) + 2]; // 2 padding bytes +#endif + uint16_t flags:14; + uint16_t userFlag:1; + uint16_t noescapeFlag:1; + }; + + struct String { + SizeType length; + SizeType hashcode; //!< reserved + const Ch* str; + }; // 12 bytes in 32-bit mode, 16 bytes in 64-bit mode + + // implementation detail: ShortString can represent zero-terminated strings up to MaxSize chars + // (excluding the terminating zero) and store a value to determine the length of the contained + // string in the last character str[LenPos] by storing "MaxSize - length" there. If the string + // to store has the maximal length of MaxSize then str[LenPos] will be 0 and therefore act as + // the string terminator as well. For getting the string length back from that value just use + // "MaxSize - str[LenPos]". + // This allows to store 13-chars strings in 32-bit mode, 21-chars strings in 64-bit mode, + // 13-chars strings for RAPIDJSON_48BITPOINTER_OPTIMIZATION=1 inline (for `UTF8`-encoded strings). 
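+    // Worked example (illustrative, assuming 64-bit mode with a 1-byte Ch, so MaxSize == 21):
+    // storing "abc" (length 3) writes str[21] = 21 - 3 = 18, and GetLength() recovers
+    // 21 - 18 = 3; a maximal 21-char string writes str[21] = 0, which then doubles as
+    // the string's null terminator.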
+ struct ShortString { + enum { MaxChars = sizeof(static_cast(0)->payload) / sizeof(Ch), MaxSize = MaxChars - 1, LenPos = MaxSize }; + Ch str[MaxChars]; + + inline static bool Usable(SizeType len) { return (MaxSize >= len); } + inline void SetLength(SizeType len) { str[LenPos] = static_cast(MaxSize - len); } + inline SizeType GetLength() const { return static_cast(MaxSize - str[LenPos]); } + }; // at most as many bytes as "String" above => 12 bytes in 32-bit mode, 16 bytes in 64-bit mode + + // By using proper binary layout, retrieval of different integer types do not need conversions. + union Number { +#if RAPIDJSON_ENDIAN == RAPIDJSON_LITTLEENDIAN + struct I { + int i; + char padding[4]; + }i; + struct U { + unsigned u; + char padding2[4]; + }u; +#else + struct I { + char padding[4]; + int i; + }i; + struct U { + char padding2[4]; + unsigned u; + }u; +#endif + int64_t i64; + uint64_t u64; + double d; + }; // 8 bytes + + struct ObjectData { + SizeType size; + SizeType capacity; + union { + Member *members; + MemberHT *membersHT; + } u; + }; // 12 bytes in 32-bit mode, 16 bytes in 64-bit mode + + struct ArrayData { + SizeType size; + SizeType capacity; + GenericValue* elements; + }; // 12 bytes in 32-bit mode, 16 bytes in 64-bit mode + + struct HandleData { + size_t handle; // This is actually KeyTable_Handle, but that type has a default constructor which disallows usage in a union + }; + + union Data { + String s; + ShortString ss; + Number n; + ObjectData o; + ArrayData a; + HandleData h; + Flag f; + }; // 16 bytes in 32-bit mode, 24 bytes in 64-bit mode, 16 bytes in 64-bit with RAPIDJSON_48BITPOINTER_OPTIMIZATION + + static RAPIDJSON_FORCEINLINE const Ch* DataString(const Data& data, bool validate = true) { + return (data.f.flags & kInlineStrFlag) ? data.ss.str : + ( + ((data.f.flags == kCopyStringFlag) || (data.f.flags == kNumberDoubleFlag)) + ? MEMORY_VALIDATE(RAPIDJSON_GETPOINTER(Ch, data.s.str), validate) + : RAPIDJSON_GETPOINTER(Ch, data.s.str) + ); + } + static RAPIDJSON_FORCEINLINE SizeType DataStringLength(const Data& data) { + return (data.f.flags & kInlineStrFlag) ? 
data.ss.GetLength() : data.s.length; + } + + RAPIDJSON_FORCEINLINE const Ch* GetStringPointer(bool validate = true) const { return DataString(data_, validate); } + RAPIDJSON_FORCEINLINE const Ch* SetStringPointer(const Ch* str) { return RAPIDJSON_SETPOINTER(Ch, data_.s.str, str); } + RAPIDJSON_FORCEINLINE GenericValue* GetElementsPointer(bool validate = true) const { return MEMORY_VALIDATE(RAPIDJSON_GETPOINTER(GenericValue, data_.a.elements), validate); } + RAPIDJSON_FORCEINLINE GenericValue* SetElementsPointer(GenericValue* elements) { MEMORY_VALIDATE(elements); return RAPIDJSON_SETPOINTER(GenericValue, data_.a.elements, elements); } + + RAPIDJSON_FORCEINLINE bool MembersPointerIsNull() const { return RAPIDJSON_GETPOINTER(Member, data_.o.u.members) == nullptr; } + + RAPIDJSON_FORCEINLINE Member* GetMembersPointerVec(bool validate = true) const { + RAPIDJSON_ASSERT(!IsObjectHT()); + return MEMORY_VALIDATE(RAPIDJSON_GETPOINTER(Member, data_.o.u.members), validate); + } + RAPIDJSON_FORCEINLINE void SetMembersPointerVec(Member* members) { + RAPIDJSON_ASSERT(!IsObjectHT()); + MEMORY_VALIDATE(members); + RAPIDJSON_SETPOINTER(Member, data_.o.u.members, members); + } + RAPIDJSON_FORCEINLINE MemberHT* GetMembersPointerHT(bool validate = true) const { + RAPIDJSON_ASSERT(IsObjectHT()); + return MEMORY_VALIDATE(RAPIDJSON_GETPOINTER(MemberHT, data_.o.u.members), validate); + } + RAPIDJSON_FORCEINLINE void SetMembersPointerHT(MemberHT* membersHT) { + RAPIDJSON_ASSERT(IsObjectHT()); + MEMORY_VALIDATE(membersHT); + RAPIDJSON_SETPOINTER(MemberHT, data_.o.u.membersHT,membersHT); + } + + // + // Generic form of DoXxxxx functions dispatches to DoXxxxxVec or DoXxxxxHT + // + + void DoReserveMembers(SizeType newCapacity, Allocator& allocator) { + if (IsObjectHT()) { + DoReserveMembersHT(newCapacity, allocator); + } else if (newCapacity > hashTableFactors.minHTSize) { + if (data_.o.capacity > 0) { + RehashHT(newCapacity, allocator); // Convert to HT + } else { + data_.f.flags = kObjectHTFlag; // Make a HT + DoReserveMembersHT(newCapacity, allocator); + } + } else { + DoReserveMembersVec(newCapacity, allocator); + } + } + + template + MemberIterator DoFindMember(const GenericValue& name) { + KeyTable_Handle h = keyTable->makeHandle(name.GetString(), name.GetStringLength()); + MemberIterator i; + if (IsObjectHT()) { + i = DoFindMemberHT(h, false); + } else { + i = DoFindMemberVec(h); + } + if (h) keyTable->destroyHandle(h); + return i; + } + + void DoClearMembers() { + if (IsObjectHT()) { + DoClearMembersHT(); + } else { + DoClearMembersVec(); + } + } + + void DoFreeMembers() { + if (IsObjectHT()) { + DoFreeMembersHT(); + } else { + DoFreeMembersVec(); + } + } + + void DoAddMember(KeyTable_Handle& name, GenericValue& value, Allocator& allocator) { + if (IsObjectHT()) { + DoAddMemberHT(name, value, allocator); + } else { + if (data_.o.size < hashTableFactors.minHTSize) { + DoAddMemberVec(name, value, allocator); + } else { + hashTableStats.convertToHT++; + RehashHT(data_.o.size * (1.0f + hashTableFactors.grow), allocator); + DoAddMemberHT(name, value, allocator); + } + } + } + MemberIterator DoRemoveMember(MemberIterator m) { + if (IsObjectHT()) { + return DoRemoveMemberHT(m); + } else { + return DoRemoveMemberVec(m); + } + } + MemberIterator DoEraseMembers(MemberIterator first, MemberIterator last) { + if (IsObjectHT()) { + return DoEraseMembersHT(first, last); + } else { + return DoEraseMembersVec(first, last); + } + } + // + // Explicit copy constructor call + template + void DoCopyMembers(const GenericValue& 
rhs, Allocator& allocator, bool copyConstStrings) { + if (rhs.IsObjectHT()) { + DoCopyMembersHT(rhs, allocator, copyConstStrings); + } else { + DoCopyMembersVec(rhs, allocator, copyConstStrings); + } + } + void SetObjectRaw(GenericValue *values, SizeType numPairs, Allocator& allocator) { + if (numPairs > hashTableFactors.minHTSize) { + SetObjectRawHT(values, numPairs, allocator); + } else { + SetObjectRawVec(values, numPairs, allocator); + } + } + + std::string DoValidateMembers() const { + if (IsObjectHT()) { + return DoValidateMembersHT(); + } else { + return DoValidateMembersVec(); + } + } + + // + // functions named DoxxxxxVec are the vector implementations + // + ConstMemberIterator MemberBeginVec() const { return ConstMemberIterator(this, 0); } + ConstMemberIterator MemberEndVec() const { return ConstMemberIterator(this, data_.o.size); } + MemberIterator MemberBeginVec() { return MemberIterator(this, 0); } + MemberIterator MemberEndVec() { return MemberIterator(this, data_.o.size); } + + void DoReserveMembersVec(SizeType newCapacity, Allocator& allocator) { + ObjectData& o = data_.o; + if (newCapacity > o.capacity) { + Member* newMembers = Realloc(allocator, GetMembersPointerVec(), o.capacity, newCapacity); + SetMembersPointerVec(newMembers); + o.capacity = newCapacity; + } + } + + MemberIterator DoFindMemberVec(KeyTable_Handle& h) { + MemberIterator member = MemberBegin(); + trace("DoFind: Size: " << data_.o.size << " H:" << h); + for ( ; member != MemberEnd(); ++member) { + trace("Checking " << member << " H:" << member->name); + if (h == member->name) + break; + } + keyTable->destroyHandle(h); + trace("Found at index: " << member); + return member; + } + + // Remove all members, but leave as empty object + void DoClearMembersVec() { + for (MemberIterator m = MemberBegin(); m != MemberEnd(); ++m) { + m->~Member(); + } + data_.o.size = 0; + } + // Destructor call + void DoFreeMembersVec() { + DoClearMembers(); + Allocator::Free(GetMembersPointerVec()); + } + + void DoAddMember(GenericValue& name, GenericValue& value, Allocator& allocator) { + KeyTable_Handle h; + if (name.IsHandle()) { + // This path from the input scanner + name.ExtractHandle(&h); + } else { + // This path from internal usage, i.e., selector + h = keyTable->makeHandle(name.GetString(), name.GetStringLength()); + } + DoAddMember(h, value, allocator); + } + + void DoAddMemberVec(KeyTable_Handle& name, GenericValue& value, Allocator& allocator) { + ObjectData& o = data_.o; + if (o.size >= o.capacity) + DoReserveMembersVec(o.capacity ? (o.capacity + (o.capacity + 1) / 2) : kDefaultObjectCapacity, allocator); + Member* members = GetMembersPointerVec(); + // + // Check for duplicates + // + trace("Add Size:" << o.size << " Cap:" << o.capacity << " H:" << name); + if (name->getRefCount() > 1) { // refcount == 1 guarantees no duplicates + for (SizeType i = 0; i < o.size; ++i) { + trace("Comparing to " << i << " h:" << members[i].name); + if (name == members[i].name) { + // + // Found a duplicate, overwrite the value + // + keyTable->destroyHandle(name); + members[i].value = value; + return; + } + } + } + trace("Added at index: " << o.size); + Member* m = members + o.size; + m->name.RawAssign(name); + m->value.RawAssign(value, true); + ++o.size; + } + + MemberIterator DoRemoveMemberVec(MemberIterator m) { + RAPIDJSON_ASSERT(false); // broken. 
+ ObjectData& o = data_.o; + Member* members = GetMembersPointerVec(); + if (o.size > 1 && m.index != o.size-1) { + m->name = members[o.size-1].name; // Move the last one to this place + m->value = members[o.size-1].value; + } + else { + m->~Member(); // Only one left, just destroy + } + --o.size; + return m; + } + + MemberIterator DoEraseMembersVec(MemberIterator first, MemberIterator last) { + ObjectData& o = data_.o; + SizeType count = 0; + MemberIterator pos = first; + MemberIterator itr = first; + trace("Erase(" << o.size << "): from : " << first << " to: " << last); + for (; itr != last; ++itr) { + trace("nuking " << itr); + itr->~Member(); + count++; + } + for (; itr != MemberEnd(); itr++) { + trace("copy " << pos << " <- " << itr); + pos->name = itr->name; + pos->value.RawAssign(itr->value, false); + pos++; + } + o.size -= count; + trace("Size is now " << o.size); + return first; + } + + // + // Explicit copy constructor call + // + template + void DoCopyMembersVec(const GenericValue& rhs, Allocator& allocator, bool copyConstStrings) { + RAPIDJSON_ASSERT(rhs.GetType() == kObjectType); + data_.f.flags = kObjectVecFlag; + data_.o.size = data_.o.capacity = 0; + SetMembersPointerVec(nullptr); + SizeType count = rhs.data_.o.size; + DoReserveMembersVec(count, allocator); + const typename GenericValue::Member* rm = rhs.GetMembersPointerVec(); + for (SizeType i = 0; i < count; i++) { + KeyTable_Handle name = std::move(keyTable->clone(rm[i].name)); + GenericValue value(rm[i].value, allocator, copyConstStrings); + DoAddMember(name, value, allocator); + } + } + + //! Initialize this value as object with initial data, without calling destructor. + void SetObjectRawVec(GenericValue *values, SizeType numPairs, Allocator& allocator) { + data_.f.flags = kObjectVecFlag; + data_.o.size = 0; + data_.o.capacity = numPairs; + if (numPairs) { + SetMembersPointerVec(Malloc(allocator, numPairs)); + for (SizeType i = 0; i < numPairs; ++i) { + DoAddMember(values[2*i], values[2*i+1], allocator); + } + } + else + SetMembersPointerVec(0); + } + + std::string DoValidateMembersVec() const { + std::ostringstream os; + if (data_.o.size > data_.o.capacity) { + os << "Bad size"; + } + Member *m = GetMembersPointerVec(); + if (m == nullptr && data_.o.size > 0) { + os << "Bad members pointer"; + } + for (size_t i = 0; i < data_.o.size; ++i) { + if (!m[i].name) { + os << "Bad handle at index " << i; + } + } + return os.str(); + } + + /********************************************************************************************* + * + * New hashtable implementation, implemented using linear probing. + * + * A hashtable is a vector of MemberHT elements. A MemberHT is a Member (Name+Value) plus + * a prev/next indexes. Each entry in the hashtable is linked into a single list. With element + * 0 of the vector serving as the head/tail of the list. + * + * The capacity member tells us the size of the hashtable. Because of the list head, the + * allocated size is capacity + 1 + * + * The 0'th index value member contains a pointer to the allocator for this hashtable. (See + * rehash). + * + */ + + MemberHT& ListHead() const { return GetMembersPointerHT()[0]; } + ConstMemberIterator MemberBeginHT() const { return ConstMemberIterator(this, ListHead().next); } + ConstMemberIterator MemberEndHT() const { return ConstMemberIterator(this, 0); } + MemberIterator MemberBeginHT() { return MemberIterator(this, ListHead().next); } + MemberIterator MemberEndHT() { return MemberIterator(this, 0); } + + // + // Constructor for HashTable. 
+ // + // We allocate a vector of MemberHT sized at capacity + 1. The 0'th element plays a dual role + // (1) It's the head a doubly linked list of MemberHT (insertion order). + // (2) The legacy RemoveMember/EraseMember API calls don't pass in an allocator. But + // since the hashtable auto-shrinks, it needs an allocator to do that work. So rather than + // modify all of the APIs (there are lots), we'll just save the allocator here in the + // 0'th entry. + // + void DoConstructMembersHT(SizeType capacity, Allocator& allocator) { + if (capacity < HashTableFactors::MIN_HT_SIZE) { + capacity = HashTableFactors::MIN_HT_SIZE; + } + data_.f.flags = kObjectHTFlag; + data_.o.size = 0; + data_.o.capacity = capacity; + size_t memSize = sizeof(MemberHT) * (data_.o.capacity + 1); // +1 for ListHead + void *mem = allocator.Malloc(memSize); + memset(mem, 0, memSize); // We actually care about the handles. + MemberHT *m = reinterpret_cast(mem); + SetMembersPointerHT(m); + + m[0].value.data_.n.u64 = reinterpret_cast(&allocator); // Cheat.... + } + + void RehashHT(SizeType newCapacity, Allocator& allocator) { + if (newCapacity < HashTableFactors::MIN_HT_SIZE) { + newCapacity = HashTableFactors::MIN_HT_SIZE; + } + // + // Save the current object locally + // + GenericValue me(kObjectType); + me.Swap(*this); + // + // Make current node into a hashtable + // + DoConstructMembersHT(newCapacity, allocator); + // + // Move the members from the copy into the new hashtable + // + size_t object_member_count = 0; + size_t object_num_member_chars = 0; + for (MemberIterator i = me.MemberBegin(); i != me.MemberEnd(); ++i) { + object_member_count += 1; + object_num_member_chars += i->name.GetStringLength(); + DoAddMemberHT(i->name, i->value, allocator); + } + // + // Now kill the temp, don't use the destructor because we know all of the members are empty. + // + allocator.Free(me.IsObjectHT() ? me.GetMembersPointerHT() : me.GetMembersPointerVec()); + me.data_.f.flags = kNullFlag; + } + + void DoReserveMembersHT(SizeType newCapacity, Allocator& allocator) { + hashTableStats.reserveHT++; + RehashHT(newCapacity, allocator); + } + + // + // Compute the starting HashTable entry for this string + // Don't forget to account for ListHead() + // + // We only have 19-bits of hash in the KeyTable_Handle. When the hashtable size is less than + // 2^19, we just modulo that value directly. But when the table is larger, we use the full + // value stored in the keytable entry itself. + // + SizeType HTIndex(KeyTable_Handle& h) const { + size_t hsh = (data_.o.capacity < KeyTable_Handle::MAX_HASHCODE) + ? 
h.GetHashcode() + : h->getOriginalHash(); + return (hsh % data_.o.capacity) + 1; + } + + // + // Circularly go to next element of hashtable + // + void IncrIndex(SizeType &ix) const { + if (++ix > data_.o.capacity) { // Remember capacity + 1 is true size of vector + ix = 1; + } + } + + float loadFactor() const { + return data_.o.size / float(data_.o.capacity); + } + + MemberIterator DoFindMemberHT(KeyTable_Handle& h, bool findInsertion) { + SizeType ix = HTIndex(h); + MemberHT* members = GetMembersPointerHT(); + // + // Linear probe, keep searching until a hit or an empty + // + trace("DoFindMemberHT(" <<(findInsertion?"Ins":"") << "): @ ix:" << ix << " : " << h); + for (SizeType count = 0; count < data_.o.capacity; ++count) { + if (!members[ix].name) { + if (!findInsertion) { + trace("not found"); + return MemberEnd(); + } else { + trace("InsertIx" << ix); + return MemberIterator(this, ix); + } + } else if (h == members[ix].name) { + trace("found at ix " << ix); + return MemberIterator(this, ix); + } + IncrIndex(ix); + } + trace("Full not found"); + RAPIDJSON_ASSERT(!findInsertion); + return MemberEnd(); + } + // + // Remove all members, but don't destruct the Object. + // + void DoClearMembersHT() { + ObjectData& o = data_.o; + MemberHT *members = GetMembersPointerHT(); + for (MemberIterator i = MemberBegin(); i != MemberEnd(); ++i) { + i->~Member(); + } + members[0].next = members[0].prev = 0; + o.size = 0; + } + // Destructor call + void DoFreeMembersHT() { + DoClearMembersHT(); + Allocator::Free(GetMembersPointerHT()); + } + + void DoAddMemberHT(GenericValue& name, GenericValue& value, Allocator& allocator) { + KeyTable_Handle h; + if (name.IsHandle()) { + name.ExtractHandle(&h); + } else { + h = keyTable->makeHandle(name.GetString(), name.GetStringLength()); + name.~GenericValue(); + } + DoAddMemberHT(h, value, allocator); + } + + void DoAddMemberHT(KeyTable_Handle& name, GenericValue& value, Allocator& allocator) { + if (loadFactor() > hashTableFactors.maxLoad) { + hashTableStats.rehashUp++; + RehashHT(data_.o.capacity * (1.0f + hashTableFactors.grow), allocator); + } + MemberIterator i = DoFindMemberHT(name, true); + RAPIDJSON_ASSERT(i != MemberEnd()); + if (i->name) { + // Duplicate + trace("DoAddMemberHT duplicate"); + i->value = value; + keyTable->destroyHandle(name); + } else { + // Empty + trace("DoAddMemberHT inserting @" << i.index << " : " << name); + RAPIDJSON_ASSERT(i.index > 0 && i.index <= data_.o.capacity); + MemberHT *m = GetMembersPointerHT(); + MemberHT& newMember = m[i.index]; + newMember.name.RawAssign(name); + newMember.value.RawAssign(value, true); + SizeType endix = ListHead().prev; + RAPIDJSON_ASSERT(m[endix].next == 0); + newMember.prev = endix; + newMember.next = 0; + m[endix].next = i.index; + ListHead().prev = i.index; + data_.o.size++; + } + } + + // + // Distance from -> to with wrap around + // + size_t forward_distance(size_t from, size_t to) { + size_t result; + if (from <= to) { + result = to - from; + } else { + result = (to + data_.o.capacity) - from; + } + RAPIDJSON_ASSERT(result < data_.o.capacity); + return result; + } + // + // Remove one member. 
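+    // Illustration of the backward-shift scan below (assumed capacity 5): if keys A and B
+    // both hash to slot 2, A occupies 2 and B overflows to 3. Removing A empties slot 2;
+    // the scan finds B at slot 3, sees that the empty slot is strictly closer to B's home
+    // slot (2) than B's current slot is, and moves B down to slot 2, so that later probes
+    // for B still terminate correctly at a genuinely empty slot.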
+ // + MemberIterator DoRemoveMemberHT(MemberIterator itr) { + MemberIterator next = itr; + next++; + // + // remove and destruct this node + // + SizeType remove_ix = itr.index; + MemberHT *m = GetMembersPointerHT(); + RAPIDJSON_ASSERT(remove_ix > 0 && remove_ix <= data_.o.capacity); + MemberHT& remove_entry = m[remove_ix]; + trace("DoRemove ix:" << remove_ix << " : " << remove_entry.name); + RAPIDJSON_ASSERT(m[remove_entry.prev].next == remove_ix); + RAPIDJSON_ASSERT(m[remove_entry.next].prev == remove_ix); + m[remove_entry.prev].next = remove_entry.next; + m[remove_entry.next].prev = remove_entry.prev; + data_.o.size--; + remove_entry.~MemberHT(); + // + // Now, we need to scan from this ix until the next empty slot in case some other entries + // need to be moved down (see Linear Probing) + // + SizeType ix = remove_ix; + SizeType empty_ix = remove_ix; + IncrIndex(ix); + RAPIDJSON_ASSERT(!m[empty_ix].name); + for (size_t count = 0; count <= data_.o.capacity; ++count) { + MemberHT& thisEntry = m[ix]; + if (!thisEntry.name) { + trace("Removemember, scan complete"); + if (loadFactor() < hashTableFactors.minLoad) { + // + // See DoConstructHT + Allocator *allocator = reinterpret_cast(m[0].value.data_.n.u64); + hashTableStats.rehashDown++; + RehashHT(data_.o.capacity * hashTableFactors.shrink, *allocator); + } + return next; // Done! + } + // + // See if this entry can be moved down to the empty_ix + // + SizeType orig_ix = HTIndex(thisEntry.name); + trace("Not empty ix : " << ix << " : orig_ix: " << orig_ix); + if (forward_distance(orig_ix, ix) > forward_distance(orig_ix, empty_ix)) { + trace("move " << ix << " -> " << empty_ix << " : " << thisEntry.name); + m[empty_ix].name.RawAssign(thisEntry.name); + m[empty_ix].value.RawAssign(thisEntry.value, false); + m[empty_ix].prev = thisEntry.prev; + m[empty_ix].next = thisEntry.next; + // Fix up the links. + RAPIDJSON_ASSERT(m[thisEntry.prev].next == ix); + RAPIDJSON_ASSERT(m[thisEntry.next].prev == ix); + m[thisEntry.prev].next = empty_ix; + m[thisEntry.next].prev = empty_ix; + empty_ix = ix; + RAPIDJSON_ASSERT(!m[empty_ix].name); + } + IncrIndex(ix); + } + return next; + } + // + // erase a range + // + MemberIterator DoEraseMembersHT(MemberIterator first, MemberIterator last) { + MemberIterator i = first; + while (i != last) i = DoRemoveMemberHT(i); + return i; + } + // + // Explicit Copy Constructor call + // + template + void DoCopyMembersHT(const GenericValue& rhs, Allocator& allocator, bool copyConstStrings) { + DoConstructMembersHT(rhs.MemberCapacity(), allocator); + for (ConstMemberIterator m = rhs.MemberBegin(); m != rhs.MemberEnd(); ++m) { + KeyTable_Handle name = keyTable->clone(m->name); + GenericValue value(m->value, allocator, copyConstStrings); + DoAddMember(name, value, allocator); + } + } + // + // Create a hash for this size entries and fill it in. 
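+    // (Illustrative sizing arithmetic, not normative: the constructor call below is
+    // given numPairs / maxLoad slots, so if maxLoad were, say, 0.8, then 100 pairs
+    // would get a 125-slot table, starting right at the configured load-factor bound.)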
+ // + void SetObjectRawHT(GenericValue *values, SizeType numPairs, Allocator& allocator) { + hashTableStats.reserveHT++; + DoConstructMembersHT(numPairs / hashTableFactors.maxLoad, allocator); + for (SizeType i = 0; i < numPairs; ++i) { + DoAddMemberHT(values[2*i], values[2*i+1], allocator); + } + } + + // + // Validate structure of hashtable + // + std::string DoValidateMembersHT() const { + std::ostringstream os; + MemberHT *m = GetMembersPointerHT(); + if (!m[0].value.data_.n.u64) os << "Invalid allocator"; + if (data_.o.capacity < HashTableFactors::MIN_HT_SIZE) { + os << "Too small, Size is " << data_.o.capacity << " MinSize:" << hashTableFactors.minHTSize; + } + size_t vector_size = data_.o.capacity + 1; + std::vector occupied(vector_size, false); + // + // Walk the linked list in the forward direction + // + size_t found = 0; + for (size_t ix = m[0].next; ix != 0; ix = m[ix].next) { + if (ix >= vector_size) os << "Invalid index in forward link: " << ix; + if (occupied[ix]) os << "Double linked list through index " << ix; + occupied[ix] = true; + found++; + if (!m[ix].name) os << "Bad empty handle at index " << ix; + } + if (data_.o.size != found) { + os << "Invalid forward list size: " << data_.o.size << " But found:" << found; + } + // + // Now walk in the reverse direction + // + found = 0; + for (size_t ix = m[0].prev; ix != 0; ix = m[ix].prev) { + if (ix >= vector_size) os << "Invalid index in reverse link: " << ix; + if (!occupied[ix]) os << "Bad linkage for " << ix; + found++; + } + if (data_.o.size != found) { + os << "Invalid reverse list size: " << data_.o.size << " But found:" << found; + } + // + // Now make sure all of the other indexes are empty + // + for (size_t ix = 1; ix < vector_size; ++ix) { + if (!occupied[ix] && m[ix].name) os << "Not-free handle @ " << ix; + } + // + // Now check the hashtable invariants w.r.t. hashindex + // + for (size_t ix = 1; ix < vector_size; ++ix) { + if (m[ix].name) { + // + // No entry between this index and the hash index for this handle can be empty + // + SizeType native_ix = HTIndex(m[ix].name); + while (native_ix != ix) { + if (!m[native_ix].name) os << "Found incorrect empty entry @ " << native_ix; + IncrIndex(native_ix); + } + } + } + return os.str(); + } + + // + // Compute HT distribution + // +public: + void getObjectDistribution(std::map& runs, size_t topN) const { + MemberHT *m = GetMembersPointerHT(); + size_t thisRun = 0; + for (size_t ix = 1; ix < data_.o.capacity+1; ++ix) { + if (m[ix].name) { + thisRun ++; + } else if (thisRun != 0) { + runs[thisRun]++; + thisRun = 0; + while (runs.size() > topN) runs.erase(runs.begin()); + } + } + } +private: + // Initialize this value as array with initial data, without calling destructor. + void SetArrayRaw(GenericValue* values, SizeType count, Allocator& allocator) { + data_.f.flags = kArrayFlag; + if (count) { + GenericValue* e = static_cast(allocator.Malloc(count * sizeof(GenericValue))); + SetElementsPointer(e); + std::memcpy(static_cast(e), values, count * sizeof(GenericValue)); + } + else + SetElementsPointer(0); + data_.a.size = data_.a.capacity = count; + } + + //! Initialize this value as constant string, without calling destructor. + void SetStringRaw(StringRefType s, bool noescape = false) RAPIDJSON_NOEXCEPT { + data_.f.flags = kConstStringFlag; + SetStringPointer(s); + data_.s.length = s.length; + SetNoescape(noescape); + } + + //! Initialize this value as copy string with initial data, without calling destructor. 
+ void SetStringRaw(StringRefType s, Allocator& allocator, bool noescape = false, bool isdouble = false) { + Ch* str = 0; + if (ShortString::Usable(s.length)) { + data_.f.flags = isdouble? kNumberShortDoubleFlag : kShortStringFlag; + data_.ss.SetLength(s.length); + str = data_.ss.str; + } else { + data_.f.flags = isdouble? kNumberDoubleFlag : kCopyStringFlag; + data_.s.length = s.length; + str = static_cast(allocator.Malloc((s.length + 1) * sizeof(Ch))); + SetStringPointer(str); + } + std::memcpy(str, s, s.length * sizeof(Ch)); + str[s.length] = '\0'; + SetNoescape(noescape); + } + + // + // Set the handle into an uninitialized GenericValue. Unfortunately, because + // KeyTable_Handle has a default constructor, we've substituted a size_t as + // the type in HandleData. Meaning we have to do a bit of type fiddling here. + // + void SetHandleRaw(KeyTable_Handle& handle) { + data_.f.flags = kHandleFlag; + // Emulate data_.h.handle = handle; + data_.h.handle = 0; // Default constructor of KeyTable_Handle + *reinterpret_cast(&data_.h.handle) = handle; + } + + //! Assignment without calling destructor + void RawAssign(GenericValue& rhs, bool track_new_outgoing_null) RAPIDJSON_NOEXCEPT { + data_ = rhs.data_; + // data_.f.flags = rhs.data_.f.flags; + rhs.data_.f.flags = kNullFlag; + if (track_new_outgoing_null) { + } + } + + template + bool StringEqual(const GenericValue& rhs) const { + RAPIDJSON_ASSERT(IsString()); + RAPIDJSON_ASSERT(rhs.IsString()); + + const SizeType len1 = GetStringLength(); + const SizeType len2 = rhs.GetStringLength(); + if(len1 != len2) { return false; } + + const Ch* const str1 = GetString(); + const Ch* const str2 = rhs.GetString(); + if(str1 == str2) { return true; } // fast path for constant string + + return (std::memcmp(str1, str2, sizeof(Ch) * len1) == 0); + } + + Data data_; + /* + * Stuff after this is to support testing of traps in the software. + */ +public: + // + // Get the raw underlying pointer for a Value if it's malloc'ed memory + // + const void *trap_GetMallocPointer(bool validate = true) const { + if (IsObjectHT()) { + return GetMembersPointerHT(validate); + } else if (IsObject()) { + return GetMembersPointerVec(validate); + } else if (IsArray()) { + return GetElementsPointer(validate); + } else if (IsDouble() && 0 == (data_.f.flags & kInlineStrFlag)) { + return GetDoubleString(validate); + } else if (IsString() && (0 == (data_.f.flags & kInlineStrFlag))) { + return GetStringPointer(validate); + } else { + return nullptr; + } + } + +}; + +//! GenericValue with UTF8 encoding +typedef GenericValue > Value; + +/////////////////////////////////////////////////////////////////////////////// +// GenericDocument + +//! A document for parsing JSON text as DOM. +/*! + \note implements Handler concept + \tparam Encoding Encoding for both parsing and string storage. + \tparam Allocator Allocator for allocating memory for the DOM + \tparam StackAllocator Allocator for allocating memory for stack during parsing. + \warning Although GenericDocument inherits from GenericValue, the API does \b not provide any virtual functions, especially no virtual destructor. To avoid memory leaks, do not \c delete a GenericDocument object via a pointer to a GenericValue. +*/ +template +class GenericDocument : public GenericValue { +public: + typedef typename Encoding::Ch Ch; //!< Character type derived from Encoding. + typedef GenericValue ValueType; //!< Value type of the document. + typedef Allocator AllocatorType; //!< Allocator type from template parameter. + + //! 
Constructor + /*! Creates an empty document of specified type. + \param type Mandatory type of object to create. + \param allocator Optional allocator for allocating memory. + \param stackCapacity Optional initial capacity of stack in bytes. + \param stackAllocator Optional allocator for allocating memory for stack. + */ + explicit GenericDocument(Type type, Allocator* allocator = 0, size_t stackCapacity = kDefaultStackCapacity, StackAllocator* stackAllocator = 0) : + GenericValue(type), allocator_(allocator), ownAllocator_(0), stack_(stackAllocator, stackCapacity), parseResult_(), curDepth_(0), maxDepth_(0) + { + if (!allocator_) + ownAllocator_ = allocator_ = RAPIDJSON_NEW(Allocator)(); + } + + //! Constructor + /*! Creates an empty document which type is Null. + \param allocator Optional allocator for allocating memory. + \param stackCapacity Optional initial capacity of stack in bytes. + \param stackAllocator Optional allocator for allocating memory for stack. + */ + GenericDocument(Allocator* allocator = 0, size_t stackCapacity = kDefaultStackCapacity, StackAllocator* stackAllocator = 0) : + allocator_(allocator), ownAllocator_(0), stack_(stackAllocator, stackCapacity), parseResult_(), curDepth_(0), maxDepth_(0) + { + if (!allocator_) + ownAllocator_ = allocator_ = RAPIDJSON_NEW(Allocator)(); + } + +#if RAPIDJSON_HAS_CXX11_RVALUE_REFS + //! Move constructor in C++11 + GenericDocument(GenericDocument&& rhs) RAPIDJSON_NOEXCEPT + : ValueType(std::forward(rhs)), // explicit cast to avoid prohibited move from Document + allocator_(rhs.allocator_), + ownAllocator_(rhs.ownAllocator_), + stack_(std::move(rhs.stack_)), + parseResult_(rhs.parseResult_), + curDepth_(rhs.curDepth_), + maxDepth_(rhs.maxDepth_) + { + rhs.allocator_ = 0; + rhs.ownAllocator_ = 0; + rhs.parseResult_ = ParseResult(); + } +#endif + + ~GenericDocument() { + // Clear the ::ValueType before ownAllocator is destroyed, ~ValueType() + // runs last and may access its elements or members which would be freed + // with an allocator like MemoryPoolAllocator (CrtAllocator does not + // free its data when destroyed, but MemoryPoolAllocator does). + if (ownAllocator_) { + ValueType::SetNull(); + } + Destroy(); + } + +#if RAPIDJSON_HAS_CXX11_RVALUE_REFS + //! Move assignment in C++11 + GenericDocument& operator=(GenericDocument&& rhs) RAPIDJSON_NOEXCEPT + { + // The cast to ValueType is necessary here, because otherwise it would + // attempt to call GenericValue's templated assignment operator. + ValueType::operator=(std::forward(rhs)); + + // Calling the destructor here would prematurely call stack_'s destructor + Destroy(); + + allocator_ = rhs.allocator_; + ownAllocator_ = rhs.ownAllocator_; + stack_ = std::move(rhs.stack_); + parseResult_ = rhs.parseResult_; + curDepth_ = rhs.curDepth_; + maxDepth_ = rhs.maxDepth_; + + rhs.allocator_ = 0; + rhs.ownAllocator_ = 0; + rhs.parseResult_ = ParseResult(); + curDepth_ = 0; + maxDepth_ = 0; + + return *this; + } +#endif + + //! Exchange the contents of this document with those of another. + /*! + \param rhs Another document. + \note Constant complexity. + \see GenericValue::Swap + */ + GenericDocument& Swap(GenericDocument& rhs) RAPIDJSON_NOEXCEPT { + ValueType::Swap(rhs); + stack_.Swap(rhs.stack_); + internal::Swap(allocator_, rhs.allocator_); + internal::Swap(ownAllocator_, rhs.ownAllocator_); + internal::Swap(parseResult_, rhs.parseResult_); + Swap(curDepth_, rhs.curDepth_); + Swap(maxDepth_, rhs.maxDepth_); + return *this; + } + + // Allow Swap with ValueType. 
+ // Refer to Effective C++ 3rd Edition/Item 33: Avoid hiding inherited names. + using ValueType::Swap; + + //! free-standing swap function helper + /*! + Helper function to enable support for common swap implementation pattern based on \c std::swap: + \code + void swap(MyClass& a, MyClass& b) { + using std::swap; + swap(a.doc, b.doc); + // ... + } + \endcode + \see Swap() + */ + friend inline void swap(GenericDocument& a, GenericDocument& b) RAPIDJSON_NOEXCEPT { a.Swap(b); } + + //! Populate this document by a generator which produces SAX events. + /*! \tparam Generator A functor with bool f(Handler) prototype. + \param g Generator functor which sends SAX events to the parameter. + \return The document itself for fluent API. + */ + template + GenericDocument& Populate(Generator& g) { + ClearStackOnExit scope(*this); + if (g(*this)) { + RAPIDJSON_ASSERT(stack_.GetSize() == sizeof(ValueType)); // Got one and only one root object + ValueType* t = stack_.template Pop(1); + ValueType::operator=(*t); // Move value from stack to document + t->~ValueType(); + } + return *this; + } + + //!@name Parse from stream + //!@{ + + //! Parse JSON text from an input stream (with Encoding conversion) + /*! \tparam parseFlags Combination of \ref ParseFlag. + \tparam SourceEncoding Encoding of input stream + \tparam InputStream Type of input stream, implementing Stream concept + \param is Input stream to be parsed. + \return The document itself for fluent API. + */ + template + GenericDocument& ParseStream(InputStream& is) { + curDepth_ = maxDepth_ = 0; + GenericReader reader( + stack_.HasAllocator() ? &stack_.GetAllocator() : 0); + ClearStackOnExit scope(*this); + parseResult_ = reader.template Parse(is, *this); + if (parseResult_) { + RAPIDJSON_ASSERT(stack_.GetSize() == sizeof(ValueType)); // Got one and only one root object + ValueType* t = stack_.template Pop(1); + ValueType::operator = (*t);// Move value from stack to document + t->~ValueType(); + } + return *this; + } + + //! Parse JSON text from an input stream + /*! \tparam parseFlags Combination of \ref ParseFlag. + \tparam InputStream Type of input stream, implementing Stream concept + \param is Input stream to be parsed. + \return The document itself for fluent API. + */ + template + GenericDocument& ParseStream(InputStream& is) { + return ParseStream(is); + } + + //! Parse JSON text from an input stream (with \ref kParseDefaultFlags) + /*! \tparam InputStream Type of input stream, implementing Stream concept + \param is Input stream to be parsed. + \return The document itself for fluent API. + */ + template + GenericDocument& ParseStream(InputStream& is) { + return ParseStream(is); + } + //!@} + + //!@name Parse in-place from mutable string + //!@{ + + //! Parse JSON text from a mutable string + /*! \tparam parseFlags Combination of \ref ParseFlag. + \param str Mutable zero-terminated string to be parsed. + \return The document itself for fluent API. + */ + template + GenericDocument& ParseInsitu(Ch* str) { + GenericInsituStringStream s(str); + return ParseStream(s); + } + + //! Parse JSON text from a mutable string (with \ref kParseDefaultFlags) + /*! \param str Mutable zero-terminated string to be parsed. + \return The document itself for fluent API. + */ + GenericDocument& ParseInsitu(Ch* str) { + return ParseInsitu(str); + } + //!@} + + //!@name Parse from read-only string + //!@{ + + //! Parse JSON text from a read-only string (with Encoding conversion) + /*! 
\tparam parseFlags Combination of \ref ParseFlag (must not contain \ref kParseInsituFlag). + \tparam SourceEncoding Transcoding from input Encoding + \param str Read-only zero-terminated string to be parsed. + */ + template + GenericDocument& Parse(const typename SourceEncoding::Ch* str) { + RAPIDJSON_ASSERT(!(parseFlags & kParseInsituFlag)); + GenericStringStream s(str); + return ParseStream(s); + } + + //! Parse JSON text from a read-only string + /*! \tparam parseFlags Combination of \ref ParseFlag (must not contain \ref kParseInsituFlag). + \param str Read-only zero-terminated string to be parsed. + */ + template + GenericDocument& Parse(const Ch* str) { + return Parse(str); + } + + //! Parse JSON text from a read-only string (with \ref kParseDefaultFlags) + /*! \param str Read-only zero-terminated string to be parsed. + */ + GenericDocument& Parse(const Ch* str) { + return Parse(str); + } + + template + GenericDocument& Parse(const typename SourceEncoding::Ch* str, size_t length) { + RAPIDJSON_ASSERT(!(parseFlags & kParseInsituFlag)); + MemoryStream ms(reinterpret_cast(str), length * sizeof(typename SourceEncoding::Ch)); + EncodedInputStream is(ms); + ParseStream(is); + return *this; + } + + template + GenericDocument& Parse(const Ch* str, size_t length) { + return Parse(str, length); + } + + GenericDocument& Parse(const Ch* str, size_t length) { + return Parse(str, length); + } + +#if RAPIDJSON_HAS_STDSTRING + template + GenericDocument& Parse(const std::basic_string& str) { + // c_str() is constant complexity according to standard. Should be faster than Parse(const char*, size_t) + return Parse(str.c_str()); + } + + template + GenericDocument& Parse(const std::basic_string& str) { + return Parse(str.c_str()); + } + + GenericDocument& Parse(const std::basic_string& str) { + return Parse(str); + } +#endif // RAPIDJSON_HAS_STDSTRING + + //!@} + + //!@name Handling parse errors + //!@{ + + //! Whether a parse error has occurred in the last parsing. + bool HasParseError() const { return parseResult_.IsError(); } + + //! Get the \ref ParseErrorCode of last parsing. + ParseErrorCode GetParseError() const { return parseResult_.Code(); } + + //! Get the position of last parsing error in input, 0 otherwise. + size_t GetErrorOffset() const { return parseResult_.Offset(); } + + //! Implicit conversion to get the last parse result +#ifndef __clang // -Wdocumentation + /*! \return \ref ParseResult of the last parse operation + + \code + Document doc; + ParseResult ok = doc.Parse(json); + if (!ok) + printf( "JSON parse error: %s (%u)\n", GetParseError_En(ok.Code()), ok.Offset()); + \endcode + */ +#endif + operator ParseResult() const { return parseResult_; } + //!@} + + //! Get the allocator of this document. + Allocator& GetAllocator() { + RAPIDJSON_ASSERT(allocator_); + return *allocator_; + } + + //! Get the capacity of stack in bytes. + size_t GetStackCapacity() const { return stack_.GetCapacity(); } + +private: + // clear stack on any exit from ParseStream, e.g. 
due to exception + struct ClearStackOnExit { + explicit ClearStackOnExit(GenericDocument& d) : d_(d) {} + ~ClearStackOnExit() { d_.ClearStack(); } + private: + ClearStackOnExit(const ClearStackOnExit&); + ClearStackOnExit& operator=(const ClearStackOnExit&); + GenericDocument& d_; + }; + + // callers of the following private Handler functions + // template friend class GenericReader; // for parsing + template friend class GenericValue; // for deep copying + +public: + // Implementation of Handler + bool Null() { new (stack_.template Push()) ValueType(); return true; } + bool Bool(bool b) { new (stack_.template Push()) ValueType(b); return true; } + bool Int(int i) { new (stack_.template Push()) ValueType(i); return true; } + bool Uint(unsigned i) { new (stack_.template Push()) ValueType(i); return true; } + bool Int64(int64_t i) { new (stack_.template Push()) ValueType(i); return true; } + bool Uint64(uint64_t i) { new (stack_.template Push()) ValueType(i); return true; } + bool Double(double d) { new (stack_.template Push()) ValueType(d); return true; } + + bool RawNumber(const Ch* str, SizeType length, bool copy) { + if (copy) + new (stack_.template Push()) ValueType(str, length, GetAllocator(), true, true); + else + new (stack_.template Push()) ValueType(str, length); + return true; + } + + bool String(const Ch* str, SizeType length, bool copy, bool noescape) { + if (copy) + new (stack_.template Push()) ValueType(str, length, GetAllocator(), noescape); + else + new (stack_.template Push()) ValueType(str, length, noescape); + return true; + } + + bool IncrDepth() { + curDepth_++; + if (curDepth_ > maxDepth_) maxDepth_ = curDepth_; + return curDepth_ <= json_get_max_path_limit(); + } + + bool DecrDepth() { + RAPIDJSON_ASSERT(curDepth_ > 0); + curDepth_--; + return true; + } + + size_t GetMaxDepth() const { return maxDepth_; } + + bool StartObject() { new (stack_.template Push()) ValueType(kObjectType); return IncrDepth(); } + + bool Key(const Ch* str, SizeType length, bool copy, bool noescape) { + (void)(copy); // Unused. + KeyTable_Handle h = keyTable->makeHandle(str, length, noescape); + new (stack_.template Push()) ValueType(h); + return true; + } + + bool EndObject(SizeType memberCount) { + ValueType* members = stack_.template Pop(memberCount * 2); // Each member is two Values + stack_.template Top()->SetObjectRaw(members, memberCount, GetAllocator()); + return DecrDepth(); + } + + bool StartArray() { new (stack_.template Push()) ValueType(kArrayType); return IncrDepth(); } + + bool EndArray(SizeType elementCount) { + ValueType* elements = stack_.template Pop(elementCount); + stack_.template Top()->SetArrayRaw(elements, elementCount, GetAllocator()); + return DecrDepth(); + } + +private: + //! Prohibit copying + GenericDocument(const GenericDocument&); + //! Prohibit assignment + GenericDocument& operator=(const GenericDocument&); + + void ClearStack() { + if (Allocator::kNeedFree) { + while (stack_.GetSize() > 0) // Here assumes all elements in stack array are GenericValue (Member is actually 2 GenericValue objects) + (stack_.template Pop(1))->~ValueType(); + } else { + stack_.Clear(); + } + stack_.ShrinkToFit(); + } + + void Destroy() { + RAPIDJSON_DELETE(ownAllocator_); + } + + static const size_t kDefaultStackCapacity = 1024; + Allocator* allocator_; + Allocator* ownAllocator_; + internal::Stack stack_; + ParseResult parseResult_; + uint32_t curDepth_; + uint32_t maxDepth_; +}; + +//! GenericDocument with UTF8 encoding +typedef GenericDocument > Document; + + +//! 
Helper class for accessing Value of array type. +/*! + Instance of this helper class is obtained by \c GenericValue::GetArray(). + In addition to all APIs for array type, it provides range-based for loop if \c RAPIDJSON_HAS_CXX11_RANGE_FOR=1. +*/ +template +class GenericArray { +public: + typedef GenericArray ConstArray; + typedef GenericArray Array; + typedef ValueT PlainType; + typedef typename internal::MaybeAddConst::Type ValueType; + typedef ValueType* ValueIterator; // This may be const or non-const iterator + typedef const ValueT* ConstValueIterator; + typedef typename ValueType::AllocatorType AllocatorType; + typedef typename ValueType::StringRefType StringRefType; + + template + friend class GenericValue; + + GenericArray(const GenericArray& rhs) : value_(rhs.value_) {} + GenericArray& operator=(const GenericArray& rhs) { value_ = rhs.value_; return *this; } + ~GenericArray() {} + + operator ValueType&() const { return value_; } + SizeType Size() const { return value_.Size(); } + SizeType Capacity() const { return value_.Capacity(); } + bool Empty() const { return value_.Empty(); } + void Clear() const { value_.Clear(); } + ValueType& operator[](SizeType index) const { return value_[index]; } + ValueIterator Begin() const { return value_.Begin(); } + ValueIterator End() const { return value_.End(); } + GenericArray Reserve(SizeType newCapacity, AllocatorType &allocator) const { value_.Reserve(newCapacity, allocator); return *this; } + GenericArray PushBack(ValueType& value, AllocatorType& allocator) const { value_.PushBack(value, allocator); return *this; } +#if RAPIDJSON_HAS_CXX11_RVALUE_REFS + GenericArray PushBack(ValueType&& value, AllocatorType& allocator) const { value_.PushBack(value, allocator); return *this; } +#endif // RAPIDJSON_HAS_CXX11_RVALUE_REFS + GenericArray PushBack(StringRefType value, AllocatorType& allocator) const { value_.PushBack(value, allocator); return *this; } + template RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr, internal::IsGenericValue >), (const GenericArray&)) PushBack(T value, AllocatorType& allocator) const { value_.PushBack(value, allocator); return *this; } + GenericArray PopBack() const { value_.PopBack(); return *this; } + ValueIterator Erase(ConstValueIterator pos) const { return value_.Erase(pos); } + ValueIterator Erase(ConstValueIterator first, ConstValueIterator last) const { return value_.Erase(first, last); } + +#if RAPIDJSON_HAS_CXX11_RANGE_FOR + ValueIterator begin() const { return value_.Begin(); } + ValueIterator end() const { return value_.End(); } +#endif + +private: + GenericArray(); + GenericArray(ValueType& value) : value_(value) {} + ValueType& value_; +}; + +//! Helper class for accessing Value of object type. +/*! + Instance of this helper class is obtained by \c GenericValue::GetObject(). + In addition to all APIs for array type, it provides range-based for loop if \c RAPIDJSON_HAS_CXX11_RANGE_FOR=1. 
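+
+    A brief usage sketch (illustrative only, not part of the upstream docs; it
+    assumes \c RAPIDJSON_HAS_CXX11_RANGE_FOR=1 and a \c Document \c d that
+    already holds an object root):
+    \code
+    Document d;
+    d.Parse("{\"a\":1,\"b\":2}");
+    for (auto& m : d.GetObject())   // range-based for via this helper
+        printf("%s = %d\n", m.name.GetString(), m.value.GetInt());
+    \endcode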
+*/ +template +class GenericObject { +public: + typedef GenericObject ConstObject; + typedef GenericObject Object; + typedef ValueT PlainType; + typedef typename internal::MaybeAddConst::Type ValueType; + typedef GenericMemberIterator MemberIterator; // This may be const or non-const iterator + typedef GenericMemberIterator ConstMemberIterator; + typedef typename ValueType::AllocatorType AllocatorType; + typedef typename ValueType::StringRefType StringRefType; + typedef typename ValueType::EncodingType EncodingType; + typedef typename ValueType::Ch Ch; + + template + friend class GenericValue; + + GenericObject(const GenericObject& rhs) : value_(rhs.value_) {} + GenericObject& operator=(const GenericObject& rhs) { value_ = rhs.value_; return *this; } + ~GenericObject() {} + + operator ValueType&() const { return value_; } + SizeType MemberCount() const { return value_.MemberCount(); } + SizeType MemberCapacity() const { return value_.MemberCapacity(); } + bool ObjectEmpty() const { return value_.ObjectEmpty(); } + template ValueType& operator[](T* name) const { return value_[name]; } + template ValueType& operator[](const GenericValue& name) const { return value_[name]; } +#if RAPIDJSON_HAS_STDSTRING + ValueType& operator[](const std::basic_string& name) const { return value_[name]; } +#endif + ValueType& operator[](const std::basic_string_view& name) const { return value_[name]; } + + MemberIterator MemberBegin() const { return value_.MemberBegin(); } + MemberIterator MemberEnd() const { return value_.MemberEnd(); } + GenericObject MemberReserve(SizeType newCapacity, AllocatorType &allocator) const { value_.MemberReserve(newCapacity, allocator); return *this; } + bool HasMember(const Ch* name) const { return value_.HasMember(name); } +#if RAPIDJSON_HAS_STDSTRING + bool HasMember(const std::basic_string& name) const { return value_.HasMember(name); } +#endif + bool HasMember(const std::basic_string_view& name) const { return value_.HasMember(name); } + + template bool HasMember(const GenericValue& name) const { return value_.HasMember(name); } + MemberIterator FindMember(const Ch* name) const { return value_.FindMember(name); } + template MemberIterator FindMember(const GenericValue& name) const { return value_.FindMember(name); } +#if RAPIDJSON_HAS_STDSTRING + MemberIterator FindMember(const std::basic_string& name) const { return value_.FindMember(name); } +#endif + MemberIterator FindMember(const std::basic_string_view& name) const { return value_.FindMember(name); } + + GenericObject AddMember(ValueType& name, ValueType& value, AllocatorType& allocator) const { value_.AddMember(name, value, allocator); return *this; } + GenericObject AddMember(ValueType& name, StringRefType value, AllocatorType& allocator) const { value_.AddMember(name, value, allocator); return *this; } +#if RAPIDJSON_HAS_STDSTRING + GenericObject AddMember(ValueType& name, std::basic_string& value, AllocatorType& allocator) const { value_.AddMember(name, value, allocator); return *this; } +#endif + GenericObject AddMember(ValueType& name, std::basic_string_view& value, AllocatorType& allocator) const { value_.AddMember(name, value, allocator); return *this; } + + template RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr, internal::IsGenericValue >), (ValueType&)) AddMember(ValueType& name, T value, AllocatorType& allocator) const { value_.AddMember(name, value, allocator); return *this; } +#if RAPIDJSON_HAS_CXX11_RVALUE_REFS + GenericObject AddMember(ValueType&& name, ValueType&& value, AllocatorType& allocator) const { 
value_.AddMember(name, value, allocator); return *this; } + GenericObject AddMember(ValueType&& name, ValueType& value, AllocatorType& allocator) const { value_.AddMember(name, value, allocator); return *this; } + GenericObject AddMember(ValueType& name, ValueType&& value, AllocatorType& allocator) const { value_.AddMember(name, value, allocator); return *this; } + GenericObject AddMember(StringRefType name, ValueType&& value, AllocatorType& allocator) const { value_.AddMember(name, value, allocator); return *this; } +#endif // RAPIDJSON_HAS_CXX11_RVALUE_REFS + GenericObject AddMember(StringRefType name, ValueType& value, AllocatorType& allocator) const { value_.AddMember(name, value, allocator); return *this; } + GenericObject AddMember(StringRefType name, StringRefType value, AllocatorType& allocator) const { value_.AddMember(name, value, allocator); return *this; } + template RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr, internal::IsGenericValue >), (GenericObject)) AddMember(StringRefType name, T value, AllocatorType& allocator) const { value_.AddMember(name, value, allocator); return *this; } + void RemoveAllMembers() { value_.RemoveAllMembers(); } + bool RemoveMember(const Ch* name) const { return value_.RemoveMember(name); } +#if RAPIDJSON_HAS_STDSTRING + bool RemoveMember(const std::basic_string& name) const { return value_.RemoveMember(name); } +#endif + bool RemoveMember(const std::basic_string_view& name) const { return value_.RemoveMember(name); } + + template bool RemoveMember(const GenericValue& name) const { return value_.RemoveMember(name); } + MemberIterator RemoveMember(MemberIterator m) const { return value_.RemoveMember(m); } + MemberIterator EraseMember(ConstMemberIterator pos) const { return value_.EraseMember(pos); } + MemberIterator EraseMember(ConstMemberIterator first, ConstMemberIterator last) const { return value_.EraseMember(first, last); } + bool EraseMember(const Ch* name) const { return value_.EraseMember(name); } +#if RAPIDJSON_HAS_STDSTRING + bool EraseMember(const std::basic_string& name) const { return EraseMember(ValueType(StringRef(name))); } +#endif + bool EraseMember(const std::basic_string_view& name) const { return EraseMember(ValueType(StringRef(name))); } + + template bool EraseMember(const GenericValue& name) const { return value_.EraseMember(name); } + +#if RAPIDJSON_HAS_CXX11_RANGE_FOR + MemberIterator begin() const { return value_.MemberBegin(); } + MemberIterator end() const { return value_.MemberEnd(); } +#endif + +private: + GenericObject(); + GenericObject(ValueType& value) : value_(value) {} + ValueType& value_; +}; + +RAPIDJSON_NAMESPACE_END +RAPIDJSON_DIAG_POP + +#ifdef RAPIDJSON_WINDOWS_GETOBJECT_WORKAROUND_APPLIED +#pragma pop_macro("GetObject") +#undef RAPIDJSON_WINDOWS_GETOBJECT_WORKAROUND_APPLIED +#endif + +#endif // RAPIDJSON_DOCUMENT_H_ diff --git a/src/rapidjson/license.txt b/src/rapidjson/license.txt new file mode 100644 index 0000000..7ccc161 --- /dev/null +++ b/src/rapidjson/license.txt @@ -0,0 +1,57 @@ +Tencent is pleased to support the open source community by making RapidJSON available. + +Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. + +If you have downloaded a copy of the RapidJSON binary from Tencent, please note that the RapidJSON binary is licensed under the MIT License. 
+If you have downloaded a copy of the RapidJSON source code from Tencent, please note that RapidJSON source code is licensed under the MIT License, except for the third-party components listed below which are subject to different license terms. Your integration of RapidJSON into your own projects may require compliance with the MIT License, as well as the other licenses applicable to the third-party components included within RapidJSON. To avoid the problematic JSON license in your own projects, it's sufficient to exclude the bin/jsonchecker/ directory, as it's the only code under the JSON license. +A copy of the MIT License is included in this file. + +Other dependencies and licenses: + +Open Source Software Licensed Under the BSD License: +-------------------------------------------------------------------- + +The msinttypes r29 +Copyright (c) 2006-2013 Alexander Chemeris +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. +* Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. +* Neither the name of copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +Open Source Software Licensed Under the JSON License: +-------------------------------------------------------------------- + +json.org +Copyright (c) 2002 JSON.org +All Rights Reserved. + +JSON_checker +Copyright (c) 2002 JSON.org +All Rights Reserved. + + +Terms of the JSON License: +--------------------------------------------------- + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +The Software shall be used for Good, not Evil. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + +Terms of the MIT License: +-------------------------------------------------------------------- + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/src/rapidjson/prettywriter.h b/src/rapidjson/prettywriter.h new file mode 100644 index 0000000..0ee6c8f --- /dev/null +++ b/src/rapidjson/prettywriter.h @@ -0,0 +1,393 @@ +// Tencent is pleased to support the open source community by making RapidJSON available. +// +// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. +// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// http://opensource.org/licenses/MIT +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef RAPIDJSON_PRETTYWRITER_H_ +#define RAPIDJSON_PRETTYWRITER_H_ + +#include +#include "json/json.h" + +#ifdef __GNUC__ +RAPIDJSON_DIAG_PUSH +RAPIDJSON_DIAG_OFF(effc++) +#endif + +#if defined(__clang__) +RAPIDJSON_DIAG_PUSH +RAPIDJSON_DIAG_OFF(c++98-compat) +#endif + +RAPIDJSON_NAMESPACE_BEGIN + +//! Combination of PrettyWriter format flags. +/*! \see PrettyWriter::SetFormatOptions + */ +enum PrettyFormatOptions { + kFormatDefault = 0, //!< Default pretty formatting. + kFormatSingleLineArray = 1 //!< Format arrays on a single line. +}; + +//! Writer with indentation and spacing. +/*! + \tparam OutputStream Type of output os. + \tparam SourceEncoding Encoding of source string. + \tparam TargetEncoding Encoding of output stream. + \tparam StackAllocator Type of allocator for allocating memory of stack. +*/ +template, typename TargetEncoding = UTF8<>, typename StackAllocator = CrtAllocator, unsigned writeFlags = kWriteDefaultFlags> +class PrettyWriter : public Writer { +public: + typedef Writer Base; + typedef typename Base::Ch Ch; + + //! Constructor + /*! \param os Output stream. + \param allocator User supplied allocator. If it is null, it will create a private one. + \param levelDepth Initial capacity of stack. 
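+
+        A configuration sketch (illustrative only; it assumes a \c StringBuffer
+        from stringbuffer.h as the output stream, and string literals so the
+        stored \c std::string_view arguments stay valid):
+        \code
+        StringBuffer sb;
+        PrettyWriter<StringBuffer> writer(sb);
+        writer.SetIndent("  ").SetNewline("\n").SetSpace(" ");
+        \endcode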
+ */
+    explicit PrettyWriter(OutputStream& os, StackAllocator* allocator = 0, size_t levelDepth = Base::kDefaultLevelDepth) :
+        Base(os, allocator, levelDepth), formatOptions_(kFormatDefault), initialLevel(0), curDepth(0), maxDepth(0) {}
+
+    explicit PrettyWriter(StackAllocator* allocator = 0, size_t levelDepth = Base::kDefaultLevelDepth) :
+        Base(allocator, levelDepth), formatOptions_(kFormatDefault), initialLevel(0), curDepth(0), maxDepth(0) {}
+
+#if RAPIDJSON_HAS_CXX11_RVALUE_REFS
+    PrettyWriter(PrettyWriter&& rhs) :
+        Base(std::forward<PrettyWriter>(rhs)), formatOptions_(rhs.formatOptions_),
+        newline_(rhs.newline_), indent_(rhs.indent_), space_(rhs.space_),
+        initialLevel(rhs.initialLevel), curDepth(rhs.curDepth), maxDepth(rhs.maxDepth) {}
+#endif
+
+    //! Set pretty writer formatting options.
+    /*! \param options Formatting options.
+    */
+    PrettyWriter& SetFormatOptions(PrettyFormatOptions options) {
+        formatOptions_ = options;
+        return *this;
+    }
+    PrettyWriter& SetNewline(const std::string_view &newline) {
+        newline_ = newline;
+        return *this;
+    }
+    PrettyWriter& SetIndent(const std::string_view &indent) {
+        indent_ = indent;
+        return *this;
+    }
+    PrettyWriter& SetSpace(const std::string_view &space) {
+        space_ = space;
+        return *this;
+    }
+    PrettyWriter& SetInitialLevel(size_t il) {
+        initialLevel = il;
+        return *this;
+    }
+
+    /*! @name Implementation of Handler
+        \see Handler
+    */
+    //@{
+
+    bool Null() { PrettyPrefix(kNullType); return Base::EndValue(Base::WriteNull()); }
+    bool Bool(bool b) { PrettyPrefix(b ? kTrueType : kFalseType); return Base::EndValue(Base::WriteBool(b)); }
+    bool Int(int i) { PrettyPrefix(kNumberType); return Base::EndValue(Base::WriteInt(i)); }
+    bool Uint(unsigned u) { PrettyPrefix(kNumberType); return Base::EndValue(Base::WriteUint(u)); }
+    bool Int64(int64_t i64) { PrettyPrefix(kNumberType); return Base::EndValue(Base::WriteInt64(i64)); }
+    bool Uint64(uint64_t u64) { PrettyPrefix(kNumberType); return Base::EndValue(Base::WriteUint64(u64)); }
+    bool Double(double d) { PrettyPrefix(kNumberType); return Base::EndValue(Base::WriteDouble(d)); }
+
+    bool RawNumber(const Ch* str, SizeType length, bool copy = false) {
+        RAPIDJSON_ASSERT(str != 0);
+        (void)copy;
+        PrettyPrefix(kNumberType);
+        return Base::EndValue(Base::WriteDouble(str, length));
+    }
+
+    bool String(const Ch* str, SizeType length, bool copy = false) {
+        RAPIDJSON_ASSERT(str != 0);
+        (void)copy;
+        PrettyPrefix(kStringType);
+        return Base::EndValue(Base::WriteString(str, length));
+    }
+
+#if RAPIDJSON_HAS_STDSTRING
+    bool String(const std::basic_string<Ch>& str) {
+        return String(str.data(), SizeType(str.size()));
+    }
+#endif
+
+    size_t GetMaxDepth() {
+        return maxDepth;
+    }
+
+    bool StartObject() {
+        IncrDepth();
+        PrettyPrefix(kObjectType);
+        new (Base::level_stack_.template Push<typename Base::Level>()) typename Base::Level(false);
+        return Base::WriteStartObject();
+    }
+
+    bool Key(const Ch* str, SizeType length, bool copy = false) { return String(str, length, copy); }
+
+#if RAPIDJSON_HAS_STDSTRING
+    bool Key(const std::basic_string<Ch>& str) {
+        return Key(str.data(), SizeType(str.size()));
+    }
+#endif
+
+    bool EndObject(SizeType memberCount = 0) {
+        (void)memberCount;
+        RAPIDJSON_ASSERT(Base::level_stack_.GetSize() >= sizeof(typename Base::Level)); // not inside an Object
+        RAPIDJSON_ASSERT(!Base::level_stack_.template Top<typename Base::Level>()->inArray); // currently inside an Array, not Object
+        RAPIDJSON_ASSERT(0 == Base::level_stack_.template Top<typename Base::Level>()->valueCount % 2); // Object has a Key without a Value
+
+        bool empty = Base::level_stack_.template Pop<typename Base::Level>(1)->valueCount == 0;
+
+        if (!empty) {
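+            // non-empty object: drop to a new line and re-indent before the
+            // closing brace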
WriteNewline(); + WriteIndent(); + } + bool ret = Base::EndValue(Base::WriteEndObject()); + (void)ret; + RAPIDJSON_ASSERT(ret == true); + if (Base::level_stack_.Empty()) // end of json text + Base::Flush(); + DecrDepth(); + return true; + } + + bool StartArray() { + IncrDepth(); + PrettyPrefix(kArrayType); + new (Base::level_stack_.template Push()) typename Base::Level(true); + return Base::WriteStartArray(); + } + + bool EndArray(SizeType memberCount = 0) { + (void)memberCount; + RAPIDJSON_ASSERT(Base::level_stack_.GetSize() >= sizeof(typename Base::Level)); + RAPIDJSON_ASSERT(Base::level_stack_.template Top()->inArray); + bool empty = Base::level_stack_.template Pop(1)->valueCount == 0; + + if (!empty && !(formatOptions_ & kFormatSingleLineArray)) { + WriteNewline(); + WriteIndent(); + } + bool ret = Base::EndValue(Base::WriteEndArray()); + (void)ret; + RAPIDJSON_ASSERT(ret == true); + if (Base::level_stack_.Empty()) // end of json text + Base::Flush(); + DecrDepth(); + return true; + } + + //@} + + /*! @name Convenience extensions */ + //@{ + + //! Simpler but slower overload. + bool String(const Ch* str) { return String(str, internal::StrLen(str)); } + bool Key(const Ch* str) { return Key(str, internal::StrLen(str)); } + + //@} + + //! Write a raw JSON value. + /*! + For user to write a stringified JSON as a value. + + \param json A well-formed JSON value. It should not contain null character within [0, length - 1] range. + \param length Length of the json. + \param type Type of the root of json. + \note When using PrettyWriter::RawValue(), the result json may not be indented correctly. + */ + bool RawValue(const Ch* json, size_t length, Type type) { + RAPIDJSON_ASSERT(json != 0); + PrettyPrefix(type); + return Base::EndValue(Base::WriteRawValue(json, length)); + } + +protected: + void PrettyPrefix(Type type) { + (void)type; + if (Base::level_stack_.GetSize() != 0) { // this value is not at root + typename Base::Level* level = Base::level_stack_.template Top(); + + if (level->inArray) { + if (level->valueCount > 0) { + Base::os_->Put(','); // add comma if it is not the first element in array + if (formatOptions_ & kFormatSingleLineArray) + WriteSpace(); + } + + if (!(formatOptions_ & kFormatSingleLineArray)) { + WriteNewline(); + WriteIndent(); + } + } + else { // in object + if (level->valueCount > 0) { + if (level->valueCount % 2 == 0) { + Base::os_->Put(','); + WriteNewline(); + } + else { + Base::os_->Put(':'); + WriteSpace(); + } + } + else + WriteNewline(); + + if (level->valueCount % 2 == 0) + WriteIndent(); + } + if (!level->inArray && level->valueCount % 2 == 0) + RAPIDJSON_ASSERT(type == kStringType); // if it's in object, then even number should be a name + level->valueCount++; + } + else { + RAPIDJSON_ASSERT(!Base::hasRoot_); // Should only has one and only one root. 
+ Base::hasRoot_ = true; + } + } + void WriteStringView(const std::string_view& v) { + if (!v.empty()) { + size_t sz = v.size(); + char *buf = Base::os_->Push(sz); + v.copy(buf, sz); + } + } + void WriteString(const char *ptr, size_t len, bool noescape) { + if (noescape) { + char *p = Base::os_->Push(len + 2); + p[0] = '"'; + std::memcpy(p + 1, ptr, len); + p[len + 1] = '"'; + } else { + Base::WriteString(ptr, len); + } + } + void WriteNewline() { WriteStringView(newline_); } + void WriteSpace() { WriteStringView(space_); } + void WriteIndent() { + size_t count = initialLevel + (Base::level_stack_.GetSize() / sizeof(typename Base::Level)); + for (size_t i = 0; i < count; ++i) WriteStringView(indent_); + } + +public: + // + // Accelerated write when there's definitely no format + // + template + void FastWrite(JValue &value, size_t *max_depth) { + *max_depth = 0; + FastWrite_internal(value, 0, max_depth); + } + + PrettyFormatOptions formatOptions_; + std::string_view newline_; + std::string_view indent_; + std::string_view space_; + size_t initialLevel; + +private: + // Prohibit copy constructor & assignment operator. + PrettyWriter(const PrettyWriter&); + PrettyWriter& operator=(const PrettyWriter&); + size_t curDepth; + size_t maxDepth; + + void IncrDepth() { + curDepth++; + if (curDepth > maxDepth) maxDepth = curDepth; + } + + void DecrDepth() { + RAPIDJSON_ASSERT(curDepth > 0); + curDepth--; + } + + template + void FastWrite_internal(JValue &value, const size_t level, size_t *max_depth) { + if (level > *max_depth) *max_depth = level; + + bool firstElement; + switch (value.GetType()) { + case kStringType: + WriteString(value.GetString(), value.GetStringLength(), value.IsNoescape()); + break; + case kNullType: + Base::WriteNull(); + break; + case kFalseType: + Base::WriteBool(false); + break; + case kTrueType: + Base::WriteBool(true); + break; + case kObjectType: + Base::os_->Put('{'); + firstElement = true; + for (typename JValue::ConstMemberIterator m = value.MemberBegin(); m != value.MemberEnd(); ++m) { + if (!firstElement) { + Base::os_->Put(','); + } else { + firstElement = false; + } + WriteString(m->name.GetString(), m->name.GetStringLength(), m->name.IsNoescape()); + Base::os_->Put(':'); + FastWrite_internal(m->value, level + 1, max_depth); + } + Base::os_->Put('}'); + break; + case kArrayType: + Base::os_->Put('['); + firstElement = true; + for (typename JValue::ConstValueIterator v = value.Begin(); v != value.End(); ++v) { + if (!firstElement) { + Base::os_->Put(','); + } else { + firstElement = false; + } + FastWrite_internal(*v, level + 1, max_depth); + } + Base::os_->Put(']'); + break; + default: + RAPIDJSON_ASSERT(value.GetType() == kNumberType); + if (value.IsDouble()) { + Base::WriteDouble(value.GetDoubleString(), value.GetDoubleStringLength()); + } + else if (value.IsInt()) Base::WriteInt(value.GetInt()); + else if (value.IsUint()) Base::WriteUint(value.GetUint()); + else if (value.IsInt64()) Base::WriteInt64(value.GetInt64()); + else Base::WriteUint64(value.GetUint64()); + break; + } + } + +}; + +RAPIDJSON_NAMESPACE_END + +#if defined(__clang__) +RAPIDJSON_DIAG_POP +#endif + +#ifdef __GNUC__ +RAPIDJSON_DIAG_POP +#endif + +#endif // RAPIDJSON_RAPIDJSON_H_ diff --git a/src/rapidjson/reader.h b/src/rapidjson/reader.h new file mode 100644 index 0000000..7afa0a5 --- /dev/null +++ b/src/rapidjson/reader.h @@ -0,0 +1,2281 @@ +// Tencent is pleased to support the open source community by making RapidJSON available. 
+// +// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. +// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// http://opensource.org/licenses/MIT +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef RAPIDJSON_READER_H_ +#define RAPIDJSON_READER_H_ + +/*! \file reader.h */ +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#if defined(RAPIDJSON_SIMD) && defined(_MSC_VER) +#include +#pragma intrinsic(_BitScanForward) +#endif +#ifdef RAPIDJSON_SSE42 +#include +#elif defined(RAPIDJSON_SSE2) +#include +#elif defined(RAPIDJSON_NEON) +#include +#endif + +#ifdef __clang__ +RAPIDJSON_DIAG_PUSH +RAPIDJSON_DIAG_OFF(old-style-cast) +RAPIDJSON_DIAG_OFF(padded) +RAPIDJSON_DIAG_OFF(switch-enum) +#elif defined(_MSC_VER) +RAPIDJSON_DIAG_PUSH +RAPIDJSON_DIAG_OFF(4127) // conditional expression is constant +RAPIDJSON_DIAG_OFF(4702) // unreachable code +#endif + +#ifdef __GNUC__ +RAPIDJSON_DIAG_PUSH +RAPIDJSON_DIAG_OFF(effc++) +#endif + +//!@cond RAPIDJSON_HIDDEN_FROM_DOXYGEN +#define RAPIDJSON_NOTHING /* deliberately empty */ +#ifndef RAPIDJSON_PARSE_ERROR_EARLY_RETURN +#define RAPIDJSON_PARSE_ERROR_EARLY_RETURN(value) \ + RAPIDJSON_MULTILINEMACRO_BEGIN \ + if (RAPIDJSON_UNLIKELY(HasParseError())) { return value; } \ + RAPIDJSON_MULTILINEMACRO_END +#endif +#define RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID \ + RAPIDJSON_PARSE_ERROR_EARLY_RETURN(RAPIDJSON_NOTHING) +//!@endcond + +/*! \def RAPIDJSON_PARSE_ERROR_NORETURN + \ingroup RAPIDJSON_ERRORS + \brief Macro to indicate a parse error. + \param parseErrorCode \ref rapidjson::ParseErrorCode of the error + \param offset position of the error in JSON input (\c size_t) + + This macros can be used as a customization point for the internal + error handling mechanism of RapidJSON. + + A common usage model is to throw an exception instead of requiring the + caller to explicitly check the \ref rapidjson::GenericReader::Parse's + return value: + + \code + #define RAPIDJSON_PARSE_ERROR_NORETURN(parseErrorCode,offset) \ + throw ParseException(parseErrorCode, #parseErrorCode, offset) + + #include // std::runtime_error + #include "rapidjson/error/error.h" // rapidjson::ParseResult + + struct ParseException : std::runtime_error, rapidjson::ParseResult { + ParseException(rapidjson::ParseErrorCode code, const char* msg, size_t offset) + : std::runtime_error(msg), ParseResult(code, offset) {} + }; + + #include "rapidjson/reader.h" + \endcode + + \see RAPIDJSON_PARSE_ERROR, rapidjson::GenericReader::Parse + */ +#ifndef RAPIDJSON_PARSE_ERROR_NORETURN +#define RAPIDJSON_PARSE_ERROR_NORETURN(parseErrorCode, offset) \ + RAPIDJSON_MULTILINEMACRO_BEGIN \ + RAPIDJSON_ASSERT(!HasParseError()); /* Error can only be assigned once */ \ + SetParseError(parseErrorCode, offset); \ + RAPIDJSON_MULTILINEMACRO_END +#endif + +/*! \def RAPIDJSON_PARSE_ERROR + \ingroup RAPIDJSON_ERRORS + \brief (Internal) macro to indicate and handle a parse error. 
+ \param parseErrorCode \ref rapidjson::ParseErrorCode of the error + \param offset position of the error in JSON input (\c size_t) + + Invokes RAPIDJSON_PARSE_ERROR_NORETURN and stops the parsing. + + \see RAPIDJSON_PARSE_ERROR_NORETURN + \hideinitializer + */ +#ifndef RAPIDJSON_PARSE_ERROR +#define RAPIDJSON_PARSE_ERROR(parseErrorCode, offset) \ + RAPIDJSON_MULTILINEMACRO_BEGIN \ + RAPIDJSON_PARSE_ERROR_NORETURN(parseErrorCode, offset); \ + RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; \ + RAPIDJSON_MULTILINEMACRO_END +#endif + +#include // ParseErrorCode, ParseResult + +RAPIDJSON_NAMESPACE_BEGIN + +/////////////////////////////////////////////////////////////////////////////// +// ParseFlag + +/*! \def RAPIDJSON_PARSE_DEFAULT_FLAGS + \ingroup RAPIDJSON_CONFIG + \brief User-defined kParseDefaultFlags definition. + + User can define this as any \c ParseFlag combinations. +*/ +#ifndef RAPIDJSON_PARSE_DEFAULT_FLAGS +#define RAPIDJSON_PARSE_DEFAULT_FLAGS kParseNoFlags +#endif + +//! Combination of parseFlags +/*! \see Reader::Parse, Document::Parse, Document::ParseInsitu, Document::ParseStream + */ +enum ParseFlag { + kParseNoFlags = 0, //!< No flags are set. + kParseInsituFlag = 1, //!< In-situ(destructive) parsing. + kParseValidateEncodingFlag = 2, //!< Validate encoding of JSON strings. + kParseIterativeFlag = 4, //!< Iterative(constant complexity in terms of function call stack size) parsing. + kParseStopWhenDoneFlag = 8, //!< After parsing a complete JSON root from stream, stop further processing the rest of stream. When this flag is used, parser will not generate kParseErrorDocumentRootNotSingular error. + kParseFullPrecisionFlag = 16, //!< Parse number in full precision (but slower). + kParseCommentsFlag = 32, //!< Allow one-line (//) and multi-line (/**/) comments. + kParseNumbersAsStringsFlag = 64, //!< Parse all numbers (ints/doubles) as strings. + kParseTrailingCommasFlag = 128, //!< Allow trailing commas at the end of objects and arrays. + kParseNanAndInfFlag = 256, //!< Allow parsing NaN, Inf, Infinity, -Inf and -Infinity as doubles. + kParseEscapedApostropheFlag = 512, //!< Allow escaped apostrophe in strings. + kParseDefaultFlags = RAPIDJSON_PARSE_DEFAULT_FLAGS //!< Default parse flags. Can be customized by defining RAPIDJSON_PARSE_DEFAULT_FLAGS +}; + +/////////////////////////////////////////////////////////////////////////////// +// Handler + +/*! \class rapidjson::Handler + \brief Concept for receiving events from GenericReader upon parsing. + The functions return true if no error occurs. If they return false, + the event publisher should terminate the process. +\code +concept Handler { + typename Ch; + + bool Null(); + bool Bool(bool b); + bool Int(int i); + bool Uint(unsigned i); + bool Int64(int64_t i); + bool Uint64(uint64_t i); + bool Double(double d); + /// enabled via kParseNumbersAsStringsFlag, string is not null-terminated (use length) + bool RawNumber(const Ch* str, SizeType length, bool copy); + bool String(const Ch* str, SizeType length, bool copy); + bool StartObject(); + bool Key(const Ch* str, SizeType length, bool copy); + bool EndObject(SizeType memberCount); + bool StartArray(); + bool EndArray(SizeType elementCount); +}; +\endcode +*/ +/////////////////////////////////////////////////////////////////////////////// +// BaseReaderHandler + +//! Default implementation of Handler. +/*! This can be used as base class of any reader handler. 
+ \note implements Handler concept +*/ +template, typename Derived = void> +struct BaseReaderHandler { + typedef typename Encoding::Ch Ch; + + typedef typename internal::SelectIf, BaseReaderHandler, Derived>::Type Override; + + bool Default() { return true; } + bool Null() { return static_cast(*this).Default(); } + bool Bool(bool) { return static_cast(*this).Default(); } + bool Int(int) { return static_cast(*this).Default(); } + bool Uint(unsigned) { return static_cast(*this).Default(); } + bool Int64(int64_t) { return static_cast(*this).Default(); } + bool Uint64(uint64_t) { return static_cast(*this).Default(); } + bool Double(double) { return static_cast(*this).Default(); } + /// enabled via kParseNumbersAsStringsFlag, string is not null-terminated (use length) + bool RawNumber(const Ch* str, SizeType len, bool copy) { return static_cast(*this).String(str, len, copy); } + bool String(const Ch*, SizeType, bool) { return static_cast(*this).Default(); } + bool StartObject() { return static_cast(*this).Default(); } + bool Key(const Ch* str, SizeType len, bool copy) { return static_cast(*this).String(str, len, copy); } + bool EndObject(SizeType) { return static_cast(*this).Default(); } + bool StartArray() { return static_cast(*this).Default(); } + bool EndArray(SizeType) { return static_cast(*this).Default(); } +}; + +/////////////////////////////////////////////////////////////////////////////// +// StreamLocalCopy + +namespace internal { + +template::copyOptimization> +class StreamLocalCopy; + +//! Do copy optimization. +template +class StreamLocalCopy { +public: + StreamLocalCopy(Stream& original) : s(original), original_(original) {} + ~StreamLocalCopy() { original_ = s; } + + Stream s; + +private: + StreamLocalCopy& operator=(const StreamLocalCopy&) /* = delete */; + + Stream& original_; +}; + +//! Keep reference. +template +class StreamLocalCopy { +public: + StreamLocalCopy(Stream& original) : s(original) {} + + Stream& s; + +private: + StreamLocalCopy& operator=(const StreamLocalCopy&) /* = delete */; +}; + +} // namespace internal + +/////////////////////////////////////////////////////////////////////////////// +// SkipWhitespace + +//! Skip the JSON white spaces in a stream. +/*! \param is A input stream for skipping white spaces. + \note This function has SSE2/SSE4.2 specialization. +*/ +template +void SkipWhitespace(InputStream& is) { + internal::StreamLocalCopy copy(is); + InputStream& s(copy.s); + + typename InputStream::Ch c; + while ((c = s.Peek()) == ' ' || c == '\n' || c == '\r' || c == '\t') + s.Take(); +} + +inline const char* SkipWhitespace(const char* p, const char* end) { + while (p != end && (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')) + ++p; + return p; +} + +#if defined(RAPIDJSON_SSE42) && !defined(__SANITIZE_ADDRESS__) +//! Skip whitespace with SSE 4.2 pcmpistrm instruction, testing 16 8-byte characters at once. 
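+//!
+//! In outline (a reader's note on the intrinsics below): step byte-by-byte up
+//! to the next 16-byte boundary (an aligned _mm_load_si128 must not straddle
+//! a page), then test 16 bytes per iteration against " \n\r\t" and return at
+//! the first byte that is not one of them.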
+inline const char *SkipWhitespace_SIMD(const char* p) { + // Fast return for single non-whitespace + if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t') + ++p; + else + return p; + + // 16-byte align to the next boundary + const char* nextAligned = reinterpret_cast((reinterpret_cast(p) + 15) & static_cast(~15)); + while (p != nextAligned) + if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t') + ++p; + else + return p; + + // The rest of string using SIMD + static const char whitespace[16] = " \n\r\t"; + const __m128i w = _mm_loadu_si128(reinterpret_cast(&whitespace[0])); + + for (;; p += 16) { + const __m128i s = _mm_load_si128(reinterpret_cast(p)); + const int r = _mm_cmpistri(w, s, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_LEAST_SIGNIFICANT | _SIDD_NEGATIVE_POLARITY); + if (r != 16) // some of characters is non-whitespace + return p + r; + } +} + +inline const char *SkipWhitespace_SIMD(const char* p, const char* end) { + // Fast return for single non-whitespace + if (p != end && (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')) + ++p; + else + return p; + + // The middle of string using SIMD + static const char whitespace[16] = " \n\r\t"; + const __m128i w = _mm_loadu_si128(reinterpret_cast(&whitespace[0])); + + for (; p <= end - 16; p += 16) { + const __m128i s = _mm_loadu_si128(reinterpret_cast(p)); + const int r = _mm_cmpistri(w, s, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_LEAST_SIGNIFICANT | _SIDD_NEGATIVE_POLARITY); + if (r != 16) // some of characters is non-whitespace + return p + r; + } + + return SkipWhitespace(p, end); +} + +#elif defined(RAPIDJSON_SSE2) && !defined(__SANITIZE_ADDRESS__) + +//! Skip whitespace with SSE2 instructions, testing 16 8-byte characters at once. +inline const char *SkipWhitespace_SIMD(const char* p) { + // Fast return for single non-whitespace + if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t') + ++p; + else + return p; + + // 16-byte align to the next boundary + const char* nextAligned = reinterpret_cast((reinterpret_cast(p) + 15) & static_cast(~15)); + while (p != nextAligned) + if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t') + ++p; + else + return p; + + // The rest of string + #define C16(c) { c, c, c, c, c, c, c, c, c, c, c, c, c, c, c, c } + static const char whitespaces[4][16] = { C16(' '), C16('\n'), C16('\r'), C16('\t') }; + #undef C16 + + const __m128i w0 = _mm_loadu_si128(reinterpret_cast(&whitespaces[0][0])); + const __m128i w1 = _mm_loadu_si128(reinterpret_cast(&whitespaces[1][0])); + const __m128i w2 = _mm_loadu_si128(reinterpret_cast(&whitespaces[2][0])); + const __m128i w3 = _mm_loadu_si128(reinterpret_cast(&whitespaces[3][0])); + + for (;; p += 16) { + const __m128i s = _mm_load_si128(reinterpret_cast(p)); + __m128i x = _mm_cmpeq_epi8(s, w0); + x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w1)); + x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w2)); + x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w3)); + unsigned short r = static_cast(~_mm_movemask_epi8(x)); + if (r != 0) { // some of characters may be non-whitespace +#ifdef _MSC_VER // Find the index of first non-whitespace + unsigned long offset; + _BitScanForward(&offset, r); + return p + offset; +#else + return p + __builtin_ffs(r) - 1; +#endif + } + } +} + +inline const char *SkipWhitespace_SIMD(const char* p, const char* end) { + // Fast return for single non-whitespace + if (p != end && (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')) + ++p; + else + return p; + + // The rest of string + #define C16(c) { c, c, c, c, c, c, c, c, c, c, c, c, c, c, c, c } + 
static const char whitespaces[4][16] = { C16(' '), C16('\n'), C16('\r'), C16('\t') }; + #undef C16 + + const __m128i w0 = _mm_loadu_si128(reinterpret_cast(&whitespaces[0][0])); + const __m128i w1 = _mm_loadu_si128(reinterpret_cast(&whitespaces[1][0])); + const __m128i w2 = _mm_loadu_si128(reinterpret_cast(&whitespaces[2][0])); + const __m128i w3 = _mm_loadu_si128(reinterpret_cast(&whitespaces[3][0])); + + for (; p <= end - 16; p += 16) { + const __m128i s = _mm_loadu_si128(reinterpret_cast(p)); + __m128i x = _mm_cmpeq_epi8(s, w0); + x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w1)); + x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w2)); + x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w3)); + unsigned short r = static_cast(~_mm_movemask_epi8(x)); + if (r != 0) { // some of characters may be non-whitespace +#ifdef _MSC_VER // Find the index of first non-whitespace + unsigned long offset; + _BitScanForward(&offset, r); + return p + offset; +#else + return p + __builtin_ffs(r) - 1; +#endif + } + } + + return SkipWhitespace(p, end); +} + +#elif defined(RAPIDJSON_NEON) && !defined(__SANITIZE_ADDRESS__) + +//! Skip whitespace with ARM Neon instructions, testing 16 8-byte characters at once. +inline const char *SkipWhitespace_SIMD(const char* p) { + // Fast return for single non-whitespace + if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t') + ++p; + else + return p; + + // 16-byte align to the next boundary + const char* nextAligned = reinterpret_cast((reinterpret_cast(p) + 15) & static_cast(~15)); + while (p != nextAligned) + if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t') + ++p; + else + return p; + + const uint8x16_t w0 = vmovq_n_u8(' '); + const uint8x16_t w1 = vmovq_n_u8('\n'); + const uint8x16_t w2 = vmovq_n_u8('\r'); + const uint8x16_t w3 = vmovq_n_u8('\t'); + + for (;; p += 16) { + const uint8x16_t s = vld1q_u8(reinterpret_cast(p)); + uint8x16_t x = vceqq_u8(s, w0); + x = vorrq_u8(x, vceqq_u8(s, w1)); + x = vorrq_u8(x, vceqq_u8(s, w2)); + x = vorrq_u8(x, vceqq_u8(s, w3)); + + x = vmvnq_u8(x); // Negate + x = vrev64q_u8(x); // Rev in 64 + uint64_t low = vgetq_lane_u64(vreinterpretq_u64_u8(x), 0); // extract + uint64_t high = vgetq_lane_u64(vreinterpretq_u64_u8(x), 1); // extract + + if (low == 0) { + if (high != 0) { + uint32_t lz = internal::clzll(high); + return p + 8 + (lz >> 3); + } + } else { + uint32_t lz = internal::clzll(low); + return p + (lz >> 3); + } + } +} + +inline const char *SkipWhitespace_SIMD(const char* p, const char* end) { + // Fast return for single non-whitespace + if (p != end && (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')) + ++p; + else + return p; + + const uint8x16_t w0 = vmovq_n_u8(' '); + const uint8x16_t w1 = vmovq_n_u8('\n'); + const uint8x16_t w2 = vmovq_n_u8('\r'); + const uint8x16_t w3 = vmovq_n_u8('\t'); + + for (; p <= end - 16; p += 16) { + const uint8x16_t s = vld1q_u8(reinterpret_cast(p)); + uint8x16_t x = vceqq_u8(s, w0); + x = vorrq_u8(x, vceqq_u8(s, w1)); + x = vorrq_u8(x, vceqq_u8(s, w2)); + x = vorrq_u8(x, vceqq_u8(s, w3)); + + x = vmvnq_u8(x); // Negate + x = vrev64q_u8(x); // Rev in 64 + uint64_t low = vgetq_lane_u64(vreinterpretq_u64_u8(x), 0); // extract + uint64_t high = vgetq_lane_u64(vreinterpretq_u64_u8(x), 1); // extract + + if (low == 0) { + if (high != 0) { + uint32_t lz = internal::clzll(high); + return p + 8 + (lz >> 3); + } + } else { + uint32_t lz = internal::clzll(low); + return p + (lz >> 3); + } + } + + return SkipWhitespace(p, end); +} + +#endif // RAPIDJSON_NEON + +#if defined(RAPIDJSON_SIMD) && !defined(__SANITIZE_ADDRESS__) 
+//! Template function specialization for InsituStringStream +template<> inline void SkipWhitespace(InsituStringStream& is) { + is.src_ = const_cast(SkipWhitespace_SIMD(is.src_)); +} + +//! Template function specialization for StringStream +template<> inline void SkipWhitespace(StringStream& is) { + is.src_ = SkipWhitespace_SIMD(is.src_); +} + +template<> inline void SkipWhitespace(EncodedInputStream, MemoryStream>& is) { + is.is_.src_ = SkipWhitespace_SIMD(is.is_.src_, is.is_.end_); +} +#endif // RAPIDJSON_SIMD + +/////////////////////////////////////////////////////////////////////////////// +// GenericReader + +//! SAX-style JSON parser. Use \ref Reader for UTF8 encoding and default allocator. +/*! GenericReader parses JSON text from a stream, and send events synchronously to an + object implementing Handler concept. + + It needs to allocate a stack for storing a single decoded string during + non-destructive parsing. + + For in-situ parsing, the decoded string is directly written to the source + text string, no temporary buffer is required. + + A GenericReader object can be reused for parsing multiple JSON text. + + \tparam SourceEncoding Encoding of the input stream. + \tparam TargetEncoding Encoding of the parse output. + \tparam StackAllocator Allocator type for stack. +*/ +template +class GenericReader { +public: + typedef typename SourceEncoding::Ch Ch; //!< SourceEncoding character type + + //! Constructor. + /*! \param stackAllocator Optional allocator for allocating stack memory. (Only use for non-destructive parsing) + \param stackCapacity stack capacity in bytes for storing a single decoded string. (Only use for non-destructive parsing) + */ + GenericReader(StackAllocator* stackAllocator = 0, size_t stackCapacity = kDefaultStackCapacity) : + stack_(stackAllocator, stackCapacity), parseResult_(), state_(IterativeParsingStartState) {} + + //! Parse JSON text. + /*! \tparam parseFlags Combination of \ref ParseFlag. + \tparam InputStream Type of input stream, implementing Stream concept. + \tparam Handler Type of handler, implementing Handler concept. + \param is Input stream to be parsed. + \param handler The handler to receive events. + \return Whether the parsing is successful. + */ + template + ParseResult Parse(InputStream& is, Handler& handler) { + if (parseFlags & kParseIterativeFlag) + return IterativeParse(is, handler); + + parseResult_.Clear(); + + ClearStackOnExit scope(*this); + + SkipWhitespaceAndComments(is); + RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_); + + if (RAPIDJSON_UNLIKELY(is.Peek() == '\0')) { + RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorDocumentEmpty, is.Tell()); + RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_); + } + else { + ParseValue(is, handler); + RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_); + + if (!(parseFlags & kParseStopWhenDoneFlag)) { + SkipWhitespaceAndComments(is); + RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_); + + if (RAPIDJSON_UNLIKELY(is.Peek() != '\0')) { + RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorDocumentRootNotSingular, is.Tell()); + RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_); + } + } + } + + return parseResult_; + } + + //! Parse JSON text (with \ref kParseDefaultFlags) + /*! \tparam InputStream Type of input stream, implementing Stream concept + \tparam Handler Type of handler, implementing Handler concept. + \param is Input stream to be parsed. + \param handler The handler to receive events. + \return Whether the parsing is successful. 
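+
+        A SAX-style sketch (illustrative only; it follows the four-argument
+        String()/Key() shape this fork's reader emits, including the extra
+        \c noescape flag, and feeds it a \c MemoryStream wrapped in an
+        \c EncodedInputStream the way GenericDocument::Parse(str, length) does):
+        \code
+        struct KeyCounter {
+            size_t keys = 0;
+            bool Null() { return true; }
+            bool Bool(bool) { return true; }
+            bool Int(int) { return true; }
+            bool Uint(unsigned) { return true; }
+            bool Int64(int64_t) { return true; }
+            bool Uint64(uint64_t) { return true; }
+            bool Double(double) { return true; }
+            bool RawNumber(const char*, SizeType, bool) { return true; }
+            bool String(const char*, SizeType, bool, bool) { return true; }
+            bool StartObject() { return true; }
+            bool Key(const char*, SizeType, bool, bool) { ++keys; return true; }
+            bool EndObject(SizeType) { return true; }
+            bool StartArray() { return true; }
+            bool EndArray(SizeType) { return true; }
+        };
+
+        const char json[] = "{\"a\":1,\"b\":2}";
+        MemoryStream ms(json, sizeof(json) - 1);
+        EncodedInputStream<UTF8<>, MemoryStream> is(ms);
+        KeyCounter counter;
+        GenericReader<UTF8<>, UTF8<> > reader;
+        reader.Parse(is, counter);   // on success, counter.keys == 2
+        \endcode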
+ */ + template + ParseResult Parse(InputStream& is, Handler& handler) { + return Parse(is, handler); + } + + //! Initialize JSON text token-by-token parsing + /*! + */ + void IterativeParseInit() { + parseResult_.Clear(); + state_ = IterativeParsingStartState; + } + + //! Parse one token from JSON text + /*! \tparam InputStream Type of input stream, implementing Stream concept + \tparam Handler Type of handler, implementing Handler concept. + \param is Input stream to be parsed. + \param handler The handler to receive events. + \return Whether the parsing is successful. + */ + template + bool IterativeParseNext(InputStream& is, Handler& handler) { + while (RAPIDJSON_LIKELY(is.Peek() != '\0')) { + SkipWhitespaceAndComments(is); + + Token t = Tokenize(is.Peek()); + IterativeParsingState n = Predict(state_, t); + IterativeParsingState d = Transit(state_, t, n, is, handler); + + // If we've finished or hit an error... + if (RAPIDJSON_UNLIKELY(IsIterativeParsingCompleteState(d))) { + // Report errors. + if (d == IterativeParsingErrorState) { + HandleError(state_, is); + return false; + } + + // Transition to the finish state. + RAPIDJSON_ASSERT(d == IterativeParsingFinishState); + state_ = d; + + // If StopWhenDone is not set... + if (!(parseFlags & kParseStopWhenDoneFlag)) { + // ... and extra non-whitespace data is found... + SkipWhitespaceAndComments(is); + if (is.Peek() != '\0') { + // ... this is considered an error. + HandleError(state_, is); + return false; + } + } + + // Success! We are done! + return true; + } + + // Transition to the new state. + state_ = d; + + // If we parsed anything other than a delimiter, we invoked the handler, so we can return true now. + if (!IsIterativeParsingDelimiterState(n)) + return true; + } + + // We reached the end of file. + stack_.Clear(); + + if (state_ != IterativeParsingFinishState) { + HandleError(state_, is); + return false; + } + + return true; + } + + //! Check if token-by-token parsing JSON text is complete + /*! \return Whether the JSON has been fully decoded. + */ + RAPIDJSON_FORCEINLINE bool IterativeParseComplete() const { + return IsIterativeParsingCompleteState(state_); + } + + //! Whether a parse error has occurred in the last parsing. + bool HasParseError() const { return parseResult_.IsError(); } + + //! Get the \ref ParseErrorCode of last parsing. + ParseErrorCode GetParseErrorCode() const { return parseResult_.Code(); } + + //! Get the position of last parsing error in input, 0 otherwise. + size_t GetErrorOffset() const { return parseResult_.Offset(); } + +protected: + void SetParseError(ParseErrorCode code, size_t offset) { parseResult_.Set(code, offset); } + +private: + // Prohibit copy constructor & assignment operator. + GenericReader(const GenericReader&); + GenericReader& operator=(const GenericReader&); + + void ClearStack() { stack_.Clear(); } + + // clear stack on any exit from ParseStream, e.g. 
due to exception + struct ClearStackOnExit { + explicit ClearStackOnExit(GenericReader& r) : r_(r) {} + ~ClearStackOnExit() { r_.ClearStack(); } + private: + GenericReader& r_; + ClearStackOnExit(const ClearStackOnExit&); + ClearStackOnExit& operator=(const ClearStackOnExit&); + }; + + template + void SkipWhitespaceAndComments(InputStream& is) { + SkipWhitespace(is); + + if (parseFlags & kParseCommentsFlag) { + while (RAPIDJSON_UNLIKELY(Consume(is, '/'))) { + if (Consume(is, '*')) { + while (true) { + if (RAPIDJSON_UNLIKELY(is.Peek() == '\0')) + RAPIDJSON_PARSE_ERROR(kParseErrorUnspecificSyntaxError, is.Tell()); + else if (Consume(is, '*')) { + if (Consume(is, '/')) + break; + } + else + is.Take(); + } + } + else if (RAPIDJSON_LIKELY(Consume(is, '/'))) + while (is.Peek() != '\0' && is.Take() != '\n') {} + else + RAPIDJSON_PARSE_ERROR(kParseErrorUnspecificSyntaxError, is.Tell()); + + SkipWhitespace(is); + } + } + } + + // Parse object: { string : value, ... } + template + void ParseObject(InputStream& is, Handler& handler) { + RAPIDJSON_ASSERT(is.Peek() == '{'); + is.Take(); // Skip '{' + + if (RAPIDJSON_UNLIKELY(!handler.StartObject())) + RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell()); + + SkipWhitespaceAndComments(is); + RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; + + if (Consume(is, '}')) { + if (RAPIDJSON_UNLIKELY(!handler.EndObject(0))) // empty object + RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell()); + return; + } + + for (SizeType memberCount = 0;;) { + if (RAPIDJSON_UNLIKELY(is.Peek() != '"')) + RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissName, is.Tell()); + + ParseString(is, handler, true); + RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; + + SkipWhitespaceAndComments(is); + RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; + + if (RAPIDJSON_UNLIKELY(!Consume(is, ':'))) + RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissColon, is.Tell()); + + SkipWhitespaceAndComments(is); + RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; + + ParseValue(is, handler); + RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; + + SkipWhitespaceAndComments(is); + RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; + + ++memberCount; + + switch (is.Peek()) { + case ',': + is.Take(); + SkipWhitespaceAndComments(is); + RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; + break; + case '}': + is.Take(); + if (RAPIDJSON_UNLIKELY(!handler.EndObject(memberCount))) + RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell()); + return; + default: + RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissCommaOrCurlyBracket, is.Tell()); break; // This useless break is only for making warning and coverage happy + } + + if (parseFlags & kParseTrailingCommasFlag) { + if (is.Peek() == '}') { + if (RAPIDJSON_UNLIKELY(!handler.EndObject(memberCount))) + RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell()); + is.Take(); + return; + } + } + } + } + + // Parse array: [ value, ... 
] + template + void ParseArray(InputStream& is, Handler& handler) { + RAPIDJSON_ASSERT(is.Peek() == '['); + is.Take(); // Skip '[' + + if (RAPIDJSON_UNLIKELY(!handler.StartArray())) + RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell()); + + SkipWhitespaceAndComments(is); + RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; + + if (Consume(is, ']')) { + if (RAPIDJSON_UNLIKELY(!handler.EndArray(0))) // empty array + RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell()); + return; + } + + for (SizeType elementCount = 0;;) { + ParseValue(is, handler); + RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; + + ++elementCount; + SkipWhitespaceAndComments(is); + RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; + + if (Consume(is, ',')) { + SkipWhitespaceAndComments(is); + RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; + } + else if (Consume(is, ']')) { + if (RAPIDJSON_UNLIKELY(!handler.EndArray(elementCount))) + RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell()); + return; + } + else + RAPIDJSON_PARSE_ERROR(kParseErrorArrayMissCommaOrSquareBracket, is.Tell()); + + if (parseFlags & kParseTrailingCommasFlag) { + if (is.Peek() == ']') { + if (RAPIDJSON_UNLIKELY(!handler.EndArray(elementCount))) + RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell()); + is.Take(); + return; + } + } + } + } + + template + void ParseNull(InputStream& is, Handler& handler) { + RAPIDJSON_ASSERT(is.Peek() == 'n'); + is.Take(); + + if (RAPIDJSON_LIKELY(Consume(is, 'u') && Consume(is, 'l') && Consume(is, 'l'))) { + if (RAPIDJSON_UNLIKELY(!handler.Null())) + RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell()); + } + else + RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell()); + } + + template + void ParseTrue(InputStream& is, Handler& handler) { + RAPIDJSON_ASSERT(is.Peek() == 't'); + is.Take(); + + if (RAPIDJSON_LIKELY(Consume(is, 'r') && Consume(is, 'u') && Consume(is, 'e'))) { + if (RAPIDJSON_UNLIKELY(!handler.Bool(true))) + RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell()); + } + else + RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell()); + } + + template + void ParseFalse(InputStream& is, Handler& handler) { + RAPIDJSON_ASSERT(is.Peek() == 'f'); + is.Take(); + + if (RAPIDJSON_LIKELY(Consume(is, 'a') && Consume(is, 'l') && Consume(is, 's') && Consume(is, 'e'))) { + if (RAPIDJSON_UNLIKELY(!handler.Bool(false))) + RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell()); + } + else + RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell()); + } + + template + RAPIDJSON_FORCEINLINE static bool Consume(InputStream& is, typename InputStream::Ch expect) { + if (RAPIDJSON_LIKELY(is.Peek() == expect)) { + is.Take(); + return true; + } + else + return false; + } + + // Helper function to parse four hexadecimal digits in \uXXXX in ParseString(). 
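+    // For illustration: the four digits of "\u00E9" accumulate left-to-right
+    // as ((0 * 16 + 0) * 16 + 0xE) * 16 + 0x9 == 0x00E9, and
+    // TEncoding::Encode() later emits that codepoint as UTF-8 0xC3 0xA9.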
+ template + unsigned ParseHex4(InputStream& is, size_t escapeOffset) { + unsigned codepoint = 0; + for (int i = 0; i < 4; i++) { + Ch c = is.Peek(); + codepoint <<= 4; + codepoint += static_cast(c); + if (c >= '0' && c <= '9') + codepoint -= '0'; + else if (c >= 'A' && c <= 'F') + codepoint -= 'A' - 10; + else if (c >= 'a' && c <= 'f') + codepoint -= 'a' - 10; + else { + RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorStringUnicodeEscapeInvalidHex, escapeOffset); + RAPIDJSON_PARSE_ERROR_EARLY_RETURN(0); + } + is.Take(); + } + return codepoint; + } + + template + class StackStream { + public: + typedef CharType Ch; + + StackStream(internal::Stack& stack) : stack_(stack), length_(0) {} + RAPIDJSON_FORCEINLINE void Put(Ch c) { + *stack_.template Push() = c; + ++length_; + } + + RAPIDJSON_FORCEINLINE void* Push(SizeType count) { + length_ += count; + return stack_.template Push(count); + } + + size_t Length() const { return length_; } + + Ch* Pop() { + return stack_.template Pop(length_); + } + + private: + StackStream(const StackStream&); + StackStream& operator=(const StackStream&); + + internal::Stack& stack_; + SizeType length_; + }; + + // Parse string and generate String event. Different code paths for kParseInsituFlag. + template + void ParseString(InputStream& is, Handler& handler, bool isKey = false) { + internal::StreamLocalCopy copy(is); + InputStream& s(copy.s); + + RAPIDJSON_ASSERT(s.Peek() == '\"'); + s.Take(); // Skip '\"' + + bool success = false; + if (parseFlags & kParseInsituFlag) { + typename InputStream::Ch *head = s.PutBegin(); + ParseStringToStream(s, s); + RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; + size_t length = s.PutEnd(head) - 1; + RAPIDJSON_ASSERT(length <= 0xFFFFFFFF); + const typename TargetEncoding::Ch* const str = reinterpret_cast(head); + success = (isKey ? handler.Key(str, SizeType(length), false, false) : handler.String(str, SizeType(length), false, false)); + } + else { + // + // See if it's all non-escaped chars that don't require copying. + // + SizeType length; + const typename TargetEncoding::Ch* str = s.is_.src_; + if (ParseUnescapedString(s, length)) { + success = (isKey ? handler.Key(str, length, true, true) : handler.String(str, length, true, true)); + } else { + StackStream stackStream(stack_); + ParseStringToStream(s, stackStream); + RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; + SizeType length = static_cast(stackStream.Length()) - 1; + const typename TargetEncoding::Ch *const str = stackStream.Pop(); + success = (isKey ? handler.Key(str, length, true, false) : handler.String(str, length, true, false)); + } + } + if (RAPIDJSON_UNLIKELY(!success)) + RAPIDJSON_PARSE_ERROR(kParseErrorTermination, s.Tell()); + } + + // Parse string to an output is + // This function handles the prefix/suffix double quotes, escaping, and optional encoding validation. + template + RAPIDJSON_FORCEINLINE void ParseStringToStream(InputStream& is, OutputStream& os) { +//!@cond RAPIDJSON_HIDDEN_FROM_DOXYGEN +#define Z16 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 + static const char escape[256] = { + Z16, Z16, 0, 0,'\"', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, '/', + Z16, Z16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,'\\', 0, 0, 0, + 0, 0,'\b', 0, 0, 0,'\f', 0, 0, 0, 0, 0, 0, 0,'\n', 0, + 0, 0,'\r', 0,'\t', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + Z16, Z16, Z16, Z16, Z16, Z16, Z16, Z16 + }; +#undef Z16 +//!@endcond + + for (;;) { + // Scan and copy string before "\\\"" or < 0x20. This is an optional optimzation. 
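+            // (For reference: the escape[] table above maps the byte following
+            // a backslash to its decoded character, e.g. escape['n'] == '\n'
+            // and escape['\"'] == '\"'; entries left as 0, such as escape['u'],
+            // fall through to the dedicated branches below.)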
+ if (!(parseFlags & kParseValidateEncodingFlag)) + ScanCopyUnescapedString(is, os); + + Ch c = is.Peek(); + if (RAPIDJSON_UNLIKELY(c == '\\')) { // Escape + size_t escapeOffset = is.Tell(); // For invalid escaping, report the initial '\\' as error offset + is.Take(); + Ch e = is.Peek(); + if ((sizeof(Ch) == 1 || unsigned(e) < 256) && RAPIDJSON_LIKELY(escape[static_cast(e)])) { + is.Take(); + os.Put(static_cast(escape[static_cast(e)])); + } + else if ((parseFlags & kParseEscapedApostropheFlag) && RAPIDJSON_LIKELY(e == '\'')) { // Allow escaped apostrophe + is.Take(); + os.Put('\''); + } + else if (RAPIDJSON_LIKELY(e == 'u')) { // Unicode + is.Take(); + unsigned codepoint = ParseHex4(is, escapeOffset); + RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; + if (RAPIDJSON_UNLIKELY(codepoint >= 0xD800 && codepoint <= 0xDFFF)) { + // high surrogate, check if followed by valid low surrogate + if (RAPIDJSON_LIKELY(codepoint <= 0xDBFF)) { + // Handle UTF-16 surrogate pair + if (RAPIDJSON_UNLIKELY(!Consume(is, '\\') || !Consume(is, 'u'))) + RAPIDJSON_PARSE_ERROR(kParseErrorStringUnicodeSurrogateInvalid, escapeOffset); + unsigned codepoint2 = ParseHex4(is, escapeOffset); + RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; + if (RAPIDJSON_UNLIKELY(codepoint2 < 0xDC00 || codepoint2 > 0xDFFF)) + RAPIDJSON_PARSE_ERROR(kParseErrorStringUnicodeSurrogateInvalid, escapeOffset); + codepoint = (((codepoint - 0xD800) << 10) | (codepoint2 - 0xDC00)) + 0x10000; + } + // single low surrogate + else + { + RAPIDJSON_PARSE_ERROR(kParseErrorStringUnicodeSurrogateInvalid, escapeOffset); + } + } + TEncoding::Encode(os, codepoint); + } + else + RAPIDJSON_PARSE_ERROR(kParseErrorStringEscapeInvalid, escapeOffset); + } + else if (RAPIDJSON_UNLIKELY(c == '"')) { // Closing double quote + is.Take(); + os.Put('\0'); // null-terminate the string + return; + } + else if (RAPIDJSON_UNLIKELY(static_cast(c) < 0x20)) { // RFC 4627: unescaped = %x20-21 / %x23-5B / %x5D-10FFFF + if (c == '\0') + RAPIDJSON_PARSE_ERROR(kParseErrorStringMissQuotationMark, is.Tell()); + else + RAPIDJSON_PARSE_ERROR(kParseErrorStringInvalidEncoding, is.Tell()); + } + else { + size_t offset = is.Tell(); + if (RAPIDJSON_UNLIKELY((parseFlags & kParseValidateEncodingFlag ? 
+ !Transcoder::Validate(is, os) : + !Transcoder::Transcode(is, os)))) + RAPIDJSON_PARSE_ERROR(kParseErrorStringInvalidEncoding, offset); + } + } + } + + template + static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(InputStream&, OutputStream&) { + // Do nothing for generic version + } + + template + static bool ParseUnescapedString(InputStream& is, SizeType& len) { + InsituStringStream s(const_cast(is.is_.src_)); + s.PutBegin(); // Even though we don't put, this makes an assert in SkipUnescapedString happy + SkipUnescapedString(s); + if (s.Peek() != '"') { + // Parse failed, there's either an escape OR a premature end of string + return false; + } + len = s.src_ - is.is_.src_; + s.Take(); // Skip trailing " + is.is_.src_ = s.src_; + return true; + } + +#if (defined(RAPIDJSON_SSE2) || defined(RAPIDJSON_SSE42)) && !defined(__SANITIZE_ADDRESS__) + // StringStream -> StackStream + static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(StringStream& is, StackStream& os) { + const char* p = is.src_; + + // Scan one by one until alignment (unaligned load may cross page boundary and cause crash) + const char* nextAligned = reinterpret_cast((reinterpret_cast(p) + 15) & static_cast(~15)); + while (p != nextAligned) + if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast(*p) < 0x20)) { + is.src_ = p; + return; + } + else + os.Put(*p++); + + // The rest of string using SIMD + static const char dquote[16] = { '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"' }; + static const char bslash[16] = { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' }; + static const char space[16] = { 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F }; + const __m128i dq = _mm_loadu_si128(reinterpret_cast(&dquote[0])); + const __m128i bs = _mm_loadu_si128(reinterpret_cast(&bslash[0])); + const __m128i sp = _mm_loadu_si128(reinterpret_cast(&space[0])); + + for (;; p += 16) { + const __m128i s = _mm_load_si128(reinterpret_cast(p)); + const __m128i t1 = _mm_cmpeq_epi8(s, dq); + const __m128i t2 = _mm_cmpeq_epi8(s, bs); + const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x1F) == 0x1F + const __m128i x = _mm_or_si128(_mm_or_si128(t1, t2), t3); + unsigned short r = static_cast(_mm_movemask_epi8(x)); + if (RAPIDJSON_UNLIKELY(r != 0)) { // some of characters is escaped + SizeType length; + #ifdef _MSC_VER // Find the index of first escaped + unsigned long offset; + _BitScanForward(&offset, r); + length = offset; + #else + length = static_cast(__builtin_ffs(r) - 1); + #endif + if (length != 0) { + char* q = reinterpret_cast(os.Push(length)); + for (size_t i = 0; i < length; i++) + q[i] = p[i]; + + p += length; + } + break; + } + _mm_storeu_si128(reinterpret_cast<__m128i *>(os.Push(16)), s); + } + + is.src_ = p; + } + + // InsituStringStream -> InsituStringStream + static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(InsituStringStream& is, InsituStringStream& os) { + RAPIDJSON_ASSERT(&is == &os); + (void)os; + + if (is.src_ == is.dst_) { + SkipUnescapedString(is); + return; + } + + char* p = is.src_; + char *q = is.dst_; + + // Scan one by one until alignment (unaligned load may cross page boundary and cause crash) + const char* nextAligned = reinterpret_cast((reinterpret_cast(p) + 15) & static_cast(~15)); + while (p != nextAligned) + if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') 
|| RAPIDJSON_UNLIKELY(static_cast(*p) < 0x20)) { + is.src_ = p; + is.dst_ = q; + return; + } + else + *q++ = *p++; + + // The rest of string using SIMD + static const char dquote[16] = { '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"' }; + static const char bslash[16] = { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' }; + static const char space[16] = { 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F }; + const __m128i dq = _mm_loadu_si128(reinterpret_cast(&dquote[0])); + const __m128i bs = _mm_loadu_si128(reinterpret_cast(&bslash[0])); + const __m128i sp = _mm_loadu_si128(reinterpret_cast(&space[0])); + + for (;; p += 16, q += 16) { + const __m128i s = _mm_load_si128(reinterpret_cast(p)); + const __m128i t1 = _mm_cmpeq_epi8(s, dq); + const __m128i t2 = _mm_cmpeq_epi8(s, bs); + const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x1F) == 0x1F + const __m128i x = _mm_or_si128(_mm_or_si128(t1, t2), t3); + unsigned short r = static_cast(_mm_movemask_epi8(x)); + if (RAPIDJSON_UNLIKELY(r != 0)) { // some of characters is escaped + size_t length; +#ifdef _MSC_VER // Find the index of first escaped + unsigned long offset; + _BitScanForward(&offset, r); + length = offset; +#else + length = static_cast(__builtin_ffs(r) - 1); +#endif + for (const char* pend = p + length; p != pend; ) + *q++ = *p++; + break; + } + _mm_storeu_si128(reinterpret_cast<__m128i *>(q), s); + } + + is.src_ = p; + is.dst_ = q; + } + + // When read/write pointers are the same for insitu stream, just skip unescaped characters + static RAPIDJSON_FORCEINLINE void SkipUnescapedString(InsituStringStream& is) { + RAPIDJSON_ASSERT(is.src_ == is.dst_); + char* p = is.src_; + + // Scan one by one until alignment (unaligned load may cross page boundary and cause crash) + const char* nextAligned = reinterpret_cast((reinterpret_cast(p) + 15) & static_cast(~15)); + for (; p != nextAligned; p++) + if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast(*p) < 0x20)) { + is.src_ = is.dst_ = p; + return; + } + + // The rest of string using SIMD + static const char dquote[16] = { '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"' }; + static const char bslash[16] = { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' }; + static const char space[16] = { 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F }; + const __m128i dq = _mm_loadu_si128(reinterpret_cast(&dquote[0])); + const __m128i bs = _mm_loadu_si128(reinterpret_cast(&bslash[0])); + const __m128i sp = _mm_loadu_si128(reinterpret_cast(&space[0])); + + for (;; p += 16) { + const __m128i s = _mm_load_si128(reinterpret_cast(p)); + const __m128i t1 = _mm_cmpeq_epi8(s, dq); + const __m128i t2 = _mm_cmpeq_epi8(s, bs); + const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x1F) == 0x1F + const __m128i x = _mm_or_si128(_mm_or_si128(t1, t2), t3); + unsigned short r = static_cast(_mm_movemask_epi8(x)); + if (RAPIDJSON_UNLIKELY(r != 0)) { // some of characters is escaped + size_t length; +#ifdef _MSC_VER // Find the index of first escaped + unsigned long offset; + _BitScanForward(&offset, r); + length = offset; +#else + length = static_cast(__builtin_ffs(r) - 1); +#endif + p += length; + break; + } + } + + is.src_ = 
is.dst_ = p; + } +#elif defined(RAPIDJSON_NEON) && !defined(__SANITIZE_ADDRESS__) + // StringStream -> StackStream + static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(StringStream& is, StackStream& os) { + const char* p = is.src_; + + // Scan one by one until alignment (unaligned load may cross page boundary and cause crash) + const char* nextAligned = reinterpret_cast((reinterpret_cast(p) + 15) & static_cast(~15)); + while (p != nextAligned) + if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast(*p) < 0x20)) { + is.src_ = p; + return; + } + else + os.Put(*p++); + + // The rest of string using SIMD + const uint8x16_t s0 = vmovq_n_u8('"'); + const uint8x16_t s1 = vmovq_n_u8('\\'); + const uint8x16_t s2 = vmovq_n_u8('\b'); + const uint8x16_t s3 = vmovq_n_u8(32); + + for (;; p += 16) { + const uint8x16_t s = vld1q_u8(reinterpret_cast(p)); + uint8x16_t x = vceqq_u8(s, s0); + x = vorrq_u8(x, vceqq_u8(s, s1)); + x = vorrq_u8(x, vceqq_u8(s, s2)); + x = vorrq_u8(x, vcltq_u8(s, s3)); + + x = vrev64q_u8(x); // Rev in 64 + uint64_t low = vgetq_lane_u64(vreinterpretq_u64_u8(x), 0); // extract + uint64_t high = vgetq_lane_u64(vreinterpretq_u64_u8(x), 1); // extract + + SizeType length = 0; + bool escaped = false; + if (low == 0) { + if (high != 0) { + uint32_t lz = internal::clzll(high); + length = 8 + (lz >> 3); + escaped = true; + } + } else { + uint32_t lz = internal::clzll(low); + length = lz >> 3; + escaped = true; + } + if (RAPIDJSON_UNLIKELY(escaped)) { // some of characters is escaped + if (length != 0) { + char* q = reinterpret_cast(os.Push(length)); + for (size_t i = 0; i < length; i++) + q[i] = p[i]; + + p += length; + } + break; + } + vst1q_u8(reinterpret_cast(os.Push(16)), s); + } + + is.src_ = p; + } + + // InsituStringStream -> InsituStringStream + static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(InsituStringStream& is, InsituStringStream& os) { + RAPIDJSON_ASSERT(&is == &os); + (void)os; + + if (is.src_ == is.dst_) { + SkipUnescapedString(is); + return; + } + + char* p = is.src_; + char *q = is.dst_; + + // Scan one by one until alignment (unaligned load may cross page boundary and cause crash) + const char* nextAligned = reinterpret_cast((reinterpret_cast(p) + 15) & static_cast(~15)); + while (p != nextAligned) + if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast(*p) < 0x20)) { + is.src_ = p; + is.dst_ = q; + return; + } + else + *q++ = *p++; + + // The rest of string using SIMD + const uint8x16_t s0 = vmovq_n_u8('"'); + const uint8x16_t s1 = vmovq_n_u8('\\'); + const uint8x16_t s2 = vmovq_n_u8('\b'); + const uint8x16_t s3 = vmovq_n_u8(32); + + for (;; p += 16, q += 16) { + const uint8x16_t s = vld1q_u8(reinterpret_cast(p)); + uint8x16_t x = vceqq_u8(s, s0); + x = vorrq_u8(x, vceqq_u8(s, s1)); + x = vorrq_u8(x, vceqq_u8(s, s2)); + x = vorrq_u8(x, vcltq_u8(s, s3)); + + x = vrev64q_u8(x); // Rev in 64 + uint64_t low = vgetq_lane_u64(vreinterpretq_u64_u8(x), 0); // extract + uint64_t high = vgetq_lane_u64(vreinterpretq_u64_u8(x), 1); // extract + + SizeType length = 0; + bool escaped = false; + if (low == 0) { + if (high != 0) { + uint32_t lz = internal::clzll(high); + length = 8 + (lz >> 3); + escaped = true; + } + } else { + uint32_t lz = internal::clzll(low); + length = lz >> 3; + escaped = true; + } + if (RAPIDJSON_UNLIKELY(escaped)) { // some of characters is escaped + for (const char* pend = p + length; p != pend; ) { + *q++ = *p++; + } + break; + } + 
vst1q_u8(reinterpret_cast(q), s); + } + + is.src_ = p; + is.dst_ = q; + } + + // When read/write pointers are the same for insitu stream, just skip unescaped characters + static RAPIDJSON_FORCEINLINE void SkipUnescapedString(InsituStringStream& is) { + RAPIDJSON_ASSERT(is.src_ == is.dst_); + char* p = is.src_; + + // Scan one by one until alignment (unaligned load may cross page boundary and cause crash) + const char* nextAligned = reinterpret_cast((reinterpret_cast(p) + 15) & static_cast(~15)); + for (; p != nextAligned; p++) + if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast(*p) < 0x20)) { + is.src_ = is.dst_ = p; + return; + } + + // The rest of string using SIMD + const uint8x16_t s0 = vmovq_n_u8('"'); + const uint8x16_t s1 = vmovq_n_u8('\\'); + const uint8x16_t s2 = vmovq_n_u8('\b'); + const uint8x16_t s3 = vmovq_n_u8(32); + + for (;; p += 16) { + const uint8x16_t s = vld1q_u8(reinterpret_cast(p)); + uint8x16_t x = vceqq_u8(s, s0); + x = vorrq_u8(x, vceqq_u8(s, s1)); + x = vorrq_u8(x, vceqq_u8(s, s2)); + x = vorrq_u8(x, vcltq_u8(s, s3)); + + x = vrev64q_u8(x); // Rev in 64 + uint64_t low = vgetq_lane_u64(vreinterpretq_u64_u8(x), 0); // extract + uint64_t high = vgetq_lane_u64(vreinterpretq_u64_u8(x), 1); // extract + + if (low == 0) { + if (high != 0) { + uint32_t lz = internal::clzll(high); + p += 8 + (lz >> 3); + break; + } + } else { + uint32_t lz = internal::clzll(low); + p += lz >> 3; + break; + } + } + + is.src_ = is.dst_ = p; + } +#else + // When read/write pointers are the same for insitu stream, just skip unescaped characters + static RAPIDJSON_FORCEINLINE void SkipUnescapedString(InsituStringStream& is) { + RAPIDJSON_ASSERT(is.src_ == is.dst_); + char* p = is.src_; + while (1) { + if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || + RAPIDJSON_UNLIKELY(static_cast(*p) < 0x20)) { + is.src_ = is.dst_ = p; + return; + } + p++; + } + } +#endif // RAPIDJSON_NEON + + template + class NumberStream; + + template + class NumberStream { + public: + typedef typename InputStream::Ch Ch; + + NumberStream(GenericReader& reader, InputStream& s) : is(s) { (void)reader; } + + RAPIDJSON_FORCEINLINE Ch Peek() const { return is.Peek(); } + RAPIDJSON_FORCEINLINE Ch TakePush() { return is.Take(); } + RAPIDJSON_FORCEINLINE Ch Take() { return is.Take(); } + RAPIDJSON_FORCEINLINE void Push(char) {} + + size_t Tell() { return is.Tell(); } + size_t Length() { return 0; } + const StackCharacter* Pop() { return 0; } + + protected: + NumberStream& operator=(const NumberStream&); + + InputStream& is; + }; + + template + class NumberStream : public NumberStream { + typedef NumberStream Base; + public: + NumberStream(GenericReader& reader, InputStream& is) : Base(reader, is), stackStream(reader.stack_) {} + + RAPIDJSON_FORCEINLINE Ch TakePush() { + stackStream.Put(static_cast(Base::is.Peek())); + return Base::is.Take(); + } + + RAPIDJSON_FORCEINLINE void Push(StackCharacter c) { + stackStream.Put(c); + } + + size_t Length() { return stackStream.Length(); } + + const StackCharacter* Pop() { + stackStream.Put('\0'); + return stackStream.Pop(); + } + + private: + StackStream stackStream; + }; + + template + class NumberStream : public NumberStream { + typedef NumberStream Base; + public: + NumberStream(GenericReader& reader, InputStream& is) : Base(reader, is) {} + + RAPIDJSON_FORCEINLINE Ch Take() { return Base::TakePush(); } + }; + + template + void ParseNumber(InputStream& is, Handler& handler) { + typedef typename internal::SelectIf, 
typename TargetEncoding::Ch, char>::Type NumberCharacter; + + internal::StreamLocalCopy copy(is); + NumberStream s(*this, copy.s); + + size_t startOffset = s.Tell(); + double d = 0.0; + bool useNanOrInf = false; + + // Parse minus + bool minus = false; + if (s.Peek() == '-') { + minus = true; + s.TakePush(); + } + + // Parse int: zero / ( digit1-9 *DIGIT ) + unsigned i = 0; + uint64_t i64 = 0; + bool use64bit = false; + int significandDigit = 0; + if (RAPIDJSON_UNLIKELY(s.Peek() == '0')) { + i = 0; + s.TakePush(); + } + else if (RAPIDJSON_LIKELY(s.Peek() >= '1' && s.Peek() <= '9')) { + i = static_cast(s.TakePush() - '0'); + + if (minus) + while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) { + if (RAPIDJSON_UNLIKELY(i >= 214748364)) { // 2^31 = 2147483648 + if (RAPIDJSON_LIKELY(i != 214748364 || s.Peek() > '8')) { + i64 = i; + use64bit = true; + break; + } + } + i = i * 10 + static_cast(s.TakePush() - '0'); + significandDigit++; + } + else + while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) { + if (RAPIDJSON_UNLIKELY(i >= 429496729)) { // 2^32 - 1 = 4294967295 + if (RAPIDJSON_LIKELY(i != 429496729 || s.Peek() > '5')) { + i64 = i; + use64bit = true; + break; + } + } + i = i * 10 + static_cast(s.TakePush() - '0'); + significandDigit++; + } + } + // Parse NaN or Infinity here + else if ((parseFlags & kParseNanAndInfFlag) && RAPIDJSON_LIKELY((s.Peek() == 'I' || s.Peek() == 'N'))) { + if (Consume(s, 'N')) { + if (Consume(s, 'a') && Consume(s, 'N')) { + d = std::numeric_limits::quiet_NaN(); + useNanOrInf = true; + } + } + else if (RAPIDJSON_LIKELY(Consume(s, 'I'))) { + if (Consume(s, 'n') && Consume(s, 'f')) { + d = (minus ? -std::numeric_limits::infinity() : std::numeric_limits::infinity()); + useNanOrInf = true; + + if (RAPIDJSON_UNLIKELY(s.Peek() == 'i' && !(Consume(s, 'i') && Consume(s, 'n') + && Consume(s, 'i') && Consume(s, 't') && Consume(s, 'y')))) { + RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, s.Tell()); + } + } + } + + if (RAPIDJSON_UNLIKELY(!useNanOrInf)) { + RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, s.Tell()); + } + } + else + RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, s.Tell()); + + // Parse 64bit int + bool useDouble = false; + if (use64bit) { + while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) { + if (RAPIDJSON_UNLIKELY(i64 >= RAPIDJSON_UINT64_C2(0x0CCCCCCC, 0xCCCCCCCC))) // 2^63 = 9223372036854775808 + if (RAPIDJSON_LIKELY(i64 != RAPIDJSON_UINT64_C2(0x0CCCCCCC, 0xCCCCCCCC) || + s.Peek() > (minus ? 
'8' : '7'))) { + d = static_cast(i64); + useDouble = true; + break; + } + i64 = i64 * 10 + static_cast(s.TakePush() - '0'); + significandDigit++; + } +#if 0 + else + while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) { + if (RAPIDJSON_UNLIKELY(i64 >= RAPIDJSON_UINT64_C2(0x19999999, 0x99999999))) // 2^64 - 1 = 18446744073709551615 + if (RAPIDJSON_LIKELY(i64 != RAPIDJSON_UINT64_C2(0x19999999, 0x99999999) || s.Peek() > '5')) { + d = static_cast(i64); + useDouble = true; + break; + } + i64 = i64 * 10 + static_cast(s.TakePush() - '0'); + significandDigit++; + } +#endif + } + + // Force double for big integer + if (useDouble) { + while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) { + d = d * 10 + (s.TakePush() - '0'); + } + } + + // Parse frac = decimal-point 1*DIGIT + int expFrac = 0; + if (s.Peek() == '.') { + s.TakePush(); + + if (RAPIDJSON_UNLIKELY(!(s.Peek() >= '0' && s.Peek() <= '9'))) + RAPIDJSON_PARSE_ERROR(kParseErrorNumberMissFraction, s.Tell()); + + if (!useDouble) { +#if RAPIDJSON_64BIT + // Use i64 to store significand in 64-bit architecture + if (!use64bit) + i64 = i; + + while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) { + if (i64 > RAPIDJSON_UINT64_C2(0x1FFFFF, 0xFFFFFFFF)) // 2^53 - 1 for fast path + break; + else { + i64 = i64 * 10 + static_cast(s.TakePush() - '0'); + --expFrac; + if (i64 != 0) + significandDigit++; + } + } + + d = static_cast(i64); +#else + // Use double to store significand in 32-bit architecture + d = static_cast(use64bit ? i64 : i); +#endif + useDouble = true; + } + + while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) { + if (significandDigit < 17) { + s.TakePush(); + --expFrac; + if (RAPIDJSON_LIKELY(d > 0.0)) + significandDigit++; + } + else + s.TakePush(); + } + } + + // Parse exp = e [ minus / plus ] 1*DIGIT + int exp = 0; + if (s.Peek() == 'e' || s.Peek() == 'E') { + if (!useDouble) { + d = static_cast(use64bit ? i64 : i); + useDouble = true; + } + s.TakePush(); + + bool expMinus = false; + if (s.Peek() == '+') { + s.TakePush(); + } else if (s.Peek() == '-') { + s.TakePush(); + expMinus = true; + } + + if (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) { + exp = static_cast(s.Peek() - '0'); + s.TakePush(); + if (expMinus) { + // (exp + expFrac) must not underflow int => we're detecting when -exp gets + // dangerously close to INT_MIN (a pessimistic next digit 9 would push it into + // underflow territory): + // + // -(exp * 10 + 9) + expFrac >= INT_MIN + // <=> exp <= (expFrac - INT_MIN - 9) / 10 + RAPIDJSON_ASSERT(expFrac <= 0); + int maxExp = (expFrac + 2147483639) / 10; + + while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) { + exp = exp * 10 + static_cast(s.Peek() - '0'); + s.TakePush(); + if (RAPIDJSON_UNLIKELY(exp > maxExp)) { + while (RAPIDJSON_UNLIKELY(s.Peek() >= '0' && s.Peek() <= '9')) // Consume the rest of exponent + s.TakePush(); + } + } + } + else { // positive exp + int maxExp = 308 - expFrac; + while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) { + exp = exp * 10 + static_cast(s.Peek() - '0'); + s.TakePush(); + if (RAPIDJSON_UNLIKELY(exp > maxExp)) + RAPIDJSON_PARSE_ERROR(kParseErrorNumberTooBig, startOffset); + } + } + } + else + RAPIDJSON_PARSE_ERROR(kParseErrorNumberMissExponent, s.Tell()); + + if (expMinus) + exp = -exp; + } + + // Finish parsing, call event according to the type of number. + bool cont = true; + + if (parseFlags & kParseNumbersAsStringsFlag) { + if (parseFlags & kParseInsituFlag) { + s.Pop(); // Pop stack no matter if it will be used or not. 
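The digit loops above stop one step short of overflow by testing precomputed thresholds before multiplying by 10: 214748364 for int, 429496729 for unsigned, and RAPIDJSON_UINT64_C2(0x0CCCCCCC, 0xCCCCCCCC) for the 64-bit path, falling back to double when even that overflows. A standalone sketch of the unsigned variant (illustrative names, not part of the patch):

    #include <cstdint>
    #include <cstdio>

    // Accumulate decimal digits into a uint32_t, refusing the digit that would
    // overflow: 4294967295 / 10 == 429496729, so i > 429496729 always overflows,
    // and i == 429496729 overflows exactly when the next digit exceeds '5'.
    static bool AccumulateDigit(uint32_t& i, char digit) {
        if (i > 429496729u || (i == 429496729u && digit > '5'))
            return false;  // caller must widen to 64 bits (or double), as ParseNumber does
        i = i * 10 + static_cast<uint32_t>(digit - '0');
        return true;
    }

    int main() {
        uint32_t i = 0;
        for (const char* p = "4294967295"; *p && AccumulateDigit(i, *p); ++p) {}
        std::printf("%u\n", i);  // 4294967295, the largest value the guard admits
        return 0;
    }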
+ typename InputStream::Ch* head = is.PutBegin(); + const size_t length = s.Tell() - startOffset; + RAPIDJSON_ASSERT(length <= 0xFFFFFFFF); + // unable to insert the \0 character here, it will erase the comma after this number + const typename TargetEncoding::Ch* const str = reinterpret_cast(head); + cont = handler.RawNumber(str, SizeType(length), false); + } + else { + SizeType numCharsToCopy = static_cast(s.Length()); + GenericStringStream > srcStream(s.Pop()); + StackStream dstStream(stack_); + while (numCharsToCopy--) { + Transcoder, TargetEncoding>::Transcode(srcStream, dstStream); + } + dstStream.Put('\0'); + const typename TargetEncoding::Ch* str = dstStream.Pop(); + const SizeType length = static_cast(dstStream.Length()) - 1; + cont = handler.RawNumber(str, SizeType(length), true); + } + } + else { + if (useDouble || useNanOrInf) { + SizeType numCharsToCopy = static_cast(s.Length()); + GenericStringStream > srcStream(s.Pop()); + StackStream dstStream(stack_); + while (numCharsToCopy--) { + Transcoder, TargetEncoding>::Transcode(srcStream, dstStream); + } + dstStream.Put('\0'); + const typename TargetEncoding::Ch* str = dstStream.Pop(); + const SizeType length = static_cast(dstStream.Length()) - 1; + cont = handler.RawNumber(str, SizeType(length), true); + } + else { + if (use64bit) { + if (minus) + cont = handler.Int64(static_cast(~i64 + 1)); + else + cont = handler.Uint64(i64); + } + else { + if (minus) + cont = handler.Int(static_cast(~i + 1)); + else + cont = handler.Uint(i); + } + } + } + if (RAPIDJSON_UNLIKELY(!cont)) + RAPIDJSON_PARSE_ERROR(kParseErrorTermination, startOffset); + } + + // Parse any JSON value + template + void ParseValue(InputStream& is, Handler& handler) { + switch (is.Peek()) { + case 'n': ParseNull (is, handler); break; + case 't': ParseTrue (is, handler); break; + case 'f': ParseFalse (is, handler); break; + case '"': ParseString(is, handler); break; + case '{': ParseObject(is, handler); break; + case '[': ParseArray (is, handler); break; + default : + ParseNumber(is, handler); + break; + + } + } + + // Iterative Parsing + + // States + enum IterativeParsingState { + IterativeParsingFinishState = 0, // sink states at top + IterativeParsingErrorState, // sink states at top + IterativeParsingStartState, + + // Object states + IterativeParsingObjectInitialState, + IterativeParsingMemberKeyState, + IterativeParsingMemberValueState, + IterativeParsingObjectFinishState, + + // Array states + IterativeParsingArrayInitialState, + IterativeParsingElementState, + IterativeParsingArrayFinishState, + + // Single value state + IterativeParsingValueState, + + // Delimiter states (at bottom) + IterativeParsingElementDelimiterState, + IterativeParsingMemberDelimiterState, + IterativeParsingKeyValueDelimiterState, + + cIterativeParsingStateCount + }; + + // Tokens + enum Token { + LeftBracketToken = 0, + RightBracketToken, + + LeftCurlyBracketToken, + RightCurlyBracketToken, + + CommaToken, + ColonToken, + + StringToken, + FalseToken, + TrueToken, + NullToken, + NumberToken, + + kTokenCount + }; + + RAPIDJSON_FORCEINLINE Token Tokenize(Ch c) const { + +//!@cond RAPIDJSON_HIDDEN_FROM_DOXYGEN +#define N NumberToken +#define N16 N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N + // Maps from ASCII to Token + static const unsigned char tokenMap[256] = { + N16, // 00~0F + N16, // 10~1F + N, N, StringToken, N, N, N, N, N, N, N, N, N, CommaToken, N, N, N, // 20~2F + N, N, N, N, N, N, N, N, N, N, ColonToken, N, N, N, N, N, // 30~3F + N16, // 40~4F + N, N, N, N, N, N, N, N, N, N, N, 
LeftBracketToken, N, RightBracketToken, N, N, // 50~5F + N, N, N, N, N, N, FalseToken, N, N, N, N, N, N, N, NullToken, N, // 60~6F + N, N, N, N, TrueToken, N, N, N, N, N, N, LeftCurlyBracketToken, N, RightCurlyBracketToken, N, N, // 70~7F + N16, N16, N16, N16, N16, N16, N16, N16 // 80~FF + }; +#undef N +#undef N16 +//!@endcond + + if (sizeof(Ch) == 1 || static_cast(c) < 256) + return static_cast(tokenMap[static_cast(c)]); + else + return NumberToken; + } + + RAPIDJSON_FORCEINLINE IterativeParsingState Predict(IterativeParsingState state, Token token) const { + // current state x one lookahead token -> new state + static const char G[cIterativeParsingStateCount][kTokenCount] = { + // Finish(sink state) + { + IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, + IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, + IterativeParsingErrorState + }, + // Error(sink state) + { + IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, + IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, + IterativeParsingErrorState + }, + // Start + { + IterativeParsingArrayInitialState, // Left bracket + IterativeParsingErrorState, // Right bracket + IterativeParsingObjectInitialState, // Left curly bracket + IterativeParsingErrorState, // Right curly bracket + IterativeParsingErrorState, // Comma + IterativeParsingErrorState, // Colon + IterativeParsingValueState, // String + IterativeParsingValueState, // False + IterativeParsingValueState, // True + IterativeParsingValueState, // Null + IterativeParsingValueState // Number + }, + // ObjectInitial + { + IterativeParsingErrorState, // Left bracket + IterativeParsingErrorState, // Right bracket + IterativeParsingErrorState, // Left curly bracket + IterativeParsingObjectFinishState, // Right curly bracket + IterativeParsingErrorState, // Comma + IterativeParsingErrorState, // Colon + IterativeParsingMemberKeyState, // String + IterativeParsingErrorState, // False + IterativeParsingErrorState, // True + IterativeParsingErrorState, // Null + IterativeParsingErrorState // Number + }, + // MemberKey + { + IterativeParsingErrorState, // Left bracket + IterativeParsingErrorState, // Right bracket + IterativeParsingErrorState, // Left curly bracket + IterativeParsingErrorState, // Right curly bracket + IterativeParsingErrorState, // Comma + IterativeParsingKeyValueDelimiterState, // Colon + IterativeParsingErrorState, // String + IterativeParsingErrorState, // False + IterativeParsingErrorState, // True + IterativeParsingErrorState, // Null + IterativeParsingErrorState // Number + }, + // MemberValue + { + IterativeParsingErrorState, // Left bracket + IterativeParsingErrorState, // Right bracket + IterativeParsingErrorState, // Left curly bracket + IterativeParsingObjectFinishState, // Right curly bracket + IterativeParsingMemberDelimiterState, // Comma + IterativeParsingErrorState, // Colon + IterativeParsingErrorState, // String + IterativeParsingErrorState, // False + IterativeParsingErrorState, // True + IterativeParsingErrorState, // Null + IterativeParsingErrorState // Number + }, + // ObjectFinish(sink state) + { + IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, 
IterativeParsingErrorState, IterativeParsingErrorState, + IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, + IterativeParsingErrorState + }, + // ArrayInitial + { + IterativeParsingArrayInitialState, // Left bracket(push Element state) + IterativeParsingArrayFinishState, // Right bracket + IterativeParsingObjectInitialState, // Left curly bracket(push Element state) + IterativeParsingErrorState, // Right curly bracket + IterativeParsingErrorState, // Comma + IterativeParsingErrorState, // Colon + IterativeParsingElementState, // String + IterativeParsingElementState, // False + IterativeParsingElementState, // True + IterativeParsingElementState, // Null + IterativeParsingElementState // Number + }, + // Element + { + IterativeParsingErrorState, // Left bracket + IterativeParsingArrayFinishState, // Right bracket + IterativeParsingErrorState, // Left curly bracket + IterativeParsingErrorState, // Right curly bracket + IterativeParsingElementDelimiterState, // Comma + IterativeParsingErrorState, // Colon + IterativeParsingErrorState, // String + IterativeParsingErrorState, // False + IterativeParsingErrorState, // True + IterativeParsingErrorState, // Null + IterativeParsingErrorState // Number + }, + // ArrayFinish(sink state) + { + IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, + IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, + IterativeParsingErrorState + }, + // Single Value (sink state) + { + IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, + IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, + IterativeParsingErrorState + }, + // ElementDelimiter + { + IterativeParsingArrayInitialState, // Left bracket(push Element state) + IterativeParsingArrayFinishState, // Right bracket + IterativeParsingObjectInitialState, // Left curly bracket(push Element state) + IterativeParsingErrorState, // Right curly bracket + IterativeParsingErrorState, // Comma + IterativeParsingErrorState, // Colon + IterativeParsingElementState, // String + IterativeParsingElementState, // False + IterativeParsingElementState, // True + IterativeParsingElementState, // Null + IterativeParsingElementState // Number + }, + // MemberDelimiter + { + IterativeParsingErrorState, // Left bracket + IterativeParsingErrorState, // Right bracket + IterativeParsingErrorState, // Left curly bracket + IterativeParsingObjectFinishState, // Right curly bracket + IterativeParsingErrorState, // Comma + IterativeParsingErrorState, // Colon + IterativeParsingMemberKeyState, // String + IterativeParsingErrorState, // False + IterativeParsingErrorState, // True + IterativeParsingErrorState, // Null + IterativeParsingErrorState // Number + }, + // KeyValueDelimiter + { + IterativeParsingArrayInitialState, // Left bracket(push MemberValue state) + IterativeParsingErrorState, // Right bracket + IterativeParsingObjectInitialState, // Left curly bracket(push MemberValue state) + IterativeParsingErrorState, // Right curly bracket + IterativeParsingErrorState, // Comma + IterativeParsingErrorState, // Colon + IterativeParsingMemberValueState, // String + IterativeParsingMemberValueState, // False + 
IterativeParsingMemberValueState, // True + IterativeParsingMemberValueState, // Null + IterativeParsingMemberValueState // Number + }, + }; // End of G + + return static_cast(G[state][token]); + } + + // Make an advance in the token stream and state based on the candidate destination state which was returned by Predict(). + // May return a new state on state pop. + template + RAPIDJSON_FORCEINLINE IterativeParsingState Transit(IterativeParsingState src, Token token, IterativeParsingState dst, InputStream& is, Handler& handler) { + (void)token; + + switch (dst) { + case IterativeParsingErrorState: + return dst; + + case IterativeParsingObjectInitialState: + case IterativeParsingArrayInitialState: + { + // Push the state (Element or MemberValue) if we are nested in another array or in a member value. + // In this way we can get the correct state on ObjectFinish or ArrayFinish by frame pop. + IterativeParsingState n = src; + if (src == IterativeParsingArrayInitialState || src == IterativeParsingElementDelimiterState) + n = IterativeParsingElementState; + else if (src == IterativeParsingKeyValueDelimiterState) + n = IterativeParsingMemberValueState; + // Push current state. + *stack_.template Push(1) = n; + // Initialize and push the member/element count. + *stack_.template Push(1) = 0; + // Call handler + bool hr = (dst == IterativeParsingObjectInitialState) ? handler.StartObject() : handler.StartArray(); + // A false return from the handler short-circuits parsing. + if (!hr) { + RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorTermination, is.Tell()); + return IterativeParsingErrorState; + } + else { + is.Take(); + return dst; + } + } + + case IterativeParsingMemberKeyState: + ParseString(is, handler, true); + if (HasParseError()) + return IterativeParsingErrorState; + else + return dst; + + case IterativeParsingKeyValueDelimiterState: + RAPIDJSON_ASSERT(token == ColonToken); + is.Take(); + return dst; + + case IterativeParsingMemberValueState: + // Must be non-compound value. Or it would be ObjectInitial or ArrayInitial state. + ParseValue(is, handler); + if (HasParseError()) { + return IterativeParsingErrorState; + } + return dst; + + case IterativeParsingElementState: + // Must be non-compound value. Or it would be ObjectInitial or ArrayInitial state. + ParseValue(is, handler); + if (HasParseError()) { + return IterativeParsingErrorState; + } + return dst; + + case IterativeParsingMemberDelimiterState: + case IterativeParsingElementDelimiterState: + is.Take(); + // Update member/element count. + *stack_.template Top() = *stack_.template Top() + 1; + return dst; + + case IterativeParsingObjectFinishState: + { + // Transitioning from a delimiter is only allowed when trailing commas are enabled + if (!(parseFlags & kParseTrailingCommasFlag) && src == IterativeParsingMemberDelimiterState) { + RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorObjectMissName, is.Tell()); + return IterativeParsingErrorState; + } + // Get member count. + SizeType c = *stack_.template Pop(1); + // If the object is not empty, count the last member. + if (src == IterativeParsingMemberValueState) + ++c; + // Restore the state. + IterativeParsingState n = static_cast(*stack_.template Pop(1)); + // Transit to Finish state if this is the topmost scope. + if (n == IterativeParsingStartState) + n = IterativeParsingFinishState; + // Call handler + bool hr = handler.EndObject(c); + // A false return from the handler short-circuits parsing.
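+ // At this point c is the final member count and n is the state to resume
+ // after the closing brace. For a top-level {"a":1}, ObjectInitial pushed
+ // [StartState, 0]; the pops above yield c == 1 (the last member is counted
+ // here rather than by a comma) and n == StartState, which is promoted to
+ // FinishState so IterativeParse() can stop.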
+ if (!hr) { + RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorTermination, is.Tell()); + return IterativeParsingErrorState; + } + else { + is.Take(); + return n; + } + } + + case IterativeParsingArrayFinishState: + { + // Transitioning from a delimiter is only allowed when trailing commas are enabled + if (!(parseFlags & kParseTrailingCommasFlag) && src == IterativeParsingElementDelimiterState) { + RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorValueInvalid, is.Tell()); + return IterativeParsingErrorState; + } + // Get element count. + SizeType c = *stack_.template Pop(1); + // If the array is not empty, count the last element. + if (src == IterativeParsingElementState) + ++c; + // Restore the state. + IterativeParsingState n = static_cast(*stack_.template Pop(1)); + // Transit to Finish state if this is the topmost scope. + if (n == IterativeParsingStartState) + n = IterativeParsingFinishState; + // Call handler + bool hr = handler.EndArray(c); + // A false return from the handler short-circuits parsing. + if (!hr) { + RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorTermination, is.Tell()); + return IterativeParsingErrorState; + } + else { + is.Take(); + return n; + } + } + + default: + // This branch actually handles IterativeParsingValueState. + // Using `default:` rather than + // `case IterativeParsingValueState:` is for code coverage. + + // The IterativeParsingStartState is not enumerated in this switch-case. + // That case is impossible, and is caught by the following assertion. + + // The IterativeParsingFinishState is not enumerated in this switch-case either. + // It is a "derivative" state which cannot be produced by Predict() directly. + // Therefore it cannot happen here, and is caught by the following assertion. + RAPIDJSON_ASSERT(dst == IterativeParsingValueState); + + // Must be non-compound value. Or it would be ObjectInitial or ArrayInitial state. + ParseValue(is, handler); + if (HasParseError()) { + return IterativeParsingErrorState; + } + return IterativeParsingFinishState; + } + } + + template + void HandleError(IterativeParsingState src, InputStream& is) { + if (HasParseError()) { + // Error flag has been set.
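+ // For example, Transit() may already have recorded kParseErrorTermination;
+ // the state-derived errors below must not overwrite it.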
+ return; + } + + switch (src) { + case IterativeParsingStartState: RAPIDJSON_PARSE_ERROR(kParseErrorDocumentEmpty, is.Tell()); return; + case IterativeParsingFinishState: RAPIDJSON_PARSE_ERROR(kParseErrorDocumentRootNotSingular, is.Tell()); return; + case IterativeParsingObjectInitialState: + case IterativeParsingMemberDelimiterState: RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissName, is.Tell()); return; + case IterativeParsingMemberKeyState: RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissColon, is.Tell()); return; + case IterativeParsingMemberValueState: RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissCommaOrCurlyBracket, is.Tell()); return; + case IterativeParsingKeyValueDelimiterState: + case IterativeParsingArrayInitialState: + case IterativeParsingElementDelimiterState: RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell()); return; + default: RAPIDJSON_ASSERT(src == IterativeParsingElementState); RAPIDJSON_PARSE_ERROR(kParseErrorArrayMissCommaOrSquareBracket, is.Tell()); return; + } + } + + RAPIDJSON_FORCEINLINE bool IsIterativeParsingDelimiterState(IterativeParsingState s) const { + return s >= IterativeParsingElementDelimiterState; + } + + RAPIDJSON_FORCEINLINE bool IsIterativeParsingCompleteState(IterativeParsingState s) const { + return s <= IterativeParsingErrorState; + } + + template + ParseResult IterativeParse(InputStream& is, Handler& handler) { + parseResult_.Clear(); + ClearStackOnExit scope(*this); + IterativeParsingState state = IterativeParsingStartState; + + SkipWhitespaceAndComments(is); + RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_); + while (is.Peek() != '\0') { + Token t = Tokenize(is.Peek()); + IterativeParsingState n = Predict(state, t); + IterativeParsingState d = Transit(state, t, n, is, handler); + + if (d == IterativeParsingErrorState) { + HandleError(state, is); + break; + } + + state = d; + + // Do not further consume streams if a root JSON has been parsed. + if ((parseFlags & kParseStopWhenDoneFlag) && state == IterativeParsingFinishState) + break; + + SkipWhitespaceAndComments(is); + RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_); + } + + // Handle the end of file. + if (state != IterativeParsingFinishState) + HandleError(state, is); + + return parseResult_; + } + + static const size_t kDefaultStackCapacity = 256; //!< Default stack capacity in bytes for storing a single decoded string. + internal::Stack stack_; //!< A stack for storing decoded string temporarily during non-destructive parsing. + ParseResult parseResult_; + IterativeParsingState state_; +}; // class GenericReader + +//! Reader with UTF8 encoding and default allocator. +typedef GenericReader, UTF8<> > Reader; + +RAPIDJSON_NAMESPACE_END + +#if defined(__clang__) || defined(_MSC_VER) +RAPIDJSON_DIAG_POP +#endif + + +#ifdef __GNUC__ +RAPIDJSON_DIAG_POP +#endif + +#endif // RAPIDJSON_READER_H_ diff --git a/src/rapidjson/stringbuffer.h b/src/rapidjson/stringbuffer.h new file mode 100644 index 0000000..0ab6c0d --- /dev/null +++ b/src/rapidjson/stringbuffer.h @@ -0,0 +1,119 @@ +// Tencent is pleased to support the open source community by making RapidJSON available. +// +// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. +// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License. 
You may obtain a copy of the License at +// +// http://opensource.org/licenses/MIT +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef RAPIDJSON_STRINGBUFFER_H_ +#define RAPIDJSON_STRINGBUFFER_H_ + +#include +#include + +#if RAPIDJSON_HAS_CXX11_RVALUE_REFS +#include // std::move +#endif + +#if defined(__clang__) +RAPIDJSON_DIAG_PUSH +RAPIDJSON_DIAG_OFF(c++98-compat) +#endif + +RAPIDJSON_NAMESPACE_BEGIN + +//! Represents an in-memory output stream. +/*! + \tparam Encoding Encoding of the stream. + \tparam Allocator type for allocating memory buffer. + \note implements Stream concept +*/ +template +class GenericStringBuffer { +public: + typedef typename Encoding::Ch Ch; + + GenericStringBuffer(Allocator* allocator = 0, size_t capacity = kDefaultCapacity) : stack_(allocator, capacity) {} + +#if RAPIDJSON_HAS_CXX11_RVALUE_REFS + GenericStringBuffer(GenericStringBuffer&& rhs) : stack_(std::move(rhs.stack_)) {} + GenericStringBuffer& operator=(GenericStringBuffer&& rhs) { + if (&rhs != this) + stack_ = std::move(rhs.stack_); + return *this; + } +#endif + + void Put(Ch c) { *stack_.template Push() = c; } + void PutUnsafe(Ch c) { *stack_.template PushUnsafe() = c; } + void Flush() {} + + void Clear() { stack_.Clear(); } + void ShrinkToFit() { + // Push and pop a null terminator. This is safe. + *stack_.template Push() = '\0'; + stack_.ShrinkToFit(); + stack_.template Pop(1); + } + + void Reserve(size_t count) { stack_.template Reserve(count); } + Ch* Push(size_t count) { return stack_.template Push(count); } + Ch* PushUnsafe(size_t count) { return stack_.template PushUnsafe(count); } + void Pop(size_t count) { stack_.template Pop(count); } + + const Ch* GetString() const { + // Push and pop a null terminator. This is safe. + *stack_.template Push() = '\0'; + stack_.template Pop(1); + + return stack_.template Bottom(); + } + + //! Get the size of string in bytes in the string buffer. + size_t GetSize() const { return stack_.GetSize(); } + + //! Get the length of string in Ch in the string buffer. + size_t GetLength() const { return stack_.GetSize() / sizeof(Ch); } + + static const size_t kDefaultCapacity = 256; + mutable internal::Stack stack_; + +private: + // Prohibit copy constructor & assignment operator. + GenericStringBuffer(const GenericStringBuffer&); + GenericStringBuffer& operator=(const GenericStringBuffer&); +}; + +//! String buffer with UTF8 encoding +typedef GenericStringBuffer > StringBuffer; + +template +inline void PutReserve(GenericStringBuffer& stream, size_t count) { + stream.Reserve(count); +} + +template +inline void PutUnsafe(GenericStringBuffer& stream, typename Encoding::Ch c) { + stream.PutUnsafe(c); +} + +//! Implement specialized version of PutN() with memset() for better performance. +template<> +inline void PutN(GenericStringBuffer >& stream, char c, size_t n) { + std::memset(stream.stack_.Push(n), c, n * sizeof(c)); +} + +RAPIDJSON_NAMESPACE_END + +#if defined(__clang__) +RAPIDJSON_DIAG_POP +#endif + +#endif // RAPIDJSON_STRINGBUFFER_H_ diff --git a/src/rapidjson/writer.h b/src/rapidjson/writer.h new file mode 100644 index 0000000..53d5184 --- /dev/null +++ b/src/rapidjson/writer.h @@ -0,0 +1,730 @@ +// Tencent is pleased to support the open source community by making RapidJSON available. 
+// +// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. +// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// http://opensource.org/licenses/MIT +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef RAPIDJSON_WRITER_H_ +#define RAPIDJSON_WRITER_H_ + +#include +#include +#include +#include +#include +#include +#include +#include "stringbuffer.h" +#include +#include // placement new + +#if defined(RAPIDJSON_SIMD) && defined(_MSC_VER) +#include +#pragma intrinsic(_BitScanForward) +#endif +#ifdef RAPIDJSON_SSE42 +#include +#elif defined(RAPIDJSON_SSE2) +#include +#elif defined(RAPIDJSON_NEON) +#include +#endif + +#ifdef __clang__ +RAPIDJSON_DIAG_PUSH +RAPIDJSON_DIAG_OFF(padded) +RAPIDJSON_DIAG_OFF(unreachable-code) +RAPIDJSON_DIAG_OFF(c++98-compat) +#elif defined(_MSC_VER) +RAPIDJSON_DIAG_PUSH +RAPIDJSON_DIAG_OFF(4127) // conditional expression is constant +#endif + +RAPIDJSON_NAMESPACE_BEGIN + +/////////////////////////////////////////////////////////////////////////////// +// WriteFlag + +/*! \def RAPIDJSON_WRITE_DEFAULT_FLAGS + \ingroup RAPIDJSON_CONFIG + \brief User-defined kWriteDefaultFlags definition. + + Users can define this as any combination of \c WriteFlag values. +*/ +#ifndef RAPIDJSON_WRITE_DEFAULT_FLAGS +#define RAPIDJSON_WRITE_DEFAULT_FLAGS kWriteNoFlags +#endif + +template void Put(StringBuffer& os, typename StringBuffer::Ch c) { os.Put(c); } + +//! Combination of writeFlags +enum WriteFlag { + kWriteNoFlags = 0, //!< No flags are set. + kWriteValidateEncodingFlag = 1, //!< Validate encoding of JSON strings. + kWriteNanAndInfFlag = 2, //!< Allow writing of Infinity, -Infinity and NaN. + kWriteDefaultFlags = RAPIDJSON_WRITE_DEFAULT_FLAGS //!< Default write flags. Can be customized by defining RAPIDJSON_WRITE_DEFAULT_FLAGS +}; + +//! JSON writer +/*! Writer implements the concept Handler. + It generates JSON text from events and writes it to an output stream (os). + + Users may programmatically call the functions of a writer to generate JSON text. + + On the other side, a writer can also be passed to objects that generate events, + + for example Reader::Parse() and Document::Accept(). + + \tparam OutputStream Type of output stream. + \tparam SourceEncoding Encoding of source string. + \tparam TargetEncoding Encoding of output stream. + \tparam StackAllocator Type of allocator for allocating memory of stack. + \note implements Handler concept +*/ +template, typename TargetEncoding = UTF8<>, typename StackAllocator = CrtAllocator, unsigned writeFlags = kWriteDefaultFlags> +class Writer { +public: + typedef typename SourceEncoding::Ch Ch; + + static const int kDefaultMaxDecimalPlaces = 324; + + //! Constructor + /*! \param os Output stream. + \param stackAllocator User supplied allocator. If it is null, it will create a private one. + \param levelDepth Initial capacity of stack.
+ */ + explicit + Writer(OutputStream& os, StackAllocator* stackAllocator = 0, size_t levelDepth = kDefaultLevelDepth) : + os_(&os), level_stack_(stackAllocator, levelDepth * sizeof(Level)), maxDecimalPlaces_(kDefaultMaxDecimalPlaces), hasRoot_(false) {} + + explicit + Writer(StackAllocator* allocator = 0, size_t levelDepth = kDefaultLevelDepth) : + os_(0), level_stack_(allocator, levelDepth * sizeof(Level)), maxDecimalPlaces_(kDefaultMaxDecimalPlaces), hasRoot_(false) {} + +#if RAPIDJSON_HAS_CXX11_RVALUE_REFS + Writer(Writer&& rhs) : + os_(rhs.os_), level_stack_(std::move(rhs.level_stack_)), maxDecimalPlaces_(rhs.maxDecimalPlaces_), hasRoot_(rhs.hasRoot_) { + rhs.os_ = 0; + } +#endif + + //! Reset the writer with a new stream. + /*! + This function resets the writer with a new stream and default settings, + making a Writer object reusable for writing multiple JSON documents. + + \param os New output stream. + \code + Writer writer(os1); + writer.StartObject(); + // ... + writer.EndObject(); + + writer.Reset(os2); + writer.StartObject(); + // ... + writer.EndObject(); + \endcode + */ + void Reset(OutputStream& os) { + os_ = &os; + hasRoot_ = false; + level_stack_.Clear(); + } + + //! Checks whether the output is a complete JSON. + /*! + A complete JSON has a complete root object or array. + */ + bool IsComplete() const { + return hasRoot_ && level_stack_.Empty(); + } + + int GetMaxDecimalPlaces() const { + return maxDecimalPlaces_; + } + + //! Sets the maximum number of decimal places for double output. + /*! + This setting truncates the output to the specified number of decimal places. + + For example, + + \code + writer.SetMaxDecimalPlaces(3); + writer.StartArray(); + writer.Double(0.12345); // "0.123" + writer.Double(0.0001); // "0.0" + writer.Double(1.234567890123456e30); // "1.234567890123456e30" (do not truncate significand for positive exponent) + writer.Double(1.23e-4); // "0.0" (do truncate significand for negative exponent) + writer.EndArray(); + \endcode + + The default setting does not truncate any decimal places. You can restore this setting by calling + \code + writer.SetMaxDecimalPlaces(Writer::kDefaultMaxDecimalPlaces); + \endcode + */ + void SetMaxDecimalPlaces(int maxDecimalPlaces) { + maxDecimalPlaces_ = maxDecimalPlaces; + } + + /*!@name Implementation of Handler + \see Handler + */ + //@{ + + bool Null() { Prefix(kNullType); return EndValue(WriteNull()); } + bool Bool(bool b) { Prefix(b ? kTrueType : kFalseType); return EndValue(WriteBool(b)); } + bool Int(int i) { Prefix(kNumberType); return EndValue(WriteInt(i)); } + bool Uint(unsigned u) { Prefix(kNumberType); return EndValue(WriteUint(u)); } + bool Int64(int64_t i64) { Prefix(kNumberType); return EndValue(WriteInt64(i64)); } + bool Uint64(uint64_t u64) { Prefix(kNumberType); return EndValue(WriteUint64(u64)); } + + //! Writes the given \c double value to the stream + /*! + \param d The value to be written. + \return Whether the write succeeded.
+ */ + bool Double(double d) { Prefix(kNumberType); return EndValue(WriteDouble(d)); } + + bool RawNumber(const Ch* str, SizeType length, bool copy = false) { + RAPIDJSON_ASSERT(str != 0); + (void)copy; + Prefix(kNumberType); + return EndValue(WriteDouble(str, length)); + } + + bool String(const Ch* str, SizeType length, bool copy = false) { + RAPIDJSON_ASSERT(str != 0); + (void)copy; + Prefix(kStringType); + return EndValue(WriteString(str, length)); + } + +#if RAPIDJSON_HAS_STDSTRING + bool String(const std::basic_string& str) { + return String(str.data(), SizeType(str.size())); + } +#endif + + bool StartObject() { + Prefix(kObjectType); + new (level_stack_.template Push()) Level(false); + return WriteStartObject(); + } + + bool Key(const Ch* str, SizeType length, bool copy = false) { return String(str, length, copy); } + +#if RAPIDJSON_HAS_STDSTRING + bool Key(const std::basic_string& str) + { + return Key(str.data(), SizeType(str.size())); + } +#endif + + bool EndObject(SizeType memberCount = 0) { + (void)memberCount; + RAPIDJSON_ASSERT(level_stack_.GetSize() >= sizeof(Level)); // not inside an Object + RAPIDJSON_ASSERT(!level_stack_.template Top()->inArray); // currently inside an Array, not Object + RAPIDJSON_ASSERT(0 == level_stack_.template Top()->valueCount % 2); // Object has a Key without a Value + level_stack_.template Pop(1); + return EndValue(WriteEndObject()); + } + + bool StartArray() { + Prefix(kArrayType); + new (level_stack_.template Push()) Level(true); + return WriteStartArray(); + } + + bool EndArray(SizeType elementCount = 0) { + (void)elementCount; + RAPIDJSON_ASSERT(level_stack_.GetSize() >= sizeof(Level)); + RAPIDJSON_ASSERT(level_stack_.template Top()->inArray); + level_stack_.template Pop(1); + return EndValue(WriteEndArray()); + } + //@} + + /*! @name Convenience extensions */ + //@{ + + //! Simpler but slower overload. + bool String(const Ch* const& str) { return String(str, internal::StrLen(str)); } + bool Key(const Ch* const& str) { return Key(str, internal::StrLen(str)); } + + //@} + + //! Write a raw JSON value. + /*! + For user to write a stringified JSON as a value. + + \param json A well-formed JSON value. It should not contain null character within [0, length - 1] range. + \param length Length of the json. + \param type Type of the root of json. + */ + bool RawValue(const Ch* json, size_t length, Type type) { + RAPIDJSON_ASSERT(json != 0); + Prefix(type); + return EndValue(WriteRawValue(json, length)); + } + + //! Flush the output stream. + /*! + Allows the user to flush the output stream immediately. + */ + void Flush() { + os_->Flush(); + } + + static const size_t kDefaultLevelDepth = 32; + +protected: + //! 
Information for each nested level + struct Level { + Level(bool inArray_) : valueCount(0), inArray(inArray_) {} + size_t valueCount; //!< number of values in this level + bool inArray; //!< true if in array, otherwise in object + }; + + bool WriteNull() { + PutReserve(*os_, 4); + PutUnsafe(*os_, 'n'); PutUnsafe(*os_, 'u'); PutUnsafe(*os_, 'l'); PutUnsafe(*os_, 'l'); return true; + } + + bool WriteBool(bool b) { + if (b) { + PutReserve(*os_, 4); + PutUnsafe(*os_, 't'); PutUnsafe(*os_, 'r'); PutUnsafe(*os_, 'u'); PutUnsafe(*os_, 'e'); + } + else { + PutReserve(*os_, 5); + PutUnsafe(*os_, 'f'); PutUnsafe(*os_, 'a'); PutUnsafe(*os_, 'l'); PutUnsafe(*os_, 's'); PutUnsafe(*os_, 'e'); + } + return true; + } + + bool WriteInt(int i) { + char buffer[11]; + const char* end = internal::i32toa(i, buffer); + PutReserve(*os_, static_cast(end - buffer)); + for (const char* p = buffer; p != end; ++p) + PutUnsafe(*os_, static_cast(*p)); + return true; + } + + bool WriteUint(unsigned u) { + char buffer[10]; + const char* end = internal::u32toa(u, buffer); + PutReserve(*os_, static_cast(end - buffer)); + for (const char* p = buffer; p != end; ++p) + PutUnsafe(*os_, static_cast(*p)); + return true; + } + + bool WriteInt64(int64_t i64) { + char buffer[21]; + const char* end = internal::i64toa(i64, buffer); + PutReserve(*os_, static_cast(end - buffer)); + for (const char* p = buffer; p != end; ++p) + PutUnsafe(*os_, static_cast(*p)); + return true; + } + + bool WriteUint64(uint64_t u64) { + char buffer[20]; + char* end = internal::u64toa(u64, buffer); + PutReserve(*os_, static_cast(end - buffer)); + for (char* p = buffer; p != end; ++p) + PutUnsafe(*os_, static_cast(*p)); + return true; + } + + bool WriteDouble(double d) { + if (internal::Double(d).IsNanOrInf()) { + if (!(writeFlags & kWriteNanAndInfFlag)) + return false; + if (internal::Double(d).IsNan()) { + PutReserve(*os_, 3); + PutUnsafe(*os_, 'N'); PutUnsafe(*os_, 'a'); PutUnsafe(*os_, 'N'); + return true; + } + if (internal::Double(d).Sign()) { + PutReserve(*os_, 9); + PutUnsafe(*os_, '-'); + } + else + PutReserve(*os_, 8); + PutUnsafe(*os_, 'I'); PutUnsafe(*os_, 'n'); PutUnsafe(*os_, 'f'); + PutUnsafe(*os_, 'i'); PutUnsafe(*os_, 'n'); PutUnsafe(*os_, 'i'); PutUnsafe(*os_, 't'); PutUnsafe(*os_, 'y'); + return true; + } + + char buffer[25]; + char* end = internal::dtoa(d, buffer, maxDecimalPlaces_); + PutReserve(*os_, static_cast(end - buffer)); + for (char* p = buffer; p != end; ++p) + PutUnsafe(*os_, static_cast(*p)); + return true; + } + + bool WriteDouble(const Ch* str, SizeType length) { + PutReserve(*os_, length); + + for (const char* p = str; p!= str+length; ++p) + PutUnsafe(*os_, static_cast(*p)); + + return true; + } + + bool WriteString(const Ch* str, SizeType length) { + static const typename OutputStream::Ch hexDigits[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' }; + static const char escape[256] = { +#define Z16 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 + //0 1 2 3 4 5 6 7 8 9 A B C D E F + 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'b', 't', 'n', 'u', 'f', 'r', 'u', 'u', // 00 + 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', // 10 + 0, 0, '"', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 20 + Z16, Z16, // 30~4F + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,'\\', 0, 0, 0, // 50 + Z16, Z16, Z16, Z16, Z16, Z16, Z16, Z16, Z16, Z16 // 60~FF +#undef Z16 + }; + + // + // efficiency wants us to PutReserve far in advance + // But that causes memory allocation issues with Chunked Buffer, so don't 
+        Put(*os_, '\"');
+        GenericStringStream<SourceEncoding> is(str);
+        while (ScanWriteUnescapedString(is, length)) {
+            const Ch c = is.Peek();
+            if (!TargetEncoding::supportUnicode && static_cast<unsigned>(c) >= 0x80) {
+                // Unicode escaping
+                unsigned codepoint;
+                if (RAPIDJSON_UNLIKELY(!SourceEncoding::Decode(is, &codepoint)))
+                    return false;
+                PutReserve(*os_, 12);
+                PutUnsafe(*os_, '\\');
+                PutUnsafe(*os_, 'u');
+                if (codepoint <= 0xD7FF || (codepoint >= 0xE000 && codepoint <= 0xFFFF)) {
+                    PutUnsafe(*os_, hexDigits[(codepoint >> 12) & 15]);
+                    PutUnsafe(*os_, hexDigits[(codepoint >>  8) & 15]);
+                    PutUnsafe(*os_, hexDigits[(codepoint >>  4) & 15]);
+                    PutUnsafe(*os_, hexDigits[(codepoint      ) & 15]);
+                }
+                else {
+                    RAPIDJSON_ASSERT(codepoint >= 0x010000 && codepoint <= 0x10FFFF);
+                    // Surrogate pair
+                    unsigned s = codepoint - 0x010000;
+                    unsigned lead = (s >> 10) + 0xD800;
+                    unsigned trail = (s & 0x3FF) + 0xDC00;
+                    PutUnsafe(*os_, hexDigits[(lead >> 12) & 15]);
+                    PutUnsafe(*os_, hexDigits[(lead >>  8) & 15]);
+                    PutUnsafe(*os_, hexDigits[(lead >>  4) & 15]);
+                    PutUnsafe(*os_, hexDigits[(lead      ) & 15]);
+                    PutUnsafe(*os_, '\\');
+                    PutUnsafe(*os_, 'u');
+                    PutUnsafe(*os_, hexDigits[(trail >> 12) & 15]);
+                    PutUnsafe(*os_, hexDigits[(trail >>  8) & 15]);
+                    PutUnsafe(*os_, hexDigits[(trail >>  4) & 15]);
+                    PutUnsafe(*os_, hexDigits[(trail      ) & 15]);
+                }
+            }
+            else if ((sizeof(Ch) == 1 || static_cast<unsigned>(c) < 256) && RAPIDJSON_UNLIKELY(escape[static_cast<unsigned char>(c)])) {
+                is.Take();
+                PutReserve(*os_, 6);
+                PutUnsafe(*os_, '\\');
+                PutUnsafe(*os_, static_cast<typename OutputStream::Ch>(escape[static_cast<unsigned char>(c)]));
+                if (escape[static_cast<unsigned char>(c)] == 'u') {
+                    PutUnsafe(*os_, '0');
+                    PutUnsafe(*os_, '0');
+                    PutUnsafe(*os_, hexDigits[static_cast<unsigned char>(c) >> 4]);
+                    PutUnsafe(*os_, hexDigits[static_cast<unsigned char>(c) & 0xF]);
+                }
+            }
+            else {
+                PutReserve(*os_, 16);  // worst case should be 12, but reserve 16 to be safe
+                if (RAPIDJSON_UNLIKELY(!(writeFlags & kWriteValidateEncodingFlag ?
+                    Transcoder<SourceEncoding, TargetEncoding>::Validate(is, *os_) :
+                    Transcoder<SourceEncoding, TargetEncoding>::TranscodeUnsafe(is, *os_))))
+                    return false;
+            }
+        }
+        Put(*os_, '\"');
+        return true;
+    }
+
+    bool ScanWriteUnescapedString(GenericStringStream<SourceEncoding>& is, size_t length) {
+        return RAPIDJSON_LIKELY(is.Tell() < length);
+    }
+
+    bool WriteStartObject() { os_->Put('{'); return true; }
+    bool WriteEndObject()   { os_->Put('}'); return true; }
+    bool WriteStartArray()  { os_->Put('['); return true; }
+    bool WriteEndArray()    { os_->Put(']'); return true; }
+
+    bool WriteRawValue(const Ch* json, size_t length) {
+        PutReserve(*os_, length);
+        GenericStringStream<SourceEncoding> is(json);
+        while (RAPIDJSON_LIKELY(is.Tell() < length)) {
+            RAPIDJSON_ASSERT(is.Peek() != '\0');
+            if (RAPIDJSON_UNLIKELY(!(writeFlags & kWriteValidateEncodingFlag ?
+                Transcoder<SourceEncoding, TargetEncoding>::Validate(is, *os_) :
+                Transcoder<SourceEncoding, TargetEncoding>::TranscodeUnsafe(is, *os_))))
+                return false;
+        }
+        return true;
+    }
+
+    void Prefix(Type type) {
+        (void)type;
+        if (RAPIDJSON_LIKELY(level_stack_.GetSize() != 0)) { // this value is not at root
+            Level* level = level_stack_.template Top<Level>();
+            if (level->valueCount > 0) {
+                if (level->inArray)
+                    os_->Put(','); // add comma if it is not the first element in array
+                else  // in object
+                    os_->Put((level->valueCount % 2 == 0) ? ',' : ':');
+            }
+            if (!level->inArray && level->valueCount % 2 == 0)
+                RAPIDJSON_ASSERT(type == kStringType);  // inside an object, an even count means this value must be a member name
+            level->valueCount++;
+        }
+        else {
+            RAPIDJSON_ASSERT(!hasRoot_);    // There should be one and only one root.
+            hasRoot_ = true;
+        }
+    }
+
+    // Flush the value if it is the top level one.
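+    // ("Top level" means the level stack is empty once the value completes, i.e.
+    // the root value has just ended, so the stream can be flushed safely.)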
+    bool EndValue(bool ret) {
+        if (RAPIDJSON_UNLIKELY(level_stack_.Empty()))   // end of json text
+            Flush();
+        return ret;
+    }
+
+    OutputStream* os_;
+    internal::Stack<StackAllocator> level_stack_;
+    int maxDecimalPlaces_;
+    bool hasRoot_;
+
+private:
+    // Prohibit copy constructor & assignment operator.
+    Writer(const Writer&);
+    Writer& operator=(const Writer&);
+};
+
+// Full specialization for StringStream to prevent memory copying
+
+template<>
+inline bool Writer<StringBuffer>::WriteInt(int i) {
+    char *buffer = os_->Push(11);
+    const char* end = internal::i32toa(i, buffer);
+    os_->Pop(static_cast<size_t>(11 - (end - buffer)));
+    return true;
+}
+
+template<>
+inline bool Writer<StringBuffer>::WriteUint(unsigned u) {
+    char *buffer = os_->Push(10);
+    const char* end = internal::u32toa(u, buffer);
+    os_->Pop(static_cast<size_t>(10 - (end - buffer)));
+    return true;
+}
+
+template<>
+inline bool Writer<StringBuffer>::WriteInt64(int64_t i64) {
+    char *buffer = os_->Push(21);
+    const char* end = internal::i64toa(i64, buffer);
+    os_->Pop(static_cast<size_t>(21 - (end - buffer)));
+    return true;
+}
+
+template<>
+inline bool Writer<StringBuffer>::WriteUint64(uint64_t u) {
+    char *buffer = os_->Push(20);
+    const char* end = internal::u64toa(u, buffer);
+    os_->Pop(static_cast<size_t>(20 - (end - buffer)));
+    return true;
+}
+
+template<>
+inline bool Writer<StringBuffer>::WriteDouble(double d) {
+    if (internal::Double(d).IsNanOrInf()) {
+        // Note: This code path can only be reached if (RAPIDJSON_WRITE_DEFAULT_FLAGS & kWriteNanAndInfFlag).
+        if (!(kWriteDefaultFlags & kWriteNanAndInfFlag))
+            return false;
+        if (internal::Double(d).IsNan()) {
+            PutReserve(*os_, 3);
+            PutUnsafe(*os_, 'N'); PutUnsafe(*os_, 'a'); PutUnsafe(*os_, 'N');
+            return true;
+        }
+        if (internal::Double(d).Sign()) {
+            PutReserve(*os_, 9);
+            PutUnsafe(*os_, '-');
+        }
+        else
+            PutReserve(*os_, 8);
+        PutUnsafe(*os_, 'I'); PutUnsafe(*os_, 'n'); PutUnsafe(*os_, 'f');
+        PutUnsafe(*os_, 'i'); PutUnsafe(*os_, 'n'); PutUnsafe(*os_, 'i'); PutUnsafe(*os_, 't'); PutUnsafe(*os_, 'y');
+        return true;
+    }
+
+    char *buffer = os_->Push(25);
+    char* end = internal::dtoa(d, buffer, maxDecimalPlaces_);
+    os_->Pop(static_cast<size_t>(25 - (end - buffer)));
+    return true;
+}
+
+#if (defined(RAPIDJSON_SSE2) || defined(RAPIDJSON_SSE42)) && !defined(__SANITIZE_ADDRESS__)
+template<>
+inline bool Writer<StringBuffer>::ScanWriteUnescapedString(StringStream& is, size_t length) {
+    if (length < 16)
+        return RAPIDJSON_LIKELY(is.Tell() < length);
+
+    if (!RAPIDJSON_LIKELY(is.Tell() < length))
+        return false;
+
+    const char* p = is.src_;
+    const char* end = is.head_ + length;
+    const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
+    const char* endAligned = reinterpret_cast<const char*>(reinterpret_cast<size_t>(end) & static_cast<size_t>(~15));
+    if (nextAligned > end)
+        return true;
+
+    os_->Reserve(nextAligned - p);
+
+    while (p != nextAligned)
+        if (*p < 0x20 || *p == '\"' || *p == '\\') {
+            is.src_ = p;
+            return RAPIDJSON_LIKELY(is.Tell() < length);
+        }
+        else
+            os_->PutUnsafe(*p++);
+
+    // The rest of the string is scanned with SIMD
+    static const char dquote[16] = { '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"' };
+    static const char bslash[16] = { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' };
+    static const char space[16]  = { 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F };
+    const __m128i dq = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&dquote[0]));
+    const __m128i bs = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&bslash[0]));
+    const __m128i sp = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&space[0]));
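+
+    // (Each 16-byte block is compared against '"', '\\' and the 0x1F threshold in
+    // parallel; a non-zero movemask below means some byte in the block needs
+    // escaping, at which point the scalar copy path takes over.)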
+    for (; p != endAligned; p += 16) {
+        const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
+        const __m128i t1 = _mm_cmpeq_epi8(s, dq);
+        const __m128i t2 = _mm_cmpeq_epi8(s, bs);
+        const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x1F) == 0x1F
+        const __m128i x = _mm_or_si128(_mm_or_si128(t1, t2), t3);
+        unsigned short r = static_cast<unsigned short>(_mm_movemask_epi8(x));
+        if (RAPIDJSON_UNLIKELY(r != 0)) {   // at least one character needs escaping
+            SizeType len;
+#ifdef _MSC_VER         // find the index of the first escaped character
+            unsigned long offset;
+            _BitScanForward(&offset, r);
+            len = offset;
+#else
+            len = static_cast<SizeType>(__builtin_ffs(r) - 1);
+#endif
+            char* q = reinterpret_cast<char*>(os_->Push(len));
+            for (size_t i = 0; i < len; i++)
+                q[i] = p[i];
+
+            p += len;
+            break;
+        }
+        _mm_storeu_si128(reinterpret_cast<__m128i *>(os_->Push(16)), s);
+    }
+
+    is.src_ = p;
+    return RAPIDJSON_LIKELY(is.Tell() < length);
+}
+#elif defined(RAPIDJSON_NEON) && !defined(__SANITIZE_ADDRESS__)
+template<>
+inline bool Writer<StringBuffer>::ScanWriteUnescapedString(StringStream& is, size_t length) {
+    if (length < 16)
+        return RAPIDJSON_LIKELY(is.Tell() < length);
+
+    if (!RAPIDJSON_LIKELY(is.Tell() < length))
+        return false;
+
+    const char* p = is.src_;
+    const char* end = is.head_ + length;
+    const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
+    const char* endAligned = reinterpret_cast<const char*>(reinterpret_cast<size_t>(end) & static_cast<size_t>(~15));
+    if (nextAligned > end)
+        return true;
+
+    os_->Reserve(nextAligned - p);
+
+    while (p != nextAligned)
+        if (*p < 0x20 || *p == '\"' || *p == '\\') {
+            is.src_ = p;
+            return RAPIDJSON_LIKELY(is.Tell() < length);
+        }
+        else
+            os_->PutUnsafe(*p++);
+
+    // The rest of the string is scanned with SIMD
+    const uint8x16_t s0 = vmovq_n_u8('"');
+    const uint8x16_t s1 = vmovq_n_u8('\\');
+    const uint8x16_t s2 = vmovq_n_u8('\b');
+    const uint8x16_t s3 = vmovq_n_u8(32);
+
+    for (; p != endAligned; p += 16) {
+        const uint8x16_t s = vld1q_u8(reinterpret_cast<const uint8_t *>(p));
+        uint8x16_t x = vceqq_u8(s, s0);
+        x = vorrq_u8(x, vceqq_u8(s, s1));
+        x = vorrq_u8(x, vceqq_u8(s, s2));
+        x = vorrq_u8(x, vcltq_u8(s, s3));
+
+        x = vrev64q_u8(x);                     // Rev in 64
+        uint64_t low = vgetq_lane_u64(vreinterpretq_u64_u8(x), 0);   // extract
+        uint64_t high = vgetq_lane_u64(vreinterpretq_u64_u8(x), 1);  // extract
+
+        SizeType len = 0;
+        bool escaped = false;
+        if (low == 0) {
+            if (high != 0) {
+                uint32_t lz = internal::clzll(high);
+                len = 8 + (lz >> 3);
+                escaped = true;
+            }
+        } else {
+            uint32_t lz = internal::clzll(low);
+            len = lz >> 3;
+            escaped = true;
+        }
+        if (RAPIDJSON_UNLIKELY(escaped)) {   // at least one character needs escaping
+            char* q = reinterpret_cast<char*>(os_->Push(len));
+            for (size_t i = 0; i < len; i++)
+                q[i] = p[i];
+
+            p += len;
+            break;
+        }
+        vst1q_u8(reinterpret_cast<uint8_t *>(os_->Push(16)), s);
+    }
+
+    is.src_ = p;
+    return RAPIDJSON_LIKELY(is.Tell() < length);
+}
+#endif // RAPIDJSON_NEON
+
+RAPIDJSON_NAMESPACE_END
+
+#if defined(_MSC_VER) || defined(__clang__)
+RAPIDJSON_DIAG_POP
+#endif
+
+#endif // RAPIDJSON_RAPIDJSON_H_
diff --git a/tst/CMakeLists.txt b/tst/CMakeLists.txt
new file mode 100644
index 0000000..b00a099
--- /dev/null
+++ b/tst/CMakeLists.txt
@@ -0,0 +1,19 @@
+##################################################
+# We use GoogleTest for both unit and system tests
+##################################################
+message("tst/CMakeLists.txt")
+
+# Fetch GoogleTest.
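+# (Pinning GIT_TAG to a commit hash keeps test builds reproducible even if the
+# release tag is ever moved.)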
+include(FetchContent) + +FetchContent_Declare( + googletest + GIT_REPOSITORY https://github.com/google/googletest.git + GIT_TAG 58d77fa8070e8cec2dc1ed015d66b454c8d78850 # release-1.12.1 + OVERRIDE_FIND_PACKAGE) +FetchContent_MakeAvailable(googletest) + + +include(GoogleTest) + +add_subdirectory(unit) \ No newline at end of file diff --git a/tst/integration/data/store.json b/tst/integration/data/store.json new file mode 100644 index 0000000..9337573 --- /dev/null +++ b/tst/integration/data/store.json @@ -0,0 +1,80 @@ +{ + "store": { + "books": [ + { + "category": "reference", + "author": "Nigel Rees", + "title": "Sayings of the Century", + "price": 8.95, + "in-stock": true + }, + { + "category": "fiction", + "author": "Evelyn Waugh", + "title": "Sword of Honour", + "price": 12.99, + "in-stock": true, + "movies": [ + { + "title": "Sword of Honour - movie", + "realisator": { + "first_name": "Bill", + "last_name": "Anderson" + } + } + ] + }, + { + "category": "fiction", + "author": "Herman Melville", + "title": "Moby Dick", + "isbn": "0-553-21311-3", + "price": 9, + "in-stock": false + }, + { + "category": "fiction", + "author": "J. R. R. Tolkien", + "title": "The Lord of the Rings", + "isbn": "0-115-03266-2", + "price": 22.99, + "in-stock": true + }, + { + "category": "reference", + "author": "William Jr. Strunk", + "title": "The Elements of Style", + "price": 6.99, + "in-stock": false + }, + { + "category": "fiction", + "author": "Leo Tolstoy", + "title": "Anna Karenina", + "price": 22.99, + "in-stock": true + }, + { + "category": "reference", + "author": "Sarah Janssen", + "title": "The World Almanac and Book of Facts 2021", + "isbn": "0-925-23305-2", + "price": 10.69, + "in-stock": false + }, + { + "category": "reference", + "author": "Kate L. Turabian", + "title": "Manual for Writers of Research Papers", + "isbn": "0-675-16695-1", + "price": 8.59, + "in-stock": true + } + ], + "bicycle": { + "color": "red", + "price": 19.64, + "in-stock": true + } + } +} \ No newline at end of file diff --git a/tst/integration/data/wikipedia.json b/tst/integration/data/wikipedia.json new file mode 100644 index 0000000..30cdfd4 --- /dev/null +++ b/tst/integration/data/wikipedia.json @@ -0,0 +1,26 @@ +{ + "firstName": "John", + "lastName": "Smith", + "age": 27, + "weight": 135.17, + "isAlive": true, + "address": { + "street": "21 2nd Street", + "city": "New York", + "state": "NY", + "zipcode": "10021-3100" + }, + "phoneNumbers": [ + { + "type": "home", + "number": "212 555-1234" + }, + { + "type": "office", + "number": "646 555-4567" + } + ], + "children": [], + "spouse": null, + "groups": {} +} \ No newline at end of file diff --git a/tst/integration/data/wikipedia_compact.json b/tst/integration/data/wikipedia_compact.json new file mode 100644 index 0000000..112c922 --- /dev/null +++ b/tst/integration/data/wikipedia_compact.json @@ -0,0 +1 @@ +{"firstName":"John","lastName":"Smith","age":27,"weight":135.17,"isAlive":true,"address":{"street":"21 2nd Street","city":"New York","state":"NY","zipcode":"10021-3100"},"phoneNumbers":[{"type":"home","number":"212 555-1234"},{"type":"office","number":"646 555-4567"}],"children":[],"spouse":null,"groups":{}} \ No newline at end of file diff --git a/tst/integration/error_handlers.py b/tst/integration/error_handlers.py new file mode 100644 index 0000000..a3ae709 --- /dev/null +++ b/tst/integration/error_handlers.py @@ -0,0 +1,31 @@ +import re + + +class ErrorStringTester: + def is_syntax_error(string): + return string.startswith("SYNTAXERR") or \ + 
string.startswith("unknown subcommand")
+
+    def is_nonexistent_error(string):
+        return string.startswith("NONEXISTENT")
+
+    def is_wrongtype_error(string):
+        return string.startswith("WRONGTYPE")
+
+    def is_number_overflow_error(string):
+        return string.startswith("OVERFLOW")
+
+    def is_outofboundaries_error(string):
+        return string.startswith("OUTOFBOUNDARIES")
+
+    def is_limit_exceeded_error(string):
+        return string.startswith("LIMIT")
+
+    def is_write_error(string):
+        return string.startswith("ERROR") or string.startswith("OUTOFBOUNDARIES") or \
+               string.startswith("WRONGTYPE") or string.startswith("NONEXISTENT")
+
+    # NOTE: Uses .find instead of .startswith in case a prefix is added in the future
+    def is_wrong_number_of_arguments_error(string):
+        return string.find("wrong number of arguments") >= 0 or \
+               string.lower().find('invalid number of arguments') >= 0
diff --git a/tst/integration/json_test_case.py b/tst/integration/json_test_case.py
new file mode 100644
index 0000000..7ee27e9
--- /dev/null
+++ b/tst/integration/json_test_case.py
@@ -0,0 +1,61 @@
+import valkey
+import pytest
+import os
+import logging
+import shutil
+import time
+from valkeytests.valkey_test_case import ValkeyTestCase, ValkeyServerHandle
+from valkey import ResponseError
+from error_handlers import ErrorStringTester
+
+
+class SimpleTestCase(ValkeyTestCase):
+    '''
+    Simple test case: a single server without the JSON module loaded.
+    '''
+
+    def setup(self):
+        super(SimpleTestCase, self).setup()
+        self.client = self.server.get_new_client()
+
+    def teardown(self):
+        if self.is_connected():
+            self.client.execute_command("FLUSHALL")
+            logging.info("executed FLUSHALL at teardown")
+        super(SimpleTestCase, self).teardown()
+
+    def is_connected(self):
+        try:
+            self.client.ping()
+            return True
+        except Exception:
+            return False
+
+
+class JsonTestCase(SimpleTestCase):
+    '''
+    Base class for JSON tests: a single server with the JSON module loaded.
+    '''
+
+    def get_custom_args(self):
+        self.set_server_version(os.environ['SERVER_VERSION'])
+        return {
+            'loadmodule': os.getenv('MODULE_PATH'),
+            "enable-debug-command": "local",
+            'enable-protected-configs': 'yes'
+        }
+
+    def verify_error_response(self, client, cmd, expected_err_reply):
+        try:
+            client.execute_command(cmd)
+            assert False
+        except ResponseError as e:
+            assert_error_msg = f"Actual error message: '{str(e)}' is different from expected error message '{expected_err_reply}'"
+            assert str(e) == expected_err_reply, assert_error_msg
+
+    def setup(self):
+        super(JsonTestCase, self).setup()
+        self.error_class = ErrorStringTester
+
+    def teardown(self):
+        super(JsonTestCase, self).teardown()
diff --git a/tst/integration/run.sh b/tst/integration/run.sh
new file mode 100755
index 0000000..983ae7c
--- /dev/null
+++ b/tst/integration/run.sh
@@ -0,0 +1,34 @@
+#!/usr/bin/env bash
+
+# Sometimes processes are left running when a test run is cancelled.
+# Therefore, before the build starts, kill all test processes left over from the previous run.
+echo "Kill old running tests"
+pkill -9 -x Pytest || true
+pkill -9 -f "valkey-server.*:" || true
+pkill -9 -f Valgrind || true
+pkill -9 -f "valkey-benchmark" || true
+
+# cd to the directory containing this script
+DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
+cd "${DIR}"
+
+export MODULE_PATH=$2/build/src/libjson.so
+export SERVER_VERSION="unstable"
+
+if [[ ! -z "${TEST_PATTERN}" ]] ; then
+    export TEST_PATTERN="-k ${TEST_PATTERN}"
+fi
+
+BINARY_PATH=".build/binaries/$SERVER_VERSION/valkey-server"
+
+if [[ !
-f "${BINARY_PATH}" ]] ; then + echo "${BINARY_PATH} missing" + exit 1 +fi + +if [[ $1 == "test" ]] ; then + python -m pytest --html=report.html --cache-clear -v ${TEST_FLAG} ./ ${TEST_PATTERN} +else + echo "Unknown target: $1" + exit 1 +fi \ No newline at end of file diff --git a/tst/integration/test_json_basic.py b/tst/integration/test_json_basic.py new file mode 100644 index 0000000..2b78cec --- /dev/null +++ b/tst/integration/test_json_basic.py @@ -0,0 +1,4104 @@ +from utils_json import DEFAULT_MAX_PATH_LIMIT, DEFAULT_MAX_DOCUMENT_SIZE, \ + DEFAULT_WIKIPEDIA_COMPACT_PATH, DEFAULT_WIKIPEDIA_PATH, \ + JSON_INFO_METRICS_SECTION, JSON_INFO_NAMES +from valkey.exceptions import ResponseError, NoPermissionError +from valkeytests.conftest import resource_port_tracker +import pytest +import glob +import logging +import os +import random +import struct +import json +from math import isclose, isnan, isinf, frexp +from json_test_case import JsonTestCase + +DATA_ORGANISM = ''' + { + "heavy_animal" : 200, + "plants" : [ + { + "name" : "Cactus", + "height" : 120, + "weight" : 90 + }, + { + "name" : "Ghost Plant", + "height" : "Unknown", + "weight" : "Unknown" + }, + { + "name" : "Redwood", + "height" : 4200, + "weight" : 50000 + } + ], + "animals" : [ + { + "name" : "Platypus", + "length" : 24, + "weight" : 5 + }, + { + "fish" : [ + { + "name" : "Bass", + "length" : 34, + "weight" : 5 + }, + { + "name" : "Swordfish", + "length" : 177, + "weight" : 200 + } + + ] + }, + { + "mammals" : [ + { + "name" : "Platypus", + "length" : 24, + "weight" : 5 + }, + { + "name" : "Horse", + "height" : 68, + "weight" : 660 + }, + { + "primates" : [ + { + "name" : "Monkey", + "height" : 18, + "weight" : 30 + }, + { + "name" : "Baboon", + "height" : 26, + "weight" : 50 + }, + { + "apes" : [ + { + "name" : "Chimpanzee", + "height" : 66, + "weight" : 130 + }, + { + "name" : "Gorilla", + "height" : 66, + "weight" : 400 + }, + { + "name" : "Orangutan", + "height" : 70, + "weight" : 300 + } + ] + } + ] + } + ] + } + ] + } + ''' + +# valkey keys +wikipedia = 'wikipedia' +wikipedia2 = 'wikipedia2' +wikipedia3 = 'wikipedia3' +wikipedia4 = 'wikipedia4' +store = 'store' +store2 = 'store2' +organism = 'organism' +organism2 = 'organism2' +str_key = 'str_key' +k1 = 'k1' +k2 = 'k2' +k3 = 'k3' +k4 = 'k4' +k5 = 'k5' +k6 = 'k6' +k7 = 'k7' +k8 = 'k8' +k9 = 'k9' +k10 = 'k10' +k11 = 'k11' +k12 = 'k12' +k = 'k' +nonexistentkey = 'nonexistentkey' +nonexistentpath = 'nonexistentpath' +input = 'input' +input2 = 'input2' +arr = 'arr' +foo = 'foo' +baz = 'baz' + + +# Base Test class containing all core json module tests +class TestJsonBasic(JsonTestCase): + + def setup_data(self): + client = self.server.get_new_client() + client.config_set( + 'json.max-path-limit', DEFAULT_MAX_PATH_LIMIT) + client.config_set( + 'json.max-document-size', DEFAULT_MAX_DOCUMENT_SIZE) + # Need the following line when executing the test against a running Valkey. + # Otherwise, data from previous test cases will interfere current test case. + client.execute_command("FLUSHDB") + + # Load wikipedia sample JSONs. We use wikipedia.json as input to create a document key. Then, use + # wikipedia_compact.json, which does not have indent/space/newline, to verify correctness of serialization. 
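+        # (The compact file is byte-for-byte what JSON.GET is expected to emit with default formatting.)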
+ with open(DEFAULT_WIKIPEDIA_PATH, 'r') as file: + self.data_wikipedia = file.read() + with open(DEFAULT_WIKIPEDIA_COMPACT_PATH, 'r') as file: + self.data_wikipedia_compact = file.read() + assert b'OK' == client.execute_command( + 'JSON.SET', wikipedia, '.', self.data_wikipedia) + + # Create a string key to be used for verifying that JSON.GET should not operate on a non-document key. + client.execute_command( + 'SET', str_key, '{"firstName":"John","lastName":"Smith"}') + + def setup(self): + super(TestJsonBasic, self).setup() + self.setup_data() + + def test_sanity(self): + ''' + Test simple SET/GET/MGET/DEL, both legacy and JSONPath syntax. + ''' + + client = self.server.get_new_client() + assert b'OK' == client.execute_command( + 'JSON.SET', k1, '.', '{"a":"1", "b":"2", "c":"3"}') + + assert b'OK' == client.execute_command( + 'JSON.SET', k2, '.', '[1,2,3,4,5]') + assert [b'{"a":"1","b":"2","c":"3"}', b'[1,2,3,4,5]'] == client.execute_command( + 'JSON.MGET', k1, k2, '.') + assert b'{"a":"1","b":"2","c":"3"}' == client.execute_command( + 'JSON.GET', k1) + for (key, path, exp) in [ + (k1, '.', '{"a":"1","b":"2","c":"3"}'), + (k1, '.a', '"1"'), + (k2, '.', '[1,2,3,4,5]'), + (k2, '[-1]', '5') + ]: + assert exp == client.execute_command( + 'JSON.GET', key, path).decode() + # pretty print + for (key, fmt, path, exp) in [ + (k1, 'SPACE', '.', '{"a": "1","b": "2","c": "3"}'), + (k1, 'INDENT', '.', '{ "a":"1", "b":"2", "c":"3"}'), + (k2, 'INDENT', '.', '[ 1, 2, 3, 4, 5]'), + (k2, 'INDENT', '[-2]', '4') + ]: + assert exp == client.execute_command( + 'JSON.GET', key, fmt, ' ', path).decode() + + assert [b'[{"a":"1","b":"2","c":"3"}]', b'[[1,2,3,4,5]]'] == client.execute_command( + 'JSON.MGET', k1, k2, '$') + for (key, path, exp) in [ + (k1, '$', '[{"a":"1","b":"2","c":"3"}]'), + (k1, '$.*', '["1","2","3"]'), + (k2, '$', '[[1,2,3,4,5]]'), + (k2, '$.[*]', '[1,2,3,4,5]'), + (k2, '$.[-1]', '[5]'), + (k2, '$.[0:3]', '[1,2,3]') + ]: + assert exp == client.execute_command( + 'JSON.GET', key, path).decode() + # pretty print + for (key, fmt, path, exp) in [ + (k1, 'SPACE', '$', '[{"a": "1","b": "2","c": "3"}]'), + (k1, 'INDENT', '$', + '[ { "a":"1", "b":"2", "c":"3" }]'), + (k1, 'INDENT', '$.*', '[ "1", "2", "3"]'), + (k2, 'INDENT', '$', '[ [ 1, 2, 3, 4, 5 ]]'), + (k2, 'INDENT', '$.[0:3]', '[ 1, 2, 3]') + ]: + assert exp == client.execute_command( + 'JSON.GET', key, fmt, ' ', path).decode() + + assert 1 == client.execute_command('JSON.DEL', k1) + assert 1 == client.execute_command('JSON.DEL', k2) + assert b'OK' == client.execute_command( + 'JSON.SET', k1, '$', '{"a":"1", "b":"2", "c":"3"}') + assert b'OK' == client.execute_command( + 'JSON.SET', k2, '$', '[1,2,3,4,5]') + for (key, val_before, del_path, del_ret, val_after) in [ + (k1, '[{"a":"1","b":"2","c":"3"}]', '$.*', 3, '[{}]'), + (k2, '[[1,2,3,4,5]]', '$.[*]', 5, '[[]]') + ]: + assert val_before == client.execute_command( + 'JSON.GET', key, '$').decode() + assert del_ret == client.execute_command( + 'JSON.DEL', key, del_path) + assert val_after == client.execute_command( + 'JSON.GET', key, '$').decode() + + def test_parse_invalid_json_string(self): + client = self.server.get_new_client() + for input_str in ['foo', '{"a"}', '[a]']: + with pytest.raises(ResponseError) as e: + client.execute_command( + 'JSON.SET', wikipedia, '.firstName', input_str) + assert self.error_class.is_syntax_error(str(e.value)) + + def test_json_set_command_supports_all_datatypes(self): + client = self.server.get_new_client() + for (path, value) in [('.address.city', 
'"Boston"'),  # string
+                              # number
+                              ('.age', '30'),
+                              ('.foo', '[1,2,3]'),  # array
+                              # array element access
+                              ('.phoneNumbers[0].number', '"1234567"'),
+                              # object
+                              ('.foo', '{"a":"b"}'),
+                              ('.lastName', 'null'),  # null
+                              ('.isAlive', 'false')]:  # boolean
+            assert b'OK' == client.execute_command(
+                'JSON.SET', wikipedia, path, value)
+            assert value.encode() == client.execute_command(
+                'JSON.GET', wikipedia, path)
+
+    def test_json_set_command_root_document(self):
+        client = self.server.get_new_client()
+        # The path to the root document is represented as '.'
+        for (key, value) in [(k1, '"Boston"'),              # string
+                             (k2, '123'),                   # number
+                             (k3, '["Seattle","Boston"]'),  # array
+                             (k3, '[1,2,3]'),               # array
+                             (k4, '{"a":"b"}'),             # object
+                             (k4, '{}'),                    # empty object
+                             (k5, 'null'),                  # null
+                             (k6, 'false')]:                # boolean
+            assert b'OK' == client.execute_command(
+                'JSON.SET', key, '.', value)
+            assert value.encode() == client.execute_command('JSON.GET', key)
+
+    def test_json_set_command_nx_xx_options(self):
+        client = self.server.get_new_client()
+        for (path, value, cond, exp_set_return, exp_get_return) in [
+                ('.address.city', '"Boston"', 'NX', None, b'"New York"'),
+                ('.address.city', '"Boston"', 'XX', b'OK', b'"Boston"'),
+                ('.foo', '"bar"', 'NX', b'OK', b'"bar"'),
+                ('.firstName', '"bar"', 'NX', None, b'"John"')]:
+            assert exp_set_return == client.execute_command(
+                'JSON.SET', wikipedia, path, value, cond)
+            assert exp_get_return == client.execute_command(
+                'JSON.GET', wikipedia, path)
+
+        with pytest.raises(ResponseError) as e:
+            client.execute_command(
+                'JSON.SET', k1, '.', '"value"', 'badword')
+        assert self.error_class.is_syntax_error(str(e.value))
+
+    def test_json_set_command_with_error_conditions(self):
+        client = self.server.get_new_client()
+        # A new Valkey key's path must be root
+        with pytest.raises(ResponseError) as e:
+            assert None == client.execute_command(
+                'JSON.SET', foo, '.bar', '"bar"')
+        assert self.error_class.is_syntax_error(str(e.value))
+
+        # Option XX means setting the value only if the JSON path exists, i.e., updating the value.
+        # Per the API, if the path does not exist, the command should return null instead of an error.
+        assert None == client.execute_command(
+            'JSON.SET', k1, '.', '"some value"', 'XX')
+        assert None == client.execute_command(
+            'JSON.SET', wikipedia, '.foo', '"bar"', 'XX')
+        with pytest.raises(ResponseError) as e:
+            client.execute_command(
+                'JSON.GET', wikipedia, '.foo')
+        assert self.error_class.is_nonexistent_error(str(e.value))
+
+        # Option NX means setting the value only if the JSON path does not exist, i.e., inserting the value.
+        # Per the API, if the path exists, the command should return null instead of an error.
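+        # (In short: XX = update-only, NX = insert-only, mirroring SET NX/XX on string keys.)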
+ assert None == client.execute_command( + 'JSON.SET', wikipedia, '.', '"some new value"', 'NX') + assert None == client.execute_command( + 'JSON.SET', wikipedia, '.firstName', '"Tom"', 'NX') + assert b'"John"' == client.execute_command( + 'JSON.GET', wikipedia, '.firstName') + + # syntax error: wrong option + with pytest.raises(ResponseError) as e: + client.execute_command( + 'JSON.SET', wikipedia, '.', '"some new value"', 'NXXX') + assert self.error_class.is_syntax_error(str(e.value)) + with pytest.raises(ResponseError) as e: + client.execute_command( + 'JSON.SET', wikipedia, '.', '"some new value"', 'XXXX') + assert self.error_class.is_syntax_error(str(e.value)) + with pytest.raises(ResponseError) as e: + assert None == client.execute_command( + 'JSON.SET', wikipedia, '.', '"bar"', 'a', 'b') + assert str(e.value).find('wrong number of arguments') >= 0 + + def test_json_set_ancestor_keys_should_not_be_overridden(self): + client = self.server.get_new_client() + with pytest.raises(ResponseError) as e: + client.execute_command( + 'JSON.SET', wikipedia, '.firstName.a', '"some new value"') + assert self.error_class.is_write_error(str(e.value)) + + with pytest.raises(ResponseError) as e: + client.execute_command( + 'JSON.SET', wikipedia, '.age.a', '1') + assert self.error_class.is_write_error(str(e.value)) + + with pytest.raises(ResponseError) as e: + client.execute_command( + 'JSON.SET', wikipedia, '.address.a.b', '"some new value"') + assert self.error_class.is_write_error(str(e.value)) + + with pytest.raises(ResponseError) as e: + client.execute_command( + 'JSON.SET', wikipedia, '.address[0]', '"some new value"') + assert self.error_class.is_write_error(str(e.value)) + + def test_json_set_reject_out_of_boundary_array_index(self): + client = self.server.get_new_client() + with pytest.raises(ResponseError) as e: + client.execute_command( + 'JSON.SET', wikipedia, 'phoneNumbers[9]', '"123"') + assert self.error_class.is_outofboundaries_error(str(e.value)) + + def test_json_set_insert_value(self): + client = self.server.get_new_client() + # insert is allowed if and only if the parent node is the last child in the path. + for (path, new_val) in [ + ('["address"]["z"]', '"z"'), + ('.address.z2', '"z2"') + ]: + client.execute_command( + 'JSON.SET', wikipedia, path, new_val) + assert new_val.encode() == client.execute_command( + 'JSON.GET', wikipedia, path) + + # if the parent node is not the last child in the path, insertion is not allowed. 
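+        # (e.g. '.address.foo.z' below fails because the parent '.address.foo' itself does not exist)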
+ for (path, new_val) in [ + ('["address"]["foo"]["z"]', '"z"'), + ('.address.foo.z', '"z"') + ]: + with pytest.raises(ResponseError) as e: + client.execute_command( + 'JSON.SET', wikipedia, path, new_val) + assert self.error_class.is_nonexistent_error(str(e.value)) + + def test_json_set_negative_array_index(self): + client = self.server.get_new_client() + new_val = '"1-2-3"' + client.execute_command( + 'JSON.SET', wikipedia, '.phoneNumbers[-1].number', new_val) + assert b'"1-2-3"' == client.execute_command( + 'JSON.GET', wikipedia, '.phoneNumbers[-1].number') + assert b'["212 555-1234","1-2-3"]' == client.execute_command( + 'JSON.GET', wikipedia, '$.phoneNumbers[*].number') + + def test_json_set_legacy_and_v2path_wildcard(self): + client = self.server.get_new_client() + data = ''' + {"firstName":"John","lastName":"Smith","age":27,"weight":135.17,"isAlive":true,"address": + {"street":"21 2nd Street","city":"New York","state":"NY","zipcode":"10021-3100"}, + "phoneNumbers":[{"type":"home","number":"212 555-1234"},{"type":"office","number":"646 555-4567"}], + "children":[],"spouse":null,"groups":{}} + ''' + client.execute_command( + 'JSON.SET', wikipedia2, '.', data) + client.execute_command( + 'JSON.SET', wikipedia3, '.', data) + client.execute_command( + 'JSON.SET', wikipedia4, '.', data) + + for (key, path, new_val, exp, path2, exp2) in [ + (wikipedia, '$.address.*', '"1"', + b'["1","1","1","1"]', '$.address.*', b'["1","1","1","1"]'), + (wikipedia2, '.address.*', '"1"', b'"1"', + '$.address.*', b'["1","1","1","1"]'), + (wikipedia3, '$.phoneNumbers[*].number', '"1"', b'["1","1"]', + '$.phoneNumbers[*].number', b'["1","1"]'), + (wikipedia4, '.phoneNumbers[*].number', '"1"', b'"1"', + '$.phoneNumbers[*].number', b'["1","1"]') + ]: + client.execute_command( + 'JSON.SET', key, path, new_val) + assert exp == client.execute_command( + 'JSON.GET', key, path) + # verify all values + assert exp2 == client.execute_command( + 'JSON.GET', key, path2) + + client.execute_command( + 'JSON.SET', k1, '.', '{"a":[], "b":[1], "c":[1,2,3]}') + client.execute_command( + 'JSON.SET', k2, '.', '{"a":{}, "b":{"a":1}, "c":{"a":1, "b":2, "c":3}}') + + # NOTE: The expected results below account for the outcome of previous commands. 
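+        # (Each JSON.SET in this table mutates k1/k2, so later rows assert against the already-updated document.)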
+        test_cases = [
+            (k1, '$.a[*]', '1', b'[]',
+             b'{"a":[],"b":[1],"c":[1,2,3]}'),
+            (k1, '$.b[*]', '2', b'[2]',
+             b'{"a":[],"b":[2],"c":[1,2,3]}'),
+            (k1, '$.c[*]', '4', b'[4,4,4]',
+             b'{"a":[],"b":[2],"c":[4,4,4]}'),
+            (k1, '.a[*]', '1', None, None),
+            (k1, '.b[*]', '3', b'3',
+             b'{"a":[],"b":[3],"c":[4,4,4]}'),
+            (k1, '.c[*]', '5', b'5',
+             b'{"a":[],"b":[3],"c":[5,5,5]}'),
+            (k2, '$.a.*', '1', b'[]',
+             b'{"a":{},"b":{"a":1},"c":{"a":1,"b":2,"c":3}}'),
+            (k2, '$.b.*', '2',
+             b'[2]', b'{"a":{},"b":{"a":2},"c":{"a":1,"b":2,"c":3}}'),
+            (k2, '$.c.*', '4',
+             b'[4,4,4]', b'{"a":{},"b":{"a":2},"c":{"a":4,"b":4,"c":4}}'),
+            (k2, '.a.*', '1', None, None),
+            (k2, '.b.*', '3', b'3',
+             b'{"a":{},"b":{"a":3},"c":{"a":4,"b":4,"c":4}}'),
+            (k2, '.c.*', '5', b'5',
+             b'{"a":{},"b":{"a":3},"c":{"a":5,"b":5,"c":5}}')
+        ]
+
+        for (key, path, new_val, exp, exp2) in test_cases:
+            if exp is None:
+                with pytest.raises(ResponseError) as e:
+                    client.execute_command(
+                        'JSON.SET', key, path, new_val)
+                    client.execute_command(
+                        'JSON.GET', key, path)
+                assert self.error_class.is_nonexistent_error(str(e.value))
+            else:
+                client.execute_command(
+                    'JSON.SET', key, path, new_val)
+                assert exp == client.execute_command(
+                    'JSON.GET', key, path)
+                # verify entire key
+                assert exp2 == client.execute_command(
+                    'JSON.GET', key, '.')
+
+    def test_json_get_command_supports_all_datatypes(self):
+        client = self.server.get_new_client()
+        for (path, value) in [('.firstName', '"John"'),         # string
+                              ('.address.city', '"New York"'),  # string
+                              ('.spouse', 'null'),              # null
+                              ('.children', '[]'),              # empty array
+                              ('.groups', '{}'),                # empty object
+                              ('.isAlive', 'true'),             # boolean
+                              ('.age', '27')]:                  # integer number
+            assert value.encode() == client.execute_command(
+                'JSON.GET', wikipedia, path)
+
+        for (path, value) in [('["weight"]', '135.17')]:  # float number
+            assert value == client.execute_command(
+                'JSON.GET', wikipedia, path).decode()
+
+    def test_json_path_syntax_objectkeys(self):
+        client = self.server.get_new_client()
+        for (path, value) in [('["firstName"]', '"John"'),
+                              ('address[\'city\']', '"New York"'),
+                              ('[\'address\'][\'city\']', '"New York"'),
+                              ('["address"]["city"]', '"New York"'),
+                              ('["address"][\'city\']', '"New York"'),
+                              ('["isAlive"]', 'true'),
+                              ('[\'age\']', '27')]:
+            assert value.encode() == client.execute_command(
+                'JSON.GET', wikipedia, path)
+
+        for (path, value) in [('["weight"]', '135.17')]:
+            assert value == client.execute_command(
+                'JSON.GET', wikipedia, path).decode()
+
+        test_cases = [
+            '[firstName"]',
+            'address["city\'',
+            '["address\'][[[["city"]',
+            '[[["address"]]]["city"]',
+            '"["address"][\'city\']',
+            '"[\'address"]["city"]',
+            '[""""address]["city"]',
+            '[address""""]',
+            '[\'address\']]][\'city\']',
+            '["address"]\'[\'"city"]',
+        ]
+
+        # invalid json paths
+        for path in test_cases:
+            with pytest.raises(ResponseError) as e:
+                client.execute_command(
+                    'JSON.GET', wikipedia, path)
+            assert self.error_class.is_syntax_error(str(e.value))
+
+    def test_json_get_command_floating_point(self):
+        '''
+        Test special cases of floating point values.
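+        Values round-trip exactly as written: extra precision and trailing zeros
+        are preserved rather than normalized.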
+ ''' + client = self.server.get_new_client() + for value in ['0', '0.1', '0.3', '1.23456789', '-0.1', '-0.3', '-1.23456789']: + assert b'OK' == client.execute_command( + 'JSON.SET', wikipedia, '.foo', value) + assert value == client.execute_command( + 'JSON.GET', wikipedia, '.foo').decode() + + # max double and min double: floating points will be returned exactly as is + assert b'OK' == client.execute_command( + 'JSON.SET', wikipedia, '.foo', '1.7976e+308') + assert b'OK' == client.execute_command( + 'JSON.SET', wikipedia, '.bar', '-1.7976e+308') + + assert '1.7976e+308' == client.execute_command( + 'JSON.GET', wikipedia, '.foo').decode() + assert '-1.7976e+308' == client.execute_command( + 'JSON.GET', wikipedia, '.bar').decode() + + # 1.234567890123456789 exceeds the precision of a double but will persist regardless. + assert b'OK' == client.execute_command( + 'JSON.SET', wikipedia, '.foo', '1.234567890123456789') + + assert '1.234567890123456789' == client.execute_command( + 'JSON.GET', wikipedia, '.foo').decode() + + # trailing zeros will no longer be removed. + assert b'OK' == client.execute_command( + 'JSON.SET', wikipedia, '.foo', '0.3000000') + + assert '0.3000000' == client.execute_command( + 'JSON.GET', wikipedia, '.foo').decode() + + def test_json_get_command_with_multiple_paths(self): + client = self.server.get_new_client() + assert b'{".firstName":"John",".lastName":"Smith"}' == client.execute_command( + 'JSON.GET', wikipedia, '.firstName', '.lastName') + + with pytest.raises(ResponseError) as e: + client.execute_command( + 'JSON.GET', wikipedia, '.firstName', '.lastName', '.foo', '.bar') + assert self.error_class.is_nonexistent_error(str(e.value)) + + def test_json_get_command_returns_json_with_default_format(self): + client = self.server.get_new_client() + exp = self.data_wikipedia_compact + + # default format is compact JSON string - no indent, no space and no newline + assert exp == client.execute_command( + 'JSON.GET', wikipedia).decode('utf-8') + + # test NOESCAPE: verify that NOESCAPE is ignored. See the API doc. + assert exp == client.execute_command( + 'JSON.GET', wikipedia, 'NOESCAPE').decode('utf-8') + + def test_json_get_command_returns_json_with_custom_format(self): + client = self.server.get_new_client() + exp = self.data_wikipedia + + # get the root document with custom indent/space/newline + ret = client.execute_command( + 'JSON.GET', wikipedia, 'INDENT', ' ', 'SPACE', ' ', 'NEWLINE', '\n').decode('utf-8') + assert exp == client.execute_command( + 'JSON.GET', wikipedia, 'INDENT', ' ', 'SPACE', ' ', 'NEWLINE', '\n').decode('utf-8') + + # test NOESCAPE: verify that NOESCAPE is ignored. See the API doc. 
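+        # (NOESCAPE is accepted at any position in the argument list but never changes the output.)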
+ assert exp == client.execute_command( + 'JSON.GET', wikipedia, 'INDENT', ' ', 'SPACE', ' ', 'NEWLINE', '\n', 'NOESCAPE').decode('utf-8') + assert exp == client.execute_command( + 'JSON.GET', wikipedia, 'NOESCAPE', 'INDENT', ' ', 'SPACE', ' ', 'NEWLINE', '\n').decode('utf-8') + assert exp == client.execute_command( + 'JSON.GET', wikipedia, 'INDENT', ' ', 'NOESCAPE', 'SPACE', ' ', 'NEWLINE', '\n').decode('utf-8') + + # get a sub-document with custom indent/space/newline + exp_json = '{\n\t"street": "21 2nd Street",\n\t"city": "New York",\n\t"state": "NY",\n\t"zipcode": "10021-3100"\n}' + assert exp_json == client.execute_command( + 'JSON.GET', wikipedia, 'INDENT', '\t', 'SPACE', ' ', 'NEWLINE', '\n', '.address').decode('utf-8') + + # INDENT: =*=*, SPACE: --, NEWLINE: \r\n + exp_json = '{\r\n=*=*"street":--"21 2nd Street",\r\n=*=*"city":--"New York",\r\n=*=*"state":--"NY",\r\n=*=*"zipcode":--"10021-3100"\r\n}' + assert exp_json == client.execute_command( + 'JSON.GET', wikipedia, 'INDENT', '=*=*', 'SPACE', '--', 'NEWLINE', '\r\n', '.address').decode('utf-8') + + # verify that path args do not need to be positioned at the end + assert exp == client.execute_command( + 'JSON.GET', wikipedia, '.', 'INDENT', ' ', 'SPACE', ' ', 'NEWLINE', '\n').decode('utf-8') + exp_json = '"John"' + assert exp_json == client.execute_command( + 'JSON.GET', wikipedia, 'INDENT', ' ', '.firstName', 'SPACE', ' ', 'NEWLINE', '\n').decode('utf-8') + + exp_json = '{\n ".firstName": "John",\n ".lastName": "Smith"\n}' + assert exp_json == client.execute_command( + 'JSON.GET', wikipedia, 'INDENT', ' ', 'SPACE', ' ', '.firstName', '.lastName', 'NEWLINE', '\n').decode('utf-8') + exp_json = '{\n\t"street": "21 2nd Street",\n\t"city": "New York",\n\t"state": "NY",\n\t"zipcode": "10021-3100"\n}' + assert exp_json == client.execute_command( + 'JSON.GET', wikipedia, 'INDENT', '\t', '.address', 'SPACE', ' ', 'NEWLINE', '\n').decode('utf-8') + exp_json = '[\n\t{\n\t\t"street": "21 2nd Street",\n\t\t"city": "New York",\n\t\t"state": "NY",\n\t\t"zipcode": "10021-3100"\n\t}\n]' + assert exp_json == client.execute_command( + 'JSON.GET', wikipedia, 'INDENT', '\t', '$.address', 'SPACE', ' ', 'NEWLINE', '\n').decode('utf-8') + + # check that path args can have formatting in between them + exp_json = '{\n ".firstName": "John",\n ".lastName": "Smith"\n}' + assert exp_json == client.execute_command( + 'JSON.GET', wikipedia, 'INDENT', ' ', '.firstName', 'SPACE', ' ', '.lastName', 'NEWLINE', '\n').decode('utf-8') + assert exp_json == client.execute_command( + 'JSON.GET', wikipedia, '.firstName', 'INDENT', ' ', 'SPACE', ' ', 'NEWLINE', '\n', '.lastName').decode('utf-8') + + def test_json_get_command_returns_json_with_custom_format_error_conditions(self): + client = self.server.get_new_client() + # NEWLINE is the last arg + with pytest.raises(ResponseError) as e: + assert None == client.execute_command( + 'JSON.GET', wikipedia, 'INDENT', ' ', 'SPACE', ' ', 'NEWLINE') + assert self.error_class.is_syntax_error(str(e.value)) + + # SPACE is the last arg + with pytest.raises(ResponseError) as e: + assert None == client.execute_command( + 'JSON.GET', wikipedia, 'INDENT', ' ', 'SPACE') + assert self.error_class.is_syntax_error(str(e.value)) + + # INDENT is the last arg + with pytest.raises(ResponseError) as e: + assert None == client.execute_command( + 'JSON.GET', wikipedia, 'NEWLINE', '\n', 'SPACE', ' ', 'INDENT') + assert self.error_class.is_syntax_error(str(e.value)) + + def test_json_get_command_with_error_conditions(self): + client = 
self.server.get_new_client()
+        # If the document key does not exist, the command should return null without throwing an error.
+        assert None == client.execute_command(
+            'JSON.GET', foo, '.firstName')
+
+        # If the key is not a document key, the command should throw an error.
+        with pytest.raises(ResponseError) as e:
+            assert None == client.execute_command(
+                'JSON.GET', str_key)
+        assert self.error_class.is_wrongtype_error(str(e.value))
+
+        # If the JSON path does not exist, the command should throw an error.
+        with pytest.raises(ResponseError) as e:
+            assert None == client.execute_command(
+                'JSON.GET', wikipedia, '.foo')
+        assert self.error_class.is_nonexistent_error(str(e.value))
+
+        # Wrong number of arguments
+        with pytest.raises(ResponseError) as e:
+            assert None == client.execute_command('JSON.GET')
+        assert str(e.value).find('wrong number of arguments') >= 0
+
+    def test_json_number_as_member_name(self):
+        '''
+        Given a JSON that has numbers as member names, test GET and SET.
+        '''
+        client = self.server.get_new_client()
+        client.execute_command(
+            'JSON.SET', k1, '.', '{"1":1, "2":{"3":4}}')
+        client.execute_command('COPY', k1, k2)
+        client.execute_command('COPY', k1, k3)
+        client.execute_command('COPY', k1, k4)
+        client.execute_command('COPY', k1, k5)
+
+        # test GET
+        for (path, exp) in [
+            ('["2"]["3"]', '4'),
+            ('[\'2\'][\'3\']', '4'),
+            ('.1', '1'),
+            ('.2.3', '4'),
+            ('$.2.3', '[4]')
+        ]:
+            assert exp == client.execute_command(
+                'JSON.GET', k1, path).decode()
+
+        # test SET
+        for (key, path, val, exp_new_val) in [
+            (k1, '["2"]["3"]', '5', '{"1":1,"2":{"3":5}}'),
+            (k2, '[\'2\'][\'3\']', '5', '{"1":1,"2":{"3":5}}'),
+            (k3, '.2.3', '5', '{"1":1,"2":{"3":5}}'),
+            (k4, '.1', '2', '{"1":2,"2":{"3":4}}'),
+            (k5, '$.2.*', '5', '{"1":1,"2":{"3":5}}')
+        ]:
+            assert b'OK' == client.execute_command(
+                'JSON.SET', key, path, val)
+            assert exp_new_val == client.execute_command(
+                'JSON.GET', key, '.').decode()
+
+    def test_json_get_legacy_and_v2path_wildcard(self):
+        '''
+        Test the two versions of path syntax: V2 JSONPath and the legacy path. A V2 JSONPath must start with the
+        dollar sign, which represents the root element. If a path does not start with the dollar sign, it is a
+        legacy path.
+
+        For queries, the legacy path always returns a single value, which is the first value if multiple values are
+        selected. If no value is selected, the legacy path returns a NONEXISTENT error. The JSONPath always returns
+        an array of values, which can contain zero, one, or more values.
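+        For example, 'JSON.GET key $.a.b' returns [] when b is missing, while
+        'JSON.GET key .a.b' fails with a NONEXISTENT error.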
+ ''' + client = self.server.get_new_client() + test_cases = [ + ('$.address.*', + b'["21 2nd Street","New York","NY","10021-3100"]'), + ('$.[\'address\'].*', + b'["21 2nd Street","New York","NY","10021-3100"]'), + ('.address.*', b'"21 2nd Street"'), + ('.["address"].*', b'"21 2nd Street"'), + ('$.phoneNumbers.*.type', b'["home","office"]'), + ('$.phoneNumbers[*].type', b'["home","office"]'), + ('$.["phoneNumbers"][*].["type"]', b'["home","office"]'), + ('.phoneNumbers.*.type', b'"home"'), + ('.phoneNumbers[*].type', b'"home"'), + ('.["phoneNumbers"][*].["type"]', b'"home"'), + ('$.[ \'address\' ].*', + b'["21 2nd Street","New York","NY","10021-3100"]'), + ('.[ "address" ].*', b'"21 2nd Street"'), + ('$.[ "phoneNumbers" ][ * ].[ "type" ]', b'["home","office"]'), + ] + + for (path, exp) in test_cases: + assert exp == client.execute_command( + 'JSON.GET', wikipedia, path) + + client.execute_command( + 'JSON.SET', k1, '.', '{"a":[], "b":[1], "c":[1,2]}') + client.execute_command( + 'JSON.SET', k2, '.', '{"a":{}, "b": {"a": 1}, "c": {"a": 1, "b": 2}}') + client.execute_command( + 'JSON.SET', k3, '.', '{"a":[[[1,2],[3,4],[5,6]],[[7,8],[9,10],[11,12]]]}') + client.execute_command( + 'JSON.SET', k4, '.', '{"a":{"b":{"c":{"d":{"e":{"f":{"g":{"h:":1}}}}}}}}') + + # JSONPath always returns an array of values + # Test multiple wildcards + for (key, path, exp) in [ + (k1, '$.a[*]', b'[]'), + (k1, '$.b[*]', b'[1]'), + (k1, '$.c[*]', b'[1,2]'), + (k2, '$.a.*', b'[]'), + (k2, '$.b.*', b'[1]'), + (k2, '$.c.*', b'[1,2]'), + (k3, '$.a[*]', b'[[[1,2],[3,4],[5,6]],[[7,8],[9,10],[11,12]]]'), + (k3, '$.a[*][*]', b'[[1,2],[3,4],[5,6],[7,8],[9,10],[11,12]]'), + (k3, '$.a[*][*][1]', b'[2,4,6,8,10,12]'), + (k4, '$.a.*.*', b'[{"d":{"e":{"f":{"g":{"h:":1}}}}}]'), + (k4, '$.a.*.*.*', b'[{"e":{"f":{"g":{"h:":1}}}}]'), + (k4, '$.a.*.c.*', b'[{"e":{"f":{"g":{"h:":1}}}}]'), + (k4, '$.a.*.c.*.e.*', b'[{"g":{"h:":1}}]'), + (k4, '$.a.*.c.*.e.*.g.*', b'[1]') + ]: + assert exp == client.execute_command( + 'JSON.GET', key, path) + + # Legacy path always returns a single value, which is the first value. + # Test multiple wildcards + for (key, path, exp) in [ + (k1, '.b[*]', b'1'), + (k1, '.c[*]', b'1'), + (k2, '.b.*', b'1'), + (k2, '.c.*', b'1'), + (k3, '.a[*]', b'[[1,2],[3,4],[5,6]]'), + (k3, '.a[*][*]', b'[1,2]'), + (k3, '.a[*][*][1]', b'2'), + (k4, '.a.*.*', b'{"d":{"e":{"f":{"g":{"h:":1}}}}}'), + (k4, '.a.*.*.*', b'{"e":{"f":{"g":{"h:":1}}}}'), + (k4, '.a.*.c.*', b'{"e":{"f":{"g":{"h:":1}}}}'), + (k4, '.a.*.c.*.e.*', b'{"g":{"h:":1}}'), + (k4, '.a.*.c.*.e.*.g.*', b'1') + ]: + assert exp == client.execute_command( + 'JSON.GET', key, path) + + # Legacy path returns non-existent error if no value is selected. + for (key, path, exp) in [ + (k1, '.a[*]', None), + (k2, '.a.*', None) + ]: + with pytest.raises(ResponseError) as e: + assert exp == client.execute_command( + 'JSON.GET', key, path) + assert self.error_class.is_nonexistent_error(str(e.value)) + + def test_json_get_negative_array_index_legacypath(self): + ''' + Test negative array index with the legacy path syntax. 
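+        (-1 addresses the last element, -2 the one before it, and so on.)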
+ ''' + client = self.server.get_new_client() + # Negative indices do not throw error in Valkey + test_cases = [ + ('.phoneNumbers[-2].type', '"home"'), + ('.phoneNumbers[-1].type', '"office"'), + ('.phoneNumbers[ -1 ].type', '"office"'), + ('.["phoneNumbers"][-2]["type"]', '"home"'), + ('.["phoneNumbers"][-1]["type"]', '"office"'), + ('.["phoneNumbers"][ -1]["type" ]', '"office"'), + ('.["phoneNumbers"][-1 ][ "type"]', '"office"'), + ('.["phoneNumbers"][ -1 ][ "type" ]', '"office"') + ] + + # Out of boundary test cases + oob_test_cases = [ + '.phoneNumbers[2].type', + '.phoneNumbers[10].type', + '.phoneNumbers[-3].type', + ] + + # Legacy path always returns a single value + for (path, exp) in test_cases: + assert exp.encode() == client.execute_command( + 'JSON.GET', wikipedia, path) + + # index out of bounds + for path in oob_test_cases: + with pytest.raises(ResponseError) as e: + client.execute_command( + 'JSON.GET', wikipedia, path) + assert self.error_class.is_outofboundaries_error(str(e.value)) + + # test using negative index on a non-array value + for path in [ + '.firstName[-1]', + '.age[-1]', + '.weight[-1]', + '.address[-1]', + '.phoneNumbers[0][-1]' + ]: + with pytest.raises(ResponseError) as e: + client.execute_command( + 'JSON.GET', wikipedia, path) + assert self.error_class.is_wrongtype_error(str(e.value)) + + def test_json_get_negative_array_index_v2path(self): + ''' + Test negative array index with the V2 JSONPath syntax. + ''' + client = self.server.get_new_client() + test_cases = [ + ('$.phoneNumbers[-2].type', '["home"]'), + ('$.phoneNumbers[ -2 ].type', '["home"]'), + ('$.phoneNumbers[-1].type', '["office"]'), + ('$.phoneNumbers[ -1].type', '["office"]'), + ('$.phoneNumbers[-1 ].type', '["office"]'), + ('$.["phoneNumbers"][-2]["type"]', '["home"]'), + ('$.["phoneNumbers"][-1]["type"]', '["office"]'), + # index out of bounds + ('$.phoneNumbers[2].type', '[]'), + ('$.phoneNumbers[10].type', '[]'), + # using negative index on a non-array value + ('$.firstName[-1]', '[]'), + ('$.age[-1]', '[]'), + ('$.weight[-1]', '[]'), + ('$.weight[ -1 ]', '[]'), + ('$.phoneNumbers[0][-1]', '[]'), + ('$.phoneNumbers[ 0][ -1 ]', '[]'), + ('$.phoneNumbers[ 0 ][ -1 ]', '[]'), + ('$.[ "phoneNumbers" ][ -1 ][ "type" ]', '["office"]'), + ('$.phoneNumbers[-3].type', '[]'), + ('$.phoneNumbers[ -3 ].type', '[]'), + ] + + # JSONPath always returns an array of values + for (path, exp) in test_cases: + assert exp.encode() == client.execute_command( + 'JSON.GET', wikipedia, path) + + def test_json_get_v2path_array_slice(self): + ''' + Test negative array slice with the V2 JSONPath syntax. 
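+        Slices use Python-style [start:end:step] semantics: end is exclusive,
+        negative indices count from the end, and omitted fields default to
+        0, the array length, and 1 respectively.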
+ ''' + client = self.server.get_new_client() + + test_cases = [ + ('$[0:3]', '[0,1,2]'), + ('$[ 0 : 3 ]', '[0,1,2]'), + ('$[0:+3]', '[0,1,2]'), + ('$[ 0 : +3 ]', '[0,1,2]'), + ('$[0:-1]', '[0,1,2,3,4,5,6,7,8]'), + ('$[ 0 : -1 ]', '[0,1,2,3,4,5,6,7,8]'), + ('$[2:-2]', '[2,3,4,5,6,7]'), + ('$[ 2 : -2 ]', '[2,3,4,5,6,7]'), + ('$[+2:-2]', '[2,3,4,5,6,7]'), + ('$[1:1]', '[]'), + ('$[1:2]', '[1]'), + ('$[+1:+2]', '[1]'), + ('$[1:3]', '[1,2]'), + ('$[1:0]', '[]'), + ('$[5:]', '[5,6,7,8,9]'), + ('$[ 5 : ]', '[5,6,7,8,9]'), + ('$[:3]', '[0,1,2]'), + ('$[ : 3]', '[0,1,2]'), + ('$[:+3]', '[0,1,2]'), + ('$[: +3]', '[0,1,2]'), + ('$[:6:2]', '[0,2,4]'), + ('$[ : 6 : 2]', '[0,2,4]'), + ('$[:]', '[0,1,2,3,4,5,6,7,8,9]'), + ('$[ : ]', '[0,1,2,3,4,5,6,7,8,9]'), + ('$[::]', '[0,1,2,3,4,5,6,7,8,9]'), + ('$[ : : ]', '[0,1,2,3,4,5,6,7,8,9]'), + ('$[::2]', '[0,2,4,6,8]'), + ('$[ : : 2 ]', '[0,2,4,6,8]'), + ('$[3::2]', '[3,5,7,9]'), + ('$[3 :: 2]', '[3,5,7,9]'), + ('$[0::1]', '[0,1,2,3,4,5,6,7,8,9]'), + ('$[0:8:2]', '[0,2,4,6]'), + ('$[ 0 : 8 : 2 ]', '[0,2,4,6]'), + ('$[0:+8:+2]', '[0,2,4,6]'), + ('$[0 : +8 : +2]', '[0,2,4,6]'), + ('$[6:0:-1]', '[6,5,4,3,2,1]'), + ('$[6::-1]', '[]'), + ('$[6:0:-2]', '[6,4,2]'), + ('$[6::-2]', '[]'), + ('$[8:0:-2]', '[8,6,4,2]') + ] + + client.execute_command( + 'JSON.SET', k1, '.', '[0,1,2,3,4,5,6,7,8,9]') + for (path, exp) in test_cases: + assert exp.encode() == client.execute_command('JSON.GET', k1, path) + + def test_json_get_v2path_array_union(self): + ''' + Test array union with the V2 JSONPath syntax. + ''' + client = self.server.get_new_client() + + test_cases = [ + ('$[0,1,2]', '[0,1,2]'), + ('$[0, 1, 2]', '[0,1,2]'), + ('$[0, 1, 2 ]', '[0,1,2]'), + ('$[ 0, 1, 2 ]', '[0,1,2]'), + ('$[ 0,1, 2 ]', '[0,1,2]'), + ('$[0,1]', '[0,1]'), + ('$[-1,-2]', '[9,8]'), + ('$[-10,-5,-6]', '[0,5,4]'), + ('$[0,1,5,0,1,2]', '[0,1,5,0,1,2]'), + ('$[0, 1,5,0, 1,2]', '[0,1,5,0,1,2]'), + ('$[ 0, 1, 5, 0, 1, 2 ]', '[0,1,5,0,1,2]'), + ('$[ -10 , -5 , -6 ]', '[0,5,4]'), + ('$[-10,-9,-8,0,1,2,-1000,1000]', '[0,1,2,0,1,2]'), + ] + + client.execute_command( + 'JSON.SET', k1, '.', '[0,1,2,3,4,5,6,7,8,9]') + for (path, exp) in test_cases: + assert exp.encode() == client.execute_command('JSON.GET', k1, path) + + client.execute_command( + 'JSON.SET', k2, '.', '[{"name":"name0","id":0},{"name":"name1","id":1},{"name":"name2","id":2}]') + for (path, exp) in [ + ('$[0,2].name', '["name0","name2"]'), + ]: + assert exp.encode() == client.execute_command('JSON.GET', k2, path) + + # we do not support mixing of unions and slices, nor do we support extraneous commas + for path in [ + '$[0,1,2:4]', + '$[0:2,3,4]', + '$[0,,4]', + '$[0,,,4]', + '$[,4]', + '$[4,]', + '$[,4,]', + '$[,,4,,]', + '$[,]', + '$[,0,4]', + '$[,0,4,]', + '$[0,4,]', + ]: + with pytest.raises(ResponseError) as e: + assert None == client.execute_command( + 'JSON.GET', k1, path) + assert self.error_class.is_syntax_error(str(e.value)) + + def test_json_get_multipaths_legacy_and_v2path_wildcard(self): + ''' + Test JSON.GET with multiple paths, legacy path or v2 JSONPath or mixed. 
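+        If every path is legacy, the reply conforms to the legacy behavior; if at
+        least one path is a $-path, every member of the reply is an array of matches.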
+ ''' + client = self.server.get_new_client() + + client.execute_command( + 'JSON.SET', k1, '.', '{"a":[], "b":[1], "c":[1,2]}') + client.execute_command( + 'JSON.SET', k2, '.', '{"a":{}, "b": {"a": 1}, "c": {"a": 1, "b": 2}}') + + test_cases = [ + (k1, '.b[*]', '.c[*]', '{".b[*]":1,".c[*]":1}'), + (k2, '.b.*', '.c.*', '{".b.*":1,".c.*":1}'), + ] + + # if all paths are legacy path, the result conforms to the legacy path version + for (key, path1, path2, exp) in test_cases: + # 1st path returns 1 value. 2nd path returns the first one of 2 values. + assert exp.encode() == client.execute_command( + 'JSON.GET', key, path1, path2) + + # all paths are legacy path, the result conforms to the legacy path version. + # If one path returns 0 value, the command should fail with NONEXISTENT error. + for (key, path1, path2, exp) in [ + (k1, '.a[*]', '.b[*]', None), + (k2, '.a.*', '.b.*', None), + (k2, '.foo.*', '.c.*', None) + ]: + with pytest.raises(ResponseError) as e: + assert exp == client.execute_command( + 'JSON.GET', key, path1, path2) + assert self.error_class.is_nonexistent_error(str(e.value)) + + # if at least one path is JSONPath, the result conforms to the JSONPath version + for (key, path1, path2, path3, exp) in [ + (k1, '$.a[*]', '$.b[*]', '$.c[*]', + '{"$.a[*]":[],"$.b[*]":[1],"$.c[*]":[1,2]}'), + (k1, '$.a[*]', '.b[*]', '.c[*]', + '{"$.a[*]":[],".b[*]":[1],".c[*]":[1,2]}'), + (k1, '.a[*]', '$.b[*]', '.c[*]', + '{".a[*]":[],"$.b[*]":[1],".c[*]":[1,2]}'), + (k2, '.a.*', '.b.*', '$.c.*', + '{".a.*":[],".b.*":[1],"$.c.*":[1,2]}'), + (k2, '$.a.*', '$.b.*', '.c.*', + '{"$.a.*":[],"$.b.*":[1],".c.*":[1,2]}'), + (k2, '.a.*', '$.b.*', '$.c.*', + '{".a.*":[],"$.b.*":[1],"$.c.*":[1,2]}'), + # 1st path returns 0 value. 2nd path returns 1 value. 3rd path returns 2 values. 
+ (k2, '.foo.*', '$.b.*', '$.c.*', + '{".foo.*":[],"$.b.*":[1],"$.c.*":[1,2]}') + ]: + assert exp.encode() == client.execute_command( + 'JSON.GET', key, path1, path2, path3) + + def test_json_mget_command(self): + client = self.server.get_new_client() + assert b'OK' == client.execute_command( + 'JSON.SET', k1, '.', '{"foo":"bar1"}') + assert b'OK' == client.execute_command( + 'JSON.SET', k2, '.', '{"foo":"bar2"}') + assert b'OK' == client.execute_command( + 'JSON.SET', k3, '.', '{"foo":"bar3"}') + assert [b'"bar1"', b'"bar2"', b'"bar3"'] == client.execute_command( + 'JSON.MGET', k1, k2, k3, '.foo') + # test the condition of JSON path does not exist + assert [None, None, None] == client.execute_command( + 'JSON.MGET', k1, k2, k3, '.bar') + # test the condition of key does not exist + assert [None, None] == client.execute_command( + 'JSON.MGET', baz, foo, '.') + assert [None, b'"bar2"'] == client.execute_command( + 'JSON.MGET', baz, k2, '.foo') + + # Wrong number of arguments + with pytest.raises(ResponseError) as e: + assert None == client.execute_command('JSON.MGET') + assert str(e.value).find('wrong number of arguments') >= 0 + + assert b'OK' == client.execute_command( + 'json.set', k4, '.', '[2,5,{"level0":[null,true,{"level0_1":[3,false]}],"level1":{"level1_0":33}}]') + assert b'OK' == client.execute_command( + 'json.set', k5, '.', '[4,5,{"level0":[null,false,{"level0_1":[null,false]}],"level1":{"level1_0":[12,13]}}]') + assert b'OK' == client.execute_command( + 'json.set', k6, '.', '[2,5,{"level0":[true,20,{"level0_1":[3,true]}],"level1":{"level1_0":33}}]') + for (path, exp) in [ + ('$..level0_1', [b"[[3,false]]", + b"[[null,false]]", b"[[3,true]]"]), + ('$.[2].level1', [b'[{"level1_0":33}]', + b'[{"level1_0":[12,13]}]', b'[{"level1_0":33}]']), + ('$.[2].level1.level1_0[2]', [b"[]", b"[]", b"[]"]), + ('$.[2].level1.level1_0[1]', [b"[]", b"[13]", b"[]"]) + ]: + assert [exp[0], exp[1], exp[2]] == client.execute_command( + 'JSON.MGET', k4, k5, k6, path) + + def test_json_key_declaration(self): + client = self.server.get_new_client() + cmd_need_val = set( + 'SET NUMMULTBY NUMINCRBY ARRAPPEND ARRINDEX STRAPPEND RESP'.split()) + + # These commands should only get the single key + for cmd in ('DEL', 'GET', 'SET', 'TYPE', 'NUMINCRBY', 'NUMMULTBY', 'TOGGLE', 'STRAPPEND', 'STRLEN', + 'ARRAPPEND', 'ARRINDEX', 'ARRLEN', 'ARRPOP', 'CLEAR', 'OBJKEYS', + 'OBJLEN', 'FORGET', 'RESP'): + if cmd not in cmd_need_val: + assert [k1] == client.execute_command( + 'COMMAND GETKEYS', f'JSON.{cmd}', k1) + else: + # Dummy value in command + assert [k1] == client.execute_command( + 'COMMAND GETKEYS', f'JSON.{cmd}', k1, '.', '5') + + # ARRINSERT requires index + assert ['k1'] == client.execute_command( + 'COMMAND GETKEYS', 'JSON.ARRINSERT', 'k1', '.', 0, '5') + # ARRINSERT requires start end + assert ['k1'] == client.execute_command( + 'COMMAND GETKEYS', 'JSON.ARRTRIM', 'k1', '.', 0, 5) + + debug_subcmd = set('MEMORY DEPTH'.split()) + for cmd in debug_subcmd: + assert [k1] == client.execute_command( + 'COMMAND GETKEYS', 'JSON.DEBUG', cmd, k1) + + # JSON.MGET is the only multi-key command, so make sure it returns the right set of keys + assert [k1, k2, k3] == client.execute_command( + 'COMMAND GETKEYS', 'JSON.MGET', k1, k2, k3, '.') + + def __json_del_or_forget__(self, cmd): + client = self.server.get_new_client() + # delete an element + for path in ['.spouse', '.phoneNumbers']: + assert 1 == client.execute_command( + cmd, wikipedia, path) + with pytest.raises(ResponseError) as e: + assert None == 
client.execute_command( + 'JSON.GET', wikipedia, path) + assert self.error_class.is_nonexistent_error(str(e.value)) + + # delete a doc: path not provided + assert 1 == client.execute_command(cmd, wikipedia) + assert 0 == client.execute_command('EXISTS', wikipedia) + + # delete a doc: path arg provided + client.execute_command('json.set', k1, '.', '1') + assert 1 == client.execute_command(cmd, k1, '.') + assert 0 == client.execute_command('EXISTS', k1) + + # return should be 0 if the document key does not exist + assert 0 == client.execute_command( + cmd, foo, '.firstName') + + # return should be 0 if the path does not exist + assert 0 == client.execute_command( + cmd, wikipedia, '.foo') + + # Wrong number of arguments + + with pytest.raises(ResponseError) as e: + assert None == client.execute_command( + cmd, wikipedia, '.children', 'extra') + assert str(e.value).find('wrong number of arguments') >= 0 + + def test_json_del_command(self): + self.__json_del_or_forget__('JSON.DEL') + + def test_json_forget_command(self): + self.__json_del_or_forget__('JSON.FORGET') + + def test_json_del_command_v2path_wildcard(self): + client = self.server.get_new_client() + client.execute_command( + 'json.set', k1, '.', '{"x": {}, "y": {"a":"a"}, "z": {"a":"", "b":"b"}}') + client.execute_command( + 'json.set', k2, '.', '[0,1,2,3,4,5,6,7,8,9]') + client.execute_command( + 'json.set', k3, '.', '[0,1,2,3,4,5,6,7,8,9]') + client.execute_command( + 'json.set', k4, '.', '[0,1,2,3,4,5,6,7,8,9]') + client.execute_command( + 'json.set', k5, '.', '[0,1,2,3,4,5,6,7,8,9]') + + # NOTE: The expected values below account for the outcome of previous commands. + for (key, path, exp_ret, exp_val) in [ + (k1, '$.z.*', 2, '{"x":{},"y":{"a":"a"},"z":{}}'), + (k1, '$.*', 3, '{}'), + (k2, '$.[3:6]', 3, '[0,1,2,6,7,8,9]'), + (k2, '$.*', 7, '[]'), + ]: + assert exp_ret == client.execute_command( + 'JSON.DEL', key, path) + assert exp_val == client.execute_command( + 'JSON.GET', key).decode() + + # delete whole doc + for key in [k1, k2]: + assert 1 == client.execute_command( + 'JSON.DEL', key, '$') + assert 0 == client.execute_command('EXISTS', key) + + # delete with wildcard, slice, and union + assert 10 == client.execute_command( + 'JSON.DEL', k3, '$[*]') + assert b'[]' == client.execute_command('JSON.GET', k3) + assert 4 == client.execute_command( + 'JSON.DEL', k4, '$[3:7]') + assert b'[0,1,2,7,8,9]' == client.execute_command( + 'JSON.GET', k4) + assert 4 == client.execute_command( + 'JSON.DEL', k5, '$[1,5,7,8]') + assert b'[0,2,3,4,6,9]' == client.execute_command( + 'JSON.GET', k5) + + assert b'OK' == client.execute_command( + 'json.set', k1, '.', '[2,5,{"level0":[null,true,{"level0_1":[3,false]}],"level1":{"level1_0":33}}]') + for (key, path, exp_ret, exp_val) in [ + (k1, '$[2].level0..level0_1[1]', 1, + '[2,5,{"level0":[null,true,{"level0_1":[3]}],"level1":{"level1_0":33}}]'), + (k1, '$[2].level0..level0_1', 1, + '[2,5,{"level0":[null,true,{}],"level1":{"level1_0":33}}]'), + (k1, '$..level1.level1_0', 1, + '[2,5,{"level0":[null,true,{}],"level1":{}}]'), + (k1, '$..level1', 1, + '[2,5,{"level0":[null,true,{}]}]'), + (k1, '.*', 3, '[]'), + ]: + assert exp_ret == client.execute_command( + 'JSON.DEL', key, path) + assert exp_val == client.execute_command( + 'JSON.GET', key).decode() + if path != '.*': + assert '[]' == client.execute_command( + 'JSON.GET', key, path).decode() + else: + with pytest.raises(ResponseError) as e: + client.execute_command( + 'JSON.GET', key, path).decode() + assert 
self.error_class.is_nonexistent_error(str(e.value)) + + def test_json_unicode_is_supported(self): + client = self.server.get_new_client() + for unicode_str in [ + '"Eat, drink, æ„›"', + '"hyvää-élève"' + ]: + utf8 = unicode_str.encode('utf-8') + assert b'OK' == client.execute_command( + 'JSON.SET', k1, '.', unicode_str) + assert utf8 == client.execute_command( + 'JSON.GET', k1, 'NOESCAPE', '.') + assert 1 == client.execute_command('JSON.DEL', k1) + assert 0 == client.execute_command('EXISTS', k1) + + def test_nonASCII_in_jsonpath(self): + client = self.server.get_new_client() + client.execute_command( + 'JSON.SET', k1, '.', '{"a": {"æ„›": "love", "b": "b"}}') + client.execute_command( + 'JSON.SET', k2, '.', '{"æ„›": [1,2,3]}') + for (key, path1, path2, exp) in [ + (k1, '.a.æ„›', None, b'"love"'), + (k1, '.a.æ„›', '.a.b', b'{".a.\xe6\x84\x9b":"love",".a.b":"b"}'), + (k2, '.æ„›[1]', None, b'2'), + (k2, '$.æ„›[*]', None, b'[1,2,3]') + ]: + if path2 is None: + assert exp == client.execute_command( + 'JSON.GET', key, path1) + else: + # Valkey behavior is weird and returns dictionaries in a non-deterministic order + assert exp == client.execute_command( + 'JSON.GET', key, path1, path2) + + def test_json_number_scanner(self): + '''Test that numeric conversion gets the right types around various edge cases''' + client = self.server.get_new_client() + maxpos = (1 << 63) - 1 + + for v in [ + (maxpos, b'integer'), + (-maxpos, b'integer'), + (-maxpos-1, b'integer')]: + client.execute_command( + 'JSON.SET', k1, '.', str(v[0])) + assert v[1] == client.execute_command( + 'JSON.TYPE', k1, '.'), "Value is " + str(v[0]) + + for v in [ + (maxpos+1, b'number'), + (-maxpos-2, b'number') + ]: + + client.execute_command( + 'JSON.SET', k1, '.', str(v[0])) + assert v[1] == client.execute_command( + 'JSON.TYPE', k1, '.'), "Value is " + str(v[0]) + + def test_json_toggle(self): + client = self.server.get_new_client() + # Toggle back and forth + assert b'OK' == client.execute_command( + 'JSON.SET', wikipedia, '.foobool', 'false') + assert b'false' == client.execute_command( + 'JSON.GET', wikipedia, '.foobool') + + assert b'true' == client.execute_command( + 'JSON.TOGGLE', wikipedia, '.foobool') + assert b'true' == client.execute_command( + 'JSON.GET', wikipedia, '.foobool') + + assert b'false' == client.execute_command( + 'JSON.TOGGLE', wikipedia, '.foobool') + assert b'false' == client.execute_command( + 'JSON.GET', wikipedia, '.foobool') + + # Wrong number of arguments + with pytest.raises(ResponseError) as e: + assert None == client.execute_command( + 'JSON.TOGGLE', wikipedia, '.foobool', 'extra') + assert self.error_class.is_wrong_number_of_arguments_error( + str(e.value)) + + # Wrong types + assert b'OK' == client.execute_command( + 'JSON.SET', wikipedia, '.foonum', '55') + with pytest.raises(ResponseError) as e: + assert None == client.execute_command( + 'JSON.TOGGLE', wikipedia, '.foonum') + assert self.error_class.is_wrongtype_error(str(e.value)) + + assert b'OK' == client.execute_command( + 'JSON.SET', wikipedia, '.foostr', '"ok"') + with pytest.raises(ResponseError) as e: + assert None == client.execute_command( + 'JSON.TOGGLE', wikipedia, '.foostr') + assert self.error_class.is_wrongtype_error(str(e.value)) + + def test_json_toggle_jsonpath(self): + client = self.server.get_new_client() + + assert b'OK' == client.execute_command('JSON.SET', k1, '.', + '{"a":true, "b":false, "c":1, "d":null, "e":"foo", "f":[], "g":{}}') + assert b'OK' == client.execute_command('JSON.SET', k2, '.', + '[true, false, 1, null, 
"foo", [], {}]') + + for (key, path, exp, exp_new_val) in [ + (k1, '$.*', [0, 1, None, None, None, None, None], + '{"a":false,"b":true,"c":1,"d":null,"e":"foo","f":[],"g":{}}'), + (k1, '$.*', [1, 0, None, None, None, None, None], + '{"a":true,"b":false,"c":1,"d":null,"e":"foo","f":[],"g":{}}'), + (k2, '$[*]', [0, 1, None, None, None, None, None], + '[false,true,1,null,"foo",[],{}]'), + (k2, '$[*]', [1, 0, None, None, None, None, None], + '[true,false,1,null,"foo",[],{}]') + ]: + assert exp == client.execute_command( + 'JSON.TOGGLE', key, path) + assert exp_new_val == client.execute_command( + 'JSON.GET', key, '.').decode() + + def test_json_numincrby(self): + client = self.server.get_new_client() + assert b'28' == client.execute_command( + 'JSON.NUMINCRBY', wikipedia, '.age', '1') + assert b'38' == client.execute_command( + 'JSON.NUMINCRBY', wikipedia, '.age', '10') + assert b'33' == client.execute_command( + 'JSON.NUMINCRBY', wikipedia, '.age', '-5') + + assert b'OK' == client.execute_command( + 'JSON.SET', wikipedia, '.foo', '1') + assert b'1.5' == client.execute_command( + 'JSON.NUMINCRBY', wikipedia, '.foo', '0.5') + assert b'2' == client.execute_command( + 'JSON.NUMINCRBY', wikipedia, '.foo', '0.5') + + # error condition: document key does not exist + with pytest.raises(ResponseError) as e: + client.execute_command( + 'JSON.NUMINCRBY', foo, '.age', '2') + assert self.error_class.is_nonexistent_error(str(e.value)) + + # error condition: path does not exist + with pytest.raises(ResponseError) as e: + client.execute_command( + 'JSON.NUMINCRBY', wikipedia, '.bar', '2') + assert self.error_class.is_nonexistent_error(str(e.value)) + + # Wrong number of arguments + with pytest.raises(ResponseError) as e: + assert None == client.execute_command( + 'JSON.NUMINCRBY', wikipedia, '.age', '1', 'extra') + assert str(e.value).find('wrong number of arguments') >= 0 + + def test_json_nummultby(self): + client = self.server.get_new_client() + assert b'270' == client.execute_command( + 'JSON.NUMMULTBY', wikipedia, '.age', '10') + assert b'2700' == client.execute_command( + 'JSON.NUMMULTBY', wikipedia, '.age', '10') + assert b'27' == client.execute_command( + 'JSON.NUMMULTBY', wikipedia, '.age', '0.01') + + assert b'OK' == client.execute_command( + 'JSON.SET', wikipedia, '.foo', '1') + assert b'0.5' == client.execute_command( + 'JSON.NUMMULTBY', wikipedia, '.foo', '0.5') + assert b'0.25' == client.execute_command( + 'JSON.NUMMULTBY', wikipedia, '.foo', '0.5') + assert b'1' == client.execute_command( + 'JSON.NUMMULTBY', wikipedia, '.foo', '4') + + # error condition: document key does not exist + with pytest.raises(ResponseError) as e: + client.execute_command( + 'JSON.NUMMULTBY', foo, '.age', '2') + assert self.error_class.is_nonexistent_error(str(e.value)) + + # error condition: path does not exist + with pytest.raises(ResponseError) as e: + client.execute_command( + 'JSON.NUMMULTBY', wikipedia, '.bar', '2') + assert self.error_class.is_nonexistent_error(str(e.value)) + + # Wrong number of arguments + with pytest.raises(ResponseError) as e: + assert None == client.execute_command( + 'JSON.NUMMULTBY', wikipedia, '.age', '2', 'extra') + assert str(e.value).find('wrong number of arguments') >= 0 + + def test_json_simple_operations_remove_double_styling(self): + # Multiplying by one or adding zero will now remove styling + # and really change output for negative exponents due to decimals + client = self.server.get_new_client() + data = [ + ('2.50000', '2.5'), + ('2e30', '2e+30'), + ('2E+30', '2e+30'), + 
('2E30', '2e+30'),
+ ('2E-30', '2.0000000000000002e-30'),
+ ('2e5', '200000'),
+ ('-2.50000', '-2.5'),
+ ('-2e30', '-2e+30'),
+ ('-2E+30', '-2e+30'),
+ ('-2E30', '-2e+30'),
+ ('-2E-30', '-2.0000000000000002e-30'),
+ ('-2e5', '-200000'),
+ ]
+
+ for (initial, unstyled) in data:
+ assert b'OK' == client.execute_command(
+ 'JSON.SET', wikipedia, '.foo', initial)
+ assert initial == client.execute_command(
+ 'JSON.GET', wikipedia, '.foo').decode()
+ client.execute_command(
+ 'JSON.NUMMULTBY', wikipedia, '.foo', '1')
+ assert unstyled == client.execute_command(
+ 'JSON.GET', wikipedia, '.foo').decode()
+
+ for (initial, unstyled) in data:
+ assert b'OK' == client.execute_command(
+ 'JSON.SET', wikipedia, '.foo', initial)
+ assert initial == client.execute_command(
+ 'JSON.GET', wikipedia, '.foo').decode()
+ client.execute_command(
+ 'JSON.NUMINCRBY', wikipedia, '.foo', '0')
+ assert unstyled == client.execute_command(
+ 'JSON.GET', wikipedia, '.foo').decode()
+
+ def test_json_double_operations_wrongtype(self):
+ client = self.server.get_new_client()
+ # None of these will succeed, because the doubles are not valid or the field is not a double
+ for (cmd, key, field, arg) in [
+ ('JSON.NUMMULTBY', wikipedia, '.age', '"2.0"'),
+ ('JSON.NUMMULTBY', wikipedia, '.age', '2.'),
+ ('JSON.NUMINCRBY', wikipedia, '.age', '2.0.'),
+ ('JSON.NUMMULTBY', wikipedia, '.age', '-2.'),
+ ('JSON.NUMINCRBY', wikipedia, '.age', '-2.0.'),
+ ('JSON.NUMMULTBY', wikipedia, '.age', '+2.'),
+ ('JSON.NUMINCRBY', wikipedia, '.age', '+2.0.'),
+ ('JSON.NUMINCRBY', wikipedia, '.age', '.2.0'),
+ ('JSON.NUMINCRBY', wikipedia, '.age', 'a2.0'),
+ ('JSON.NUMINCRBY', wikipedia, '.age', '-a2.0'),
+ ('JSON.NUMINCRBY', wikipedia, '.age', '+a2.0'),
+ ('JSON.NUMINCRBY', wikipedia, '.age', 'e2.0'),
+ ('JSON.NUMINCRBY', wikipedia, '.age', '-e2.0'),
+ ('JSON.NUMINCRBY', wikipedia, '.age', '+e2.0'),
+ ('JSON.NUMINCRBY', wikipedia, '.age', 'e+2.0'),
+ ('JSON.NUMINCRBY', wikipedia, '.age', '-e-2.0'),
+ ('JSON.NUMINCRBY', wikipedia, '.age', '+E+2.0'),
+ ('JSON.NUMINCRBY', wikipedia, '.age', '2.0e'),
+ ('JSON.NUMINCRBY', wikipedia, '.age', '2.0eq'),
+ ('JSON.NUMINCRBY', wikipedia, '.age', '2.0e3q'),
+ ('JSON.NUMINCRBY', wikipedia, '.age', '2.0e+'),
+ ('JSON.NUMINCRBY', wikipedia, '.age', '2.0e+a'),
+ ('JSON.NUMINCRBY', wikipedia, '.age', '2.0e+41a'),
+ ('JSON.NUMINCRBY', wikipedia, '.firstName', '2'),
+ ]:
+ with pytest.raises(ResponseError) as e:
+ assert None == client.execute_command(
+ cmd, key, field, arg)
+ assert self.error_class.is_wrongtype_error(str(e.value))
+
+ def test_json_double_consistency(self):
+ '''
+ Test that double values remain consistent when going through the JSON engine.
+ This tests a tolerance of 2^-50 for a decent number of iterations,
+ but is not enough to guarantee that level of precision to our customers.
+ Also verify that regular and pretty print double values have the same output.
+ ''' + client = self.server.get_new_client() + # arbitrarily generated hex to double to string, to send to json + random.seed(1234) + data = [] + for i in range(24000): + potential_double = struct.unpack( + '= 0 + + def test_json_strappend_command(self): + client = self.server.get_new_client() + for (val, new_len, new_val) in [ + ('"son"', 7, '"Johnson"'), + ('" Junior"', 14, '"Johnson Junior"'), + ('" is"', 17, '"Johnson Junior is"'), + ('" my"', 20, '"Johnson Junior is my"'), + ('" friend."', 28, '"Johnson Junior is my friend."'), + ('""', 28, '"Johnson Junior is my friend."') + ]: + assert new_len == client.execute_command( + 'JSON.STRAPPEND', wikipedia, '.firstName', val) + assert new_val == client.execute_command( + 'JSON.GET', wikipedia, '.firstName').decode('utf-8') + + # edge case: appending to an empty string + assert b'OK' == client.execute_command( + 'JSON.SET', wikipedia, '.foo', '""') + client.execute_command( + 'JSON.STRAPPEND', wikipedia, '.foo', '"abc"') + assert b'"abc"' == client.execute_command( + 'JSON.GET', wikipedia, '.foo') + + # error condition: document key does not exist + with pytest.raises(ResponseError) as e: + client.execute_command( + 'JSON.STRAPPEND', foo, '.firstName', '"abc"') + assert self.error_class.is_nonexistent_error(str(e.value)) + + # error condition: path does not exist + with pytest.raises(ResponseError) as e: + client.execute_command( + 'JSON.STRAPPEND', wikipedia, '.bar', '"abc"') + assert self.error_class.is_nonexistent_error(str(e.value)) + + # error condition: attempt to append a non-string value + for val in ['123', 'true', 'false', 'null', '{}', '[]']: + with pytest.raises(ResponseError) as e: + client.execute_command( + 'JSON.STRAPPEND', wikipedia, '.firstName', val) + assert self.error_class.is_wrongtype_error(str(e.value)) + + # error condition: attempt to append to a non-string element + for path in ['.address', '.groups', '.age', '.isAlive', '.spouse']: + with pytest.raises(ResponseError) as e: + client.execute_command( + 'JSON.STRAPPEND', wikipedia, path, '"12"') + assert self.error_class.is_wrongtype_error(str(e.value)) + + # Wrong number of arguments + with pytest.raises(ResponseError) as e: + assert None == client.execute_command( + 'JSON.STRAPPEND', wikipedia, '.firstName', '"abc"', 'extra') + assert str(e.value).find('wrong number of arguments') >= 0 + + def test_json_strlen_command_legacy_and_jsonpath_wildcard(self): + client = self.server.get_new_client() + + client.execute_command('JSON.SET', k1, '.', + '{"a":{"a":"a"}, "b":{"a":""}, "c":{"a":"a", "b":"bb"}, "d":{"a":1, "b":"b", "c":3}}') + + for (key, path, exp) in [ + (k1, '$.a.a', [1]), + (k1, '$.a.*', [1]), + (k1, '$.b.a', [0]), + (k1, '$.b.*', [0]), + (k1, '$.c.*', [1, 2]), + (k1, '$.c.b', [2]), + (k1, '$.d.*', [None, 1, None]), + (k1, '.a.a', 1), + (k1, '.a.*', 1), + (k1, '.b.a', 0), + (k1, '.b.*', 0), + (k1, '.c.*', 1), + (k1, '.c.b', 2), + ]: + assert exp == client.execute_command( + 'JSON.STRLEN', key, path) + + assert 1 == client.execute_command( + 'JSON.STRLEN', k1, '.d.*') + + def test_json_strappend_command_legacy_and_jsonpath_wildcard(self): + client = self.server.get_new_client() + + client.execute_command('JSON.SET', k1, '.', + '{"a":{"a":"a"}, "b":{"a":""}, "c":{"a":"a", "b":"bb"}, "d":{"a":1, "b":"b", "c":3}}') + + # NOTE: The expected result below accounts for the outcome of previous commands. 
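+ # Illustrative sketch (uses k2, which this test otherwise leaves untouched):
+ # a JSONPath returns one new length per selected string, while a legacy path
+ # returns a single scalar length.
+ client.execute_command('JSON.SET', k2, '.', '{"s":"x"}')
+ assert [2] == client.execute_command('JSON.STRAPPEND', k2, '$.s', '"y"')
+ assert 3 == client.execute_command('JSON.STRAPPEND', k2, '.s', '"z"')
+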
+ for (key, path, append, exp_ret, exp_new_str, exp_whole_json) in [ + (k1, '$.a.a', '"x"', [ + 2], '["ax"]', '{"a":{"a":"ax"},"b":{"a":""},"c":{"a":"a","b":"bb"},"d":{"a":1,"b":"b","c":3}}'), + (k1, '$.a.a', '""', [ + 2], '["ax"]', '{"a":{"a":"ax"},"b":{"a":""},"c":{"a":"a","b":"bb"},"d":{"a":1,"b":"b","c":3}}'), + (k1, '$.a.*', '"yz"', [4], '["axyz"]', + '{"a":{"a":"axyz"},"b":{"a":""},"c":{"a":"a","b":"bb"},"d":{"a":1,"b":"b","c":3}}'), + (k1, '.a.a', '"a"', 5, '"axyza"', + '{"a":{"a":"axyza"},"b":{"a":""},"c":{"a":"a","b":"bb"},"d":{"a":1,"b":"b","c":3}}'), + (k1, '.a.*', '"a"', 6, '"axyzaa"', + '{"a":{"a":"axyzaa"},"b":{"a":""},"c":{"a":"a","b":"bb"},"d":{"a":1,"b":"b","c":3}}'), + (k1, '$.b.a', '"a"', [ + 1], '["a"]', '{"a":{"a":"axyzaa"},"b":{"a":"a"},"c":{"a":"a","b":"bb"},"d":{"a":1,"b":"b","c":3}}'), + (k1, '$.b.*', '""', [1], '["a"]', + '{"a":{"a":"axyzaa"},"b":{"a":"a"},"c":{"a":"a","b":"bb"},"d":{"a":1,"b":"b","c":3}}'), + (k1, '$.b.*', '"a"', [2], '["aa"]', + '{"a":{"a":"axyzaa"},"b":{"a":"aa"},"c":{"a":"a","b":"bb"},"d":{"a":1,"b":"b","c":3}}'), + (k1, '.b.a', '"a"', 3, '"aaa"', + '{"a":{"a":"axyzaa"},"b":{"a":"aaa"},"c":{"a":"a","b":"bb"},"d":{"a":1,"b":"b","c":3}}'), + (k1, '.b.*', '"a"', 4, '"aaaa"', + '{"a":{"a":"axyzaa"},"b":{"a":"aaaa"},"c":{"a":"a","b":"bb"},"d":{"a":1,"b":"b","c":3}}'), + (k1, '$.c.*', '"a"', [2, 3], '["aa","bba"]', + '{"a":{"a":"axyzaa"},"b":{"a":"aaaa"},"c":{"a":"aa","b":"bba"},"d":{"a":1,"b":"b","c":3}}'), + (k1, '$.c.b', '"a"', [ + 4], '["bbaa"]', '{"a":{"a":"axyzaa"},"b":{"a":"aaaa"},"c":{"a":"aa","b":"bbaa"},"d":{"a":1,"b":"b","c":3}}'), + + # The following strappend changes value at $.c value to {"a":"aaa", "b":"bbaaa"}. + # strappend returns length of the last updated value, which is 5, + # while 'json.get .c.*' returns the first selected element, which is "aaa". + (k1, '.c.*', '"a"', 5, '"aaa"', + '{"a":{"a":"axyzaa"},"b":{"a":"aaaa"},"c":{"a":"aaa","b":"bbaaa"},"d":{"a":1,"b":"b","c":3}}'), + + (k1, '.c.b', '"a"', 6, '"bbaaaa"', + '{"a":{"a":"axyzaa"},"b":{"a":"aaaa"},"c":{"a":"aaa","b":"bbaaaa"},"d":{"a":1,"b":"b","c":3}}'), + (k1, '$.d.*', '"a"', [None, 2, None], '[1,"ba",3]', + '{"a":{"a":"axyzaa"},"b":{"a":"aaaa"},"c":{"a":"aaa","b":"bbaaaa"},"d":{"a":1,"b":"ba","c":3}}'), + + # strappend returns length of the last updated value, which is 3 ("baa"), + # while 'json.get .d.*' returns the first selected element, which is 1. 
+ (k1, '.d.*', '"a"', 3, '1', + '{"a":{"a":"axyzaa"},"b":{"a":"aaaa"},"c":{"a":"aaa","b":"bbaaaa"},"d":{"a":1,"b":"baa","c":3}}') + ]: + assert exp_ret == client.execute_command( + 'JSON.STRAPPEND', key, path, append) + assert exp_new_str == client.execute_command( + 'JSON.GET', key, path).decode() + + def test_json_objectlen_command(self): + client = self.server.get_new_client() + assert 4 == client.execute_command( + 'JSON.OBJLEN', wikipedia, '.address') + + # edge case: empty object + assert 0 == client.execute_command( + 'JSON.OBJLEN', wikipedia, '.groups') + + # return should be null if document key does not exist + assert None == client.execute_command( + 'JSON.OBJLEN', foo, '.address') + + # return error if path does not exist + with pytest.raises(ResponseError) as e: + client.execute_command( + 'JSON.OBJLEN', wikipedia, '.foo') + assert self.error_class.is_nonexistent_error(str(e.value)) + + # error condition: the element is not an object + for path in ['.children', '.phoneNumbers', '.age', '.weight', '.isAlive', '.spouse', '.firstName']: + with pytest.raises(ResponseError) as e: + client.execute_command( + 'JSON.OBJLEN', wikipedia, path) + assert self.error_class.is_wrongtype_error(str(e.value)) + + # Wrong number of arguments + with pytest.raises(ResponseError) as e: + assert None == client.execute_command( + 'JSON.OBJLEN', wikipedia, '.', 'extra') + assert str(e.value).find('wrong number of arguments') >= 0 + + def test_json_objlen_command_jsonpath_wildcard(self): + client = self.server.get_new_client() + + client.execute_command('JSON.SET', k1, '.', + '{"a":{}, "c":{"a":"a", "b":"bb"}, "d":{"a":1, "b":"b", "c":{"a":3,"b":4}}, "e":1}') + + test_cases = [ + (k1, '$.a', [0]), + (k1, '$.a.*', []), + (k1, '.a', 0), + (k1, '$.c', [2]), + (k1, '$.c.*', [None, None]), + (k1, '.c', 2), + (k1, '$.d', [3]), + (k1, '$.d.*', [None, None, 2]), + (k1, '.d', 3), + (k1, '$.*', [0, 2, 3, None]), + (k1, '.*', 0), + (k1, '.d.*', 2), + ] + + for (key, path, exp) in test_cases: + assert exp == client.execute_command( + 'JSON.OBJLEN', key, path) + + with pytest.raises(ResponseError) as e: + assert None == client.execute_command( + 'JSON.OBJLEN', k1, '.a.*') + assert self.error_class.is_nonexistent_error(str(e.value)) + + for (key, path) in [ + (k1, '.c.*'), + (k1, '.e') + ]: + with pytest.raises(ResponseError) as e: + assert None == client.execute_command( + 'JSON.OBJLEN', key, path) + assert self.error_class.is_wrongtype_error(str(e.value)) + + def test_json_objectkeys_command(self): + client = self.server.get_new_client() + obj_keys = [b'street', b'city', b'state', b'zipcode'] + assert obj_keys == client.execute_command( + 'JSON.OBJKEYS', wikipedia, '.address') + + # edge case: empty object + assert [] == client.execute_command( + 'JSON.OBJKEYS', wikipedia, '.groups') + + # return should be null if document key does not exist + assert None == client.execute_command( + 'JSON.OBJKEYS', foo, '.address') + + # return should be null if path does not exist + assert None == client.execute_command( + 'JSON.OBJKEYS', wikipedia, '.foo') + + # error condition: the element is not an object + for path in ['.children', '.phoneNumbers', '.age', '.weight', '.isAlive', '.spouse', '.firstName']: + with pytest.raises(ResponseError) as e: + client.execute_command( + 'JSON.OBJKEYS', wikipedia, path) + assert self.error_class.is_wrongtype_error(str(e.value)) + + # Wrong number of arguments + with pytest.raises(ResponseError) as e: + assert None == client.execute_command( + 'JSON.OBJKEYS', wikipedia, '.', 'extra') + 
assert str(e.value).find('wrong number of arguments') >= 0
+
+ def test_json_objkeys_command_jsonpath_wildcard(self):
+ client = self.server.get_new_client()
+
+ client.execute_command('JSON.SET', k1, '.',
+ '{"a":{}, "c":{"a":"a", "b":"bb"}, "d":{"a":1, "b":"b", "c":{"a":3,"b":4}}, "e":1}')
+
+ test_cases = [
+ (k1, '$.a', [[]]),
+ (k1, '$.a.*', []),
+ (k1, '.a', []),
+ (k1, '$.c', [[b"a", b"b"]]),
+ (k1, '.c', [b"a", b"b"]),
+ (k1, '$.d', [[b"a", b"b", b"c"]]),
+ (k1, '$.d.*', [[], [], [b"a", b"b"]]),
+ (k1, '.d', [b"a", b"b", b"c"]),
+ (k1, '.d.*', [b"a", b"b"]),
+ (k1, '$.*', [[], [b"a", b"b"], [b"a", b"b", b"c"], []]),
+ (k1, '.*', [b"a", b"b"]),
+ (k1, '$.c.*', [[], []])
+ ]
+
+ for (key, path, exp) in test_cases:
+ assert exp == client.execute_command(
+ 'JSON.OBJKEYS', key, path)
+
+ for (key, path) in [
+ (k1, '.c.*'),
+ (k1, '.e')
+ ]:
+ with pytest.raises(ResponseError) as e:
+ assert None == client.execute_command(
+ 'JSON.OBJKEYS', key, path)
+ assert self.error_class.is_wrongtype_error(str(e.value))
+
+ def test_json_arrlen_command(self):
+ client = self.server.get_new_client()
+ assert 2 == client.execute_command(
+ 'JSON.ARRLEN', wikipedia, '.phoneNumbers')
+
+ # edge case: empty array
+ assert 0 == client.execute_command(
+ 'JSON.ARRLEN', wikipedia, '.children')
+
+ # return should be null if document key does not exist
+ assert None == client.execute_command(
+ 'JSON.ARRLEN', foo, '.phoneNumbers')
+
+ # return error if path does not exist
+ with pytest.raises(ResponseError) as e:
+ client.execute_command(
+ 'JSON.ARRLEN', wikipedia, '.foo')
+ assert self.error_class.is_nonexistent_error(str(e.value))
+
+ # error condition: the element is not an array
+ for path in ['.address', '.groups', '.age', '.weight', '.isAlive', '.spouse', '.firstName']:
+ with pytest.raises(ResponseError) as e:
+ client.execute_command(
+ 'JSON.ARRLEN', wikipedia, path)
+ assert self.error_class.is_wrongtype_error(str(e.value))
+
+ # Wrong number of arguments
+ with pytest.raises(ResponseError) as e:
+ assert None == client.execute_command(
+ 'JSON.ARRLEN', wikipedia, '.phoneNumbers', 'extra')
+ assert str(e.value).find('wrong number of arguments') >= 0
+
+ def test_json_arrlen_command_jsonpath(self):
+ client = self.server.get_new_client()
+
+ assert b'OK' == client.execute_command('JSON.SET', k1, '.',
+ '[[], [\"a\"], [\"a\", \"b\"], [\"a\", \"b\", \"c\"]]')
+ assert b'OK' == client.execute_command('JSON.SET', k2, '.',
+ '[[], \"a\", [\"a\", \"b\"], [\"a\", \"b\", \"c\"], 4]')
+
+ for (key, path, exp) in [
+ (k1, '$.[*]', [0, 1, 2, 3]),
+ (k2, '$.[*]', [0, None, 2, 3, None])
+ ]:
+ assert exp == client.execute_command(
+ 'JSON.ARRLEN', key, path)
+
+ def test_json_arrappend_command(self):
+ client = self.server.get_new_client()
+ # edge case: append to an empty array
+ assert 1 == client.execute_command(
+ 'JSON.ARRAPPEND', wikipedia, '.children', '"John"')
+ assert b'["John"]' == client.execute_command(
+ 'JSON.GET', wikipedia, '.children')
+
+ # append to non-empty array
+ assert 3 == client.execute_command(
+ 'JSON.ARRAPPEND', wikipedia, '.children', '"Mary"', '"Tom"')
+ assert b'["John","Mary","Tom"]' == client.execute_command(
+ 'JSON.GET', wikipedia, '.children')
+ assert 3 == client.execute_command(
+ 'JSON.ARRLEN', wikipedia, '.children')
+
+ # return error if document key does not exist
+ with pytest.raises(ResponseError) as e:
+ client.execute_command(
+ 'JSON.ARRAPPEND', foo, '.children', '"Mary"')
+ assert self.error_class.is_nonexistent_error(str(e.value))
+
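+ # Illustrative sketch (uses k1, which this test does not otherwise touch):
+ # with a JSONPath, ARRAPPEND appends to every matching array and returns
+ # one new length per match.
+ client.execute_command('JSON.SET', k1, '.', '[[1],[1,2]]')
+ assert [2, 3] == client.execute_command('JSON.ARRAPPEND', k1, '$[*]', '9')
+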
+ # error condition: path does not exist
+ with pytest.raises(ResponseError) as e:
+ client.execute_command(
+ 'JSON.ARRAPPEND', wikipedia, '.foo', '"abc"')
+ assert self.error_class.is_nonexistent_error(str(e.value))
+
+ # error condition: the element is not an array
+ for path in ['.address', '.groups', '.age', '.weight', '.isAlive', '.spouse', '.firstName']:
+ with pytest.raises(ResponseError) as e:
+ client.execute_command(
+ 'JSON.ARRAPPEND', wikipedia, path, '123')
+ assert self.error_class.is_wrongtype_error(str(e.value))
+
+ def test_json_arrappend_command_jsonpath(self):
+ client = self.server.get_new_client()
+
+ assert b'OK' == client.execute_command('JSON.SET', k1, '.',
+ '[[], [\"a\"], [\"a\", \"b\"], [\"a\", \"b\", \"c\"]]')
+ assert b'OK' == client.execute_command('JSON.SET', k2, '.',
+ '[[], [\"a\"], [\"a\", \"b\"], [\"a\", \"b\", \"c\"]]')
+
+ for (key, path, val, exp, new_val) in [
+ (k1, '$.[*]', '"a"', [1, 2, 3, 4],
+ '[[\"a\"],[\"a\",\"a\"],[\"a\",\"b\",\"a\"],[\"a\",\"b\",\"c\",\"a\"]]'),
+ (k2, '$.[*]', '""', [1, 2, 3, 4],
+ '[[\"\"],[\"a\",\"\"],[\"a\",\"b\",\"\"],[\"a\",\"b\",\"c\",\"\"]]')
+ ]:
+ assert exp == client.execute_command(
+ 'JSON.ARRAPPEND', key, path, val)
+ assert new_val == client.execute_command(
+ 'JSON.GET', key, path).decode()
+
+ def test_json_arrpop_command(self):
+ client = self.server.get_new_client()
+ # edge case: pop an empty array
+ assert None == client.execute_command(
+ 'JSON.ARRPOP', wikipedia, '.children')
+
+ # populate the array
+ assert 3 == client.execute_command(
+ 'JSON.ARRAPPEND', wikipedia, '.children', '"John"', '"Mary"', '"Tom"')
+
+ for (idx, popped_out, new_len, new_val) in [
+ (1, '"Mary"', 2, '["John","Tom"]'),
+ (-1, '"Tom"', 1, '["John"]'),
+ (0, '"John"', 0, '[]')
+ ]:
+ assert popped_out == client.execute_command(
+ 'JSON.ARRPOP', wikipedia, '.children', idx).decode('utf-8')
+ assert new_len == client.execute_command(
+ 'JSON.ARRLEN', wikipedia, '.children')
+ assert new_val == client.execute_command(
+ 'JSON.GET', wikipedia, '.children').decode('utf-8')
+
+ # edge case: pop an empty array
+ assert None == client.execute_command(
+ 'JSON.ARRPOP', wikipedia, '.children')
+
+ # return error if document key does not exist
+ with pytest.raises(ResponseError) as e:
+ client.execute_command(
+ 'JSON.ARRPOP', foo, '.children')
+ assert self.error_class.is_nonexistent_error(str(e.value))
+
+ # error condition: path does not exist
+ with pytest.raises(ResponseError) as e:
+ client.execute_command(
+ 'JSON.ARRPOP', wikipedia, '.foo')
+ assert self.error_class.is_nonexistent_error(str(e.value))
+
+ # error condition: the element is not an array
+ for path in ['.address', '.groups', '.age', '.weight', '.isAlive', '.spouse', '.firstName']:
+ with pytest.raises(ResponseError) as e:
+ client.execute_command(
+ 'JSON.ARRPOP', wikipedia, path)
+ assert self.error_class.is_wrongtype_error(str(e.value))
+
+ # test large index: larger than int32
+ client.execute_command(
+ 'JSON.ARRPOP', wikipedia, ".children", 3000000000)
+
+ # Wrong number of arguments
+ with pytest.raises(ResponseError) as e:
+ assert None == client.execute_command(
+ 'JSON.ARRPOP', wikipedia, '.phoneNumbers', 0, 'extra')
+ assert str(e.value).find('wrong number of arguments') >= 0
+
+ def test_json_arrpop_command_jsonpath(self):
+ client = self.server.get_new_client()
+
+ assert b'OK' == client.execute_command('JSON.SET', k1, '.',
+ '[[], ["a"], ["a", "b"], ["a", "b", "c"]]')
+
+ client.execute_command('COPY', k1,
k2) + client.execute_command('COPY', k1, k3) + + for (key, path, index, exp, exp_new_val) in [ + (k1, '$.[*]', 0, [None, b'"a"', b'"a"', b'"a"'], + '[[],[],["b"],["b","c"]]'), + (k2, '$.[*]', 1, [None, b'"a"', b'"b"', b'"b"'], + '[[],[],["a"],["a","c"]]'), + (k3, '$.[*]', -1, [None, b'"a"', b'"b"', b'"c"'], + '[[],[],["a"],["a","b"]]') + ]: + assert exp == client.execute_command( + 'JSON.ARRPOP', key, path, index) + assert exp_new_val == client.execute_command( + 'JSON.GET', key, '.').decode() + + def test_json_arrinsert_command(self): + client = self.server.get_new_client() + # edge case: insert into an empty array + assert 1 == client.execute_command( + 'JSON.ARRINSERT', wikipedia, '.children', 0, '"foo"') + assert b'["foo"]' == client.execute_command( + 'JSON.GET', wikipedia, '.children') + assert b'"foo"' == client.execute_command( + 'JSON.ARRPOP', wikipedia, '.children') + + # populate the array + assert 3 == client.execute_command( + 'JSON.ARRAPPEND', wikipedia, '.children', '"John"', '"Mary"', '"Tom"') + assert b'["John","Mary","Tom"]' == client.execute_command( + 'JSON.GET', wikipedia, '.children') + + for (idx, val, new_len, new_val) in [ + (0, '"Kathy"', 4, '["Kathy","John","Mary","Tom"]'), + (2, '"Rose"', 5, '["Kathy","John","Rose","Mary","Tom"]'), + (3, '"Bob"', 6, '["Kathy","John","Rose","Bob","Mary","Tom"]'), + (-1, '"Peter"', 7, + '["Kathy","John","Rose","Bob","Mary","Peter","Tom"]'), + (-1, '"Jane"', 8, + '["Kathy","John","Rose","Bob","Mary","Peter","Jane","Tom"]'), + (8, '"Grace"', 9, + '["Kathy","John","Rose","Bob","Mary","Peter","Jane","Tom","Grace"]') + ]: + assert new_len == client.execute_command( + 'JSON.ARRINSERT', wikipedia, '.children', idx, val) + assert new_val == client.execute_command( + 'JSON.GET', wikipedia, '.children').decode('utf-8') + + # return error if document key does not exist + with pytest.raises(ResponseError) as e: + client.execute_command( + 'JSON.ARRINSERT', foo, '.children', 0, '"abc"') + assert self.error_class.is_nonexistent_error(str(e.value)) + + # error condition: path does not exist + with pytest.raises(ResponseError) as e: + client.execute_command( + 'JSON.ARRINSERT', wikipedia, '.foo', 0, '123') + assert self.error_class.is_nonexistent_error(str(e.value)) + + # error condition: the element is not an array + for path in ['.address', '.groups', '.age', '.weight', '.isAlive', '.spouse', '.firstName']: + with pytest.raises(ResponseError) as e: + client.execute_command( + 'JSON.ARRINSERT', wikipedia, path, 0, '1') + assert self.error_class.is_wrongtype_error(str(e.value)) + + # error condition: index arg is out of array boundaries + with pytest.raises(ResponseError) as e: + client.execute_command( + 'JSON.ARRINSERT', wikipedia, ".children", 3000000000, '"a"') + assert self.error_class.is_outofboundaries_error(str(e.value)) + + # error condition: index arg is out of array boundaries + with pytest.raises(ResponseError) as e: + client.execute_command( + 'JSON.ARRINSERT', wikipedia, ".children", 31, '"a"') + assert self.error_class.is_outofboundaries_error(str(e.value)) + + def test_json_arrinsert_command_jsonpath(self): + client = self.server.get_new_client() + + assert b'OK' == client.execute_command('JSON.SET', k1, '.', + '[[], [0], [0, 1], [0, 1, 2]]') + # COPY NOT SUPPORTED by REJSON + client.execute_command('JSON.SET', k2, '.', + '[[], [0], [0, 1], [0, 1, 2]]') + client.execute_command('JSON.SET', k3, '.', + '[[], [0], [0, 1], [0, 1, 2]]') + + test_cases = [ + (k1, '$.[*]', 0, '3', [1, 2, 3, 4], + '[[3],[3,0],[3,0,1],[3,0,1,2]]'), + (k3, 
'$.[*]', -1, '3', [1, 2, 3, 4],
+ '[[3],[3,0],[0,3,1],[0,1,3,2]]')
+ ]
+
+ # Negative indexes beyond the array length work strangely in ReJSON
+ for (key, path, index, val, exp, exp_new_val) in test_cases:
+ assert exp == client.execute_command(
+ 'JSON.ARRINSERT', key, path, index, val)
+ assert exp_new_val == client.execute_command(
+ 'JSON.GET', key, '.').decode()
+
+ # test index out of bounds error
+ with pytest.raises(ResponseError) as e:
+ client.execute_command(
+ 'JSON.ARRINSERT', k2, "$.[*]", 1, '3')
+ assert self.error_class.is_outofboundaries_error(str(e.value))
+
+ def test_json_clear_command(self):
+ client = self.server.get_new_client()
+
+ # populate the array to be cleared
+ assert 3 == client.execute_command(
+ 'JSON.ARRAPPEND', wikipedia, '.children', '"John"', '"Mary"', '"Tom"')
+ assert b'["John","Mary","Tom"]' == client.execute_command(
+ 'JSON.GET', wikipedia, '.children')
+
+ # clearing the array returns 1 (one container cleared); 0 elements remain
+ assert 1 == client.execute_command(
+ 'JSON.CLEAR', wikipedia, '.children')
+ assert 0 == client.execute_command(
+ 'JSON.ARRLEN', wikipedia, '.children')
+
+ # clears empty array
+ assert 0 == client.execute_command(
+ 'JSON.CLEAR', wikipedia, '.children')
+ assert 0 == client.execute_command(
+ 'JSON.ARRLEN', wikipedia, '.children')
+
+ # if path does not exist, the command should return 0
+ assert 0 == client.execute_command(
+ 'JSON.CLEAR', wikipedia, '.foo')
+
+ # if the value at the path is not a container, the command should return 0
+ assert b'OK' == client.execute_command(
+ 'JSON.SET', wikipedia, '.foobool', 'false')
+ assert 0 == client.execute_command(
+ 'JSON.CLEAR', wikipedia, '.foobool')
+
+ # clear the wikipedia object entirely
+ wikipedia_objlen = client.execute_command(
+ 'JSON.OBJLEN', wikipedia)
+ assert 0 != wikipedia_objlen
+ assert 1 == client.execute_command(
+ 'JSON.CLEAR', wikipedia)
+ assert 0 == client.execute_command(
+ 'JSON.OBJLEN', wikipedia)
+
+ def test_json_clear_command_jsonpath(self):
+ client = self.server.get_new_client()
+
+ assert b'OK' == client.execute_command('JSON.SET', k1, '.',
+ '{"a":{}, "b":{"a": 1, "b": null, "c": true}, "c":1, "d":true, "e":null, "f":"d"}')
+ assert b'OK' == client.execute_command('JSON.SET', k2, '.',
+ '[[], [0], [0,1], [0,1,2], 1, true, null, "d"]')
+
+ test_cases = [
+ (k1, '$.*', 4, '{"a":{},"b":{},"c":0,"d":false,"e":null,"f":""}'),
+ (k2, '$[*]', 6, '[[],[],[],[],0,false,null,""]')
+ ]
+
+ for (key, path, exp, exp_new_val) in test_cases:
+ assert exp == client.execute_command(
+ 'JSON.CLEAR', key, path)
+ assert exp_new_val == client.execute_command(
+ 'JSON.GET', key, '.').decode()
+
+ def test_json_arrtrim_command(self):
+ client = self.server.get_new_client()
+ # edge case: empty array
+ assert 0 == client.execute_command(
+ 'JSON.ARRTRIM', wikipedia, '.children', 0, 1)
+
+ # populate the array
+ assert 3 == client.execute_command(
+ 'JSON.ARRAPPEND', wikipedia, '.children', '"John"', '"Mary"', '"Tom"')
+ assert b'["John","Mary","Tom"]' == client.execute_command(
+ 'JSON.GET', wikipedia, '.children')
+
+ for (path, start, end, new_len, new_val) in [
+ ('.children', 1, 2, 2, '["Mary","Tom"]'),
+ ('.children', 0, 0, 1, '["Mary"]'),
+ ('.children', -1, 5, 1, '["Mary"]'),
+ ('.phoneNumbers', 2, 0, 0, '[]')
+ ]:
+ assert new_len == client.execute_command(
+ 'JSON.ARRTRIM', wikipedia, path, start, end)
+ assert new_val == client.execute_command(
+ 'JSON.GET', wikipedia, path).decode('utf-8')
+
+ # return error if document key does not exist
+ with pytest.raises(ResponseError) as e:
+
client.execute_command(
+ 'JSON.ARRTRIM', foo, '.children', 0, 1)
+ assert self.error_class.is_nonexistent_error(str(e.value))
+
+ # error condition: path does not exist
+ with pytest.raises(ResponseError) as e:
+ client.execute_command(
+ 'JSON.ARRTRIM', wikipedia, '.foo', 0, 3)
+ assert self.error_class.is_nonexistent_error(str(e.value))
+
+ # error condition: the element is not an array
+ for path in ['.address', '.groups', '.age', '.weight', '.isAlive', '.spouse', '.firstName']:
+ with pytest.raises(ResponseError) as e:
+ client.execute_command(
+ 'JSON.ARRTRIM', wikipedia, path, 0, 1)
+ assert self.error_class.is_wrongtype_error(str(e.value))
+
+ # test large index: larger than int32
+ client.execute_command(
+ 'JSON.ARRTRIM', wikipedia, ".phoneNumbers", 3000000000, 3000000001)
+
+ # Wrong number of arguments
+ with pytest.raises(ResponseError) as e:
+ assert None == client.execute_command(
+ 'JSON.ARRTRIM', wikipedia, '.phoneNumbers', 0, 1, 'extra')
+ assert str(e.value).find('wrong number of arguments') >= 0
+
+ def test_json_arrtrim_command_jsonpath(self):
+ client = self.server.get_new_client()
+
+ assert b'OK' == client.execute_command('JSON.SET', k1, '.',
+ '[[], ["a"], ["a", "b"], ["a", "b", "c"]]')
+ assert b'OK' == client.execute_command('JSON.SET', k2, '.',
+ '[[], ["a"], ["a", "b"], ["a", "b", "c"]]')
+ assert b'OK' == client.execute_command('JSON.SET', k3, '.',
+ '[[], [0], [0,1], [0,1,2], [0,1,2,3]]')
+
+ for (key, path, start, stop, exp, exp_new_val) in [
+ (k1, '$.[*]', 0, 1, [0, 1, 2, 2],
+ '[[],["a"],["a","b"],["a","b"]]'),
+ (k2, '$.[*]', 1, 1, [0, 0, 1, 1], '[[],[],["b"],["b"]]'),
+ (k3, '$.[*]', 1, 2, [0, 0, 1, 2, 2], '[[],[],[1],[1,2],[1,2]]')
+ ]:
+ assert exp == client.execute_command(
+ 'JSON.ARRTRIM', key, path, start, stop)
+ assert exp_new_val == client.execute_command(
+ 'JSON.GET', key, '.').decode()
+
+ def test_json_arrindex_command(self):
+ # edge case: empty array
+ client = self.server.get_new_client()
+ assert -1 == client.execute_command(
+ 'JSON.ARRINDEX', wikipedia, '.children', '"tom"')
+
+ # populate the array
+ assert 5 == client.execute_command('JSON.ARRAPPEND', wikipedia, '.children',
+ '"John"', '"Mary"', '"Tom"', '"Paul"', '"Peter"')
+
+ for (val, idx) in [
+ ('"John"', 0),
+ ('"Tom"', 2),
+ ('"Peter"', 4),
+ ('"Peter2"', -1)
+ ]:
+ assert idx == client.execute_command(
+ 'JSON.ARRINDEX', wikipedia, '.children', val)
+
+ for (val, start, stop, idx) in [
+ ('"Tom"', 5, 0, -1),
+ ('"Paul"', 0, 4, 3),
+ ('"Paul"', 0, 0, 3)
+ ]:
+ assert idx == client.execute_command(
+ 'JSON.ARRINDEX', wikipedia, '.children', val, start, stop)
+
+ assert b'OK' == client.execute_command(
+ 'JSON.SET', arr, '.', '[0, 1, 2, 3, 4]')
+ assert 1 == client.execute_command(
+ 'JSON.ARRINDEX', arr, '.', '1')
+ assert -1 == client.execute_command(
+ 'JSON.ARRINDEX', arr, '.', '1', '2')
+
+ # return error if document key does not exist
+ with pytest.raises(ResponseError) as e:
+ client.execute_command(
+ 'JSON.ARRINDEX', foo, '.children', 0, 5)
+ assert self.error_class.is_nonexistent_error(str(e.value))
+
+ # error condition: path does not exist
+ with pytest.raises(ResponseError) as e:
+ client.execute_command(
+ 'JSON.ARRINDEX', wikipedia, '.foo', 0, 3)
+ assert self.error_class.is_nonexistent_error(str(e.value))
+
+ # error condition: the element is not an array
+ for path in ['.address', '.groups', '.age', '.weight', '.isAlive', '.spouse', '.firstName']:
+ with pytest.raises(ResponseError) as e:
+ client.execute_command(
+ 'JSON.ARRINDEX', wikipedia, path, '1')
+ assert self.error_class.is_wrongtype_error(str(e.value)) + + # test large index: larger than int32 + client.execute_command( + 'JSON.ARRINDEX', wikipedia, ".phoneNumbers", '1', 3000000000, 0) + + def test_json_arrindex_command_jsonpath(self): + client = self.server.get_new_client() + + assert b'OK' == client.execute_command('JSON.SET', k1, '.', + '[[], [\"a\"], [\"a\", \"b\"], [\"a\", \"b\", \"c\"]]') + assert b'OK' == client.execute_command('JSON.SET', k2, '.', + '[[], [0], [0,1], [0,1,2]]') + assert b'OK' == client.execute_command('JSON.SET', k3, '.', + '[[], [0,true], [0,1,false], [0,1,2,null,true]]') + + for (key, path, val, exp) in [ + (k1, '$.[*]', '"a"', [-1, 0, 0, 0]), + (k1, '$.[*]', '"b"', [-1, -1, 1, 1]), + (k1, '$.[*]', '"c"', [-1, -1, -1, 2]), + (k2, '$.[*]', '1', [-1, -1, 1, 1]), + (k2, '$.[*]', '2', [-1, -1, -1, 2]), + (k2, '$.[*]', 'true', [-1, -1, -1, -1]), + (k3, '$.[*]', 'true', [-1, 1, -1, 4]), + (k3, '$.[*]', 'null', [-1, -1, -1, 3]) + ]: + assert exp == client.execute_command( + 'JSON.ARRINDEX', key, path, val) + + def test_json_arrindex_should_not_limit_to_scalar_value(self): + client = self.server.get_new_client() + client.execute_command( + 'JSON.SET', k1, '.', '[5, 6, {"a":"b"}, [99,100]]') + assert 2 == client.execute_command( + 'JSON.ARRINDEX', k1, '.', '{"a":"b"}', 0, 0) + assert 3 == client.execute_command( + 'JSON.ARRINDEX', k1, '.', '[99,100]', 0, 0) + assert 0 == client.execute_command( + 'JSON.ARRINDEX', k1, '.', '5', 0, 0) + assert 1 == client.execute_command( + 'JSON.ARRINDEX', k1, '.', '6', 0, 0) + + # Wrong number of arguments + with pytest.raises(ResponseError) as e: + assert None == client.execute_command( + 'JSON.ARRINDEX', wikipedia, '.phoneNumbers', '1', 0, 3, 'extra') + assert str(e.value).find('wrong number of arguments') >= 0 + + def test_json_arrindex_complex_v2_path(self): + client = self.server.get_new_client() + + json_string = '{"level0":{"level1_0":{"level2":[1,2,3, [25, [4,5,{"c":"d"}]]]},"level1_1":{"level2":[[{"a":[2,5]},true,null]]}}}' + assert b'OK' == client.execute_command( + 'JSON.SET', k1, '.', json_string) + for (path, val, exp) in [ + ("$..level0.level1_0..", b'[4,5,{"c":"d"}]', [ + None, -1, 1, -1, None]), + ("$..level0.level1_0..", b'[25,[4,5,{"c":"d"}]]', [ + None, 3, -1, -1, None]), + ("$..level0.level1_0..", b'{"c":"d"}', + [None, -1, -1, 2, None]), + ("$..level0.level1_1..", b'[{"a":[2,5]},true,null]', [ + None, 0, -1, None, -1]), + ("$..level0.level1_1..", b'[null,true,{"a":[2,5]}]', [ + None, -1, -1, None, -1]), + ("$..level0.level1_1..", b'[{"a":[2,5]},true]', [ + None, -1, -1, None, -1]), + ("$..level0.level1_0..", b'[4,{"c":"d"}]', [ + None, -1, -1, -1, None]) + ]: + assert exp == client.execute_command( + 'JSON.ARRINDEX', k1, path, val) + + def test_json_type_command(self): + client = self.server.get_new_client() + for (path, type) in [ + ('.', 'object'), + ('.groups', 'object'), + ('.phoneNumbers', 'array'), + ('.children', 'array'), + ('.isAlive', 'boolean'), + ('.spouse', 'null'), + ('.address.city', 'string'), + ('.age', 'integer'), + ('.weight', 'number') + ]: + assert type == client.execute_command( + 'JSON.TYPE', wikipedia, path).decode('utf-8') + + # return should be null if document key does not exist + assert None == client.execute_command('JSON.TYPE', foo) + + # return should be null if path does not exist + assert None == client.execute_command( + 'JSON.TYPE', wikipedia, '.foo') + + # Wrong number of arguments + + with pytest.raises(ResponseError) as e: + assert None == client.execute_command( + 
'JSON.TYPE', wikipedia, '.phoneNumbers', 'extra') + assert str(e.value).find('wrong number of arguments') >= 0 + + def test_json_type_command_jsonpath(self): + client = self.server.get_new_client() + + assert b'OK' == client.execute_command('JSON.SET', k1, '.', + '{"a":1, "b":2.3, "c":"foo", "d":true, "e":null, "f":{}, "g":[]}') + assert b'OK' == client.execute_command('JSON.SET', k2, '.', + '[1, 2.3, "foo", true, null, {}, []]') + + for (key, path, exp) in [ + (k1, '$.*', [b"integer", b"number", b"string", + b"boolean", b"null", b"object", b"array"]), + (k2, '$[*]', [b"integer", b"number", b"string", + b"boolean", b"null", b"object", b"array"]) + ]: + assert exp == client.execute_command( + 'JSON.TYPE', key, path) + + def test_json_resp_command(self): + client = self.server.get_new_client() + for (path, res) in [ + ('.firstName', b'John'), + ('.isAlive', b'true'), + ('.age', 27), + ('.spouse', None) + ]: + assert res == client.execute_command( + 'JSON.RESP', wikipedia, path) + + for (path, res) in [('.weight', '135.17')]: + assert res == client.execute_command( + 'JSON.RESP', wikipedia, path).decode() + + arr = client.execute_command( + 'JSON.RESP', wikipedia, '.children') + assert 1 == len(arr) + assert arr == [b'['] + + arr = client.execute_command( + 'JSON.RESP', wikipedia, '.address') + assert 5 == len(arr) + + assert arr[0:4] == [b'{', [b'street', b'21 2nd Street'], [ + b'city', b'New York'], [b'state', b'NY']] + assert b'10021-3100' == arr[4][1] + + arr = client.execute_command( + 'JSON.RESP', wikipedia, '.phoneNumbers') + assert 3 == len(arr) + assert b'[' == arr[0] + + assert 3 == len(arr[1]) + assert arr[1] == [b'{', [b'type', b'home'], + [b'number', b'212 555-1234']] + assert 3 == len(arr[2]) + assert arr[2] == [b'{', [b'type', b'office'], + [b'number', b'646 555-4567']] + + # return should be null if document key does not exist + assert None == client.execute_command('JSON.RESP', foo) + + # error condition: path does not exist + with pytest.raises(ResponseError) as e: + client.execute_command( + 'JSON.RESP', wikipedia, '.foo') + assert self.error_class.is_nonexistent_error(str(e.value)) + + # Wrong number of arguments + + with pytest.raises(ResponseError) as e: + assert None == client.execute_command( + 'JSON.RESP', wikipedia, '.phoneNumbers', 'extra') + assert str(e.value).find('wrong number of arguments') >= 0 + + def test_json_resp_command_jsonpath(self): + client = self.server.get_new_client() + + for (path, res) in [ + ('$.firstName', [b'John']), + ('$.isAlive', [b'true']), + ('$.age', [27]), + ('$.spouse', [None]), + ('$.foo', []) + ]: + assert res == client.execute_command( + 'JSON.RESP', wikipedia, path) + + for (path, res) in [('$.weight', '135.17')]: + assert res == client.execute_command( + 'JSON.RESP', wikipedia, path)[0].decode() + + arr = client.execute_command( + 'JSON.RESP', wikipedia, '$.children') + assert 1 == len(arr) + assert arr == [[b'[']] + + arr = client.execute_command( + 'JSON.RESP', wikipedia, '$.address.*') + assert 4 == len(arr) + assert arr == [b'21 2nd Street', b'New York', b'NY', b'10021-3100'] + + arr = client.execute_command( + 'JSON.RESP', wikipedia, '$.phoneNumbers.*') + assert 2 == len(arr) + assert arr[0] == [b'{', [b'type', b'home'], + [b'number', b'212 555-1234']] + assert arr[1] == [b'{', [b'type', b'office'], + [b'number', b'646 555-4567']] + + def test_json_debug_memory(self): + # non-existent key + client = self.server.get_new_client() + + assert None == client.execute_command( + 'JSON.DEBUG MEMORY', nonexistentkey) + + # 
non-existent path
+ with pytest.raises(ResponseError) as e:
+ client.execute_command(
+ 'JSON.DEBUG MEMORY', wikipedia, nonexistentpath)
+ assert self.error_class.is_nonexistent_error(str(e.value))
+
+ # syntax error: key not provided
+ with pytest.raises(ResponseError) as e:
+ client.execute_command('JSON.DEBUG MEMORY')
+ assert str(e.value).startswith('wrong number of arguments')
+
+ # syntax error: wrong subcommand
+ with pytest.raises(ResponseError) as e:
+ client.execute_command(
+ 'JSON.DEBUG MEMORY123', wikipedia)
+ assert self.error_class.is_syntax_error(str(e.value))
+
+ with pytest.raises(ResponseError) as e:
+ client.execute_command('JSON.DEBUG M', wikipedia)
+ assert self.error_class.is_syntax_error(str(e.value))
+
+ with pytest.raises(ResponseError) as e:
+ assert None == client.execute_command(
+ 'JSON.DEBUG MEMORY', wikipedia, '.', 'extra')
+ assert str(e.value).find('wrong number of arguments') >= 0
+
+ # Test shared path
+ no_shared_mem = client.execute_command(
+ 'JSON.DEBUG', 'MEMORY', wikipedia)
+ with_shared_mem = client.execute_command(
+ 'JSON.DEBUG', 'MEMORY', wikipedia, '.')
+ assert with_shared_mem > no_shared_mem
+
+ def test_json_duplicate_keys(self):
+ '''Test handling of an object with duplicate keys'''
+ client = self.server.get_new_client()
+ client.execute_command(
+ 'JSON.SET', k1, '.', '{"a":0, "a":1}')
+ assert b'{"a":1}' == client.execute_command(
+ 'JSON.GET', k1, '.')
+ assert [b"a"] == client.execute_command(
+ 'JSON.OBJKEYS', k1)
+ assert b'1' == client.execute_command(
+ 'JSON.GET', k1, 'a')
+ assert 1 == client.execute_command('JSON.OBJLEN', k1)
+ client.execute_command('JSON.SET', k1, 'a', '2')
+ assert b'{"a":2}' == client.execute_command(
+ 'JSON.GET', k1, '.')
+ client.execute_command('JSON.NUMINCRBY', k1, 'a', '2')
+ assert b'{"a":4}' == client.execute_command(
+ 'JSON.GET', k1, '.')
+ client.execute_command('JSON.NUMMULTBY', k1, 'a', '2')
+ assert b'{"a":8}' == client.execute_command(
+ 'JSON.GET', k1, '.')
+ client.execute_command('JSON.DEL', k1, 'a')
+ assert b'{}' == client.execute_command(
+ 'JSON.GET', k1, '.')
+
+ def test_json_set_command_max_depth(self):
+ client = self.server.get_new_client()
+
+ def json_with_depth(depth):
+ return '{"a":'*depth + '{}' + '}'*depth
+
+ depth_limit = 128
+ client.execute_command(
+ 'config set json.max-path-limit ' + str(depth_limit))
+
+ # json not too deep: ok
+ assert b'OK' == client.execute_command(
+ 'JSON.SET', k, '.', json_with_depth(127))
+
+ # error condition: json is too deep
+ with pytest.raises(ResponseError) as e:
+ json_deep = json_with_depth(200000)
+ client.execute_command(
+ 'JSON.SET', k, '.', json_deep)
+ assert self.error_class.is_limit_exceeded_error(str(e.value))
+
+ def test_json_set_command_max_size(self):
+ client = self.server.get_new_client()
+
+ def json_with_size(size):
+ return '"' + 'a'*(size-2) + '"'
+
+ MB = 2**20
+ assert b'OK' == client.execute_command(
+ 'JSON.SET', k, '.', json_with_size(33*MB))
+
+ with pytest.raises(ResponseError) as e:
+ client.execute_command(
+ 'JSON.SET', k, '.', json_with_size(64*MB))
+ assert self.error_class.is_limit_exceeded_error(str(e.value))
+
+ def test_multi_exec(self):
+ client = self.server.get_new_client()
+ client.execute_command('MULTI')
+ client.execute_command(
+ 'JSON.DEL', wikipedia, '.address.street')
+ client.execute_command(
+ 'JSON.DEL', wikipedia, '.address.zipcode')
+ client.execute_command(
+ 'JSON.SET', wikipedia, '.address.region', '"US East"')
+ client.execute_command('EXEC')
+ v = client.execute_command(
'JSON.GET', wikipedia, '.address').decode() + assert v == '{"city":"New York","state":"NY","region":"US East"}' or \ + v == '{"state":"NY","city":"New York","region":"US East"}' + + client.execute_command('MULTI') + client.execute_command( + 'JSON.ARRPOP', wikipedia, '.phoneNumbers') + client.execute_command( + 'JSON.ARRPOP', wikipedia, '.phoneNumbers') + client.execute_command( + 'JSON.ARRAPPEND', wikipedia, '.phoneNumbers', '123') + client.execute_command( + 'JSON.ARRAPPEND', wikipedia, '.phoneNumbers', '456') + client.execute_command('EXEC') + v = client.execute_command( + 'JSON.GET', wikipedia, '.phoneNumbers').decode() + assert v == '[123,456]' + + def test_escaped_member_names(self): + ''' + Test accessing member names that contain escaped characters. + ''' + client = self.server.get_new_client() + + assert b'OK' == client.execute_command('JSON.SET', k1, '.', + '{"a\\\\a":1, "b\\tb":2, "c\\nc":3, "d\\rd":4, "e\\be":5, "f\\"f":6, "":7, "\'":8}') + assert b'OK' == client.execute_command('JSON.SET', k2, '.', + '{"key\\u0000":"value\\u0000", "key\\u001F":"value\\u001F"}') + for (key, path, exp) in [ + (k1, '$["a\\\\a"]', '[1]'), + (k1, "$['a\\a']", '[1]'), + (k1, '$["b\\tb"]', '[2]'), + (k1, "$['b\\tb']", '[2]'), + (k1, '$["c\\nc"]', '[3]'), + (k1, "$['c\\nc']", '[3]'), + (k1, '$["d\\rd"]', '[4]'), + (k1, "$['d\\rd']", '[4]'), + (k1, '$["e\\be"]', '[5]'), + (k1, "$['e\\be']", '[5]'), + (k1, '$["f\\"f"]', '[6]'), + (k1, "$['f\"f']", '[6]'), + (k1, '$[""]', '[7]'), + (k1, "$['']", '[7]'), + (k1, '$["\'"]', '[8]'), + (k1, "$['\\\'']", '[8]'), + (k2, '$["key\\u0000"]', '["value\\u0000"]'), + (k2, '$["key\\u001F"]', '["value\\u001F"]') + ]: + assert exp == client.execute_command( + 'JSON.GET', key, path).decode() + + def test_serializing_escaped_quotes_in_member_name(self): + client = self.server.get_new_client() + assert b'OK' == client.execute_command( + 'JSON.SET', k1, '.', '{"\\"a":1, "\\"b":2}') + for (path, space, exp) in [ + ('.', None, '{"\\"a":1,"\\"b":2}'), + ('.', ' ', '{"\\"a": 1,"\\"b": 2}') + ]: + if space is None: + assert exp == client.execute_command( + 'JSON.GET', k1, path).decode() + else: + assert exp == client.execute_command( + 'JSON.GET', k1, 'space', space, path).decode() + + def test_json_numincrby_jsonpath_and_wildcard(self): + client = self.server.get_new_client() + + client.execute_command( + 'JSON.SET', k1, '.', '{"a":[], "b":[1], "c":[1,2], "d":[1,2,3]}') + client.execute_command( + 'JSON.SET', k2, '.', '{"a":{}, "b":{"a":1}, "c":{"a":1, "b":2}, "d":{"a":1, "b":2, "c":3}}') + client.execute_command( + 'JSON.SET', k3, '.', '{"a":{"a":"a"}, "b":{"a":"a", "b":1}, "c":{"a":"a", "b":"b"}, "d":{"a":1, "b":"b", "c":3}}') + + # JSONPath: return an array of values. + # If a value is not a number, its corresponding returned element is JSON null. + # NOTE: The expected value has accounted for the outcome of previous commands on the same key. 
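+ # Illustrative sketch (uses k4, which this test leaves untouched): a
+ # non-number member contributes a JSON null to the reply instead of
+ # raising an error.
+ client.execute_command('JSON.SET', k4, '.', '{"n":1,"s":"a"}')
+ assert b'[2,null]' == client.execute_command(
+ 'JSON.NUMINCRBY', k4, '$.*', '1')
+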
+ for (cmd, key, path, incr_num, exp) in [ + ('JSON.NUMINCRBY', k1, '$.a.*', '1', '[]'), + ('JSON.GET', k1, '$.a.*', None, '[]'), + ('JSON.NUMINCRBY', k1, '$.b.*', '1', '[2]'), + ('JSON.GET', k1, '$.b.*', None, '[2]'), + ('JSON.NUMINCRBY', k1, '$.b[*]', '1', '[3]'), + ('JSON.GET', k1, '$.b[*]', None, '[3]'), + ('JSON.NUMINCRBY', k1, '$.d.*', '1', '[2,3,4]'), + ('JSON.GET', k1, '$.d.*', None, '[2,3,4]'), + ('JSON.NUMINCRBY', k1, '$.d[*]', '1', '[3,4,5]'), + ('JSON.GET', k1, '$.d[*]', None, '[3,4,5]'), + ('JSON.NUMINCRBY', k2, '$.a.*', '1', '[]'), + ('JSON.GET', k2, '$.a.*', None, '[]'), + ('JSON.NUMINCRBY', k2, '$.b.*', '1', '[2]'), + ('JSON.GET', k2, '$.b.*', None, '[2]'), + ('JSON.NUMINCRBY', k2, '$.d.*', '1', '[2,3,4]'), + ('JSON.GET', k2, '$.d.*', None, '[2,3,4]'), + ('JSON.NUMINCRBY', k3, '$.a.*', '1', '[null]'), + ('JSON.GET', k3, '$.a.*', None, '["a"]'), + ('JSON.NUMINCRBY', k3, '$.b.*', '1', '[null,2]'), + ('JSON.GET', k3, '$.b.*', None, '["a",2]'), + ('JSON.NUMINCRBY', k3, '$.c.*', '1', '[null,null]'), + ('JSON.GET', k3, '$.c.*', None, '["a","b"]'), + ('JSON.NUMINCRBY', k3, '$.d.*', '1', '[2,null,4]'), + ('JSON.GET', k3, '$.d.*', None, '[2,"b",4]') + ]: + if incr_num is not None: + assert exp.encode() == client.execute_command(cmd, key, path, incr_num) + else: + assert exp.encode() == client.execute_command(cmd, key, path) + + def test_json_numincrby_legacy_path_and_wildcard(self): + client = self.server.get_new_client() + client.execute_command( + 'JSON.SET', k1, '.', '{"a":[], "b":[1], "c":[1,2], "d":[1,2,3]}') + client.execute_command( + 'JSON.SET', k2, '.', '{"a":{}, "b":{"a":1}, "c":{"a":1, "b":2}, "d":{"a":1, "b":2, "c":3}}') + client.execute_command( + 'JSON.SET', k3, '.', '{"a":{"a":"a"}, "b":{"a":"a", "b":1}, "c":{"a":"a", "b":"b"}, "d":{"a":1, "b":"b", "c":3}}') + + # Legacy path: return NONEXISTENT error if no value is selected + for (cmd, key, path, incr_num, exp) in [ + ('JSON.NUMINCRBY', k1, '.a.*', '1', None), + ('JSON.NUMINCRBY', k2, '.a.*', '1', None) + ]: + with pytest.raises(ResponseError) as e: + assert exp == client.execute_command( + cmd, key, path, incr_num) + assert self.error_class.is_nonexistent_error(str(e.value)) + with pytest.raises(ResponseError) as e: + assert exp == client.execute_command( + 'JSON.GET', key, path) + assert self.error_class.is_nonexistent_error(str(e.value)) + + # Legacy path: return WRONGTYPE error if no number value is selected + for (cmd, key, path, incr_num, exp) in [ + ('JSON.NUMINCRBY', k3, '.a.*', '1', None), + ('JSON.NUMINCRBY', k3, '.c.*', '1', None) + ]: + with pytest.raises(ResponseError) as e: + assert exp == client.execute_command( + cmd, key, path, incr_num) + assert self.error_class.is_wrongtype_error(str(e.value)) + + # Legacy path: return a single value, which is the last updated value. + # NOTE: The expected value has accounted for the outcome of previous commands on the same key. 
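+ # Illustrative sketch (uses k4, which this test leaves untouched): every
+ # selected number is updated, but only the last updated value is returned.
+ client.execute_command('JSON.SET', k4, '.', '{"a":1,"b":2}')
+ assert b'3' == client.execute_command('JSON.NUMINCRBY', k4, '.*', '1')
+ assert b'{"a":2,"b":3}' == client.execute_command('JSON.GET', k4, '.')
+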
+ for (cmd, key, path, incr_num, exp) in [ + ('JSON.NUMINCRBY', k1, '.b.*', '1', '2'), + ('JSON.GET', k1, '.b.*', None, '2'), + ('JSON.NUMINCRBY', k1, '.b[*]', '1', '3'), + ('JSON.GET', k1, '.b[*]', None, '3'), + ('JSON.NUMINCRBY', k1, '.d.*', '1', '4'), + ('JSON.GET', k1, '.d.*', None, '2'), + ('JSON.NUMINCRBY', k1, '.d[*]', '1', '5'), + ('JSON.GET', k1, '.d[*]', None, '3'), + ('JSON.NUMINCRBY', k2, '.b.*', '1', '2'), + ('JSON.GET', k2, '.b.*', None, '2'), + ('JSON.NUMINCRBY', k2, '.d.*', '1', '4'), + ('JSON.GET', k2, '.d.*', None, '2'), + ('JSON.NUMINCRBY', k3, '.b.*', '1', '2'), + ('JSON.GET', k3, '.b.*', None, '"a"'), + ('JSON.NUMINCRBY', k3, '.d.*', '1', '4'), + ('JSON.GET', k3, '.d.*', None, '2') + ]: + if incr_num is not None: + assert exp.encode() == client.execute_command(cmd, key, path, incr_num) + else: + assert exp.encode() == client.execute_command(cmd, key, path) + + def test_json_nummultby_jsonpath_and_wildcard(self): + client = self.server.get_new_client() + + client.execute_command( + 'JSON.SET', k1, '.', '{"a":[], "b":[1], "c":[1,2], "d":[1,2,3]}') + client.execute_command( + 'JSON.SET', k2, '.', '{"a":{}, "b":{"a":1}, "c":{"a":1, "b":2}, "d":{"a":1, "b":2, "c":3}}') + client.execute_command( + 'JSON.SET', k3, '.', '{"a":{"a":"a"}, "b":{"a":"a", "b":1}, "c":{"a":"a", "b":"b"}, "d":{"a":1, "b":"b", "c":3}}') + + # JSONPath: return an array of values. + # If a value is not a number, its corresponding returned element is JSON null. + # NOTE: The expected value has accounted for the outcome of previous commands on the same key. + for (cmd, key, path, incr_num, exp) in [ + ('JSON.NUMMULTBY', k1, '$.a.*', '2', '[]'), + ('JSON.GET', k1, '$.a.*', None, '[]'), + ('JSON.NUMMULTBY', k1, '$.b.*', '2', '[2]'), + ('JSON.GET', k1, '$.b.*', None, '[2]'), + ('JSON.NUMMULTBY', k1, '$.b[*]', '2', '[4]'), + ('JSON.GET', k1, '$.b[*]', None, '[4]'), + ('JSON.NUMMULTBY', k1, '$.d.*', '2', '[2,4,6]'), + ('JSON.GET', k1, '$.d.*', None, '[2,4,6]'), + ('JSON.NUMMULTBY', k1, '$.d[*]', '2', '[4,8,12]'), + ('JSON.GET', k1, '$.d[*]', None, '[4,8,12]'), + ('JSON.NUMMULTBY', k2, '$.a.*', '2', '[]'), + ('JSON.GET', k2, '$.a.*', None, '[]'), + ('JSON.NUMMULTBY', k2, '$.b.*', '2', '[2]'), + ('JSON.GET', k2, '$.b.*', None, '[2]'), + ('JSON.NUMMULTBY', k2, '$.d.*', '2', '[2,4,6]'), + ('JSON.GET', k2, '$.d.*', None, '[2,4,6]'), + ('JSON.NUMMULTBY', k3, '$.a.*', '2', '[null]'), + ('JSON.GET', k3, '$.a.*', None, '["a"]'), + ('JSON.NUMMULTBY', k3, '$.b.*', '2', '[null,2]'), + ('JSON.GET', k3, '$.b.*', None, '["a",2]'), + ('JSON.NUMMULTBY', k3, '$.c.*', '2', '[null,null]'), + ('JSON.GET', k3, '$.c.*', None, '["a","b"]'), + ('JSON.NUMMULTBY', k3, '$.d.*', '2', '[2,null,6]'), + ('JSON.GET', k3, '$.d.*', None, '[2,"b",6]') + ]: + if incr_num is not None: + assert exp.encode() == client.execute_command(cmd, key, path, incr_num) + else: + assert exp.encode() == client.execute_command(cmd, key, path) + + def test_json_nummultby_legacy_path_and_wildcard(self): + client = self.server.get_new_client() + + client.execute_command( + 'JSON.SET', k1, '.', '{"a":[], "b":[1], "c":[1,2], "d":[1,2,3]}') + client.execute_command( + 'JSON.SET', k2, '.', '{"a":{}, "b":{"a":1}, "c":{"a":1, "b":2}, "d":{"a":1, "b":2, "c":3}}') + client.execute_command( + 'JSON.SET', k3, '.', '{"a":{"a":"a"}, "b":{"a":"a", "b":1}, "c":{"a":"a", "b":"b"}, "d":{"a":1, "b":"b", "c":3}}') + + # Legacy path: return NONEXISTENT error if no value is selected + for (cmd, key, path, incr_num, exp) in [ + ('JSON.NUMMULTBY', k1, '.a.*', '2', None), + 
('JSON.NUMMULTBY', k2, '.a.*', '2', None)
+        ]:
+            with pytest.raises(ResponseError) as e:
+                assert exp == client.execute_command(
+                    cmd, key, path, incr_num)
+            assert self.error_class.is_nonexistent_error(str(e.value))
+            with pytest.raises(ResponseError) as e:
+                assert exp == client.execute_command(
+                    'JSON.GET', key, path)
+            assert self.error_class.is_nonexistent_error(str(e.value))
+
+        # Legacy path: return WRONGTYPE error if no number value is selected
+        for (cmd, key, path, incr_num, exp) in [
+            ('JSON.NUMMULTBY', k3, '.a.*', '2', None),
+            ('JSON.NUMMULTBY', k3, '.c.*', '2', None)
+        ]:
+            with pytest.raises(ResponseError) as e:
+                assert exp == client.execute_command(
+                    cmd, key, path, incr_num)
+
+            assert self.error_class.is_wrongtype_error(str(e.value))
+
+        # Legacy path: return a single value, which is the last updated value.
+        # NOTE: The expected value has accounted for the outcome of previous commands on the same key.
+        for (cmd, key, path, incr_num, exp) in [
+            ('JSON.NUMMULTBY', k1, '.b.*', '2', '2'),
+            ('JSON.GET', k1, '.b.*', None, '2'),
+            ('JSON.NUMMULTBY', k1, '.b[*]', '2', '4'),
+            ('JSON.GET', k1, '.b[*]', None, '4'),
+            ('JSON.NUMMULTBY', k1, '.d.*', '2', '6'),
+            ('JSON.GET', k1, '.d.*', None, '2'),
+            ('JSON.NUMMULTBY', k1, '.d[*]', '2', '12'),
+            ('JSON.GET', k1, '.d[*]', None, '4'),
+            ('JSON.NUMMULTBY', k2, '.b.*', '2', '2'),
+            ('JSON.GET', k2, '.b.*', None, '2'),
+            ('JSON.NUMMULTBY', k2, '.d.*', '2', '6'),
+            ('JSON.GET', k2, '.d.*', None, '2'),
+            ('JSON.NUMMULTBY', k3, '.b.*', '2', '2'),
+            ('JSON.GET', k3, '.b.*', None, '"a"'),
+            ('JSON.NUMMULTBY', k3, '.d.*', '2', '6'),
+            ('JSON.GET', k3, '.d.*', None, '2')
+        ]:
+            if incr_num is not None:
+                assert exp.encode() == client.execute_command(cmd, key, path, incr_num)
+            else:
+                assert exp.encode() == client.execute_command(cmd, key, path)
+
+    def test_json_digest(self):
+        client = self.server.get_new_client()
+        orig_digest = client.debug_digest()
+        assert orig_digest != 0
+        client.execute_command("flushall")
+        new_digest = client.debug_digest()
+        assert int(new_digest) == 0
+
+    def test_big_dup(self):
+        client = self.server.get_new_client()
+        # This test checks for memory leaks: total_memory_bytes must return to
+        # its baseline after each set/del cycle.
+
+        for fle in glob.glob("data/*.json"):
+            with open(fle, 'r') as file:
+                self.data = file.read()
+            logging.debug("File %s is size %d" % (fle, len(self.data)))
+            b0 = client.info(JSON_INFO_METRICS_SECTION)[
+                JSON_INFO_NAMES['total_memory_bytes']]
+            try:
+                client.execute_command(
+                    "json.set x .", self.data)
+            except Exception:
+                pass
+            try:
+                client.execute_command("json.del x .")
+            except Exception:
+                pass
+            b2 = client.info(JSON_INFO_METRICS_SECTION)[
+                JSON_INFO_NAMES['total_memory_bytes']]
+            assert b2 == b0
+
+    def test_jsonpath_filter_expression(self):
+        client = self.server.get_new_client()
+
+        store = '''
+        {
+            "store": {
+                "books": [
+                    {
+                        "category": "reference",
+                        "author": "Nigel Rees",
+                        "title": "Sayings of the Century",
+                        "price": 8.95
+                    },
+                    {
+                        "category": "fiction",
+                        "author": "Evelyn Waugh",
+                        "title": "Sword of Honour",
+                        "price": 12.99,
+                        "movies": [
+                            {
+                                "title": "Sword of Honour",
+                                "realisator": {
+                                    "first_name": "Bill",
+                                    "last_name": "Anderson"
+                                }
+                            }
+                        ]
+                    },
+                    {
+                        "category": "fiction",
+                        "author": "Herman Melville",
+                        "title": "Moby Dick",
+                        "isbn": "0-553-21311-3",
+                        "price": 9
+                    },
+                    {
+                        "category": "fiction",
+                        "author": "J. R. R. 
Tolkien", + "title": "The Lord of the Rings", + "isbn": "0-395-19395-8", + "price": 22.99 + } + ], + "bicycle": { + "color": "red", + "price": 19.95 + } + } + } + ''' + assert b'OK' == client.execute_command( + 'JSON.SET', k1, '.', store) + assert b'OK' == client.execute_command( + 'JSON.SET', k2, '.', '[1,2,3,4,5]') + assert b'OK' == client.execute_command( + 'JSON.SET', k3, '.', '[true,false,true,false,null,1,2,3,4]') + assert b'OK' == client.execute_command('JSON.SET', k4, '.', + '{"books": [{"price":5,"sold":true,"in-stock":true,"title":"foo"}, {"price":15,"sold":false,"title":"abc"}]}') + assert b'OK' == client.execute_command( + 'JSON.SET', k5, '.', '[1,2,3,4,5,6,7,8,9]') + + for (key, path, exp) in [ + (k1, '$.store.books[?(@.isbn)].price', + b'[9,22.99]'), + (k1, '$.store.books[?( @.isbn )].price', + b'[9,22.99]'), + (k1, '$.store.books[?(@["isbn"])]["price"]', + b'[9,22.99]'), + (k1, '$.store.books[?(@[ "isbn" ])][ "price" ]', + b'[9,22.99]'), + (k1, '$.store.books[?(@[\'isbn\'])][\'price\']', + b'[9,22.99]'), + (k1, + '$.store.books[?(@.category == "reference")].price', b'[8.95]'), + (k1, '$.store.books[?(@.["category"] == "fiction")].price', + b'[12.99,9,22.99]'), + (k1, '$.store.books[?(@.price<1.0E1)].price', + b'[8.95,9]'), + (k1, '$.store.books[?(@["price"]<1.0E1)]["price"]', + b'[8.95,9]'), + (k1, '$.store.books[?(@.["price"]<1.0E1)]["price"]', + b'[8.95,9]'), + (k1, '$.store.books[?(@[\'price\']<1.0E1)][\'price\']', + b'[8.95,9]'), + (k1, + '$.store.books[?(@.price>-1.23e1&&@.price<1.0E1)].price', b'[8.95,9]'), + (k1, '$.store.books[?(@["price"]>-1.23e1&&@["price"]<1.0E1)]["price"]', + b'[8.95,9]'), + (k1, '$.store.books[?(@.["price"]>-1.23e1&&@.["price"]<1.0E1)].["price"]', b'[8.95,9]'), + (k1, '$.store.books[?(@["price"] > -1.23e1 && @["price"] < 1.0E1)]["price"]', b'[8.95,9]'), + (k1, '$.store.books[?(@.price==22.99)].title', + b'["The Lord of the Rings"]'), + (k1, '$.store.books[?(@["price"]==22.99)].title', + b'["The Lord of the Rings"]'), + (k1, + '$.store.books[?(@.price<10.0&&@.isbn)].price', b'[9]'), + (k1, '$.store.books[?(@.price<9||@.price>20)].price', + b'[8.95,22.99]'), + # precedence test + (k1, '$.store.books[?(@.price<9||@.price>10&&@.isbn)].price', + b'[8.95,22.99]'), + # precedence test + (k1, + '$.store.books[?((@.price<9||@.price>10)&&@.isbn)].price', b'[22.99]'), + # precedence test + (k1, + '$.store.books[?((@.price < 9 || @.price>10) && @.isbn)].price', b'[22.99]'), + # precedence test + (k1, '$.store.books[?((@["price"]<9||@["price"]>10)&&@["isbn"])]["price"]', b'[22.99]'), + # precedence test + (k1, '$.store.books[?((@["price"] < 9 || @["price"] > 10) && @["isbn"])]["price"]', b'[22.99]'), + (k2, '$.*.[?(@>2)]', + b'[3,4,5]'), + (k2, '$.*.[?(@ > 2)]', + b'[3,4,5]'), + (k2, '$.*[?(@>2)]', + b'[3,4,5]'), + (k2, '$[*][?(@>2)]', + b'[3,4,5]'), + (k2, '$[ * ][?( @ > 2 )]', + b'[3,4,5]'), + (k2, '$.*[?(@ == 3)]', + b'[3]'), + (k2, '$.*[?(@ != 3)]', + b'[1,2,4,5]'), + (k3, '$.*.[?(@==true)]', + b'[true,true]'), + (k3, '$[*][?(@==true)]', + b'[true,true]'), + (k3, '$[*][?(@ == true)]', + b'[true,true]'), + (k3, '$.*.[?(@>1)]', + b'[2,3,4]'), + (k3, '$.*.[?( @ > 1 ) ]', + b'[2,3,4]'), + (k3, '$[*][?(@>1)]', + b'[2,3,4]'), + (k3, '$[ * ][?( @ > 1 ) ]', + b'[2,3,4]'), + (k4, '$.books[?(@.price>1&&@.price<20&&@.in-stock)]', + b'[{"price":5,"sold":true,"in-stock":true,"title":"foo"}]'), + (k4, '$.books[?(@[\'price\']>1&&@.price<20&&@["in-stock"])]', + b'[{"price":5,"sold":true,"in-stock":true,"title":"foo"}]'), + (k4, 
'$.books[?((@.price>1&&@.price<20)&&(@.sold==false))]', + b'[{"price":15,"sold":false,"title":"abc"}]'), + (k4, '$["books"][?((@["price"]>1&&@["price"]<20)&&(@["sold"]==false))]', + b'[{"price":15,"sold":false,"title":"abc"}]'), + (k5, '$.*[?(@ > 7 || @ < 3)]', + b'[8,9,1,2]'), # order test + (k5, '$.*[?(@ < 3 || @ > 7)]', + b'[1,2,8,9]'), # order test + (k5, '$.*[?(@ > 3 && @ < 7)]', + b'[4,5,6]') + ]: + assert exp == client.execute_command( + 'JSON.GET', key, path) + + assert b'OK' == client.execute_command( + 'JSON.SET', k1, '$.store.books[?(@.price<10.0)].price', '10.01') + for (key, path, exp) in [ + (k1, '$.store.books[?(@.price<10.0)]', b'[]'), + (k1, '$.store.books[?(@.price<=10.02)].price', + b'[10.01,10.01]'), + (k1, '$.store.books[?(@.price <= 10.02)].price', + b'[10.01,10.01]'), + (k1, '$.store.books[?(@.price==10.01)].title', + b'["Sayings of the Century","Moby Dick"]'), + ]: + assert exp == client.execute_command( + 'JSON.GET', key, path) + + for (key, path, new_val, exp) in [ + (k4, '$.books[?((@.price>1&&@.price<20)&&(@.sold==false))].price', + '13.13', b'[13.13]'), + (k4, '$.books[?((@.price > 1 && @.price < 20) && (@.sold == false))].price', + '13.13', b'[13.13]'), + (k4, '$["books"][?((@["price"]>1&&@["price"]<20)&&(@["sold"]==false))]["price"]', + '13.13', b'[13.13]'), + (k4, '$["books"][?((@["price"] > 1 && @["price"] < 20) && (@["sold"] == false))]["price"]', '13.13', b'[13.13]') + ]: + assert b'OK' == client.execute_command( + 'JSON.SET', key, path, new_val) + assert exp == client.execute_command( + 'JSON.GET', key, path) + + # test delete with filter expression + assert b'OK' == client.execute_command( + 'JSON.SET', k1, '.', store) + assert b'OK' == client.execute_command( + 'JSON.SET', k2, '.', store) + for (key, path, exp) in [ + (k1, '$.store.books[?(@.["category"] == "fiction")].price', 3), + (k2, '$.store.books[?((@["price"] < 9 || @["price"] > 10) && @["isbn"])]["price"]', 1) + ]: + assert exp == client.execute_command( + 'JSON.DEL', key, path) + assert b'[]' == client.execute_command( + 'JSON.GET', key, path) + + def test_jsonpath_recursive_descent(self): + client = self.server.get_new_client() + + for (key, val) in [ + (k1, '{"a":{"a":1}}'), + (k2, '{"a":{"a":{"a":{"a":1}}}}'), + (k3, '{"x": {}, "y": {"a":"a"}, "z": {"a":"", "b":"b"}}'), + (k4, '{"a":{"b":{"z":{"y":1}}, "c":{"z":{"y":2}}, "z":{"y":3}}}'), + (k5, '{"a":1, "b": {"e":[0,1,2]}, "c":{"e":[10,11,12]}}'), + (k6, '{"a":[1], "b": {"a": [2,3]}, "c": {"a": [4,5,6]}}') + ]: + assert b'OK' == client.execute_command( + 'JSON.SET', key, '.', val) + + for (key, path, exp) in [ + (k1, '$..a', b'[{"a":1},1]'), + (k2, '$..a', + b'[{"a":{"a":{"a":1}}},{"a":{"a":1}},{"a":1},1]'), + (k2, '$..a..a', b'[{"a":{"a":1}},{"a":1},1]'), + (k2, '$..a..a..a', b'[{"a":1},1]'), + (k2, '$..a..a..a..a', b'[1]'), + (k2, '$..a..a..a..a..a', b'[]'), + (k3, '$..a', b'["a",""]'), + (k4, '$.a..z.y', b'[3,1,2]'), + (k4, '$.a..z.*', b'[3,1,2]'), + (k4, '$.a.*..y', b'[1,2,3]'), + (k5, '$..e.[*]', b'[0,1,2,10,11,12]'), + (k5, '$..e[1]', b'[1,11]'), + (k5, '$..e.[1]', b'[1,11]'), + (k5, '$..e.[1]', b'[1,11]'), + (k5, '$..e[0:2]', b'[0,1,10,11]'), + (k5, '$..["e"][1]', b'[1,11]'), + (k5, '$..["e"][1]', b'[1,11]'), + (k5, '$..["e"][0:2]', b'[0,1,10,11]'), + (k5, '$..[ "e" ][ 0 : 2 ]', b'[0,1,10,11]') + ]: + assert exp == client.execute_command( + 'JSON.GET', key, path) + + # recursive ARRAPPEND + assert [2, 3, 4] == client.execute_command( + 'JSON.ARRAPPEND', k6, '$..a', 0) + assert b'{"a":[1,0],"b":{"a":[2,3,0]},"c":{"a":[4,5,6,0]}}' == 
client.execute_command( + 'JSON.GET', k6) + + # recursive delete + assert 2 == client.execute_command( + 'JSON.DEL', k3, '$..a') + assert b'{"x":{},"y":{},"z":{"b":"b"}}' == client.execute_command( + 'JSON.GET', k3) + + # This is the only case that diverges from ReJSON v2. We deleted 4 elements while they deleted 1. + # The divergence comes from the order of deletion. + # Note that "JSON.GET k2 $..a" returns 4 elements. + assert 4 == client.execute_command( + 'JSON.DEL', k2, '$..a') + assert b'{}' == client.execute_command('JSON.GET', k2) + + def test_jsonpath_recursive_insert_update_delete(self): + ''' + Test recursive insert, update and delete. + ''' + client = self.server.get_new_client() + data_store = ''' + { + "store": { + "books": [ + { + "category": "reference", + "author": "Nigel Rees", + "title": "Sayings of the Century", + "price": 8.95, + "in-stock": true + }, + { + "category": "fiction", + "author": "Evelyn Waugh", + "title": "Sword of Honour", + "price": 12.99, + "in-stock": true, + "movies": [ + { + "title": "Sword of Honour - movie", + "realisator": { + "first_name": "Bill", + "last_name": "Anderson" + } + } + ] + }, + { + "category": "fiction", + "author": "Herman Melville", + "title": "Moby Dick", + "isbn": "0-553-21311-3", + "price": 9, + "in-stock": false + }, + { + "category": "fiction", + "author": "J. R. R. Tolkien", + "title": "The Lord of the Rings", + "isbn": "0-115-03266-2", + "price": 22.99, + "in-stock": true + }, + { + "category": "reference", + "author": "William Jr. Strunk", + "title": "The Elements of Style", + "price": 6.99, + "in-stock": false + }, + { + "category": "fiction", + "author": "Leo Tolstoy", + "title": "Anna Karenina", + "price": 22.99, + "in-stock": true + }, + { + "category": "reference", + "author": "Sarah Janssen", + "title": "The World Almanac and Book of Facts 2021", + "isbn": "0-925-23305-2", + "price": 10.69, + "in-stock": false + }, + { + "category": "reference", + "author": "Kate L. 
Turabian", + "title": "Manual for Writers of Research Papers", + "isbn": "0-675-16695-1", + "price": 8.59, + "in-stock": true + } + ], + "bicycle": { + "color": "red", + "price": 19.64, + "in-stock": true + } + } + } + ''' + data_store2 = ''' + { + "store": { + "title": "foo", + "bicycle": { + "title": "foo2", + "color": "red", + "price": 19.64, + "in-stock": true + } + } + } + ''' + for (key, val) in [ + (store, data_store), + (store2, data_store2) + ]: + assert b'OK' == client.execute_command( + 'JSON.SET', key, '.', val) + + # recursive delete + assert 2 == client.execute_command( + 'JSON.DEL', store2, '$..title') + assert b'[]' == client.execute_command( + 'JSON.GET', store2, '$..title') + + assert b'{"store":{"bicycle":{"color":"red","price":19.64,"in-stock":true}}}' == client.execute_command( + 'JSON.GET', store2) + + # recursive insert, update and delete + assert b'["Sayings of the Century","Sword of Honour","Sword of Honour - movie","Moby Dick","The Lord of the Rings","The Elements of Style","Anna Karenina","The World Almanac and Book of Facts 2021","Manual for Writers of Research Papers"]'\ + == client.execute_command('JSON.GET', store, '$..title') + assert b'OK' == client.execute_command( + 'JSON.SET', store, '$..title', '"foo"') + assert b'["foo","foo","foo","foo","foo","foo","foo","foo","foo"]'\ + == client.execute_command('JSON.GET', store, '$..title') + for (key, path, exp) in [ + (store, '$.title', b'[]'), + (store, '$.store.title', b'[]'), + (store, '$.store.bicycle.title', b'[]'), + (store, '$.store.books[1].title', b'["foo"]'), + (store, '$.store.books[1].movies[0].title', b'["foo"]'), + (store, '$.store.books[1].movies[0].realisator.title', b'[]') + ]: + assert exp == client.execute_command( + 'JSON.GET', key, path) + assert 9 == client.execute_command( + 'JSON.DEL', store, '$..title') + assert b'[]' == client.execute_command( + 'JSON.GET', store, '$..title') + assert True == client.execute_command('save') + + def test_jsonpath_recursive_insert_update_delete2(self): + ''' + Test recursive insert, update and delete. 
+ ''' + client = self.server.get_new_client() + data_input = ''' + { + "input": { + "a": 1, + "b": { + "e": [0,1,2] + }, + "c": { + "e": [10,11,12] + } + } + } + ''' + data_input2 = ''' + { + "input": { + "a": 1, + "b": { + "e": [0,1,2] + }, + "c": { + "e": [10,11,12] + } + } + } + ''' + for (key, val) in [ + (input, data_input), + (input2, data_input2) + ]: + assert b'OK' == client.execute_command( + 'JSON.SET', key, '.', val) + + # recursive delete + assert 2 == client.execute_command( + 'JSON.DEL', input2, '$..e') + assert b'[]' == client.execute_command( + 'JSON.GET', input2, '$..e') + assert b'{"input":{"a":1,"b":{},"c":{}}}' == client.execute_command( + 'JSON.GET', input2) + + # recursive insert, update and delete + assert b'[0,1,2,10,11,12]' == client.execute_command( + 'JSON.GET', input, '$..e[*]') + assert b'OK' == client.execute_command( + 'JSON.SET', input, '$..e[*]', '4') + assert b'[4,4,4,4,4,4]' == client.execute_command( + 'JSON.GET', input, '$..e[*]') + assert b'[4,4,4]' == client.execute_command( + 'JSON.GET', input, '$.input.b.e[*]') + for (key, path, exp) in [ + (input, '$.e', b'[]'), + (input, '$.input.e', b'[]'), + (input, '$.input.a.e', b'[]'), + (input, '$.input.b.e[*]', b'[4,4,4]'), + (input, '$.input.c.e[*]', b'[4,4,4]'), + ]: + assert exp == client.execute_command( + 'JSON.GET', key, path) + assert 2 == client.execute_command( + 'JSON.DEL', input, '$..e') + assert 0 == client.execute_command( + 'JSON.DEL', input, '$..e') + assert b'[]' == client.execute_command( + 'JSON.GET', input, '$..e') + assert True == client.execute_command('save') + + def test_jsonpath_compatibility_invalidArrayIndex(self): + client = self.server.get_new_client() + + # Array index is not integer + for (key, path) in [ + (wikipedia, '.phoneNumbers[]'), + (wikipedia, '.phoneNumbers[x]'), + (wikipedia, '$.phoneNumbers[]'), + (wikipedia, '$.phoneNumbers[x]') + ]: + with pytest.raises(ResponseError) as e: + client.execute_command('JSON.GET', key, path) + assert self.error_class.is_syntax_error(str(e.value)) + + def test_jsonpath_compatibility_unquotedMemberName(self): + client = self.server.get_new_client() + + # Unquoted member name can contain any symbol except terminator characters + json = '''{ + "%x22key%x22":1, "+2":2, "-3":3, "/4":4, + ")5":5, "6)":6, "(7":7, "8(":8, + "]9":9, "10]":10, "[11":11, "12[":12, + ">13":13, "14>":14, "<15":15, "16<":16, + "=17":17, "18=":18, "!19":19, "20!":20, + "21.21":21 + }''' + assert b'OK' == client.execute_command( + 'JSON.SET', k1, '.', json) + + test_cases = [ + (k1, '$.%x22key%x22', b'[1]'), + (k1, '$.+2', b'[2]'), + (k1, '$.-3', b'[3]'), + (k1, '$./4', b'[4]'), + # The following should return empty array because unquoted member name cannot contain terminator characters. + (k1, '$.6)', b'[]'), + (k1, '$.8(', b'[]'), + (k1, '$.10]', b'[]'), + # Bracketed/Quoted member name should work + (k1, '$["6)"]', b'[6]'), + (k1, '$["8("]', b'[8]'), + (k1, '$["10]"]', b'[10]'), + (k1, '$["12["]', b'[12]'), + (k1, '$["14>"]', b'[14]'), + (k1, '$["16<"]', b'[16]'), + (k1, '$["18="]', b'[18]'), + (k1, '$["20!"]', b'[20]'), + (k1, '$["21.21"]', b'[21]'), + (k1, '$.12[', b'[]'), + (k1, '$.14>', b'[]'), + (k1, '$.16<', b'[]'), + (k1, '$.18=', b'[]'), + (k1, '$.20!', b'[]'), + (k1, '$.21.21', b'[]'), + ] + + for (key, path, exp) in test_cases: + assert exp == client.execute_command( + 'JSON.GET', key, path) + + # Unquoted object member cannot start with a character that is a member name terminator. 
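+        # For example, '$.6)' above returns [] because ')' terminates the
+        # unquoted name, while the bracketed '$["6)"]' returns [6]; '$.)5'
+        # below is a syntax error because an unquoted name cannot *start*
+        # with a terminator such as ')'.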
+        for (key, path) in [
+            (k1, '$.)5'),
+            (k1, '$.]7'),
+            (k1, '$.]9'),
+            (k1, '$.[11'),
+            (k1, '$.>13'),
+            (k1, '$.<15'),
+            (k1, '$.=17'),
+            (k1, '$.!19')
+        ]:
+            with pytest.raises(ResponseError) as e:
+                client.execute_command('JSON.GET', key, path)
+            assert self.error_class.is_syntax_error(str(e.value))
+
+    def test_write_commands_duplicate_values(self):
+        '''
+        Test all WRITE JSON commands: when the JSON path resolves to multiple values that
+        include duplicates, the write operation must be applied to each unique value exactly once.
+        '''
+        client = self.server.get_new_client()
+
+        for (key, val) in [
+            (k1, '[0,1,2,3,4,5,6,7,8,9]'),
+            (k2, '[0,1,2,3,4,5,6,7,8,9]'),
+            (k3, '[0,1,2,3,4,5,6,7,8,9]'),
+            (k4, '["0","1","2","3","4","5","6","7","8","9"]'),
+            (k5, '[[0],[1],[2],[3],[4],[5],[6],[7],[8],[9]]'),
+            (k6, '[true,true,true,true,true]'),
+            (k7, '[[0],[1],[2],[3],[4],[5],[6],[7],[8],[9]]'),
+            (k8, '[[0],[1],[2],[3],[4],[5],[6],[7],[8],[9]]'),
+            (k9, '[[0],[1],[2],[3],[4],[5],[6],[7],[8],[9]]'),
+            (k10, '[[0,1,2,3,4],[0,1,2,3,4],[0,1,2,3,4]]'),
+            (k11, '[0,1,2,3,4,5,6,7,8,9]'),
+            (k12, '[0,1,2,3,4,5,6,7,8,9]')
+        ]:
+            assert b'OK' == client.execute_command(
+                'JSON.SET', key, '.', val)
+
+        # NUMINCRBY
+        assert b'[10]' == client.execute_command(
+            'JSON.NUMINCRBY', k1, '$[0,0,0,0,0]', 10)
+        assert b'[10,1,2,3,4,5,6,7,8,9]' == client.execute_command(
+            'JSON.GET', k1)
+
+        # NUMMULTBY
+        assert b'[18]' == client.execute_command(
+            'JSON.NUMMULTBY', k2, '$[9,9,9,9,9]', 2)
+        assert b'[0,1,2,3,4,5,6,7,8,18]' == client.execute_command(
+            'JSON.GET', k2)
+
+        # CLEAR
+        assert 1 == client.execute_command(
+            'JSON.CLEAR', k5, '$[0,0,0,0,0]')
+        assert b'[[],[1],[2],[3],[4],[5],[6],[7],[8],[9]]' == client.execute_command(
+            'JSON.GET', k5)
+
+        # TOGGLE
+        assert [0] == client.execute_command(
+            'JSON.TOGGLE', k6, '$[0,0,0,0]')
+        assert b'[false,true,true,true,true]' == client.execute_command(
+            'JSON.GET', k6)
+
+        # STRAPPEND
+        assert [4] == client.execute_command(
+            'JSON.STRAPPEND', k4, '$[0,0,0,0,0]', '"foo"')
+        assert b'["0foo","1","2","3","4","5","6","7","8","9"]' == client.execute_command(
+            'JSON.GET', k4)
+
+        # ARRAPPEND
+        assert [3] == client.execute_command(
+            'JSON.ARRAPPEND', k7, '$[0,0,0,0,0]', 8, 9)
+        assert b'[[0,8,9],[1],[2],[3],[4],[5],[6],[7],[8],[9]]' == client.execute_command(
+            'JSON.GET', k7)
+
+        # ARRINSERT
+        assert [2] == client.execute_command(
+            'JSON.ARRINSERT', k8, '$[0,0,0,0,0]', 0, 9)
+        assert b'[[9,0],[1],[2],[3],[4],[5],[6],[7],[8],[9]]' == client.execute_command(
+            'JSON.GET', k8)
+
+        # ARRPOP
+        assert [b"0"] == client.execute_command(
+            'JSON.ARRPOP', k9, '$[0,0,0,0,0]')
+        assert b'[[],[1],[2],[3],[4],[5],[6],[7],[8],[9]]' == client.execute_command(
+            'JSON.GET', k9)
+
+        # ARRTRIM
+        assert [2] == client.execute_command(
+            'JSON.ARRTRIM', k10, '$[0,0,0,0,0]', 0, 1)
+        assert b'[[0,1],[0,1,2,3,4],[0,1,2,3,4]]' == client.execute_command(
+            'JSON.GET', k10)
+
+        # DEL
+        assert 1 == client.execute_command(
+            'JSON.DEL', k3, '$[0,0,0,0,0]')
+        assert b'[1,2,3,4,5,6,7,8,9]' == client.execute_command(
+            'JSON.GET', k3)
+
+        # DEL: delete arbitrary elements with duplicates
+        assert 5 == client.execute_command(
+            'JSON.DEL', k12, '$[1,4,7,0,0,3,3]')
+        assert b'[2,5,6,8,9]' == client.execute_command(
+            'JSON.GET', k12)
+
+        # SET
+        assert b'OK' == client.execute_command(
+            'JSON.SET', k11, '$[0,0,0,0,0]', 9)
+        assert b'[9,1,2,3,4,5,6,7,8,9]' == client.execute_command(
+            'JSON.GET', k11)
+
+    def test_jsonpath_compatibility_filterOnObject_and_stringComparison(self):
+        client = self.server.get_new_client()
+
+        json = '''
+        {
+            "key for key" : "key inside here",
+            "an object" : {
+                "weight" : 300,
+                "a value" : 300,
+                "my key" : "key inside here"
+            },
+            "another object" : {
+                "weight" : 400,
+                "a value" : 400,
+                "my key" : "key inside there"
+            },
+            "objects": [
+                {
+                    "weight" : 100,
+                    "a value" : 100,
+                    "my key" : "key inside here"
+                },
+                {
+                    "weight" : 200,
+                    "a value" : 200,
+                    "my key" : "key inside there"
+                },
+                {
+                    "weight" : 300,
+                    "a value" : 300,
+                    "my key" : "key inside here"
+                },
+                {
+                    "weight" : 400,
+                    "a value" : 400,
+                    "my key" : "key inside there"
+                }
+            ]
+        }
+        '''
+        assert b'OK' == client.execute_command(
+            'JSON.SET', k1, '.', json)
+
+        for (key, path, exp) in [
+            (k1, '$["an object"].[?(@.weight > 200)].["a value"]',
+             b'[300]'),
+            (k1, '$["an object"].[?(@.weight == 300)].["a value"]',
+             b'[300]'),
+            (k1, '$["an object"].[?(@.weight > 300)].["a value"]',
+             b'[]'),
+            (k1, '$["another object"].[?(@["a value"] > 200)].weight',
+             b'[400]'),
+            (k1, '$["another object"].[?(@.["a value"] > 200)].weight',
+             b'[400]'),
+            (k1, '$["another object"].[?(@.["my key"] == "key inside there")].weight', b'[400]'),
+            (k1, '$["objects"].[?(@.weight > 200)].["a value"]',
+             b'[300,400]'),
+            (k1, '$["objects"].[?(@.["my key"] == "key inside there")].weight',
+             b'[200,400]'),
+            (k1, '$["objects"].[?(@.["my key"] != "key inside there")].weight',
+             b'[100,300]'),
+            (k1, '$["objects"].[?(@.["my key"] == "key inside here")].weight',
+             b'[100,300]'),
+            (k1, '$["objects"].[?(@.["my key"] != "key inside here")].weight',
+             b'[200,400]'),
+            (k1, '$["objects"].[?(@.["my key"] <= "key inside here")].weight',
+             b'[100,300]'),
+            (k1, '$["objects"].[?(@.["my key"] >= "key inside here")].weight',
+             b'[100,200,300,400]'),
+            (k1, '$["objects"].[?(@.["my key"] < "key inside herf")].weight',
+             b'[100,300]'),
+            (k1, '$["objects"].[?(@.["my key"] > "key insidd here")].weight',
+             b'[100,200,300,400]'),
+            (k1, '$["key for key"].[?(@.["a value"] > 200)]',
+             b'[]')
+        ]:
+            assert exp == client.execute_command(
+                'JSON.GET', key, path)
+
+    def test_jsonpath_compatibility_union_of_object_members(self):
+        client = self.server.get_new_client()
+
+        test_cases = [
+            (wikipedia, '$.["firstName","lastName"]',
+             b'["John","Smith"]'),
+            (organism,
+             '$.animals["2","2"].mammals..weight', b'[]'),
+            (organism, '$.animals["2","Junk"]',
+             b'[]'),
+            (organism, '$.animals["Junk","Junk"]',
+             b'[]'),
+            # test unique values in recursive descent
+            (organism, '$..[?(@.weight>400)].name',
+             b'["Redwood","Horse"]'),
+            (organism, '$..[?(@.weight>=60&&@.name=="Chimpanzee")].name',
+             b'["Chimpanzee"]'),
+            (wikipedia, '$.[ "firstName", "lastName" ]',
+             b'["John","Smith"]'),
+            (wikipedia, '$.address.[ "street", "city", "state", "zipcode" ]',
+             b'["21 2nd Street","New York","NY","10021-3100"]'),
+        ]
+
+        assert b'OK' == client.execute_command(
+            'JSON.SET', organism, '.', DATA_ORGANISM)
+
+        for (key, path, exp) in test_cases:
+            assert exp == client.execute_command('JSON.GET', key, path)
+
+    def test_jsonpath_malformed_path(self):
+        client = self.server.get_new_client()
+
+        for (key, val) in [
+            (k1, '{"store":{"book":[{"price":5,"sold":true,"in-stock":true,"title":"foo","author":"me","isbn":"978-3-16-148410-0"}]}}'),
+            (k2, '[1,2,3,4,5,6,7,8,9,10]')
+        ]:
+            assert b'OK' == client.execute_command(
+                'JSON.SET', key, '.', val)
+
+        test_cases = [
+            (k1, '$[0:2]$[0:1]$[0:2]$[0:2]$[0<2065>:2]$[0:2]', b'[]'),
+            (k1, '$[0,1]', b'[]'),
+            (k2, '$.[\"a\"].[\"b\"].[\"c\"]', b'[]'),
+            (k1, '$a.b.c.d', b'[]'),
+            (k2, '$a$b$c$d', b'[]'),
+        ]
+
+        for (key, path, exp) in test_cases:
+            assert exp == client.execute_command(
+                'JSON.GET', key, path)
+
+        for (key, path) in [
+            (k1, '.[0:2].[0:1].[0:2].[0:2].[0<2065>:2].[0:2]'),
+            (k1, '.[0:2]$[0:1]$[0:2]$[0:2]$[0<2065>:2]$[0:2]')
+        ]:
+            with pytest.raises(ResponseError) as e:
+                client.execute_command('JSON.GET', key, path)
+            assert self.error_class.is_wrongtype_error(
+                str(e.value)) or self.error_class.is_nonexistent_error(str(e.value))
+
+        for (key, path) in [
+            (k1, '&&$.store..price'),
+            (k1, '!$.store..price'),
+            (k1, '=$.store..price'),
+            (k1, '=.store..price'),
+            (k1, '||.store..price')
+        ]:
+            with pytest.raises(ResponseError) as e:
+                client.execute_command('JSON.GET', key, path)
+            assert self.error_class.is_syntax_error(str(e.value))
+            with pytest.raises(ResponseError) as e:
+                client.execute_command(
+                    'JSON.SET', key, path, '0')
+            assert self.error_class.is_syntax_error(str(e.value))
+
+    def test_v2_path_limit_recursive_descent(self):
+        client = self.server.get_new_client()
+
+        depth_limit = 10
+        client.execute_command(
+            'config set json.max-path-limit ' + str(depth_limit))
+
+        assert b'OK' == client.execute_command(
+            'JSON.SET', k1, '$', '{"a":0}')
+
+        # repeatedly increase the path depth by 1 until the limit is reached.
+        for _ in range(0, depth_limit-1):
+            client.execute_command(
+                'JSON.SET', k1, '$..a', '{"a":0}')
+        # verify the path depth reaches the limit
+        assert depth_limit == client.execute_command(
+            'JSON.DEBUG', 'DEPTH', k1)
+
+        # one more increase should exceed the path limit
+        with pytest.raises(ResponseError) as e:
+            client.execute_command(
+                'JSON.SET', k1, '$..a', '{"a":0}')
+        assert str(e.value).startswith("LIMIT")
+
+    def test_v2_path_limit_insert_member(self):
+        client = self.server.get_new_client()
+
+        depth_limit = 3
+        client.config_set('json.max-path-limit', depth_limit)
+
+        assert b'OK' == client.execute_command(
+            'JSON.SET', k1, '$', '{"a":{"a":{"a":0}}}')
+        assert depth_limit == client.execute_command(
+            'JSON.DEBUG DEPTH', k1)
+
+        # inserting a nested object should exceed the limit
+        with pytest.raises(ResponseError) as e:
+            client.execute_command(
+                'JSON.SET', k1, '$.a.a.b', '{"b":0}')
+        assert str(e.value).startswith("LIMIT")
+
+        # increase limit by 1
+        client.config_set(
+            'json.max-path-limit', depth_limit + 1)
+        assert b'OK' == client.execute_command(
+            'JSON.SET', k1, '$.a.a.b', '{"b":0}')
+
+    def test_debug_command_getkeys_api(self):
+        client = self.server.get_new_client()
+
+        for (subcmd, res) in [
+            ('memory k1', [b'k1']),
+            ('fields k1', [b'k1']),
+            ('depth k1', [b'k1']),
+            ('memory k1', [b'k1'])
+        ]:
+            assert res == client.execute_command(
+                'command getkeys json.debug ' + subcmd)
+
+        for subcmd in [
+            '',
+            'memory',
+            'mem',
+            'memo',
+            'fields',
+            'depth',
+            'help',
+            'max-depth-key',
+            'max-size-key'
+        ]:
+            with pytest.raises(ResponseError) as e:
+                client.execute_command(
+                    'command getkeys json.debug ' + subcmd)
+            assert self.error_class.is_wrong_number_of_arguments_error(str(e.value)) or \
+                str(e.value).lower().find("invalid command") >= 0 or \
+                str(e.value).lower().find(
+                    "the command has no key arguments") >= 0
+
+    def test_debug_command_depth(self):
+        client = self.server.get_new_client()
+        for (key, val, depth) in [
+            (k1, '1', 0),
+            (k2, '"a"', 0),
+            (k3, 'null', 0),
+            (k4, 'true', 0),
+            (k5, '{}', 0),
+            (k6, '{"a":0}', 1),
+            (k7, '{"a":{"a":0}}', 2),
+            (k7, '{"a":{"a":{"a":0}}}', 3),
+            (k8, '[]', 0),
+            (k9, '[0]', 1),
+            (k10, '[[0]]', 2),
+            (k11, '[[0],[[0]]]', 3),
+        ]:
+            assert b'OK' == client.execute_command(
+                'JSON.SET', key, '$', 
val) + assert depth == client.execute_command( + 'JSON.DEBUG DEPTH', key) + + # what if the key does not exist? + assert None == client.execute_command( + 'JSON.DEBUG DEPTH', 'foobar') + + def test_insert_update_delete_mode(self): + client = self.server.get_new_client() + assert b'OK' == client.execute_command( + 'JSON.SET', organism, '.', DATA_ORGANISM) + assert b'OK' == client.execute_command( + 'JSON.SET', organism2, '.', DATA_ORGANISM) + + # delete + key = organism + path = '$.animals[*].mammals[*].primates[*].apes[?(@.weight<400)].weight' + assert b'[130,300]' == client.execute_command( + 'JSON.GET', key, path) + assert 2 == client.execute_command( + 'JSON.DEL', key, path) + assert b'[]' == client.execute_command( + 'JSON.GET', key, path) + + # insert + + assert b'OK' == client.execute_command( + 'JSON.SET', key, path, 25) + assert b'[]' == client.execute_command( + 'JSON.GET', key, path) + + # update + key = organism2 + assert b'[130,300]' == client.execute_command( + 'JSON.GET', key, path) + assert b'OK' == client.execute_command( + 'JSON.SET', key, path, 25) + assert b'[25,25]' == client.execute_command( + 'JSON.GET', key, path) + assert 2 == client.execute_command( + 'JSON.DEL', key, path) + assert b'[]' == client.execute_command( + 'JSON.GET', key, path) + + def test_json_arity_per_command(self): + client = self.server.get_new_client() + + # These commands should only get the single key + cmd_arity = [('SET', -4), ('GET', -2), ('MGET', -3), ('DEL', -2), ('FORGET', -2), ('NUMINCRBY', 4), + ('NUMMULTBY', 4), ('STRLEN', -2), ('STRAPPEND', - + 3), ('TOGGLE', -2), ('OBJLEN', -2), ('OBJKEYS', -2), + ('ARRLEN', -2), ('ARRAPPEND', -4), ('ARRPOP', - + 2), ('ARRINSERT', -5), ('ARRTRIM', 5), ('CLEAR', -2), + ('ARRINDEX', -4), ('TYPE', -2), ('RESP', -2), ('DEBUG', -2)] + + for cmd, arity in cmd_arity: + assert arity == client.execute_command('COMMAND', 'INFO', f'JSON.{cmd}')[ + f'JSON.{cmd}']['arity'] + + cmd_arity = [('MEMORY', -3), ('FIELDS', -3), ('DEPTH', 3), ('HELP', 2), + ('MAX-DEPTH-KEY', 2), ('MAX-SIZE-KEY', + 2), ('KEYTABLE-CHECK', 2), ('KEYTABLE-CORRUPT', 3), + ('KEYTABLE-DISTRIBUTION', 3)] + subcmd_dict = {f'JSON.DEBUG|{cmd}': arity for cmd, arity in cmd_arity} + + output = client.execute_command( + 'COMMAND', 'INFO', f'JSON.DEBUG')[f'JSON.DEBUG']['subcommands'] + assert len(output) == len(subcmd_dict) + + for i in range(len(output)): + assert subcmd_dict[output[i][0].decode('ascii')] == output[i][1] + + def test_hashtable_insert_and_remove(self): + client = self.server.get_new_client() + + def make_path(i): + return '$.' 
+ str(i)
+
+        def make_array(sz, offset):
+            data = []
+            for i in range(sz):
+                data.append(str(i + offset))
+            return data
+
+        def make_array_array(p, q):
+            data = make_array(p, 0)
+            for i in range(p):
+                data[i] = make_array(q, i)
+            return data
+
+        def make_string(i):
+            return f"string value {i}"
+
+        # set json.hash-table-min-size
+        client.execute_command(
+            'config set json.hash-table-min-size 5')
+
+        for sz in [10, 50, 100]:
+            for type in ['array_array', 'array', 'string']:
+                client.execute_command(
+                    'json.set', k1, '.', '{}')
+
+                # insert object members
+                for i in range(sz):
+                    if type == 'array_array':
+                        v = make_array_array(i, i)
+                    elif type == 'array':
+                        v = make_array(i, i)
+                    else:
+                        v = make_string(i)
+                    client.execute_command(
+                        'json.set', k1, make_path(i), f'{json.dumps(v)}')
+
+                # delete object members
+                for i in range(sz):
+                    client.execute_command(
+                        'json.del', k1, make_path(i))
diff --git a/tst/integration/test_rdb.py b/tst/integration/test_rdb.py
new file mode 100644
index 0000000..da8b3d8
--- /dev/null
+++ b/tst/integration/test_rdb.py
@@ -0,0 +1,44 @@
+from utils_json import DEFAULT_MAX_PATH_LIMIT, \
+    DEFAULT_STORE_PATH
+from valkey.exceptions import ResponseError, NoPermissionError
+from valkeytests.conftest import resource_port_tracker
+import pytest
+import glob
+import logging
+import os
+import random
+import struct
+import json
+from math import isclose, isnan, isinf, frexp
+from json_test_case import JsonTestCase
+
+
+class TestRdb(JsonTestCase):
+
+    def setup_data(self):
+        client = self.server.get_new_client()
+        client.config_set(
+            'json.max-path-limit', DEFAULT_MAX_PATH_LIMIT)
+        # Need the following line when executing the test against a running Valkey.
+        # Otherwise, data from previous test cases will interfere with the current test case.
+        client.execute_command("FLUSHDB")
+
+        # Load the store sample JSONs. We use store.json as input to create a document key, then use
+        # store_compact.json, which has no indent/space/newline, to verify correctness of serialization.
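+        # (Illustration only, not actual file contents: a pretty-printed
+        # {"a": 1} loaded from store.json should come back from JSON.GET
+        # in the compact form {"a":1}.)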
+        with open(DEFAULT_STORE_PATH, 'r') as file:
+            self.data_store = file.read()
+        assert b'OK' == client.execute_command(
+            'JSON.SET', 'store', '.', self.data_store)
+
+    def setup(self):
+        super(TestRdb, self).setup()
+        self.setup_data()
+
+    def test_rdb_saverestore(self):
+        """
+        Test RDB save and reload
+        """
+        client = self.server.get_new_client()
+        assert True == client.execute_command('save')
+        client.execute_command('FLUSHDB')
+        assert b'OK' == client.execute_command('DEBUG', 'RELOAD', 'NOSAVE')
diff --git a/tst/integration/utils_json.py b/tst/integration/utils_json.py
new file mode 100644
index 0000000..6eab6a0
--- /dev/null
+++ b/tst/integration/utils_json.py
@@ -0,0 +1,28 @@
+import pytest
+import os
+import random
+import string
+from valkey.exceptions import ResponseError
+
+JSON_MODULE_NAME = 'json'
+JSON_INFO_NAMES = {
+    'num_documents': JSON_MODULE_NAME + '_num_documents',
+    'total_memory_bytes': JSON_MODULE_NAME + '_total_memory_bytes',
+    'doc_histogram': JSON_MODULE_NAME + '_doc_histogram',
+    'read_histogram': JSON_MODULE_NAME + '_read_histogram',
+    'insert_histogram': JSON_MODULE_NAME + '_insert_histogram',
+    'update_histogram': JSON_MODULE_NAME + '_update_histogram',
+    'delete_histogram': JSON_MODULE_NAME + '_delete_histogram',
+    'max_path_depth_ever_seen': JSON_MODULE_NAME + '_max_path_depth_ever_seen',
+    'max_document_size_ever_seen': JSON_MODULE_NAME + '_max_document_size_ever_seen',
+    'total_malloc_bytes_used': JSON_MODULE_NAME + "_total_malloc_bytes_used",
+    'memory_traps_enabled': JSON_MODULE_NAME + "_memory_traps_enabled",
+}
+DEFAULT_MAX_DOCUMENT_SIZE = 64*1024*1024
+DEFAULT_MAX_PATH_LIMIT = 128
+DEFAULT_WIKIPEDIA_PATH = 'data/wikipedia.json'
+DEFAULT_WIKIPEDIA_COMPACT_PATH = 'data/wikipedia_compact.json'
+DEFAULT_STORE_PATH = 'data/store.json'
+JSON_INFO_METRICS_SECTION = JSON_MODULE_NAME + '_core_metrics'
+
+JSON_MODULE_NAME = 'json'
diff --git a/tst/unit/CMakeLists.txt b/tst/unit/CMakeLists.txt
new file mode 100644
index 0000000..9e1e500
--- /dev/null
+++ b/tst/unit/CMakeLists.txt
@@ -0,0 +1,89 @@
+#########################################
+# Define unit tests
+#########################################
+message("tst/unit/CMakeLists.txt: Define unit tests")
+
+# This is the set of sources for the basic test
+file(GLOB_RECURSE UNIT_TEST_SRC "*.cc")
+
+#########################################
+# Tell CMake how to run the unit tests
+#########################################
+
+# A brief philosophical thought about unit tests: if possible, it's preferable to have all unit
+# tests in a single (or a low number of) binary executable. This is disk-space efficient for the
+# test suite, avoids unnecessary linking steps, and provides a nice, simple way to interface with
+# the test suite (should you need to do so manually, or, for instance, with a debugger). Large
+# numbers of test binaries are certainly possible, and in some rare cases are even necessary, but
+# they don't provide many advantages over a single binary in the average case.
+# This file defines a single test executable, "unitTests", which uses the GoogleTest framework.
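+# Usage sketch (assumes a conventional out-of-source build directory named
+# "build"; nothing in this file mandates that name):
+#   cmake -S . -B build && cmake --build build
+#   cd build && ctest -L UnitTests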
+
+# This defines each unit test and associates it with its sources
+add_executable(unitTests ${UNIT_TEST_SRC})
+
+# Build with C11 & C++17
+set_target_properties(
+    unitTests
+    PROPERTIES
+    C_STANDARD 11
+    C_STANDARD_REQUIRED ON
+    CXX_STANDARD 17
+    CXX_STANDARD_REQUIRED ON
+    POSITION_INDEPENDENT_CODE ON
+)
+
+target_include_directories(unitTests
+    PRIVATE
+        ${PROJECT_SOURCE_DIR}/src
+        ${rapidjson_SOURCE_DIR}/include
+    )
+
+# Add a dependency on the code under test.
+target_link_libraries(unitTests ${JSON_MODULE_LIB})
+
+# Link GoogleTest libraries after fetch
+target_link_libraries(unitTests
+    GTest::gtest_main  # Link the main GoogleTest library
+    GTest::gmock_main  # Link GoogleMock
+)
+
+# This tells CTest about this unit test executable.
+# The TEST_PREFIX prepends "unit_" to the name of these tests in the output,
+# which makes them easier to identify at a glance.
+# The TEST_LIST setting creates a CMake list of all of the tests in the
+# binary. This is useful for, for instance, the set_tests_properties statement
+# below.
+# For more information, see: https://cmake.org/cmake/help/v3.12/module/GoogleTest.html
+# To get this to work properly in a cross-compile environment, you need to set up
+# CROSSCOMPILING_EMULATOR (see https://cmake.org/cmake/help/v3.12/prop_tgt/CROSSCOMPILING_EMULATOR.html)
+# DISCOVERY_TIMEOUT - the number of seconds GTest is given to discover the tests to run. It can
+# be any number, but should be big enough for the tests to start on macOS; 59 is just a prime
+# number close to 1 minute ;)
+gtest_discover_tests(unitTests
+    TEST_PREFIX unit_
+    TEST_LIST unit_gtests
+    DISCOVERY_TIMEOUT 59
+    )
+
+# This tells the CTest harness how it should treat these tests. For
+# instance, you can uncomment the RUN_SERIAL line to force the tests to run
+# sequentially (e.g. if the tests are not thread-safe... in most cases, tests
+# SHOULD be thread-safe). We also set a high-level timeout: if the test takes
+# longer than the specified time, it is killed by the harness and reported as a
+# failure. And finally, we provide a "label" that is used by CTest when
+# reporting result statistics (e.g. "UnitTests: 72 successes, 3 failures").
+# For more properties that can be set, see:
+# https://cmake.org/cmake/help/v3.9/manual/cmake-properties.7.html#test-properties
+set_tests_properties(${unit_gtests} PROPERTIES
+    # RUN_SERIAL 1
+    TIMEOUT 10  # seconds
+    LABELS UnitTests
+    )
+
+
+add_custom_target(unit
+    COMMAND ${CMAKE_BINARY_DIR}/tst/unit/unitTests
+    DEPENDS unitTests
+    WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
+    COMMENT "Running unit tests..."
+)
\ No newline at end of file
diff --git a/tst/unit/CPPLINT.cfg b/tst/unit/CPPLINT.cfg
new file mode 100644
index 0000000..460f8cb
--- /dev/null
+++ b/tst/unit/CPPLINT.cfg
@@ -0,0 +1,6 @@
+filter=-build/include_subdir
+# STL allocator needs implicit single arg constructor which CPPLINT doesn't like by default
+filter=-runtime/explicit
+filter=-runtime/string
+filter=-runtime/int
+linelength=120
diff --git a/tst/unit/dom_test.cc b/tst/unit/dom_test.cc
new file mode 100644
index 0000000..ed70b58
--- /dev/null
+++ b/tst/unit/dom_test.cc
@@ -0,0 +1,2304 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "json/dom.h"
+#include "json/alloc.h"
+#include "json/stats.h"
+#include "json/selector.h"
+#include "module_sim.h"
+
+jsn::string& getReplyString() {
+    static jsn::string replyString;
+    return replyString;
+}
+
+static void appendReplyString(const jsn::string& s) {
+    jsn::string& rs = getReplyString();
+    rs = rs + s;
+}
+
+int cs_replyWithBuffer(ValkeyModuleCtx *, const char *buffer, size_t length) {
+    appendReplyString(jsn::string(buffer, length));
+    return 0;
+}
+
+const char *GetString(ReplyBuffer *b) {
+    b->Reply();  // Send the string :)
+    return getReplyString().c_str();
+}
+
+void Clear(rapidjson::StringBuffer *b) {
+    b->Clear();
+}
+
+void Clear(ReplyBuffer *b) {
+    getReplyString().clear();
+    b->Clear();
+}
+
+size_t cobsize(ValkeyModuleCtx *) {
+    return 0;
+}
+
+extern size_t hash_function(const char *, size_t);
+
+/* Since unit tests run outside of the Valkey server, we need to map Valkey's
+ * memory management functions to cstdlib functions. */
+void SetupAllocFuncs(size_t numShards) {
+    setupValkeyModulePointers();
+    //
+    // Now setup the KeyTable, the RapidJson library now depends on it
+    //
+    KeyTable::Config c;
+    c.malloc = dom_alloc;
+    c.free = dom_free;
+    c.hash = hash_function;
+    c.numShards = numShards;
+    keyTable = new KeyTable(c);
+    ValkeyModule_ReplyWithStringBuffer = cs_replyWithBuffer;
+    getReplyString().clear();
+}
+
+class DomTest : public ::testing::Test {
+ protected:
+    const char *json1 = "{"
+        "\"firstName\":\"John\","
+        "\"lastName\":\"Smith\","
+        "\"age\":27,"
+        "\"weight\":135.17,"
+        "\"isAlive\":true,"
+        "\"address\":{"
+        "\"street\":\"21 2nd Street\","
+        "\"city\":\"New York\","
+        "\"state\":\"NY\","
+        "\"zipcode\":\"10021-3100\""
+        "},"
+        "\"phoneNumbers\":["
+        "{"
+        "\"type\":\"home\","
+        "\"number\":\"212 555-1234\""
+        "},"
+        "{"
+        "\"type\":\"office\","
+        "\"number\":\"646 555-4567\""
+        "}"
+        "],"
+        "\"children\":[],"
+        "\"spouse\":null,"
+        "\"groups\":{}"
+        "}";
+    JDocument *doc1;
+    const char* json2 = "{"
+        "\"firstName\":\"John\","
+        "\"lastName\":\"Smith\","
+        "\"age\":27,"
+        "\"weight\":135.17,"
+        "\"isAlive\":true,"
+        "\"spouse\":null,"
+        "\"children\":[],"
+        "\"groups\":{}"
+        "}";
+    JDocument *doc2;
+    const char *json3 = "{"
+        "\"a\":{},"
+        "\"b\":{\"a\":\"a\"},"
+        "\"c\":{\"a\":\"a\", \"b\":1},"
+        "\"d\":{\"a\":\"a\", \"b\":\"b\"},"
+        "\"e\":{\"a\":1, \"b\":\"b\", \"c\":3}"
+        "}";
+    JDocument *doc3;
+    const char *json4 = "{\"a\":[], \"b\":[1], \"c\":[1,2], \"d\":[1,2,3], \"e\":[1,2,3,4,5]}";
+    JDocument *doc4;
+    const char *json5 = "{\"a\":{\"b\":{\"c\":{\"d\":{\"e\":{\"f\":{\"g\":{\"h:\":1}}}}}}}}";
+    JDocument *doc5;
+    const char *json6 = "{"
+        "\"a\":["
+        "[[1,2],[3,4],[5,6]],"
+        "[[7,8],[9,10],[11,12]]"
+        "]"
+        "}";
+    JDocument *doc6;
+
+    void SetUp() override {
+        JsonUtilCode rc = jsonstats_init();
+        ASSERT_EQ(rc, JSONUTIL_SUCCESS);
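+        // SetupAllocFuncs (above) routes dom_alloc/dom_free and hash_function
+        // into the KeyTable config and points the module reply callback at the
+        // local buffer stub; 16 shards is an arbitrary choice for these tests.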
SetupAllocFuncs(16); + + rc = dom_parse(nullptr, json1, strlen(json1), &doc1); + ASSERT_EQ(rc, JSONUTIL_SUCCESS); + rc = dom_parse(nullptr, json2, strlen(json2), &doc2); + ASSERT_EQ(rc, JSONUTIL_SUCCESS); + rc = dom_parse(nullptr, json3, strlen(json3), &doc3); + ASSERT_EQ(rc, JSONUTIL_SUCCESS); + rc = dom_parse(nullptr, json4, strlen(json4), &doc4); + ASSERT_EQ(rc, JSONUTIL_SUCCESS); + rc = dom_parse(nullptr, json5, strlen(json5), &doc5); + ASSERT_EQ(rc, JSONUTIL_SUCCESS); + rc = dom_parse(nullptr, json6, strlen(json6), &doc6); + ASSERT_EQ(rc, JSONUTIL_SUCCESS); + } + + void TearDown() override { + dom_free_doc(doc1); + dom_free_doc(doc2); + dom_free_doc(doc3); + dom_free_doc(doc4); + dom_free_doc(doc5); + dom_free_doc(doc6); + delete keyTable; + keyTable = nullptr; + } +}; + +TEST_F(DomTest, testParseObject) { + JDocument *doc; + JsonUtilCode rc = dom_parse(nullptr, json1, strlen(json1), &doc); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + dom_free_doc(doc); +} + +TEST_F(DomTest, testParseArray) { + const char *input = "[1,2,3]"; + JDocument *doc; + JsonUtilCode rc = dom_parse(nullptr, input, strlen(input), &doc); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + + rapidjson::StringBuffer oss; + dom_serialize(doc, nullptr, oss); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_STREQ(oss.GetString(), input); + + dom_free_doc(doc); +} + +TEST_F(DomTest, testParseString) { + const char *input = "\"abc\""; + JDocument *doc; + JsonUtilCode rc = dom_parse(nullptr, input, strlen(input), &doc); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + + rapidjson::StringBuffer oss; + dom_serialize(doc, nullptr, oss); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_STREQ(oss.GetString(), input); + + dom_free_doc(doc); +} + +TEST_F(DomTest, testParseNumber) { + const char *input = "123"; + JDocument *doc; + JsonUtilCode rc = dom_parse(nullptr, input, strlen(input), &doc); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + + rapidjson::StringBuffer oss; + dom_serialize(doc, nullptr, oss); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_STREQ(oss.GetString(), input); + + dom_free_doc(doc); +} + +TEST_F(DomTest, testParseBool) { + const char *input = "false"; + JDocument *doc; + JsonUtilCode rc = dom_parse(nullptr, input, strlen(input), &doc); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + + rapidjson::StringBuffer oss; + dom_serialize(doc, nullptr, oss); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_STREQ(oss.GetString(), input); + + dom_free_doc(doc); +} + +TEST_F(DomTest, testParseNull) { + const char *input = "null"; + JDocument *doc; + JsonUtilCode rc = dom_parse(nullptr, input, strlen(input), &doc); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + + rapidjson::StringBuffer oss; + dom_serialize(doc, nullptr, oss); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_STREQ(oss.GetString(), input); + + dom_free_doc(doc); +} + +TEST_F(DomTest, testParseInvalidJSON) { + const char *input = "{\"a\"}"; + JDocument *doc; + JsonUtilCode rc = dom_parse(nullptr, input, strlen(input), &doc); + EXPECT_EQ(rc, JSONUTIL_JSON_PARSE_ERROR); + EXPECT_TRUE(doc == nullptr); +} + +TEST_F(DomTest, testParseDuplicates) { + const char *input = "{\"a\":1, \"b\":2, \"a\":3}"; + JDocument *doc; + JsonUtilCode rc = dom_parse(nullptr, input, strlen(input), &doc); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + + rapidjson::StringBuffer oss; + dom_serialize(doc, nullptr, oss); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + const char *result = "{\"a\":3,\"b\":2}"; + EXPECT_STREQ(oss.GetString(), result); + + dom_free_doc(doc); +} + +TEST_F(DomTest, testSerialize_DefaultFormat) { + rapidjson::StringBuffer oss; + dom_serialize(doc1, nullptr, oss); + 
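+    // Round trip: with no PrintFormat given, serializing the parsed json1 must
+    // reproduce the input byte-for-byte.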
EXPECT_STREQ(oss.GetString(), json1);
+}
+
+TEST_F(DomTest, testSerialize_CustomFormatArray) {
+    PrintFormat format;
+    format.newline = "\n";
+    format.indent = "\t";
+    format.space = ".";
+    jsn::vector<std::pair<jsn::string, jsn::string>> tests{
+        {"[]", "[]"},
+        {"[0]", "[\n\t0\n]"},
+        {"[0,1]", "[\n\t0,\n\t1\n]"},
+        {"[[]]", "[\n\t[]\n]"},
+        {"[[0]]", "[\n\t[\n\t\t0\n\t]\n]"},
+        {"[[0,1]]", "[\n\t[\n\t\t0,\n\t\t1\n\t]\n]"},
+        {"{}", "{}"},
+        {"{\"a\":0}", "{\n\t\"a\":.0\n}"},
+        {"{\"a\":0,\"b\":1}", "{\n\t\"a\":.0,\n\t\"b\":.1\n}"},
+        {"{\"a\":{\"b\":1}}", "{\n\t\"a\":.{\n\t\t\"b\":.1\n\t}\n}"}
+    };
+    for (auto p : tests) {
+        JDocument *doc;
+        JsonUtilCode rc = dom_parse(nullptr, p.first.c_str(), p.first.length(), &doc);
+        EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+
+        rapidjson::StringBuffer oss;
+        dom_serialize(doc, &format, oss);
+        EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+        EXPECT_EQ(oss.GetString(), p.second);
+        dom_free_doc(doc);
+    }
+}
+
+TEST_F(DomTest, testSerialize_CustomFormat) {
+    PrintFormat format;
+    format.indent = "\t";
+    format.newline = "\n";
+    format.space = " ";
+    const char* exp_json = "{\n\t\"firstName\": \"John\",\n\t\"lastName\": \"Smith\",\n\t\"age\": 27,"
+        "\n\t\"weight\": 135.17,\n\t\"isAlive\": true,\n\t\"spouse\": null,"
+        "\n\t\"children\": [],\n\t\"groups\": {}\n}";
+    rapidjson::StringBuffer oss;
+    dom_serialize(doc2, &format, oss);
+    EXPECT_STREQ(oss.GetString(), exp_json);
+
+    format.indent = "**";
+    format.newline = "\n";
+    format.space = "--";
+    exp_json = "{\n**\"firstName\":--\"John\",\n**\"lastName\":--\"Smith\",\n**\"age\":--27,"
+        "\n**\"weight\":--135.17,\n**\"isAlive\":--true,\n**\"spouse\":--null,"
+        "\n**\"children\":--[],\n**\"groups\":--{}\n}";
+    Clear(&oss);
+    dom_serialize(doc2, &format, oss);
+    EXPECT_STREQ(oss.GetString(), exp_json);
+}
+
+TEST_F(DomTest, testSetString) {
+    const char *new_val = "\"Boston\"";
+    JsonUtilCode rc = dom_set_value(nullptr, doc1, ".address.city", new_val, false, false);
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+
+    ReplyBuffer oss;
+    rc = dom_get_value_as_str(doc1, ".address.city", nullptr, oss, false);
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+    EXPECT_STREQ(GetString(&oss), new_val);
+}
+
+TEST_F(DomTest, testSetNumber) {
+    const char *new_val = "37";
+    JsonUtilCode rc = dom_set_value(nullptr, doc1, ".age", new_val, false, false);
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+
+    ReplyBuffer oss;
+    rc = dom_get_value_as_str(doc1, ".age", nullptr, oss, false);
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+    EXPECT_STREQ(GetString(&oss), new_val);
+}
+
+TEST_F(DomTest, testSetNull) {
+    const char *new_val = "null";
+    JsonUtilCode rc = dom_set_value(nullptr, doc1, ".address.street", new_val, false, false);
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+
+    ReplyBuffer oss;
+    rc = dom_get_value_as_str(doc1, ".address.street", nullptr, oss, false);
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+    EXPECT_STREQ(GetString(&oss), new_val);
+}
+
+TEST_F(DomTest, testSet_NX_XX_ErrorConditions) {
+    // Test NX error condition
+    const char *new_val = "123";
+    JsonUtilCode rc = dom_set_value(nullptr, doc1, ".firstName", new_val, true, false);
+    EXPECT_EQ(rc, JSONUTIL_NX_XX_CONDITION_NOT_SATISFIED);
+
+    // Test XX error condition
+    rc = dom_set_value(nullptr, doc1, ".foo", new_val, false, true);
+    EXPECT_EQ(rc, JSONUTIL_NX_XX_CONDITION_NOT_SATISFIED);
+
+    // NX and XX must be mutually exclusive
+    rc = dom_set_value(nullptr, doc1, ".firstName", new_val, true, true);
+    EXPECT_EQ(rc, JSONUTIL_NX_XX_SHOULD_BE_MUTUALLY_EXCLUSIVE);
+}
+
+TEST_F(DomTest, testGet_ErrorConditions) {
+    ReplyBuffer oss;
+    JsonUtilCode rc = 
dom_get_value_as_str(doc1, ".bar", nullptr, oss, false); + EXPECT_EQ(rc, JSONUTIL_JSON_PATH_NOT_EXIST); + EXPECT_EQ(strlen(GetString(&oss)), 0); +} + +TEST_F(DomTest, testUnicode) { + const char *new_val = "\"hyvää-élève\""; + JsonUtilCode rc = dom_set_value(nullptr, doc1, ".firstName", new_val, false, false); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + + ReplyBuffer oss; + rc = dom_get_value_as_str(doc1, ".firstName", nullptr, oss, false); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_STREQ(GetString(&oss), new_val); +} + +TEST_F(DomTest, testGetString) { + ReplyBuffer oss; + JsonUtilCode rc = dom_get_value_as_str(doc1, ".address.city", nullptr, oss, false); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_STREQ(GetString(&oss), "\"New York\""); + + Clear(&oss); + rc = dom_get_value_as_str(doc1, ".phoneNumbers[1].number", nullptr, oss, false); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_STREQ(GetString(&oss), "\"646 555-4567\""); +} + +TEST_F(DomTest, testGetNumber) { + ReplyBuffer oss; + JsonUtilCode rc = dom_get_value_as_str(doc1, ".age", nullptr, oss, false); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_STREQ(GetString(&oss), "27"); +} + +TEST_F(DomTest, testGetBool) { + ReplyBuffer oss; + JsonUtilCode rc = dom_get_value_as_str(doc1, ".isAlive", nullptr, oss, false); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_STREQ(GetString(&oss), "true"); +} + +TEST_F(DomTest, testGetNull) { + ReplyBuffer oss; + JsonUtilCode rc = dom_get_value_as_str(doc1, ".spouse", nullptr, oss, false); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_STREQ(GetString(&oss), "null"); +} + +TEST_F(DomTest, testGetObject) { + ReplyBuffer oss; + JsonUtilCode rc = dom_get_value_as_str(doc1, ".address", nullptr, oss, false); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_STREQ(GetString(&oss), "{\"street\":\"21 2nd Street\",\"city\":\"New York\",\"state\":\"NY\"," + "\"zipcode\":\"10021-3100\"}"); +} + +TEST_F(DomTest, testGetArray) { + ReplyBuffer oss; + JsonUtilCode rc = dom_get_value_as_str(doc1, ".phoneNumbers", nullptr, oss, false); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_STREQ(GetString(&oss), "[{\"type\":\"home\",\"number\":\"212 555-1234\"},{\"type\":\"office\"," + "\"number\":\"646 555-4567\"}]"); + + Clear(&oss); + rc = dom_get_value_as_str(doc1, ".children", nullptr, oss, false); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_STREQ(GetString(&oss), "[]"); +} + +TEST_F(DomTest, testGet_multiPaths) { + const char *paths[] = { ".firstName", ".lastName" }; + ReplyBuffer oss; + JsonUtilCode rc = dom_get_values_as_str(doc1, paths, 2, nullptr, oss, false); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_STREQ(GetString(&oss), "{\".firstName\":\"John\",\".lastName\":\"Smith\"}"); + + // Test pretty print + PrintFormat format; + format.indent = "\t"; + format.newline = "\n"; + format.space = " "; + const char* exp_json = "{\n\t\".firstName\": \"John\",\n\t\".lastName\": \"Smith\"\n}"; + Clear(&oss); + rc = dom_get_values_as_str(doc1, paths, 2, &format, oss, false); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_STREQ(GetString(&oss), exp_json); +} + +TEST_F(DomTest, testDelete) { + size_t num_vals_deleted; + JsonUtilCode rc = dom_delete_value(doc1, ".spouse", num_vals_deleted); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_EQ(num_vals_deleted, 1); + ReplyBuffer oss; + rc = dom_get_value_as_str(doc1, ".spouse", nullptr, oss, false); + EXPECT_EQ(rc, JSONUTIL_JSON_PATH_NOT_EXIST); + EXPECT_EQ(oss.GetLength(), 0); + + rc = dom_delete_value(doc1, ".phoneNumbers", num_vals_deleted); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_EQ(num_vals_deleted, 1); + 
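+    // Reading back the deleted path must now report JSON_PATH_NOT_EXIST and
+    // leave the reply buffer empty.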
Clear(&oss);
+    rc = dom_get_value_as_str(doc1, ".phoneNumbers", nullptr, oss, false);
+    EXPECT_EQ(rc, JSONUTIL_JSON_PATH_NOT_EXIST);
+    EXPECT_EQ(oss.GetLength(), 0);
+}
+
+TEST_F(DomTest, testDelete_v2path) {
+    const char *input = "{\"x\": {}, \"y\": {\"a\":\"a\"}, \"z\": {\"a\":\"\", \"b\":\"b\"}}";
+    JDocument *doc;
+    JsonUtilCode rc = dom_parse(nullptr, input, strlen(input), &doc);
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+
+    size_t num_vals_deleted;
+    rc = dom_delete_value(doc, "$.z.*", num_vals_deleted);
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+    EXPECT_EQ(num_vals_deleted, 2);
+    const char *exp = "{\"x\":{},\"y\":{\"a\":\"a\"},\"z\":{}}";
+    ReplyBuffer oss;
+    rc = dom_get_value_as_str(doc, ".", nullptr, oss, false);
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+    EXPECT_STREQ(GetString(&oss), exp);
+
+    rc = dom_delete_value(doc, "$.*", num_vals_deleted);
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+    EXPECT_EQ(num_vals_deleted, 3);
+    exp = "{}";
+    Clear(&oss);
+    rc = dom_get_value_as_str(doc, ".", nullptr, oss, false);
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+    EXPECT_STREQ(GetString(&oss), exp);
+
+    dom_free_doc(doc);
+}
+
+TEST_F(DomTest, testDelete_v2path_array) {
+    const char *input = "[0,1,2,3,4,5,6,7,8,9]";
+    JDocument *doc;
+    JsonUtilCode rc = dom_parse(nullptr, input, strlen(input), &doc);
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+
+    size_t num_vals_deleted;
+    rc = dom_delete_value(doc, "$[6:10]", num_vals_deleted);
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+    EXPECT_EQ(num_vals_deleted, 4);
+    const char *exp = "[0,1,2,3,4,5]";
+    ReplyBuffer oss;
+    rc = dom_get_value_as_str(doc, ".", nullptr, oss, false);
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+    EXPECT_STREQ(GetString(&oss), exp);
+
+    rc = dom_delete_value(doc, "$[*]", num_vals_deleted);
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+    EXPECT_EQ(num_vals_deleted, 6);
+    exp = "[]";
+    Clear(&oss);
+    rc = dom_get_value_as_str(doc, ".", nullptr, oss, false);
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+    EXPECT_STREQ(GetString(&oss), exp);
+
+    dom_free_doc(doc);
+}
+
+TEST_F(DomTest, testNumIncrBy_int) {
+    jsn::vector<double> res;
+    bool isV2Path;
+    JParser parser;
+    JsonUtilCode rc = dom_increment_by(doc1, ".age", &parser.Parse("1", 1).GetJValue(), res, isV2Path);
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+    EXPECT_EQ(res[0], 28);
+    EXPECT_FALSE(isV2Path);
+
+    rc = dom_increment_by(doc1, ".age", &parser.Parse("-5", 2).GetJValue(), res, isV2Path);
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+    EXPECT_EQ(res[0], 23);
+    EXPECT_FALSE(isV2Path);
+}
+
+TEST_F(DomTest, testNumIncrBy_float1) {
+    jsn::vector<double> res;
+    bool isV2Path;
+    JParser parser;
+    JsonUtilCode rc = dom_increment_by(doc1, ".age", &parser.Parse("0.5", 3).GetJValue(), res, isV2Path);
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+    EXPECT_EQ(res[0], 27.5);
+    EXPECT_FALSE(isV2Path);
+
+    rc = dom_increment_by(doc1, ".age", &parser.Parse("0.5", 3).GetJValue(), res, isV2Path);
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+    EXPECT_EQ(res[0], 28);
+    EXPECT_FALSE(isV2Path);
+}
+
+TEST_F(DomTest, testNumIncrBy_float2) {
+    const char *new_val = "1";
+    JsonUtilCode rc = dom_set_value(nullptr, doc1, ".foo", new_val, false, false);
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+
+    jsn::vector<double> res;
+    bool isV2Path;
+    JParser parser;
+    rc = dom_increment_by(doc1, ".foo", &parser.Parse("0.5", 3).GetJValue(), res, isV2Path);
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+    EXPECT_EQ(res[0], 1.5);
+    EXPECT_FALSE(isV2Path);
+
+    rc = dom_increment_by(doc1, ".foo", &parser.Parse("0.5", 3).GetJValue(), res, isV2Path);
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+    EXPECT_EQ(res[0], 2);
+    EXPECT_FALSE(isV2Path);
+}
+
+TEST_F(DomTest, testNumIncrBy_int64_overflow) {
+TEST_F(DomTest, testNumIncrBy_int64_overflow) { + const char *new_val = "9223372036854775807"; + JsonUtilCode rc = dom_set_value(nullptr, doc1, ".foo", new_val, false, false); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + + // should not overflow + jsn::vector<double> res; + bool isV2Path; + JParser parser; + rc = dom_increment_by(doc1, ".foo", &parser.Parse("0", 1).GetJValue(), res, isV2Path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_FALSE(isV2Path); + + // The result exceeds max_int64, and is converted to a double number. + rc = dom_increment_by(doc1, ".foo", &parser.Parse("1", 1).GetJValue(), res, isV2Path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_EQ(res[0], 9223372036854775808.0); + EXPECT_FALSE(isV2Path); + rc = dom_increment_by(doc1, ".foo", &parser.Parse("12", 2).GetJValue(), res, isV2Path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_EQ(res[0], 9223372036854775820.0); + EXPECT_FALSE(isV2Path); + rc = dom_increment_by(doc1, ".foo", &parser.Parse("12", 2).GetJValue(), res, isV2Path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_EQ(res[0], 9223372036854775832.0); + EXPECT_FALSE(isV2Path); +} +
+TEST_F(DomTest, testNumIncrBy_int64_overflow_negative) { + const char *new_val = "-9223372036854775808"; + JsonUtilCode rc = dom_set_value(nullptr, doc1, ".foo", new_val, false, false); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + + // should not overflow + jsn::vector<double> res; + bool isV2Path; + JParser parser; + rc = dom_increment_by(doc1, ".foo", &parser.Parse("0", 1).GetJValue(), res, isV2Path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_EQ(res[0], INT64_MIN); + EXPECT_FALSE(isV2Path); + + // The result goes below min_int64 and is converted to a double number. + rc = dom_increment_by(doc1, ".foo", &parser.Parse("-1", 2).GetJValue(), res, isV2Path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_EQ(res[0], -9223372036854775809.0); + EXPECT_FALSE(isV2Path); + rc = dom_increment_by(doc1, ".foo", &parser.Parse("-11", 3).GetJValue(), res, isV2Path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_EQ(res[0], -9223372036854775820.0); + EXPECT_FALSE(isV2Path); + rc = dom_increment_by(doc1, ".foo", &parser.Parse("-11", 3).GetJValue(), res, isV2Path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_EQ(res[0], -9223372036854775831.0); + EXPECT_FALSE(isV2Path); +} +
+TEST_F(DomTest, testNumIncrBy_double_overflow) { + const char *new_val = "1.7e308"; + JsonUtilCode rc = dom_set_value(nullptr, doc1, ".foo", new_val, false, false); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + + // should not overflow + jsn::vector<double> res; + bool isV2Path; + JParser parser; + rc = dom_increment_by(doc1, ".foo", &parser.Parse("0", 1).GetJValue(), res, isV2Path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_EQ(res.size(), 1); + EXPECT_EQ(res[0], 1.7e308); + EXPECT_FALSE(isV2Path); + rc = dom_increment_by(doc1, ".foo", &parser.Parse("1.0", 3).GetJValue(), res, isV2Path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_EQ(res.size(), 1); + EXPECT_EQ(res[0], 1.7e308); + EXPECT_FALSE(isV2Path); + + // should overflow + rc = dom_increment_by(doc1, ".foo", &parser.Parse("1.7e308", 7).GetJValue(), res, isV2Path); + EXPECT_EQ(rc, JSONUTIL_ADDITION_OVERFLOW); + EXPECT_FALSE(isV2Path); + rc = dom_increment_by(doc1, ".foo", &parser.Parse("0.85e308", 8).GetJValue(), res, isV2Path); + EXPECT_EQ(rc, JSONUTIL_ADDITION_OVERFLOW); + EXPECT_FALSE(isV2Path); +} +
+TEST_F(DomTest, testNumIncrBy_double_overflow_negative) { + const char *new_val = "-1.7e308"; + JsonUtilCode rc = dom_set_value(nullptr, doc1, ".foo", new_val, false, false); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + + // should not overflow +
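// (-1.7e308 plus 0 or minus 1.0 still fits within the double range, so no overflow is reported) +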
jsn::vector<double> res; + bool isV2Path; + JParser parser; + rc = dom_increment_by(doc1, ".foo", &parser.Parse("0", 1).GetJValue(), res, isV2Path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_EQ(res[0], -1.7e308); + EXPECT_FALSE(isV2Path); + rc = dom_increment_by(doc1, ".foo", &parser.Parse("-1.0", 4).GetJValue(), res, isV2Path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_EQ(res[0], -1.7e308); + EXPECT_FALSE(isV2Path); + + // should overflow + rc = dom_increment_by(doc1, ".foo", &parser.Parse("-1.7e308", 8).GetJValue(), res, isV2Path); + EXPECT_EQ(rc, JSONUTIL_ADDITION_OVERFLOW); + EXPECT_FALSE(isV2Path); +} +
+TEST_F(DomTest, testNumIncrBy_string_value) { + Selector selector; + const char *new_val = "-1.5e308"; + JsonUtilCode rc = dom_set_value(nullptr, doc1, ".foo", new_val, false, false); + rc = selector.getValues(*doc1, ".foo"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_EQ(selector.getResultSet().size(), 1); + EXPECT_STREQ(selector.getResultSet()[0].first->GetDoubleString(), "-1.5e308"); + + // should not overflow + jsn::vector<double> res; + bool isV2Path; + JParser parser; + rc = dom_increment_by(doc1, ".foo", &parser.Parse("0", 1).GetJValue(), res, isV2Path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_EQ(res[0], -1.5e308); + + rc = selector.getValues(*doc1, ".foo"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_EQ(selector.getResultSet().size(), 1); + // +0 is not a true no-op, because it will re-calculate and reformat a double + EXPECT_STREQ(selector.getResultSet()[0].first->GetDoubleString(), "-1.5e+308"); +} +
+TEST_F(DomTest, testNumIncrMultBy_string_value_overflow) { + Selector selector; + const char *new_val = "-1.7e308"; + JsonUtilCode rc = dom_set_value(nullptr, doc1, ".foo", new_val, false, false); + rc = selector.getValues(*doc1, ".foo"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_EQ(selector.getResultSet().size(), 1); + EXPECT_STREQ(selector.getResultSet()[0].first->GetDoubleString(), "-1.7e308"); + + // should not overflow + jsn::vector<double> res; + bool isV2Path; + JParser parser; + rc = dom_increment_by(doc1, ".foo", &parser.Parse("-1", 2).GetJValue(), res, isV2Path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_EQ(res[0], -1.7e308); + + rc = selector.getValues(*doc1, ".foo"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_EQ(selector.getResultSet().size(), 1); + EXPECT_STREQ(selector.getResultSet()[0].first->GetDoubleString(), "-1.6999999999999999e+308"); + + // should not overflow + rc = dom_multiply_by(doc1, ".foo", &parser.Parse("0.5", 3).GetJValue(), res, isV2Path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_EQ(res[0], -8.5e307); + + rc = selector.getValues(*doc1, ".foo"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_EQ(selector.getResultSet().size(), 1); + EXPECT_STREQ(selector.getResultSet()[0].first->GetDoubleString(), "-8.4999999999999997e+307"); + + // should overflow + rc = dom_multiply_by(doc1, ".foo", &parser.Parse("1.0e300", 7).GetJValue(), res, isV2Path); + EXPECT_EQ(rc, JSONUTIL_MULTIPLICATION_OVERFLOW); + EXPECT_FALSE(isV2Path); + rc = dom_multiply_by(doc1, ".foo", &parser.Parse("1.7e308", 7).GetJValue(), res, isV2Path); + EXPECT_EQ(rc, JSONUTIL_MULTIPLICATION_OVERFLOW); + EXPECT_FALSE(isV2Path); +} +
+TEST_F(DomTest, testToggle) { + JsonUtilCode rc = dom_set_value(nullptr, doc1, ".foobool", "true", false, false); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + + jsn::vector<int> vec; + bool is_v2_path; + rc = dom_toggle(doc1, ".foobool", vec, is_v2_path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_FALSE(is_v2_path); + EXPECT_EQ(vec.size(), 1); + EXPECT_EQ(vec[0], 0); + +
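// dom_toggle reports each new value: 0 for false, 1 for true (and -1 for non-booleans in v2 results) +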
rc = dom_toggle(doc1, ".foobool", vec, is_v2_path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_FALSE(is_v2_path); + EXPECT_EQ(vec.size(), 1); + EXPECT_EQ(vec[0], 1); + + // toggling a non-boolean value should fail + rc = dom_set_value(nullptr, doc1, ".foostr", "\"ok\"", false, false); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + + rc = dom_toggle(doc1, ".foostr", vec, is_v2_path); + EXPECT_EQ(rc, JSONUTIL_JSON_ELEMENT_NOT_BOOL); +} +
+TEST_F(DomTest, testToggle_v2path) { + const char *input1 = "[true, false, 1, null, \"foo\", [], {}]"; + JDocument *d1; + JsonUtilCode rc = dom_parse(nullptr, input1, strlen(input1), &d1); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + + jsn::vector<int> vec; + bool is_v2_path; + rc = dom_toggle(d1, "$[*]", vec, is_v2_path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_TRUE(is_v2_path); + EXPECT_EQ(vec.size(), 7); + EXPECT_EQ(vec[0], 0); + EXPECT_EQ(vec[1], 1); + EXPECT_EQ(vec[2], -1); + EXPECT_EQ(vec[3], -1); + EXPECT_EQ(vec[4], -1); + EXPECT_EQ(vec[5], -1); + EXPECT_EQ(vec[6], -1); + + ReplyBuffer oss; + rc = dom_get_value_as_str(d1, ".", nullptr, oss, false); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_STREQ(GetString(&oss), "[false,true,1,null,\"foo\",[],{}]"); + + dom_free_doc(d1); +} +
+TEST_F(DomTest, testNumMultBy_int64) { + jsn::vector<double> res; + bool isV2Path; + JParser parser; + JsonUtilCode rc = dom_multiply_by(doc1, ".age", &parser.Parse("10", 2).GetJValue(), res, isV2Path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_EQ(res.size(), 1); + EXPECT_EQ(res[0], 270); + EXPECT_FALSE(isV2Path); + + rc = dom_multiply_by(doc1, ".age", &parser.Parse("10", 2).GetJValue(), res, isV2Path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_EQ(res.size(), 1); + EXPECT_EQ(res[0], 2700); + EXPECT_FALSE(isV2Path); + + rc = dom_multiply_by(doc1, ".age", &parser.Parse("0.01", 4).GetJValue(), res, isV2Path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_EQ(res.size(), 1); + EXPECT_EQ(res[0], 27); + EXPECT_FALSE(isV2Path); +} +
+TEST_F(DomTest, testNumMultBy_double) { + const char *new_val = "1"; + JsonUtilCode rc = dom_set_value(nullptr, doc1, ".foo", new_val, false, false); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + + jsn::vector<double> res; + bool isV2Path; + JParser parser; + rc = dom_multiply_by(doc1, ".foo", &parser.Parse("0.5", 3).GetJValue(), res, isV2Path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_EQ(res[0], 0.5); + EXPECT_FALSE(isV2Path); + + rc = dom_multiply_by(doc1, ".foo", &parser.Parse("0.5", 3).GetJValue(), res, isV2Path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_EQ(res[0], 0.25); + EXPECT_FALSE(isV2Path); + + rc = dom_multiply_by(doc1, ".foo", &parser.Parse("4", 1).GetJValue(), res, isV2Path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_EQ(res[0], 1); + EXPECT_FALSE(isV2Path); +} +
+TEST_F(DomTest, testNumMultBy_int64_overflow) { + const char *new_val = "9223372036854775800"; + JsonUtilCode rc = dom_set_value(nullptr, doc1, ".foo", new_val, false, false); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + + // should not overflow + jsn::vector<double> res; + bool isV2Path; + JParser parser; + rc = dom_multiply_by(doc1, ".foo", &parser.Parse("1", 1).GetJValue(), res, isV2Path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_EQ(res[0], INT64_MAX); + EXPECT_FALSE(isV2Path); + rc = dom_multiply_by(doc1, ".foo", &parser.Parse("2", 1).GetJValue(), res, isV2Path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_FALSE(isV2Path); + rc = dom_multiply_by(doc1, ".foo", &parser.Parse("INT64_MAX", 9).GetJValue(), res, isV2Path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_FALSE(isV2Path); + + // should overflow +
&parser.Parse("1.0e300", 7).GetJValue(), res, isV2Path); + EXPECT_EQ(rc, JSONUTIL_MULTIPLICATION_OVERFLOW); + EXPECT_FALSE(isV2Path); + rc = dom_multiply_by(doc1, ".foo", &parser.Parse("1.7e308", 7).GetJValue(), res, isV2Path); + EXPECT_EQ(rc, JSONUTIL_MULTIPLICATION_OVERFLOW); + EXPECT_FALSE(isV2Path); +} + +TEST_F(DomTest, testNumMutiBy_int64_overflow_negative) { + const char *new_val = "-9223372036854775808"; + JsonUtilCode rc = dom_set_value(nullptr, doc1, ".foo", new_val, false, false); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + + // should not overflow + jsn::vector res; + bool isV2Path; + JParser parser; + rc = dom_multiply_by(doc1, ".foo", &parser.Parse("1", 1).GetJValue(), res, isV2Path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_EQ(res.size(), 1); + EXPECT_EQ(res[0], INT64_MIN); + EXPECT_FALSE(isV2Path); + rc = dom_multiply_by(doc1, ".foo", &parser.Parse("2", 1).GetJValue(), res, isV2Path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_EQ(res.size(), 1); + EXPECT_FALSE(isV2Path); + rc = dom_multiply_by(doc1, ".foo", &parser.Parse("INT64_MAX", 9).GetJValue(), res, isV2Path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_EQ(res.size(), 1); + EXPECT_FALSE(isV2Path); + + // should overflow + rc = dom_multiply_by(doc1, ".foo", &parser.Parse("-0.85e308", 9).GetJValue(), res, isV2Path); + EXPECT_EQ(rc, JSONUTIL_MULTIPLICATION_OVERFLOW); + EXPECT_TRUE(res.empty()); + EXPECT_FALSE(isV2Path); + rc = dom_multiply_by(doc1, ".foo", &parser.Parse("-1.7e308", 8).GetJValue(), res, isV2Path); + EXPECT_EQ(rc, JSONUTIL_MULTIPLICATION_OVERFLOW); + EXPECT_TRUE(res.empty()); + EXPECT_FALSE(isV2Path); +} + +TEST_F(DomTest, testNumMutiBy_double_overflow) { + const char *new_val = "1.7e308"; + JsonUtilCode rc = dom_set_value(nullptr, doc1, ".foo", new_val, false, false); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + + // should not overflow + jsn::vector res; + bool isV2Path; + JParser parser; + rc = dom_multiply_by(doc1, ".foo", &parser.Parse("1.0", 3).GetJValue(), res, isV2Path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_EQ(res.size(), 1); + EXPECT_EQ(res[0], 1.7e308); + EXPECT_FALSE(isV2Path); + + // should overflow + rc = dom_multiply_by(doc1, ".foo", &parser.Parse("0.85e308", 8).GetJValue(), res, isV2Path); + EXPECT_EQ(rc, JSONUTIL_MULTIPLICATION_OVERFLOW); + EXPECT_TRUE(res.empty()); + EXPECT_FALSE(isV2Path); + rc = dom_multiply_by(doc1, ".foo", &parser.Parse("1.7e308", 7).GetJValue(), res, isV2Path); + EXPECT_EQ(rc, JSONUTIL_MULTIPLICATION_OVERFLOW); + EXPECT_TRUE(res.empty()); + EXPECT_FALSE(isV2Path); +} + +TEST_F(DomTest, testNumMutiBy_double_overflow_negative) { + const char *new_val = "1.7e308"; + JsonUtilCode rc = dom_set_value(nullptr, doc1, ".foo", new_val, false, false); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + + // should not overflow + jsn::vector res; + bool isV2Path; + JParser parser; + rc = dom_multiply_by(doc1, ".foo", &parser.Parse("-1.0", 4).GetJValue(), res, isV2Path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_EQ(res.size(), 1); + EXPECT_EQ(res[0], -1.7e308); + rc = dom_multiply_by(doc1, ".foo", &parser.Parse("-1.01", 5).GetJValue(), res, isV2Path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + + // should overflow + rc = dom_multiply_by(doc1, ".foo", &parser.Parse("-0.85e308", 9).GetJValue(), res, isV2Path); + EXPECT_EQ(rc, JSONUTIL_MULTIPLICATION_OVERFLOW); + EXPECT_TRUE(res.empty()); + rc = dom_multiply_by(doc1, ".foo", &parser.Parse("-1.7e308", 8).GetJValue(), res, isV2Path); + EXPECT_EQ(rc, JSONUTIL_MULTIPLICATION_OVERFLOW); + EXPECT_TRUE(res.empty()); +} + +TEST_F(DomTest, testStrLen) { + 
jsn::vector<size_t> vec; + bool is_v2_path; + JsonUtilCode rc = dom_string_length(doc1, ".address.state", vec, is_v2_path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_FALSE(is_v2_path); + EXPECT_EQ(vec.size(), 1); + EXPECT_EQ(vec[0], 2); + + rc = dom_string_length(doc1, ".firstName", vec, is_v2_path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_FALSE(is_v2_path); + EXPECT_EQ(vec.size(), 1); + EXPECT_EQ(vec[0], 4); +} +
+TEST_F(DomTest, testStrLen_v2path) { + jsn::vector<size_t> vec; + bool is_v2_path; + JsonUtilCode rc = dom_string_length(doc1, "$.address.state", vec, is_v2_path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_TRUE(is_v2_path); + EXPECT_EQ(vec.size(), 1); + EXPECT_EQ(vec[0], 2); + + rc = dom_string_length(doc1, "$.firstName", vec, is_v2_path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_TRUE(is_v2_path); + EXPECT_EQ(vec.size(), 1); + EXPECT_EQ(vec[0], 4); +} +
+TEST_F(DomTest, testStrLen_v2path_wildcard) { + const char *input = "{\"x\": {\"a\":\"\", \"b\":\"b\", \"c\":\"cc\", \"d\":\"ddd\"}}"; + JDocument *doc; + JsonUtilCode rc = dom_parse(nullptr, input, strlen(input), &doc); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + + jsn::vector<size_t> vec; + bool is_v2_path; + rc = dom_string_length(doc, "$.x.*", vec, is_v2_path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_TRUE(is_v2_path); + EXPECT_EQ(vec.size(), 4); + for (size_t i=0; i < vec.size(); i++) { + EXPECT_EQ(vec[i], i); + } + + dom_free_doc(doc); +} +
+TEST_F(DomTest, testStrAppend) { + const char *s = "\"son\""; + jsn::vector<size_t> vec; + bool is_v2_path; + JsonUtilCode rc = dom_string_append(doc1, ".firstName", s, strlen(s), vec, is_v2_path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_FALSE(is_v2_path); + EXPECT_EQ(vec.size(), 1); + EXPECT_EQ(vec[0], 7); + + s = "\" Senior\""; + rc = dom_string_append(doc1, ".firstName", s, strlen(s), vec, is_v2_path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_FALSE(is_v2_path); + EXPECT_EQ(vec.size(), 1); + EXPECT_EQ(vec[0], 14); + + ReplyBuffer oss; + rc = dom_get_value_as_str(doc1, ".firstName", nullptr, oss, false); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_STREQ(GetString(&oss), "\"Johnson Senior\""); +} +
+TEST_F(DomTest, testStrAppend_v2path) { + const char *s = "\"son\""; + jsn::vector<size_t> vec; + bool is_v2_path; + JsonUtilCode rc = dom_string_append(doc1, "$.firstName", s, strlen(s), vec, is_v2_path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_TRUE(is_v2_path); + EXPECT_EQ(vec.size(), 1); + EXPECT_EQ(vec[0], 7); + + s = "\" Senior\""; + rc = dom_string_append(doc1, "$.firstName", s, strlen(s), vec, is_v2_path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_TRUE(is_v2_path); + EXPECT_EQ(vec.size(), 1); + EXPECT_EQ(vec[0], 14); + + ReplyBuffer oss; + rc = dom_get_value_as_str(doc1, ".firstName", nullptr, oss, false); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_STREQ(GetString(&oss), "\"Johnson Senior\""); +} +
+TEST_F(DomTest, testStrAppend_v2path_wildcard) { + const char *input = "{\"x\": {\"a\":\"\", \"b\":\"b\", \"c\":\"cc\", \"d\":\"ddd\"}}"; + JDocument *doc; + JsonUtilCode rc = dom_parse(nullptr, input, strlen(input), &doc); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + + jsn::vector<size_t> vec; + bool is_v2_path; + const char *s = "\"z\""; + rc = dom_string_append(doc, "$.x.*", s, strlen(s), vec, is_v2_path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_TRUE(is_v2_path); + EXPECT_EQ(vec.size(), 4); + for (size_t i=0; i < vec.size(); i++) { + EXPECT_EQ(vec[i], i+1); + } + + dom_free_doc(doc); +} +
+TEST_F(DomTest, testObjLen) { + jsn::vector<size_t> vec; + bool is_v2_path; + JsonUtilCode rc = dom_object_length(doc1, ".address", vec, is_v2_path); +
EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_FALSE(is_v2_path); + EXPECT_EQ(vec.size(), 1); + EXPECT_EQ(vec[0], 4); +} +
+TEST_F(DomTest, testObjLen_v2path_wildcard) { + const char *input = "{\"x\": {}, \"y\": {\"a\":\"a\"}, \"z\": {\"a\":\"\", \"b\":\"b\"}}"; + JDocument *doc; + JsonUtilCode rc = dom_parse(nullptr, input, strlen(input), &doc); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + + jsn::vector<size_t> vec; + bool is_v2_path; + rc = dom_object_length(doc, "$.*", vec, is_v2_path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_TRUE(is_v2_path); + EXPECT_EQ(vec.size(), 3); + for (size_t i=0; i < vec.size(); i++) { + EXPECT_EQ(vec[i], i); + } + + dom_free_doc(doc); +} +
+TEST_F(DomTest, testObjKeys) { + jsn::vector<jsn::vector<jsn::string>> vec; + bool is_v2_path; + JsonUtilCode rc = dom_object_keys(doc1, ".address", vec, is_v2_path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_FALSE(is_v2_path); + EXPECT_EQ(vec.size(), 1); + EXPECT_EQ(vec[0].size(), 4); + EXPECT_STREQ(vec[0][0].c_str(), "street"); + EXPECT_STREQ(vec[0][1].c_str(), "city"); + EXPECT_STREQ(vec[0][2].c_str(), "state"); + EXPECT_STREQ(vec[0][3].c_str(), "zipcode"); +} +
+TEST_F(DomTest, testObjKeys_v2path_wildcard) { + const char *input = "{\"x\": {}, \"y\": {\"a\":\"a\"}, \"z\": {\"a\":\"\", \"b\":\"b\"}}"; + JDocument *doc; + JsonUtilCode rc = dom_parse(nullptr, input, strlen(input), &doc); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + + jsn::vector<jsn::vector<jsn::string>> vec; + bool is_v2_path; + rc = dom_object_keys(doc, "$.*", vec, is_v2_path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_TRUE(is_v2_path); + EXPECT_EQ(vec.size(), 3); + EXPECT_TRUE(vec[0].empty()); + EXPECT_EQ(vec[1].size(), 1); + EXPECT_EQ(vec[2].size(), 2); + EXPECT_STREQ(vec[1][0].c_str(), "a"); + EXPECT_STREQ(vec[2][0].c_str(), "a"); + EXPECT_STREQ(vec[2][1].c_str(), "b"); + + rc = dom_object_keys(doc, ".x", vec, is_v2_path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_FALSE(is_v2_path); + EXPECT_EQ(vec.size(), 1); + EXPECT_TRUE(vec[0].empty()); + + dom_free_doc(doc); +} +
+TEST_F(DomTest, testArrLen) { + jsn::vector<size_t> vec; + bool is_v2_path; + JsonUtilCode rc = dom_array_length(doc1, ".children", vec, is_v2_path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_FALSE(is_v2_path); + EXPECT_EQ(vec.size(), 1); + EXPECT_EQ(vec[0], 0); + + rc = dom_array_length(doc1, ".phoneNumbers", vec, is_v2_path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_FALSE(is_v2_path); + EXPECT_EQ(vec.size(), 1); + EXPECT_EQ(vec[0], 2); +} +
+TEST_F(DomTest, testArrLen_v2path) { + const char *input = "[ [\"Marry\", \"Bob\", \"Tom\"], [\"Peter\", \"Marry\", \"Carol\"]," + "[\"Peter\", \"Jane\"], [] ]"; + JDocument *doc; + JsonUtilCode rc = dom_parse(nullptr, input, strlen(input), &doc); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + + jsn::vector<size_t> vec; + bool is_v2_path; + rc = dom_array_length(doc, "$[*]", vec, is_v2_path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_TRUE(is_v2_path); + EXPECT_EQ(vec.size(), 4); + EXPECT_EQ(vec[0], 3); + EXPECT_EQ(vec[1], 3); + EXPECT_EQ(vec[2], 2); + EXPECT_EQ(vec[3], 0); + + dom_free_doc(doc); +} +
+TEST_F(DomTest, testArrAppend) { + const char *jsons[] = { "\"John\"" }; + size_t json_lens[] = { 6 }; + jsn::vector<size_t> vec; + bool is_v2_path; + + JsonUtilCode rc = dom_array_append(nullptr, doc1, ".children", jsons, json_lens, 1, vec, is_v2_path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_FALSE(is_v2_path); + EXPECT_EQ(vec.size(), 1); + EXPECT_EQ(vec[0], 1); + + jsons[0] = "\"Mary\""; + json_lens[0] = 6; + rc = dom_array_append(nullptr, doc1, ".children", jsons, json_lens, 1, vec, is_v2_path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); +
EXPECT_EQ(vec.size(), 1); + EXPECT_EQ(vec[0], 2); + + ReplyBuffer oss; + rc = dom_get_value_as_str(doc1, ".children", nullptr, oss, false); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_STREQ(GetString(&oss), "[\"John\",\"Mary\"]"); +} +
+TEST_F(DomTest, testArrAppend_multiValues) { + const char *jsons[] = { "\"John\"", "\"Mary\"", "\"Tom\"" }; + size_t json_lens[] = { 6, 6, 5 }; + jsn::vector<size_t> vec; + bool is_v2_path; + JsonUtilCode rc = dom_array_append(nullptr, doc1, ".children", jsons, json_lens, 3, vec, is_v2_path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_EQ(vec.size(), 1); + EXPECT_EQ(vec[0], 3); + + ReplyBuffer oss; + rc = dom_get_value_as_str(doc1, ".children", nullptr, oss, false); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_STREQ(GetString(&oss), "[\"John\",\"Mary\",\"Tom\"]"); +} +
+TEST_F(DomTest, testArrAppend_v2path) { + const char *input = "[ [\"Marry\", \"Bob\"], [\"Peter\"], [] ]"; + JDocument *doc; + JsonUtilCode rc = dom_parse(nullptr, input, strlen(input), &doc); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + + jsn::vector<size_t> vec; + bool is_v2_path; + const char *jsons[] = { "\"John\"", "\"Tom\"" }; + size_t json_lens[] = { 6, 5 }; + rc = dom_array_append(nullptr, doc, "$[*]", jsons, json_lens, 2, vec, is_v2_path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_TRUE(is_v2_path); + EXPECT_EQ(vec.size(), 3); + EXPECT_EQ(vec[0], 4); + EXPECT_EQ(vec[1], 3); + EXPECT_EQ(vec[2], 2); + + ReplyBuffer oss; + rc = dom_get_value_as_str(doc, "$[*]", nullptr, oss, false); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_STREQ(GetString(&oss), "[[\"Marry\",\"Bob\",\"John\",\"Tom\"],[\"Peter\",\"John\",\"Tom\"]," + "[\"John\",\"Tom\"]]"); + + dom_free_doc(doc); +} +
+TEST_F(DomTest, testArrPop) { + const char *jsons[] = { "\"John\"", "\"Mary\"", "\"Tom\"" }; + size_t json_lens[] = { 6, 6, 5 }; + jsn::vector<size_t> vec; + bool is_v2_path; + JsonUtilCode rc = dom_array_append(nullptr, doc1, ".children", jsons, json_lens, 3, vec, is_v2_path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_FALSE(is_v2_path); + EXPECT_EQ(vec.size(), 1); + EXPECT_EQ(vec[0], 3); + + jsn::vector<ReplyBuffer> vec_oss; + rc = dom_array_pop(doc1, ".children", 1, vec_oss, is_v2_path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_FALSE(is_v2_path); + EXPECT_EQ(vec_oss.size(), 1); + EXPECT_STREQ(vec_oss[0].GetString(), "\"Mary\""); + EXPECT_EQ(vec_oss[0].GetLength(), 6); + + rc = dom_array_pop(doc1, ".children", -1, vec_oss, is_v2_path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_FALSE(is_v2_path); + EXPECT_EQ(vec_oss.size(), 1); + EXPECT_STREQ(vec_oss[0].GetString(), "\"Tom\""); + EXPECT_EQ(vec_oss[0].GetLength(), 5); + + rc = dom_array_pop(doc1, ".children", 0, vec_oss, is_v2_path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_FALSE(is_v2_path); + EXPECT_EQ(vec_oss.size(), 1); + EXPECT_STREQ(vec_oss[0].GetString(), "\"John\""); + EXPECT_EQ(vec_oss[0].GetLength(), 6); +} +
+TEST_F(DomTest, testArrPop_v2path) { + const char *input = "[ [\"Marry\", \"Bob\", \"Tom\"], [\"Peter\", \"Carol\"], [\"Jane\"], [] ]"; + JDocument *doc; + JsonUtilCode rc = dom_parse(nullptr, input, strlen(input), &doc); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + + jsn::vector<ReplyBuffer> vec; + bool is_v2_path; + rc = dom_array_pop(doc, "$[*]", 0, vec, is_v2_path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_TRUE(is_v2_path); + EXPECT_EQ(vec.size(), 4); + EXPECT_STREQ(vec[0].GetString(), "\"Marry\""); + EXPECT_STREQ(vec[1].GetString(), "\"Peter\""); + EXPECT_STREQ(vec[2].GetString(), "\"Jane\""); + EXPECT_EQ(vec[3].GetLength(), 0); + + ReplyBuffer oss; +
rc = dom_get_value_as_str(doc, "$[*]", nullptr, oss, false); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_STREQ(GetString(&oss), "[[\"Bob\",\"Tom\"],[\"Carol\"],[],[]]"); + + dom_free_doc(doc); +} +
+TEST_F(DomTest, testArrInsert) { + const char *vals[1] = {"\"john\""}; + size_t val_lens[1] = { 6 }; + jsn::vector<size_t> vec; + bool is_v2_path; + JsonUtilCode rc = dom_array_insert(nullptr, doc1, ".children", 0, vals, val_lens, 1, vec, is_v2_path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_FALSE(is_v2_path); + EXPECT_EQ(vec.size(), 1); + EXPECT_EQ(vec[0], 1u); + + ReplyBuffer oss; + rc = dom_get_value_as_str(doc1, ".children", nullptr, oss, false); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_STREQ(GetString(&oss), "[\"john\"]"); +} +
+TEST_F(DomTest, testArrInsert_v2path) { + const char *input = "[ [\"Marry\", \"Bob\"], [\"Peter\"], [] ]"; + JDocument *doc; + JsonUtilCode rc = dom_parse(nullptr, input, strlen(input), &doc); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + + jsn::vector<size_t> vec; + bool is_v2_path; + const char *jsons[] = { "\"John\"", "\"Tom\"" }; + size_t json_lens[] = { 6, 5 }; + rc = dom_array_insert(nullptr, doc, "$[*]", 0, jsons, json_lens, 2, vec, is_v2_path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_TRUE(is_v2_path); + EXPECT_EQ(vec.size(), 3); + EXPECT_EQ(vec[0], 4); + EXPECT_EQ(vec[1], 3); + EXPECT_EQ(vec[2], 2); + + ReplyBuffer oss; + rc = dom_get_value_as_str(doc, "$[*]", nullptr, oss, false); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_STREQ(GetString(&oss), "[[\"John\",\"Tom\",\"Marry\",\"Bob\"]," + "[\"John\",\"Tom\",\"Peter\"],[\"John\",\"Tom\"]]"); + + dom_free_doc(doc); +} +
+TEST_F(DomTest, testClear) { + const char *jsons[] = { "\"John\"", "\"Mary\"", "\"Tom\"" }; + size_t json_lens[] = { 6, 6, 5 }; + jsn::vector<size_t> vec; + bool is_v2_path; + JsonUtilCode rc = dom_array_append(nullptr, doc1, ".children", jsons, json_lens, 3, vec, is_v2_path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_FALSE(is_v2_path); + EXPECT_EQ(vec.size(), 1); + EXPECT_EQ(vec[0], 3); + + // clearing the array counts one container cleared, not the three elements removed + size_t containers_cleared; + rc = dom_clear(doc1, ".children", containers_cleared); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_EQ(containers_cleared, 1); + + // zero elements should remain + rc = dom_array_length(doc1, ".children", vec, is_v2_path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_EQ(vec.size(), 1); + EXPECT_EQ(vec[0], 0); + + // clear empty array + rc = dom_clear(doc1, ".children", containers_cleared); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_EQ(containers_cleared, 0); + + rc = dom_array_length(doc1, ".children", vec, is_v2_path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_EQ(vec.size(), 1); + EXPECT_EQ(vec[0], 0); + + // clear an object + rc = dom_object_length(doc1, ".address", vec, is_v2_path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_FALSE(is_v2_path); + EXPECT_EQ(vec.size(), 1); + EXPECT_EQ(vec[0], 4); + + rc = dom_clear(doc1, ".address", containers_cleared); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_EQ(containers_cleared, 1); + + vec.clear(); + rc = dom_object_length(doc1, ".address", vec, is_v2_path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_EQ(vec.size(), 1); + EXPECT_EQ(vec[0], 0); +} +
+TEST_F(DomTest, testClear_v2path) { + const char *input1 = "{\"a\":{}, \"b\":{\"a\": 1, \"b\": null, \"c\": true}, " + "\"c\":1, \"d\":true, \"e\":null, \"f\":\"d\", \"g\": 4, \"h\": 4.5}"; + JDocument *d1; + JsonUtilCode rc = dom_parse(nullptr, input1, strlen(input1), &d1); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + + size_t elements_cleared;
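+ // CLEAR empties containers and zeroes scalars: numbers -> 0, strings -> "", booleans -> false; nulls are left untouched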
+ rc = dom_clear(d1, "$.*", elements_cleared); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_EQ(elements_cleared, 6); // the already-empty object "a" and the null "e" are not counted + + ReplyBuffer oss; + rc = dom_get_value_as_str(d1, ".", nullptr, oss, false); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_STREQ(GetString(&oss), "{\"a\":{},\"b\":{},\"c\":0,\"d\":false,\"e\":null,\"f\":\"\",\"g\":0,\"h\":0.0}"); + + const char *input2 = "[[], [0], [0,1], [0,1,2], 1, true, null, \"d\"]"; + JDocument *d2; + rc = dom_parse(nullptr, input2, strlen(input2), &d2); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + + rc = dom_clear(d2, "$[*]", elements_cleared); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_EQ(elements_cleared, 6); + + Clear(&oss); + rc = dom_get_value_as_str(d2, ".", nullptr, oss, false); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_STREQ(GetString(&oss), "[[],[],[],[],0,false,null,\"\"]"); + + dom_free_doc(d1); + dom_free_doc(d2); +} +
+TEST_F(DomTest, testArrTrim) { + const char *jsons[] = { "\"John\"", "\"Mary\"", "\"Tom\"" }; + size_t json_lens[] = { 6, 6, 5 }; + jsn::vector<size_t> vec; + bool is_v2_path; + JsonUtilCode rc = dom_array_append(nullptr, doc1, ".children", jsons, json_lens, 3, vec, is_v2_path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_FALSE(is_v2_path); + EXPECT_EQ(vec.size(), 1); + EXPECT_EQ(vec[0], 3); + + rc = dom_array_trim(doc1, ".children", 1, 2, vec, is_v2_path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_FALSE(is_v2_path); + EXPECT_EQ(vec.size(), 1); + EXPECT_EQ(vec[0], 2); + + rc = dom_array_trim(doc1, ".children", 0, 0, vec, is_v2_path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_FALSE(is_v2_path); + EXPECT_EQ(vec.size(), 1); + EXPECT_EQ(vec[0], 1); + + ReplyBuffer oss; + rc = dom_get_value_as_str(doc1, ".children", nullptr, oss, false); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_STREQ(GetString(&oss), "[\"Mary\"]"); + + rc = dom_array_trim(doc1, ".children", -1, 5, vec, is_v2_path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_FALSE(is_v2_path); + EXPECT_EQ(vec.size(), 1); + EXPECT_EQ(vec[0], 1); + + Clear(&oss); + rc = dom_get_value_as_str(doc1, ".children", nullptr, oss, false); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_STREQ(GetString(&oss), "[\"Mary\"]"); + + rc = dom_array_trim(doc1, ".phoneNumbers", 2, 0, vec, is_v2_path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_FALSE(is_v2_path); + EXPECT_EQ(vec.size(), 1); + EXPECT_EQ(vec[0], 0); + + rc = dom_array_length(doc1, ".phoneNumbers", vec, is_v2_path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_EQ(vec.size(), 1); + EXPECT_EQ(vec[0], 0); +} +
+TEST_F(DomTest, testArrTrim_v2path) { + const char *input = "[ [\"Marry\", \"Bob\", \"Tom\"], [\"Peter\", \"Carol\"], [\"Jane\"], [] ]"; + JDocument *doc; + JsonUtilCode rc = dom_parse(nullptr, input, strlen(input), &doc); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + + jsn::vector<size_t> vec; + bool is_v2_path; + rc = dom_array_trim(doc, "$[*]", 0, 1, vec, is_v2_path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_TRUE(is_v2_path); + EXPECT_EQ(vec.size(), 4); + EXPECT_EQ(vec[0], 2); + EXPECT_EQ(vec[1], 2); + EXPECT_EQ(vec[2], 1); + EXPECT_EQ(vec[3], 0); + + ReplyBuffer oss; + rc = dom_get_value_as_str(doc, "$[*]", nullptr, oss, false); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_STREQ(GetString(&oss), "[[\"Marry\",\"Bob\"],[\"Peter\",\"Carol\"],[\"Jane\"],[]]"); + + dom_free_doc(doc); +} +
+TEST_F(DomTest, testArrIndex) { + const char *jsons[] = { "\"John\"", "\"Marry\"", "\"Tom\"" }; + size_t json_lens[] = { 6, 7, 5 }; + jsn::vector<size_t> vec; + bool is_v2_path; + JsonUtilCode rc = dom_array_append(nullptr, doc1, ".children", jsons, json_lens, 3, vec, is_v2_path); +
".children", jsons, json_lens, 3, vec, is_v2_path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_FALSE(is_v2_path); + EXPECT_EQ(vec[0], 3); + + jsn::vector indexes; + rc = dom_array_index_of(doc1, ".children", "\"Marry\"", 7, 0, 2, indexes, is_v2_path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_FALSE(is_v2_path); + EXPECT_EQ(indexes.size(), 1); + EXPECT_EQ(indexes[0], 1); + + rc = dom_array_index_of(doc1, ".children", "\"Tom\"", 5, 0, -1, indexes, is_v2_path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_FALSE(is_v2_path); + EXPECT_EQ(indexes.size(), 1); + EXPECT_EQ(indexes[0], 2); +} + +TEST_F(DomTest, testArrIndex_v2path) { + const char *input = "[ [\"Marry\", \"Bob\", \"Tom\"], [\"Peter\", \"Marry\", \"Carol\"], " + "[\"Peter\", \"Jane\"], [] ]"; + JDocument *doc; + JsonUtilCode rc = dom_parse(nullptr, input, strlen(input), &doc); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + + jsn::vector indexes; + bool is_v2_path; + rc = dom_array_index_of(doc, "$[*]", "\"Marry\"", 7, 0, 2, indexes, is_v2_path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_TRUE(is_v2_path); + EXPECT_EQ(indexes.size(), 4); + EXPECT_EQ(indexes[0], 0); + EXPECT_EQ(indexes[1], 1); + EXPECT_EQ(indexes[2], -1); + EXPECT_EQ(indexes[3], -1); + + dom_free_doc(doc); +} + +TEST_F(DomTest, testValueType) { + jsn::vector vec; + bool is_v2_path; + JsonUtilCode rc = dom_value_type(doc1, ".firstName", vec, is_v2_path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_FALSE(is_v2_path); + EXPECT_EQ(vec.size(), 1); + EXPECT_STREQ(vec[0].c_str(), "string"); + + rc = dom_value_type(doc1, ".age", vec, is_v2_path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_FALSE(is_v2_path); + EXPECT_EQ(vec.size(), 1); + EXPECT_STREQ(vec[0].c_str(), "integer"); + + rc = dom_value_type(doc1, ".weight", vec, is_v2_path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_FALSE(is_v2_path); + EXPECT_EQ(vec.size(), 1); + EXPECT_STREQ(vec[0].c_str(), "number"); + + rc = dom_value_type(doc1, ".isAlive", vec, is_v2_path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_FALSE(is_v2_path); + EXPECT_EQ(vec.size(), 1); + EXPECT_STREQ(vec[0].c_str(), "boolean"); + + rc = dom_value_type(doc1, ".spouse", vec, is_v2_path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_FALSE(is_v2_path); + EXPECT_EQ(vec.size(), 1); + EXPECT_STREQ(vec[0].c_str(), "null"); + + rc = dom_value_type(doc1, ".children", vec, is_v2_path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_FALSE(is_v2_path); + EXPECT_EQ(vec.size(), 1); + EXPECT_STREQ(vec[0].c_str(), "array"); + + rc = dom_value_type(doc1, ".phoneNumbers", vec, is_v2_path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_FALSE(is_v2_path); + EXPECT_EQ(vec.size(), 1); + EXPECT_STREQ(vec[0].c_str(), "array"); + + rc = dom_value_type(doc1, ".", vec, is_v2_path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_FALSE(is_v2_path); + EXPECT_EQ(vec.size(), 1); + EXPECT_STREQ(vec[0].c_str(), "object"); + + rc = dom_value_type(doc1, ".groups", vec, is_v2_path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_FALSE(is_v2_path); + EXPECT_EQ(vec.size(), 1); + EXPECT_STREQ(vec[0].c_str(), "object"); +} + +TEST_F(DomTest, testType_v2path) { + const char *input = "[1, 2.3, \"foo\", true, null, {}, []]"; + JDocument *doc; + JsonUtilCode rc = dom_parse(nullptr, input, strlen(input), &doc); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + + jsn::vector vec; + bool is_v2_path; + rc = dom_value_type(doc, "$[*]", vec, is_v2_path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_TRUE(is_v2_path); + EXPECT_EQ(vec.size(), 7); + EXPECT_STREQ(vec[0].c_str(), "integer"); + EXPECT_STREQ(vec[1].c_str(), "number"); + 
EXPECT_STREQ(vec[2].c_str(), "string"); + EXPECT_STREQ(vec[3].c_str(), "boolean"); + EXPECT_STREQ(vec[4].c_str(), "null"); + EXPECT_STREQ(vec[5].c_str(), "object"); + EXPECT_STREQ(vec[6].c_str(), "array"); + + dom_free_doc(doc); +} +
+TEST_F(DomTest, testNumFields) { + jsn::vector<size_t> vec; + bool is_v2_path; + JsonUtilCode rc = dom_num_fields(doc1, ".", vec, is_v2_path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_FALSE(is_v2_path); + EXPECT_EQ(vec.size(), 1); + EXPECT_EQ(vec[0], 20); + + rc = dom_num_fields(doc1, ".firstName", vec, is_v2_path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_FALSE(is_v2_path); + EXPECT_EQ(vec.size(), 1); + EXPECT_EQ(vec[0], 1); + + rc = dom_num_fields(doc1, ".age", vec, is_v2_path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_FALSE(is_v2_path); + EXPECT_EQ(vec.size(), 1); + EXPECT_EQ(vec[0], 1); + + rc = dom_num_fields(doc1, ".isAlive", vec, is_v2_path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_FALSE(is_v2_path); + EXPECT_EQ(vec.size(), 1); + EXPECT_EQ(vec[0], 1); + + rc = dom_num_fields(doc1, ".spouse", vec, is_v2_path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_FALSE(is_v2_path); + EXPECT_EQ(vec.size(), 1); + EXPECT_EQ(vec[0], 1); + + rc = dom_num_fields(doc1, ".groups", vec, is_v2_path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_FALSE(is_v2_path); + EXPECT_EQ(vec.size(), 1); + EXPECT_EQ(vec[0], 0); + + rc = dom_num_fields(doc1, ".children", vec, is_v2_path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_FALSE(is_v2_path); + EXPECT_EQ(vec.size(), 1); + EXPECT_EQ(vec[0], 0); + + rc = dom_num_fields(doc1, ".address", vec, is_v2_path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_FALSE(is_v2_path); + EXPECT_EQ(vec.size(), 1); + EXPECT_EQ(vec[0], 4); + + rc = dom_num_fields(doc1, ".phoneNumbers", vec, is_v2_path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_FALSE(is_v2_path); + EXPECT_EQ(vec.size(), 1); + EXPECT_EQ(vec[0], 6); +} +
+TEST_F(DomTest, testNumFields_v2path) { + const char *input = "[1, 2.3, \"foo\", true, null, {}, [], {\"a\":1, \"b\":2}, [1,2,3]]"; + JDocument *doc; + JsonUtilCode rc = dom_parse(nullptr, input, strlen(input), &doc); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + + jsn::vector<size_t> vec; + bool is_v2_path; + rc = dom_num_fields(doc, "$[*]", vec, is_v2_path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_TRUE(is_v2_path); + EXPECT_EQ(vec.size(), 9); + EXPECT_EQ(vec[0], 1); + EXPECT_EQ(vec[1], 1); + EXPECT_EQ(vec[2], 1); + EXPECT_EQ(vec[3], 1); + EXPECT_EQ(vec[4], 1); + EXPECT_EQ(vec[5], 0); + EXPECT_EQ(vec[6], 0); + EXPECT_EQ(vec[7], 2); + EXPECT_EQ(vec[8], 3); + + dom_free_doc(doc); +} +
+TEST_F(DomTest, testSelector_get_legacyPath_wildcard) { + const char *path = ".address.*"; + Selector selector; + JsonUtilCode rc = selector.getValues(*doc1, path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + auto &rs = selector.getResultSet(); + EXPECT_EQ(rs.size(), 4); + rapidjson::StringBuffer oss; + dom_serialize_value(*rs[0].first, nullptr, oss); + EXPECT_STREQ(oss.GetString(), "\"21 2nd Street\""); + Clear(&oss); + dom_serialize_value(*rs[1].first, nullptr, oss); + EXPECT_STREQ(oss.GetString(), "\"New York\""); + Clear(&oss); + dom_serialize_value(*rs[2].first, nullptr, oss); + EXPECT_STREQ(oss.GetString(), "\"NY\""); + Clear(&oss); + dom_serialize_value(*rs[3].first, nullptr, oss); + EXPECT_STREQ(oss.GetString(), "\"10021-3100\""); + + path = ".address.city.*"; + rc = selector.getValues(*doc1, path); + EXPECT_EQ(rc, JSONUTIL_INVALID_JSON_PATH); + EXPECT_TRUE(selector.getResultSet().empty()); +} +
+TEST_F(DomTest, testSelector_get_v2path_wildcard) {
*path = "$.address.*"; + Selector selector; + JsonUtilCode rc = selector.getValues(*doc1, path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + rapidjson::StringBuffer oss; + auto &rs = selector.getResultSet(); + EXPECT_EQ(rs.size(), 4); + dom_serialize_value(*rs[0].first, nullptr, oss); + EXPECT_STREQ(oss.GetString(), "\"21 2nd Street\""); + Clear(&oss); + dom_serialize_value(*rs[1].first, nullptr, oss); + EXPECT_STREQ(oss.GetString(), "\"New York\""); + Clear(&oss); + dom_serialize_value(*rs[2].first, nullptr, oss); + EXPECT_STREQ(oss.GetString(), "\"NY\""); + Clear(&oss); + dom_serialize_value(*rs[3].first, nullptr, oss); + EXPECT_STREQ(oss.GetString(), "\"10021-3100\""); + + path = "$.address.city.*"; + rc = selector.getValues(*doc1, path); + EXPECT_EQ(rc, JSONUTIL_INVALID_USE_OF_WILDCARD); + EXPECT_TRUE(selector.getResultSet().empty()); + + path = "$.address.city"; + rc = selector.getValues(*doc1, path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + auto &rs2 = selector.getResultSet(); + EXPECT_EQ(rs2.size(), 1); + Clear(&oss); + dom_serialize_value(*rs2[0].first, nullptr, oss); + EXPECT_STREQ(oss.GetString(), "\"New York\""); +} + +TEST_F(DomTest, testSelector_get_array_legacyPath) { + const char *path = ".phoneNumbers[0]"; + Selector selector; + JsonUtilCode rc = selector.getValues(*doc1, path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + auto &rs = selector.getResultSet(); + EXPECT_EQ(rs.size(), 1); + rapidjson::StringBuffer oss; + dom_serialize_value(*rs[0].first, nullptr, oss); + EXPECT_STREQ(oss.GetString(), "{\"type\":\"home\",\"number\":\"212 555-1234\"}"); + + path = ".phoneNumbers[0].type"; + rc = selector.getValues(*doc1, path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + auto &rs2 = selector.getResultSet(); + EXPECT_EQ(rs2.size(), 1); + Clear(&oss); + dom_serialize_value(*rs2[0].first, nullptr, oss); + EXPECT_STREQ(oss.GetString(), "\"home\""); + + path = ".phoneNumbers[2]"; + rc = selector.getValues(*doc1, path); + EXPECT_EQ(rc, JSONUTIL_INDEX_OUT_OF_ARRAY_BOUNDARIES); + EXPECT_TRUE(selector.getResultSet().empty()); + + path = ".phoneNumbers[2].number"; + rc = selector.getValues(*doc1, path); + EXPECT_EQ(rc, JSONUTIL_INDEX_OUT_OF_ARRAY_BOUNDARIES); + EXPECT_TRUE(selector.getResultSet().empty()); + + path = ".phoneNumbers[x]"; + rc = selector.getValues(*doc1, path); + EXPECT_EQ(rc, JSONUTIL_ARRAY_INDEX_NOT_NUMBER); + EXPECT_TRUE(selector.getResultSet().empty()); + + path = ".phoneNumbers[x].number"; + rc = selector.getValues(*doc1, path); + EXPECT_EQ(rc, JSONUTIL_ARRAY_INDEX_NOT_NUMBER); + EXPECT_TRUE(selector.getResultSet().empty()); +} + +TEST_F(DomTest, testSelector_get_array_negativeIndex_legacy_and_v2ath) { + const char *path = ".phoneNumbers[-1]"; + Selector selector; + JsonUtilCode rc = selector.getValues(*doc1, path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + auto &rs = selector.getResultSet(); + EXPECT_EQ(rs.size(), 1); + rapidjson::StringBuffer oss; + dom_serialize_value(*rs[0].first, nullptr, oss); + EXPECT_STREQ(oss.GetString(), "{\"type\":\"office\",\"number\":\"646 555-4567\"}"); + + path = "$['phoneNumbers'][-2]['number']"; + rc = selector.getValues(*doc1, path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + auto &rs2 = selector.getResultSet(); + EXPECT_EQ(rs2.size(), 1); + Clear(&oss); + dom_serialize_value(*rs2[0].first, nullptr, oss); + EXPECT_STREQ(oss.GetString(), "\"212 555-1234\""); + + path = "$['phoneNumbers'][-3]['number']"; + rc = selector.getValues(*doc1, path); + EXPECT_EQ(rc, JSONUTIL_INDEX_OUT_OF_ARRAY_BOUNDARIES); + EXPECT_TRUE(selector.getResultSet().empty()); +} + +TEST_F(DomTest, 
testSelector_get_array_legacyPath_wildcard) { + const char *path = ".phoneNumbers[*]"; + Selector selector; + JsonUtilCode rc = selector.getValues(*doc1, path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + auto &rs = selector.getResultSet(); + EXPECT_EQ(rs.size(), 2); + rapidjson::StringBuffer oss; + dom_serialize_value(*rs[0].first, nullptr, oss); + EXPECT_STREQ(oss.GetString(), "{\"type\":\"home\",\"number\":\"212 555-1234\"}"); + + path = ".phoneNumbers[*].number"; + rc = selector.getValues(*doc1, path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + auto &rs2 = selector.getResultSet(); + EXPECT_EQ(rs2.size(), 2); + Clear(&oss); + dom_serialize_value(*rs2[0].first, nullptr, oss); + EXPECT_STREQ(oss.GetString(), "\"212 555-1234\""); +} + +TEST_F(DomTest, testSelector_get_array_v2path_wildcard) { + const char *path = "$.phoneNumbers[*]"; + Selector selector; + JsonUtilCode rc = selector.getValues(*doc1, path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + auto &rs = selector.getResultSet(); + EXPECT_EQ(rs.size(), 2); + rapidjson::StringBuffer oss; + dom_serialize_value(*rs[0].first, nullptr, oss); + EXPECT_STREQ(oss.GetString(), "{\"type\":\"home\",\"number\":\"212 555-1234\"}"); + Clear(&oss); + dom_serialize_value(*rs[1].first, nullptr, oss); + EXPECT_STREQ(oss.GetString(), "{\"type\":\"office\",\"number\":\"646 555-4567\"}"); + + path = "$.phoneNumbers[*].number"; + rc = selector.getValues(*doc1, path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + auto &rs2 = selector.getResultSet(); + EXPECT_EQ(rs2.size(), 2); + Clear(&oss); + dom_serialize_value(*rs2[0].first, nullptr, oss); + EXPECT_STREQ(oss.GetString(), "\"212 555-1234\""); + Clear(&oss); + dom_serialize_value(*rs2[1].first, nullptr, oss); + EXPECT_STREQ(oss.GetString(), "\"646 555-4567\""); +} + +// Test array slice +TEST_F(DomTest, testSelector_get_array_slice_v2path_wildcard) { + const char *path = "$.e[1:4]"; + Selector selector; + JsonUtilCode rc = selector.getValues(*doc4, path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + auto &rs = selector.getResultSet(); + EXPECT_EQ(rs.size(), 3); + EXPECT_EQ(rs[0].first->GetInt(), 2); + + path = "$.e[2:]"; + rc = selector.getValues(*doc4, path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + auto &rs2 = selector.getResultSet(); + EXPECT_EQ(rs2.size(), 3); + EXPECT_EQ(rs2[2].first->GetInt(), 5); + + path = "$.e[:4]"; + rc = selector.getValues(*doc4, path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + auto &rs3 = selector.getResultSet(); + EXPECT_EQ(rs3.size(), 4); + EXPECT_EQ(rs3[3].first->GetInt(), 4); + + path = "$.e[0:5:2]"; + rc = selector.getValues(*doc4, path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + auto &rs4 = selector.getResultSet(); + EXPECT_EQ(rs4.size(), 3); + EXPECT_EQ(rs4[1].first->GetInt(), 3); + + path = "$.e[:5:2]"; + rc = selector.getValues(*doc4, path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + auto &rs5 = selector.getResultSet(); + EXPECT_EQ(rs5.size(), 3); + EXPECT_EQ(rs5[1].first->GetInt(), 3); + + path = "$.e[4:0:-2]"; + rc = selector.getValues(*doc4, path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + auto &rs6 = selector.getResultSet(); + EXPECT_EQ(rs6.size(), 2); + EXPECT_EQ(rs6[0].first->GetInt(), 5); +} + +// Test array union +TEST_F(DomTest, testSelector_get_array_union_v2path) { + const char *path = "$.e[0,2]"; + Selector selector; + JsonUtilCode rc = selector.getValues(*doc4, path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + auto &rs = selector.getResultSet(); + EXPECT_EQ(rs.size(), 2); + EXPECT_EQ(rs[0].first->GetInt(), 1); + EXPECT_EQ(rs[1].first->GetInt(), 3); +} + +TEST_F(DomTest, testSelector_set_v2path_part1) { + const char *path = 
".address.*"; + const char *new_val = "\"foo\""; + JsonUtilCode rc = dom_set_value(nullptr, doc1, path, new_val); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + + Selector selector; + rc = selector.getValues(*doc1, path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + auto &rs = selector.getResultSet(); + EXPECT_EQ(rs.size(), 4); + for (auto &vInfo : rs) { + rapidjson::StringBuffer oss; + dom_serialize_value(*vInfo.first, nullptr, oss); + EXPECT_STREQ(oss.GetString(), new_val); + } + + path = ".address.city.foo"; + rc = dom_set_value(nullptr, doc1, path, new_val); + EXPECT_EQ(rc, JSONUTIL_CANNOT_INSERT_MEMBER_INTO_NON_OBJECT_VALUE); + + path = ".address.foo.city"; + rc = dom_set_value(nullptr, doc1, path, new_val); + EXPECT_EQ(rc, JSONUTIL_JSON_PATH_NOT_EXIST); +} + +TEST_F(DomTest, testSelector_set_v2path_part2) { + const char *path = "$.phoneNumbers[*].number"; + const char *new_val = "\"123\""; + JsonUtilCode rc = dom_set_value(nullptr, doc1, path, new_val); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + + Selector selector; + rc = selector.getValues(*doc1, path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + auto &rs = selector.getResultSet(); + EXPECT_EQ(rs.size(), 2); + for (auto &vInfo : rs) { + rapidjson::StringBuffer oss; + dom_serialize_value(*vInfo.first, nullptr, oss); + EXPECT_STREQ(oss.GetString(), new_val); + } + + path = "$.phoneNumbers[x].number"; + rc = dom_set_value(nullptr, doc1, path, new_val); + EXPECT_EQ(rc, JSONUTIL_ARRAY_INDEX_NOT_NUMBER); + + path = "$.phoneNumbers[2].number"; + rc = dom_set_value(nullptr, doc1, path, new_val); + EXPECT_EQ(rc, JSONUTIL_INDEX_OUT_OF_ARRAY_BOUNDARIES); +} + +TEST_F(DomTest, testSelector_set_v2path_part3) { + const char *path = "$.phoneNumbers[-1].number"; + const char *new_val = "\"123\""; + JsonUtilCode rc = dom_set_value(nullptr, doc1, path, new_val); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + + Selector selector; + rc = selector.getValues(*doc1, path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + auto &rs = selector.getResultSet(); + EXPECT_EQ(rs.size(), 1); + rapidjson::StringBuffer oss; + dom_serialize_value(*rs[0].first, nullptr, oss); + EXPECT_STREQ(oss.GetString(), new_val); + + path = "$.phoneNumbers[-3].number"; + rc = dom_set_value(nullptr, doc1, path, new_val); + EXPECT_EQ(rc, JSONUTIL_INDEX_OUT_OF_ARRAY_BOUNDARIES); +} + +TEST_F(DomTest, testSelector_set_v2path_part4) { + const char *new_val = "\"z\""; + const char *path1 = "['address']['z']"; + JsonUtilCode rc = dom_set_value(nullptr, doc1, path1, new_val); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + + Selector selector; + rc = selector.getValues(*doc1, path1); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + auto &rs = selector.getResultSet(); + EXPECT_EQ(rs.size(), 1); + rapidjson::StringBuffer oss; + dom_serialize_value(*rs[0].first, nullptr, oss); + EXPECT_STREQ(oss.GetString(), new_val); + + Clear(&oss); + ReplyBuffer oss2; + rc = dom_get_value_as_str(doc1, path1, nullptr, oss2, false); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_STREQ(GetString(&oss2), new_val); + + const char *path2 = ".address.z"; + rc = selector.getValues(*doc1, path2); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + auto &rs2 = selector.getResultSet(); + EXPECT_EQ(rs2.size(), 1); + Clear(&oss); + dom_serialize_value(*rs2[0].first, nullptr, oss); + EXPECT_STREQ(oss.GetString(), new_val); + + Clear(&oss); + Clear(&oss2); + rc = dom_get_value_as_str(doc1, path2, nullptr, oss2, false); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_STREQ(GetString(&oss2), new_val); + + const char *path3 = "$.address.z"; + rc = selector.getValues(*doc1, path3); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + 
+ auto &rs3 = selector.getResultSet(); + EXPECT_EQ(rs3.size(), 1); + Clear(&oss); + dom_serialize_value(*rs3[0].first, nullptr, oss); + EXPECT_STREQ(oss.GetString(), new_val); + + const char *exp = "[\"z\"]"; + Clear(&oss2); + rc = dom_get_value_as_str(doc1, path3, nullptr, oss2, false); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_STREQ(GetString(&oss2), exp); +} +
+TEST_F(DomTest, testSelector_v2path_pathDepth) { + Selector selector; + JsonUtilCode rc = selector.getValues(*doc1, ".address"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_EQ(selector.getMaxPathDepth(), 1); + + rc = selector.getValues(*doc1, ".address.city"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_EQ(selector.getMaxPathDepth(), 2); + + rc = selector.getValues(*doc1, ".address.*"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_EQ(selector.getMaxPathDepth(), 2); + + rc = selector.getValues(*doc1, "$.address.*"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_EQ(selector.getMaxPathDepth(), 2); + + rc = selector.getValues(*doc1, "$.*.*"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_EQ(selector.getMaxPathDepth(), 2); + + rc = selector.getValues(*doc1, "$.phoneNumbers[*].type"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_EQ(selector.getMaxPathDepth(), 3); + + rc = selector.getValues(*doc1, "$.phoneNumbers[*].*"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_EQ(selector.getMaxPathDepth(), 3); +} +
+TEST_F(DomTest, test_v2path_NumIncrBy1) { + JsonUtilCode rc = dom_set_value(nullptr, doc1, ".k1", "[1, 2, 3]"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + + jsn::vector<double> res; + bool isV2Path; + JParser parser; + rc = dom_increment_by(doc1, "$.k1[*]", &parser.Parse("1", 1).GetJValue(), res, isV2Path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_EQ(res.size(), 3); + EXPECT_EQ(res[0], 2); + EXPECT_EQ(res[1], 3); + EXPECT_EQ(res[2], 4); + + Selector selector; + rc = selector.getValues(*doc1, "$.k1[*]"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + auto &rs = selector.getResultSet(); + EXPECT_EQ(rs.size(), 3); + EXPECT_EQ(rs[0].first->GetInt(), 2); + EXPECT_EQ(rs[1].first->GetInt(), 3); + EXPECT_EQ(rs[2].first->GetInt(), 4); +} +
+TEST_F(DomTest, test_v2path_NumIncrBy2) { + JsonUtilCode rc = dom_set_value(nullptr, doc1, ".k1", "{\"a\":1, \"b\":2, \"c\":3}"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + + jsn::vector<double> res; + bool isV2Path; + JParser parser; + rc = dom_increment_by(doc1, "$.k1.*", &parser.Parse("1", 1).GetJValue(), res, isV2Path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_EQ(res.size(), 3); + EXPECT_EQ(res[0], 2); + EXPECT_EQ(res[1], 3); + EXPECT_EQ(res[2], 4); + + Selector selector; + rc = selector.getValues(*doc1, "$.k1.*"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + auto &rs = selector.getResultSet(); + EXPECT_EQ(rs.size(), 3); + EXPECT_EQ(rs[0].first->GetInt(), 2); + EXPECT_EQ(rs[1].first->GetInt(), 3); + EXPECT_EQ(rs[2].first->GetInt(), 4); +} +
+TEST_F(DomTest, test_v2path_NumMultBy1) { + JsonUtilCode rc = dom_set_value(nullptr, doc1, ".k1", "[1, 2, 3]"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + + jsn::vector<double> res; + bool isV2Path; + JParser parser; + rc = dom_multiply_by(doc1, "$.k1[*]", &parser.Parse("2", 1).GetJValue(), res, isV2Path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_EQ(res.size(), 3); + EXPECT_EQ(res[0], 2); + EXPECT_EQ(res[1], 4); + EXPECT_EQ(res[2], 6); + + Selector selector; + rc = selector.getValues(*doc1, "$.k1[*]"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + auto &rs = selector.getResultSet(); + EXPECT_EQ(rs.size(), 3); + EXPECT_EQ(rs[0].first->GetInt(), 2); + EXPECT_EQ(rs[1].first->GetInt(), 4); + EXPECT_EQ(rs[2].first->GetInt(), 6); +}
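+ +// Same multiplication applied through an object wildcard ($.k1.*) rather than an array wildcard.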
+TEST_F(DomTest, test_v2path_NumMultBy2) { + JsonUtilCode rc = dom_set_value(nullptr, doc1, ".k1", "{\"a\":1, \"b\":2, \"c\":3}"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + + jsn::vector<double> res; + bool isV2Path; + JParser parser; + rc = dom_multiply_by(doc1, "$.k1.*", &parser.Parse("2", 1).GetJValue(), res, isV2Path); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_EQ(res.size(), 3); + EXPECT_EQ(res[0], 2); + EXPECT_EQ(res[1], 4); + EXPECT_EQ(res[2], 6); + + Selector selector; + rc = selector.getValues(*doc1, "$.k1.*"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + auto &rs = selector.getResultSet(); + EXPECT_EQ(rs.size(), 3); + EXPECT_EQ(rs[0].first->GetInt(), 2); + EXPECT_EQ(rs[1].first->GetInt(), 4); + EXPECT_EQ(rs[2].first->GetInt(), 6); +} +
+class HTTest : public ::testing::Test { + void SetUp() override { + JsonUtilCode rc = jsonstats_init(); + ASSERT_EQ(rc, JSONUTIL_SUCCESS); + setShards(0x10); + } + void TearDown() override { + delete keyTable; + keyTable = nullptr; + } + void setShards(size_t numShards) { + if (keyTable) delete keyTable; + SetupAllocFuncs(numShards); + } +}; +
+TEST_F(HTTest, hashfunc) { + enum {key_count = 1 << 18, max_dups = 7}; + std::unordered_map<size_t, size_t> hashes; + for (size_t i = 0; i < key_count; ++i) { + std::string s = std::to_string(i); + size_t h = hash_function(s.c_str(), s.size()) & 0x7FFFF; + hashes[h]++; + } + // Now sort by frequency + std::map<size_t, std::set<size_t>> by_frequency; + for (auto [h, f] : hashes) { + by_frequency[f].insert(h); + } + // the most frequent hash value must not repeat more than max_dups times + ASSERT_LE(by_frequency.rbegin()->first, max_dups); +} +
+TEST_F(HTTest, HTIngestTest) { + for (auto num_keys : { 1<<18 }) { + rapidjson::hashTableStats.reset(); + std::ostringstream os; + os << '{'; + for (int i = 0; i < num_keys; ++i) { + if (i != 0) os << ','; + os << '"' << i << '"' << ':' << i; + } + os << '}'; + + JDocument *doc; + EXPECT_EQ(JSONUTIL_SUCCESS, dom_parse(nullptr, os.str().c_str(), os.str().size(), &doc)); + KeyTable::Stats s = keyTable->getStats(); + EXPECT_EQ(s.handles, num_keys); + EXPECT_EQ(rapidjson::hashTableStats.rehashUp, 0); + EXPECT_EQ(rapidjson::hashTableStats.rehashDown, 0); + EXPECT_EQ(rapidjson::hashTableStats.convertToHT, 0); + EXPECT_EQ(rapidjson::hashTableStats.reserveHT, 1); + EXPECT_EQ("", keyTable->validate()); + EXPECT_EQ("", validate(doc)); + // Now make a second identical document + JDocument *doc2; + std::cerr << "***** Start second parse ****\n"; + EXPECT_EQ(JSONUTIL_SUCCESS, dom_parse(nullptr, os.str().c_str(), os.str().size(), &doc2)); + s = keyTable->getStats(); + EXPECT_EQ(s.rehashes, 0); + EXPECT_EQ(s.handles, 2*num_keys); + EXPECT_EQ(rapidjson::hashTableStats.rehashUp, 0); + EXPECT_EQ(rapidjson::hashTableStats.rehashDown, 0); + EXPECT_EQ(rapidjson::hashTableStats.convertToHT, 0); + EXPECT_EQ(rapidjson::hashTableStats.reserveHT, 2); + EXPECT_EQ("", keyTable->validate()); + EXPECT_EQ("", validate(doc)); + EXPECT_EQ("", validate(doc2)); + EXPECT_EQ(s.size, num_keys); + dom_free_doc(doc2); + EXPECT_EQ(keyTable->getStats().handles, num_keys); + EXPECT_EQ(keyTable->getStats().size, num_keys); + dom_free_doc(doc); + EXPECT_EQ(keyTable->getStats().handles, 0); + EXPECT_EQ(keyTable->getStats().size, 0); + } +} diff --git a/tst/unit/hashtable_test.cc b/tst/unit/hashtable_test.cc new file mode 100644 index 0000000..ea798bb --- /dev/null +++ b/tst/unit/hashtable_test.cc @@ -0,0 +1,209 @@ +#include <gtest/gtest.h> +#include <algorithm> +#include <cstddef> +#include <cstdint> +#include <cstring> +#include <functional> +#include <iostream> +#include <map> +#include <memory> +#include <random> +#include <set> +#include <sstream> +#include <string> +#include <unordered_map> +#include <utility> +#include <vector> +#include "json/dom.h" +#include "json/alloc.h" +#include "json/stats.h"
+#include "json/keytable.h" +#include "module_sim.h" + +// Cheap, predictable hash +static size_t hash1(const char *ptr, size_t len) { + (void)ptr; + return len; +} + +class HashTableTest : public ::testing::Test { + protected: + void SetUp() override { + } + size_t original_malloced; + void TearDown() override { + if (keyTable) { + malloced = original_malloced; + EXPECT_EQ(keyTable->validate(), ""); + } + delete keyTable; + } + void Setup1(size_t numShards = 1, size_t htsize = 0, size_t (*h)(const char *, size_t) = hash1) { + setupValkeyModulePointers(); + KeyTable::Config c; + c.malloc = dom_alloc; + c.free = dom_free; + c.hash = h; + c.numShards = numShards; + keyTable = new KeyTable(c); + rapidjson::hashTableFactors.minHTSize = htsize; + original_malloced = malloced; + malloced = 0; // Ignore startup memory consumption + } +}; + +TEST_F(HashTableTest, simple) { + Setup1(); + { + JValue v; + v.SetObject(); + v.AddMember(JValue("True"), JValue(true), allocator); + EXPECT_EQ(v.MemberCount(), 1u); + EXPECT_TRUE(v["True"].IsBool()); + EXPECT_GT(malloced, 0); + } + EXPECT_EQ(malloced, 0); +} + +static JValue makeKey(size_t i) { + return std::move(JValue().SetString(std::to_string(i), allocator)); +} + +static JValue makeArray(size_t sz, size_t offset = 0) { + JValue j; + j.SetArray(); + for (size_t i = 0; i < sz; ++i) { + j.PushBack(JValue(i + offset), allocator); + } + return j; +} + +static JValue makeArrayArray(size_t p, size_t q) { + JValue j = makeArray(p); + for (size_t i = 0; i < p; ++i) { + j[i] = makeArray(q, i); + } + return j; +} + +TEST_F(HashTableTest, checkeq) { + Setup1(); + for (size_t i : {0, 1, 10}) { + ASSERT_EQ(makeArrayArray(i, i), makeArrayArray(i, i)); + } +} + +TEST_F(HashTableTest, insertAndRemoveMany) { + Setup1(1, 5); + for (size_t sz : {10, 50, 100}) { + EXPECT_EQ(malloced, 0); + rapidjson::hashTableStats.reset(); + { + JValue v; + v.SetObject(); + EXPECT_EQ(v.Validate(), ""); + for (size_t i = 0; i < sz; ++i) { + v.AddMember(makeKey(i), makeArrayArray(i, i), allocator); + EXPECT_EQ(v.Validate(), ""); + } + EXPECT_EQ(v.MemberCount(), sz); + EXPECT_GT(rapidjson::hashTableStats.rehashUp, 0); + EXPECT_EQ(rapidjson::hashTableStats.convertToHT, 1); + auto s = keyTable->getStats(); + EXPECT_EQ(s.size, sz); + for (size_t i = 0; i < sz; ++i) EXPECT_EQ(v[makeKey(i)], makeArrayArray(i, i)); + for (size_t i = 0; i < sz; ++i) { + v.RemoveMember(makeKey(i)); + EXPECT_EQ(v.Validate(), ""); + } + EXPECT_GT(rapidjson::hashTableStats.rehashDown, 0); + EXPECT_EQ(v.MemberCount(), 0); + s = keyTable->getStats(); + EXPECT_EQ(s.size, 0); // All entries should be gone. + } + EXPECT_EQ(malloced, 0); + } +} + +TEST_F(HashTableTest, SetObjectRawHT) { + Setup1(); + std::ostringstream os; + os << "{\"a\":1"; + for (size_t i = 0; i < 100; ++i) os << ",\"" << i << "\":" << i; + os << "}"; + JDocument *doc; + JsonUtilCode rc = dom_parse(nullptr, os.str().c_str(), os.str().size(), &doc); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_EQ(rapidjson::hashTableStats.reserveHT, 1); + rapidjson::StringBuffer oss; + dom_serialize(doc, nullptr, oss); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_EQ(oss.GetString(), os.str()); + + dom_free_doc(doc); + auto s = keyTable->getStats(); + EXPECT_EQ(s.size, 0); // All entries should be gone. 
+    EXPECT_EQ(malloced, 0);
+}
+
+TEST_F(HashTableTest, CopyMembers) {
+    Setup1(1, 5);
+    for (size_t sz : {10, 50, 100}) {
+        rapidjson::hashTableStats.reset();
+        JValue v;
+        v.SetObject();
+        EXPECT_EQ(v.Validate(), "");
+        for (size_t i = 0; i < sz; ++i) {
+            v.AddMember(makeKey(i), makeArrayArray(i, i), allocator);
+            EXPECT_EQ(v.Validate(), "");
+        }
+        EXPECT_EQ(v.MemberCount(), sz);
+        EXPECT_GT(rapidjson::hashTableStats.rehashUp, 0);
+        EXPECT_EQ(rapidjson::hashTableStats.convertToHT, 1);
+        auto s = keyTable->getStats();
+        EXPECT_EQ(s.size, sz);
+        EXPECT_EQ(s.handles, sz);
+        {
+            rapidjson::hashTableStats.reset();
+            JValue v2(v, allocator);  // Invokes copymembers
+            EXPECT_EQ(v2.Validate(), "");
+            EXPECT_EQ(v2.MemberCount(), sz);
+            EXPECT_EQ(rapidjson::hashTableStats.rehashUp, 0);
+            EXPECT_EQ(rapidjson::hashTableStats.rehashDown, 0);
+            EXPECT_EQ(rapidjson::hashTableStats.convertToHT, 0);
+            s = keyTable->getStats();
+            EXPECT_EQ(s.size, sz);
+            EXPECT_EQ(s.handles, sz*2);
+            for (size_t i = 0; i < sz; ++i) {
+                EXPECT_EQ(v[makeKey(i)].GetArray(), makeArrayArray(i, i));
+                EXPECT_EQ(v2[makeKey(i)].GetArray(), makeArrayArray(i, i));
+            }
+        }
+    }
+    EXPECT_EQ(malloced, 0);
+}
+
+//
+// Test that hash tables > 2^19 are properly handled.
+//
+TEST_F(HashTableTest, DistributionTest) {
+    extern size_t hash_function(const char *, size_t);
+    Setup1(1, 0, hash_function);
+    enum { TABLE_SIZE_BITS = 22 };  // LOG2(Table Size)
+    enum { TABLE_SIZE = 1ull << TABLE_SIZE_BITS };
+    JValue v;
+    v.SetObject();
+    for (size_t i = 0; i < TABLE_SIZE; ++i) {
+        v.AddMember(makeKey(i), JValue(true), allocator);
+    }
+    //
+    // Now, compute the distribution stats, make sure the longest run is sufficiently small
+    //
+    std::map<size_t, size_t> runs;
+    v.getObjectDistribution(runs, 5);
+    // std::cout << "Dist:";
+    // for (auto& x : runs) std::cout << x.first << ":" << x.second << ",";
+    // std::cout << std::endl;
+    ASSERT_NE(runs.size(), 0u);
+    EXPECT_LT(runs.rbegin()->first, 0.0001 * TABLE_SIZE);
+}
diff --git a/tst/unit/json_test.cc b/tst/unit/json_test.cc
new file mode 100644
index 0000000..d17c129
--- /dev/null
+++ b/tst/unit/json_test.cc
@@ -0,0 +1,249 @@
+#include <gtest/gtest.h>
+#include <cstdint>
+#include <cstring>
+#include <string>
+#include "json/dom.h"
+#include "json/alloc.h"
+#include "json/stats.h"
+#include "json/selector.h"
+
+extern void SetupAllocFuncs(size_t numShards);
+
+class JsonTest : public ::testing::Test {
+    void SetUp() override {
+        JsonUtilCode rc = jsonstats_init();
+        ASSERT_EQ(rc, JSONUTIL_SUCCESS);
+        SetupAllocFuncs(16);
+    }
+    void TearDown() override {
+        delete keyTable;
+        keyTable = nullptr;
+    }
+    void setShards(size_t numShards) {
+        if (keyTable) delete keyTable;
+        SetupAllocFuncs(numShards);
+    }
+};
+
+TEST_F(JsonTest, testArrIndex_fullobjects) {
+    const char *input = "[5, 6, {\"a\":\"b\"}, [99,100], [\"c\"]]";
+
+    JDocument *doc;
+    JsonUtilCode rc = dom_parse(nullptr, input, strlen(input), &doc);
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+
+    jsn::vector<int64_t> indexes;
+    bool is_v2_path;
+    rc = dom_array_index_of(doc, ".", "{\"a\":\"b\"}", 9, 0, 0, indexes, is_v2_path);
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+    EXPECT_FALSE(is_v2_path);
+    EXPECT_EQ(indexes.size(), 1);
+    EXPECT_EQ(indexes[0], 2);
+
+    rc = dom_array_index_of(doc, ".", "[\"c\"]", 5, 0, 0, indexes, is_v2_path);
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+    EXPECT_FALSE(is_v2_path);
+    EXPECT_EQ(indexes.size(), 1);
+    EXPECT_EQ(indexes[0], 4);
+
+    rc = dom_array_index_of(doc, ".", "[99,100]", 8, 0, 0, indexes, is_v2_path);
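+    // The whole nested array [99,100] must match element 3 of the top-level array.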
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+    EXPECT_FALSE(is_v2_path);
+    EXPECT_EQ(indexes.size(), 1);
+    EXPECT_EQ(indexes[0], 3);
+
+    dom_free_doc(doc);
+}
+
+TEST_F(JsonTest, testArrIndex_arr) {
+    const char *input = "{\"a\":[1,2,[15,50],3], \"nested\": {\"a\": [3,4,[5,5]]}}";
+
+    JDocument *doc;
+    JsonUtilCode rc = dom_parse(nullptr, input, strlen(input), &doc);
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+
+    jsn::vector<int64_t> indexes;
+    bool is_v2_path;
+    rc = dom_array_index_of(doc, "$..a", "[15,50]", 7, 0, 0, indexes, is_v2_path);
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+    EXPECT_TRUE(is_v2_path);
+    EXPECT_EQ(indexes.size(), 2);
+    EXPECT_EQ(indexes[0], 2);
+    EXPECT_EQ(indexes[1], -1);
+
+    rc = dom_array_index_of(doc, "$..a", "3", 1, 0, 0, indexes, is_v2_path);
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+    EXPECT_TRUE(is_v2_path);
+    EXPECT_EQ(indexes.size(), 2);
+    EXPECT_EQ(indexes[0], 3);
+    EXPECT_EQ(indexes[1], 0);
+
+    rc = dom_array_index_of(doc, "$..a", "[5,5]", 5, 0, 0, indexes, is_v2_path);
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+    EXPECT_TRUE(is_v2_path);
+    EXPECT_EQ(indexes.size(), 2);
+    EXPECT_EQ(indexes[0], -1);
+    EXPECT_EQ(indexes[1], 2);
+
+    rc = dom_array_index_of(doc, "$..a", "35", 2, 0, 0, indexes, is_v2_path);
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+    EXPECT_TRUE(is_v2_path);
+    EXPECT_EQ(indexes.size(), 2);
+    EXPECT_EQ(indexes[0], -1);
+    EXPECT_EQ(indexes[1], -1);
+
+    dom_free_doc(doc);
+}
+
+TEST_F(JsonTest, testArrIndex_object) {
+    const char *input = "{\"a\":{\"b\":[2,4,{\"a\":4},false,true,{\"b\":false}]}}";
+
+    JDocument *doc;
+    JsonUtilCode rc = dom_parse(nullptr, input, strlen(input), &doc);
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+
+    jsn::vector<int64_t> indexes;
+    bool is_v2_path;
+    rc = dom_array_index_of(doc, "$.a.b", "{\"a\":4}", 7, 0, 0, indexes, is_v2_path);
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+    EXPECT_TRUE(is_v2_path);
+    EXPECT_EQ(indexes.size(), 1);
+    EXPECT_EQ(indexes[0], 2);
+
+    rc = dom_array_index_of(doc, "$.a.b", "{\"b\":false}", 11, 0, 0, indexes, is_v2_path);
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+    EXPECT_TRUE(is_v2_path);
+    EXPECT_EQ(indexes.size(), 1);
+    EXPECT_EQ(indexes[0], 5);
+
+    rc = dom_array_index_of(doc, "$.a.b", "false", 5, 0, 0, indexes, is_v2_path);
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+    EXPECT_TRUE(is_v2_path);
+    EXPECT_EQ(indexes.size(), 1);
+    EXPECT_EQ(indexes[0], 3);
+
+    rc = dom_array_index_of(doc, "$..a", "{\"a\":4}", 7, 0, 0, indexes, is_v2_path);
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+    EXPECT_TRUE(is_v2_path);
+    EXPECT_EQ(indexes.size(), 2);
+    EXPECT_EQ(indexes[0], INT64_MAX);
+    EXPECT_EQ(indexes[1], INT64_MAX);
+
+    rc = dom_array_index_of(doc, "$..a..", "{\"a\":4}", 7, 0, 0, indexes, is_v2_path);
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+    EXPECT_TRUE(is_v2_path);
+    EXPECT_EQ(indexes.size(), 4);
+    EXPECT_EQ(indexes[0], INT64_MAX);
+    EXPECT_EQ(indexes[1], 2);
+    EXPECT_EQ(indexes[2], INT64_MAX);
+    EXPECT_EQ(indexes[3], INT64_MAX);
+
+    dom_free_doc(doc);
+}
+
+TEST_F(JsonTest, testArrIndex_nested_search) {
+    const char *input = "{\"level0\":{\"level1_0\":{\"level2\":"
+        "[1,2,3, [25, [4,5,{\"c\":\"d\"}]]]},"
+        "\"level1_1\":{\"level2\": [[{\"a\":[2,5]}, true, null]]}}}";
+
+    JDocument *doc;
+    JsonUtilCode rc = dom_parse(nullptr, input, strlen(input), &doc);
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+
+    jsn::vector<int64_t> indexes;
+    bool is_v2_path;
+    rc = dom_array_index_of(doc, "$..level0.level1_0..", "[4,5,{\"c\":\"d\"}]", 15, 0, 0, indexes, is_v2_path);
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+    EXPECT_TRUE(is_v2_path);
+    EXPECT_EQ(indexes.size(), 5);
+    EXPECT_EQ(indexes[0], INT64_MAX);
+    EXPECT_EQ(indexes[1], -1);
+    EXPECT_EQ(indexes[2], 1);
+    EXPECT_EQ(indexes[3], -1);
+    EXPECT_EQ(indexes[4], INT64_MAX);
+
+    rc = dom_array_index_of(doc, "$..level0.level1_0..", "[25, [4,5,{\"c\":\"d\"}]]", 21, 0, 0, indexes, is_v2_path);
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+    EXPECT_TRUE(is_v2_path);
+    EXPECT_EQ(indexes.size(), 5);
+    EXPECT_EQ(indexes[0], INT64_MAX);
+    EXPECT_EQ(indexes[1], 3);
+    EXPECT_EQ(indexes[2], -1);
+    EXPECT_EQ(indexes[3], -1);
+    EXPECT_EQ(indexes[4], INT64_MAX);
+
+    rc = dom_array_index_of(doc, "$..level0.level1_0..", "{\"c\":\"d\"}", 9, 0, 0, indexes, is_v2_path);
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+    EXPECT_TRUE(is_v2_path);
+    EXPECT_EQ(indexes.size(), 5);
+    EXPECT_EQ(indexes[0], INT64_MAX);
+    EXPECT_EQ(indexes[1], -1);
+    EXPECT_EQ(indexes[2], -1);
+    EXPECT_EQ(indexes[3], 2);
+    EXPECT_EQ(indexes[4], INT64_MAX);
+
+    rc = dom_array_index_of(doc, "$..level0.level1_0..", "[4,5,{\"a\":\"b\"}]", 15, 0, 0, indexes, is_v2_path);
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+    EXPECT_TRUE(is_v2_path);
+    EXPECT_EQ(indexes.size(), 5);
+    EXPECT_EQ(indexes[0], INT64_MAX);
+    EXPECT_EQ(indexes[1], -1);
+    EXPECT_EQ(indexes[2], -1);
+    EXPECT_EQ(indexes[3], -1);
+    EXPECT_EQ(indexes[4], INT64_MAX);
+
+    rc = dom_array_index_of(doc, "$..level0.level1_1..", "[null,true,{\"a\":[2,5]}]", 23, 0, 0, indexes, is_v2_path);
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+    EXPECT_TRUE(is_v2_path);
+    EXPECT_EQ(indexes.size(), 5);
+    EXPECT_EQ(indexes[0], INT64_MAX);
+    EXPECT_EQ(indexes[1], -1);
+    EXPECT_EQ(indexes[2], -1);
+    EXPECT_EQ(indexes[3], INT64_MAX);
+    EXPECT_EQ(indexes[4], -1);
+
+    rc = dom_array_index_of(doc, "$..level0.level1_1..", "[{\"a\":[2,5]},true,null]", 23, 0, 0, indexes, is_v2_path);
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+    EXPECT_TRUE(is_v2_path);
+    EXPECT_EQ(indexes.size(), 5);
+    EXPECT_EQ(indexes[0], INT64_MAX);
+    EXPECT_EQ(indexes[1], 0);
+    EXPECT_EQ(indexes[2], -1);
+    EXPECT_EQ(indexes[3], INT64_MAX);
+    EXPECT_EQ(indexes[4], -1);
+
+    rc = dom_array_index_of(doc, "$..level0.level1_1..", "[{\"a\":[2,5]},true]", 18, 0, 0, indexes, is_v2_path);
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+    EXPECT_TRUE(is_v2_path);
+    EXPECT_EQ(indexes.size(), 5);
+    EXPECT_EQ(indexes[0], INT64_MAX);
+    EXPECT_EQ(indexes[1], -1);
+    EXPECT_EQ(indexes[2], -1);
+    EXPECT_EQ(indexes[3], INT64_MAX);
+    EXPECT_EQ(indexes[4], -1);
+
+    rc = dom_array_index_of(doc, "$..level0.level1_0..", "[4,{\"c\":\"d\"}]", 13, 0, 0, indexes, is_v2_path);
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+    EXPECT_TRUE(is_v2_path);
+    EXPECT_EQ(indexes.size(), 5);
+    EXPECT_EQ(indexes[0], INT64_MAX);
+    EXPECT_EQ(indexes[1], -1);
+    EXPECT_EQ(indexes[2], -1);
+    EXPECT_EQ(indexes[3], -1);
+    EXPECT_EQ(indexes[4], INT64_MAX);
+
+    dom_free_doc(doc);
+}
diff --git a/tst/unit/keytable_test.cc b/tst/unit/keytable_test.cc
new file mode 100644
index 0000000..90a98e6
--- /dev/null
+++ b/tst/unit/keytable_test.cc
@@ -0,0 +1,393 @@
+#include <gtest/gtest.h>
+#include <limits>
+#include <random>
+#include <string>
+#include <vector>
+#include "json/dom.h"
+#include "json/alloc.h"
+#include "json/stats.h"
+#include "json/keytable.h"
+#include "json/memory.h"
+#include "module_sim.h"
+
+class PtrWithMetaDataTest : public ::testing::Test {
+};
+
+TEST_F(PtrWithMetaDataTest, t) {
+    memory_traps_control(false);  // Necessary so that MEMORY_VALIDATE buried in getPointer doesn't croak on bad memory
+    EXPECT_EQ(0x7FFFF, PtrWithMetaData<size_t>::METADATA_MASK);
+    for (size_t i = 1; i & 0x7FFFF; i <<= 1) {
+        size_t var = 0xdeadbeeffeedfeddull;
+        PtrWithMetaData<size_t> p(&var, i);
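+        // The pointer must round-trip unchanged while the metadata bits carry i.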
+        EXPECT_EQ(&*p, &var);
+        EXPECT_EQ(*p, var);
+        EXPECT_EQ(size_t(p.getMetaData()), i);
+        p.clear();
+        EXPECT_EQ(p.getMetaData(), 0);
+        p.setMetaData(i);
+        EXPECT_EQ(size_t(p.getMetaData()), i);
+    }
+    for (size_t i = 8; i & 0x0000FFFFFFFFFFF8ull; i <<= 1) {
+        PtrWithMetaData<size_t> p(reinterpret_cast<size_t *>(i), 0x7FFFF);
+        EXPECT_EQ(size_t(&*p), i);
+        EXPECT_EQ(p.getMetaData(), 0x7FFFF);
+    }
+}
+
+// Cheap, predictable hash
+static size_t hash1(const char *ptr, size_t len) {
+    (void)ptr;
+    return len;
+}
+
+extern size_t MAX_FAST_TABLE_SIZE;  // in keytable.cc
+
+class KeyTableTest : public ::testing::Test {
+ protected:
+    void SetUp() override {
+    }
+
+    void TearDown() override {
+        if (t) {
+            EXPECT_EQ(t->validate(), "");
+        }
+        delete t;
+    }
+
+    void Setup1(size_t numShards = 1, size_t (*hf)(const char *, size_t) = hash1) {
+        setupValkeyModulePointers();
+        KeyTable::Config c;
+        c.malloc = dom_alloc;
+        c.free = dom_free;
+        c.hash = hf;
+        c.numShards = numShards;
+        t = new KeyTable(c);
+    }
+
+    KeyTable *t = nullptr;
+};
+
+TEST_F(KeyTableTest, layoutTest) {
+    Setup1();
+
+    size_t bias = 10;
+    for (size_t slen : {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+                        0xFF, 0x100, 0xFFFF, 0x10000, 0xFFFFFF, 0x1000000}) {
+        std::string s;
+        s.resize(slen);
+        for (size_t i = 0; i < slen; ++i) {
+            s[i] = i + bias;
+        }
+        KeyTable_Layout *l = KeyTable_Layout::makeLayout(dom_alloc, s.data(), s.length(), 0, false);
+        ASSERT_EQ(l->getLength(), slen);
+        for (size_t i = 0; i < slen; ++i) {
+            ASSERT_EQ(0xFF & (i + bias), 0xFF & (l->getText()[i]));
+        }
+        dom_free(l);
+        bias++;
+    }
+}
+
+TEST_F(KeyTableTest, testInitialization) {
+    Setup1();
+    EXPECT_EQ(t->validate(), "");
+    EXPECT_GT(malloced, 0);
+    EXPECT_EQ(t->validate(), "");
+    auto s = t->getStats();
+    EXPECT_EQ(s.size, 0);
+    EXPECT_EQ(s.handles, 0);
+    EXPECT_EQ(s.bytes, 0);
+    EXPECT_GT(s.maxTableSize, 0);
+    EXPECT_GT(s.totalTable, 0);
+    EXPECT_EQ(s.rehashes, 0);
+    EXPECT_EQ(s.maxSearch, 0);
+    delete t;
+    t = nullptr;
+    EXPECT_EQ(malloced, 0);
+}
+
+TEST_F(KeyTableTest, testDuplication) {
+    std::string e = "Empty";
+    Setup1();
+    auto f = t->getFactors();
+    f.maxLoad = 1.0;  // No rehashes until we're full....
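+    // Pin minLoad at the smallest positive float so shrink rehashes never trigger.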
+    f.minLoad = std::numeric_limits<float>::min();
+    t->setFactors(f);
+    KeyTable_Handle h1 = t->makeHandle(e);
+    EXPECT_TRUE(h1);
+    EXPECT_EQ(t->validate(), "");
+    KeyTable_Handle h2 = t->makeHandle(e);
+    EXPECT_EQ(t->validate(), "");
+    EXPECT_TRUE(h2);
+    EXPECT_EQ(h1, h2);
+    EXPECT_EQ(&*h1, &*h2);
+    auto s = t->getStats();
+    EXPECT_EQ(s.size, 1);
+    EXPECT_EQ(s.handles, 2);
+    EXPECT_EQ(s.bytes, 5);
+    t->destroyHandle(h1);
+    EXPECT_TRUE(!h1);
+    EXPECT_EQ(t->validate(), "");
+    s = t->getStats();
+    EXPECT_EQ(s.size, 1);
+    EXPECT_EQ(s.handles, 1);
+    EXPECT_EQ(s.bytes, 5);
+    t->destroyHandle(h2);
+    EXPECT_TRUE(!h2);
+    EXPECT_EQ(t->validate(), "");
+    s = t->getStats();
+    EXPECT_EQ(s.rehashes, 0);
+}
+
+TEST_F(KeyTableTest, testClone) {
+    std::string e = "Empty";
+    Setup1();
+    auto f = t->getFactors();
+    f.maxLoad = 1.0;  // No rehashes until we're full....
+    f.minLoad = std::numeric_limits<float>::min();
+    t->setFactors(f);
+    KeyTable_Handle h1 = t->makeHandle(e);
+    EXPECT_TRUE(h1);
+    EXPECT_EQ(t->validate(), "");
+    KeyTable_Handle h2 = t->clone(h1);
+    EXPECT_EQ(t->validate(), "");
+    EXPECT_TRUE(h2);
+    EXPECT_EQ(h1, h2);
+    EXPECT_EQ(&*h1, &*h2);
+    auto s = t->getStats();
+    EXPECT_EQ(s.size, 1);
+    EXPECT_EQ(s.handles, 2);
+    EXPECT_EQ(s.bytes, 5);
+    t->destroyHandle(h1);
+    EXPECT_TRUE(!h1);
+    EXPECT_EQ(t->validate(), "");
+    s = t->getStats();
+    EXPECT_EQ(s.size, 1);
+    EXPECT_EQ(s.handles, 1);
+    EXPECT_EQ(s.bytes, 5);
+    t->destroyHandle(h2);
+    EXPECT_TRUE(!h2);
+    EXPECT_EQ(t->validate(), "");
+    s = t->getStats();
+    EXPECT_EQ(s.rehashes, 0);
+}
+
+TEST_F(KeyTableTest, SimpleRehash) {
+    Setup1(1);  // 4 element table is the minimum.
+    auto f = t->getFactors();
+    f.maxLoad = 1.0;  // No rehashes until we're full....
+    f.minLoad = std::numeric_limits<float>::min();
+    f.grow = 1.0;
+    f.shrink = 0.5;
+    t->setFactors(f);
+    std::vector<KeyTable_Handle> h;
+    std::vector<std::string> keys;
+    std::string k = "";
+    for (size_t i = 0; i < 4; ++i) {
+        h.push_back(t->makeHandle(k));
+        keys.push_back(k);
+        auto s = t->getStats();
+        EXPECT_EQ(s.size, i+1);
+        EXPECT_EQ(s.rehashes, 0);
+        EXPECT_EQ(t->validate(), "");
+        k += '*';
+    }
+    for (size_t i = 4; i < 8; ++i) {
+        auto f = t->getFactors();
+        f.maxLoad = i == 4 ? .5 : 1.0;  // No rehashes until we're full....
+        t->setFactors(f);
+
+        h.push_back(t->makeHandle(k));
+        keys.push_back(k);
+        auto s = t->getStats();
+        EXPECT_EQ(s.size, i+1);
+        EXPECT_EQ(s.rehashes, i == 4 ? 1 : 0);
+        EXPECT_EQ(t->validate(), "");
+        k += '*';
+    }
+    //
+    // Now shrink
+    //
+    for (size_t i = 0; i < 4; ++i) {
+        t->destroyHandle(h.back());
+        h.pop_back();
+        auto s = t->getStats();
+        EXPECT_EQ(s.rehashes, 0);
+        EXPECT_EQ(t->validate(), "");
+    }
+    // Next destroyHandle should cause a rehash.
+    for (size_t i = 0; i < 4; ++i) {
+        auto f = t->getFactors();
+        f.minLoad = i == 0 ? .5f : std::numeric_limits<float>::min();  // No rehashes until we're full....
+        t->setFactors(f);
+        t->destroyHandle(h.back());
+        h.pop_back();
+        auto s = t->getStats();
+        EXPECT_EQ(s.maxTableSize, 4);
+        EXPECT_EQ(s.rehashes, i == 0 ? 1 : 0);
+        EXPECT_EQ(t->validate(), "");
+    }
+}
+
+//
+// Generate some strings, duplicates are ok.
+// Because the hash is the length + the last character, the total number of unique strings
+// is only ~10x the max length (from the random distribution).
+//
+std::default_random_engine generator(0);
+std::uniform_int_distribution<size_t> dice(0, 10000);  // there are actually ~10x this number of unique strings
+size_t make_rand() {
+    return dice(generator);
+}
+
+std::string make_key() {
+    size_t len = make_rand();
+    size_t lastDigit = make_rand() % 10;
+    std::string k;
+    for (size_t i = 0; i < len; ++i) k += '*';
+    k += '0' + lastDigit;
+    return k;
+}
+
+TEST_F(KeyTableTest, BigTest) {
+    //
+    // Make a zillion keys. Yes, there will be lots of duplicates -> Intentionally
+    //
+    for (size_t ft : { 1 << 8, 1 << 10, 1 << 12}) {
+        MAX_FAST_TABLE_SIZE = ft;
+        for (size_t numShards : {1, 2}) {
+            for (size_t numKeys : {1000}) {
+                Setup1(numShards);
+                auto f = t->getFactors();
+                f.grow = 1.1;     // Grow slowly
+                f.maxLoad = .95;  // Let the table get REALLY full between hashes
+                t->setFactors(f);
+                std::vector<KeyTable_Handle> h;
+                std::vector<std::string> k;
+                for (size_t i = 0; i < numKeys; ++i) {
+                    k.push_back(make_key());
+                    h.push_back(t->makeHandle(k.back().c_str(), k.back().length()));
+                    if (0 == (i & 0xFF)) {
+                        EXPECT_EQ(t->validate(), "");
+                    }
+                }
+                auto s = t->getStats();
+                EXPECT_EQ(s.handles, k.size());
+                EXPECT_LT(s.size, k.size());  // must have at least one duplicate
+                EXPECT_GT(s.rehashes, 5);     // should have had several rehashes
+                //
+                // now delete them SLOWLY with lots of rehashes
+                //
+                f = t->getFactors();
+                f.shrink = .05;  // Shrink slowly
+                f.minLoad = .9;  // Let the table get REALLY full between hashes
+                t->setFactors(f);
+                for (size_t i = 0; i < numKeys; ++i) {
+                    t->destroyHandle(h[i]);
+                    if (0 == (i & 0xFF)) {
+                        EXPECT_EQ(t->validate(), "");
+                    }
+                }
+                //
+                // Teardown.
+                //
+                EXPECT_EQ(t->validate(), "");
+                s = t->getStats();
+                EXPECT_GT(s.rehashes, 10);
+                EXPECT_EQ(s.size, 0);
+                delete t;
+                t = nullptr;
+            }
+        }
+    }
+}
+
+TEST_F(KeyTableTest, StuckKeys) {
+    Setup1(1);
+    KeyTable_Layout::setMaxRefCount(3);
+    std::string e = "Empty";
+    KeyTable_Handle h1 = t->makeHandle(e);
+    KeyTable_Handle h2 = t->makeHandle(e);
+    KeyTable_Handle h3 = t->makeHandle(e);
+    KeyTable_Handle h4 = t->makeHandle(e);
+    EXPECT_EQ(t->validate(), "");
+    auto s = t->getStats();
+    EXPECT_EQ(s.size, 1);
+    EXPECT_EQ(s.stuckKeys, 1);
+    EXPECT_EQ(s.handles, 4);
+    t->destroyHandle(h1);
+    t->destroyHandle(h2);
+    t->destroyHandle(h3);
+    t->destroyHandle(h4);
+    s = t->getStats();
+    EXPECT_EQ(s.stuckKeys, 1);
+    EXPECT_EQ(s.size, 1);
+    EXPECT_EQ(s.handles, 0);
+}
+
+//
+// Make a very large shard, check some stats, delete the elements and see if it shrinks
+//
+extern size_t hash_function(const char *, size_t);
+
+TEST_F(KeyTableTest, BigShard) {
+    memory_traps_control(false);
+    Setup1(1, hash_function);
+    enum { TABLE_SIZE_BITS = 22 };  // LOG2(Table Size)
+    enum { TABLE_SIZE = 1ull << TABLE_SIZE_BITS };
+    std::vector<KeyTable_Handle> handles1;
+    std::vector<KeyTable_Handle> handles2;
+    //
+    // Fill up the table
+    //
+    for (size_t i = 0; i < TABLE_SIZE; ++i) {
+        handles1.push_back(t->makeHandle(std::to_string(i)));
+    }
+    auto s = t->getStats();
+    EXPECT_EQ(s.size, TABLE_SIZE);
+    EXPECT_EQ(s.handles, TABLE_SIZE);
+    EXPECT_LE(s.rehashes, TABLE_SIZE_BITS);
+    //
+    // Check hash table distribution
+    //
+    auto ls = t->getLongStats(2);
+    EXPECT_EQ(ls.runs.size(), 2);
+    EXPECT_LT(ls.runs.rbegin()->first, 100);  // Only look at second longest run
+    //
+    // Duplicate add of Handle
+    //
+    for (size_t i = 0; i < TABLE_SIZE; ++i) {
+        handles2.push_back(t->makeHandle(std::to_string(i)));
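+        // makeHandle on an existing key must hand back a handle equal to the first one.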
+        EXPECT_EQ(handles1[i], handles2[i]);
+    }
+    s = t->getStats();
+    EXPECT_EQ(s.size, TABLE_SIZE);
+    EXPECT_LE(s.rehashes, 0);
+    EXPECT_EQ(s.handles, 2*TABLE_SIZE);
+    //
+    // Now, delete each handle once. Basically nothing about the table should change
+    //
+    for (auto& h : handles1) { t->destroyHandle(h); }
+    s = t->getStats();
+    EXPECT_EQ(s.size, TABLE_SIZE);
+    EXPECT_EQ(s.handles, TABLE_SIZE);
+    EXPECT_EQ(s.maxSearch, 0);
+    EXPECT_EQ(s.rehashes, 0);
+    //
+    // Now empty the table
+    //
+    for (auto& h : handles2) { t->destroyHandle(h); }
+    s = t->getStats();
+    EXPECT_EQ(s.size, 0);
+    EXPECT_EQ(s.handles, 0);
+    EXPECT_GT(s.rehashes, TABLE_SIZE_BITS - 3);  // Minimum table size
+}
diff --git a/tst/unit/module_sim.cc b/tst/unit/module_sim.cc
new file mode 100644
index 0000000..8bbc927
--- /dev/null
+++ b/tst/unit/module_sim.cc
@@ -0,0 +1,101 @@
+#undef NDEBUG
+#include <cassert>
+#include <cstdarg>
+#include <cstdio>
+
+#include <gtest/gtest.h>
+#include <cstdlib>
+#include <cstring>
+#include <ctime>
+#include <iostream>
+#include <map>
+#include <string>
+
+#include "json/alloc.h"
+#include "json/dom.h"
+#include "json/stats.h"
+#include "json/selector.h"
+#include "module_sim.h"
+
+//
+// Simulate underlying zmalloc stuff, including malloc-size
+//
+static std::map<void *, size_t> malloc_sizes;
+size_t malloced = 0;
+std::string logtext;
+
+static void *test_malloc(size_t s) {
+    void *ptr = malloc(s);
+    assert(malloc_sizes.find(ptr) == malloc_sizes.end());
+    malloc_sizes[ptr] = s;
+    malloced += s;
+    return ptr;
+}
+
+static size_t test_malloc_size(void *ptr) {
+    if (!ptr) return 0;
+    assert(malloc_sizes.find(ptr) != malloc_sizes.end());
+    return malloc_sizes[ptr];
+}
+
+static void test_free(void *ptr) {
+    if (!ptr) return;
+    assert(malloc_sizes.find(ptr) != malloc_sizes.end());
+    ASSERT_GE(malloced, malloc_sizes[ptr]);
+    malloced -= malloc_sizes[ptr];
+    malloc_sizes.erase(malloc_sizes.find(ptr));
+    free(ptr);
+}
+
+static void *test_realloc(void *old_ptr, size_t new_size) {
+    if (old_ptr == nullptr) return test_malloc(new_size);
+    assert(malloc_sizes.find(old_ptr) != malloc_sizes.end());
+    assert(malloced >= malloc_sizes[old_ptr]);
+    malloced -= malloc_sizes[old_ptr];
+    malloc_sizes.erase(malloc_sizes.find(old_ptr));
+    void *new_ptr = realloc(old_ptr, new_size);
+    assert(malloc_sizes.find(new_ptr) == malloc_sizes.end());
+    malloc_sizes[new_ptr] = new_size;
+    malloced += new_size;
+    return new_ptr;
+}
+
+std::string test_getLogText() {
+    std::string result = logtext;
+    logtext.resize(0);
+    return result;
+}
+
+static void test_log(ValkeyModuleCtx *ctx, const char *level, const char *fmt, ...) {
+    (void)ctx;
+    char buffer[256];
+    va_list arg;
+    va_start(arg, fmt);
+    int len = vsnprintf(buffer, sizeof(buffer), fmt, arg);
+    va_end(arg);
+    std::cerr << "Log(" << level << "): " << std::string(buffer, len) << "\n";  // make visible to ASSERT_EXIT
+}
+
+static void test__assert(const char *estr, const char *file, int line) {
+    ASSERT_TRUE(0) << "Assert(" << file << ":" << line << "): " << estr;
+}
+
+static long long test_Milliseconds() {
+    struct timespec t;
+    clock_gettime(CLOCK_REALTIME, &t);
+    return (t.tv_sec * 1000) + (t.tv_nsec / 1000000);
+}
+
+void setupValkeyModulePointers() {
+    ValkeyModule_Alloc = test_malloc;
+    ValkeyModule_Free = test_free;
+    ValkeyModule_Realloc = test_realloc;
+    ValkeyModule_MallocSize = test_malloc_size;
+    ValkeyModule_Log = test_log;
+    ValkeyModule__Assert = test__assert;
+    ValkeyModule_Strdup = strdup;
+    ValkeyModule_Milliseconds = test_Milliseconds;
+    memory_traps_control(true);
+}
diff --git a/tst/unit/module_sim.h b/tst/unit/module_sim.h
new file mode 100644
index 0000000..652ecad
--- /dev/null
+++ b/tst/unit/module_sim.h
@@ -0,0 +1,19 @@
+//
+// Simulate the Valkey Module Environment
+//
+#ifndef VALKEYJSONMODULE_TST_UNIT_MODULE_SIM_H_
+#define VALKEYJSONMODULE_TST_UNIT_MODULE_SIM_H_
+
+#include <cstddef>
+#include <string>
+
+extern size_t malloced;  // Total currently allocated memory
+void setupValkeyModulePointers();
+std::string test_getLogText();
+
+#endif  // VALKEYJSONMODULE_TST_UNIT_MODULE_SIM_H_
+
diff --git a/tst/unit/selector_test.cc b/tst/unit/selector_test.cc
new file mode 100644
index 0000000..13f9e85
--- /dev/null
+++ b/tst/unit/selector_test.cc
@@ -0,0 +1,1344 @@
+#include <gtest/gtest.h>
+#include <cstring>
+#include <string>
+#include "json/dom.h"
+#include "json/selector.h"
+
+extern size_t dummy_malloc_size(void *);
+extern void SetupAllocFuncs(size_t numShards);
+extern std::string& getReplyString();
+extern const char *GetString(ReplyBuffer *b);
+
+class SelectorTest : public ::testing::Test {
+ protected:
+    const char *store = "{\n"
+        "  \"budget\": 10.00,\n"
+        "  \"favorite\": \"Sword of Honour\",\n"
+        "  \"store\": {\n"
+        "    \"books\": [\n"
+        "      {\n"
+        "        \"category\": \"reference\",\n"
+        "        \"author\": \"Nigel Rees\",\n"
+        "        \"title\": \"Sayings of the Century\",\n"
+        "        \"price\": 8.95\n"
+        "      },\n"
+        "      {\n"
+        "        \"category\": \"fiction\",\n"
+        "        \"author\": \"Evelyn Waugh\",\n"
+        "        \"title\": \"Sword of Honour\",\n"
+        "        \"price\": 12.99,\n"
+        "        \"movies\": [\n"
+        "          {\n"
+        "            \"title\": \"Sword of Honour\",\n"
+        "            \"realisator\": {\n"
+        "              \"first_name\": \"Bill\",\n"
+        "              \"last_name\": \"Anderson\"\n"
+        "            }\n"
+        "          }\n"
+        "        ]\n"
+        "      },\n"
+        "      {\n"
+        "        \"category\": \"fiction\",\n"
+        "        \"author\": \"Herman Melville\",\n"
+        "        \"title\": \"Moby Dick\",\n"
+        "        \"isbn\": \"0-553-21311-3\",\n"
+        "        \"price\": 9\n"
+        "      },\n"
+        "      {\n"
+        "        \"category\": \"fiction\",\n"
+        "        \"author\": \"J. R. R. 
Tolkien\",\n" + " \"title\": \"The Lord of the Rings\",\n" + " \"isbn\": \"0-395-19395-8\",\n" + " \"price\": 22.99\n" + " }\n" + " ],\n" + " \"bicycle\": {\n" + " \"color\": \"red\",\n" + " \"price\": 19.95\n" + " }\n" + " }\n" + "}"; + + const char *node_accounts = "{\n" + " \"clientName\": \"jim\",\n" + " \"nameSpace\": \"BobSpace\",\n" + " \"codeName\": \"codeName\",\n" + " \"codeId\": 5555,\n" + " \"codeData\": {\n" + " \"uTaskQueue_CodeData\": [\n" + " {\n" + " \"stuff\": 99\n" + " }\n" + " ]\n" + " },\n" + " \"nodeData\": [\n" + " {\n" + " \"selfNodeId\": 1,\n" + " \"selfAndChildNodeIds\": [\n" + " 1,\n" + " 2,\n" + " 3\n" + " ],\n" + " \"uTaskQueue_NodeData\": [\n" + " {\n" + " \"hidden\": \"1+2+3\",\n" + " \"usercreate\": -1000\n" + " }\n" + " ]\n" + " },\n" + " {\n" + " \"selfNodeId\": 10,\n" + " \"selfAndChildNodeIds\": [\n" + " 10,\n" + " 11,\n" + " 12\n" + " ],\n" + " \"uTaskQueue_NodeData\": [\n" + " {\n" + " \"hidden\": \"10+11+12\",\n" + " \"other_stuff\": 1000\n" + " }\n" + " ]\n" + " }\n" + " ]\n" + "}"; + + void SetUp() override { + SetupAllocFuncs(16); + } + + void TearDown() override { + delete keyTable; + keyTable = nullptr; + } +}; + +TEST_F(SelectorTest, test_filterExpr_attributeFilter) { + JDocument *d1; + JsonUtilCode rc = dom_parse(nullptr, store, strlen(store), &d1); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + + Selector selector; + rc = selector.getValues(*d1, "$.store.books[?(@.isbn)]"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_EQ(selector.getResultSet().size(), 2); + + dom_free_doc(d1); +} + +TEST_F(SelectorTest, test_filterExpr_expression_part1) { + JDocument *d1; + JsonUtilCode rc = dom_parse(nullptr, store, strlen(store), &d1); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + + Selector selector; + rc = selector.getValues(*d1, "$.store.books[?(@.price<10.0)]"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_EQ(selector.getResultSet().size(), 2); + + rc = dom_set_value(nullptr, d1, "$.store.books[?(@.price<10.0)].price", "10.01"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + + rc = selector.getValues(*d1, "$.store.books[?(@.price<10.0)]"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_EQ(selector.getResultSet().size(), 0); + + rc = selector.getValues(*d1, "$.store.books[?(@.price<=1.01e+1)]"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_EQ(selector.getResultSet().size(), 2); + + rc = selector.getValues(*d1, "$.store.books[?(@.price==10.01)]"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_EQ(selector.getResultSet().size(), 2); + + rc = selector.getValues(*d1, "$.store.books[?(@.category==\"fiction\")]"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_EQ(selector.getResultSet().size(), 3); + + rc = selector.getValues(*d1, "$.store.books[?(@.category=='fiction')]"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_EQ(selector.getResultSet().size(), 3); + + dom_free_doc(d1); +} + +TEST_F(SelectorTest, test_filterExpr_expression_part2) { + JDocument *d1; + JsonUtilCode rc = dom_parse(nullptr, store, strlen(store), &d1); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + + Selector selector; + rc = selector.getValues(*d1, "$.store.books[?(@.price<9||@.price>10&&@.isbn)].price"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + auto &rs = selector.getResultSet(); + EXPECT_EQ(rs.size(), 2); + EXPECT_EQ(rs[0].first->GetDouble(), 8.95); + EXPECT_EQ(rs[1].first->GetDouble(), 22.99); + + rc = selector.getValues(*d1, "$.store.books[?((@.price<9||@.price>10)&&@.isbn)].price"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + auto &rs2 = selector.getResultSet(); + EXPECT_EQ(rs2.size(), 1); + EXPECT_EQ(rs2[0].first->GetDouble(), 22.99); + + 
dom_free_doc(d1); +} + +TEST_F(SelectorTest, test_filterExpr_expression_part3) { + JDocument *d1; + JsonUtilCode rc = dom_parse(nullptr, store, strlen(store), &d1); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + + Selector selector; + rc = selector.getValues(*d1, "$[\"budget\"]"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + auto &rs = selector.getResultSet(); + EXPECT_EQ(rs.size(), 1); + EXPECT_EQ(rs[0].first->GetDouble(), 10.00); + + rc = selector.getValues(*d1, "$.store.books[?(@.price<10.0)]"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_EQ(selector.getResultSet().size(), 2); + + rc = selector.getValues(*d1, "$.store.books[?(@.price<$[\"budget\"])]"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_EQ(selector.getResultSet().size(), 2); + + rc = selector.getValues(*d1, "$.store.books[?(@.price<$.store.books[1].price)]"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_EQ(selector.getResultSet().size(), 2); + + rc = selector.getValues(*d1, "$.store.books[?(@.price<$.store.books[-3].price)]"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_EQ(selector.getResultSet().size(), 2); + + rc = selector.getValues(*d1, "$.store.books[?(@.price<$.store.books[+1].price)]"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_EQ(selector.getResultSet().size(), 2); + + rc = selector.getValues(*d1, "$.store.books[?(@.price<$['store'][\"books\"][1].price)]"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_EQ(selector.getResultSet().size(), 2); + + rc = selector.getValues(*d1, "$.store.books[?(@.price<$.store.[\"books\"][1].price)]"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_EQ(selector.getResultSet().size(), 2); + + rc = selector.getValues(*d1, "$.store.books[?(@.price<$.['store'].books[1].price)]"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_EQ(selector.getResultSet().size(), 2); + + rc = selector.getValues(*d1, "$.store.books[?($['store']..books[1].price>@.price)]"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_EQ(selector.getResultSet().size(), 2); + + rc = selector.getValues(*d1, "$[\"favorite\"]"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + auto &rs2 = selector.getResultSet(); + EXPECT_EQ(rs2.size(), 1); + EXPECT_STREQ(rs2[0].first->GetString(), "Sword of Honour"); + + rc = selector.getValues(*d1, "$.store.books[?(@.title==\"Sword of Honour\")]"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_EQ(selector.getResultSet().size(), 1); + + rc = selector.getValues(*d1, "$.store.books[?(@.title==$.favorite)].title"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_EQ(selector.getResultSet().size(), 1); + + rc = selector.getValues(*d1, "$.store.books[?(@.title==$[\"favorite\"])].title"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_EQ(selector.getResultSet().size(), 1); + + rc = selector.getValues(*d1, "$.store.books[?(@.title==$.[\"favorite\"])].title"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_EQ(selector.getResultSet().size(), 1); + + rc = selector.getValues(*d1, "$.store.books[?(@.title==$[\"store\"])]"); + EXPECT_NE(rc, JSONUTIL_SUCCESS); + + rc = selector.getValues(*d1, "$.store.books[?(@.title==$[\"author\"])]"); + EXPECT_NE(rc, JSONUTIL_SUCCESS); + + rc = selector.getValues(*d1, "$.store.books[?(@.title==$[\"nothing\"])]"); + EXPECT_NE(rc, JSONUTIL_SUCCESS); + + dom_free_doc(d1); +} + +TEST_F(SelectorTest, test_filterExpr_expression_part4) { + JDocument *d1; + JsonUtilCode rc = dom_parse(nullptr, store, strlen(store), &d1); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + + Selector selector; + rc = selector.getValues(*d1, "$.store.books[?(10.0>@.price)]"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_EQ(selector.getResultSet().size(), 2); + + rc = 
selector.getValues(*d1, "$.store.books[?($.favorite==@.title)].title"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_EQ(selector.getResultSet().size(), 1); + + rc = selector.getValues(*d1, "$.store.books[?($[\"favorite\"]==@.title)].title"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_EQ(selector.getResultSet().size(), 1); + + rc = selector.getValues(*d1, "$.store.books[?($.[\"favorite\"]==@.title)].title"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_EQ(selector.getResultSet().size(), 1); + + rc = selector.getValues(*d1, "$.store.books[?(9>@.price || 10<@.price && @.isbn)].price"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + auto& rs = selector.getResultSet(); + EXPECT_EQ(rs.size(), 2); + EXPECT_EQ(rs[0].first->GetDouble(), 8.95); + EXPECT_EQ(rs[1].first->GetDouble(), 22.99); + + rc = selector.getValues(*d1, "$.store.books[?(9>@.price||10<@.price&&@.isbn)].price"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + auto& rs2 = selector.getResultSet(); + EXPECT_EQ(rs2.size(), 2); + EXPECT_EQ(rs2[0].first->GetDouble(), 8.95); + EXPECT_EQ(rs2[1].first->GetDouble(), 22.99); + + rc = selector.getValues(*d1, "$.store.books[?((9>@.price||10<@.price)&&@.isbn)].price"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + auto& rs3 = selector.getResultSet(); + EXPECT_EQ(rs3.size(), 1); + EXPECT_EQ(rs3[0].first->GetDouble(), 22.99); + + rc = selector.getValues(*d1, "$.store.books[?($[\"budget\"]>=@.price)]"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_EQ(selector.getResultSet().size(), 2); + + rc = dom_set_value(nullptr, d1, "$.store.books[?(10.0>@.price)].price", "10.01"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + + rc = selector.getValues(*d1, "$.store.books[?(10.0>@.price)]"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_EQ(selector.getResultSet().size(), 0); + + rc = selector.getValues(*d1, "$.store.books[?(1.01e+1>=@.price)]"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_EQ(selector.getResultSet().size(), 2); + + rc = selector.getValues(*d1, "$.store.books[?(10.01==@.price)]"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_EQ(selector.getResultSet().size(), 2); + + dom_free_doc(d1); +} + +TEST_F(SelectorTest, test_filterExpr_expression_part5) { + const char *input = "[1,2,3,4,5]"; + JDocument *d1; + JsonUtilCode rc = dom_parse(nullptr, input, strlen(input), &d1); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + + Selector selector; + rc = selector.getValues(*d1, "$.*.[?(@>2)]"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + auto& rs = selector.getResultSet(); + EXPECT_EQ(rs.size(), 3); + EXPECT_EQ(rs[0].first->GetInt(), 3); + EXPECT_EQ(rs[1].first->GetInt(), 4); + EXPECT_EQ(rs[2].first->GetInt(), 5); + + rc = selector.getValues(*d1, "$.*.[?(2<@)]"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + auto& rs2 = selector.getResultSet(); + EXPECT_EQ(rs2.size(), 3); + EXPECT_EQ(rs2[0].first->GetInt(), 3); + EXPECT_EQ(rs2[1].first->GetInt(), 4); + EXPECT_EQ(rs2[2].first->GetInt(), 5); + + rc = selector.getValues(*d1, "$.*.[?(2<@&&@<5)]"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + auto& rs3 = selector.getResultSet(); + EXPECT_EQ(rs3.size(), 2); + EXPECT_EQ(rs3[0].first->GetInt(), 3); + EXPECT_EQ(rs3[1].first->GetInt(), 4); + + const char *input2 = "[true,false,true]"; + JDocument *d2; + rc = dom_parse(nullptr, input2, strlen(input2), &d2); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + + rc = selector.getValues(*d2, "$..[?(@==true)]"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + auto& rs4 = selector.getResultSet(); + EXPECT_EQ(rs4.size(), 2); + EXPECT_EQ(rs4[0].first->GetBool(), true); + EXPECT_EQ(rs4[1].first->GetBool(), true); + + rc = selector.getValues(*d2, "$..[?(@==false)]"); + EXPECT_EQ(rc, 
JSONUTIL_SUCCESS);
+    auto& rs5 = selector.getResultSet();
+    EXPECT_EQ(rs5.size(), 1);
+    EXPECT_EQ(rs5[0].first->GetBool(), false);
+
+    rc = selector.getValues(*d2, "$..[?(@!=false)]");
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+    auto& rs6 = selector.getResultSet();
+    EXPECT_EQ(rs6.size(), 2);
+    EXPECT_EQ(rs6[0].first->GetBool(), true);
+    EXPECT_EQ(rs6[1].first->GetBool(), true);
+
+    rc = selector.getValues(*d2, "$.*.[?(@!=true)]");
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+    auto& rs7 = selector.getResultSet();
+    EXPECT_EQ(rs7.size(), 1);
+    EXPECT_EQ(rs7[0].first->GetBool(), false);
+
+    rc = selector.getValues(*d2, "$..[?(@>=true)]");
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+    auto& rs8 = selector.getResultSet();
+    EXPECT_EQ(rs8.size(), 2);
+    EXPECT_EQ(rs8[0].first->GetBool(), true);
+    EXPECT_EQ(rs8[1].first->GetBool(), true);
+
+    rc = selector.getValues(*d2, "$..[?(@ <= true)]");
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+    auto& rs9 = selector.getResultSet();
+    EXPECT_EQ(rs9.size(), 3);
+    EXPECT_EQ(rs9[0].first->GetBool(), true);
+    EXPECT_EQ(rs9[1].first->GetBool(), false);
+    EXPECT_EQ(rs9[2].first->GetBool(), true);
+
+    rc = selector.getValues(*d2, "$..[?(@>false)]");
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+    auto& rs10 = selector.getResultSet();
+    EXPECT_EQ(rs10.size(), 2);
+    EXPECT_EQ(rs10[0].first->GetBool(), true);
+    EXPECT_EQ(rs10[1].first->GetBool(), true);
+
+    rc = selector.getValues(*d2, "$..[?(@<true)]");
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+    auto& rs11 = selector.getResultSet();
+    EXPECT_EQ(rs11.size(), 1);
+    EXPECT_EQ(rs11[0].first->GetBool(), false);
+
+    dom_free_doc(d1);
+    dom_free_doc(d2);
+}
+
+TEST_F(SelectorTest, test_filterExpr_expression_part6) {
+    const char *input = "[{\"NumEntry\":1},{\"NumEntry\":2},{\"NumEntry\":3},"
+        "{\"NumEntry\":4},{\"NumEntry\":5},{\"NumEntry\":6}]";
+    JDocument *d1;
+    JsonUtilCode rc = dom_parse(nullptr, input, strlen(input), &d1);
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+    Selector selector;
+
+    rc = selector.getValues(*d1, "$..[?(@.NumEntry>4)].NumEntry");
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+    auto &rs0 = selector.getResultSet();
+    EXPECT_EQ(rs0.size(), 2);
+    EXPECT_EQ(rs0[0].first->GetInt(), 5);
+    EXPECT_EQ(rs0[1].first->GetInt(), 6);
+
+    rc = selector.getValues(*d1, "$..[?(4<@.NumEntry||@.NumEntry<3)].NumEntry");
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+    auto &rs1 = selector.getResultSet();
+    EXPECT_EQ(rs1.size(), 4);
+    EXPECT_EQ(rs1[0].first->GetInt(), 5);
+    EXPECT_EQ(rs1[1].first->GetInt(), 6);
+    EXPECT_EQ(rs1[2].first->GetInt(), 1);
+    EXPECT_EQ(rs1[3].first->GetInt(), 2);
+
+    rc = selector.getValues(*d1, "$..NumEntry[?(@>4)]");
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+    auto &rs2 = selector.getResultSet();
+    EXPECT_EQ(rs2.size(), 2);
+    EXPECT_EQ(rs2[0].first->GetInt(), 5);
+    EXPECT_EQ(rs2[1].first->GetInt(), 6);
+
+    rc = selector.getValues(*d1, "$..[\"NumEntry\"][?(6>@&&@>3)]");
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+    auto &rs3 = selector.getResultSet();
+    EXPECT_EQ(rs3.size(), 2);
+    EXPECT_EQ(rs3[0].first->GetInt(), 4);
+    EXPECT_EQ(rs3[1].first->GetInt(), 5);
+
+    rc = selector.getValues(*d1, "$..NumEntry[?(@.NumEntry)]");
+    EXPECT_EQ(rc, JSONUTIL_INVALID_JSON_PATH);
+
+    dom_free_doc(d1);
+}
+
+TEST_F(SelectorTest, test_filterExpr_expression_part7) {
+    const char *input = "{"
+        "   \"key for key\" : \"key inside here\","
+        "   \"key$for$key\" : \"key inside here\","
+        "   \"key'for'key\" : \"key inside here\","
+        "   \"key\\\"for\\\"key\" : \"key inside here\","
+        "   \"an object\" : {"
+        "       \"weight\" : 300,"
+        "       \"a value\" : 300,"
+        "       \"poquo value\" : \"\\\"\","
+        "       \"my key\" : \"key inside here\""
+        "   },"
+        "   \"anonther object\" : {"
+        "       \"weight\" : 400,"
+        "       \"a value\" : 400,"
+        "       \"poquo value\" : \"'\","
+        "       \"my key\" : 
\"key inside there\"" + " }" + "}"; + + JDocument *d1; + JsonUtilCode rc = dom_parse(nullptr, input, strlen(input), &d1); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + + Selector selector; + rc = selector.getValues(*d1, "$..[?(@[\"my key\"]==\"key inside here\")]"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_EQ(selector.getResultSet().size(), 1); + + rc = selector.getValues(*d1, "$[\"key for key\"]"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_EQ(selector.getResultSet().size(), 1); + + rc = selector.getValues(*d1, "$..[?(@[\"my key\"]==$[\"key for key\"])]"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_EQ(selector.getResultSet().size(), 1); + + rc = selector.getValues(*d1, "$..[?(@[\"my key\"]==$[\"key$for$key\"])].weight"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + auto& rs = selector.getResultSet(); + EXPECT_EQ(rs.size(), 1); + EXPECT_EQ(rs[0].first->GetInt(), 300); + + rc = selector.getValues(*d1, "$..[?(@[\"my key\"]==$[\"key'for'key\"])].weight"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + auto& rs2 = selector.getResultSet(); + EXPECT_EQ(rs2.size(), 1); + EXPECT_EQ(rs2[0].first->GetInt(), 300); + + rc = selector.getValues(*d1, "$..[?(@[\"my key\"]==$[\"key\\\"for\\\"key\"])].weight"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + auto& rs3 = selector.getResultSet(); + EXPECT_EQ(rs3.size(), 1); + EXPECT_EQ(rs3[0].first->GetInt(), 300); + + rc = selector.getValues(*d1, "$..[?(@[\"my key\"]==$[\"key for key\"])].weight"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + auto& rs4 = selector.getResultSet(); + EXPECT_EQ(rs4.size(), 1); + EXPECT_EQ(rs4[0].first->GetInt(), 300); + + rc = selector.getValues(*d1, "$..[?($[\"key for key\"]==@[\"my key\"])].weight"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + auto& rs5 = selector.getResultSet(); + EXPECT_EQ(rs5.size(), 1); + EXPECT_EQ(rs5[0].first->GetInt(), 300); + + rc = selector.getValues(*d1, "$..[?(@[\"poquo value\"]=='\"')].weight"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + auto& rs6 = selector.getResultSet(); + EXPECT_EQ(rs6.size(), 1); + EXPECT_EQ(rs6[0].first->GetInt(), 300); + + rc = selector.getValues(*d1, "$..[?(@[\"poquo value\"]==\"'\")].weight"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + auto& rs7 = selector.getResultSet(); + EXPECT_EQ(rs7.size(), 1); + EXPECT_EQ(rs7[0].first->GetInt(), 400); + + rc = selector.getValues(*d1, "$..[?(@[\"poquo value\"]=='\\\'')].weight"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + auto& rs8 = selector.getResultSet(); + EXPECT_EQ(rs8.size(), 1); + EXPECT_EQ(rs8[0].first->GetInt(), 400); + + rc = selector.getValues(*d1, "$..[?(@[\"my key\"]==$.\"key'for'key\")].weight"); + EXPECT_NE(rc, JSONUTIL_SUCCESS); + + dom_free_doc(d1); +} + +TEST_F(SelectorTest, test_filterExpr_single_recursion_array) { + JDocument *d1; + JsonUtilCode rc = dom_parse(nullptr, node_accounts, strlen(node_accounts), &d1); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + + Selector selector; + rc = selector.getValues(*d1, "$..nodeData[?(@.selfAndChildNodeIds[?(@==10)])]..hidden"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + auto &rs0 = selector.getResultSet(); + EXPECT_EQ(rs0.size(), 1); + EXPECT_STREQ(rs0[0].first->GetString(), "10+11+12"); + + rc = selector.getValues(*d1, "$..nodeData[?(@.selfAndChildNodeIds[?(2==@)])]..hidden"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + auto &rs1 = selector.getResultSet(); + EXPECT_EQ(rs1.size(), 1); + EXPECT_STREQ(rs1[0].first->GetString(), "1+2+3"); + + rc = selector.getValues(*d1, "$..nodeData[?(@.selfAndChildNodeIds[?(100>=@)])]..hidden"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + auto &rs2 = selector.getResultSet(); + EXPECT_EQ(rs2.size(), 2); + 
EXPECT_STREQ(rs2[0].first->GetString(), "1+2+3");
+    EXPECT_STREQ(rs2[1].first->GetString(), "10+11+12");
+
+    rc = selector.getValues(*d1, "$..nodeData[?(@.selfAndChildNodeIds[?(100<=@)])]..hidden");
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+    auto &rs3 = selector.getResultSet();
+    EXPECT_EQ(rs3.size(), 0);
+
+    rc = selector.getValues(*d1, "$..nodeData[?(@.selfAndChildNodeIds[?(@<10)])]..hidden");
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+    auto &rs4 = selector.getResultSet();
+    EXPECT_EQ(rs4.size(), 1);
+    EXPECT_STREQ(rs4[0].first->GetString(), "1+2+3");
+
+    rc = selector.getValues(*d1, "$..nodeData[?(@.selfAndChildNodeIds[?(@<11)])]..hidden");
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+    auto &rs5 = selector.getResultSet();
+    EXPECT_EQ(rs5.size(), 2);
+    EXPECT_STREQ(rs5[0].first->GetString(), "1+2+3");
+    EXPECT_STREQ(rs5[1].first->GetString(), "10+11+12");
+
+    dom_free_doc(d1);
+}
+
+TEST_F(SelectorTest, test_filterExpr_array_index_single_recursion) {
+    JDocument *d1;
+    JsonUtilCode rc = dom_parse(nullptr, node_accounts, strlen(node_accounts), &d1);
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+
+    Selector selector;
+    rc = selector.getValues(*d1, "$..nodeData[?(@.selfAndChildNodeIds[0]==10)]..hidden");
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+    auto &rs0 = selector.getResultSet();
+    EXPECT_EQ(rs0.size(), 1);
+    EXPECT_STREQ(rs0[0].first->GetString(), "10+11+12");
+
+    rc = selector.getValues(*d1, "$..nodeData[?(2==@.selfAndChildNodeIds[1])]..hidden");
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+    auto &rs1 = selector.getResultSet();
+    EXPECT_EQ(rs1.size(), 1);
+    EXPECT_STREQ(rs1[0].first->GetString(), "1+2+3");
+
+    rc = selector.getValues(*d1, "$..nodeData[?(@.selfAndChildNodeIds[0]>0)]..hidden");
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+    auto &rs2 = selector.getResultSet();
+    EXPECT_EQ(rs2.size(), 2);
+    EXPECT_STREQ(rs2[0].first->GetString(), "1+2+3");
+    EXPECT_STREQ(rs2[1].first->GetString(), "10+11+12");
+
+    rc = selector.getValues(*d1, "$..nodeData[?(-5>=@.selfAndChildNodeIds[0])]..hidden");
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+    auto &rs3 = selector.getResultSet();
+    EXPECT_EQ(rs3.size(), 0);
+
+    rc = selector.getValues(*d1, "$..nodeData[?(@.selfAndChildNodeIds[-1]==3)]..hidden");
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+    auto &rs4 = selector.getResultSet();
+    EXPECT_EQ(rs4.size(), 1);
+    EXPECT_STREQ(rs4[0].first->GetString(), "1+2+3");
+
+    rc = selector.getValues(*d1, "$..nodeData[?(@.selfAndChildNodeIds[2]!=17)]..hidden");
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+    auto &rs5 = selector.getResultSet();
+    EXPECT_EQ(rs5.size(), 2);
+    EXPECT_STREQ(rs5[0].first->GetString(), "1+2+3");
+    EXPECT_STREQ(rs5[1].first->GetString(), "10+11+12");
+
+    dom_free_doc(d1);
+}
+
+TEST_F(SelectorTest, test_recursiveDescent_get_part1) {
+    const char *input = "{\"x\": {}, \"y\": {\"a\":\"a\"}, \"z\": {\"a\":\"\", \"b\":\"b\"}}";
+    JDocument *d1;
+    JsonUtilCode rc = dom_parse(nullptr, input, strlen(input), &d1);
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+
+    Selector selector;
+    rc = selector.getValues(*d1, "$..a");
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+    auto &rs = selector.getResultSet();
+    EXPECT_EQ(rs.size(), 2);
+    EXPECT_STREQ(rs[0].first->GetString(), "a");
+    EXPECT_STREQ(rs[1].first->GetString(), "");
+
+    input = "{\"a\":{\"b\":{\"z\":{\"y\":1}}, \"c\":{\"z\":{\"y\":2}}, \"z\":{\"y\":3}}}";
+    JDocument *d2;
+    rc = dom_parse(nullptr, input, strlen(input), &d2);
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+
+    rc = selector.getValues(*d2, "$.a..z.y");
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+    auto &rs2 = selector.getResultSet();
+    EXPECT_EQ(rs2.size(), 3);
+
EXPECT_EQ(rs2[0].first->GetInt(), 3); + EXPECT_EQ(rs2[1].first->GetInt(), 1); + EXPECT_EQ(rs2[2].first->GetInt(), 2); + + rc = selector.getValues(*d1, "$...a"); + EXPECT_EQ(rc, JSONUTIL_INVALID_DOT_SEQUENCE); + + // note explicit check for odd number of dots + rc = selector.getValues(*d2, "$.a...z.y"); + EXPECT_EQ(rc, JSONUTIL_INVALID_DOT_SEQUENCE); + + // note explicit check for even number of dots + rc = selector.getValues(*d2, "$.a.z....y"); + EXPECT_EQ(rc, JSONUTIL_INVALID_DOT_SEQUENCE); + + rc = selector.getValues(*d1, "$........a"); + EXPECT_EQ(rc, JSONUTIL_INVALID_DOT_SEQUENCE); + + rc = selector.getValues(*d2, "$.a........z.y"); + EXPECT_EQ(rc, JSONUTIL_INVALID_DOT_SEQUENCE); + + dom_free_doc(d1); + dom_free_doc(d2); +} + +TEST_F(SelectorTest, test_recursiveDescent_get_part2) { + const char *input = "{\"a\":1, \"b\": {\"e\":[0,1,2]}, \"c\":{\"e\":[10,11,12]}}"; + JDocument *d1; + JsonUtilCode rc = dom_parse(nullptr, input, strlen(input), &d1); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + + Selector selector; + rc = selector.getValues(*d1, "$..e.[*]"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + auto &rs = selector.getResultSet(); + EXPECT_EQ(rs.size(), 6); + EXPECT_EQ(rs[0].first->GetInt(), 0); + EXPECT_EQ(rs[1].first->GetInt(), 1); + EXPECT_EQ(rs[2].first->GetInt(), 2); + EXPECT_EQ(rs[3].first->GetInt(), 10); + EXPECT_EQ(rs[4].first->GetInt(), 11); + EXPECT_EQ(rs[5].first->GetInt(), 12); + + rc = selector.getValues(*d1, "$..e.[1]"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + auto &rs2 = selector.getResultSet(); + EXPECT_EQ(rs2.size(), 2); + EXPECT_EQ(rs2[0].first->GetInt(), 1); + EXPECT_EQ(rs2[1].first->GetInt(), 11); + + rc = selector.getValues(*d1, "$..e.[0:2]"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + auto &rs3 = selector.getResultSet(); + EXPECT_EQ(rs3.size(), 4); + EXPECT_EQ(rs3[0].first->GetInt(), 0); + EXPECT_EQ(rs3[1].first->GetInt(), 1); + EXPECT_EQ(rs3[2].first->GetInt(), 10); + EXPECT_EQ(rs3[3].first->GetInt(), 11); + + dom_free_doc(d1); +} + +TEST_F(SelectorTest, test_recursiveDescent_get_part3) { + const char *input = "{\"a\":1, \"b\": {\"e\":[0,1,2]}, \"c\":{\"e\":[10,11,12]}}"; + JDocument *d1; + JsonUtilCode rc = dom_parse(nullptr, input, strlen(input), &d1); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + + Selector selector; + rc = selector.getValues(*d1, "$..e[*]"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + auto &rs = selector.getResultSet(); + EXPECT_EQ(rs.size(), 6); + EXPECT_EQ(rs[0].first->GetInt(), 0); + EXPECT_EQ(rs[1].first->GetInt(), 1); + EXPECT_EQ(rs[2].first->GetInt(), 2); + EXPECT_EQ(rs[3].first->GetInt(), 10); + EXPECT_EQ(rs[4].first->GetInt(), 11); + EXPECT_EQ(rs[5].first->GetInt(), 12); + + rc = selector.getValues(*d1, "$..e[1]"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + auto &rs2 = selector.getResultSet(); + EXPECT_EQ(rs2.size(), 2); + EXPECT_EQ(rs2[0].first->GetInt(), 1); + EXPECT_EQ(rs2[1].first->GetInt(), 11); + + rc = selector.getValues(*d1, "$..e[0:2]"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + auto &rs3 = selector.getResultSet(); + EXPECT_EQ(rs3.size(), 4); + EXPECT_EQ(rs3[0].first->GetInt(), 0); + EXPECT_EQ(rs3[1].first->GetInt(), 1); + EXPECT_EQ(rs3[2].first->GetInt(), 10); + EXPECT_EQ(rs3[3].first->GetInt(), 11); + + dom_free_doc(d1); +} + +TEST_F(SelectorTest, test_recursiveDescent_get_part4) { + const char *input = "{\"a\":1, \"b\": {\"e\":[0,1,2]}, \"c\":{\"e\":[10,11,12]}}"; + JDocument *d1; + JsonUtilCode rc = dom_parse(nullptr, input, strlen(input), &d1); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + + Selector selector; + rc = selector.getValues(*d1, "$..[\"e\"][*]"); + EXPECT_EQ(rc, 
JSONUTIL_SUCCESS);
+    auto &rs = selector.getResultSet();
+    EXPECT_EQ(rs.size(), 6);
+    EXPECT_EQ(rs[0].first->GetInt(), 0);
+    EXPECT_EQ(rs[1].first->GetInt(), 1);
+    EXPECT_EQ(rs[2].first->GetInt(), 2);
+    EXPECT_EQ(rs[3].first->GetInt(), 10);
+    EXPECT_EQ(rs[4].first->GetInt(), 11);
+    EXPECT_EQ(rs[5].first->GetInt(), 12);
+
+    rc = selector.getValues(*d1, "$..[\"e\"][1]");
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+    auto &rs2 = selector.getResultSet();
+    EXPECT_EQ(rs2.size(), 2);
+    EXPECT_EQ(rs2[0].first->GetInt(), 1);
+    EXPECT_EQ(rs2[1].first->GetInt(), 11);
+
+    rc = selector.getValues(*d1, "$..[\"e\"][0:2]");
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+    auto &rs3 = selector.getResultSet();
+    EXPECT_EQ(rs3.size(), 4);
+    EXPECT_EQ(rs3[0].first->GetInt(), 0);
+    EXPECT_EQ(rs3[1].first->GetInt(), 1);
+    EXPECT_EQ(rs3[2].first->GetInt(), 10);
+    EXPECT_EQ(rs3[3].first->GetInt(), 11);
+
+    dom_free_doc(d1);
+}
+
+TEST_F(SelectorTest, test_recursiveDescent_get_part5) {
+    const char *input = "{\"a\":{\"a\":{\"a\":{\"a\":1}}}}";
+    JDocument *d1;
+    JsonUtilCode rc = dom_parse(nullptr, input, strlen(input), &d1);
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+
+    Selector selector;
+    rc = selector.getValues(*d1, "$..a");
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+    auto &rs = selector.getResultSet();
+    EXPECT_EQ(rs.size(), 4);
+
+    rapidjson::StringBuffer sb;
+    dom_serialize_value(*rs[0].first, nullptr, sb);
+    EXPECT_STREQ(sb.GetString(), "{\"a\":{\"a\":{\"a\":1}}}");
+    sb.Clear();
+    dom_serialize_value(*rs[1].first, nullptr, sb);
+    EXPECT_STREQ(sb.GetString(), "{\"a\":{\"a\":1}}");
+    sb.Clear();
+    dom_serialize_value(*rs[2].first, nullptr, sb);
+    EXPECT_STREQ(sb.GetString(), "{\"a\":1}");
+    sb.Clear();
+    dom_serialize_value(*rs[3].first, nullptr, sb);
+    EXPECT_STREQ(sb.GetString(), "1");
+
+    dom_free_doc(d1);
+}
+
+TEST_F(SelectorTest, test_recursiveInsertUpdateDelete) {
+    JDocument *d1;
+    JsonUtilCode rc = dom_parse(nullptr, store, strlen(store), &d1);
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+
+    Selector selector;
+    rc = selector.getValues(*d1, "$..title");
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+    EXPECT_EQ(selector.getResultSet().size(), 5);
+
+    // recursive insert and update
+    rc = dom_set_value(nullptr, d1, "$..title", "\"foo\"", false, false);
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+    rc = selector.getValues(*d1, "$..title");
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+    EXPECT_EQ(selector.getResultSet().size(), 5);
+    rc = selector.getValues(*d1, "$.title");
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+    EXPECT_EQ(selector.getResultSet().size(), 0);
+    rc = selector.getValues(*d1, "$.store.title");
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+    EXPECT_EQ(selector.getResultSet().size(), 0);
+    rc = selector.getValues(*d1, "$.store.books[1].title");
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+    EXPECT_EQ(selector.getResultSet().size(), 1);
+    rc = selector.getValues(*d1, "$.store.books[1].movies[0].title");
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+    EXPECT_EQ(selector.getResultSet().size(), 1);
+    rc = selector.getValues(*d1, "$.store.books[1].movies[0].realisator.title");
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+    EXPECT_EQ(selector.getResultSet().size(), 0);
+    rc = selector.getValues(*d1, "$.store.books[1].movies[0].title");
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+    EXPECT_EQ(selector.getResultSet().size(), 1);
+    EXPECT_STREQ(selector.getResultSet()[0].first->GetString(), "foo");
+
+    // recursive delete
+    size_t num_vals_deleted;
+    rc = dom_delete_value(d1, "$..title", num_vals_deleted);
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+    EXPECT_EQ(num_vals_deleted, 5);
+    rc = selector.getValues(*d1, "$..title");
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+    EXPECT_TRUE(selector.getResultSet().empty());
+
+    dom_free_doc(d1);
+}
+
+TEST_F(SelectorTest, test_recursiveInsertUpdateDelete2) {
+    JDocument *d1;
+    JsonUtilCode rc = dom_parse(nullptr, store, strlen(store), &d1);
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+
+    Selector selector;
+    rc = selector.getValues(*d1, "$..category");
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+    EXPECT_EQ(selector.getResultSet().size(), 4);
+
+    // recursive insert and update
+    rc = dom_set_value(nullptr, d1, "$..category", "\"foo\"", false, false);
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+    rc = selector.getValues(*d1, "$..category");
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+    EXPECT_EQ(selector.getResultSet().size(), 4);
+    rc = selector.getValues(*d1, "$.category");
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+    EXPECT_EQ(selector.getResultSet().size(), 0);
+    rc = selector.getValues(*d1, "$.store.category");
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+    EXPECT_EQ(selector.getResultSet().size(), 0);
+    rc = selector.getValues(*d1, "$.store.books[1].category");
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+    EXPECT_EQ(selector.getResultSet().size(), 1);
+    EXPECT_STREQ(selector.getResultSet()[0].first->GetString(), "foo");
+
+    // recursive delete
+    size_t num_vals_deleted;
+    rc = dom_delete_value(d1, "$..category", num_vals_deleted);
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+    EXPECT_EQ(num_vals_deleted, 4);
+    rc = selector.getValues(*d1, "$..category");
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+    EXPECT_TRUE(selector.getResultSet().empty());
+
+    dom_free_doc(d1);
+}
+
+TEST_F(SelectorTest, test_recursiveInsertUpdateDelete3) {
+    const char *input = "{\"a\":1, \"b\": {\"e\":[0,1,2]}, \"c\":{\"e\":[10,11,12]}}";
+    JDocument *d1;
+    JsonUtilCode rc = dom_parse(nullptr, input, strlen(input), &d1);
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+
+    Selector selector;
+    rc = selector.getValues(*d1, "$..e[*]");
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+    auto &rs = selector.getResultSet();
+    EXPECT_EQ(rs.size(), 6);
+    EXPECT_EQ(rs[0].first->GetInt(), 0);
+    EXPECT_EQ(rs[1].first->GetInt(), 1);
+    EXPECT_EQ(rs[2].first->GetInt(), 2);
+    EXPECT_EQ(rs[3].first->GetInt(), 10);
+    EXPECT_EQ(rs[4].first->GetInt(), 11);
+    EXPECT_EQ(rs[5].first->GetInt(), 12);
+
+    // recursive insert and update
+    rc = dom_set_value(nullptr, d1, "$..e[*]", "4", false, false);
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+    rc = selector.getValues(*d1, "$..e[*]");
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+    EXPECT_EQ(rs.size(), 6);
+    EXPECT_EQ(rs[0].first->GetInt(), 4);
+    EXPECT_EQ(rs[1].first->GetInt(), 4);
+    EXPECT_EQ(rs[2].first->GetInt(), 4);
+    EXPECT_EQ(rs[3].first->GetInt(), 4);
+    EXPECT_EQ(rs[4].first->GetInt(), 4);
+    EXPECT_EQ(rs[5].first->GetInt(), 4);
+    rc = selector.getValues(*d1, "$.e");
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+    EXPECT_EQ(selector.getResultSet().size(), 0);
+    rc = selector.getValues(*d1, "$.input.e");
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+    EXPECT_EQ(selector.getResultSet().size(), 0);
+
+    // recursive delete
+    size_t num_vals_deleted;
+    rc = dom_delete_value(d1, "$..e", num_vals_deleted);
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+    EXPECT_EQ(num_vals_deleted, 2);
+    rc = selector.getValues(*d1, "$..e");
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+    EXPECT_TRUE(selector.getResultSet().empty());
+
+    dom_free_doc(d1);
+}
+
+TEST_F(SelectorTest, test_deep_recursive_update) {
+    const char *input = "{\"a\":{\"a\":{\"a\":{\"b\":0}}}}";
+    JDocument *d1;
+    JsonUtilCode rc = 
dom_parse(nullptr, input, strlen(input), &d1); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + + Selector selector; + + rc = selector.getValues(*d1, "$..b"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_EQ(selector.getResultSet().size(), 1); + EXPECT_EQ(selector.getResultSet()[0].first->GetInt(), 0); + + rc = dom_set_value(nullptr, d1, "$..b", "1"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + + rc = selector.getValues(*d1, "$..b"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_EQ(selector.getResultSet().size(), 1); + EXPECT_EQ(selector.getResultSet()[0].first->GetInt(), 1); + + dom_free_doc(d1); +} + +TEST_F(SelectorTest, test_deep_recursive_update2) { + const char *input = "{\"a\":{\"a\":{\"a\":{\"a\":{\"z\":\"Z\"}}}}}"; + JDocument *d1; + JsonUtilCode rc = dom_parse(nullptr, input, strlen(input), &d1); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + + Selector selector; + rc = dom_set_value(nullptr, d1, "$..a", "\"R\""); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + + rc = selector.getValues(*d1, "$"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + auto &rs = selector.getResultSet(); + EXPECT_EQ(rs.size(), 1); + rapidjson::StringBuffer sb; + dom_serialize_value(*rs[0].first, nullptr, sb); + EXPECT_STREQ(sb.GetString(), "{\"a\":\"R\"}"); + + dom_free_doc(d1); +} + +TEST_F(SelectorTest, test_deep_recursive_update3) { + const char *input = "{\"a\":{\"a\":{\"b\":{\"a\":{\"z\":\"Z\"}}}}}"; + JDocument *d1; + JsonUtilCode rc = dom_parse(nullptr, input, strlen(input), &d1); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + + Selector selector; + rc = dom_set_value(nullptr, d1, "$..a", "\"R\""); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + + rc = selector.getValues(*d1, "$"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + auto &rs = selector.getResultSet(); + EXPECT_EQ(rs.size(), 1); + rapidjson::StringBuffer sb; + dom_serialize_value(*rs[0].first, nullptr, sb); + EXPECT_STREQ(sb.GetString(), "{\"a\":\"R\"}"); + + dom_free_doc(d1); +} + +TEST_F(SelectorTest, test_deep_recursive_update4) { + const char *input = "{\"b\":{\"a\":{\"a\":{\"a\":{\"z\":\"Z\"}}}}}"; + JDocument *d1; + JsonUtilCode rc = dom_parse(nullptr, input, strlen(input), &d1); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + + Selector selector; + rc = dom_set_value(nullptr, d1, "$..a", "\"R\""); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + + rc = selector.getValues(*d1, "$"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + auto &rs = selector.getResultSet(); + EXPECT_EQ(rs.size(), 1); + rapidjson::StringBuffer sb; + dom_serialize_value(*rs[0].first, nullptr, sb); + EXPECT_STREQ(sb.GetString(), "{\"b\":{\"a\":\"R\"}}"); + + dom_free_doc(d1); +} + +TEST_F(SelectorTest, test_filter_on_object) { + const char *input = "{\"an object\" : {\n" + " \"weight\" : 300,\n" + " \"a value\" : 300,\n" + " \"my key\" : \"key inside here\"\n" + "}}"; + JDocument *d1; + JsonUtilCode rc = dom_parse(nullptr, input, strlen(input), &d1); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + + Selector selector; + rc = selector.getValues(*d1, "$.[\"an object\"].[?(@.weight > 200)].[\"a value\"]"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_EQ(selector.getResultSet().size(), 1); + EXPECT_EQ(selector.getResultSet()[0].first->GetInt(), 300); + + rc = selector.getValues(*d1, "$.[\"an object\"].[?(@.weight > 300)].[\"a value\"]"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_EQ(selector.getResultSet().size(), 0); + + dom_free_doc(d1); +} + +TEST_F(SelectorTest, test_filter_string_comparison) { + const char *input = "{\"objects\": [" + " {" + " \"weight\" : 100," + " \"a value\" : 100," + " \"my key\" : \"key inside here\"" + " }," + " {" + " \"weight\" : 200," + " \"a value\" : 200," + 
" \"my key\" : \"key inside there\"" + " }," + " {" + " \"weight\" : 300," + " \"a value\" : 300," + " \"my key\" : \"key inside here\"" + " }," + " {" + " \"weight\" : 400," + " \"a value\" : 400," + " \"my key\" : \"key inside there\"" + " }" + "]}"; + JDocument *d1; + JsonUtilCode rc = dom_parse(nullptr, input, strlen(input), &d1); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + + Selector selector; + rc = selector.getValues(*d1, "$.[\"objects\"].[?(@.[\"my key\"] == \"key inside there\")].weight"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + auto &rs = selector.getResultSet(); + EXPECT_EQ(rs.size(), 2); + EXPECT_EQ(rs[0].first->GetInt(), 200); + EXPECT_EQ(rs[1].first->GetInt(), 400); + + rc = selector.getValues(*d1, "$.[ \"objects\" ].[?(@.[ \"my key\" ] < \"key inside herf\")].weight"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + auto &rs2 = selector.getResultSet(); + EXPECT_EQ(rs2.size(), 2); + EXPECT_EQ(rs2[0].first->GetInt(), 100); + EXPECT_EQ(rs2[1].first->GetInt(), 300); + + dom_free_doc(d1); +} + +TEST_F(SelectorTest, test_union_member_names) { + const char *input = "{\"a\":1, \"b\": 2, \"c\":3}"; + JDocument *d1; + JsonUtilCode rc = dom_parse(nullptr, input, strlen(input), &d1); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + + Selector selector; + rc = selector.getValues(*d1, "$.[\"a\",\"b\",\"c\"]"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + auto &rs = selector.getResultSet(); + EXPECT_EQ(rs.size(), 3); + EXPECT_EQ(rs[0].first->GetInt(), 1); + EXPECT_EQ(rs[1].first->GetInt(), 2); + EXPECT_EQ(rs[2].first->GetInt(), 3); + + dom_free_doc(d1); +} + +TEST_F(SelectorTest, test_malformed_jsonpath) { + JDocument *d1; + JsonUtilCode rc = dom_parse(nullptr, store, strlen(store), &d1); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + + Selector selector; + rc = selector.getValues(*d1, "$[0:2]$[0:1]$[0:2]$[0:2]$[0<2065>:2]$[0:2]"); + EXPECT_EQ(rc, JSONUTIL_JSON_ELEMENT_NOT_ARRAY); + EXPECT_EQ(selector.getResultSet().size(), 0); + + rc = selector.getValues(*d1, ".[0:2].[0:1].[0:2].[0:2].[0<2065>:2].[0:2]"); + EXPECT_EQ(rc, JSONUTIL_JSON_ELEMENT_NOT_ARRAY); + EXPECT_EQ(selector.getResultSet().size(), 0); + + rc = selector.getValues(*d1, "$[0,1]"); + EXPECT_EQ(rc, JSONUTIL_JSON_ELEMENT_NOT_ARRAY); + EXPECT_EQ(selector.getResultSet().size(), 0); + + dom_free_doc(d1); +} + +TEST_F(SelectorTest, test_escaped_controlCharacters) { + // escaped backslashes, quotes and control characters + const char *input = "{\"a\\\\a\":1, \"b\\tb\":2, \"c\\nc\":3, \"d\\rd\":4, \"e\\be\":5," + " \"f\\\"f\": 6, \"g g\": 7, \"\": 8, \"\'\":9}"; + JDocument *d1; + JsonUtilCode rc = dom_parse(nullptr, input, strlen(input), &d1); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + + Selector selector; + rc = selector.getValues(*d1, "$.[\"a\\\\a\",\"b\\tb\",\"c\\nc\",\"d\\rd\"," + "\"e\\be\",\"f\\\"f\",\"g g\",\"\",\"\'\"]"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + auto &rs = selector.getResultSet(); + EXPECT_EQ(rs.size(), 9); + EXPECT_EQ(rs[0].first->GetInt(), 1); + EXPECT_EQ(rs[1].first->GetInt(), 2); + EXPECT_EQ(rs[2].first->GetInt(), 3); + EXPECT_EQ(rs[3].first->GetInt(), 4); + EXPECT_EQ(rs[4].first->GetInt(), 5); + EXPECT_EQ(rs[5].first->GetInt(), 6); + EXPECT_EQ(rs[6].first->GetInt(), 7); + EXPECT_EQ(rs[7].first->GetInt(), 8); + EXPECT_EQ(rs[8].first->GetInt(), 9); + + input = "{\"value_1\": {\"value\" : 10, \"key\": \"linebreak\\n\"}, \"value_2\" : " + "{\"value\" : 20, \"key\" : \"nolinebreak\"}}"; + JDocument *d2; + rc = dom_parse(nullptr, input, strlen(input), &d2); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + + rc = selector.getValues(*d2, "$..[?(@.key==\"nolinebreak\")].value"); + 
EXPECT_EQ(rc, JSONUTIL_SUCCESS); + auto &rs2 = selector.getResultSet(); + EXPECT_EQ(rs2.size(), 1); + EXPECT_EQ(rs2[0].first->GetInt(), 20); + + rc = selector.getValues(*d2, "$..[?(@.key=='nolinebreak')].value"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + auto &rs3 = selector.getResultSet(); + EXPECT_EQ(rs3.size(), 1); + EXPECT_EQ(rs3[0].first->GetInt(), 20); + + rc = selector.getValues(*d2, "$..[?(@.key==\"linebreak\n\")].value"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + auto &rs4 = selector.getResultSet(); + EXPECT_EQ(rs4.size(), 0); + + rc = selector.getValues(*d2, "$..[?(@.key=='linebreak\n')].value"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + auto &rs5 = selector.getResultSet(); + EXPECT_EQ(rs5.size(), 0); + + rc = selector.getValues(*d2, "$..[?(@.key==\"linebreak\\n\")].value"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + auto &rs6 = selector.getResultSet(); + EXPECT_EQ(rs6.size(), 1); + EXPECT_EQ(rs6[0].first->GetInt(), 10); + + rc = selector.getValues(*d2, "$..[?(@.key=='linebreak\\n')].value"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + auto &rs7 = selector.getResultSet(); + EXPECT_EQ(rs7.size(), 1); + EXPECT_EQ(rs7[0].first->GetInt(), 10); + + dom_free_doc(d2); + dom_free_doc(d1); +} + +TEST_F(SelectorTest, test_escaped_unicode) { + // escaped unicode + const char *input = "{\"key\\u0000\":\"value\\\\u0000\", \"key\\u001F\":\"value\\\\u001F\"}"; + JDocument *d1; + JsonUtilCode rc = dom_parse(nullptr, input, strlen(input), &d1); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + + Selector selector; + rc = selector.getValues(*d1, "$.[\"key\\u0000\",\"key\\u001F\"]"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + auto &rs = selector.getResultSet(); + EXPECT_EQ(rs.size(), 2); + EXPECT_STREQ(rs[0].first->GetString(), "value\\u0000"); + EXPECT_STREQ(rs[1].first->GetString(), "value\\u001F"); + + dom_free_doc(d1); +} + +TEST_F(SelectorTest, test_malformed_query) { + JDocument *d1; + JsonUtilCode rc = dom_parse(nullptr, store, strlen(store), &d1); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + + Selector selector; + rc = selector.getValues(*d1, "&&$.store..price"); + EXPECT_EQ(rc, JSONUTIL_INVALID_MEMBER_NAME); + EXPECT_TRUE(selector.getResultSet().empty()); + dom_free_doc(d1); +} + +TEST_F(SelectorTest, test_delete) { + JDocument *d1; + JsonUtilCode rc = dom_parse(nullptr, store, strlen(store), &d1); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + + // delete + size_t num_vals_deleted; + rc = dom_delete_value(d1, "$.store.books[?(@.category==\"fiction\")]", num_vals_deleted); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_EQ(num_vals_deleted, 3); + + dom_free_doc(d1); +} + +TEST_F(SelectorTest, test_delete_insert) { + JDocument *d1; + const char *json = "{\"a\": { \"b\": { \"c1\": \"abc\", \"c2\": \"foo bar\", \"c3\": \"just a test\" }}}"; + JsonUtilCode rc = dom_parse(nullptr, json, strlen(json), &d1); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + + // delete + size_t num_vals_deleted; + rc = dom_delete_value(d1, "$[\"a\"][\"b\"][\"c2\"]", num_vals_deleted); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_EQ(num_vals_deleted, 1); + + // insert + rc = dom_set_value(nullptr, d1, "$[\"a\"][\"b\"][\"c4\"]", "\"good morning\""); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + + // get + Selector selector; + rc = selector.getValues(*d1, "$.a.b.*"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + auto& rs1 = selector.getResultSet(); + EXPECT_EQ(rs1.size(), 3); + + // delete + rc = dom_delete_value(d1, "$[\"a\"][\"b\"][*]", num_vals_deleted); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_EQ(num_vals_deleted, 3); + + for (int i=0; i < 10; i++) { + // insert + rc = dom_set_value(nullptr, d1, 
"$[\"a\"][\"b\"][\"c\"]", "\"good afternoon\""); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + + // delete + rc = dom_delete_value(d1, "$[\"a\"][\"b\"][*]", num_vals_deleted); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_EQ(num_vals_deleted, 1); + } + + // get + rc = selector.getValues(*d1, "$.a.b"); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + auto& rs = selector.getResultSet(); + EXPECT_EQ(rs.size(), 1); + EXPECT_EQ(rs[0].first->MemberCount(), 0); + + ReplyBuffer oss; + rc = dom_get_value_as_str(d1, "$", nullptr, oss, false); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_STREQ(GetString(&oss), "[{\"a\":{\"b\":{}}}]"); + + dom_free_doc(d1); +} diff --git a/tst/unit/stats_test.cc b/tst/unit/stats_test.cc new file mode 100644 index 0000000..c36b5e6 --- /dev/null +++ b/tst/unit/stats_test.cc @@ -0,0 +1,32 @@ +#include +#include "json/stats.h" + +class StatsTest : public ::testing::Test { +}; + +TEST_F(StatsTest, testFindBucket) { + EXPECT_EQ(jsonstats_find_bucket(0), 0); + EXPECT_EQ(jsonstats_find_bucket(200), 0); + EXPECT_EQ(jsonstats_find_bucket(256), 1); + EXPECT_EQ(jsonstats_find_bucket(500), 1); + EXPECT_EQ(jsonstats_find_bucket(1024), 2); + EXPECT_EQ(jsonstats_find_bucket(2000), 2); + EXPECT_EQ(jsonstats_find_bucket(4*1024), 3); + EXPECT_EQ(jsonstats_find_bucket(5000), 3); + EXPECT_EQ(jsonstats_find_bucket(16*1024), 4); + EXPECT_EQ(jsonstats_find_bucket(50000), 4); + EXPECT_EQ(jsonstats_find_bucket(64*1024), 5); + EXPECT_EQ(jsonstats_find_bucket(100000), 5); + EXPECT_EQ(jsonstats_find_bucket(256*1024), 6); + EXPECT_EQ(jsonstats_find_bucket(1000000), 6); + EXPECT_EQ(jsonstats_find_bucket(1024*1024), 7); + EXPECT_EQ(jsonstats_find_bucket(4000000), 7); + EXPECT_EQ(jsonstats_find_bucket(4*1024*1024), 8); + EXPECT_EQ(jsonstats_find_bucket(5000000), 8); + EXPECT_EQ(jsonstats_find_bucket(16*1024*1024), 9); + EXPECT_EQ(jsonstats_find_bucket(20000000), 9); + EXPECT_EQ(jsonstats_find_bucket(60*1024*1024), 9); + EXPECT_EQ(jsonstats_find_bucket(64*1024*1024), 10); + EXPECT_EQ(jsonstats_find_bucket(90000000), 10); + EXPECT_EQ(jsonstats_find_bucket(1024*1024*1024), 10); +} diff --git a/tst/unit/traps_test.cc b/tst/unit/traps_test.cc new file mode 100644 index 0000000..4c30bc6 --- /dev/null +++ b/tst/unit/traps_test.cc @@ -0,0 +1,180 @@ +#undef NDEBUG +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "json/alloc.h" +#include "json/dom.h" +#include "json/stats.h" +#include "json/selector.h" +#include "module_sim.h" + +extern size_t hash_function(const char *, size_t); + +/* Since unit tests run outside of Valkey server, we need to map Valkey' + * memory management functions to cstdlib functions. 
diff --git a/tst/unit/traps_test.cc b/tst/unit/traps_test.cc
new file mode 100644
index 0000000..4c30bc6
--- /dev/null
+++ b/tst/unit/traps_test.cc
@@ -0,0 +1,180 @@
+#undef NDEBUG
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "json/alloc.h"
+#include "json/dom.h"
+#include "json/stats.h"
+#include "json/selector.h"
+#include "module_sim.h"
+
+extern size_t hash_function(const char *, size_t);
+
+/* Since unit tests run outside of Valkey server, we need to map Valkey's
+ * memory management functions to cstdlib functions.
+ */
+static void SetupAllocFuncs(size_t numShards) {
+    setupValkeyModulePointers();
+    //
+    // Now set up the KeyTable; the RapidJson library depends on it
+    //
+    KeyTable::Config c;
+    c.malloc = memory_alloc;
+    c.free = memory_free;
+    c.hash = hash_function;
+    c.numShards = numShards;
+    keyTable = new KeyTable(c);
+}
+
+class TrapsTest : public ::testing::Test {
+ protected:
+    void SetUp() override {
+        JsonUtilCode rc = jsonstats_init();
+        ASSERT_EQ(rc, JSONUTIL_SUCCESS);
+        SetupAllocFuncs(16);
+    }
+
+    void TearDown() override {
+        delete keyTable;
+        keyTable = nullptr;
+    }
+};
+
+//
+// See if we can start up and shut down with no failures
+//
+TEST_F(TrapsTest, sanity) {
+    void *ptr = dom_alloc(15);
+    dom_free(ptr);
+}
+
+enum JTYPE {
+    JT_BOOLEAN,
+    JT_INTEGER,
+    JT_SHORT_STRING,
+    JT_LONG_STRING,
+    JT_SHORT_DOUBLE,
+    JT_LONG_DOUBLE,
+    JT_ARRAY,
+    JT_OBJECT,
+    JT_OBJECT_HT,
+    JT_NUM_TYPES
+};
+
+static void makeValue(JValue *v, JTYPE jt) {
+    std::string json;
+    switch (jt) {
+        case JT_BOOLEAN:
+            json = "true";
+            break;
+        case JT_INTEGER:
+            json = "1";
+            break;
+        case JT_SHORT_STRING:
+            json = "\"short\"";
+            break;
+        case JT_LONG_STRING:
+            json = "\"string of length large\"";
+            break;
+        case JT_SHORT_DOUBLE:
+            json = "1.2";
+            break;
+        case JT_LONG_DOUBLE:
+            json = "1.23456789101112";
+            break;
+        case JT_ARRAY:
+            json = "[1,2,3,4,5]";
+            break;
+        case JT_OBJECT:
+            json = "{\"a\":1}";
+            break;
+        case JT_OBJECT_HT:
+            json = "{";
+            for (auto s = 0; s < 1000; ++s) {
+                if (s != 0) json += ',';
+                json += '\"';
+                json += std::to_string(s);
+                json += "\":1";
+            }
+            json += '}';
+            break;
+        default:
+            ASSERT_TRUE(0);
+    }
+    JParser parser;
+    *v = parser.Parse(json.c_str(), json.length()).GetJValue();
+}
+
+//
+// Test that corrupted keys are properly detected
+//
+TEST_F(TrapsTest, handle_corruption) {
+    for (auto corruption : {CORRUPT_PREFIX, CORRUPT_LENGTH, CORRUPT_SUFFIX}) {
+        for (auto jt : {JT_OBJECT, JT_OBJECT_HT}) {
+            JValue *v = new JValue;
+            makeValue(v, jt);
+            auto first = v->MemberBegin();
+            auto trap_pointer = &*(first->name);
+            memory_corrupt_memory(trap_pointer, corruption);
+            //
+            // Serialize this object
+            //
+            rapidjson::StringBuffer oss;
+            ASSERT_EXIT(dom_serialize_value(*v, nullptr, oss), testing::ExitedWithCode(1), "Validation Failure");
+            //
+            // Destruct it
+            //
+            ASSERT_EXIT(delete v, testing::ExitedWithCode(1), "Validation Failure");
+            //
+            // Cleanup
+            //
+            memory_uncorrupt_memory(trap_pointer, corruption);
+            delete v;
+        }
+    }
+}
+
+//
+// Test out the JValue validate and dump functions
+//
+TEST_F(TrapsTest, jvalue_validation) {
+    std::string json =
+        "{ \"a\":1, \"b\":[1,2,\"this is a long string\",\"shortstr\",false,true,1.0,1.23456789012345,null]}";
+    JParser parser;
+    JValue *v = new JValue;
+    *v = parser.Parse(json.c_str(), json.length()).GetJValue();
+    std::ostringstream os;
+    DumpRedactedJValue(os, *v);
+    std::cerr << os.str() << "\n";
+    delete v;
+}
+
+//
+// Test Log Stream
+//
+TEST_F(TrapsTest, test_log_stream) {
+    JValue v, v0;
+    v.SetArray();
+    v.PushBack(v0, allocator);
+    DumpRedactedJValue(v, nullptr, "level");
+    std::string log = test_getLogText();
+    std::cerr << log;
+}
diff --git a/tst/unit/util_test.cc b/tst/unit/util_test.cc
new file mode 100644
index 0000000..c905e30
--- /dev/null
+++ b/tst/unit/util_test.cc
@@ -0,0 +1,213 @@
+#include
+#include
+#include
+#include
+#include
+#include "json/util.h"
+#include "json/dom.h"
+#include "json/alloc.h"
+#include "json/stats.h"
+#include "module_sim.h"
+
+extern size_t dummy_malloc_size(void *);
+
+class UtilTest :
public ::testing::Test { + protected: + void SetUp() override { + JsonUtilCode rc = jsonstats_init(); + ASSERT_EQ(rc, JSONUTIL_SUCCESS); + setupValkeyModulePointers(); + } +}; + +TEST_F(UtilTest, testCodeToMessage) { + for (JsonUtilCode code=JSONUTIL_SUCCESS; code < JSONUTIL_LAST; code = JsonUtilCode(code + 1)) { + const char *msg = jsonutil_code_to_message(code); + EXPECT_TRUE(msg != nullptr); + if (code == JSONUTIL_SUCCESS || code == JSONUTIL_WRONG_NUM_ARGS || + code == JSONUTIL_NX_XX_CONDITION_NOT_SATISFIED) { + EXPECT_STREQ(msg, ""); + } else { + EXPECT_GT(strlen(msg), 0); + } + } +} + +TEST_F(UtilTest, testDoubleToString) { + double v = 189.31; + char buf[BUF_SIZE_DOUBLE_JSON]; + size_t len = jsonutil_double_to_string(v, buf, sizeof(buf)); + EXPECT_STREQ(buf, "189.31"); + EXPECT_EQ(len, strlen(buf)); +} + +TEST_F(UtilTest, testDoubleToStringRapidJson) { + double v = 189.31; + char buf[BUF_SIZE_DOUBLE_RAPID_JSON]; + size_t len = jsonutil_double_to_string_rapidjson(v, buf, sizeof(buf)); + EXPECT_STREQ(buf, "189.31"); + EXPECT_EQ(len, strlen(buf)); +} + +TEST_F(UtilTest, testIsInt64) { + EXPECT_TRUE(jsonutil_is_int64(0)); + EXPECT_TRUE(jsonutil_is_int64(1)); + EXPECT_TRUE(jsonutil_is_int64(INT8_MAX)); + EXPECT_TRUE(jsonutil_is_int64(INT8_MIN)); + EXPECT_TRUE(jsonutil_is_int64(INT16_MAX)); + EXPECT_TRUE(jsonutil_is_int64(INT16_MIN)); + EXPECT_TRUE(jsonutil_is_int64(INT32_MAX)); + EXPECT_TRUE(jsonutil_is_int64(INT32_MIN)); + EXPECT_TRUE(jsonutil_is_int64(INT64_MAX >> 1)); + EXPECT_TRUE(jsonutil_is_int64(8223372036854775807LL)); + EXPECT_TRUE(jsonutil_is_int64(INT64_MIN)); + EXPECT_FALSE(jsonutil_is_int64(1e28)); // out of range of int64 + EXPECT_FALSE(jsonutil_is_int64(1.7e308)); // out of range of int64 + EXPECT_FALSE(jsonutil_is_int64(-1e28)); // out of range of int64 + EXPECT_FALSE(jsonutil_is_int64(-1.7e308)); // out of range of int64 + EXPECT_TRUE(jsonutil_is_int64(108.0)); + EXPECT_FALSE(jsonutil_is_int64(108.9)); + EXPECT_FALSE(jsonutil_is_int64(108.0000001)); + EXPECT_TRUE(jsonutil_is_int64(-108.0)); + EXPECT_FALSE(jsonutil_is_int64(-108.9)); + EXPECT_FALSE(jsonutil_is_int64(-108.0000001)); +} + +TEST_F(UtilTest, testMultiplyInt64_overflow) { + // should not overflow + int64_t res; + JsonUtilCode rc = jsonutil_multiply_int64(INT64_MAX, 1, &res); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_EQ(res, INT64_MAX); + + // should overflow + rc = jsonutil_multiply_int64(INT64_MAX, 2, &res); + EXPECT_EQ(rc, JSONUTIL_MULTIPLICATION_OVERFLOW); + rc = jsonutil_multiply_int64(INT64_MAX, INT64_MAX >> 1, &res); + EXPECT_EQ(rc, JSONUTIL_MULTIPLICATION_OVERFLOW); + rc = jsonutil_multiply_int64(INT64_MAX, INT64_MAX, &res); + EXPECT_EQ(rc, JSONUTIL_MULTIPLICATION_OVERFLOW); +} + +TEST_F(UtilTest, testMultiplyInt64_overflow_negative) { + // should not overflow + int64_t res; + JsonUtilCode rc = jsonutil_multiply_int64(INT64_MIN, 1, &res); + EXPECT_EQ(rc, JSONUTIL_SUCCESS); + EXPECT_EQ(res, INT64_MIN); + + // should overflow + rc = jsonutil_multiply_int64(INT64_MIN, 2, &res); + EXPECT_EQ(rc, JSONUTIL_MULTIPLICATION_OVERFLOW); + rc = jsonutil_multiply_int64(INT64_MIN, INT64_MIN >> 1, &res); + EXPECT_EQ(rc, JSONUTIL_MULTIPLICATION_OVERFLOW); + rc = jsonutil_multiply_int64(INT64_MIN, INT64_MAX, &res); + EXPECT_EQ(rc, JSONUTIL_MULTIPLICATION_OVERFLOW); + rc = jsonutil_multiply_int64(INT64_MIN, INT64_MIN, &res); + EXPECT_EQ(rc, JSONUTIL_MULTIPLICATION_OVERFLOW); +} + +TEST_F(UtilTest, testMultiplyDouble) { + double res; + JsonUtilCode rc = jsonutil_multiply_double(5e30, 2, &res); + EXPECT_EQ(rc, 
JSONUTIL_SUCCESS);
+    EXPECT_EQ(res, 1e31);
+
+    rc = jsonutil_multiply_double(5.0e30, 2.0, &res);
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+    EXPECT_EQ(res, 1.0e31);
+}
+
+TEST_F(UtilTest, testMultiplyDouble_overflow) {
+    // should not overflow
+    double res;
+    JsonUtilCode rc = jsonutil_multiply_double(1.7e308, 1.0, &res);
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+    EXPECT_EQ(res, 1.7e308);
+
+    // should overflow
+    rc = jsonutil_multiply_double(1.7e308, 2.0, &res);
+    EXPECT_EQ(rc, JSONUTIL_MULTIPLICATION_OVERFLOW);
+    rc = jsonutil_multiply_double(1.7e308, 1.7e308, &res);
+    EXPECT_EQ(rc, JSONUTIL_MULTIPLICATION_OVERFLOW);
+}
+
+TEST_F(UtilTest, testMultiplyDouble_overflow_negative) {
+    // should not overflow
+    double res;
+    JsonUtilCode rc = jsonutil_multiply_double(-1.7e308, 1.0, &res);
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+    EXPECT_EQ(res, -1.7e308);
+
+    // should overflow
+    rc = jsonutil_multiply_double(-1.7e308, 2.0, &res);
+    EXPECT_EQ(rc, JSONUTIL_MULTIPLICATION_OVERFLOW);
+    rc = jsonutil_multiply_double(-1.7e308, 1.7e308, &res);
+    EXPECT_EQ(rc, JSONUTIL_MULTIPLICATION_OVERFLOW);
+}
+
+TEST_F(UtilTest, testAddInt64_overflow) {
+    // should not overflow
+    int64_t res;
+    JsonUtilCode rc = jsonutil_add_int64(INT64_MAX, 0, &res);
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+    EXPECT_EQ(res, INT64_MAX);
+
+    // should overflow
+    rc = jsonutil_add_int64(INT64_MAX, 1, &res);
+    EXPECT_EQ(rc, JSONUTIL_ADDITION_OVERFLOW);
+    rc = jsonutil_add_int64(INT64_MAX, INT64_MAX >> 1, &res);
+    EXPECT_EQ(rc, JSONUTIL_ADDITION_OVERFLOW);
+    rc = jsonutil_add_int64(INT64_MAX, INT64_MAX, &res);
+    EXPECT_EQ(rc, JSONUTIL_ADDITION_OVERFLOW);
+}
+
+TEST_F(UtilTest, testAddInt64_overflow_negative) {
+    // should not overflow
+    int64_t res;
+    JsonUtilCode rc = jsonutil_add_int64(INT64_MIN, 0, &res);
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+    EXPECT_EQ(res, INT64_MIN);
+
+    // should overflow
+    rc = jsonutil_add_int64(INT64_MIN, -1, &res);
+    EXPECT_EQ(rc, JSONUTIL_ADDITION_OVERFLOW);
+    rc = jsonutil_add_int64(INT64_MIN, INT64_MIN >> 1, &res);
+    EXPECT_EQ(rc, JSONUTIL_ADDITION_OVERFLOW);
+    rc = jsonutil_add_int64(INT64_MIN, INT64_MIN, &res);
+    EXPECT_EQ(rc, JSONUTIL_ADDITION_OVERFLOW);
+}
+
+TEST_F(UtilTest, testAddDouble_overflow) {
+    // should not overflow
+    double res;
+    JsonUtilCode rc = jsonutil_add_double(1.7e308, 0.0, &res);
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+    EXPECT_EQ(res, 1.7e308);
+    rc = jsonutil_add_double(1.7e308, 1.0, &res);
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+    EXPECT_EQ(res, 1.7e308);
+
+    // should overflow
+    rc = jsonutil_add_double(1.7e308, 0.85e308, &res);
+    EXPECT_EQ(rc, JSONUTIL_ADDITION_OVERFLOW);
+    rc = jsonutil_add_double(1.7e308, 1.7e308, &res);
+    EXPECT_EQ(rc, JSONUTIL_ADDITION_OVERFLOW);
+}
+
+TEST_F(UtilTest, testAddDouble_overflow_negative) {
+    // should not overflow
+    double res;
+    JsonUtilCode rc = jsonutil_add_double(-1.7e308, 0.0, &res);
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+    EXPECT_EQ(res, -1.7e308);
+    rc = jsonutil_add_double(-1.7e308, -1.0, &res);
+    EXPECT_EQ(rc, JSONUTIL_SUCCESS);
+    EXPECT_EQ(res, -1.7e308);
+
+    // should overflow
+    rc = jsonutil_add_double(-1.7e308, -0.85e308, &res);
+    EXPECT_EQ(rc, JSONUTIL_ADDITION_OVERFLOW);
+    rc = jsonutil_add_double(-1.7e308, -1.7e308, &res);
+    EXPECT_EQ(rc, JSONUTIL_ADDITION_OVERFLOW);
+}
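The overflow guards these tests exercise can be implemented without undefined behavior via compiler builtins. A minimal sketch consistent with the expectations above (names are illustrative, not the module's actual signatures in `src/json/util.cc`):

```cpp
#include <cstdint>
#include <cmath>

// Illustrative checked arithmetic: __builtin_*_overflow (GCC/Clang) reports
// integer wraparound; doubles saturate to +/-inf on overflow, which we detect.
static bool add_int64_overflows(int64_t a, int64_t b, int64_t *res) {
    return __builtin_add_overflow(a, b, res);
}

static bool multiply_int64_overflows(int64_t a, int64_t b, int64_t *res) {
    return __builtin_mul_overflow(a, b, res);
}

static bool add_double_overflows(double a, double b, double *res) {
    *res = a + b;
    // Overflowed only if the result is infinite while both inputs were finite.
    return std::isinf(*res) && !std::isinf(a) && !std::isinf(b);
}
```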
From 0c6f0da0691464e55c7a797cf94190eb8d9162b3 Mon Sep 17 00:00:00 2001
From: Roshan Khatri
Date: Mon, 18 Nov 2024 23:43:04 +0000
Subject: [PATCH 2/8] Update readme files.

Signed-off-by: Roshan Khatri
---
 README.md                 | 81 +++++++++++++++++++++++++++++++--------
 tst/integration/README.md | 10 +++++
 2 files changed, 75 insertions(+), 16 deletions(-)
 create mode 100644 tst/integration/README.md

diff --git a/README.md b/README.md
index 858b7f9..dd37084 100644
--- a/README.md
+++ b/README.md
@@ -1,22 +1,26 @@
 # ValkeyJSON
-ValkeyJSON introduces a native JSON data type to Valkey open source.
-JSON data interchange standard. With this feature, users can store, query, and modify JSON data structures in Valkey using a comprehensive JSONPath query language. The feature will be compatible with the API and RDB formats supported by Valkey
+ValkeyJSON is a C++ Valkey-Module that provides native JSON (JavaScript Object Notation) support for Valkey. The implementation complies with the RFC7159 and ECMA-404 JSON data interchange standards. Users can natively store, query, and modify JSON data structures using the JSONPath query language. Query expressions support advanced capabilities including wildcard selections, filter expressions, array slices, union operations, and recursive searches.

-## Pre-requisite:
-Python - 3.9
-Pytest - 4
+ValkeyJSON leverages [RapidJSON](https://rapidjson.org/), a high-performance JSON parser and generator for C++, chosen for its small footprint and exceptional performance and memory efficiency. As a header-only library with no external dependencies, RapidJSON provides robust Unicode support while maintaining a compact memory profile of just 16 bytes per JSON value on most 32/64-bit machines.

-## Building ValkeyJSON module and run tests.
+## Motivation
+While Valkey core lacks native JSON support, there's significant community demand for JSON capabilities. ValkeyJSON provides a comprehensive open-source solution with extensive JSON manipulation features.

-To build the module and the tests
+## Building and Testing
+
+#### To build the module and run tests
 ```text
+# Builds the valkey-server (unstable) for integration testing.
+SERVER_VERSION=unstable
 ./build.sh
-```
-## Building ValkeyJSON module only.
+# Builds the valkey-server (8.0.0) for integration testing.
+SERVER_VERSION=8.0.0
+./build.sh
+```

-To build just the module
+#### To build just the module
 ```text
 mkdir build
 cd build
@@ -24,17 +28,62 @@ cmake .. -DVALKEY_VERSION=unstable
 make
 ```

-## Unit Tests
-
-To run all unit tests:
+#### To run all unit tests:
 ```text
 cd build
 make -j unit
 ```

-## Integration Tests
-
-To run all integration tests:
+#### To run all integration tests:
 ```text
 make -j test
 ```
+
+## Load the Module
+To test the module with a Valkey server, you can load the module in any of the following ways:
+
+#### Using valkey.conf:
+```
+1. Add the following to valkey.conf:
+    loadmodule /path/to/libjson.so
+2. Start valkey-server:
+    valkey-server /path/to/valkey.conf
+```
+
+#### Starting valkey with --loadmodule option:
+```text
+valkey-server --loadmodule /path/to/libjson.so
+```
+
+#### Using Valkey command MODULE LOAD:
+```
+1. Connect to a running Valkey instance using valkey-cli
+2. Execute Valkey command:
+   MODULE LOAD /path/to/libjson.so
+```
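Once the module is loaded, a quick smoke test from `valkey-cli` might look like the following (an illustrative session only; exact reply formatting can vary by version). The full command set is listed below.

```text
127.0.0.1:6379> JSON.SET doc $ '{"a":1, "b":[1,2,3]}'
OK
127.0.0.1:6379> JSON.GET doc $.b[1]
"[2]"
127.0.0.1:6379> JSON.DEL doc $.a
(integer) 1
```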
+## Supported Module Commands
+```text
+JSON.ARRAPPEND
+JSON.ARRINDEX
+JSON.ARRINSERT
+JSON.ARRLEN
+JSON.ARRPOP
+JSON.ARRTRIM
+JSON.CLEAR
+JSON.DEBUG
+JSON.DEL
+JSON.FORGET
+JSON.GET
+JSON.MGET
+JSON.MSET
+JSON.NUMINCRBY
+JSON.NUMMULTBY
+JSON.OBJLEN
+JSON.OBJKEYS
+JSON.RESP
+JSON.SET
+JSON.STRAPPEND
+JSON.STRLEN
+JSON.TOGGLE
+JSON.TYPE
+```
\ No newline at end of file
diff --git a/tst/integration/README.md b/tst/integration/README.md
new file mode 100644
index 0000000..14955e3
--- /dev/null
+++ b/tst/integration/README.md
@@ -0,0 +1,10 @@
+# Integration Tests
+
+This directory contains integration tests that verify the interaction between valkey-server and the ValkeyJSON module. Unlike unit tests, which exercise individual components in isolation, these tests validate the system's behavior as a whole.
+
+## Requirements
+
+```text
+python 3.9
+pytest 4
+```
\ No newline at end of file
From c4d2b2f4a1dd208542abc792b6bf19e7a13383f5 Mon Sep 17 00:00:00 2001
From: Roshan Khatri
Date: Tue, 19 Nov 2024 01:47:04 +0000
Subject: [PATCH 3/8] update readme and build script.

Signed-off-by: Roshan Khatri
---
 CMakeLists.txt         | 5 +++--
 README.md              | 4 ++--
 tst/integration/run.sh | 4 ++--
 3 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 85061ee..edec7a0 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -26,7 +26,7 @@ set(JSON_MODULE_LIB json)

 # Define the Valkey directories
 set(VALKEY_DOWNLOAD_DIR "${CMAKE_BINARY_DIR}/_deps/valkey-src")
-set(VALKEY_BIN_DIR "${CMAKE_BINARY_DIR}/_deps/valkey-src/src/valkey/bin")
+set(VALKEY_BIN_DIR "${CMAKE_BINARY_DIR}/_deps/valkey-src/src/valkey/src")

 # Download and build Valkey
 ExternalProject_Add(
@@ -34,7 +34,8 @@ ExternalProject_Add(
     GIT_REPOSITORY https://github.com/valkey-io/valkey.git # Replace with actual URL
     GIT_TAG ${VALKEY_VERSION}
     PREFIX ${VALKEY_DOWNLOAD_DIR}
-    BUILD_COMMAND make -j
+    CONFIGURE_COMMAND ""
+    BUILD_COMMAND make distclean && make -j
     INSTALL_COMMAND ""
     BUILD_IN_SOURCE 1
 )
diff --git a/README.md b/README.md
index dd37084..172ea54 100644
--- a/README.md
+++ b/README.md
@@ -12,11 +12,11 @@ While Valkey core lacks native JSON support, there's significant community deman
 #### To build the module and run tests
 ```text
 # Builds the valkey-server (unstable) for integration testing.
-SERVER_VERSION=unstable
+export SERVER_VERSION=unstable
 ./build.sh

 # Builds the valkey-server (8.0.0) for integration testing.
-SERVER_VERSION=8.0.0
+export SERVER_VERSION=8.0.0
 ./build.sh
 ```
diff --git a/tst/integration/run.sh b/tst/integration/run.sh
index 983ae7c..6d92e9b 100755
--- a/tst/integration/run.sh
+++ b/tst/integration/run.sh
@@ -12,8 +12,8 @@ pkill -9 -f "valkey-benchmark" || true
 DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
 cd "${DIR}"

-export MODULE_PATH=$2/build/src/libjson.so \
-export SERVER_VERSION="unstable"
+export MODULE_PATH=$2/build/src/libjson.so
+echo "Running integration tests against Valkey version: $SERVER_VERSION"

 if [[ ! -z "${TEST_PATTERN}" ]] ; then
-z "${TEST_PATTERN}" ]] ; then export TEST_PATTERN="-k ${TEST_PATTERN}" From 2060b45e91ba649bd3abf3c366f575386e8ee07f Mon Sep 17 00:00:00 2001 From: Roshan Khatri Date: Tue, 26 Nov 2024 19:32:04 +0000 Subject: [PATCH 4/8] update json_api.h Signed-off-by: Roshan Khatri --- src/json/json_api.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/json/json_api.h b/src/json/json_api.h index 6ac3630..782ba0d 100644 --- a/src/json/json_api.h +++ b/src/json/json_api.h @@ -1,5 +1,5 @@ /** - * JSON C API for Search Module + * JSON C API */ #ifndef VALKEYJSONMODULE_JSON_API_H_ #define VALKEYJSONMODULE_JSON_API_H_ From 127d913d3cb5a97edbeb35a6046def7c784f7739 Mon Sep 17 00:00:00 2001 From: Roshan Khatri Date: Tue, 26 Nov 2024 20:46:33 +0000 Subject: [PATCH 5/8] Adds ci.yml Signed-off-by: Roshan Khatri --- .github/workflows/ci.yml | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 .github/workflows/ci.yml diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..3bfbedf --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,27 @@ +name: ci + +on: + push: + pull_request: + +jobs: + build-ubuntu-latest: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + server_version: ['unstable', '8.0.0'] + steps: + - uses: actions/checkout@v4 + - name: Set the server verison for python integeration tests + run: echo "SERVER_VERSION=${{ matrix.server_version }}" >> $GITHUB_ENV + - name: Set up Python + uses: actions/setup-python@v3 + with: + python-version: '3.9' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + - name: Build and Run tests. + run: ./build.sh \ No newline at end of file From 7d831ae3c4d439810cb7866f4c5370b5bf9a89ed Mon Sep 17 00:00:00 2001 From: Roshan Khatri Date: Tue, 26 Nov 2024 20:54:15 +0000 Subject: [PATCH 6/8] update ci.yml Signed-off-by: Roshan Khatri --- .github/workflows/ci.yml | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 3bfbedf..4c2eda9 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,8 +1,6 @@ -name: ci +name: CI -on: - push: - pull_request: +on: [push, pull_request] jobs: build-ubuntu-latest: From e3c192d98d048c73a4b9c34fd5a4d9644c9927a0 Mon Sep 17 00:00:00 2001 From: Roshan Khatri Date: Thu, 28 Nov 2024 01:18:50 +0000 Subject: [PATCH 7/8] Address feedbacks Signed-off-by: Roshan Khatri --- CMakeLists.txt | 6 ++---- README.md | 3 --- src/json/json.cc | 15 +++++++-------- src/json/stats.h | 2 -- 4 files changed, 9 insertions(+), 17 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index edec7a0..f56fe3c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -115,8 +115,8 @@ set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED True) # Always include debug symbols and optimize the code -set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O2 -g") -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O2 -g") +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O3 -g -fno-omit-frame-pointer") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -g -fno-omit-frame-pointer") # RapidJSON SIMD optimization if("${ARCHITECTURE}" STREQUAL "x86_64") @@ -150,10 +150,8 @@ set(RAPIDJSON_BUILD_DOC OFF CACHE BOOL "Build rapidjson documentation" FORCE) # Make Rapidjson available FetchContent_MakeAvailable(rapidjson) -# Add the src subdirectory for building add_subdirectory(src) -# Add the src subdirectory for building add_subdirectory(tst) add_custom_target(test 
diff --git a/README.md b/README.md
index 172ea54..d464963 100644
--- a/README.md
+++ b/README.md
@@ -4,9 +4,6 @@ ValkeyJSON is a C++ Valkey-Module that provides native JSON (JavaScript Object N
 ValkeyJSON leverages [RapidJSON](https://rapidjson.org/), a high-performance JSON parser and generator for C++, chosen for its small footprint and exceptional performance and memory efficiency. As a header-only library with no external dependencies, RapidJSON provides robust Unicode support while maintaining a compact memory profile of just 16 bytes per JSON value on most 32/64-bit machines.

-## Motivation
-While Valkey core lacks native JSON support, there's significant community demand for JSON capabilities. ValkeyJSON provides a comprehensive open-source solution with extensive JSON manipulation features.
-
 ## Building and Testing

 #### To build the module and run tests
diff --git a/src/json/json.cc b/src/json/json.cc
index a8d7cca..8a74b49 100644
--- a/src/json/json.cc
+++ b/src/json/json.cc
@@ -8,16 +8,15 @@
 * 4. register commands that are all prefixed with "JSON.".
 *
 * Design Considerations:
- * 1. Command API: see API.md.
- * 2. All JSON CRUD operations should be delegated to the DOM module.
- * 3. Shared utility/helper code should reside in the UTIL module.
- * 4. When invoking a DOM or UTIL method tha returns a heap-allocated object, the caller must release the memory
+ * 1. All JSON CRUD operations should be delegated to the DOM module.
+ * 2. Shared utility/helper code should reside in the UTIL module.
+ * 3. When invoking a DOM or UTIL method that returns a heap-allocated object, the caller must release the memory
 *    after consuming it.
- * 5. The first line of every command handler should be: "ValkeyModule_AutoMemory(ctx);". This is for enabling
+ * 4. The first line of every command handler should be: "ValkeyModule_AutoMemory(ctx);". This is for enabling
 *    auto memory management for the command.
- * 6. Every write command must support replication. Call "ValkeyModule_ReplicateVerbatim(ctx)" to tell Valkey to
+ * 5. Every write command must support replication. Call "ValkeyModule_ReplicateVerbatim(ctx)" to tell Valkey to
 *    replicate the command.
- * 7. Any write command that increases total memory utilization, should be created using "write deny-oom" flags.
+ * 6. Any write command that increases total memory utilization should be created using "write deny-oom" flags.
 *    e.g., JSON.SET should be defined as "write deny-oom", while JSON.DEL does not need "deny-oom" as it can't
 *    increase the total memory.
 *
@@ -2634,7 +2633,7 @@ bool checkString(ValkeyModuleIO *ctx, const char *value, const char *caller) {
 }

 /*
- * Check an integer value, fail
+ * Check an integer value, fail if the value doesn't match the expected one
 */
 bool checkInt(ValkeyModuleIO *ctx, uint64_t value, const char *caller) {
     uint64_t val = ValkeyModule_LoadUnsigned(ctx);
diff --git a/src/json/stats.h b/src/json/stats.h
index d2a6f73..cce6756 100644
--- a/src/json/stats.h
+++ b/src/json/stats.h
@@ -3,8 +3,6 @@
 * 1. Core metrics:
 *    json_total_memory_bytes: total memory allocated to JSON objects
 *    json_num_documents: number of document keys in Valkey
- *    json_num_reads: number of reads
- *    json_num_writes: number of writes
 * 2. Histograms:
 *    json_doc_histogram: static histogram showing document size distribution. Value of the i_th element is
 *    number of documents whose size fall into bucket i.
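Design consideration 3 above — DOM/UTIL results are owned by the caller — is the pattern the unit tests earlier in this series follow throughout. A minimal sketch of the parse/use/free cycle, using `dom_parse` and `dom_free_doc` exactly as those tests do (the failure-path behavior is an assumption):

```cpp
#include <cstring>
#include "json/dom.h"

// Minimal sketch of the caller-owns-the-result rule: dom_parse returns a
// heap-allocated JDocument through its out-parameter, and the caller must
// release it with dom_free_doc once finished.
void parse_use_free_example(const char *json) {
    JDocument *doc = nullptr;
    JsonUtilCode rc = dom_parse(nullptr, json, strlen(json), &doc);
    if (rc != JSONUTIL_SUCCESS) return;  // assumed: nothing allocated on failure
    // ... query or mutate the document via the DOM API ...
    dom_free_doc(doc);  // caller releases the document
}
```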
From 9d9ebcb07b7bf7264c5c15840fe4cfa800bf0232 Mon Sep 17 00:00:00 2001 From: Roshan Khatri Date: Thu, 28 Nov 2024 01:28:25 +0000 Subject: [PATCH 8/8] printout actual ${ARCHITECTURE} Signed-off-by: Roshan Khatri --- CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index f56fe3c..b29ac9e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -15,7 +15,7 @@ if("${ARCHITECTURE}" STREQUAL "x86_64") elseif("${ARCHITECTURE}" STREQUAL "aarch64") message("Building JSON for aarch64") else() - message(FATAL_ERROR "Unsupported architecture. JSON is only supported on x86_64 and aarch64.") + message(FATAL_ERROR "Unsupported architecture: ${ARCHITECTURE}. JSON is only supported on x86_64 and aarch64.") endif() # Project definition @@ -124,7 +124,7 @@ if("${ARCHITECTURE}" STREQUAL "x86_64") elseif("${ARCHITECTURE}" STREQUAL "aarch64") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=armv8-a") else() - message(FATAL_ERROR "Unsupported architecture. JSON is only supported on x86_64 and aarch64.") + message(FATAL_ERROR "Unsupported architecture: ${ARCHITECTURE}. JSON is only supported on x86_64 and aarch64.") endif() # Additional flags for all architectures