diff --git a/.clang-tidy b/.clang-tidy index ecb8ac6dcbf4..ddd0ee6d911c 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -142,6 +142,7 @@ Checks: '-*, clang-analyzer-cplusplus.PlacementNewChecker, clang-analyzer-cplusplus.SelfAssignment, clang-analyzer-deadcode.DeadStores, + clang-analyzer-cplusplus.Move, clang-analyzer-optin.cplusplus.VirtualCall, clang-analyzer-security.insecureAPI.UncheckedReturn, clang-analyzer-security.insecureAPI.bcmp, diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 82a16d0589f8..6540b60476f9 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -13,13 +13,4 @@ Changelog entry (a user-readable short description of the changes that goes to C ... -Detailed description / Documentation draft: -... - - -> By adding documentation, you'll allow users to try your new feature immediately, not when someone else will have time to document it later. Documentation is necessary for all features that affect user experience in any way. You can add brief documentation draft above, or add documentation right into your patch as Markdown files in [docs](https://github.com/ClickHouse/ClickHouse/tree/master/docs) folder. - -> If you are doing this for the first time, it's recommended to read the lightweight [Contributing to ClickHouse Documentation](https://github.com/ClickHouse/ClickHouse/tree/master/docs/README.md) guide first. - - > Information about CI checks: https://clickhouse.tech/docs/en/development/continuous-integration/ diff --git a/.github/actionlint.yml b/.github/actionlint.yml index 8083186117fa..0f88f30d42c0 100644 --- a/.github/actionlint.yml +++ b/.github/actionlint.yml @@ -1,8 +1,9 @@ self-hosted-runner: labels: - builder + - func-tester + - func-tester-aarch64 - fuzzer-unit-tester - stress-tester - style-checker - - func-tester-aarch64 - - func-tester + - style-checker-aarch64 diff --git a/.github/workflows/backport_branches.yml b/.github/workflows/backport_branches.yml index 749c248af829..75f8a63368d8 100644 --- a/.github/workflows/backport_branches.yml +++ b/.github/workflows/backport_branches.yml @@ -10,7 +10,7 @@ on: # yamllint disable-line rule:truthy - 'backport/**' jobs: DockerHubPushAarch64: - runs-on: [self-hosted, func-tester-aarch64] + runs-on: [self-hosted, style-checker-aarch64] steps: - name: Clear repository run: | diff --git a/.github/workflows/docs_check.yml b/.github/workflows/docs_check.yml index 633e654d6561..d5b56bfef32d 100644 --- a/.github/workflows/docs_check.yml +++ b/.github/workflows/docs_check.yml @@ -30,7 +30,7 @@ jobs: python3 run_check.py DockerHubPushAarch64: needs: CheckLabels - runs-on: [self-hosted, func-tester-aarch64] + runs-on: [self-hosted, style-checker-aarch64] steps: - name: Clear repository run: | diff --git a/.github/workflows/docs_release.yml b/.github/workflows/docs_release.yml index 54e1f27ab7cb..66838a055527 100644 --- a/.github/workflows/docs_release.yml +++ b/.github/workflows/docs_release.yml @@ -20,7 +20,7 @@ on: # yamllint disable-line rule:truthy workflow_dispatch: jobs: DockerHubPushAarch64: - runs-on: [self-hosted, func-tester-aarch64] + runs-on: [self-hosted, style-checker-aarch64] steps: - name: Clear repository run: | diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index 3970e64f9590..c32896205d5e 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -9,8 +9,20 @@ on: # yamllint disable-line rule:truthy branches: - 'master' jobs: + PythonUnitTests: + runs-on: [self-hosted, style-checker] + 
steps: + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Python unit tests + run: | + cd "$GITHUB_WORKSPACE/tests/ci" + python3 -m unittest discover -s . -p '*_test.py' DockerHubPushAarch64: - runs-on: [self-hosted, func-tester-aarch64] + runs-on: [self-hosted, style-checker-aarch64] steps: - name: Clear repository run: | @@ -44,7 +56,7 @@ jobs: name: changed_images_amd64 path: ${{ runner.temp }}/docker_images_check/changed_images_amd64.json DockerHubPush: - needs: [DockerHubPushAmd64, DockerHubPushAarch64] + needs: [DockerHubPushAmd64, DockerHubPushAarch64, PythonUnitTests] runs-on: [self-hosted, style-checker] steps: - name: Clear repository @@ -74,6 +86,7 @@ jobs: StyleCheck: needs: DockerHubPush runs-on: [self-hosted, style-checker] + if: ${{ success() || failure() }} steps: - name: Set envs run: | @@ -81,6 +94,8 @@ jobs: TEMP_PATH=${{ runner.temp }}/style_check EOF - name: Download changed images + # even if artifact does not exist, e.g. on `do not test` label or failed Docker job + continue-on-error: true uses: actions/download-artifact@v2 with: name: changed_images diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 0bd02de48d09..81a0cb68bd94 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -31,9 +31,22 @@ jobs: run: | cd "$GITHUB_WORKSPACE/tests/ci" python3 run_check.py + PythonUnitTests: + needs: CheckLabels + runs-on: [self-hosted, style-checker] + steps: + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Python unit tests + run: | + cd "$GITHUB_WORKSPACE/tests/ci" + python3 -m unittest discover -s . -p '*_test.py' DockerHubPushAarch64: needs: CheckLabels - runs-on: [self-hosted, func-tester-aarch64] + runs-on: [self-hosted, style-checker-aarch64] steps: - name: Clear repository run: | @@ -68,7 +81,7 @@ jobs: name: changed_images_amd64 path: ${{ runner.temp }}/docker_images_check/changed_images_amd64.json DockerHubPush: - needs: [DockerHubPushAmd64, DockerHubPushAarch64] + needs: [DockerHubPushAmd64, DockerHubPushAarch64, PythonUnitTests] runs-on: [self-hosted, style-checker] steps: - name: Clear repository @@ -98,6 +111,7 @@ jobs: StyleCheck: needs: DockerHubPush runs-on: [self-hosted, style-checker] + if: ${{ success() || failure() }} steps: - name: Set envs run: | @@ -105,6 +119,8 @@ jobs: TEMP_PATH=${{ runner.temp }}/style_check EOF - name: Download changed images + # even if artifact does not exist, e.g. 
on `do not test` label or failed Docker job + continue-on-error: true uses: actions/download-artifact@v2 with: name: changed_images diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 77bc285196c2..46e36c846d04 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -22,7 +22,6 @@ jobs: - name: Check out repository code uses: actions/checkout@v2 - name: Download packages and push to Artifactory - env: run: | rm -rf "$TEMP_PATH" && mkdir -p "$REPO_COPY" cp -r "$GITHUB_WORKSPACE" "$REPO_COPY" diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml index 4ab2638069c3..d916699acc2c 100644 --- a/.github/workflows/release_branches.yml +++ b/.github/workflows/release_branches.yml @@ -13,7 +13,7 @@ on: # yamllint disable-line rule:truthy jobs: DockerHubPushAarch64: - runs-on: [self-hosted, func-tester-aarch64] + runs-on: [self-hosted, style-checker-aarch64] steps: - name: Clear repository run: | diff --git a/.github/workflows/tags_stable.yml b/.github/workflows/tags_stable.yml new file mode 100644 index 000000000000..30b6bfb027e7 --- /dev/null +++ b/.github/workflows/tags_stable.yml @@ -0,0 +1,38 @@ +name: TagsStableWorkflow +# - Gets artifacts from S3 +# - Sends it to JFROG Artifactory +# - Adds them to the release assets + +on: # yamllint disable-line rule:truthy + push: + tags: + - 'v*-stable' + - 'v*-lts' + + +jobs: + UpdateVersions: + runs-on: [self-hosted, style-checker] + steps: + - name: Get tag name + run: echo "GITHUB_TAG=${GITHUB_REF#refs/tags/}" >> "$GITHUB_ENV" + - name: Check out repository code + uses: actions/checkout@v2 + with: + ref: master + - name: Generate versions + run: | + git fetch --tags + ./utils/list-versions/list-versions.sh > ./utils/list-versions/version_date.tsv + - name: Create Pull Request + uses: peter-evans/create-pull-request@v3 + with: + commit-message: Update version_date.tsv after ${{ env.GITHUB_TAG }} + branch: auto/${{ env.GITHUB_TAG }} + delete-branch: true + title: Update version_date.tsv after ${{ env.GITHUB_TAG }} + body: | + Update version_date.tsv after ${{ env.GITHUB_TAG }} + + Changelog category (leave one): + - Not for changelog (changelog entry is not required) diff --git a/.gitmodules b/.gitmodules index 04d32f4af40e..91f4ddb20071 100644 --- a/.gitmodules +++ b/.gitmodules @@ -217,6 +217,9 @@ [submodule "contrib/yaml-cpp"] path = contrib/yaml-cpp url = https://github.com/ClickHouse-Extras/yaml-cpp.git +[submodule "contrib/cld2"] + path = contrib/cld2 + url = https://github.com/ClickHouse-Extras/cld2.git [submodule "contrib/libstemmer_c"] path = contrib/libstemmer_c url = https://github.com/ClickHouse-Extras/libstemmer_c.git @@ -247,9 +250,15 @@ [submodule "contrib/sysroot"] path = contrib/sysroot url = https://github.com/ClickHouse-Extras/sysroot.git +[submodule "contrib/nlp-data"] + path = contrib/nlp-data + url = https://github.com/ClickHouse-Extras/nlp-data.git [submodule "contrib/hive-metastore"] path = contrib/hive-metastore url = https://github.com/ClickHouse-Extras/hive-metastore [submodule "contrib/azure"] path = contrib/azure url = https://github.com/ClickHouse-Extras/azure-sdk-for-cpp.git +[submodule "contrib/minizip-ng"] + path = contrib/minizip-ng + url = https://github.com/zlib-ng/minizip-ng diff --git a/CHANGELOG.md b/CHANGELOG.md index 87860deea9d1..1e4ea95c08c3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,7 +12,7 @@ * A tool for collecting diagnostics data if you need support. 
[#33175](https://github.com/ClickHouse/ClickHouse/pull/33175) ([Alexander Burmak](https://github.com/Alex-Burmak)). * Automatic cluster discovery via Zoo/Keeper. It allows to add replicas to the cluster without changing configuration on every server. [#31442](https://github.com/ClickHouse/ClickHouse/pull/31442) ([vdimir](https://github.com/vdimir)). * Implement hive table engine to access apache hive from clickhouse. This implements: [#29245](https://github.com/ClickHouse/ClickHouse/issues/29245). [#31104](https://github.com/ClickHouse/ClickHouse/pull/31104) ([taiyang-li](https://github.com/taiyang-li)). -* Add aggregate functions `cramersV`, `cramersVBiasCorrected`, `theilsU` and `contingency`. These functions calculate dependency (measure of association) between categorical values. All these functions are using cross-tab (histogram on pairs) for implementation. You can imagine it like a correlation coefficient but for any discrete values (not necessary numbers). [#33366](https://github.com/ClickHouse/ClickHouse/pull/33366) ([alexey-milovidov](https://github.com/alexey-milovidov)). Initial implementation by TODO +* Add aggregate functions `cramersV`, `cramersVBiasCorrected`, `theilsU` and `contingency`. These functions calculate dependency (measure of association) between categorical values. All these functions are using cross-tab (histogram on pairs) for implementation. You can imagine it like a correlation coefficient but for any discrete values (not necessary numbers). [#33366](https://github.com/ClickHouse/ClickHouse/pull/33366) ([alexey-milovidov](https://github.com/alexey-milovidov)). Initial implementation by [Vanyok-All-is-OK](https://github.com/Vanyok-All-is-OK) and [antikvist](https://github.com/antikvist). * Added table function `hdfsCluster` which allows processing files from HDFS in parallel from many nodes in a specified cluster, similarly to `s3Cluster`. [#32400](https://github.com/ClickHouse/ClickHouse/pull/32400) ([Zhichang Yu](https://github.com/yuzhichang)). * Adding support for disks backed by Azure Blob Storage, in a similar way it has been done for disks backed by AWS S3. [#31505](https://github.com/ClickHouse/ClickHouse/pull/31505) ([Jakub Kuklis](https://github.com/jkuklis)). * Allow `COMMENT` in `CREATE VIEW` (for all VIEW kinds). [#31062](https://github.com/ClickHouse/ClickHouse/pull/31062) ([Vasily Nemkov](https://github.com/Enmk)). @@ -23,7 +23,6 @@ * Added function `arrayLast`. Closes [#33390](https://github.com/ClickHouse/ClickHouse/issues/33390). [#33415](https://github.com/ClickHouse/ClickHouse/pull/33415) Added function `arrayLastIndex`. [#33465](https://github.com/ClickHouse/ClickHouse/pull/33465) ([Maksim Kita](https://github.com/kitaisreal)). * Add function `decodeURLFormComponent` slightly different to `decodeURLComponent`. Close [#10298](https://github.com/ClickHouse/ClickHouse/issues/10298). [#33451](https://github.com/ClickHouse/ClickHouse/pull/33451) ([SuperDJY](https://github.com/cmsxbc)). * Allow to split `GraphiteMergeTree` rollup rules for plain/tagged metrics (optional rule_type field). [#33494](https://github.com/ClickHouse/ClickHouse/pull/33494) ([Michail Safronov](https://github.com/msaf1980)). -* Potential issue, cannot be exploited: integer overflow may happen in array resize. [#33024](https://github.com/ClickHouse/ClickHouse/pull/33024) ([varadarajkumar](https://github.com/varadarajkumar)). #### Performance Improvement @@ -78,7 +77,6 @@ * Validate config keys for external dictionaries. 
[#33095](https://github.com/ClickHouse/ClickHouse/issues/33095#issuecomment-1000577517). [#33130](https://github.com/ClickHouse/ClickHouse/pull/33130) ([Kseniia Sumarokova](https://github.com/kssenii)). * Send profile info inside `clickhouse-local`. Closes [#33093](https://github.com/ClickHouse/ClickHouse/issues/33093). [#33097](https://github.com/ClickHouse/ClickHouse/pull/33097) ([Kseniia Sumarokova](https://github.com/kssenii)). * Short circuit evaluation: support for function `throwIf`. Closes [#32969](https://github.com/ClickHouse/ClickHouse/issues/32969). [#32973](https://github.com/ClickHouse/ClickHouse/pull/32973) ([Maksim Kita](https://github.com/kitaisreal)). -* Added `Date32` date type support in dictionaries. Closes [#32913](https://github.com/ClickHouse/ClickHouse/issues/32913). [#32971](https://github.com/ClickHouse/ClickHouse/pull/32971) ([Maksim Kita](https://github.com/kitaisreal)). * (This only happens in unofficial builds). Fixed segfault when inserting data into compressed Decimal, String, FixedString and Array columns. This closes [#32939](https://github.com/ClickHouse/ClickHouse/issues/32939). [#32940](https://github.com/ClickHouse/ClickHouse/pull/32940) ([N. Kolotov](https://github.com/nkolotov)). * Added support for specifying subquery as SQL user defined function. Example: `CREATE FUNCTION test AS () -> (SELECT 1)`. Closes [#30755](https://github.com/ClickHouse/ClickHouse/issues/30755). [#32758](https://github.com/ClickHouse/ClickHouse/pull/32758) ([Maksim Kita](https://github.com/kitaisreal)). * Improve gRPC compression support for [#28671](https://github.com/ClickHouse/ClickHouse/issues/28671). [#32747](https://github.com/ClickHouse/ClickHouse/pull/32747) ([Vitaly Baranov](https://github.com/vitlibar)). @@ -100,6 +98,7 @@ * Use `--database` option for clickhouse-local. [#32797](https://github.com/ClickHouse/ClickHouse/pull/32797) ([Kseniia Sumarokova](https://github.com/kssenii)). * Fix surprisingly bad code in SQL ordinary function `file`. Now it supports symlinks. [#32640](https://github.com/ClickHouse/ClickHouse/pull/32640) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Updating `modification_time` for data part in `system.parts` after part movement [#32964](https://github.com/ClickHouse/ClickHouse/issues/32964). [#32965](https://github.com/ClickHouse/ClickHouse/pull/32965) ([save-my-heart](https://github.com/save-my-heart)). +* Potential issue, cannot be exploited: integer overflow may happen in array resize. [#33024](https://github.com/ClickHouse/ClickHouse/pull/33024) ([varadarajkumar](https://github.com/varadarajkumar)). #### Build/Testing/Packaging Improvement diff --git a/CMakeLists.txt b/CMakeLists.txt index 4c9a972a4e46..f27e9cdbea4a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -67,7 +67,7 @@ if (EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/.git" AND NOT EXISTS "${ClickHouse_SOURC message (FATAL_ERROR "Submodules are not initialized. 
Run\n\tgit submodule update --init --recursive") endif () -include (cmake/find/ccache.cmake) +include (cmake/ccache.cmake) # Take care to add prlimit in command line before ccache, or else ccache thinks that # prlimit is compiler, and clang++ is its input file, and refuses to work with @@ -104,9 +104,8 @@ message (STATUS "CMAKE_BUILD_TYPE: ${CMAKE_BUILD_TYPE}") string (TOUPPER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE_UC) option(USE_STATIC_LIBRARIES "Disable to use shared libraries" ON) -option(MAKE_STATIC_LIBRARIES "Disable to make shared libraries" ${USE_STATIC_LIBRARIES}) -if (NOT MAKE_STATIC_LIBRARIES) +if (NOT USE_STATIC_LIBRARIES) # DEVELOPER ONLY. # Faster linking if turned on. option(SPLIT_SHARED_LIBRARIES "Keep all internal libraries as separate .so files") @@ -115,11 +114,11 @@ if (NOT MAKE_STATIC_LIBRARIES) "Make several binaries (clickhouse-server, clickhouse-client etc.) instead of one bundled") endif () -if (MAKE_STATIC_LIBRARIES AND SPLIT_SHARED_LIBRARIES) - message(FATAL_ERROR "Defining SPLIT_SHARED_LIBRARIES=1 without MAKE_STATIC_LIBRARIES=0 has no effect.") +if (USE_STATIC_LIBRARIES AND SPLIT_SHARED_LIBRARIES) + message(FATAL_ERROR "Defining SPLIT_SHARED_LIBRARIES=1 without USE_STATIC_LIBRARIES=0 has no effect.") endif() -if (NOT MAKE_STATIC_LIBRARIES AND SPLIT_SHARED_LIBRARIES) +if (NOT USE_STATIC_LIBRARIES AND SPLIT_SHARED_LIBRARIES) set(BUILD_SHARED_LIBS 1 CACHE INTERNAL "") endif () @@ -139,7 +138,6 @@ if (ENABLE_FUZZING) set (ENABLE_CLICKHOUSE_ODBC_BRIDGE OFF) set (ENABLE_LIBRARIES 0) set (ENABLE_SSL 1) - set (USE_INTERNAL_SSL_LIBRARY 1) set (USE_UNWIND ON) set (ENABLE_EMBEDDED_COMPILER 0) set (ENABLE_EXAMPLES 0) @@ -152,7 +150,6 @@ if (ENABLE_FUZZING) # For codegen_select_fuzzer set (ENABLE_PROTOBUF 1) - set (USE_INTERNAL_PROTOBUF_LIBRARY 1) endif() # Global libraries @@ -185,7 +182,7 @@ if (COMPILER_CLANG) if (HAS_USE_CTOR_HOMING) # For more info see https://blog.llvm.org/posts/2021-04-05-constructor-homing-for-debug-info/ - if (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG" OR CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO") + if (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG" OR CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO") set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Xclang -fuse-ctor-homing") set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Xclang -fuse-ctor-homing") endif() @@ -203,21 +200,13 @@ endif () option(ENABLE_TESTS "Provide unit_test_dbms target with Google.Test unit tests" ON) option(ENABLE_EXAMPLES "Build all example programs in 'examples' subdirectories" OFF) -if (OS_LINUX AND (ARCH_AMD64 OR ARCH_AARCH64) AND MAKE_STATIC_LIBRARIES AND NOT SPLIT_SHARED_LIBRARIES AND NOT USE_MUSL) +if (OS_LINUX AND (ARCH_AMD64 OR ARCH_AARCH64) AND USE_STATIC_LIBRARIES AND NOT SPLIT_SHARED_LIBRARIES AND NOT USE_MUSL) # Only for Linux, x86_64 or aarch64. option(GLIBC_COMPATIBILITY "Enable compatibility with older glibc libraries." ON) elseif(GLIBC_COMPATIBILITY) message (${RECONFIGURE_MESSAGE_LEVEL} "Glibc compatibility cannot be enabled in current configuration") endif () -if (GLIBC_COMPATIBILITY) - # NOTE: we may also want to check glibc version and add -include only for 2.32+ - # however this is extra complexity, especially for cross compiling. - # And anyway it should not break anything for <2.32. 
- set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -include ${CMAKE_CURRENT_SOURCE_DIR}/base/glibc-compatibility/glibc-compat-2.32.h") - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -include ${CMAKE_CURRENT_SOURCE_DIR}/base/glibc-compatibility/glibc-compat-2.32.h") -endif() - # Make sure the final executable has symbols exported set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -rdynamic") @@ -258,8 +247,6 @@ endif() if (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG") set(USE_DEBUG_HELPERS ON) -else () - set(USE_DEBUG_HELPERS ON) endif() option(USE_DEBUG_HELPERS "Enable debug helpers" ${USE_DEBUG_HELPERS}) @@ -414,17 +401,6 @@ else () option(WERROR "Enable -Werror compiler option" ON) endif () -if (WERROR) - # Don't pollute CMAKE_CXX_FLAGS with -Werror as it will break some CMake checks. - # Instead, adopt modern cmake usage requirement. - target_compile_options(global-libs INTERFACE "-Werror") -endif () - -# Make this extra-checks for correct library dependencies. -if (OS_LINUX AND NOT SANITIZE) - target_link_options(global-libs INTERFACE "-Wl,--no-undefined") -endif () - # Increase stack size on Musl. We need big stack for our recursive-descend parser. if (USE_MUSL) set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,-z,stack-size=2097152") @@ -432,6 +408,7 @@ endif () include(cmake/dbms_glob_sources.cmake) +add_library(global-group INTERFACE) if (OS_LINUX OR OS_ANDROID) include(cmake/linux/default_libs.cmake) elseif (OS_DARWIN) @@ -439,6 +416,18 @@ elseif (OS_DARWIN) elseif (OS_FREEBSD) include(cmake/freebsd/default_libs.cmake) endif () +link_libraries(global-group) + +if (WERROR) + # Don't pollute CMAKE_CXX_FLAGS with -Werror as it will break some CMake checks. + # Instead, adopt modern cmake usage requirement. + target_compile_options(global-group INTERFACE "-Werror") +endif () + +# Make this extra-checks for correct library dependencies. 
+if (OS_LINUX AND NOT SANITIZE) + target_link_options(global-group INTERFACE "-Wl,--no-undefined") +endif () ###################################### ### Add targets below this comment ### @@ -446,7 +435,7 @@ endif () set (CMAKE_POSTFIX_VARIABLE "CMAKE_${CMAKE_BUILD_TYPE_UC}_POSTFIX") -if (MAKE_STATIC_LIBRARIES) +if (USE_STATIC_LIBRARIES) set (CMAKE_POSITION_INDEPENDENT_CODE OFF) if (OS_LINUX AND NOT ARCH_ARM) # Slightly more efficient code can be generated @@ -482,87 +471,10 @@ endif () message (STATUS "Building for: ${CMAKE_SYSTEM} ${CMAKE_SYSTEM_PROCESSOR} ${CMAKE_LIBRARY_ARCHITECTURE} ; USE_STATIC_LIBRARIES=${USE_STATIC_LIBRARIES} - MAKE_STATIC_LIBRARIES=${MAKE_STATIC_LIBRARIES} SPLIT_SHARED=${SPLIT_SHARED_LIBRARIES} CCACHE=${CCACHE_FOUND} ${CCACHE_VERSION}") include (GNUInstallDirs) -include (cmake/contrib_finder.cmake) - -find_contrib_lib(double-conversion) # Must be before parquet -include (cmake/find/ssl.cmake) -include (cmake/find/ldap.cmake) # after ssl -include (cmake/find/icu.cmake) -include (cmake/find/xz.cmake) -include (cmake/find/zlib.cmake) -include (cmake/find/zstd.cmake) -include (cmake/find/ltdl.cmake) # for odbc -# openssl, zlib before poco -include (cmake/find/sparsehash.cmake) -include (cmake/find/re2.cmake) -include (cmake/find/krb5.cmake) -include (cmake/find/libgsasl.cmake) -include (cmake/find/cyrus-sasl.cmake) -include (cmake/find/rdkafka.cmake) -include (cmake/find/libuv.cmake) # for amqpcpp and cassandra -include (cmake/find/amqpcpp.cmake) -include (cmake/find/capnp.cmake) -include (cmake/find/llvm.cmake) -include (cmake/find/h3.cmake) -include (cmake/find/libxml2.cmake) -include (cmake/find/brotli.cmake) -include (cmake/find/protobuf.cmake) -include (cmake/find/thrift.cmake) -include (cmake/find/grpc.cmake) -include (cmake/find/pdqsort.cmake) -include (cmake/find/miniselect.cmake) -include (cmake/find/hdfs3.cmake) # uses protobuf -include (cmake/find/poco.cmake) -include (cmake/find/curl.cmake) -include (cmake/find/s3.cmake) -include (cmake/find/blob_storage.cmake) -include (cmake/find/base64.cmake) -include (cmake/find/parquet.cmake) # uses protobuf and thrift -include (cmake/find/simdjson.cmake) -include (cmake/find/fast_float.cmake) -include (cmake/find/rapidjson.cmake) -include (cmake/find/fastops.cmake) -include (cmake/find/odbc.cmake) -include (cmake/find/nanodbc.cmake) -include (cmake/find/sqlite.cmake) -include (cmake/find/rocksdb.cmake) -include (cmake/find/libpqxx.cmake) -include (cmake/find/nuraft.cmake) -include (cmake/find/yaml-cpp.cmake) -include (cmake/find/s2geometry.cmake) -include (cmake/find/nlp.cmake) -include (cmake/find/bzip2.cmake) -include (cmake/find/filelog.cmake) - -if(NOT USE_INTERNAL_PARQUET_LIBRARY) - set (ENABLE_ORC OFF CACHE INTERNAL "") -endif() -include (cmake/find/orc.cmake) - -include (cmake/find/avro.cmake) -include (cmake/find/msgpack.cmake) -include (cmake/find/cassandra.cmake) -include (cmake/find/sentry.cmake) -include (cmake/find/datasketches.cmake) -include (cmake/find/libprotobuf-mutator.cmake) -include (cmake/find/hive-metastore.cmake) - -set (USE_INTERNAL_CITYHASH_LIBRARY ON CACHE INTERNAL "") -find_contrib_lib(cityhash) - -find_contrib_lib(farmhash) - -if (ENABLE_TESTS) - include (cmake/find/gtest.cmake) -endif () - -# Need to process before "contrib" dir: -include (cmake/find/mysqlclient.cmake) # When testing for memory leaks with Valgrind, don't link tcmalloc or jemalloc. @@ -606,7 +518,7 @@ macro (add_executable target) # - _je_zone_register due to JEMALLOC_PRIVATE_NAMESPACE=je_ under OS X. 
# - but jemalloc-cmake does not run private_namespace.sh # so symbol name should be _zone_register - if (ENABLE_JEMALLOC AND MAKE_STATIC_LIBRARIES AND OS_DARWIN) + if (ENABLE_JEMALLOC AND USE_STATIC_LIBRARIES AND OS_DARWIN) set_property(TARGET ${target} APPEND PROPERTY LINK_OPTIONS -u_zone_register) endif() endif() @@ -625,6 +537,4 @@ add_subdirectory (programs) add_subdirectory (tests) add_subdirectory (utils) -include (cmake/print_include_directories.cmake) - include (cmake/sanitize_target_link_libraries.cmake) diff --git a/LICENSE b/LICENSE index 80dbd30140b3..8b0ac080f01b 100644 --- a/LICENSE +++ b/LICENSE @@ -1,4 +1,4 @@ -Copyright 2016-2021 ClickHouse, Inc. +Copyright 2016-2022 ClickHouse, Inc. Apache License Version 2.0, January 2004 @@ -188,7 +188,7 @@ Copyright 2016-2021 ClickHouse, Inc. same "printed page" as the copyright notice for easier identification within third-party archives. - Copyright 2016-2021 ClickHouse, Inc. + Copyright 2016-2022 ClickHouse, Inc. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/README.md b/README.md index e12238577a7d..f433b4578619 100644 --- a/README.md +++ b/README.md @@ -10,5 +10,6 @@ ClickHouse® is an open-source column-oriented database management system that a * [YouTube channel](https://www.youtube.com/c/ClickHouseDB) has a lot of content about ClickHouse in video format. * [Slack](https://join.slack.com/t/clickhousedb/shared_invite/zt-rxm3rdrk-lIUmhLC3V8WTaL0TGxsOmg) and [Telegram](https://telegram.me/clickhouse_en) allow chatting with ClickHouse users in real-time. * [Blog](https://clickhouse.com/blog/en/) contains various ClickHouse-related articles, as well as announcements and reports about events. -* [Code Browser](https://clickhouse.com/codebrowser/html_report/ClickHouse/index.html) with syntax highlight and navigation. +* [Code Browser (Woboq)](https://clickhouse.com/codebrowser/html_report/ClickHouse/index.html) with syntax highlight and navigation. +* [Code Browser (github.dev)](https://github.dev/ClickHouse/ClickHouse) with syntax highlight, powered by github.dev. * [Contacts](https://clickhouse.com/company/#contact) can help to get your questions answered if there are any. diff --git a/SECURITY.md b/SECURITY.md index f002dd53ca9d..ca3c8b439fd7 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -22,9 +22,10 @@ The following versions of ClickHouse server are currently being supported with s | 21.7 | :x: | | 21.8 | ✅ | | 21.9 | :x: | -| 21.10 | ✅ | +| 21.10 | :x: | | 21.11 | ✅ | | 21.12 | ✅ | +| 22.1 | ✅ | ## Reporting a Vulnerability diff --git a/base/base/CMakeLists.txt b/base/base/CMakeLists.txt index bc82e502e798..3e6f174c6dca 100644 --- a/base/base/CMakeLists.txt +++ b/base/base/CMakeLists.txt @@ -24,7 +24,7 @@ if (ENABLE_REPLXX) endif () if (USE_DEBUG_HELPERS) - get_target_property(MAGIC_ENUM_INCLUDE_DIR magic_enum INTERFACE_INCLUDE_DIRECTORIES) + get_target_property(MAGIC_ENUM_INCLUDE_DIR ch_contrib::magic_enum INTERFACE_INCLUDE_DIRECTORIES) # CMake generator expression will do insane quoting when it encounters special character like quotes, spaces, etc. # Prefixing "SHELL:" will force it to use the original text. 
set (INCLUDE_DEBUG_HELPERS "SHELL:-I\"${MAGIC_ENUM_INCLUDE_DIR}\" -include \"${ClickHouse_SOURCE_DIR}/base/base/iostream_debug_helpers.h\"") @@ -40,29 +40,25 @@ else () target_compile_definitions(common PUBLIC WITH_COVERAGE=0) endif () -if (USE_INTERNAL_CCTZ) - set_source_files_properties(DateLUTImpl.cpp PROPERTIES COMPILE_DEFINITIONS USE_INTERNAL_CCTZ) -endif() - target_include_directories(common PUBLIC .. "${CMAKE_CURRENT_BINARY_DIR}/..") -if (OS_DARWIN AND NOT MAKE_STATIC_LIBRARIES) +if (OS_DARWIN AND NOT USE_STATIC_LIBRARIES) target_link_libraries(common PUBLIC -Wl,-U,_inside_main) endif() target_link_libraries (common PUBLIC - ${CITYHASH_LIBRARIES} + ch_contrib::cityhash boost::headers_only boost::system Poco::Net Poco::Net::SSL Poco::Util Poco::Foundation - replxx - cctz - fmt - magic_enum + ch_contrib::replxx + ch_contrib::cctz + ch_contrib::fmt + ch_contrib::magic_enum ) if (ENABLE_TESTS) diff --git a/base/base/LineReader.cpp b/base/base/LineReader.cpp index 9491f9577627..686d70f247da 100644 --- a/base/base/LineReader.cpp +++ b/base/base/LineReader.cpp @@ -2,7 +2,9 @@ #include <iostream> #include <string_view> +#include <algorithm> +#include <cassert> #include <string.h> #include <unistd.h> #include <sys/select.h> @@ -34,13 +36,37 @@ bool hasInputData() return select(1, &fds, nullptr, nullptr, &timeout) == 1; } -} +struct NoCaseCompare +{ + bool operator()(const std::string & str1, const std::string & str2) + { + return std::lexicographical_compare(begin(str1), end(str1), begin(str2), end(str2), [](const char c1, const char c2) + { + return std::tolower(c1) < std::tolower(c2); + }); + } +}; -std::optional<LineReader::Suggest::WordsRange> LineReader::Suggest::getCompletions(const String & prefix, size_t prefix_length) const +using Words = std::vector<std::string>; +template <typename Compare> +void addNewWords(Words & to, const Words & from, Compare comp) { - if (!ready) - return std::nullopt; + size_t old_size = to.size(); + size_t new_size = old_size + from.size(); + + to.reserve(new_size); + to.insert(to.end(), from.begin(), from.end()); + auto middle = to.begin() + old_size; + std::inplace_merge(to.begin(), middle, to.end(), comp); + auto last_unique = std::unique(to.begin(), to.end()); + to.erase(last_unique, to.end()); +} + +} + +replxx::Replxx::completions_t LineReader::Suggest::getCompletions(const String & prefix, size_t prefix_length) +{ std::string_view last_word; auto last_word_pos = prefix.find_last_of(word_break_characters); @@ -48,21 +74,45 @@ std::optional<LineReader::Suggest::WordsRange> LineReader::Suggest::getCompletio last_word = prefix; else last_word = std::string_view(prefix).substr(last_word_pos + 1, std::string::npos); - /// last_word can be empty.
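(Editorial aside, not part of the patch: the `addNewWords` helper above merges a freshly sorted batch into an already sorted, deduplicated vector in linear time. A minimal self-contained sketch of that `reserve` / `insert` / `std::inplace_merge` / `std::unique` idiom; `merge_sorted_batch` is a hypothetical name used only for illustration.)

```cpp
#include <algorithm>
#include <cassert>
#include <functional>
#include <string>
#include <vector>

// Merge a sorted batch into a sorted, deduplicated vector in O(n + m),
// mirroring the addNewWords() shape in the hunk above.
template <typename Compare>
void merge_sorted_batch(std::vector<std::string> & to, const std::vector<std::string> & from, Compare comp)
{
    const size_t old_size = to.size();
    to.reserve(old_size + from.size());
    to.insert(to.end(), from.begin(), from.end());                           // append the batch
    std::inplace_merge(to.begin(), to.begin() + old_size, to.end(), comp);   // interleave the two sorted runs
    to.erase(std::unique(to.begin(), to.end()), to.end());                   // drop duplicates
}

int main()
{
    std::vector<std::string> words{"ALTER", "SELECT"};
    std::vector<std::string> batch{"SELECT", "CREATE"};
    std::sort(batch.begin(), batch.end());
    merge_sorted_batch(words, batch, std::less<>{});
    assert((words == std::vector<std::string>{"ALTER", "CREATE", "SELECT"}));
    return 0;
}
```

The append-then-merge shape avoids re-sorting the whole vector: `std::inplace_merge` only has to interleave two runs that are each already sorted.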
+ std::pair<Words::const_iterator, Words::const_iterator> range; + + std::lock_guard lock(mutex); + /// Only perform case sensitive completion when the prefix string contains any uppercase characters - if (std::none_of(prefix.begin(), prefix.end(), [&](auto c) { return c >= 'A' && c <= 'Z'; })) - return std::equal_range( + if (std::none_of(prefix.begin(), prefix.end(), [](char32_t x) { return iswupper(static_cast<wint_t>(x)); })) + range = std::equal_range( words_no_case.begin(), words_no_case.end(), last_word, [prefix_length](std::string_view s, std::string_view prefix_searched) { return strncasecmp(s.data(), prefix_searched.data(), prefix_length) < 0; }); else - return std::equal_range(words.begin(), words.end(), last_word, [prefix_length](std::string_view s, std::string_view prefix_searched) + range = std::equal_range(words.begin(), words.end(), last_word, [prefix_length](std::string_view s, std::string_view prefix_searched) { return strncmp(s.data(), prefix_searched.data(), prefix_length) < 0; }); + + return replxx::Replxx::completions_t(range.first, range.second); +} + +void LineReader::Suggest::addWords(Words && new_words) +{ + Words new_words_no_case = new_words; + if (!new_words.empty()) + { + std::sort(new_words.begin(), new_words.end()); + std::sort(new_words_no_case.begin(), new_words_no_case.end(), NoCaseCompare{}); + } + + { + std::lock_guard lock(mutex); + addNewWords(words, new_words, std::less{}); + addNewWords(words_no_case, new_words_no_case, NoCaseCompare{}); + } + + assert(std::is_sorted(words.begin(), words.end())); + assert(std::is_sorted(words_no_case.begin(), words_no_case.end(), NoCaseCompare{})); } LineReader::LineReader(const String & history_file_path_, bool multiline_, Patterns extenders_, Patterns delimiters_) diff --git a/base/base/LineReader.h b/base/base/LineReader.h index 12a856e2051d..33daae49974a 100644 --- a/base/base/LineReader.h +++ b/base/base/LineReader.h @@ -1,10 +1,12 @@ #pragma once -#include <base/types.h> - +#include <mutex> #include <atomic> #include <vector> #include <optional> +#include <replxx.hxx> + +#include <base/types.h> class LineReader { @@ -12,14 +14,16 @@ class LineReader struct Suggest { using Words = std::vector<std::string>; - using WordsRange = std::pair<Words::const_iterator, Words::const_iterator>; + /// Get vector for the matched range of words if any. + replxx::Replxx::completions_t getCompletions(const String & prefix, size_t prefix_length); + void addWords(Words && new_words); + + private: Words words; Words words_no_case; - std::atomic<bool> ready{false}; - /// Get iterators for the matched range of words if any. - std::optional<WordsRange> getCompletions(const String & prefix, size_t prefix_length) const; + std::mutex mutex; }; using Patterns = std::vector<const char *>; diff --git a/base/base/ReplxxLineReader.cpp b/base/base/ReplxxLineReader.cpp index 5d99da99c8cf..9ea53bb132b6 100644 --- a/base/base/ReplxxLineReader.cpp +++ b/base/base/ReplxxLineReader.cpp @@ -25,13 +25,6 @@ void trim(String & s) s.erase(std::find_if(s.rbegin(), s.rend(), [](unsigned char ch) { return !std::isspace(ch); }).base(), s.end()); } -/// Check if string ends with given character after skipping whitespaces.
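(Another aside on the completion lookup above: it relies on the word lists staying sorted so that every prefix maps to one contiguous block, which `std::equal_range` can locate with a comparator that only examines the first `prefix_length` characters. A minimal sketch of the case-sensitive variant of that technique; `complete` is a hypothetical helper, not part of the patch.)

```cpp
#include <algorithm>
#include <cstring>
#include <string>
#include <vector>

// Return all words sharing an n-character prefix with `prefix`, as
// getCompletions() does above. Because the comparator ignores everything
// past prefix.size() characters, all matches compare "equal" to the prefix
// and equal_range returns the whole block in O(log n).
std::vector<std::string> complete(const std::vector<std::string> & sorted_words, const std::string & prefix)
{
    const size_t prefix_length = prefix.size();
    auto cmp = [prefix_length](const std::string & a, const std::string & b)
    {
        return std::strncmp(a.c_str(), b.c_str(), prefix_length) < 0;
    };
    auto [first, last] = std::equal_range(sorted_words.begin(), sorted_words.end(), prefix, cmp);
    return {first, last};
}

int main()
{
    const std::vector<std::string> words{"SELECT", "SET", "SHOW", "SYSTEM"}; // already sorted
    return complete(words, "SE").size() == 2 ? 0 : 1; // matches SELECT and SET
}
```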
-bool ends_with(const std::string_view & s, const std::string_view & p) -{ - auto ss = std::string_view(s.data(), s.rend() - std::find_if(s.rbegin(), s.rend(), [](unsigned char ch) { return !std::isspace(ch); })); - return ss.ends_with(p); -} - std::string getEditor() { const char * editor = std::getenv("EDITOR"); @@ -132,8 +125,14 @@ void convertHistoryFile(const std::string & path, replxx::Replxx & rx) } +static bool replxx_last_is_delimiter = false; +void ReplxxLineReader::setLastIsDelimiter(bool flag) +{ + replxx_last_is_delimiter = flag; +} + ReplxxLineReader::ReplxxLineReader( - const Suggest & suggest, + Suggest & suggest, const String & history_file_path_, bool multiline_, Patterns extenders_, @@ -179,14 +178,13 @@ ReplxxLineReader::ReplxxLineReader( auto callback = [&suggest] (const String & context, size_t context_size) { - if (auto range = suggest.getCompletions(context, context_size)) - return Replxx::completions_t(range->first, range->second); - return Replxx::completions_t(); + return suggest.getCompletions(context, context_size); }; rx.set_completion_callback(callback); rx.set_complete_on_empty(false); rx.set_word_break_characters(word_break_characters); + rx.set_ignore_case(true); if (highlighter) rx.set_highlighter_callback(highlighter); @@ -198,21 +196,11 @@ ReplxxLineReader::ReplxxLineReader( auto commit_action = [this](char32_t code) { - std::string_view str = rx.get_state().text(); - - /// Always commit line when we see extender at the end. It will start a new prompt. - for (const auto * extender : extenders) - if (ends_with(str, extender)) - return rx.invoke(Replxx::ACTION::COMMIT_LINE, code); - - /// If we see an delimiter at the end, commit right away. - for (const auto * delimiter : delimiters) - if (ends_with(str, delimiter)) - return rx.invoke(Replxx::ACTION::COMMIT_LINE, code); - /// If we allow multiline and there is already something in the input, start a newline. - if (multiline && !input.empty()) + /// NOTE: Lexer is only available if we use highlighter. + if (highlighter && multiline && !replxx_last_is_delimiter) return rx.invoke(Replxx::ACTION::NEW_LINE, code); + replxx_last_is_delimiter = false; return rx.invoke(Replxx::ACTION::COMMIT_LINE, code); }; /// bind C-j to ENTER action. diff --git a/base/base/ReplxxLineReader.h b/base/base/ReplxxLineReader.h index d4cc7de1e7a3..b9ec214d02c3 100644 --- a/base/base/ReplxxLineReader.h +++ b/base/base/ReplxxLineReader.h @@ -9,7 +9,7 @@ class ReplxxLineReader : public LineReader { public: ReplxxLineReader( - const Suggest & suggest, + Suggest & suggest, const String & history_file_path, bool multiline, Patterns extenders_, @@ -19,6 +19,9 @@ class ReplxxLineReader : public LineReader void enableBracketedPaste() override; + /// If highlight is on, we will set a flag to denote whether the last token is a delimiter. + /// This is useful to determine the behavior of <ENTER> key when multiline is enabled. + static void setLastIsDelimiter(bool flag); private: InputStatus readOneLine(const String & prompt) override; void addToHistory(const String & line) override; diff --git a/base/base/logger_useful.h b/base/base/logger_useful.h index 1237c6bd47ca..ad7d6583f5e9 100644 --- a/base/base/logger_useful.h +++ b/base/base/logger_useful.h @@ -12,6 +12,8 @@ namespace { template <typename... Ts> constexpr size_t numArgs(Ts &&...) { return sizeof...(Ts); } template <typename T, typename... Ts> constexpr auto firstArg(T && x, Ts &&...)
{ return std::forward<T>(x); } + /// For implicit conversion of fmt::basic_runtime<> to char* for std::string ctor + template <typename... Ts> constexpr auto firstArg(fmt::basic_runtime<char> && data, Ts &&...) { return data.str.data(); } } diff --git a/base/base/sort.h b/base/base/sort.h index 1f12cc622188..592a899a291e 100644 --- a/base/base/sort.h +++ b/base/base/sort.h @@ -1,26 +1,42 @@ #pragma once +#include <pdqsort.h> + #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wold-style-cast" #include <miniselect/floyd_rivest_select.h> -template <class RandomIt> +template <typename RandomIt> void nth_element(RandomIt first, RandomIt nth, RandomIt last) { ::miniselect::floyd_rivest_select(first, nth, last); } -template <class RandomIt> +template <typename RandomIt> void partial_sort(RandomIt first, RandomIt middle, RandomIt last) { ::miniselect::floyd_rivest_partial_sort(first, middle, last); } -template <class RandomIt, class Compare> +template <typename RandomIt, typename Compare> void partial_sort(RandomIt first, RandomIt middle, RandomIt last, Compare compare) { ::miniselect::floyd_rivest_partial_sort(first, middle, last, compare); } #pragma GCC diagnostic pop + +template <typename RandomIt, typename Compare> +void sort(RandomIt first, RandomIt last, Compare compare) +{ + ::pdqsort(first, last, compare); +} + +template <typename RandomIt> +void sort(RandomIt first, RandomIt last) +{ + using value_type = typename std::iterator_traits<RandomIt>::value_type; + using comparator = std::less<value_type>; + ::pdqsort(first, last, comparator()); +} diff --git a/base/bridge/IBridge.cpp b/base/bridge/IBridge.cpp index 553973b645d4..4c808278ed0c 100644 --- a/base/bridge/IBridge.cpp +++ b/base/bridge/IBridge.cpp @@ -9,6 +9,7 @@ #include #include +#include #include #include #include diff --git a/base/daemon/BaseDaemon.cpp b/base/daemon/BaseDaemon.cpp index f3026d7c87a3..311349a2ba7c 100644 --- a/base/daemon/BaseDaemon.cpp +++ b/base/daemon/BaseDaemon.cpp @@ -79,18 +79,14 @@ static void call_default_signal_handler(int sig) raise(sig); } -static constexpr size_t max_query_id_size = 127; - static const size_t signal_pipe_buf_size = sizeof(int) + sizeof(siginfo_t) - + sizeof(ucontext_t) + + sizeof(ucontext_t*) + sizeof(StackTrace) + sizeof(UInt32) - + max_query_id_size + 1 /// query_id + varint encoded length + sizeof(void*); - using signal_function = void(int, siginfo_t*, void*); static void writeSignalIDtoSignalPipe(int sig) @@ -129,18 +125,14 @@ static void signalHandler(int sig, siginfo_t * info, void * context) char buf[signal_pipe_buf_size]; DB::WriteBufferFromFileDescriptorDiscardOnFailure out(signal_pipe.fds_rw[1], signal_pipe_buf_size, buf); - const ucontext_t signal_context = *reinterpret_cast<ucontext_t *>(context); - const StackTrace stack_trace(signal_context); - - StringRef query_id = DB::CurrentThread::getQueryId(); /// This is signal safe.
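(An aside on the `base/base/sort.h` hunk above before the BaseDaemon changes continue: it adds `::sort` wrappers that forward to the third-party `pdqsort`, with the comparator-less overload deriving `std::less` from the iterator's value type. A sketch of the same two-overload dispatch pattern, delegating to `std::sort` so it stays self-contained; `my_sort` is a hypothetical name.)

```cpp
#include <algorithm>
#include <functional>
#include <iterator>
#include <vector>

// Same shape as the ::sort wrappers added above, but delegating to std::sort
// (the real code delegates to the third-party ::pdqsort).
template <typename RandomIt, typename Compare>
void my_sort(RandomIt first, RandomIt last, Compare compare)
{
    std::sort(first, last, compare);
}

template <typename RandomIt>
void my_sort(RandomIt first, RandomIt last)
{
    // Derive the default comparator from the iterator's value type,
    // exactly as the comparator-less overload in the patch does.
    using value_type = typename std::iterator_traits<RandomIt>::value_type;
    using comparator = std::less<value_type>;
    my_sort(first, last, comparator());
}

int main()
{
    std::vector<int> v{3, 1, 2};
    my_sort(v.begin(), v.end()); // -> 1 2 3
    return v.front() == 1 ? 0 : 1;
}
```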
- query_id.size = std::min(query_id.size, max_query_id_size); + const ucontext_t * signal_context = reinterpret_cast<ucontext_t *>(context); + const StackTrace stack_trace(*signal_context); DB::writeBinary(sig, out); DB::writePODBinary(*info, out); DB::writePODBinary(signal_context, out); DB::writePODBinary(stack_trace, out); DB::writeBinary(UInt32(getThreadId()), out); - DB::writeStringBinary(query_id, out); DB::writePODBinary(DB::current_thread, out); out.next(); @@ -184,6 +176,8 @@ class SignalListener : public Poco::Runnable void run() override { + static_assert(PIPE_BUF >= 512); + static_assert(signal_pipe_buf_size <= PIPE_BUF, "Only write of PIPE_BUF to pipe is atomic and the minimal known PIPE_BUF across supported platforms is 512"); char buf[signal_pipe_buf_size]; DB::ReadBufferFromFileDescriptor in(signal_pipe.fds_rw[0], signal_pipe_buf_size, buf); @@ -227,10 +221,9 @@ class SignalListener : public Poco::Runnable else { siginfo_t info{}; - ucontext_t context{}; + ucontext_t * context{}; StackTrace stack_trace(NoCapture{}); UInt32 thread_num{}; - std::string query_id; DB::ThreadStatus * thread_ptr{}; if (sig != SanitizerTrap) @@ -241,12 +234,11 @@ class SignalListener : public Poco::Runnable DB::readPODBinary(stack_trace, in); DB::readBinary(thread_num, in); - DB::readBinary(query_id, in); DB::readPODBinary(thread_ptr, in); /// This allows to receive more signals if failure happens inside onFault function. /// Example: segfault while symbolizing stack trace. - std::thread([=, this] { onFault(sig, info, context, stack_trace, thread_num, query_id, thread_ptr); }).detach(); + std::thread([=, this] { onFault(sig, info, context, stack_trace, thread_num, thread_ptr); }).detach(); } } } @@ -279,18 +271,27 @@ class SignalListener : public Poco::Runnable void onFault( int sig, const siginfo_t & info, - const ucontext_t & context, + ucontext_t * context, const StackTrace & stack_trace, UInt32 thread_num, - const std::string & query_id, DB::ThreadStatus * thread_ptr) const { DB::ThreadStatus thread_status; + String query_id; + String query; + /// Send logs from this thread to client if possible. /// It will allow client to see failure messages directly.
if (thread_ptr) { + query_id = thread_ptr->getQueryId().toString(); + + if (auto thread_group = thread_ptr->getThreadGroup()) + { + query = thread_group->query; + } + if (auto logs_queue = thread_ptr->getInternalTextLogsQueue()) DB::CurrentThread::attachInternalTextLogsQueue(logs_queue, DB::LogsLevel::trace); } @@ -305,19 +306,19 @@ class SignalListener : public Poco::Runnable } else { - LOG_FATAL(log, "(version {}{}, {}) (from thread {}) (query_id: {}) Received signal {} ({})", + LOG_FATAL(log, "(version {}{}, {}) (from thread {}) (query_id: {}) (query: {}) Received signal {} ({})", VERSION_STRING, VERSION_OFFICIAL, daemon.build_id_info, - thread_num, query_id, strsignal(sig), sig); + thread_num, query_id, query, strsignal(sig), sig); } String error_message; if (sig != SanitizerTrap) - error_message = signalToErrorMessage(sig, info, context); + error_message = signalToErrorMessage(sig, info, *context); else error_message = "Sanitizer trap."; - LOG_FATAL(log, error_message); + LOG_FATAL(log, fmt::runtime(error_message)); if (stack_trace.getSize()) { @@ -330,11 +331,11 @@ class SignalListener : public Poco::Runnable for (size_t i = stack_trace.getOffset(); i < stack_trace.getSize(); ++i) bare_stacktrace << ' ' << stack_trace.getFramePointers()[i]; - LOG_FATAL(log, bare_stacktrace.str()); + LOG_FATAL(log, fmt::runtime(bare_stacktrace.str())); } /// Write symbolized stack trace line by line for better grep-ability. - stack_trace.toStringEveryLine([&](const std::string & s) { LOG_FATAL(log, s); }); + stack_trace.toStringEveryLine([&](const std::string & s) { LOG_FATAL(log, fmt::runtime(s)); }); #if defined(OS_LINUX) /// Write information about binary checksum. It can be difficult to calculate, so do it only after printing stack trace. @@ -389,20 +390,16 @@ static void sanitizerDeathCallback() const StackTrace stack_trace; - StringRef query_id = DB::CurrentThread::getQueryId(); - query_id.size = std::min(query_id.size, max_query_id_size); - int sig = SignalListener::SanitizerTrap; DB::writeBinary(sig, out); DB::writePODBinary(stack_trace, out); DB::writeBinary(UInt32(getThreadId()), out); - DB::writeStringBinary(query_id, out); DB::writePODBinary(DB::current_thread, out); out.next(); /// The time that is usually enough for separate thread to print info into log. - sleepForSeconds(10); + sleepForSeconds(20); } #endif diff --git a/base/daemon/CMakeLists.txt b/base/daemon/CMakeLists.txt index 6ef87db6a614..ae8f51cabd31 100644 --- a/base/daemon/CMakeLists.txt +++ b/base/daemon/CMakeLists.txt @@ -6,12 +6,12 @@ add_library (daemon target_include_directories (daemon PUBLIC ..) 
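(A note on the `fmt::runtime` wrappers introduced in the `BaseDaemon.cpp` hunks above: in {fmt} 8 and later, format strings are checked at compile time by default, so a message assembled at runtime must be marked explicitly. A minimal sketch of the API, assuming {fmt} 8+ is available; this is not the LOG_FATAL macro itself.)

```cpp
#include <string>
#include <fmt/format.h>

int main()
{
    // Compile-time checked format string (the fmt 8+ default):
    std::string a = fmt::format("thread {} got signal {}", 1, 11);

    // A message assembled at runtime cannot be checked at compile time,
    // so fmt::runtime() opts it out of the check. This is why the patch
    // wraps error_message / bare_stacktrace.str() before logging them.
    std::string error_message = "Sanitizer trap."; // built at runtime
    std::string b = fmt::format(fmt::runtime(error_message));
    return (a.size() + b.size()) > 0 ? 0 : 1;
}
```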
-if (OS_DARWIN AND NOT MAKE_STATIC_LIBRARIES) +if (OS_DARWIN AND NOT USE_STATIC_LIBRARIES) target_link_libraries (daemon PUBLIC -Wl,-undefined,dynamic_lookup) endif() target_link_libraries (daemon PUBLIC loggers PRIVATE clickhouse_common_io clickhouse_common_config common ${EXECINFO_LIBRARIES}) -if (USE_SENTRY) - target_link_libraries (daemon PRIVATE ${SENTRY_LIBRARY}) +if (TARGET ch_contrib::sentry) + target_link_libraries (daemon PRIVATE ch_contrib::sentry) endif () diff --git a/base/glibc-compatibility/CMakeLists.txt b/base/glibc-compatibility/CMakeLists.txt index 4fc2a002cd8f..ddec09121e1f 100644 --- a/base/glibc-compatibility/CMakeLists.txt +++ b/base/glibc-compatibility/CMakeLists.txt @@ -37,7 +37,7 @@ if (GLIBC_COMPATIBILITY) target_include_directories(glibc-compatibility PRIVATE libcxxabi ${musl_arch_include_dir}) - if (NOT USE_STATIC_LIBRARIES AND NOT MAKE_STATIC_LIBRARIES) + if (NOT USE_STATIC_LIBRARIES AND NOT USE_STATIC_LIBRARIES) target_compile_options(glibc-compatibility PRIVATE -fPIC) endif () diff --git a/base/glibc-compatibility/glibc-compat-2.32.h b/base/glibc-compatibility/glibc-compat-2.32.h deleted file mode 100644 index 53ed34d60fa5..000000000000 --- a/base/glibc-compatibility/glibc-compat-2.32.h +++ /dev/null @@ -1,50 +0,0 @@ -/// In glibc 2.32 new version of some symbols had been added [1]: -/// -/// $ nm -D clickhouse | fgrep -e @GLIBC_2.32 -/// U pthread_getattr_np@GLIBC_2.32 -/// U pthread_sigmask@GLIBC_2.32 -/// -/// [1]: https://www.spinics.net/lists/fedora-devel/msg273044.html -/// -/// Right now ubuntu 20.04 is used as official image for building -/// ClickHouse, however once it will be switched someone may not be happy -/// with that fact that he/she cannot use official binaries anymore because -/// they have glibc < 2.32. -/// -/// To avoid this dependency, let's force previous version of those -/// symbols from glibc. -/// -/// Also note, that the following approach had been tested: -/// a) -Wl,--wrap -- but it goes into endless recursion whey you try to do -/// something like this: -/// -/// int __pthread_getattr_np_compact(pthread_t thread, pthread_attr_t *attr); -/// GLIBC_COMPAT_SYMBOL(__pthread_getattr_np_compact, pthread_getattr_np) -/// int __pthread_getattr_np_compact(pthread_t thread, pthread_attr_t *attr); -/// int __wrap_pthread_getattr_np(pthread_t thread, pthread_attr_t *attr) -/// { -/// return __pthread_getattr_np_compact(thread, attr); -/// } -/// -/// int __pthread_sigmask_compact(int how, const sigset_t *set, sigset_t *oldset); -/// GLIBC_COMPAT_SYMBOL(__pthread_sigmask_compact, pthread_sigmask) -/// int __pthread_sigmask_compact(int how, const sigset_t *set, sigset_t *oldset); -/// int __wrap_pthread_sigmask(int how, const sigset_t *set, sigset_t *oldset) -/// { -/// return __pthread_sigmask_compact(how, set, oldset); -/// } -/// -/// b) -Wl,--defsym -- same problems (and you cannot use version of symbol with -/// version in the expression) -/// c) this approach -- simply add this file with -include directive. - -#if defined(__amd64__) -#define GLIBC_COMPAT_SYMBOL(func) __asm__(".symver " #func "," #func "@GLIBC_2.2.5"); -#elif defined(__aarch64__) -#define GLIBC_COMPAT_SYMBOL(func) __asm__(".symver " #func "," #func "@GLIBC_2.17"); -#else -#error Your platform is not supported. 
-#endif - -GLIBC_COMPAT_SYMBOL(pthread_sigmask) -GLIBC_COMPAT_SYMBOL(pthread_getattr_np) diff --git a/cmake/Modules/FindArrow.cmake b/cmake/Modules/FindArrow.cmake deleted file mode 100644 index 5bd111de1e33..000000000000 --- a/cmake/Modules/FindArrow.cmake +++ /dev/null @@ -1,433 +0,0 @@ -# https://github.com/apache/arrow/blob/master/cpp/cmake_modules/FindArrow.cmake - -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# - Find Arrow (arrow/api.h, libarrow.a, libarrow.so) -# This module defines -# ARROW_FOUND, whether Arrow has been found -# ARROW_FULL_SO_VERSION, full shared object version of found Arrow "100.0.0" -# ARROW_IMPORT_LIB, path to libarrow's import library (Windows only) -# ARROW_INCLUDE_DIR, directory containing headers -# ARROW_LIBS, deprecated. Use ARROW_LIB_DIR instead -# ARROW_LIB_DIR, directory containing Arrow libraries -# ARROW_SHARED_IMP_LIB, deprecated. Use ARROW_IMPORT_LIB instead -# ARROW_SHARED_LIB, path to libarrow's shared library -# ARROW_SO_VERSION, shared object version of found Arrow such as "100" -# ARROW_STATIC_LIB, path to libarrow.a -# ARROW_VERSION, version of found Arrow -# ARROW_VERSION_MAJOR, major version of found Arrow -# ARROW_VERSION_MINOR, minor version of found Arrow -# ARROW_VERSION_PATCH, patch version of found Arrow - -if(DEFINED ARROW_FOUND) - return() -endif() - -include(FindPkgConfig) -include(FindPackageHandleStandardArgs) - -set(ARROW_SEARCH_LIB_PATH_SUFFIXES) -if(CMAKE_LIBRARY_ARCHITECTURE) - list(APPEND ARROW_SEARCH_LIB_PATH_SUFFIXES "lib/${CMAKE_LIBRARY_ARCHITECTURE}") -endif() -list(APPEND ARROW_SEARCH_LIB_PATH_SUFFIXES - "lib64" - "lib32" - "lib" - "bin") -set(ARROW_CONFIG_SUFFIXES - "_RELEASE" - "_RELWITHDEBINFO" - "_MINSIZEREL" - "_DEBUG" - "") -if(CMAKE_BUILD_TYPE) - string(TOUPPER ${CMAKE_BUILD_TYPE} ARROW_CONFIG_SUFFIX_PREFERRED) - set(ARROW_CONFIG_SUFFIX_PREFERRED "_${ARROW_CONFIG_SUFFIX_PREFERRED}") - list(INSERT ARROW_CONFIG_SUFFIXES 0 "${ARROW_CONFIG_SUFFIX_PREFERRED}") -endif() - -if(NOT DEFINED ARROW_MSVC_STATIC_LIB_SUFFIX) - if(MSVC) - set(ARROW_MSVC_STATIC_LIB_SUFFIX "_static") - else() - set(ARROW_MSVC_STATIC_LIB_SUFFIX "") - endif() -endif() - -# Internal function. -# -# Set shared library name for ${base_name} to ${output_variable}. -# -# Example: -# arrow_build_shared_library_name(ARROW_SHARED_LIBRARY_NAME arrow) -# # -> ARROW_SHARED_LIBRARY_NAME=libarrow.so on Linux -# # -> ARROW_SHARED_LIBRARY_NAME=libarrow.dylib on macOS -# # -> ARROW_SHARED_LIBRARY_NAME=arrow.dll with MSVC on Windows -# # -> ARROW_SHARED_LIBRARY_NAME=libarrow.dll with MinGW on Windows -function(arrow_build_shared_library_name output_variable base_name) - set(${output_variable} - "${CMAKE_SHARED_LIBRARY_PREFIX}${base_name}${CMAKE_SHARED_LIBRARY_SUFFIX}" - PARENT_SCOPE) -endfunction() - -# Internal function. 
-# -# Set import library name for ${base_name} to ${output_variable}. -# This is useful only for MSVC build. Import library is used only -# with MSVC build. -# -# Example: -# arrow_build_import_library_name(ARROW_IMPORT_LIBRARY_NAME arrow) -# # -> ARROW_IMPORT_LIBRARY_NAME=arrow on Linux (meaningless) -# # -> ARROW_IMPORT_LIBRARY_NAME=arrow on macOS (meaningless) -# # -> ARROW_IMPORT_LIBRARY_NAME=arrow.lib with MSVC on Windows -# # -> ARROW_IMPORT_LIBRARY_NAME=libarrow.dll.a with MinGW on Windows -function(arrow_build_import_library_name output_variable base_name) - set(${output_variable} - "${CMAKE_IMPORT_LIBRARY_PREFIX}${base_name}${CMAKE_IMPORT_LIBRARY_SUFFIX}" - PARENT_SCOPE) -endfunction() - -# Internal function. -# -# Set static library name for ${base_name} to ${output_variable}. -# -# Example: -# arrow_build_static_library_name(ARROW_STATIC_LIBRARY_NAME arrow) -# # -> ARROW_STATIC_LIBRARY_NAME=libarrow.a on Linux -# # -> ARROW_STATIC_LIBRARY_NAME=libarrow.a on macOS -# # -> ARROW_STATIC_LIBRARY_NAME=arrow.lib with MSVC on Windows -# # -> ARROW_STATIC_LIBRARY_NAME=libarrow.dll.a with MinGW on Windows -function(arrow_build_static_library_name output_variable base_name) - set( - ${output_variable} - "${CMAKE_STATIC_LIBRARY_PREFIX}${base_name}${ARROW_MSVC_STATIC_LIB_SUFFIX}${CMAKE_STATIC_LIBRARY_SUFFIX}" - PARENT_SCOPE) -endfunction() - -# Internal function. -# -# Set macro value for ${macro_name} in ${header_content} to ${output_variable}. -# -# Example: -# arrow_extract_macro_value(version_major -# "ARROW_VERSION_MAJOR" -# "#define ARROW_VERSION_MAJOR 1.0.0") -# # -> version_major=1.0.0 -function(arrow_extract_macro_value output_variable macro_name header_content) - string(REGEX MATCH "#define +${macro_name} +[^\r\n]+" macro_definition - "${header_content}") - string(REGEX - REPLACE "^#define +${macro_name} +(.+)$" "\\1" macro_value "${macro_definition}") - set(${output_variable} "${macro_value}" PARENT_SCOPE) -endfunction() - -# Internal macro only for arrow_find_package. -# -# Find package in HOME. -macro(arrow_find_package_home) - find_path(${prefix}_include_dir "${header_path}" - PATHS "${home}" - PATH_SUFFIXES "include" - NO_DEFAULT_PATH) - set(include_dir "${${prefix}_include_dir}") - set(${prefix}_INCLUDE_DIR "${include_dir}" PARENT_SCOPE) - - if(MSVC) - set(CMAKE_SHARED_LIBRARY_SUFFIXES_ORIGINAL ${CMAKE_FIND_LIBRARY_SUFFIXES}) - # .dll isn't found by find_library with MSVC because .dll isn't included in - # CMAKE_FIND_LIBRARY_SUFFIXES. 
- list(APPEND CMAKE_FIND_LIBRARY_SUFFIXES "${CMAKE_SHARED_LIBRARY_SUFFIX}") - endif() - find_library(${prefix}_shared_lib - NAMES "${shared_lib_name}" - PATHS "${home}" - PATH_SUFFIXES ${ARROW_SEARCH_LIB_PATH_SUFFIXES} - NO_DEFAULT_PATH) - if(MSVC) - set(CMAKE_SHARED_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES_ORIGINAL}) - endif() - set(shared_lib "${${prefix}_shared_lib}") - set(${prefix}_SHARED_LIB "${shared_lib}" PARENT_SCOPE) - if(shared_lib) - add_library(${target_shared} SHARED IMPORTED) - set_target_properties(${target_shared} PROPERTIES IMPORTED_LOCATION "${shared_lib}") - if(include_dir) - set_target_properties(${target_shared} - PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${include_dir}") - endif() - find_library(${prefix}_import_lib - NAMES "${import_lib_name}" - PATHS "${home}" - PATH_SUFFIXES ${ARROW_SEARCH_LIB_PATH_SUFFIXES} - NO_DEFAULT_PATH) - set(import_lib "${${prefix}_import_lib}") - set(${prefix}_IMPORT_LIB "${import_lib}" PARENT_SCOPE) - if(import_lib) - set_target_properties(${target_shared} PROPERTIES IMPORTED_IMPLIB "${import_lib}") - endif() - endif() - - find_library(${prefix}_static_lib - NAMES "${static_lib_name}" - PATHS "${home}" - PATH_SUFFIXES ${ARROW_SEARCH_LIB_PATH_SUFFIXES} - NO_DEFAULT_PATH) - set(static_lib "${${prefix}_static_lib}") - set(${prefix}_STATIC_LIB "${static_lib}" PARENT_SCOPE) - if(static_lib) - add_library(${target_static} STATIC IMPORTED) - set_target_properties(${target_static} PROPERTIES IMPORTED_LOCATION "${static_lib}") - if(include_dir) - set_target_properties(${target_static} - PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${include_dir}") - endif() - endif() -endmacro() - -# Internal macro only for arrow_find_package. -# -# Find package by CMake package configuration. -macro(arrow_find_package_cmake_package_configuration) - find_package(${cmake_package_name} CONFIG) - if(${cmake_package_name}_FOUND) - set(${prefix}_USE_CMAKE_PACKAGE_CONFIG TRUE PARENT_SCOPE) - if(TARGET ${target_shared}) - foreach(suffix ${ARROW_CONFIG_SUFFIXES}) - get_target_property(shared_lib ${target_shared} IMPORTED_LOCATION${suffix}) - if(shared_lib) - # Remove shared library version: - # libarrow.so.100.0.0 -> libarrow.so - # Because ARROW_HOME and pkg-config approaches don't add - # shared library version. - string(REGEX - REPLACE "(${CMAKE_SHARED_LIBRARY_SUFFIX})[.0-9]+$" "\\1" shared_lib - "${shared_lib}") - set(${prefix}_SHARED_LIB "${shared_lib}" PARENT_SCOPE) - break() - endif() - endforeach() - endif() - if(TARGET ${target_static}) - foreach(suffix ${ARROW_CONFIG_SUFFIXES}) - get_target_property(static_lib ${target_static} IMPORTED_LOCATION${suffix}) - if(static_lib) - set(${prefix}_STATIC_LIB "${static_lib}" PARENT_SCOPE) - break() - endif() - endforeach() - endif() - endif() -endmacro() - -# Internal macro only for arrow_find_package. -# -# Find package by pkg-config. -macro(arrow_find_package_pkg_config) - pkg_check_modules(${prefix}_PC ${pkg_config_name}) - if(${prefix}_PC_FOUND) - set(${prefix}_USE_PKG_CONFIG TRUE PARENT_SCOPE) - - set(include_dir "${${prefix}_PC_INCLUDEDIR}") - set(lib_dir "${${prefix}_PC_LIBDIR}") - set(shared_lib_paths "${${prefix}_PC_LINK_LIBRARIES}") - # Use the first shared library path as the IMPORTED_LOCATION - # for ${target_shared}. This assumes that the first shared library - # path is the shared library path for this module. - list(GET shared_lib_paths 0 first_shared_lib_path) - # Use the rest shared library paths as the INTERFACE_LINK_LIBRARIES - # for ${target_shared}. 
This assumes that the rest shared library - # paths are dependency library paths for this module. - list(LENGTH shared_lib_paths n_shared_lib_paths) - if(n_shared_lib_paths LESS_EQUAL 1) - set(rest_shared_lib_paths) - else() - list(SUBLIST - shared_lib_paths - 1 - -1 - rest_shared_lib_paths) - endif() - - set(${prefix}_VERSION "${${prefix}_PC_VERSION}" PARENT_SCOPE) - set(${prefix}_INCLUDE_DIR "${include_dir}" PARENT_SCOPE) - set(${prefix}_SHARED_LIB "${first_shared_lib_path}" PARENT_SCOPE) - - add_library(${target_shared} SHARED IMPORTED) - set_target_properties(${target_shared} - PROPERTIES INTERFACE_INCLUDE_DIRECTORIES - "${include_dir}" - INTERFACE_LINK_LIBRARIES - "${rest_shared_lib_paths}" - IMPORTED_LOCATION - "${first_shared_lib_path}") - get_target_property(shared_lib ${target_shared} IMPORTED_LOCATION) - - find_library(${prefix}_static_lib - NAMES "${static_lib_name}" - PATHS "${lib_dir}" - NO_DEFAULT_PATH) - set(static_lib "${${prefix}_static_lib}") - set(${prefix}_STATIC_LIB "${static_lib}" PARENT_SCOPE) - if(static_lib) - add_library(${target_static} STATIC IMPORTED) - set_target_properties(${target_static} - PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${include_dir}" - IMPORTED_LOCATION "${static_lib}") - endif() - endif() -endmacro() - -function(arrow_find_package - prefix - home - base_name - header_path - cmake_package_name - pkg_config_name) - arrow_build_shared_library_name(shared_lib_name ${base_name}) - arrow_build_import_library_name(import_lib_name ${base_name}) - arrow_build_static_library_name(static_lib_name ${base_name}) - - set(target_shared ${base_name}_shared) - set(target_static ${base_name}_static) - - if(home) - arrow_find_package_home() - set(${prefix}_FIND_APPROACH "HOME: ${home}" PARENT_SCOPE) - else() - arrow_find_package_cmake_package_configuration() - if(${cmake_package_name}_FOUND) - set(${prefix}_FIND_APPROACH - "CMake package configuration: ${cmake_package_name}" - PARENT_SCOPE) - else() - arrow_find_package_pkg_config() - set(${prefix}_FIND_APPROACH "pkg-config: ${pkg_config_name}" PARENT_SCOPE) - endif() - endif() - - if(NOT include_dir) - if(TARGET ${target_shared}) - get_target_property(include_dir ${target_shared} INTERFACE_INCLUDE_DIRECTORIES) - elseif(TARGET ${target_static}) - get_target_property(include_dir ${target_static} INTERFACE_INCLUDE_DIRECTORIES) - endif() - endif() - if(include_dir) - set(${prefix}_INCLUDE_DIR "${include_dir}" PARENT_SCOPE) - endif() - - if(shared_lib) - get_filename_component(lib_dir "${shared_lib}" DIRECTORY) - elseif(static_lib) - get_filename_component(lib_dir "${static_lib}" DIRECTORY) - else() - set(lib_dir NOTFOUND) - endif() - set(${prefix}_LIB_DIR "${lib_dir}" PARENT_SCOPE) - # For backward compatibility - set(${prefix}_LIBS "${lib_dir}" PARENT_SCOPE) -endfunction() - -if(NOT "$ENV{ARROW_HOME}" STREQUAL "") - file(TO_CMAKE_PATH "$ENV{ARROW_HOME}" ARROW_HOME) -endif() -arrow_find_package(ARROW - "${ARROW_HOME}" - arrow - arrow/api.h - Arrow - arrow) - -if(ARROW_HOME) - if(ARROW_INCLUDE_DIR) - file(READ "${ARROW_INCLUDE_DIR}/arrow/util/config.h" ARROW_CONFIG_H_CONTENT) - arrow_extract_macro_value(ARROW_VERSION_MAJOR "ARROW_VERSION_MAJOR" - "${ARROW_CONFIG_H_CONTENT}") - arrow_extract_macro_value(ARROW_VERSION_MINOR "ARROW_VERSION_MINOR" - "${ARROW_CONFIG_H_CONTENT}") - arrow_extract_macro_value(ARROW_VERSION_PATCH "ARROW_VERSION_PATCH" - "${ARROW_CONFIG_H_CONTENT}") - if("${ARROW_VERSION_MAJOR}" STREQUAL "" - OR "${ARROW_VERSION_MINOR}" STREQUAL "" - OR "${ARROW_VERSION_PATCH}" STREQUAL "") - 
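# If any of the three version macros failed to extract, fall back to a
# sentinel version rather than composing a partial version string.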
set(ARROW_VERSION "0.0.0") - else() - set(ARROW_VERSION - "${ARROW_VERSION_MAJOR}.${ARROW_VERSION_MINOR}.${ARROW_VERSION_PATCH}") - endif() - - arrow_extract_macro_value(ARROW_SO_VERSION_QUOTED "ARROW_SO_VERSION" - "${ARROW_CONFIG_H_CONTENT}") - string(REGEX REPLACE "^\"(.+)\"$" "\\1" ARROW_SO_VERSION "${ARROW_SO_VERSION_QUOTED}") - arrow_extract_macro_value(ARROW_FULL_SO_VERSION_QUOTED "ARROW_FULL_SO_VERSION" - "${ARROW_CONFIG_H_CONTENT}") - string(REGEX - REPLACE "^\"(.+)\"$" "\\1" ARROW_FULL_SO_VERSION - "${ARROW_FULL_SO_VERSION_QUOTED}") - endif() -else() - if(ARROW_USE_CMAKE_PACKAGE_CONFIG) - find_package(Arrow CONFIG) - elseif(ARROW_USE_PKG_CONFIG) - pkg_get_variable(ARROW_SO_VERSION arrow so_version) - pkg_get_variable(ARROW_FULL_SO_VERSION arrow full_so_version) - endif() -endif() - -set(ARROW_ABI_VERSION ${ARROW_SO_VERSION}) - -mark_as_advanced(ARROW_ABI_VERSION - ARROW_CONFIG_SUFFIXES - ARROW_FULL_SO_VERSION - ARROW_IMPORT_LIB - ARROW_INCLUDE_DIR - ARROW_LIBS - ARROW_LIB_DIR - ARROW_SEARCH_LIB_PATH_SUFFIXES - ARROW_SHARED_IMP_LIB - ARROW_SHARED_LIB - ARROW_SO_VERSION - ARROW_STATIC_LIB - ARROW_VERSION - ARROW_VERSION_MAJOR - ARROW_VERSION_MINOR - ARROW_VERSION_PATCH) - -find_package_handle_standard_args(Arrow REQUIRED_VARS - # The first required variable is shown - # in the found message. So this list is - # not sorted alphabetically. - ARROW_INCLUDE_DIR - ARROW_LIB_DIR - ARROW_FULL_SO_VERSION - ARROW_SO_VERSION - VERSION_VAR - ARROW_VERSION) -set(ARROW_FOUND ${Arrow_FOUND}) - -if(Arrow_FOUND AND NOT Arrow_FIND_QUIETLY) - message(STATUS "Arrow version: ${ARROW_VERSION} (${ARROW_FIND_APPROACH})") - message(STATUS "Arrow SO and ABI version: ${ARROW_SO_VERSION}") - message(STATUS "Arrow full SO version: ${ARROW_FULL_SO_VERSION}") - message(STATUS "Found the Arrow core shared library: ${ARROW_SHARED_LIB}") - message(STATUS "Found the Arrow core import library: ${ARROW_IMPORT_LIB}") - message(STATUS "Found the Arrow core static library: ${ARROW_STATIC_LIB}") -endif() diff --git a/cmake/Modules/FindICU.cmake b/cmake/Modules/FindICU.cmake deleted file mode 100644 index 0e61b3dcf29d..000000000000 --- a/cmake/Modules/FindICU.cmake +++ /dev/null @@ -1,394 +0,0 @@ -# Distributed under the OSI-approved BSD 3-Clause License. See accompanying -# file Copyright.txt or https://cmake.org/licensing for details. - -#.rst: -# FindICU -# ------- -# -# Find the International Components for Unicode (ICU) libraries and -# programs. -# -# This module supports multiple components. -# Components can include any of: ``data``, ``i18n``, ``io``, ``le``, -# ``lx``, ``test``, ``tu`` and ``uc``. -# -# Note that on Windows ``data`` is named ``dt`` and ``i18n`` is named -# ``in``; any of the names may be used, and the appropriate -# platform-specific library name will be automatically selected. -# -# This module reports information about the ICU installation in -# several variables. General variables:: -# -# ICU_VERSION - ICU release version -# ICU_FOUND - true if the main programs and libraries were found -# ICU_LIBRARIES - component libraries to be linked -# ICU_INCLUDE_DIRS - the directories containing the ICU headers -# -# Imported targets:: -# -# ICU:: -# -# Where ```` is the name of an ICU component, for example -# ``ICU::i18n``. 
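#
# For example, a consuming project could use the imported targets like
# this (illustrative sketch; the target name ``myapp`` is hypothetical)::
#
#   find_package(ICU COMPONENTS uc i18n REQUIRED)
#   target_link_libraries(myapp PRIVATE ICU::uc ICU::i18n)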
-# -# ICU programs are reported in:: -# -# ICU_GENCNVAL_EXECUTABLE - path to gencnval executable -# ICU_ICUINFO_EXECUTABLE - path to icuinfo executable -# ICU_GENBRK_EXECUTABLE - path to genbrk executable -# ICU_ICU-CONFIG_EXECUTABLE - path to icu-config executable -# ICU_GENRB_EXECUTABLE - path to genrb executable -# ICU_GENDICT_EXECUTABLE - path to gendict executable -# ICU_DERB_EXECUTABLE - path to derb executable -# ICU_PKGDATA_EXECUTABLE - path to pkgdata executable -# ICU_UCONV_EXECUTABLE - path to uconv executable -# ICU_GENCFU_EXECUTABLE - path to gencfu executable -# ICU_MAKECONV_EXECUTABLE - path to makeconv executable -# ICU_GENNORM2_EXECUTABLE - path to gennorm2 executable -# ICU_GENCCODE_EXECUTABLE - path to genccode executable -# ICU_GENSPREP_EXECUTABLE - path to gensprep executable -# ICU_ICUPKG_EXECUTABLE - path to icupkg executable -# ICU_GENCMN_EXECUTABLE - path to gencmn executable -# -# ICU component libraries are reported in:: -# -# ICU__FOUND - ON if component was found -# ICU__LIBRARIES - libraries for component -# -# ICU datafiles are reported in:: -# -# ICU_MAKEFILE_INC - Makefile.inc -# ICU_PKGDATA_INC - pkgdata.inc -# -# Note that ```` is the uppercased name of the component. -# -# This module reads hints about search results from:: -# -# ICU_ROOT - the root of the ICU installation -# -# The environment variable ``ICU_ROOT`` may also be used; the -# ICU_ROOT variable takes precedence. -# -# The following cache variables may also be set:: -# -# ICU_

<P>_EXECUTABLE - the path to executable <P>
-# ICU_INCLUDE_DIR - the directory containing the ICU headers -# ICU__LIBRARY - the library for component -# -# .. note:: -# -# In most cases none of the above variables will require setting, -# unless multiple ICU versions are available and a specific version -# is required. -# -# Other variables one may set to control this module are:: -# -# ICU_DEBUG - Set to ON to enable debug output from FindICU. - -# Written by Roger Leigh - -set(icu_programs - gencnval - icuinfo - genbrk - icu-config - genrb - gendict - derb - pkgdata - uconv - gencfu - makeconv - gennorm2 - genccode - gensprep - icupkg - gencmn) - -set(icu_data - Makefile.inc - pkgdata.inc) - -# The ICU checks are contained in a function due to the large number -# of temporary variables needed. -function(_ICU_FIND) - # Set up search paths, taking compiler into account. Search ICU_ROOT, - # with ICU_ROOT in the environment as a fallback if unset. - if(ICU_ROOT) - list(APPEND icu_roots "${ICU_ROOT}") - else() - if(NOT "$ENV{ICU_ROOT}" STREQUAL "") - file(TO_CMAKE_PATH "$ENV{ICU_ROOT}" NATIVE_PATH) - list(APPEND icu_roots "${NATIVE_PATH}") - set(ICU_ROOT "${NATIVE_PATH}" - CACHE PATH "Location of the ICU installation" FORCE) - endif() - endif() - - # Find include directory - list(APPEND icu_include_suffixes "include") - find_path(ICU_INCLUDE_DIR - NAMES "unicode/utypes.h" - HINTS ${icu_roots} - PATH_SUFFIXES ${icu_include_suffixes} - DOC "ICU include directory") - set(ICU_INCLUDE_DIR "${ICU_INCLUDE_DIR}" PARENT_SCOPE) - - # Get version - if(ICU_INCLUDE_DIR AND EXISTS "${ICU_INCLUDE_DIR}/unicode/uvernum.h") - file(STRINGS "${ICU_INCLUDE_DIR}/unicode/uvernum.h" icu_header_str - REGEX "^#define[\t ]+U_ICU_VERSION[\t ]+\".*\".*") - - string(REGEX REPLACE "^#define[\t ]+U_ICU_VERSION[\t ]+\"([^ \\n]*)\".*" - "\\1" icu_version_string "${icu_header_str}") - set(ICU_VERSION "${icu_version_string}") - set(ICU_VERSION "${icu_version_string}" PARENT_SCOPE) - unset(icu_header_str) - unset(icu_version_string) - endif() - - if(CMAKE_SIZEOF_VOID_P EQUAL 8) - # 64-bit binary directory - set(_bin64 "bin64") - # 64-bit library directory - set(_lib64 "lib64") - endif() - - - # Find all ICU programs - list(APPEND icu_binary_suffixes "${_bin64}" "bin") - foreach(program ${icu_programs}) - string(TOUPPER "${program}" program_upcase) - set(cache_var "ICU_${program_upcase}_EXECUTABLE") - set(program_var "ICU_${program_upcase}_EXECUTABLE") - find_program("${cache_var}" "${program}" - HINTS ${icu_roots} - PATH_SUFFIXES ${icu_binary_suffixes} - DOC "ICU ${program} executable") - mark_as_advanced(cache_var) - set("${program_var}" "${${cache_var}}" PARENT_SCOPE) - endforeach() - - # Find all ICU libraries - list(APPEND icu_library_suffixes "${_lib64}" "lib") - set(ICU_REQUIRED_LIBS_FOUND ON) - foreach(component ${ICU_FIND_COMPONENTS}) - string(TOUPPER "${component}" component_upcase) - set(component_cache "ICU_${component_upcase}_LIBRARY") - set(component_cache_release "${component_cache}_RELEASE") - set(component_cache_debug "${component_cache}_DEBUG") - set(component_found "${component_upcase}_FOUND") - set(component_libnames "icu${component}") - set(component_debug_libnames "icu${component}d") - - # Special case deliberate library naming mismatches between Unix - # and Windows builds - unset(component_libnames) - unset(component_debug_libnames) - list(APPEND component_libnames "icu${component}") - list(APPEND component_debug_libnames "icu${component}d") - if(component STREQUAL "data") - list(APPEND component_libnames "icudt") - # Note there is no debug variant 
at present - list(APPEND component_debug_libnames "icudtd") - endif() - if(component STREQUAL "dt") - list(APPEND component_libnames "icudata") - # Note there is no debug variant at present - list(APPEND component_debug_libnames "icudatad") - endif() - if(component STREQUAL "i18n") - list(APPEND component_libnames "icuin") - list(APPEND component_debug_libnames "icuind") - endif() - if(component STREQUAL "in") - list(APPEND component_libnames "icui18n") - list(APPEND component_debug_libnames "icui18nd") - endif() - - find_library("${component_cache_release}" ${component_libnames} - HINTS ${icu_roots} - PATH_SUFFIXES ${icu_library_suffixes} - DOC "ICU ${component} library (release)") - find_library("${component_cache_debug}" ${component_debug_libnames} - HINTS ${icu_roots} - PATH_SUFFIXES ${icu_library_suffixes} - DOC "ICU ${component} library (debug)") - include(SelectLibraryConfigurations) - select_library_configurations(ICU_${component_upcase}) - mark_as_advanced("${component_cache_release}" "${component_cache_debug}") - if(${component_cache}) - set("${component_found}" ON) - list(APPEND ICU_LIBRARY "${${component_cache}}") - endif() - mark_as_advanced("${component_found}") - set("${component_cache}" "${${component_cache}}" PARENT_SCOPE) - set("${component_found}" "${${component_found}}" PARENT_SCOPE) - if(${component_found}) - if (ICU_FIND_REQUIRED_${component}) - list(APPEND ICU_LIBS_FOUND "${component} (required)") - else() - list(APPEND ICU_LIBS_FOUND "${component} (optional)") - endif() - else() - if (ICU_FIND_REQUIRED_${component}) - set(ICU_REQUIRED_LIBS_FOUND OFF) - list(APPEND ICU_LIBS_NOTFOUND "${component} (required)") - else() - list(APPEND ICU_LIBS_NOTFOUND "${component} (optional)") - endif() - endif() - endforeach() - set(_ICU_REQUIRED_LIBS_FOUND "${ICU_REQUIRED_LIBS_FOUND}" PARENT_SCOPE) - set(ICU_LIBRARY "${ICU_LIBRARY}" PARENT_SCOPE) - - # Find all ICU data files - if(CMAKE_LIBRARY_ARCHITECTURE) - list(APPEND icu_data_suffixes - "${_lib64}/${CMAKE_LIBRARY_ARCHITECTURE}/icu/${ICU_VERSION}" - "lib/${CMAKE_LIBRARY_ARCHITECTURE}/icu/${ICU_VERSION}" - "${_lib64}/${CMAKE_LIBRARY_ARCHITECTURE}/icu" - "lib/${CMAKE_LIBRARY_ARCHITECTURE}/icu") - endif() - list(APPEND icu_data_suffixes - "${_lib64}/icu/${ICU_VERSION}" - "lib/icu/${ICU_VERSION}" - "${_lib64}/icu" - "lib/icu") - foreach(data ${icu_data}) - string(TOUPPER "${data}" data_upcase) - string(REPLACE "." 
"_" data_upcase "${data_upcase}") - set(cache_var "ICU_${data_upcase}") - set(data_var "ICU_${data_upcase}") - find_file("${cache_var}" "${data}" - HINTS ${icu_roots} - PATH_SUFFIXES ${icu_data_suffixes} - DOC "ICU ${data} data file") - mark_as_advanced(cache_var) - set("${data_var}" "${${cache_var}}" PARENT_SCOPE) - endforeach() - - if(NOT ICU_FIND_QUIETLY) - if(ICU_LIBS_FOUND) - message(STATUS "Found the following ICU libraries:") - foreach(found ${ICU_LIBS_FOUND}) - message(STATUS " ${found}") - endforeach() - endif() - if(ICU_LIBS_NOTFOUND) - message(STATUS "The following ICU libraries were not found:") - foreach(notfound ${ICU_LIBS_NOTFOUND}) - message(STATUS " ${notfound}") - endforeach() - endif() - endif() - - if(ICU_DEBUG) - message(STATUS "--------FindICU.cmake search debug--------") - message(STATUS "ICU binary path search order: ${icu_roots}") - message(STATUS "ICU include path search order: ${icu_roots}") - message(STATUS "ICU library path search order: ${icu_roots}") - message(STATUS "----------------") - endif() -endfunction() - -_ICU_FIND() - -include(FindPackageHandleStandardArgs) -FIND_PACKAGE_HANDLE_STANDARD_ARGS(ICU - FOUND_VAR ICU_FOUND - REQUIRED_VARS ICU_INCLUDE_DIR - ICU_LIBRARY - _ICU_REQUIRED_LIBS_FOUND - VERSION_VAR ICU_VERSION - FAIL_MESSAGE "Failed to find all ICU components") - -unset(_ICU_REQUIRED_LIBS_FOUND) - -if(ICU_FOUND) - set(ICU_INCLUDE_DIRS "${ICU_INCLUDE_DIR}") - set(ICU_LIBRARIES "${ICU_LIBRARY}") - foreach(_ICU_component ${ICU_FIND_COMPONENTS}) - string(TOUPPER "${_ICU_component}" _ICU_component_upcase) - set(_ICU_component_cache "ICU_${_ICU_component_upcase}_LIBRARY") - set(_ICU_component_cache_release "ICU_${_ICU_component_upcase}_LIBRARY_RELEASE") - set(_ICU_component_cache_debug "ICU_${_ICU_component_upcase}_LIBRARY_DEBUG") - set(_ICU_component_lib "ICU_${_ICU_component_upcase}_LIBRARIES") - set(_ICU_component_found "${_ICU_component_upcase}_FOUND") - set(_ICU_imported_target "ICU::${_ICU_component}") - if(${_ICU_component_found}) - set("${_ICU_component_lib}" "${${_ICU_component_cache}}") - if(NOT TARGET ${_ICU_imported_target}) - add_library(${_ICU_imported_target} UNKNOWN IMPORTED) - if(ICU_INCLUDE_DIR) - set_target_properties(${_ICU_imported_target} PROPERTIES - INTERFACE_INCLUDE_DIRECTORIES "${ICU_INCLUDE_DIR}") - endif() - if(EXISTS "${${_ICU_component_cache}}") - set_target_properties(${_ICU_imported_target} PROPERTIES - IMPORTED_LINK_INTERFACE_LANGUAGES "CXX" - IMPORTED_LOCATION "${${_ICU_component_cache}}") - endif() - if(EXISTS "${${_ICU_component_cache_release}}") - set_property(TARGET ${_ICU_imported_target} APPEND PROPERTY - IMPORTED_CONFIGURATIONS RELEASE) - set_target_properties(${_ICU_imported_target} PROPERTIES - IMPORTED_LINK_INTERFACE_LANGUAGES_RELEASE "CXX" - IMPORTED_LOCATION_RELEASE "${${_ICU_component_cache_release}}") - endif() - if(EXISTS "${${_ICU_component_cache_debug}}") - set_property(TARGET ${_ICU_imported_target} APPEND PROPERTY - IMPORTED_CONFIGURATIONS DEBUG) - set_target_properties(${_ICU_imported_target} PROPERTIES - IMPORTED_LINK_INTERFACE_LANGUAGES_DEBUG "CXX" - IMPORTED_LOCATION_DEBUG "${${_ICU_component_cache_debug}}") - endif() - endif() - endif() - unset(_ICU_component_upcase) - unset(_ICU_component_cache) - unset(_ICU_component_lib) - unset(_ICU_component_found) - unset(_ICU_imported_target) - endforeach() -endif() - -if(ICU_DEBUG) - message(STATUS "--------FindICU.cmake results debug--------") - message(STATUS "ICU found: ${ICU_FOUND}") - message(STATUS "ICU_VERSION number: ${ICU_VERSION}") - 
message(STATUS "ICU_ROOT directory: ${ICU_ROOT}") - message(STATUS "ICU_INCLUDE_DIR directory: ${ICU_INCLUDE_DIR}") - message(STATUS "ICU_LIBRARIES: ${ICU_LIBRARIES}") - - foreach(program IN LISTS icu_programs) - string(TOUPPER "${program}" program_upcase) - set(program_lib "ICU_${program_upcase}_EXECUTABLE") - message(STATUS "${program} program: ${${program_lib}}") - unset(program_upcase) - unset(program_lib) - endforeach() - - foreach(data IN LISTS icu_data) - string(TOUPPER "${data}" data_upcase) - string(REPLACE "." "_" data_upcase "${data_upcase}") - set(data_lib "ICU_${data_upcase}") - message(STATUS "${data} data: ${${data_lib}}") - unset(data_upcase) - unset(data_lib) - endforeach() - - foreach(component IN LISTS ICU_FIND_COMPONENTS) - string(TOUPPER "${component}" component_upcase) - set(component_lib "ICU_${component_upcase}_LIBRARIES") - set(component_found "${component_upcase}_FOUND") - message(STATUS "${component} library found: ${${component_found}}") - message(STATUS "${component} library: ${${component_lib}}") - unset(component_upcase) - unset(component_lib) - unset(component_found) - endforeach() - message(STATUS "----------------") -endif() - -unset(icu_programs) diff --git a/cmake/Modules/FindOpenLDAP.cmake b/cmake/Modules/FindOpenLDAP.cmake deleted file mode 100644 index 9c6262fa245d..000000000000 --- a/cmake/Modules/FindOpenLDAP.cmake +++ /dev/null @@ -1,55 +0,0 @@ -# Find OpenLDAP libraries. -# -# Can be configured with: -# OPENLDAP_ROOT_DIR - path to the OpenLDAP installation prefix -# OPENLDAP_USE_STATIC_LIBS - look for static version of the libraries -# OPENLDAP_USE_REENTRANT_LIBS - look for thread-safe version of the libraries -# -# Sets values of: -# OPENLDAP_FOUND - TRUE if found -# OPENLDAP_INCLUDE_DIRS - paths to the include directories -# OPENLDAP_LIBRARIES - paths to the libldap and liblber libraries -# OPENLDAP_LDAP_LIBRARY - paths to the libldap library -# OPENLDAP_LBER_LIBRARY - paths to the liblber library -# - -if(OPENLDAP_USE_STATIC_LIBS) - set(_orig_CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES}) - if(WIN32) - set(CMAKE_FIND_LIBRARY_SUFFIXES ".lib" ".a" ${CMAKE_FIND_LIBRARY_SUFFIXES}) - else() - set(CMAKE_FIND_LIBRARY_SUFFIXES ".a") - endif() -endif() - -set(_r_suffix) -if(OPENLDAP_USE_REENTRANT_LIBS) - set(_r_suffix "_r") -endif() - -if(OPENLDAP_ROOT_DIR) - find_path(OPENLDAP_INCLUDE_DIRS NAMES "ldap.h" "lber.h" PATHS "${OPENLDAP_ROOT_DIR}" PATH_SUFFIXES "include" NO_DEFAULT_PATH) - find_library(OPENLDAP_LDAP_LIBRARY NAMES "ldap${_r_suffix}" PATHS "${OPENLDAP_ROOT_DIR}" PATH_SUFFIXES "lib" NO_DEFAULT_PATH) - find_library(OPENLDAP_LBER_LIBRARY NAMES "lber" PATHS "${OPENLDAP_ROOT_DIR}" PATH_SUFFIXES "lib" NO_DEFAULT_PATH) -else() - find_path(OPENLDAP_INCLUDE_DIRS NAMES "ldap.h" "lber.h") - find_library(OPENLDAP_LDAP_LIBRARY NAMES "ldap${_r_suffix}") - find_library(OPENLDAP_LBER_LIBRARY NAMES "lber") -endif() - -unset(_r_suffix) - -set(OPENLDAP_LIBRARIES ${OPENLDAP_LDAP_LIBRARY} ${OPENLDAP_LBER_LIBRARY}) - -include(FindPackageHandleStandardArgs) -find_package_handle_standard_args( - OpenLDAP DEFAULT_MSG - OPENLDAP_INCLUDE_DIRS OPENLDAP_LDAP_LIBRARY OPENLDAP_LBER_LIBRARY -) - -mark_as_advanced(OPENLDAP_INCLUDE_DIRS OPENLDAP_LIBRARIES OPENLDAP_LDAP_LIBRARY OPENLDAP_LBER_LIBRARY) - -if(OPENLDAP_USE_STATIC_LIBS) - set(CMAKE_FIND_LIBRARY_SUFFIXES ${_orig_CMAKE_FIND_LIBRARY_SUFFIXES}) - unset(_orig_CMAKE_FIND_LIBRARY_SUFFIXES) -endif() diff --git a/cmake/Modules/FindParquet.cmake b/cmake/Modules/FindParquet.cmake deleted file mode 100644 index 
654020c0b873..000000000000 --- a/cmake/Modules/FindParquet.cmake +++ /dev/null @@ -1,132 +0,0 @@ -# https://github.com/apache/arrow/blob/master/cpp/cmake_modules/FindParquet.cmake - -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# - Find Parquet (parquet/api/reader.h, libparquet.a, libparquet.so) -# -# This module requires Arrow from which it uses -# arrow_find_package() -# -# This module defines -# PARQUET_FOUND, whether Parquet has been found -# PARQUET_IMPORT_LIB, path to libparquet's import library (Windows only) -# PARQUET_INCLUDE_DIR, directory containing headers -# PARQUET_LIBS, deprecated. Use PARQUET_LIB_DIR instead -# PARQUET_LIB_DIR, directory containing Parquet libraries -# PARQUET_SHARED_IMP_LIB, deprecated. Use PARQUET_IMPORT_LIB instead -# PARQUET_SHARED_LIB, path to libparquet's shared library -# PARQUET_SO_VERSION, shared object version of found Parquet such as "100" -# PARQUET_STATIC_LIB, path to libparquet.a - -if(DEFINED PARQUET_FOUND) - return() -endif() - -set(find_package_arguments) -if(${CMAKE_FIND_PACKAGE_NAME}_FIND_VERSION) - list(APPEND find_package_arguments "${${CMAKE_FIND_PACKAGE_NAME}_FIND_VERSION}") -endif() -if(${CMAKE_FIND_PACKAGE_NAME}_FIND_REQUIRED) - list(APPEND find_package_arguments REQUIRED) -endif() -if(${CMAKE_FIND_PACKAGE_NAME}_FIND_QUIETLY) - list(APPEND find_package_arguments QUIET) -endif() -find_package(Arrow ${find_package_arguments}) - -if(NOT "$ENV{PARQUET_HOME}" STREQUAL "") - file(TO_CMAKE_PATH "$ENV{PARQUET_HOME}" PARQUET_HOME) -endif() - -if((NOT PARQUET_HOME) AND ARROW_HOME) - set(PARQUET_HOME ${ARROW_HOME}) -endif() - -if(ARROW_FOUND) - arrow_find_package(PARQUET - "${PARQUET_HOME}" - parquet - parquet/api/reader.h - Parquet - parquet) - if(PARQUET_HOME) - if(PARQUET_INCLUDE_DIR) - file(READ "${PARQUET_INCLUDE_DIR}/parquet/parquet_version.h" - PARQUET_VERSION_H_CONTENT) - arrow_extract_macro_value(PARQUET_VERSION_MAJOR "PARQUET_VERSION_MAJOR" - "${PARQUET_VERSION_H_CONTENT}") - arrow_extract_macro_value(PARQUET_VERSION_MINOR "PARQUET_VERSION_MINOR" - "${PARQUET_VERSION_H_CONTENT}") - arrow_extract_macro_value(PARQUET_VERSION_PATCH "PARQUET_VERSION_PATCH" - "${PARQUET_VERSION_H_CONTENT}") - if("${PARQUET_VERSION_MAJOR}" STREQUAL "" - OR "${PARQUET_VERSION_MINOR}" STREQUAL "" - OR "${PARQUET_VERSION_PATCH}" STREQUAL "") - set(PARQUET_VERSION "0.0.0") - else() - set(PARQUET_VERSION - "${PARQUET_VERSION_MAJOR}.${PARQUET_VERSION_MINOR}.${PARQUET_VERSION_PATCH}") - endif() - - arrow_extract_macro_value(PARQUET_SO_VERSION_QUOTED "PARQUET_SO_VERSION" - "${PARQUET_VERSION_H_CONTENT}") - string(REGEX - REPLACE "^\"(.+)\"$" "\\1" PARQUET_SO_VERSION "${PARQUET_SO_VERSION_QUOTED}") - arrow_extract_macro_value(PARQUET_FULL_SO_VERSION_QUOTED "PARQUET_FULL_SO_VERSION" - "${PARQUET_VERSION_H_CONTENT}") - 
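# arrow_extract_macro_value() returns the value with its surrounding
# quotes (e.g. "500.0.0"); the regex below strips them.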
string(REGEX - REPLACE "^\"(.+)\"$" "\\1" PARQUET_FULL_SO_VERSION - "${PARQUET_FULL_SO_VERSION_QUOTED}") - endif() - else() - if(PARQUET_USE_CMAKE_PACKAGE_CONFIG) - find_package(Parquet CONFIG) - elseif(PARQUET_USE_PKG_CONFIG) - pkg_get_variable(PARQUET_SO_VERSION parquet so_version) - pkg_get_variable(PARQUET_FULL_SO_VERSION parquet full_so_version) - endif() - endif() - set(PARQUET_ABI_VERSION "${PARQUET_SO_VERSION}") -endif() - -mark_as_advanced(PARQUET_ABI_VERSION - PARQUET_IMPORT_LIB - PARQUET_INCLUDE_DIR - PARQUET_LIBS - PARQUET_LIB_DIR - PARQUET_SHARED_IMP_LIB - PARQUET_SHARED_LIB - PARQUET_SO_VERSION - PARQUET_STATIC_LIB - PARQUET_VERSION) - -find_package_handle_standard_args(Parquet - REQUIRED_VARS - PARQUET_INCLUDE_DIR - PARQUET_LIB_DIR - PARQUET_SO_VERSION - VERSION_VAR - PARQUET_VERSION) -set(PARQUET_FOUND ${Parquet_FOUND}) - -if(Parquet_FOUND AND NOT Parquet_FIND_QUIETLY) - message(STATUS "Parquet version: ${PARQUET_VERSION} (${PARQUET_FIND_APPROACH})") - message(STATUS "Found the Parquet shared library: ${PARQUET_SHARED_LIB}") - message(STATUS "Found the Parquet import library: ${PARQUET_IMPORT_LIB}") - message(STATUS "Found the Parquet static library: ${PARQUET_STATIC_LIB}") -endif() diff --git a/cmake/Modules/Findcityhash.cmake b/cmake/Modules/Findcityhash.cmake deleted file mode 100644 index 5250df2e0a65..000000000000 --- a/cmake/Modules/Findcityhash.cmake +++ /dev/null @@ -1,44 +0,0 @@ -# - Try to find cityhash headers and libraries. -# -# Usage of this module as follows: -# -# find_package(cityhash) -# -# Variables used by this module, they can change the default behaviour and need -# to be set before calling find_package: -# -# CITYHASH_ROOT_DIR Set this variable to the root installation of -# cityhash if the module has problems finding -# the proper installation path. -# -# Variables defined by this module: -# -# CITYHASH_FOUND System has cityhash libs/headers -# CITYHASH_LIBRARIES The cityhash library/libraries -# CITYHASH_INCLUDE_DIR The location of cityhash headers - -find_path(CITYHASH_ROOT_DIR - NAMES include/city.h -) - -find_library(CITYHASH_LIBRARIES - NAMES cityhash - PATHS ${CITYHASH_ROOT_DIR}/lib ${CITYHASH_LIBRARIES_PATHS} -) - -find_path(CITYHASH_INCLUDE_DIR - NAMES city.h - PATHS ${CITYHASH_ROOT_DIR}/include ${CITYHASH_INCLUDE_PATHS} -) - -include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(cityhash DEFAULT_MSG - CITYHASH_LIBRARIES - CITYHASH_INCLUDE_DIR -) - -mark_as_advanced( - CITYHASH_ROOT_DIR - CITYHASH_LIBRARIES - CITYHASH_INCLUDE_DIR -) diff --git a/cmake/Modules/Finddouble-conversion.cmake b/cmake/Modules/Finddouble-conversion.cmake deleted file mode 100644 index cb01be0f25b6..000000000000 --- a/cmake/Modules/Finddouble-conversion.cmake +++ /dev/null @@ -1,44 +0,0 @@ -# - Try to find double-conversion headers and libraries. -# -# Usage of this module as follows: -# -# find_package(double-conversion) -# -# Variables used by this module, they can change the default behaviour and need -# to be set before calling find_package: -# -# DOUBLE_CONVERSION_ROOT_DIR Set this variable to the root installation of -# double-conversion if the module has problems finding -# the proper installation path. 
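#
# For example (an illustrative sketch; the target name `app` is
# hypothetical, and the prefix shown is just an example path):
#
#    set(DOUBLE_CONVERSION_ROOT_DIR "/opt/double-conversion")
#    find_package(double-conversion)
#    target_include_directories(app PRIVATE ${DOUBLE_CONVERSION_INCLUDE_DIR})
#    target_link_libraries(app PRIVATE ${DOUBLE_CONVERSION_LIBRARIES})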
-# -# Variables defined by this module: -# -# DOUBLE_CONVERSION_FOUND System has double-conversion libs/headers -# DOUBLE_CONVERSION_LIBRARIES The double-conversion library/libraries -# DOUBLE_CONVERSION_INCLUDE_DIR The location of double-conversion headers - -find_path(DOUBLE_CONVERSION_ROOT_DIR - NAMES include/double-conversion/double-conversion.h -) - -find_library(DOUBLE_CONVERSION_LIBRARIES - NAMES double-conversion - PATHS ${DOUBLE_CONVERSION_ROOT_DIR}/lib ${BTRIE_CITYHASH_PATHS} -) - -find_path(DOUBLE_CONVERSION_INCLUDE_DIR - NAMES double-conversion/double-conversion.h - PATHS ${DOUBLE_CONVERSION_ROOT_DIR}/include ${DOUBLE_CONVERSION_INCLUDE_PATHS} -) - -include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(double_conversion DEFAULT_MSG - DOUBLE_CONVERSION_LIBRARIES - DOUBLE_CONVERSION_INCLUDE_DIR -) - -mark_as_advanced( - DOUBLE_CONVERSION_ROOT_DIR - DOUBLE_CONVERSION_LIBRARIES - DOUBLE_CONVERSION_INCLUDE_DIR -) diff --git a/cmake/Modules/Findfarmhash.cmake b/cmake/Modules/Findfarmhash.cmake deleted file mode 100644 index 2b45fde2c67a..000000000000 --- a/cmake/Modules/Findfarmhash.cmake +++ /dev/null @@ -1,44 +0,0 @@ -# - Try to find farmhash headers and libraries. -# -# Usage of this module as follows: -# -# find_package(farmhash) -# -# Variables used by this module, they can change the default behaviour and need -# to be set before calling find_package: -# -# FARMHASH_ROOT_DIR Set this variable to the root installation of -# farmhash if the module has problems finding -# the proper installation path. -# -# Variables defined by this module: -# -# FARMHASH_FOUND System has farmhash libs/headers -# FARMHASH_LIBRARIES The farmhash library/libraries -# FARMHASH_INCLUDE_DIR The location of farmhash headers - -find_path(FARMHASH_ROOT_DIR - NAMES include/farmhash.h -) - -find_library(FARMHASH_LIBRARIES - NAMES farmhash - PATHS ${FARMHASH_ROOT_DIR}/lib ${FARMHASH_LIBRARIES_PATHS} -) - -find_path(FARMHASH_INCLUDE_DIR - NAMES farmhash.h - PATHS ${FARMHASH_ROOT_DIR}/include ${FARMHASH_INCLUDE_PATHS} -) - -include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(farmhash DEFAULT_MSG - FARMHASH_LIBRARIES - FARMHASH_INCLUDE_DIR -) - -mark_as_advanced( - FARMHASH_ROOT_DIR - FARMHASH_LIBRARIES - FARMHASH_INCLUDE_DIR -) diff --git a/cmake/Modules/FindgRPC.cmake b/cmake/Modules/FindgRPC.cmake deleted file mode 100644 index 945d307952b2..000000000000 --- a/cmake/Modules/FindgRPC.cmake +++ /dev/null @@ -1,337 +0,0 @@ -#[[ -Defines the following variables: -``gRPC_FOUND`` - Whether the gRPC framework is found -``gRPC_INCLUDE_DIRS`` - The include directories of the gRPC framework, including the include directories of the C++ wrapper. -``gRPC_LIBRARIES`` - The libraries of the gRPC framework. -``gRPC_CPP_PLUGIN`` - The plugin for generating gRPC client and server C++ stubs from `.proto` files -``gRPC_PYTHON_PLUGIN`` - The plugin for generating gRPC client and server Python stubs from `.proto` files - -The following :prop_tgt:`IMPORTED` targets are also defined: -``grpc++`` -``grpc++_unsecure`` -``grpc_cpp_plugin`` -``grpc_python_plugin`` - -Set the following variables to adjust the behaviour of this script: -``gRPC_USE_UNSECURE_LIBRARIES`` - if set gRPC_LIBRARIES will be filled with the unsecure version of the libraries (i.e. without SSL) - instead of the secure ones. -``gRPC_DEBUG` - if set the debug message will be printed. 
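For example, a consumer that wants the non-TLS variants could configure
(an illustrative sketch; the target name ``server`` is hypothetical):

  set(gRPC_USE_UNSECURE_LIBRARIES ON)
  find_package(gRPC REQUIRED)
  target_include_directories(server PRIVATE ${gRPC_INCLUDE_DIRS})
  target_link_libraries(server PRIVATE ${gRPC_LIBRARIES})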
- -Add custom commands to process ``.proto`` files to C++:: -protobuf_generate_grpc_cpp( - [DESCRIPTORS ] [EXPORT_MACRO ] [...]) - -``SRCS`` - Variable to define with autogenerated source files -``HDRS`` - Variable to define with autogenerated header files -``DESCRIPTORS`` - Variable to define with autogenerated descriptor files, if requested. -``EXPORT_MACRO`` - is a macro which should expand to ``__declspec(dllexport)`` or - ``__declspec(dllimport)`` depending on what is being compiled. -``ARGN`` - ``.proto`` files -#]] - -# Function to generate C++ files from .proto files. -# This function is a modified version of the function PROTOBUF_GENERATE_CPP() copied from https://github.com/Kitware/CMake/blob/master/Modules/FindProtobuf.cmake. -function(PROTOBUF_GENERATE_GRPC_CPP SRCS HDRS) - cmake_parse_arguments(protobuf_generate_grpc_cpp "" "EXPORT_MACRO;DESCRIPTORS" "" ${ARGN}) - - set(_proto_files "${protobuf_generate_grpc_cpp_UNPARSED_ARGUMENTS}") - if(NOT _proto_files) - message(SEND_ERROR "Error: PROTOBUF_GENERATE_GRPC_CPP() called without any proto files") - return() - endif() - - if(PROTOBUF_GENERATE_GRPC_CPP_APPEND_PATH) - set(_append_arg APPEND_PATH) - endif() - - if(protobuf_generate_grpc_cpp_DESCRIPTORS) - set(_descriptors DESCRIPTORS) - endif() - - if(DEFINED PROTOBUF_IMPORT_DIRS AND NOT DEFINED Protobuf_IMPORT_DIRS) - set(Protobuf_IMPORT_DIRS "${PROTOBUF_IMPORT_DIRS}") - endif() - - if(DEFINED Protobuf_IMPORT_DIRS) - set(_import_arg IMPORT_DIRS ${Protobuf_IMPORT_DIRS}) - endif() - - set(_outvar) - protobuf_generate_grpc(${_append_arg} ${_descriptors} LANGUAGE cpp EXPORT_MACRO ${protobuf_generate_cpp_EXPORT_MACRO} OUT_VAR _outvar ${_import_arg} PROTOS ${_proto_files}) - - set(${SRCS}) - set(${HDRS}) - if(protobuf_generate_grpc_cpp_DESCRIPTORS) - set(${protobuf_generate_grpc_cpp_DESCRIPTORS}) - endif() - - foreach(_file ${_outvar}) - if(_file MATCHES "cc$") - list(APPEND ${SRCS} ${_file}) - elseif(_file MATCHES "desc$") - list(APPEND ${protobuf_generate_grpc_cpp_DESCRIPTORS} ${_file}) - else() - list(APPEND ${HDRS} ${_file}) - endif() - endforeach() - set(${SRCS} ${${SRCS}} PARENT_SCOPE) - set(${HDRS} ${${HDRS}} PARENT_SCOPE) - if(protobuf_generate_grpc_cpp_DESCRIPTORS) - set(${protobuf_generate_grpc_cpp_DESCRIPTORS} "${${protobuf_generate_grpc_cpp_DESCRIPTORS}}" PARENT_SCOPE) - endif() -endfunction() - -# Helper function. -# This function is a modified version of the function protobuf_generate() copied from https://github.com/Kitware/CMake/blob/master/Modules/FindProtobuf.cmake. 
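# For each input .proto file it computes the expected per-language output
# files and registers an add_custom_command() that invokes protoc with the
# appropriate gRPC plugin.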
-function(protobuf_generate_grpc) - set(_options APPEND_PATH DESCRIPTORS) - set(_singleargs LANGUAGE OUT_VAR EXPORT_MACRO PROTOC_OUT_DIR) - if(COMMAND target_sources) - list(APPEND _singleargs TARGET) - endif() - set(_multiargs PROTOS IMPORT_DIRS GENERATE_EXTENSIONS) - - cmake_parse_arguments(protobuf_generate_grpc "${_options}" "${_singleargs}" "${_multiargs}" "${ARGN}") - - if(NOT protobuf_generate_grpc_PROTOS AND NOT protobuf_generate_grpc_TARGET) - message(SEND_ERROR "Error: protobuf_generate_grpc called without any targets or source files") - return() - endif() - - if(NOT protobuf_generate_grpc_OUT_VAR AND NOT protobuf_generate_grpc_TARGET) - message(SEND_ERROR "Error: protobuf_generate_grpc called without a target or output variable") - return() - endif() - - if(NOT protobuf_generate_grpc_LANGUAGE) - set(protobuf_generate_grpc_LANGUAGE cpp) - endif() - string(TOLOWER ${protobuf_generate_grpc_LANGUAGE} protobuf_generate_grpc_LANGUAGE) - - if(NOT protobuf_generate_grpc_PROTOC_OUT_DIR) - set(protobuf_generate_grpc_PROTOC_OUT_DIR ${CMAKE_CURRENT_BINARY_DIR}) - endif() - - if(protobuf_generate_grpc_EXPORT_MACRO AND protobuf_generate_grpc_LANGUAGE STREQUAL cpp) - set(_dll_export_decl "dllexport_decl=${protobuf_generate_grpc_EXPORT_MACRO}:") - endif() - - if(NOT protobuf_generate_grpc_GENERATE_EXTENSIONS) - if(protobuf_generate_grpc_LANGUAGE STREQUAL cpp) - set(protobuf_generate_grpc_GENERATE_EXTENSIONS .pb.h .pb.cc .grpc.pb.h .grpc.pb.cc) - elseif(protobuf_generate_grpc_LANGUAGE STREQUAL python) - set(protobuf_generate_grpc_GENERATE_EXTENSIONS _pb2.py) - else() - message(SEND_ERROR "Error: protobuf_generate_grpc given unknown Language ${LANGUAGE}, please provide a value for GENERATE_EXTENSIONS") - return() - endif() - endif() - - if(NOT protobuf_generate_grpc_PLUGIN) - if(protobuf_generate_grpc_LANGUAGE STREQUAL cpp) - set(protobuf_generate_grpc_PLUGIN "grpc_cpp_plugin") - elseif(protobuf_generate_grpc_LANGUAGE STREQUAL python) - set(protobuf_generate_grpc_PLUGIN "grpc_python_plugin") - else() - message(SEND_ERROR "Error: protobuf_generate_grpc given unknown Language ${LANGUAGE}, please provide a value for PLUGIN") - return() - endif() - endif() - - if(protobuf_generate_grpc_TARGET) - get_target_property(_source_list ${protobuf_generate_grpc_TARGET} SOURCES) - foreach(_file ${_source_list}) - if(_file MATCHES "proto$") - list(APPEND protobuf_generate_grpc_PROTOS ${_file}) - endif() - endforeach() - endif() - - if(NOT protobuf_generate_grpc_PROTOS) - message(SEND_ERROR "Error: protobuf_generate_grpc could not find any .proto files") - return() - endif() - - if(protobuf_generate_grpc_APPEND_PATH) - # Create an include path for each file specified - foreach(_file ${protobuf_generate_grpc_PROTOS}) - get_filename_component(_abs_file ${_file} ABSOLUTE) - get_filename_component(_abs_path ${_abs_file} PATH) - list(FIND _protobuf_include_path ${_abs_path} _contains_already) - if(${_contains_already} EQUAL -1) - list(APPEND _protobuf_include_path -I ${_abs_path}) - endif() - endforeach() - else() - set(_protobuf_include_path -I ${CMAKE_CURRENT_SOURCE_DIR}) - endif() - - foreach(DIR ${protobuf_generate_grpc_IMPORT_DIRS}) - get_filename_component(ABS_PATH ${DIR} ABSOLUTE) - list(FIND _protobuf_include_path ${ABS_PATH} _contains_already) - if(${_contains_already} EQUAL -1) - list(APPEND _protobuf_include_path -I ${ABS_PATH}) - endif() - endforeach() - - set(_generated_srcs_all) - foreach(_proto ${protobuf_generate_grpc_PROTOS}) - get_filename_component(_abs_file ${_proto} ABSOLUTE) - 
get_filename_component(_abs_dir ${_abs_file} DIRECTORY) - get_filename_component(_basename ${_proto} NAME_WE) - file(RELATIVE_PATH _rel_dir ${CMAKE_CURRENT_SOURCE_DIR} ${_abs_dir}) - - set(_possible_rel_dir) - if(NOT protobuf_generate_grpc_APPEND_PATH) - set(_possible_rel_dir ${_rel_dir}/) - endif() - - set(_generated_srcs) - foreach(_ext ${protobuf_generate_grpc_GENERATE_EXTENSIONS}) - list(APPEND _generated_srcs "${protobuf_generate_grpc_PROTOC_OUT_DIR}/${_possible_rel_dir}${_basename}${_ext}") - endforeach() - - if(protobuf_generate_grpc_DESCRIPTORS AND protobuf_generate_grpc_LANGUAGE STREQUAL cpp) - set(_descriptor_file "${CMAKE_CURRENT_BINARY_DIR}/${_basename}.desc") - set(_dll_desc_out "--descriptor_set_out=${_descriptor_file}") - list(APPEND _generated_srcs ${_descriptor_file}) - endif() - list(APPEND _generated_srcs_all ${_generated_srcs}) - - add_custom_command( - OUTPUT ${_generated_srcs} - COMMAND protobuf::protoc - ARGS --${protobuf_generate_grpc_LANGUAGE}_out ${_dll_export_decl}${protobuf_generate_grpc_PROTOC_OUT_DIR} - --grpc_out ${_dll_export_decl}${protobuf_generate_grpc_PROTOC_OUT_DIR} - --plugin=protoc-gen-grpc=$ - ${_dll_desc_out} ${_protobuf_include_path} ${_abs_file} - DEPENDS ${_abs_file} protobuf::protoc ${protobuf_generate_grpc_PLUGIN} - COMMENT "Running ${protobuf_generate_grpc_LANGUAGE} protocol buffer compiler on ${_proto}" - VERBATIM) - endforeach() - - set_source_files_properties(${_generated_srcs_all} PROPERTIES GENERATED TRUE) - if(protobuf_generate_grpc_OUT_VAR) - set(${protobuf_generate_grpc_OUT_VAR} ${_generated_srcs_all} PARENT_SCOPE) - endif() - if(protobuf_generate_grpc_TARGET) - target_sources(${protobuf_generate_grpc_TARGET} PRIVATE ${_generated_srcs_all}) - endif() -endfunction() - - -# Find the libraries. -if(gRPC_USE_STATIC_LIBS) - # Support preference of static libs by adjusting CMAKE_FIND_LIBRARY_SUFFIXES - set(_gRPC_ORIG_CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES}) - if(WIN32) - set(CMAKE_FIND_LIBRARY_SUFFIXES .lib .a ${CMAKE_FIND_LIBRARY_SUFFIXES}) - else() - set(CMAKE_FIND_LIBRARY_SUFFIXES .a) - endif() -endif() - -find_library(gRPC_LIBRARY NAMES grpc) -find_library(gRPC_CPP_LIBRARY NAMES grpc++) -find_library(gRPC_UNSECURE_LIBRARY NAMES grpc_unsecure) -find_library(gRPC_CPP_UNSECURE_LIBRARY NAMES grpc++_unsecure) -find_library(gRPC_CARES_LIBRARY NAMES cares) - -set(gRPC_LIBRARIES) -if(gRPC_USE_UNSECURE_LIBRARIES) - if(gRPC_UNSECURE_LIBRARY) - set(gRPC_LIBRARIES ${gRPC_LIBRARIES} ${gRPC_UNSECURE_LIBRARY}) - endif() - if(gRPC_CPP_UNSECURE_LIBRARY) - set(gRPC_LIBRARIES ${gRPC_LIBRARIES} ${gRPC_CPP_UNSECURE_LIBRARY}) - endif() -else() - if(gRPC_LIBRARY) - set(gRPC_LIBRARIES ${gRPC_LIBRARIES} ${gRPC_LIBRARY}) - endif() - if(gRPC_CPP_UNSECURE_LIBRARY) - set(gRPC_LIBRARIES ${gRPC_LIBRARIES} ${gRPC_CPP_LIBRARY}) - endif() -endif() -set(gRPC_LIBRARIES ${gRPC_LIBRARIES} ${gRPC_CARES_LIBRARY}) - -# Restore the original find library ordering. -if(gRPC_USE_STATIC_LIBS) - set(CMAKE_FIND_LIBRARY_SUFFIXES ${_gRPC_ORIG_CMAKE_FIND_LIBRARY_SUFFIXES}) -endif() - -# Find the include directories. -find_path(gRPC_INCLUDE_DIR grpc/grpc.h) -find_path(gRPC_CPP_INCLUDE_DIR grpc++/grpc++.h) - -if(gRPC_INCLUDE_DIR AND gRPC_CPP_INCLUDE_DIR AND NOT(gRPC_INCLUDE_DIR STREQUAL gRPC_CPP_INCLUDE_DIR)) - set(gRPC_INCLUDE_DIRS ${gRPC_INCLUDE_DIR} ${gRPC_CPP_INCLUDE_DIR}) -elseif(gRPC_INCLUDE_DIR) - set(gRPC_INCLUDE_DIRS ${gRPC_INCLUDE_DIR}) -else() - set(gRPC_INCLUDE_DIRS ${gRPC_CPP_INCLUDE_DIR}) -endif() - -# Get full path to plugin. 
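# The generation functions above hand these plugins to protoc through
# --plugin=protoc-gen-grpc=..., using the imported executable targets
# defined further below.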
-find_program(gRPC_CPP_PLUGIN - NAMES grpc_cpp_plugin - DOC "The plugin for generating gRPC client and server C++ stubs from `.proto` files") - -find_program(gRPC_PYTHON_PLUGIN - NAMES grpc_python_plugin - DOC "The plugin for generating gRPC client and server Python stubs from `.proto` files") - -# Add imported targets. -if(gRPC_CPP_LIBRARY AND NOT TARGET grpc++) - add_library(grpc++ UNKNOWN IMPORTED) - set_target_properties(grpc++ PROPERTIES - IMPORTED_LOCATION "${gRPC_CPP_LIBRARY}") - set_target_properties(grpc++ PROPERTIES - INTERFACE_INCLUDE_DIRECTORIES ${gRPC_INCLUDE_DIRS}) -endif() - -if(gRPC_CPP_UNSECURE_LIBRARY AND NOT TARGET grpc++_unsecure) - add_library(grpc++_unsecure UNKNOWN IMPORTED) - set_target_properties(grpc++_unsecure PROPERTIES - IMPORTED_LOCATION "${gRPC_CPP_UNSECURE_LIBRARY}") - set_target_properties(grpc++_unsecure PROPERTIES - INTERFACE_INCLUDE_DIRECTORIES ${gRPC_INCLUDE_DIRS}) -endif() - -if(gRPC_CPP_PLUGIN AND NOT TARGET grpc_cpp_plugin) - add_executable(grpc_cpp_plugin IMPORTED) - set_target_properties(grpc_cpp_plugin PROPERTIES - IMPORTED_LOCATION "${gRPC_CPP_PLUGIN}") -endif() - -if(gRPC_PYTHON_PLUGIN AND NOT TARGET grpc_python_plugin) - add_executable(grpc_python_plugin IMPORTED) - set_target_properties(grpc_python_plugin PROPERTIES - IMPORTED_LOCATION "${gRPC_PYTHON_PLUGIN}") -endif() - -#include(FindPackageHandleStandardArgs.cmake) -FIND_PACKAGE_HANDLE_STANDARD_ARGS(gRPC - REQUIRED_VARS gRPC_LIBRARY gRPC_CPP_LIBRARY gRPC_UNSECURE_LIBRARY gRPC_CPP_UNSECURE_LIBRARY gRPC_CARES_LIBRARY - gRPC_INCLUDE_DIR gRPC_CPP_INCLUDE_DIR gRPC_CPP_PLUGIN gRPC_PYTHON_PLUGIN) - -if(gRPC_FOUND) - if(gRPC_DEBUG) - message(STATUS "gRPC: INCLUDE_DIRS=${gRPC_INCLUDE_DIRS}") - message(STATUS "gRPC: LIBRARIES=${gRPC_LIBRARIES}") - message(STATUS "gRPC: CPP_PLUGIN=${gRPC_CPP_PLUGIN}") - message(STATUS "gRPC: PYTHON_PLUGIN=${gRPC_PYTHON_PLUGIN}") - endif() -endif() diff --git a/cmake/find/ccache.cmake b/cmake/ccache.cmake similarity index 100% rename from cmake/find/ccache.cmake rename to cmake/ccache.cmake diff --git a/cmake/contrib_finder.cmake b/cmake/contrib_finder.cmake deleted file mode 100644 index e97fda6a6f35..000000000000 --- a/cmake/contrib_finder.cmake +++ /dev/null @@ -1,23 +0,0 @@ -macro(find_contrib_lib LIB_NAME) - - string(TOLOWER ${LIB_NAME} LIB_NAME_LC) - string(TOUPPER ${LIB_NAME} LIB_NAME_UC) - string(REPLACE "-" "_" LIB_NAME_UC ${LIB_NAME_UC}) - - option (USE_INTERNAL_${LIB_NAME_UC}_LIBRARY "Use bundled library ${LIB_NAME} instead of system" ON) - - if (NOT USE_INTERNAL_${LIB_NAME_UC}_LIBRARY) - find_package ("${LIB_NAME}") - if (NOT ${LIB_NAME_UC}_FOUND) - message (${RECONFIGURE_MESSAGE_LEVEL} "Cannot use system ${LIB_NAME}") - endif() - endif () - - if (NOT ${LIB_NAME_UC}_FOUND) - set (USE_INTERNAL_${LIB_NAME_UC}_LIBRARY 1) - set (${LIB_NAME_UC}_LIBRARIES ${LIB_NAME_LC}) - set (${LIB_NAME_UC}_INCLUDE_DIR ${${LIB_NAME_UC}_CONTRIB_INCLUDE_DIR}) - endif () - - message (STATUS "Using ${LIB_NAME}: ${${LIB_NAME_UC}_INCLUDE_DIR} : ${${LIB_NAME_UC}_LIBRARIES}") -endmacro() diff --git a/cmake/cpu_features.cmake b/cmake/cpu_features.cmake index 20c61ead3d24..535d1b3c93e3 100644 --- a/cmake/cpu_features.cmake +++ b/cmake/cpu_features.cmake @@ -134,7 +134,7 @@ else () set (COMPILER_FLAGS "${COMPILER_FLAGS} ${TEST_FLAG}") endif () - set (TEST_FLAG "-mavx512f -mavx512bw") + set (TEST_FLAG "-mavx512f -mavx512bw -mavx512vl") set (CMAKE_REQUIRED_FLAGS "${TEST_FLAG} -O0") check_cxx_source_compiles(" #include @@ -143,6 +143,8 @@ else () (void)a; auto b = _mm512_add_epi16(__m512i(), 
__m512i()); (void)b; + auto c = _mm_cmp_epi8_mask(__m128i(), __m128i(), 0); + (void)c; return 0; } " HAVE_AVX512) @@ -181,7 +183,7 @@ else () set (X86_INTRINSICS_FLAGS "${X86_INTRINSICS_FLAGS} -mbmi") endif () if (HAVE_AVX512) - set (X86_INTRINSICS_FLAGS "${X86_INTRINSICS_FLAGS} -mavx512f -mavx512bw -mprefer-vector-width=256") + set (X86_INTRINSICS_FLAGS "${X86_INTRINSICS_FLAGS} -mavx512f -mavx512bw -mavx512vl -mprefer-vector-width=256") endif () endif () endif () diff --git a/cmake/cxx.cmake b/cmake/cxx.cmake new file mode 100644 index 000000000000..7d93bf05fc7e --- /dev/null +++ b/cmake/cxx.cmake @@ -0,0 +1,8 @@ +set (CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -D_LIBCPP_DEBUG=0") # More checks in debug build. + +add_subdirectory(contrib/libcxxabi-cmake) +add_subdirectory(contrib/libcxx-cmake) + +# Exception handling library is embedded into libcxxabi. + +target_link_libraries(global-libs INTERFACE cxx cxxabi) diff --git a/cmake/darwin/default_libs.cmake b/cmake/darwin/default_libs.cmake index a6ee800d59ba..1f92663a4b93 100644 --- a/cmake/darwin/default_libs.cmake +++ b/cmake/darwin/default_libs.cmake @@ -22,16 +22,12 @@ set(CMAKE_OSX_DEPLOYMENT_TARGET 10.15) set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) -include (cmake/find/cxx.cmake) - -add_library(global-group INTERFACE) +include (cmake/cxx.cmake) target_link_libraries(global-group INTERFACE $ ) -link_libraries(global-group) - # FIXME: remove when all contribs will get custom cmake lists install( TARGETS global-group global-libs diff --git a/cmake/find/amqpcpp.cmake b/cmake/find/amqpcpp.cmake deleted file mode 100644 index e033bea439fe..000000000000 --- a/cmake/find/amqpcpp.cmake +++ /dev/null @@ -1,29 +0,0 @@ -if (MISSING_INTERNAL_LIBUV_LIBRARY) - message (WARNING "Can't find internal libuv needed for AMQP-CPP library") - set (ENABLE_AMQPCPP OFF CACHE INTERNAL "") -endif() - -option(ENABLE_AMQPCPP "Enalbe AMQP-CPP" ${ENABLE_LIBRARIES}) - -if (NOT ENABLE_AMQPCPP) - return() -endif() - -if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/AMQP-CPP/src") - message (WARNING "submodule contrib/AMQP-CPP is missing. to fix try run: \n git submodule update --init") - message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal AMQP-CPP library") - set (USE_AMQPCPP 0) - return() -endif () - -set (USE_AMQPCPP 1) -set (AMQPCPP_LIBRARY amqp-cpp ${OPENSSL_LIBRARIES}) - -set (AMQPCPP_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/AMQP-CPP/include") -list (APPEND AMQPCPP_INCLUDE_DIR - "${LIBUV_INCLUDE_DIR}" - "${ClickHouse_SOURCE_DIR}/contrib/AMQP-CPP") - -list (APPEND AMQPCPP_LIBRARY "${LIBUV_LIBRARY}") - -message (STATUS "Using AMQP-CPP=${USE_AMQPCPP}: ${AMQPCPP_INCLUDE_DIR} : ${AMQPCPP_LIBRARY}") diff --git a/cmake/find/avro.cmake b/cmake/find/avro.cmake deleted file mode 100644 index a70fb92c122b..000000000000 --- a/cmake/find/avro.cmake +++ /dev/null @@ -1,35 +0,0 @@ -# Needed when using Apache Avro serialization format -option (ENABLE_AVRO "Enable Avro" ${ENABLE_LIBRARIES}) - -if (NOT ENABLE_AVRO) - if (USE_INTERNAL_AVRO_LIBRARY) - message (${RECONFIGURE_MESSAGE_LEVEL} "Can't use internal avro library with ENABLE_AVRO=OFF") - endif() - return() -endif() - -option (USE_INTERNAL_AVRO_LIBRARY "Set to FALSE to use system avro library instead of bundled" ON) - -if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/avro/lang") - if (USE_INTERNAL_AVRO_LIBRARY) - message(WARNING "submodule contrib/avro is missing. 
to fix try run: \n git submodule update --init") - message (${RECONFIGURE_MESSAGE_LEVEL} "Cannot find internal avro") - set(USE_INTERNAL_AVRO_LIBRARY 0) - endif() - set(MISSING_INTERNAL_AVRO_LIBRARY 1) -endif() - -if (NOT USE_INTERNAL_AVRO_LIBRARY) - message (${RECONFIGURE_MESSAGE_LEVEL} "Using system avro library is not supported yet") -elseif(NOT MISSING_INTERNAL_AVRO_LIBRARY) - include(cmake/find/snappy.cmake) - set(AVROCPP_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/avro/lang/c++/include") - set(AVROCPP_LIBRARY avrocpp) - set(USE_INTERNAL_AVRO_LIBRARY 1) -endif () - -if (AVROCPP_LIBRARY AND AVROCPP_INCLUDE_DIR) - set(USE_AVRO 1) -endif() - -message (STATUS "Using avro=${USE_AVRO}: ${AVROCPP_INCLUDE_DIR} : ${AVROCPP_LIBRARY}") diff --git a/cmake/find/base64.cmake b/cmake/find/base64.cmake deleted file mode 100644 index ee12fbb11ba3..000000000000 --- a/cmake/find/base64.cmake +++ /dev/null @@ -1,25 +0,0 @@ -if(ARCH_AMD64 OR ARCH_ARM) - option (ENABLE_BASE64 "Enable base64" ${ENABLE_LIBRARIES}) -elseif(ENABLE_BASE64) - message (${RECONFIGURE_MESSAGE_LEVEL} "base64 library is only supported on x86_64 and aarch64") -endif() - -if (NOT ENABLE_BASE64) - return() -endif() - -if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/base64/LICENSE") - set (MISSING_INTERNAL_BASE64_LIBRARY 1) - message (WARNING "submodule contrib/base64 is missing. to fix try run: \n git submodule update --init") -endif () - -if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/base64") - message (WARNING "submodule contrib/base64 is missing. to fix try run: \n git submodule update --init") -else() - set (BASE64_LIBRARY base64) - set (USE_BASE64 1) -endif() - -if (NOT USE_BASE64) - message (${RECONFIGURE_MESSAGE_LEVEL} "Cannot enable base64") -endif() diff --git a/cmake/find/blob_storage.cmake b/cmake/find/blob_storage.cmake deleted file mode 100644 index 4ad7296e95e9..000000000000 --- a/cmake/find/blob_storage.cmake +++ /dev/null @@ -1,29 +0,0 @@ -option (ENABLE_AZURE_BLOB_STORAGE "Enable Azure blob storage" ${ENABLE_LIBRARIES}) - -if (ENABLE_AZURE_BLOB_STORAGE) - option(USE_INTERNAL_AZURE_BLOB_STORAGE_LIBRARY - "Set to FALSE to use system Azure SDK instead of bundled (OFF currently not implemented)" - ON) - - set(USE_AZURE_BLOB_STORAGE 1) - set(AZURE_BLOB_STORAGE_LIBRARY azure_sdk) - - if ((NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/azure/sdk" - OR NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/azure/cmake-modules") - AND USE_INTERNAL_AZURE_BLOB_STORAGE_LIBRARY) - message (WARNING "submodule contrib/azure is missing. 
to fix try run: \n git submodule update --init") - set(USE_INTERNAL_AZURE_BLOB_STORAGE_LIBRARY OFF) - set(USE_AZURE_BLOB_STORAGE 0) - endif () - - if (NOT USE_INTERNAL_SSL_LIBRARY AND USE_INTERNAL_AZURE_BLOB_STORAGE_LIBRARY) - message (FATAL_ERROR "Currently Blob Storage support can be built only with internal SSL library") - endif() - - if (NOT USE_INTERNAL_CURL AND USE_INTERNAL_AZURE_BLOB_STORAGE_LIBRARY) - message (FATAL_ERROR "Currently Blob Storage support can be built only with internal curl library") - endif() - -endif() - -message (STATUS "Using Azure Blob Storage - ${USE_AZURE_BLOB_STORAGE}") diff --git a/cmake/find/brotli.cmake b/cmake/find/brotli.cmake deleted file mode 100644 index 6469ec04f457..000000000000 --- a/cmake/find/brotli.cmake +++ /dev/null @@ -1,42 +0,0 @@ -option (ENABLE_BROTLI "Enable brotli" ${ENABLE_LIBRARIES}) - -if (NOT ENABLE_BROTLI) - if (USE_INTERNAL_BROTLI_LIBRARY) - message (${RECONFIGURE_MESSAGE_LEVEL} "Can't use internal brotly library with ENABLE_BROTLI=OFF") - endif() - return() -endif() - -option (USE_INTERNAL_BROTLI_LIBRARY "Set to FALSE to use system libbrotli library instead of bundled" ON) - -if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/brotli/c/include/brotli/decode.h") - if (USE_INTERNAL_BROTLI_LIBRARY) - message (WARNING "submodule contrib/brotli is missing. to fix try run: \n git submodule update --init") - message (${RECONFIGURE_MESSAGE_LEVEL} "Cannot find internal brotli") - set (USE_INTERNAL_BROTLI_LIBRARY 0) - endif () - set (MISSING_INTERNAL_BROTLI_LIBRARY 1) -endif () - -if(NOT USE_INTERNAL_BROTLI_LIBRARY) - find_library(BROTLI_LIBRARY_COMMON brotlicommon) - find_library(BROTLI_LIBRARY_DEC brotlidec) - find_library(BROTLI_LIBRARY_ENC brotlienc) - find_path(BROTLI_INCLUDE_DIR NAMES brotli/decode.h brotli/encode.h brotli/port.h brotli/types.h PATHS ${BROTLI_INCLUDE_PATHS}) - if(BROTLI_LIBRARY_DEC AND BROTLI_LIBRARY_ENC AND BROTLI_LIBRARY_COMMON) - set(BROTLI_LIBRARY ${BROTLI_LIBRARY_DEC} ${BROTLI_LIBRARY_ENC} ${BROTLI_LIBRARY_COMMON}) - else() - message (${RECONFIGURE_MESSAGE_LEVEL} "Cannot use system brotli") - endif() -endif() - -if (BROTLI_LIBRARY AND BROTLI_INCLUDE_DIR) - set (USE_BROTLI 1) -elseif (NOT MISSING_INTERNAL_BROTLI_LIBRARY) - set (BROTLI_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/brotli/c/include") - set (USE_INTERNAL_BROTLI_LIBRARY 1) - set (BROTLI_LIBRARY brotli) - set (USE_BROTLI 1) -endif () - -message (STATUS "Using brotli=${USE_BROTLI}: ${BROTLI_INCLUDE_DIR} : ${BROTLI_LIBRARY}") diff --git a/cmake/find/bzip2.cmake b/cmake/find/bzip2.cmake deleted file mode 100644 index 5e6a6fb58418..000000000000 --- a/cmake/find/bzip2.cmake +++ /dev/null @@ -1,19 +0,0 @@ -option(ENABLE_BZIP2 "Enable bzip2 compression support" ${ENABLE_LIBRARIES}) - -if (NOT ENABLE_BZIP2) - message (STATUS "bzip2 compression disabled") - return() -endif() - -if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/bzip2/bzlib.h") - message (WARNING "submodule contrib/bzip2 is missing. 
to fix try run: \n git submodule update --init") - message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal bzip2 library") - set (USE_NLP 0) - return() -endif () - -set (USE_BZIP2 1) -set (BZIP2_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/bzip2") -set (BZIP2_LIBRARY bzip2) - -message (STATUS "Using bzip2=${USE_BZIP2}: ${BZIP2_INCLUDE_DIR} : ${BZIP2_LIBRARY}") diff --git a/cmake/find/capnp.cmake b/cmake/find/capnp.cmake deleted file mode 100644 index fa62c64105f3..000000000000 --- a/cmake/find/capnp.cmake +++ /dev/null @@ -1,42 +0,0 @@ -option (ENABLE_CAPNP "Enable Cap'n Proto" ${ENABLE_LIBRARIES}) - -if (NOT ENABLE_CAPNP) - if (USE_INTERNAL_CAPNP_LIBRARY) - message (${RECONFIGURE_MESSAGE_LEVEL} "Can't use internal capnproto library with ENABLE_CAPNP=OFF") - endif() - return() -endif() - -option (USE_INTERNAL_CAPNP_LIBRARY "Set to FALSE to use system capnproto library instead of bundled" ON) - -if(NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/capnproto/c++") - if(USE_INTERNAL_CAPNP_LIBRARY) - message(WARNING "submodule contrib/capnproto is missing. to fix try run: \n git submodule update --init") - message(${RECONFIGURE_MESSAGE_LEVEL} "cannot find internal capnproto") - set(USE_INTERNAL_CAPNP_LIBRARY 0) - endif() - set(MISSING_INTERNAL_CAPNP_LIBRARY 1) -endif() - -# FIXME: refactor to use `add_library(… IMPORTED)` if possible. -if (NOT USE_INTERNAL_CAPNP_LIBRARY) - find_library (KJ kj) - find_library (CAPNP capnp) - find_library (CAPNPC capnpc) - - if(KJ AND CAPNP AND CAPNPC) - set (CAPNP_LIBRARIES ${CAPNPC} ${CAPNP} ${KJ}) - else() - message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find system capnproto") - endif() -endif() - -if (CAPNP_LIBRARIES) - set (USE_CAPNP 1) -elseif(NOT MISSING_INTERNAL_CAPNP_LIBRARY) - set (CAPNP_LIBRARIES capnpc) - set (USE_CAPNP 1) - set (USE_INTERNAL_CAPNP_LIBRARY 1) -endif () - -message (STATUS "Using capnp=${USE_CAPNP}: ${CAPNP_LIBRARIES}") diff --git a/cmake/find/cassandra.cmake b/cmake/find/cassandra.cmake deleted file mode 100644 index 7fcbdbb90a5f..000000000000 --- a/cmake/find/cassandra.cmake +++ /dev/null @@ -1,34 +0,0 @@ -if (MISSING_INTERNAL_LIBUV_LIBRARY) - message (WARNING "Disabling cassandra due to missing libuv") - set (ENABLE_CASSANDRA OFF CACHE INTERNAL "") -endif() - -option(ENABLE_CASSANDRA "Enable Cassandra" ${ENABLE_LIBRARIES}) - -if (NOT ENABLE_CASSANDRA) - return() -endif() - -if (APPLE) - set(CMAKE_MACOSX_RPATH ON) -endif() - -if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/cassandra") - message (ERROR "submodule contrib/cassandra is missing. 
to fix try run: \n git submodule update --init") - message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal Cassandra") - set (USE_CASSANDRA 0) - return() -endif() - -set (USE_CASSANDRA 1) -set (CASSANDRA_INCLUDE_DIR - "${ClickHouse_SOURCE_DIR}/contrib/cassandra/include/") -if (MAKE_STATIC_LIBRARIES) - set (CASSANDRA_LIBRARY cassandra_static) -else() - set (CASSANDRA_LIBRARY cassandra) -endif() - -set (CASS_ROOT_DIR "${ClickHouse_SOURCE_DIR}/contrib/cassandra") - -message (STATUS "Using cassandra=${USE_CASSANDRA}: ${CASSANDRA_INCLUDE_DIR} : ${CASSANDRA_LIBRARY}") diff --git a/cmake/find/curl.cmake b/cmake/find/curl.cmake deleted file mode 100644 index 577b13698c23..000000000000 --- a/cmake/find/curl.cmake +++ /dev/null @@ -1,35 +0,0 @@ -option (ENABLE_CURL "Enable curl" ${ENABLE_LIBRARIES}) - -if (NOT ENABLE_CURL) - if (USE_INTERNAL_CURL) - message (${RECONFIGURE_MESSAGE_LEVEL} "Can't use internal curl with ENABLE_CURL=OFF") - endif() - return() -endif() - -option (USE_INTERNAL_CURL "Use internal curl library" ON) - -if (NOT USE_INTERNAL_CURL) - find_package (CURL) - if (NOT CURL_FOUND) - message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find system curl") - endif() -endif() - -if (NOT CURL_FOUND) - set (USE_INTERNAL_CURL 1) - set (CURL_LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/curl") - - # find_package(CURL) compatibility for the following packages that uses - # find_package(CURL)/include(FindCURL): - # - sentry-native - set (CURL_FOUND ON CACHE BOOL "") - set (CURL_ROOT_DIR ${CURL_LIBRARY_DIR} CACHE PATH "") - set (CURL_INCLUDE_DIR ${CURL_LIBRARY_DIR}/include CACHE PATH "") - set (CURL_INCLUDE_DIRS ${CURL_LIBRARY_DIR}/include CACHE PATH "") - set (CURL_LIBRARY curl CACHE STRING "") - set (CURL_LIBRARIES ${CURL_LIBRARY} CACHE STRING "") - set (CURL_VERSION_STRING 7.67.0 CACHE STRING "") -endif () - -message (STATUS "Using curl: ${CURL_INCLUDE_DIRS} : ${CURL_LIBRARIES}") diff --git a/cmake/find/cxx.cmake b/cmake/find/cxx.cmake deleted file mode 100644 index d1f62f0eceab..000000000000 --- a/cmake/find/cxx.cmake +++ /dev/null @@ -1,71 +0,0 @@ -option (USE_LIBCXX "Use libc++ and libc++abi instead of libstdc++" ON) - -if (NOT USE_LIBCXX) - if (USE_INTERNAL_LIBCXX_LIBRARY) - message (${RECONFIGURE_MESSAGE_LEVEL} "Cannot use internal libcxx with USE_LIBCXX=OFF") - endif() - - target_link_libraries(global-libs INTERFACE -l:libstdc++.a -l:libstdc++fs.a) # Always link these libraries as static - target_link_libraries(global-libs INTERFACE ${EXCEPTION_HANDLING_LIBRARY}) - return() -endif() - -set(USE_INTERNAL_LIBCXX_LIBRARY_DEFAULT ON) - -option (USE_INTERNAL_LIBCXX_LIBRARY "Disable to use system libcxx and libcxxabi libraries instead of bundled" - ${USE_INTERNAL_LIBCXX_LIBRARY_DEFAULT}) - -if(NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/libcxx/src") - if (USE_INTERNAL_LIBCXX_LIBRARY) - message(WARNING "submodule contrib/libcxx is missing. to fix try run: \n git submodule update --init") - message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal libcxx") - set(USE_INTERNAL_LIBCXX_LIBRARY 0) - endif() - set(USE_INTERNAL_LIBCXX_LIBRARY_DEFAULT 0) - set(MISSING_INTERNAL_LIBCXX_LIBRARY 1) -endif() - -set (CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -D_LIBCPP_DEBUG=0") # More checks in debug build. 
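# (_LIBCPP_DEBUG=0 enables libc++'s basic assertion checks; level 1 would
# additionally enable the much more intrusive iterator-debugging mode.)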
- -if (NOT USE_INTERNAL_LIBCXX_LIBRARY) - find_library (LIBCXX_LIBRARY c++) - find_library (LIBCXXFS_LIBRARY c++fs) - find_library (LIBCXXABI_LIBRARY c++abi) - - if(LIBCXX_LIBRARY AND LIBCXXABI_LIBRARY) # c++fs is now a part of the libc++ - set (HAVE_LIBCXX 1) - else () - message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find system libcxx") - endif() - - if(NOT LIBCXXFS_LIBRARY) - set(LIBCXXFS_LIBRARY ${LIBCXX_LIBRARY}) - endif() - - set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++") - - target_link_libraries(global-libs INTERFACE ${EXCEPTION_HANDLING_LIBRARY}) -endif () - -if (NOT HAVE_LIBCXX AND NOT MISSING_INTERNAL_LIBCXX_LIBRARY) - set (LIBCXX_LIBRARY cxx) - set (LIBCXXABI_LIBRARY cxxabi) - add_subdirectory(contrib/libcxxabi-cmake) - add_subdirectory(contrib/libcxx-cmake) - - # Exception handling library is embedded into libcxxabi. - - set (HAVE_LIBCXX 1) - set(USE_INTERNAL_LIBCXX_LIBRARY 1) -endif () - -if (HAVE_LIBCXX) - target_link_libraries(global-libs INTERFACE ${LIBCXX_LIBRARY} ${LIBCXXABI_LIBRARY} ${LIBCXXFS_LIBRARY}) - - message (STATUS "Using libcxx: ${LIBCXX_LIBRARY}") - message (STATUS "Using libcxxfs: ${LIBCXXFS_LIBRARY}") - message (STATUS "Using libcxxabi: ${LIBCXXABI_LIBRARY}") -else() - target_link_libraries(global-libs INTERFACE -l:libstdc++.a -l:libstdc++fs.a) # Always link these libraries as static - target_link_libraries(global-libs INTERFACE ${EXCEPTION_HANDLING_LIBRARY}) -endif() diff --git a/cmake/find/cyrus-sasl.cmake b/cmake/find/cyrus-sasl.cmake deleted file mode 100644 index f0c088995b03..000000000000 --- a/cmake/find/cyrus-sasl.cmake +++ /dev/null @@ -1,23 +0,0 @@ -if (${ENABLE_LIBRARIES} AND ${ENABLE_KRB5}) - set (DEFAULT_ENABLE_CYRUS_SASL 1) -else() - set (DEFAULT_ENABLE_CYRUS_SASL 0) -endif() - -OPTION(ENABLE_CYRUS_SASL "Enable cyrus-sasl" ${DEFAULT_ENABLE_CYRUS_SASL}) -if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/cyrus-sasl/README") - message (WARNING "submodule contrib/cyrus-sasl is missing. to fix try run: \n git submodule update --init") - set (ENABLE_CYRUS_SASL 0) -endif () - -if (ENABLE_CYRUS_SASL) - - set (USE_CYRUS_SASL 1) - set (CYRUS_SASL_LIBRARY sasl2) - - set (CYRUS_SASL_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/cyrus-sasl/include") - - -endif () - -message (STATUS "Using cyrus-sasl: krb5=${USE_KRB5}: ${CYRUS_SASL_INCLUDE_DIR} : ${CYRUS_SASL_LIBRARY}") diff --git a/cmake/find/datasketches.cmake b/cmake/find/datasketches.cmake deleted file mode 100644 index 3d0bb1d1f95d..000000000000 --- a/cmake/find/datasketches.cmake +++ /dev/null @@ -1,29 +0,0 @@ -option (ENABLE_DATASKETCHES "Enable DataSketches" ${ENABLE_LIBRARIES}) - -if (ENABLE_DATASKETCHES) - -option (USE_INTERNAL_DATASKETCHES_LIBRARY "Set to FALSE to use system DataSketches library instead of bundled" ON) - -if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/datasketches-cpp/theta/CMakeLists.txt") - if (USE_INTERNAL_DATASKETCHES_LIBRARY) - message(WARNING "submodule contrib/datasketches-cpp is missing. 
to fix try run: \n git submodule update --init") - endif() - set(MISSING_INTERNAL_DATASKETCHES_LIBRARY 1) - set(USE_INTERNAL_DATASKETCHES_LIBRARY 0) -endif() - -if (USE_INTERNAL_DATASKETCHES_LIBRARY) - set(DATASKETCHES_LIBRARY theta) - set(DATASKETCHES_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/datasketches-cpp/common/include" "${ClickHouse_SOURCE_DIR}/contrib/datasketches-cpp/theta/include") -elseif (NOT MISSING_INTERNAL_DATASKETCHES_LIBRARY) - find_library(DATASKETCHES_LIBRARY theta) - find_path(DATASKETCHES_INCLUDE_DIR NAMES theta_sketch.hpp PATHS ${DATASKETCHES_INCLUDE_PATHS}) -endif() - -if (DATASKETCHES_LIBRARY AND DATASKETCHES_INCLUDE_DIR) - set(USE_DATASKETCHES 1) -endif() - -endif() - -message (STATUS "Using datasketches=${USE_DATASKETCHES}: ${DATASKETCHES_INCLUDE_DIR} : ${DATASKETCHES_LIBRARY}") diff --git a/cmake/find/fast_float.cmake b/cmake/find/fast_float.cmake deleted file mode 100644 index 3e8b7cc5280d..000000000000 --- a/cmake/find/fast_float.cmake +++ /dev/null @@ -1,6 +0,0 @@ -if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/fast_float/include/fast_float/fast_float.h") - message (FATAL_ERROR "submodule contrib/fast_float is missing. to fix try run: \n git submodule update --init") -endif () - -set(FAST_FLOAT_LIBRARY fast_float) -set(FAST_FLOAT_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/fast_float/include/") diff --git a/cmake/find/fastops.cmake b/cmake/find/fastops.cmake deleted file mode 100644 index 72426eb59125..000000000000 --- a/cmake/find/fastops.cmake +++ /dev/null @@ -1,24 +0,0 @@ -if(ARCH_AMD64 AND NOT OS_FREEBSD AND NOT OS_DARWIN) - option(ENABLE_FASTOPS "Enable fast vectorized mathematical functions library by Mikhail Parakhin" ${ENABLE_LIBRARIES}) -elseif(ENABLE_FASTOPS) - message (${RECONFIGURE_MESSAGE_LEVEL} "Fastops library is supported on x86_64 only, and not FreeBSD or Darwin") -endif() - -if(NOT ENABLE_FASTOPS) - set(USE_FASTOPS 0) - return() -endif() - -if(NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/fastops/fastops/fastops.h") - message(WARNING "submodule contrib/fastops is missing. 
to fix try run: \n git submodule update --init") - message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal fastops library") - set(MISSING_INTERNAL_FASTOPS_LIBRARY 1) -endif() - -if(NOT MISSING_INTERNAL_FASTOPS_LIBRARY) - set(USE_FASTOPS 1) - set(FASTOPS_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/fastops/) - set(FASTOPS_LIBRARY fastops) -endif() - -message(STATUS "Using fastops=${USE_FASTOPS}: ${FASTOPS_INCLUDE_DIR} : ${FASTOPS_LIBRARY}") diff --git a/cmake/find/filelog.cmake b/cmake/find/filelog.cmake deleted file mode 100644 index f545ee9d0ede..000000000000 --- a/cmake/find/filelog.cmake +++ /dev/null @@ -1,8 +0,0 @@ -# StorageFileLog only support Linux platform -if (OS_LINUX) - set (USE_FILELOG 1) - message (STATUS "Using StorageFileLog = 1") -else() - message(STATUS "StorageFileLog is only supported on Linux") -endif () - diff --git a/cmake/find/gperf.cmake b/cmake/find/gperf.cmake deleted file mode 100644 index 9b806598c578..000000000000 --- a/cmake/find/gperf.cmake +++ /dev/null @@ -1,16 +0,0 @@ -if(NOT DEFINED ENABLE_GPERF OR ENABLE_GPERF) - # Check if gperf was installed - find_program(GPERF gperf) - if(GPERF) - option(ENABLE_GPERF "Use gperf function hash generator tool" ${ENABLE_LIBRARIES}) - endif() -endif() - -if (ENABLE_GPERF) - if(NOT GPERF) - message(FATAL_ERROR "Could not find the program gperf") - endif() - set(USE_GPERF 1) -endif() - -message(STATUS "Using gperf=${USE_GPERF}: ${GPERF}") diff --git a/cmake/find/grpc.cmake b/cmake/find/grpc.cmake deleted file mode 100644 index 92a85b0df044..000000000000 --- a/cmake/find/grpc.cmake +++ /dev/null @@ -1,72 +0,0 @@ -# disable grpc due to conflicts of abseil (required by grpc) dynamic annotations with libtsan.a -if (SANITIZE STREQUAL "thread" AND COMPILER_GCC) - set(ENABLE_GRPC_DEFAULT OFF) -else() - set(ENABLE_GRPC_DEFAULT ${ENABLE_LIBRARIES}) -endif() - -option(ENABLE_GRPC "Use gRPC" ${ENABLE_GRPC_DEFAULT}) - -if(NOT ENABLE_GRPC) - if(USE_INTERNAL_GRPC_LIBRARY) - message(${RECONFIGURE_MESSAGE_LEVEL} "Cannot use internal gRPC library with ENABLE_GRPC=OFF") - endif() - return() -endif() - -if(NOT USE_PROTOBUF) - message(WARNING "Cannot use gRPC library without protobuf") -endif() - -# Normally we use the internal gRPC framework. -# You can set USE_INTERNAL_GRPC_LIBRARY to OFF to force using the external gRPC framework, which should be installed in the system in this case. -# The external gRPC framework can be installed in the system by running -# sudo apt-get install libgrpc++-dev protobuf-compiler-grpc -option(USE_INTERNAL_GRPC_LIBRARY "Set to FALSE to use system gRPC library instead of bundled. (Experimental. Set to OFF on your own risk)" ON) - -if(NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/grpc/CMakeLists.txt") - if(USE_INTERNAL_GRPC_LIBRARY) - message(WARNING "submodule contrib/grpc is missing. 
to fix try run: \n git submodule update --init")
- message(${RECONFIGURE_MESSAGE_LEVEL} "Can't use internal grpc")
- set(USE_INTERNAL_GRPC_LIBRARY 0)
- endif()
- set(MISSING_INTERNAL_GRPC_LIBRARY 1)
-endif()
-
-if(USE_SSL)
- set(gRPC_USE_UNSECURE_LIBRARIES FALSE)
-else()
- set(gRPC_USE_UNSECURE_LIBRARIES TRUE)
-endif()
-
-if(NOT USE_INTERNAL_GRPC_LIBRARY)
- find_package(gRPC)
- if(NOT gRPC_INCLUDE_DIRS OR NOT gRPC_LIBRARIES)
- message(${RECONFIGURE_MESSAGE_LEVEL} "Can't find system gRPC library")
- set(EXTERNAL_GRPC_LIBRARY_FOUND 0)
- elseif(NOT gRPC_CPP_PLUGIN)
- message(${RECONFIGURE_MESSAGE_LEVEL} "Can't find system grpc_cpp_plugin")
- set(EXTERNAL_GRPC_LIBRARY_FOUND 0)
- else()
- set(EXTERNAL_GRPC_LIBRARY_FOUND 1)
- set(USE_GRPC 1)
- endif()
-endif()
-
-if(NOT EXTERNAL_GRPC_LIBRARY_FOUND AND NOT MISSING_INTERNAL_GRPC_LIBRARY)
- set(gRPC_INCLUDE_DIRS "${ClickHouse_SOURCE_DIR}/contrib/grpc/include")
- if(gRPC_USE_UNSECURE_LIBRARIES)
- set(gRPC_LIBRARIES grpc_unsecure grpc++_unsecure)
- else()
- set(gRPC_LIBRARIES grpc grpc++)
- endif()
- set(gRPC_CPP_PLUGIN $<TARGET_FILE:grpc_cpp_plugin>)
- set(gRPC_PYTHON_PLUGIN $<TARGET_FILE:grpc_python_plugin>)
-
- include("${ClickHouse_SOURCE_DIR}/contrib/grpc-cmake/protobuf_generate_grpc.cmake")
-
- set(USE_INTERNAL_GRPC_LIBRARY 1)
- set(USE_GRPC 1)
-endif()
-
-message(STATUS "Using gRPC=${USE_GRPC}: ${gRPC_INCLUDE_DIRS} : ${gRPC_LIBRARIES} : ${gRPC_CPP_PLUGIN}")
diff --git a/cmake/find/gtest.cmake b/cmake/find/gtest.cmake
deleted file mode 100644
index 935744bcbd11..000000000000
--- a/cmake/find/gtest.cmake
+++ /dev/null
@@ -1,40 +0,0 @@
-# included only if ENABLE_TESTS=1
-
-option (USE_INTERNAL_GTEST_LIBRARY "Set to FALSE to use system Google Test instead of bundled" ON)
-
-if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/googletest/googletest/CMakeLists.txt")
- if (USE_INTERNAL_GTEST_LIBRARY)
- message (WARNING "submodule contrib/googletest is missing. 
to fix try run: \n git submodule update --init") - message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal gtest") - set (USE_INTERNAL_GTEST_LIBRARY 0) - endif () - - set (MISSING_INTERNAL_GTEST_LIBRARY 1) -endif () - -if(NOT USE_INTERNAL_GTEST_LIBRARY) - # TODO: autodetect of GTEST_SRC_DIR by EXISTS /usr/src/googletest/CMakeLists.txt - if(NOT GTEST_SRC_DIR) - find_package(GTest) - if (NOT GTEST_INCLUDE_DIRS) - message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find system Google Test") - endif() - endif() -endif() - -if (NOT GTEST_SRC_DIR AND NOT GTEST_INCLUDE_DIRS AND NOT MISSING_INTERNAL_GTEST_LIBRARY) - set (USE_INTERNAL_GTEST_LIBRARY 1) - set (GTEST_MAIN_LIBRARIES gtest_main) - set (GTEST_LIBRARIES gtest) - set (GTEST_BOTH_LIBRARIES ${GTEST_MAIN_LIBRARIES} ${GTEST_LIBRARIES}) - set (GTEST_INCLUDE_DIRS ${ClickHouse_SOURCE_DIR}/contrib/googletest/googletest) -elseif(USE_INTERNAL_GTEST_LIBRARY) - message (${RECONFIGURE_MESSAGE_LEVEL} "Wouldn't use internal Google Test library") - set (USE_INTERNAL_GTEST_LIBRARY 0) -endif () - -if((GTEST_INCLUDE_DIRS AND GTEST_BOTH_LIBRARIES) OR GTEST_SRC_DIR) - set(USE_GTEST 1) -endif() - -message (STATUS "Using gtest=${USE_GTEST}: ${GTEST_INCLUDE_DIRS} : ${GTEST_BOTH_LIBRARIES} : ${GTEST_SRC_DIR}") diff --git a/cmake/find/h3.cmake b/cmake/find/h3.cmake deleted file mode 100644 index e692b431e906..000000000000 --- a/cmake/find/h3.cmake +++ /dev/null @@ -1,39 +0,0 @@ -option (ENABLE_H3 "Enable H3" ${ENABLE_LIBRARIES}) -if(NOT ENABLE_H3) - if(USE_INTERNAL_H3_LIBRARY) - message (${RECONFIGURE_MESSAGE_LEVEL} "Cannot use internal h3 library with ENABLE_H3=OFF") - endif () - return() -endif() - -option(USE_INTERNAL_H3_LIBRARY "Set to FALSE to use system h3 library instead of bundled" - ON) # we are not aware of any distribution that provides h3 package - -if(NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/h3/src/h3lib/include/h3Index.h") - if(USE_INTERNAL_H3_LIBRARY) - message(WARNING "submodule contrib/h3 is missing. 
to fix try run: \n git submodule update --init") - message(${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal h3 library") - set(USE_INTERNAL_H3_LIBRARY 0) - endif() - set(MISSING_INTERNAL_H3_LIBRARY 1) -endif() - -if(NOT USE_INTERNAL_H3_LIBRARY) - find_library(H3_LIBRARY h3) - find_path(H3_INCLUDE_DIR NAMES h3/h3api.h PATHS ${H3_INCLUDE_PATHS}) - - if(NOT H3_LIBRARY OR NOT H3_INCLUDE_DIR) - message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find system h3 library") - endif() -endif() - -if (H3_LIBRARY AND H3_INCLUDE_DIR) - set (USE_H3 1) -elseif(NOT MISSING_INTERNAL_H3_LIBRARY) - set (H3_LIBRARY h3) - set (H3_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/h3/src/h3lib/include") - set (USE_H3 1) - set (USE_INTERNAL_H3_LIBRARY 1) -endif() - -message (STATUS "Using h3=${USE_H3}: ${H3_INCLUDE_DIR} : ${H3_LIBRARY}") diff --git a/cmake/find/hdfs3.cmake b/cmake/find/hdfs3.cmake deleted file mode 100644 index aac6b99dfa29..000000000000 --- a/cmake/find/hdfs3.cmake +++ /dev/null @@ -1,45 +0,0 @@ -if(NOT ARCH_ARM AND NOT OS_FREEBSD AND NOT APPLE AND USE_PROTOBUF AND NOT ARCH_PPC64LE) - option(ENABLE_HDFS "Enable HDFS" ${ENABLE_LIBRARIES}) -elseif(ENABLE_HDFS OR USE_INTERNAL_HDFS3_LIBRARY) - message (${RECONFIGURE_MESSAGE_LEVEL} "Cannot use HDFS3 with current configuration") -endif() - -if(NOT ENABLE_HDFS) - if(USE_INTERNAL_HDFS3_LIBRARY) - message (${RECONFIGURE_MESSAGE_LEVEL} "Cannot use internal HDFS3 library with ENABLE_HDFS3=OFF") - endif() - return() -endif() - -option(USE_INTERNAL_HDFS3_LIBRARY "Set to FALSE to use system HDFS3 instead of bundled (experimental - set to OFF on your own risk)" - ON) # We don't know any linux distribution with package for it - -if(NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/libhdfs3/include/hdfs/hdfs.h") - if(USE_INTERNAL_HDFS3_LIBRARY) - message(WARNING "submodule contrib/libhdfs3 is missing. to fix try run: \n git submodule update --init") - message (${RECONFIGURE_MESSAGE_LEVEL} "Cannot use internal HDFS3 library") - set(USE_INTERNAL_HDFS3_LIBRARY 0) - endif() - set(MISSING_INTERNAL_HDFS3_LIBRARY 1) -endif() - -if(NOT USE_INTERNAL_HDFS3_LIBRARY) - find_library(HDFS3_LIBRARY hdfs3) - find_path(HDFS3_INCLUDE_DIR NAMES hdfs/hdfs.h PATHS ${HDFS3_INCLUDE_PATHS}) - if(NOT HDFS3_LIBRARY OR NOT HDFS3_INCLUDE_DIR) - message (${RECONFIGURE_MESSAGE_LEVEL} "Cannot find system HDFS3 library") - endif() -endif() - -if(HDFS3_LIBRARY AND HDFS3_INCLUDE_DIR) - set(USE_HDFS 1) -elseif(NOT MISSING_INTERNAL_HDFS3_LIBRARY AND LIBGSASL_LIBRARY AND LIBXML2_LIBRARIES) - set(HDFS3_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/libhdfs3/include") - set(HDFS3_LIBRARY hdfs3) - set(USE_INTERNAL_HDFS3_LIBRARY 1) - set(USE_HDFS 1) -else() - message (${RECONFIGURE_MESSAGE_LEVEL} "Cannout enable HDFS3") -endif() - -message(STATUS "Using hdfs3=${USE_HDFS}: ${HDFS3_INCLUDE_DIR} : ${HDFS3_LIBRARY}") diff --git a/cmake/find/hive-metastore.cmake b/cmake/find/hive-metastore.cmake deleted file mode 100644 index bc283cf8bd2c..000000000000 --- a/cmake/find/hive-metastore.cmake +++ /dev/null @@ -1,26 +0,0 @@ -option(ENABLE_HIVE "Enable Hive" ${ENABLE_LIBRARIES}) - -if (NOT ENABLE_HIVE) - message("Hive disabled") - return() -endif() - -if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/hive-metastore") - message(WARNING "submodule contrib/hive-metastore is missing. 
to fix try run: \n git submodule update --init") - set(USE_HIVE 0) -elseif (NOT USE_THRIFT) - message(WARNING "Thrift is not found, which is needed by Hive") - set(USE_HIVE 0) -elseif (NOT USE_HDFS) - message(WARNING "HDFS is not found, which is needed by Hive") - set(USE_HIVE 0) -elseif (NOT USE_ORC OR NOT USE_ARROW OR NOT USE_PARQUET) - message(WARNING "ORC/Arrow/Parquet is not found, which are needed by Hive") - set(USE_HIVE 0) -else() - set(USE_HIVE 1) - set(HIVE_METASTORE_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/hive-metastore) - set(HIVE_METASTORE_LIBRARY hivemetastore) -endif() - -message (STATUS "Using_Hive=${USE_HIVE}: ${HIVE_METASTORE_INCLUDE_DIR} : ${HIVE_METASTORE_LIBRARY}") diff --git a/cmake/find/icu.cmake b/cmake/find/icu.cmake deleted file mode 100644 index 5ba25e93875c..000000000000 --- a/cmake/find/icu.cmake +++ /dev/null @@ -1,51 +0,0 @@ -if (OS_LINUX) - option(ENABLE_ICU "Enable ICU" ${ENABLE_LIBRARIES}) -else () - option(ENABLE_ICU "Enable ICU" 0) -endif () - -if (NOT ENABLE_ICU) - if(USE_INTERNAL_ICU_LIBRARY) - message (${RECONFIGURE_MESSAGE_LEVEL} "Cannot use internal icu library with ENABLE_ICU=OFF") - endif() - message(STATUS "Build without ICU (support for collations and charset conversion functions will be disabled)") - return() -endif() - -option (USE_INTERNAL_ICU_LIBRARY "Set to FALSE to use system ICU library instead of bundled" ON) - -if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/icu/icu4c/LICENSE") - if (USE_INTERNAL_ICU_LIBRARY) - message (WARNING "submodule contrib/icu is missing. to fix try run: \n git submodule update --init") - message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal ICU") - set (USE_INTERNAL_ICU_LIBRARY 0) - endif () - set (MISSING_INTERNAL_ICU_LIBRARY 1) -endif () - -if(NOT USE_INTERNAL_ICU_LIBRARY) - if (APPLE) - set(ICU_ROOT "/usr/local/opt/icu4c" CACHE STRING "") - endif() - find_package(ICU COMPONENTS i18n uc data) # TODO: remove Modules/FindICU.cmake after cmake 3.7 - #set (ICU_LIBRARIES ${ICU_I18N_LIBRARY} ${ICU_UC_LIBRARY} ${ICU_DATA_LIBRARY} CACHE STRING "") - if(ICU_FOUND) - set(USE_ICU 1) - else() - message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find system ICU") - endif() -endif() - -if (ICU_LIBRARY AND ICU_INCLUDE_DIR) - set (USE_ICU 1) -elseif (NOT MISSING_INTERNAL_ICU_LIBRARY) - set (USE_INTERNAL_ICU_LIBRARY 1) - set (ICU_LIBRARIES icui18n icuuc icudata) - set (USE_ICU 1) -endif () - -if(USE_ICU) - message(STATUS "Using icu=${USE_ICU}: ${ICU_INCLUDE_DIR} : ${ICU_LIBRARIES}") -else() - message(STATUS "Build without ICU (support for collations and charset conversion functions will be disabled)") -endif() diff --git a/cmake/find/krb5.cmake b/cmake/find/krb5.cmake deleted file mode 100644 index 24cc51325dc4..000000000000 --- a/cmake/find/krb5.cmake +++ /dev/null @@ -1,25 +0,0 @@ -OPTION(ENABLE_KRB5 "Enable krb5" ${ENABLE_LIBRARIES}) - -if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/krb5/README") - message (WARNING "submodule contrib/krb5 is missing. 
to fix try run: \n git submodule update --init") - set (ENABLE_KRB5 0) -endif () - -if (NOT CMAKE_SYSTEM_NAME MATCHES "Linux" AND NOT (CMAKE_SYSTEM_NAME MATCHES "Darwin" AND NOT CMAKE_CROSSCOMPILING)) - message (WARNING "krb5 disabled in non-Linux and non-native-Darwin environments") - set (ENABLE_KRB5 0) -endif () - -if (ENABLE_KRB5) - - set (USE_KRB5 1) - set (KRB5_LIBRARY krb5) - - set (KRB5_INCLUDE_DIR - "${ClickHouse_SOURCE_DIR}/contrib/krb5/src/include" - "${ClickHouse_BINARY_DIR}/contrib/krb5-cmake/include" - ) - -endif () - -message (STATUS "Using krb5=${USE_KRB5}: ${KRB5_INCLUDE_DIR} : ${KRB5_LIBRARY}") diff --git a/cmake/find/ldap.cmake b/cmake/find/ldap.cmake deleted file mode 100644 index d0d1e54bfeca..000000000000 --- a/cmake/find/ldap.cmake +++ /dev/null @@ -1,100 +0,0 @@ -option (ENABLE_LDAP "Enable LDAP" ${ENABLE_LIBRARIES}) - -if (NOT ENABLE_LDAP) - if(USE_INTERNAL_LDAP_LIBRARY) - message (${RECONFIGURE_MESSAGE_LEVEL} "Cannot use internal LDAP library with ENABLE_LDAP=OFF") - endif () - return() -endif() - -option (USE_INTERNAL_LDAP_LIBRARY "Set to FALSE to use system *LDAP library instead of bundled" ON) - -if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/openldap/README") - if (USE_INTERNAL_LDAP_LIBRARY) - message (WARNING "Submodule contrib/openldap is missing. To fix try running:\n git submodule update --init") - message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal LDAP library") - endif () - - set (USE_INTERNAL_LDAP_LIBRARY 0) - set (MISSING_INTERNAL_LDAP_LIBRARY 1) -endif () - -set (OPENLDAP_USE_STATIC_LIBS ${USE_STATIC_LIBRARIES}) -set (OPENLDAP_USE_REENTRANT_LIBS 1) - -if (NOT USE_INTERNAL_LDAP_LIBRARY) - if (OPENLDAP_USE_STATIC_LIBS) - message (WARNING "Unable to use external static OpenLDAP libraries, falling back to the bundled version.") - message (${RECONFIGURE_MESSAGE_LEVEL} "Unable to use external OpenLDAP") - set (USE_INTERNAL_LDAP_LIBRARY 1) - else () - if (APPLE AND NOT OPENLDAP_ROOT_DIR) - set (OPENLDAP_ROOT_DIR "/usr/local/opt/openldap") - endif () - - find_package (OpenLDAP) - - if (NOT OPENLDAP_FOUND) - message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find system OpenLDAP") - endif() - endif () -endif () - -if (NOT OPENLDAP_FOUND AND NOT MISSING_INTERNAL_LDAP_LIBRARY) - string (TOLOWER "${CMAKE_SYSTEM_NAME}" _system_name) - string (TOLOWER "${CMAKE_SYSTEM_PROCESSOR}" _system_processor) - - if ( - "${_system_processor}" STREQUAL "amd64" OR - "${_system_processor}" STREQUAL "x64" - ) - set (_system_processor "x86_64") - elseif ( - "${_system_processor}" STREQUAL "arm64" - ) - set (_system_processor "aarch64") - endif () - - if ( - ( "${_system_name}" STREQUAL "linux" AND "${_system_processor}" STREQUAL "x86_64" ) OR - ( "${_system_name}" STREQUAL "linux" AND "${_system_processor}" STREQUAL "aarch64" ) OR - ( "${_system_name}" STREQUAL "linux" AND "${_system_processor}" STREQUAL "ppc64le" ) OR - ( "${_system_name}" STREQUAL "freebsd" AND "${_system_processor}" STREQUAL "x86_64" ) OR - ( "${_system_name}" STREQUAL "freebsd" AND "${_system_processor}" STREQUAL "aarch64" ) OR - ( "${_system_name}" STREQUAL "darwin" AND "${_system_processor}" STREQUAL "x86_64" ) OR - ( "${_system_name}" STREQUAL "darwin" AND "${_system_processor}" STREQUAL "aarch64" ) - ) - set (_ldap_supported_platform TRUE) - endif () - - if (NOT _ldap_supported_platform) - message (WARNING "LDAP support using the bundled library is not implemented for ${CMAKE_SYSTEM_NAME} ${CMAKE_SYSTEM_PROCESSOR} platform.") - message (${RECONFIGURE_MESSAGE_LEVEL} "Cannot enable LDAP support") - elseif 
(NOT USE_SSL) - message (WARNING "LDAP support using the bundled library is not possible if SSL is not used.") - message (${RECONFIGURE_MESSAGE_LEVEL} "Cannot enable LDAP support") - else () - set (USE_INTERNAL_LDAP_LIBRARY 1) - set (OPENLDAP_ROOT_DIR "${ClickHouse_SOURCE_DIR}/contrib/openldap") - set (OPENLDAP_INCLUDE_DIRS - "${ClickHouse_SOURCE_DIR}/contrib/openldap-cmake/${_system_name}_${_system_processor}/include" - "${ClickHouse_SOURCE_DIR}/contrib/openldap/include" - ) - # Below, 'ldap'/'ldap_r' and 'lber' will be resolved to - # the targets defined in contrib/openldap-cmake/CMakeLists.txt - if (OPENLDAP_USE_REENTRANT_LIBS) - set (OPENLDAP_LDAP_LIBRARY "ldap_r") - else () - set (OPENLDAP_LDAP_LIBRARY "ldap") - endif() - set (OPENLDAP_LBER_LIBRARY "lber") - set (OPENLDAP_LIBRARIES ${OPENLDAP_LDAP_LIBRARY} ${OPENLDAP_LBER_LIBRARY}) - set (OPENLDAP_FOUND 1) - endif () -endif () - -if (OPENLDAP_FOUND) - set (USE_LDAP 1) -endif () - -message (STATUS "Using ldap=${USE_LDAP}: ${OPENLDAP_INCLUDE_DIRS} : ${OPENLDAP_LIBRARIES}") diff --git a/cmake/find/libgsasl.cmake b/cmake/find/libgsasl.cmake deleted file mode 100644 index d4e1ebce6296..000000000000 --- a/cmake/find/libgsasl.cmake +++ /dev/null @@ -1,40 +0,0 @@ -option(ENABLE_GSASL_LIBRARY "Enable gsasl library" ${ENABLE_LIBRARIES}) - -if (NOT ENABLE_GSASL_LIBRARY) - if(USE_INTERNAL_LIBGSASL_LIBRARY) - message (${RECONFIGURE_MESSAGE_LEVEL} "Cannot use internal libgsasl library with ENABLE_GSASL_LIBRARY=OFF") - endif() - return() -endif() - -option (USE_INTERNAL_LIBGSASL_LIBRARY "Set to FALSE to use system libgsasl library instead of bundled" ON) - -if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/libgsasl/src/gsasl.h") - if (USE_INTERNAL_LIBGSASL_LIBRARY) - message (WARNING "submodule contrib/libgsasl is missing. to fix try run: \n git submodule update --init") - message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal libgsasl") - set (USE_INTERNAL_LIBGSASL_LIBRARY 0) - endif () - set (MISSING_INTERNAL_LIBGSASL_LIBRARY 1) -endif () - -if (NOT USE_INTERNAL_LIBGSASL_LIBRARY) - find_library (LIBGSASL_LIBRARY gsasl) - find_path (LIBGSASL_INCLUDE_DIR NAMES gsasl.h PATHS ${LIBGSASL_INCLUDE_PATHS}) - if (NOT LIBGSASL_LIBRARY OR NOT LIBGSASL_INCLUDE_DIR) - message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find system libgsasl") - endif () -endif () - -if (LIBGSASL_LIBRARY AND LIBGSASL_INCLUDE_DIR) -elseif (NOT MISSING_INTERNAL_LIBGSASL_LIBRARY) - set (LIBGSASL_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/libgsasl/src" "${ClickHouse_SOURCE_DIR}/contrib/libgsasl/linux_x86_64/include") - set (USE_INTERNAL_LIBGSASL_LIBRARY 1) - set (LIBGSASL_LIBRARY gsasl) -endif () - -if(LIBGSASL_LIBRARY AND LIBGSASL_INCLUDE_DIR) - set (USE_LIBGSASL 1) -endif() - -message (STATUS "Using libgsasl=${USE_LIBGSASL}: ${LIBGSASL_INCLUDE_DIR} : ${LIBGSASL_LIBRARY}") diff --git a/cmake/find/libpqxx.cmake b/cmake/find/libpqxx.cmake deleted file mode 100644 index 68dddffde701..000000000000 --- a/cmake/find/libpqxx.cmake +++ /dev/null @@ -1,31 +0,0 @@ -option(ENABLE_LIBPQXX "Enalbe libpqxx" ${ENABLE_LIBRARIES}) - -if (NOT ENABLE_LIBPQXX) - return() -endif() - -if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/libpqxx/src") - message (WARNING "submodule contrib/libpqxx is missing. to fix try run: \n git submodule update --init") - message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal libpqxx library") - set (USE_LIBPQXX 0) - return() -endif() - -if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/libpq/include") - message (ERROR "submodule contrib/libpq is missing. 
to fix try run: \n git submodule update --init") - message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal libpq needed for libpqxx") - set (USE_LIBPQXX 0) - return() -endif() - -if (NOT USE_INTERNAL_SSL_LIBRARY) - set (USE_LIBPQXX 0) -else () -set (USE_LIBPQXX 1) -set (LIBPQXX_LIBRARY libpqxx) -set (LIBPQ_LIBRARY libpq) -set (LIBPQXX_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/libpqxx/include") -set (LIBPQ_ROOT_DIR "${ClickHouse_SOURCE_DIR}/contrib/libpq") -message (STATUS "Using libpqxx=${USE_LIBPQXX}: ${LIBPQXX_INCLUDE_DIR} : ${LIBPQXX_LIBRARY}") -message (STATUS "Using libpq: ${LIBPQ_ROOT_DIR} : ${LIBPQ_INCLUDE_DIR} : ${LIBPQ_LIBRARY}") -endif() diff --git a/cmake/find/libprotobuf-mutator.cmake b/cmake/find/libprotobuf-mutator.cmake deleted file mode 100644 index a308db67c8b9..000000000000 --- a/cmake/find/libprotobuf-mutator.cmake +++ /dev/null @@ -1,11 +0,0 @@ -option(USE_LIBPROTOBUF_MUTATOR "Enable libprotobuf-mutator" ${ENABLE_FUZZING}) - -if (NOT USE_LIBPROTOBUF_MUTATOR) - return() -endif() - -set(LibProtobufMutator_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/libprotobuf-mutator") - -if (NOT EXISTS "${LibProtobufMutator_SOURCE_DIR}/README.md") - message (ERROR "submodule contrib/libprotobuf-mutator is missing. to fix try run: \n git submodule update --init") -endif() diff --git a/cmake/find/libuv.cmake b/cmake/find/libuv.cmake deleted file mode 100644 index c94dfd50b767..000000000000 --- a/cmake/find/libuv.cmake +++ /dev/null @@ -1,22 +0,0 @@ -if (OS_DARWIN AND COMPILER_GCC) - message (WARNING "libuv cannot be built with GCC in macOS due to a bug: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=93082") - SET(MISSING_INTERNAL_LIBUV_LIBRARY 1) - return() -endif() - -if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/libuv") - message (WARNING "submodule contrib/libuv is missing. to fix try run: \n git submodule update --init") - SET(MISSING_INTERNAL_LIBUV_LIBRARY 1) - return() -endif() - -if (MAKE_STATIC_LIBRARIES) - set (LIBUV_LIBRARY uv_a) -else() - set (LIBUV_LIBRARY uv) -endif() - -set (LIBUV_ROOT_DIR "${ClickHouse_SOURCE_DIR}/contrib/libuv") -set (LIBUV_INCLUDE_DIR "${LIBUV_ROOT_DIR}/include") - -message (STATUS "Using libuv: ${LIBUV_ROOT_DIR} : ${LIBUV_LIBRARY}") diff --git a/cmake/find/libxml2.cmake b/cmake/find/libxml2.cmake deleted file mode 100644 index e9fe7780d39c..000000000000 --- a/cmake/find/libxml2.cmake +++ /dev/null @@ -1,34 +0,0 @@ -option (USE_INTERNAL_LIBXML2_LIBRARY "Set to FALSE to use system libxml2 library instead of bundled" ON) - -if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/libxml2/libxml.h") - if (USE_INTERNAL_LIBXML2_LIBRARY) - message (WARNING "submodule contrib/libxml2 is missing. 
to fix try run: \n git submodule update --init") - message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal libxml") - set (USE_INTERNAL_LIBXML2_LIBRARY 0) - endif () - set (MISSING_INTERNAL_LIBXML2_LIBRARY 1) -endif () - -if (NOT USE_INTERNAL_LIBXML2_LIBRARY) - find_package (LibXml2) - #find_library (LIBXML2_LIBRARY libxml2) - #find_path (LIBXML2_INCLUDE_DIR NAMES libxml.h PATHS ${LIBXML2_INCLUDE_PATHS}) - - if (NOT LIBXML2_LIBRARY OR NOT LIBXML2_INCLUDE_DIR) - message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find system libxml2") - endif () - - if (USE_STATIC_LIBRARIES) - find_package(LibLZMA) - set (LIBXML2_LIBRARIES ${LIBXML2_LIBRARIES} ${LIBLZMA_LIBRARIES}) - endif () -endif () - -if (LIBXML2_LIBRARY AND LIBXML2_INCLUDE_DIR) -elseif (NOT MISSING_INTERNAL_LIBXML2_LIBRARY) - set (LIBXML2_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/libxml2/include ${ClickHouse_SOURCE_DIR}/contrib/libxml2-cmake/linux_x86_64/include) - set (USE_INTERNAL_LIBXML2_LIBRARY 1) - set (LIBXML2_LIBRARIES libxml2) -endif () - -message (STATUS "Using libxml2: ${LIBXML2_INCLUDE_DIR} : ${LIBXML2_LIBRARIES}") diff --git a/cmake/find/llvm.cmake b/cmake/find/llvm.cmake deleted file mode 100644 index ece5d5434a05..000000000000 --- a/cmake/find/llvm.cmake +++ /dev/null @@ -1,79 +0,0 @@ -if (APPLE OR NOT ARCH_AMD64 OR SANITIZE STREQUAL "undefined") - set (ENABLE_EMBEDDED_COMPILER_DEFAULT OFF) -else() - set (ENABLE_EMBEDDED_COMPILER_DEFAULT ON) -endif() - -option (ENABLE_EMBEDDED_COMPILER "Enable support for 'compile_expressions' option for query execution" ${ENABLE_EMBEDDED_COMPILER_DEFAULT}) - -if (NOT ENABLE_EMBEDDED_COMPILER) - set (USE_EMBEDDED_COMPILER 0) - return() -endif() - -if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/llvm/llvm/CMakeLists.txt") - message (${RECONFIGURE_MESSAGE_LEVEL} "submodule /contrib/llvm is missing. to fix try run: \n git submodule update --init") -endif () - -set (USE_EMBEDDED_COMPILER 1) - -set (LLVM_FOUND 1) -set (LLVM_VERSION "12.0.0bundled") -set (LLVM_INCLUDE_DIRS - "${ClickHouse_SOURCE_DIR}/contrib/llvm/llvm/include" - "${ClickHouse_BINARY_DIR}/contrib/llvm/llvm/include" -) -set (LLVM_LIBRARY_DIRS "${ClickHouse_BINARY_DIR}/contrib/llvm/llvm") - -message(STATUS "LLVM include Directory: ${LLVM_INCLUDE_DIRS}") -message(STATUS "LLVM library Directory: ${LLVM_LIBRARY_DIRS}") -message(STATUS "LLVM C++ compiler flags: ${LLVM_CXXFLAGS}") - -# This list was generated by listing all LLVM libraries, compiling the binary and removing all libraries while it still compiles. 
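
With a system LLVM the list below would not have to be hand-minimized: LLVM's CMake package can map component names to library names, which is also what the commented-out helper at the end of this file sketches. A hedged alternative, assuming find_package(LLVM) resolves and with `my_jit_target` as a placeholder consumer:

# Sketch only; the bundled build hard-codes REQUIRED_LLVM_LIBRARIES instead.
find_package (LLVM REQUIRED CONFIG)
llvm_map_components_to_libnames (llvm_libs support core executionengine x86codegen)
target_link_libraries (my_jit_target PRIVATE ${llvm_libs})
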
-set (REQUIRED_LLVM_LIBRARIES -LLVMExecutionEngine -LLVMRuntimeDyld -LLVMX86CodeGen -LLVMX86Desc -LLVMX86Info -LLVMAsmPrinter -LLVMDebugInfoDWARF -LLVMGlobalISel -LLVMSelectionDAG -LLVMMCDisassembler -LLVMPasses -LLVMCodeGen -LLVMipo -LLVMBitWriter -LLVMInstrumentation -LLVMScalarOpts -LLVMAggressiveInstCombine -LLVMInstCombine -LLVMVectorize -LLVMTransformUtils -LLVMTarget -LLVMAnalysis -LLVMProfileData -LLVMObject -LLVMBitReader -LLVMCore -LLVMRemarks -LLVMBitstreamReader -LLVMMCParser -LLVMMC -LLVMBinaryFormat -LLVMDebugInfoCodeView -LLVMSupport -LLVMDemangle -) - -#function(llvm_libs_all REQUIRED_LLVM_LIBRARIES) -# llvm_map_components_to_libnames (result all) -# if (USE_STATIC_LIBRARIES OR NOT "LLVM" IN_LIST result) -# list (REMOVE_ITEM result "LTO" "LLVM") -# else() -# set (result "LLVM") -# endif () -# list (APPEND result ${CMAKE_DL_LIBS} ${ZLIB_LIBRARIES}) -# set (${REQUIRED_LLVM_LIBRARIES} ${result} PARENT_SCOPE) -#endfunction() diff --git a/cmake/find/ltdl.cmake b/cmake/find/ltdl.cmake deleted file mode 100644 index b48a3630222b..000000000000 --- a/cmake/find/ltdl.cmake +++ /dev/null @@ -1,5 +0,0 @@ -if (ENABLE_ODBC AND NOT USE_INTERNAL_ODBC_LIBRARY) - set (LTDL_PATHS "/usr/local/opt/libtool/lib") - find_library (LTDL_LIBRARY ltdl PATHS ${LTDL_PATHS} REQUIRED) - message (STATUS "Using ltdl: ${LTDL_LIBRARY}") -endif () diff --git a/cmake/find/miniselect.cmake b/cmake/find/miniselect.cmake deleted file mode 100644 index 0a50c9bf4a82..000000000000 --- a/cmake/find/miniselect.cmake +++ /dev/null @@ -1,2 +0,0 @@ -set(MINISELECT_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/miniselect/include) -message(STATUS "Using miniselect: ${MINISELECT_INCLUDE_DIR}") diff --git a/cmake/find/msgpack.cmake b/cmake/find/msgpack.cmake deleted file mode 100644 index ac52740c7747..000000000000 --- a/cmake/find/msgpack.cmake +++ /dev/null @@ -1,37 +0,0 @@ -option (ENABLE_MSGPACK "Enable msgpack library" ${ENABLE_LIBRARIES}) - -if(NOT ENABLE_MSGPACK) - if(USE_INTERNAL_MSGPACK_LIBRARY) - message (${RECONFIGURE_MESSAGE_LEVEL} "Cannot use internal msgpack with ENABLE_MSGPACK=OFF") - endif() - return() -endif() - -option (USE_INTERNAL_MSGPACK_LIBRARY "Set to FALSE to use system msgpack library instead of bundled" ON) - -if(NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/msgpack-c/include/msgpack.hpp") - if(USE_INTERNAL_MSGPACK_LIBRARY) - message(WARNING "Submodule contrib/msgpack-c is missing. 
To fix try run: \n git submodule update --init") - message (${RECONFIGURE_MESSAGE_LEVEL} "Cannot use internal msgpack") - set(USE_INTERNAL_MSGPACK_LIBRARY 0) - endif() - set(MISSING_INTERNAL_MSGPACK_LIBRARY 1) -endif() - -if(NOT USE_INTERNAL_MSGPACK_LIBRARY) - find_path(MSGPACK_INCLUDE_DIR NAMES msgpack.hpp PATHS ${MSGPACK_INCLUDE_PATHS}) - if(NOT MSGPACK_INCLUDE_DIR) - message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find system msgpack") - endif() -endif() - -if(NOT MSGPACK_INCLUDE_DIR AND NOT MISSING_INTERNAL_MSGPACK_LIBRARY) - set(MSGPACK_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/msgpack-c/include") - set(USE_INTERNAL_MSGPACK_LIBRARY 1) -endif() - -if (MSGPACK_INCLUDE_DIR) - set(USE_MSGPACK 1) -endif() - -message(STATUS "Using msgpack=${USE_MSGPACK}: ${MSGPACK_INCLUDE_DIR}") diff --git a/cmake/find/mysqlclient.cmake b/cmake/find/mysqlclient.cmake deleted file mode 100644 index 746775410cbc..000000000000 --- a/cmake/find/mysqlclient.cmake +++ /dev/null @@ -1,78 +0,0 @@ -if(OS_LINUX AND OPENSSL_FOUND) - option(ENABLE_MYSQL "Enable MySQL" ${ENABLE_LIBRARIES}) -else () - option(ENABLE_MYSQL "Enable MySQL" FALSE) -endif () - -if(NOT ENABLE_MYSQL) - if (USE_INTERNAL_MYSQL_LIBRARY) - message (${RECONFIGURE_MESSAGE_LEVEL} "Cannot use internal mysql library with ENABLE_MYSQL=OFF") - endif () - message (STATUS "Build without mysqlclient (support for MYSQL dictionary source will be disabled)") - return() -endif() - -option(USE_INTERNAL_MYSQL_LIBRARY "Set to FALSE to use system mysqlclient library instead of bundled" ON) - -if(NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/mariadb-connector-c/README") - if(USE_INTERNAL_MYSQL_LIBRARY) - message(WARNING "submodule contrib/mariadb-connector-c is missing. to fix try run: \n git submodule update --init") - message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal mysql library") - set(USE_INTERNAL_MYSQL_LIBRARY 0) - endif() - set(MISSING_INTERNAL_MYSQL_LIBRARY 1) -endif() - -if (NOT USE_INTERNAL_MYSQL_LIBRARY) - set (MYSQL_LIB_PATHS - "/usr/local/opt/mysql/lib" - "/usr/local/lib" - "/usr/local/lib64" - "/usr/local/lib/mariadb" # macos brew mariadb-connector-c - "/usr/mysql/lib" - "/usr/mysql/lib64" - "/usr/lib" - "/usr/lib64" - "/lib" - "/lib64") - - set (MYSQL_INCLUDE_PATHS - "/usr/local/opt/mysql/include" - "/usr/mysql/include" - "/usr/local/include" - "/usr/include/mariadb" - "/usr/include/mysql" - "/usr/include") - - find_path (MYSQL_INCLUDE_DIR NAMES mysql.h mysql/mysql.h mariadb/mysql.h PATHS ${MYSQL_INCLUDE_PATHS} PATH_SUFFIXES mysql) - - if (USE_STATIC_LIBRARIES) - find_library (STATIC_MYSQLCLIENT_LIB NAMES mariadbclient mysqlclient PATHS ${MYSQL_LIB_PATHS} PATH_SUFFIXES mysql) - else () - find_library (MYSQLCLIENT_LIBRARIES NAMES mariadb mariadbclient mysqlclient PATHS ${MYSQL_LIB_PATHS} PATH_SUFFIXES mysql) - endif () - - if (MYSQL_INCLUDE_DIR AND (STATIC_MYSQLCLIENT_LIB OR MYSQLCLIENT_LIBRARIES)) - set (USE_MYSQL 1) - set (MYSQLXX_LIBRARY mysqlxx) - if (APPLE) - # /usr/local/include/mysql/mysql_com.h:1011:10: fatal error: mysql/udf_registration_types.h: No such file or directory - set(MYSQL_INCLUDE_DIR ${MYSQL_INCLUDE_DIR} ${MYSQL_INCLUDE_DIR}/mysql) - endif () - else () - message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find system mysql library") - endif () -endif () - -if (NOT USE_MYSQL AND NOT MISSING_INTERNAL_MYSQL_LIBRARY) - set (MYSQLCLIENT_LIBRARIES mariadbclient) - set (MYSQLXX_LIBRARY mysqlxx) - set (USE_MYSQL 1) - set (USE_INTERNAL_MYSQL_LIBRARY 1) -endif() - -if (USE_MYSQL) - message (STATUS "Using mysqlclient=${USE_MYSQL}: 
${MYSQL_INCLUDE_DIR} : ${MYSQLCLIENT_LIBRARIES}; staticlib=${STATIC_MYSQLCLIENT_LIB}") -else () - message (STATUS "Build without mysqlclient (support for MYSQL dictionary source will be disabled)") -endif () diff --git a/cmake/find/nanodbc.cmake b/cmake/find/nanodbc.cmake deleted file mode 100644 index 2fa60e71f55a..000000000000 --- a/cmake/find/nanodbc.cmake +++ /dev/null @@ -1,16 +0,0 @@ -if (NOT ENABLE_ODBC) - return () -endif () - -if (NOT USE_INTERNAL_NANODBC_LIBRARY) - message (FATAL_ERROR "Only the bundled nanodbc library can be used") -endif () - -if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/nanodbc/nanodbc") - message (FATAL_ERROR "submodule contrib/nanodbc is missing. to fix try run: \n git submodule update --init") -endif() - -set (NANODBC_LIBRARY nanodbc) -set (NANODBC_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/nanodbc/nanodbc") - -message (STATUS "Using nanodbc: ${NANODBC_INCLUDE_DIR} : ${NANODBC_LIBRARY}") diff --git a/cmake/find/nlp.cmake b/cmake/find/nlp.cmake deleted file mode 100644 index 5c10f2f24e75..000000000000 --- a/cmake/find/nlp.cmake +++ /dev/null @@ -1,32 +0,0 @@ -option(ENABLE_NLP "Enable NLP functions support" ${ENABLE_LIBRARIES}) - -if (NOT ENABLE_NLP) - - message (STATUS "NLP functions disabled") - return() -endif() - -if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/libstemmer_c/Makefile") - message (WARNING "submodule contrib/libstemmer_c is missing. to fix try run: \n git submodule update --init") - message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal libstemmer_c library, NLP functions will be disabled") - set (USE_NLP 0) - return() -endif () - -if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/wordnet-blast/wnb") - message (WARNING "submodule contrib/wordnet-blast is missing. to fix try run: \n git submodule update --init") - message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal wordnet-blast library, NLP functions will be disabled") - set (USE_NLP 0) - return() -endif () - -if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/lemmagen-c/README.md") - message (WARNING "submodule contrib/lemmagen-c is missing. to fix try run: \n git submodule update --init") - message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal lemmagen-c library, NLP functions will be disabled") - set (USE_NLP 0) - return() -endif () - -set (USE_NLP 1) - -message (STATUS "Using Libraries for NLP functions: contrib/wordnet-blast, contrib/libstemmer_c, contrib/lemmagen-c") diff --git a/cmake/find/nuraft.cmake b/cmake/find/nuraft.cmake deleted file mode 100644 index c19f6774e7d6..000000000000 --- a/cmake/find/nuraft.cmake +++ /dev/null @@ -1,24 +0,0 @@ -option(ENABLE_NURAFT "Enable NuRaft" ${ENABLE_LIBRARIES}) - -if (NOT ENABLE_NURAFT) - return() -endif() - -if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/NuRaft/src") - message (WARNING "submodule contrib/NuRaft is missing. 
to fix try run: \n git submodule update --init") - message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal NuRaft library") - set (USE_NURAFT 0) - return() -endif () - -if (NOT OS_FREEBSD) - set (USE_NURAFT 1) - set (NURAFT_LIBRARY nuraft) - - set (NURAFT_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/NuRaft/include") - - message (STATUS "Using NuRaft=${USE_NURAFT}: ${NURAFT_INCLUDE_DIR} : ${NURAFT_LIBRARY}") -else() - set (USE_NURAFT 0) - message (STATUS "Using internal NuRaft library on FreeBSD and Darwin is not supported") -endif() diff --git a/cmake/find/odbc.cmake b/cmake/find/odbc.cmake deleted file mode 100644 index 2f06cfed9414..000000000000 --- a/cmake/find/odbc.cmake +++ /dev/null @@ -1,55 +0,0 @@ -option (ENABLE_ODBC "Enable ODBC library" ${ENABLE_LIBRARIES}) - -if (NOT OS_LINUX) - if (ENABLE_ODBC) - message(STATUS "ODBC is only supported on Linux") - endif() - set (ENABLE_ODBC OFF CACHE INTERNAL "") -endif () - -if (NOT ENABLE_ODBC) - if (USE_INTERNAL_ODBC_LIBRARY) - message (${RECONFIGURE_MESSAGE_LEVEL} "Can't use internal ODBC with ENABLE_ODBC=OFF") - endif() - - add_library (unixodbc INTERFACE) - target_compile_definitions (unixodbc INTERFACE USE_ODBC=0) - - message (STATUS "Not using unixodbc") - return() -endif() - -option (USE_INTERNAL_ODBC_LIBRARY "Use internal ODBC library" ON) - -if (NOT USE_INTERNAL_ODBC_LIBRARY) - find_library (LIBRARY_ODBC NAMES unixodbc odbc) - find_path (INCLUDE_ODBC sql.h) - - if(LIBRARY_ODBC AND INCLUDE_ODBC) - add_library (unixodbc INTERFACE) - set_target_properties (unixodbc PROPERTIES INTERFACE_LINK_LIBRARIES ${LIBRARY_ODBC}) - set_target_properties (unixodbc PROPERTIES INTERFACE_INCLUDE_DIRECTORIES ${INCLUDE_ODBC}) - set_target_properties (unixodbc PROPERTIES INTERFACE_COMPILE_DEFINITIONS USE_ODBC=1) - - if (USE_STATIC_LIBRARIES) - find_library(LTDL_LIBRARY ltdl) - if (LTDL_LIBRARY) - target_link_libraries(unixodbc INTERFACE ${LTDL_LIBRARY}) - endif() - endif() - - set(EXTERNAL_ODBC_LIBRARY_FOUND 1) - message (STATUS "Found odbc: ${LIBRARY_ODBC}") - else() - message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find system ODBC library") - set(EXTERNAL_ODBC_LIBRARY_FOUND 0) - endif() -endif() - -if (NOT EXTERNAL_ODBC_LIBRARY_FOUND) - set (USE_INTERNAL_ODBC_LIBRARY 1) -endif () - -set (USE_INTERNAL_NANODBC_LIBRARY 1) - -message (STATUS "Using unixodbc") diff --git a/cmake/find/orc.cmake b/cmake/find/orc.cmake deleted file mode 100644 index a5c3f57468ae..000000000000 --- a/cmake/find/orc.cmake +++ /dev/null @@ -1,57 +0,0 @@ -option (ENABLE_ORC "Enable ORC" ${ENABLE_LIBRARIES}) - -if(NOT ENABLE_ORC) - if(USE_INTERNAL_ORC_LIBRARY) - message (${RECONFIGURE_MESSAGE_LEVEL} "Cannot use internal ORC library with ENABLE_ORC=OFF") - endif() - return() -endif() - -if (USE_INTERNAL_PARQUET_LIBRARY) - option(USE_INTERNAL_ORC_LIBRARY "Set to FALSE to use system ORC instead of bundled (experimental set to OFF on your own risk)" - ON) -elseif(USE_INTERNAL_ORC_LIBRARY) - message (${RECONFIGURE_MESSAGE_LEVEL} "Currently internal ORC can be build only with bundled Parquet") -endif() - -include(cmake/find/snappy.cmake) - -if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/orc/c++/include/orc/OrcFile.hh") - if(USE_INTERNAL_ORC_LIBRARY) - message(WARNING "submodule contrib/orc is missing. 
to fix try run: \n git submodule update --init") - message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal ORC") - set(USE_INTERNAL_ORC_LIBRARY 0) - endif() - set(MISSING_INTERNAL_ORC_LIBRARY 1) -endif () - -if (NOT USE_INTERNAL_ORC_LIBRARY) - find_package(orc) - if (NOT ORC_LIBRARY OR NOT ORC_INCLUDE_DIR) - message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find system ORC") - endif () -endif () - -#if (USE_INTERNAL_ORC_LIBRARY) -#find_path(CYRUS_SASL_INCLUDE_DIR sasl/sasl.h) -#find_library(CYRUS_SASL_SHARED_LIB sasl2) -#if (NOT CYRUS_SASL_INCLUDE_DIR OR NOT CYRUS_SASL_SHARED_LIB) -# set(USE_ORC 0) -#endif() -#endif() - -if (ORC_LIBRARY AND ORC_INCLUDE_DIR) - set(USE_ORC 1) -elseif(NOT MISSING_INTERNAL_ORC_LIBRARY AND ARROW_LIBRARY AND SNAPPY_LIBRARY) # (LIBGSASL_LIBRARY AND LIBXML2_LIBRARY) - set(ORC_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/orc/c++/include") - set(ORC_LIBRARY orc) - set(USE_ORC 1) - set(USE_INTERNAL_ORC_LIBRARY 1) -else() - message (${RECONFIGURE_MESSAGE_LEVEL} - "Can't enable ORC support - missing dependencies. Missing internal orc=${MISSING_INTERNAL_ORC_LIBRARY}. " - "arrow=${ARROW_LIBRARY} snappy=${SNAPPY_LIBRARY}") - set(USE_INTERNAL_ORC_LIBRARY 0) -endif() - -message (STATUS "Using internal=${USE_INTERNAL_ORC_LIBRARY} orc=${USE_ORC}: ${ORC_INCLUDE_DIR} : ${ORC_LIBRARY}") diff --git a/cmake/find/parquet.cmake b/cmake/find/parquet.cmake deleted file mode 100644 index 48c2bb7babb3..000000000000 --- a/cmake/find/parquet.cmake +++ /dev/null @@ -1,168 +0,0 @@ -if (Protobuf_PROTOC_EXECUTABLE) - option (ENABLE_PARQUET "Enable parquet" ${ENABLE_LIBRARIES}) -elseif(ENABLE_PARQUET OR USE_INTERNAL_PARQUET_LIBRARY) - message (${RECONFIGURE_MESSAGE_LEVEL} "Can't use parquet without protoc executable") -endif() - -if (NOT ENABLE_PARQUET) - if(USE_INTERNAL_PARQUET_LIBRARY) - message (${RECONFIGURE_MESSAGE_LEVEL} "Cannot use internal parquet with ENABLE_PARQUET=OFF") - endif() - message(STATUS "Building without Parquet support") - return() -endif() - -if (NOT OS_FREEBSD) # Freebsd: ../contrib/arrow/cpp/src/arrow/util/bit-util.h:27:10: fatal error: endian.h: No such file or directory - option(USE_INTERNAL_PARQUET_LIBRARY "Set to FALSE to use system parquet library instead of bundled" ON) -elseif(USE_INTERNAL_PARQUET_LIBRARY) - message (${RECONFIGURE_MESSAGE_LEVEL} "Using internal parquet is not supported on freebsd") -endif() - -if(NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/CMakeLists.txt") - if(USE_INTERNAL_PARQUET_LIBRARY) - message(WARNING "submodule contrib/arrow (required for Parquet) is missing. 
to fix try run: \n git submodule update --init") - message (${RECONFIGURE_MESSAGE_LEVEL} "Can't use internal parquet library") - set(USE_INTERNAL_PARQUET_LIBRARY 0) - endif() - set(MISSING_INTERNAL_PARQUET_LIBRARY 1) -endif() - -if (NOT SNAPPY_LIBRARY) - include(cmake/find/snappy.cmake) -endif() - -if(NOT USE_INTERNAL_PARQUET_LIBRARY) - find_package(Arrow) - find_package(Parquet) - find_library(UTF8_PROC_LIBRARY utf8proc) - find_package(BZip2) - - if(USE_STATIC_LIBRARIES) - find_library(ARROW_DEPS_LIBRARY arrow_bundled_dependencies) - - if (ARROW_DEPS_LIBRARY) - set(ARROW_IMPORT_OBJ_DIR "${CMAKE_CURRENT_BINARY_DIR}/contrib/arrow-cmake/imported-objects") - set(ARROW_OTHER_OBJS - "${ARROW_IMPORT_OBJ_DIR}/jemalloc.pic.o" - "${ARROW_IMPORT_OBJ_DIR}/arena.pic.o" - "${ARROW_IMPORT_OBJ_DIR}/background_thread.pic.o" - "${ARROW_IMPORT_OBJ_DIR}/base.pic.o" - "${ARROW_IMPORT_OBJ_DIR}/bin.pic.o" - "${ARROW_IMPORT_OBJ_DIR}/bitmap.pic.o" - "${ARROW_IMPORT_OBJ_DIR}/ckh.pic.o" - "${ARROW_IMPORT_OBJ_DIR}/ctl.pic.o" - "${ARROW_IMPORT_OBJ_DIR}/div.pic.o" - "${ARROW_IMPORT_OBJ_DIR}/extent.pic.o" - "${ARROW_IMPORT_OBJ_DIR}/extent_dss.pic.o" - "${ARROW_IMPORT_OBJ_DIR}/extent_mmap.pic.o" - # skip hash - "${ARROW_IMPORT_OBJ_DIR}/hook.pic.o" - "${ARROW_IMPORT_OBJ_DIR}/large.pic.o" - "${ARROW_IMPORT_OBJ_DIR}/log.pic.o" - "${ARROW_IMPORT_OBJ_DIR}/malloc_io.pic.o" - "${ARROW_IMPORT_OBJ_DIR}/mutex.pic.o" - "${ARROW_IMPORT_OBJ_DIR}/mutex_pool.pic.o" - "${ARROW_IMPORT_OBJ_DIR}/nstime.pic.o" - "${ARROW_IMPORT_OBJ_DIR}/pages.pic.o" - # skip prng - "${ARROW_IMPORT_OBJ_DIR}/prof.pic.o" - "${ARROW_IMPORT_OBJ_DIR}/rtree.pic.o" - "${ARROW_IMPORT_OBJ_DIR}/stats.pic.o" - "${ARROW_IMPORT_OBJ_DIR}/sc.pic.o" - "${ARROW_IMPORT_OBJ_DIR}/sz.pic.o" - "${ARROW_IMPORT_OBJ_DIR}/tcache.pic.o" - # skip ticker - "${ARROW_IMPORT_OBJ_DIR}/tsd.pic.o" - "${ARROW_IMPORT_OBJ_DIR}/test_hooks.pic.o" - "${ARROW_IMPORT_OBJ_DIR}/witness.pic.o" - ) - add_custom_command(OUTPUT ${ARROW_OTHER_OBJS} - COMMAND - mkdir -p "${ARROW_IMPORT_OBJ_DIR}" && - cd "${ARROW_IMPORT_OBJ_DIR}" && - "${CMAKE_AR}" x "${ARROW_DEPS_LIBRARY}" - ) - set_source_files_properties(jemalloc.pic.o PROPERTIES EXTERNAL_OBJECT true GENERATED true) - add_library(imported_arrow_deps STATIC ${ARROW_OTHER_OBJS}) - - set(ARROW_LIBRARY ${ARROW_STATIC_LIB} - imported_arrow_deps ${THRIFT_LIBRARY} ${UTF8_PROC_LIBRARY} ${BZIP2_LIBRARIES} ${SNAPPY_LIBRARY}) - else() - message(WARNING "Using external static Arrow does not always work. " - "Could not find arrow_bundled_dependencies.a. 
If compilation fails, "
- "Try: -D\"USE_INTERNAL_PARQUET_LIBRARY\"=ON or -D\"ENABLE_PARQUET\"=OFF or "
- "-D\"USE_STATIC_LIBRARIES\"=OFF")
- set(ARROW_LIBRARY ${ARROW_STATIC_LIB})
- endif()
- set(PARQUET_LIBRARY ${PARQUET_STATIC_LIB})
- else()
- set(ARROW_LIBRARY ${ARROW_SHARED_LIB})
- set(PARQUET_LIBRARY ${PARQUET_SHARED_LIB})
- endif()
-
- if(ARROW_INCLUDE_DIR AND ARROW_LIBRARY AND PARQUET_INCLUDE_DIR AND PARQUET_LIBRARY AND THRIFT_LIBRARY AND UTF8_PROC_LIBRARY AND BZIP2_FOUND)
- set(USE_PARQUET 1)
- set(EXTERNAL_PARQUET_FOUND 1)
- else()
- message (${RECONFIGURE_MESSAGE_LEVEL}
- "Can't find system parquet: arrow=${ARROW_INCLUDE_DIR}:${ARROW_LIBRARY} ;"
- " parquet=${PARQUET_INCLUDE_DIR}:${PARQUET_LIBRARY} ;"
- " thrift=${THRIFT_LIBRARY} ;")
- set(EXTERNAL_PARQUET_FOUND 0)
- endif()
-endif()
-
-if(NOT EXTERNAL_PARQUET_FOUND AND NOT MISSING_INTERNAL_PARQUET_LIBRARY AND NOT OS_FREEBSD)
- if(SNAPPY_LIBRARY)
- set(CAN_USE_INTERNAL_PARQUET_LIBRARY 1)
- else()
- message (${RECONFIGURE_MESSAGE_LEVEL} "Can't use internal parquet library without snappy")
- endif()
-
- include(CheckCXXSourceCompiles)
- if(NOT USE_INTERNAL_DOUBLE_CONVERSION_LIBRARY)
- set(CMAKE_REQUIRED_LIBRARIES ${DOUBLE_CONVERSION_LIBRARIES})
- set(CMAKE_REQUIRED_INCLUDES ${DOUBLE_CONVERSION_INCLUDE_DIR})
- check_cxx_source_compiles("
- #include <double-conversion/double-conversion.h>
- int main() { static const int flags_ = double_conversion::StringToDoubleConverter::ALLOW_CASE_INSENSIBILITY; return 0;}
- " HAVE_DOUBLE_CONVERSION_ALLOW_CASE_INSENSIBILITY)
-
- if(NOT HAVE_DOUBLE_CONVERSION_ALLOW_CASE_INSENSIBILITY) # HAVE_STD_RANDOM_SHUFFLE
- message (${RECONFIGURE_MESSAGE_LEVEL} "Disabling internal parquet library because arrow is broken (can't use old double_conversion)")
- set(CAN_USE_INTERNAL_PARQUET_LIBRARY 0)
- endif()
- endif()
-
- if(NOT CAN_USE_INTERNAL_PARQUET_LIBRARY)
- message (${RECONFIGURE_MESSAGE_LEVEL} "Can't use internal parquet")
- set(USE_INTERNAL_PARQUET_LIBRARY 0)
- else()
- set(USE_INTERNAL_PARQUET_LIBRARY 1)
-
- if(MAKE_STATIC_LIBRARIES)
- set(FLATBUFFERS_LIBRARY flatbuffers)
- set(ARROW_LIBRARY arrow_static)
- set(PARQUET_LIBRARY parquet_static)
- else()
- set(FLATBUFFERS_LIBRARY flatbuffers_shared)
- set(ARROW_LIBRARY arrow_shared)
- set(PARQUET_LIBRARY parquet_shared)
- endif()
-
- set(USE_PARQUET 1)
- set(USE_ORC 1)
- set(USE_ARROW 1)
- endif()
-elseif(OS_FREEBSD)
- message (${RECONFIGURE_MESSAGE_LEVEL} "Using internal parquet library on FreeBSD is not supported")
-endif()
-
-if(USE_PARQUET)
- message(STATUS "Using Parquet: arrow=${ARROW_LIBRARY}:${ARROW_INCLUDE_DIR} ;"
- " parquet=${PARQUET_LIBRARY}:${PARQUET_INCLUDE_DIR} ;"
- " thrift=${THRIFT_LIBRARY} ;"
- " flatbuffers=${FLATBUFFERS_LIBRARY}")
-else()
- message(STATUS "Building without Parquet support")
-endif()
diff --git a/cmake/find/pdqsort.cmake b/cmake/find/pdqsort.cmake
deleted file mode 100644
index 51461044cf90..000000000000
--- a/cmake/find/pdqsort.cmake
+++ /dev/null
@@ -1,2 +0,0 @@
-set(PDQSORT_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/pdqsort)
-message(STATUS "Using pdqsort: ${PDQSORT_INCLUDE_DIR}")
diff --git a/cmake/find/poco.cmake b/cmake/find/poco.cmake
deleted file mode 100644
index 992337281523..000000000000
--- a/cmake/find/poco.cmake
+++ /dev/null
@@ -1,8 +0,0 @@
-option (USE_INTERNAL_POCO_LIBRARY "Use internal Poco library" ON)
-
-if (NOT USE_INTERNAL_POCO_LIBRARY)
- find_path (ROOT_DIR NAMES Foundation/include/Poco/Poco.h include/Poco/Poco.h)
- if (NOT ROOT_DIR)
- message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find system poco")
- endif()
-endif ()
diff --git a/cmake/find/protobuf.cmake b/cmake/find/protobuf.cmake
deleted file mode 100644
index a2ea8ae87fc2..000000000000
--- a/cmake/find/protobuf.cmake
+++ /dev/null
@@ -1,62 +0,0 @@
-option(ENABLE_PROTOBUF "Enable protobuf" ${ENABLE_LIBRARIES})
-
-if(NOT ENABLE_PROTOBUF)
- if(USE_INTERNAL_PROTOBUF_LIBRARY)
- message(${RECONFIGURE_MESSAGE_LEVEL} "Can't use internal protobuf with ENABLE_PROTOBUF=OFF")
- endif()
- return()
-endif()
-
-# Normally we use the internal protobuf library.
-# You can set USE_INTERNAL_PROTOBUF_LIBRARY to OFF to force using the external protobuf library, which should be installed in the system in this case.
-# The external protobuf library can be installed in the system by running
-# sudo apt-get install libprotobuf-dev protobuf-compiler libprotoc-dev
-option(USE_INTERNAL_PROTOBUF_LIBRARY "Set to FALSE to use system protobuf instead of bundled. (Experimental. Set to OFF on your own risk)" ON)
-
-if(NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/protobuf/cmake/CMakeLists.txt")
- if(USE_INTERNAL_PROTOBUF_LIBRARY)
- message(WARNING "submodule contrib/protobuf is missing. to fix try run: \n git submodule update --init")
- message(${RECONFIGURE_MESSAGE_LEVEL} "Can't use internal protobuf")
- set(USE_INTERNAL_PROTOBUF_LIBRARY 0)
- endif()
- set(MISSING_INTERNAL_PROTOBUF_LIBRARY 1)
-endif()
-
-if(NOT USE_INTERNAL_PROTOBUF_LIBRARY)
- find_package(Protobuf)
- if(NOT Protobuf_INCLUDE_DIR OR NOT Protobuf_LIBRARY)
- message(${RECONFIGURE_MESSAGE_LEVEL} "Can't find system protobuf library")
- set(EXTERNAL_PROTOBUF_LIBRARY_FOUND 0)
- elseif(NOT Protobuf_PROTOC_EXECUTABLE)
- message(${RECONFIGURE_MESSAGE_LEVEL} "Can't find system protobuf compiler")
- set(EXTERNAL_PROTOBUF_LIBRARY_FOUND 0)
- else()
- set(EXTERNAL_PROTOBUF_LIBRARY_FOUND 1)
- set(USE_PROTOBUF 1)
- endif()
-endif()
-
-if(NOT EXTERNAL_PROTOBUF_LIBRARY_FOUND AND NOT MISSING_INTERNAL_PROTOBUF_LIBRARY)
- set(Protobuf_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/protobuf/src")
- set(Protobuf_LIBRARY libprotobuf)
- set(Protobuf_PROTOC_EXECUTABLE "$<TARGET_FILE:protoc>")
- set(Protobuf_PROTOC_LIBRARY libprotoc)
-
- include("${ClickHouse_SOURCE_DIR}/contrib/protobuf-cmake/protobuf_generate.cmake")
-
- set(USE_INTERNAL_PROTOBUF_LIBRARY 1)
- set(USE_PROTOBUF 1)
-endif()
-
-if(OS_FREEBSD AND SANITIZE STREQUAL "address")
- # ../contrib/protobuf/src/google/protobuf/arena_impl.h:45:10: fatal error: 'sanitizer/asan_interface.h' file not found
- # #include <sanitizer/asan_interface.h>
- if(LLVM_INCLUDE_DIRS)
- set(Protobuf_INCLUDE_DIR "${Protobuf_INCLUDE_DIR}" ${LLVM_INCLUDE_DIRS})
- else()
- message(${RECONFIGURE_MESSAGE_LEVEL} "Can't use protobuf on FreeBSD with address sanitizer without LLVM")
- set(USE_PROTOBUF 0)
- endif()
-endif()
-
-message(STATUS "Using protobuf=${USE_PROTOBUF}: ${Protobuf_INCLUDE_DIR} : ${Protobuf_LIBRARY} : ${Protobuf_PROTOC_EXECUTABLE} : ${Protobuf_PROTOC_LIBRARY}")
diff --git a/cmake/find/rapidjson.cmake b/cmake/find/rapidjson.cmake
deleted file mode 100644
index cdf6761446eb..000000000000
--- a/cmake/find/rapidjson.cmake
+++ /dev/null
@@ -1,35 +0,0 @@
-option(ENABLE_RAPIDJSON "Use rapidjson" ${ENABLE_LIBRARIES})
-if(NOT ENABLE_RAPIDJSON)
- if(USE_INTERNAL_RAPIDJSON_LIBRARY)
- message (${RECONFIGURE_MESSAGE_LEVEL} "Can't use internal rapidjson library with ENABLE_RAPIDJSON=OFF")
- endif()
- return()
-endif()
-
-option(USE_INTERNAL_RAPIDJSON_LIBRARY "Set to FALSE to use system rapidjson library instead of bundled" ON)
-
-if(NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/rapidjson/include/rapidjson/rapidjson.h")
- if(USE_INTERNAL_RAPIDJSON_LIBRARY)
- 
message(WARNING "submodule contrib/rapidjson is missing. to fix try run: \n git submodule update --init") - message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal rapidjson library") - set(USE_INTERNAL_RAPIDJSON_LIBRARY 0) - endif() - set(MISSING_INTERNAL_RAPIDJSON_LIBRARY 1) -endif() - -if(NOT USE_INTERNAL_RAPIDJSON_LIBRARY) - find_path(RAPIDJSON_INCLUDE_DIR NAMES rapidjson/rapidjson.h PATHS ${RAPIDJSON_INCLUDE_PATHS}) - if(NOT RAPIDJSON_INCLUDE_DIR) - message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find system rapidjson") - endif() -endif() - -if(RAPIDJSON_INCLUDE_DIR) - set(USE_RAPIDJSON 1) -elseif(NOT MISSING_INTERNAL_RAPIDJSON_LIBRARY) - set(RAPIDJSON_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/rapidjson/include") - set(USE_INTERNAL_RAPIDJSON_LIBRARY 1) - set(USE_RAPIDJSON 1) -endif() - -message(STATUS "Using rapidjson=${USE_RAPIDJSON}: ${RAPIDJSON_INCLUDE_DIR}") diff --git a/cmake/find/rdkafka.cmake b/cmake/find/rdkafka.cmake deleted file mode 100644 index cad267bacffc..000000000000 --- a/cmake/find/rdkafka.cmake +++ /dev/null @@ -1,68 +0,0 @@ -option (ENABLE_RDKAFKA "Enable kafka" ${ENABLE_LIBRARIES}) - -if (NOT ENABLE_RDKAFKA) - if (USE_INTERNAL_RDKAFKA_LIBRARY) - message (${RECONFIGURE_MESSAGE_LEVEL} "Can't use internal librdkafka with ENABLE_RDKAFKA=OFF") - endif() - return() -endif() - -option (USE_INTERNAL_RDKAFKA_LIBRARY "Set to FALSE to use system librdkafka instead of the bundled" ON) - -if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/cppkafka/src") - if(USE_INTERNAL_RDKAFKA_LIBRARY) - message (WARNING "submodule contrib/cppkafka is missing. to fix try run: \n git submodule update --init") - message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal cppkafka") - set (USE_INTERNAL_RDKAFKA_LIBRARY 0) - endif() - set (MISSING_INTERNAL_CPPKAFKA_LIBRARY 1) -endif () - -if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/librdkafka/src") - if(USE_INTERNAL_RDKAFKA_LIBRARY OR MISSING_INTERNAL_CPPKAFKA_LIBRARY) - message (WARNING "submodule contrib/librdkafka is missing. 
to fix try run: \n git submodule update --init") - message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal rdkafka") - set (USE_INTERNAL_RDKAFKA_LIBRARY 0) - endif() - set (MISSING_INTERNAL_RDKAFKA_LIBRARY 1) -endif () - -if (NOT USE_INTERNAL_RDKAFKA_LIBRARY) - find_library (RDKAFKA_LIB rdkafka) - find_path (RDKAFKA_INCLUDE_DIR NAMES librdkafka/rdkafka.h PATHS ${RDKAFKA_INCLUDE_PATHS}) - if (NOT RDKAFKA_LIB OR NOT RDKAFKA_INCLUDE_DIR) - message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find system librdkafka") - endif() - - if (USE_STATIC_LIBRARIES AND NOT OS_FREEBSD) - find_library (SASL2_LIBRARY sasl2) - if (NOT SASL2_LIBRARY) - message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find system sasl2 library needed for static librdkafka") - endif() - endif () - set (CPPKAFKA_LIBRARY cppkafka) -endif () - -if (RDKAFKA_LIB AND RDKAFKA_INCLUDE_DIR) - set (USE_RDKAFKA 1) - add_library (rdkafka_imp UNKNOWN IMPORTED) - set_target_properties (rdkafka_imp PROPERTIES IMPORTED_LOCATION ${RDKAFKA_LIB}) - set_target_properties (rdkafka_imp PROPERTIES INTERFACE_INCLUDE_DIRECTORIES ${RDKAFKA_INCLUDE_DIR}) - - set (RDKAFKA_LIBRARY rdkafka_imp ${OPENSSL_LIBRARIES}) - set (CPPKAFKA_LIBRARY cppkafka) - if (SASL2_LIBRARY) - list (APPEND RDKAFKA_LIBRARY ${SASL2_LIBRARY}) - endif () - if (LZ4_LIBRARY) - list (APPEND RDKAFKA_LIBRARY ${LZ4_LIBRARY}) - endif () -elseif (NOT MISSING_INTERNAL_RDKAFKA_LIBRARY AND NOT MISSING_INTERNAL_CPPKAFKA_LIBRARY) - set (USE_INTERNAL_RDKAFKA_LIBRARY 1) - set (RDKAFKA_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/librdkafka/src") - set (RDKAFKA_LIBRARY rdkafka) - set (CPPKAFKA_LIBRARY cppkafka) - set (USE_RDKAFKA 1) -endif () - -message (STATUS "Using librdkafka=${USE_RDKAFKA}: ${RDKAFKA_INCLUDE_DIR} : ${RDKAFKA_LIBRARY} ${CPPKAFKA_LIBRARY}") diff --git a/cmake/find/re2.cmake b/cmake/find/re2.cmake deleted file mode 100644 index ed5c72d13fae..000000000000 --- a/cmake/find/re2.cmake +++ /dev/null @@ -1,41 +0,0 @@ -option (USE_INTERNAL_RE2_LIBRARY "Set to FALSE to use system re2 library instead of bundled [slower]" ON) - -if(NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/re2/re2") - if(USE_INTERNAL_RE2_LIBRARY) - message(WARNING "submodule contrib/re2 is missing. to fix try run: \n git submodule update --init") - message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal re2 library") - endif() - set(USE_INTERNAL_RE2_LIBRARY 0) - set(MISSING_INTERNAL_RE2_LIBRARY 1) -endif() - -if (NOT USE_INTERNAL_RE2_LIBRARY) - find_library (RE2_LIBRARY re2) - find_path (RE2_INCLUDE_DIR NAMES re2/re2.h PATHS ${RE2_INCLUDE_PATHS}) - if (NOT RE2_LIBRARY OR NOT RE2_INCLUDE_DIR) - message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find system re2 library") - endif () -endif () - -string(FIND ${CMAKE_CURRENT_BINARY_DIR} " " _have_space) -if(_have_space GREATER 0) - message(WARNING "Using spaces in build path [${CMAKE_CURRENT_BINARY_DIR}] highly not recommended. 
Library re2st will be disabled.") - set (MISSING_INTERNAL_RE2_ST_LIBRARY 1) -endif() - -if (RE2_LIBRARY AND RE2_INCLUDE_DIR) - set (RE2_ST_LIBRARY ${RE2_LIBRARY}) -elseif (NOT MISSING_INTERNAL_RE2_LIBRARY) - set (USE_INTERNAL_RE2_LIBRARY 1) - set (RE2_LIBRARY re2) - set (RE2_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/re2") - if (NOT MISSING_INTERNAL_RE2_ST_LIBRARY) - set (RE2_ST_LIBRARY re2_st) - set (USE_RE2_ST 1) - else () - set (RE2_ST_LIBRARY ${RE2_LIBRARY}) - message (${RECONFIGURE_MESSAGE_LEVEL} "Using internal re2 library instead of re2_st") - endif () -endif () - -message (STATUS "Using re2: ${RE2_INCLUDE_DIR} : ${RE2_LIBRARY}; ${RE2_ST_INCLUDE_DIR} : ${RE2_ST_LIBRARY}") diff --git a/cmake/find/rocksdb.cmake b/cmake/find/rocksdb.cmake deleted file mode 100644 index 10592d1d037a..000000000000 --- a/cmake/find/rocksdb.cmake +++ /dev/null @@ -1,71 +0,0 @@ -if (OS_DARWIN AND ARCH_AARCH64) - set (ENABLE_ROCKSDB OFF CACHE INTERNAL "") -endif() - -option(ENABLE_ROCKSDB "Enable ROCKSDB" ${ENABLE_LIBRARIES}) - -if (NOT ENABLE_ROCKSDB) - if (USE_INTERNAL_ROCKSDB_LIBRARY) - message (${RECONFIGURE_MESSAGE_LEVEL} "Can't use internal rocksdb library with ENABLE_ROCKSDB=OFF") - endif() - return() -endif() - -option(USE_INTERNAL_ROCKSDB_LIBRARY "Set to FALSE to use system ROCKSDB library instead of bundled" ON) - -if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/rocksdb/include") - if (USE_INTERNAL_ROCKSDB_LIBRARY) - message (WARNING "submodule contrib is missing. to fix try run: \n git submodule update --init") - message(${RECONFIGURE_MESSAGE_LEVEL} "cannot find internal rocksdb") - endif() - set (MISSING_INTERNAL_ROCKSDB 1) -endif () - -if (NOT USE_INTERNAL_ROCKSDB_LIBRARY) - find_library (ROCKSDB_LIBRARY rocksdb) - find_path (ROCKSDB_INCLUDE_DIR NAMES rocksdb/db.h PATHS ${ROCKSDB_INCLUDE_PATHS}) - if (NOT ROCKSDB_LIBRARY OR NOT ROCKSDB_INCLUDE_DIR) - message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find system rocksdb library") - endif() - - if (NOT SNAPPY_LIBRARY) - include(cmake/find/snappy.cmake) - endif() - if (NOT ZLIB_LIBRARY) - include(cmake/find/zlib.cmake) - endif() - - find_package(BZip2) - find_library(ZSTD_LIBRARY zstd) - find_library(LZ4_LIBRARY lz4) - find_library(GFLAGS_LIBRARY gflags) - - if(SNAPPY_LIBRARY AND ZLIB_LIBRARY AND LZ4_LIBRARY AND BZIP2_FOUND AND ZSTD_LIBRARY AND GFLAGS_LIBRARY) - list (APPEND ROCKSDB_LIBRARY ${SNAPPY_LIBRARY}) - list (APPEND ROCKSDB_LIBRARY ${ZLIB_LIBRARY}) - list (APPEND ROCKSDB_LIBRARY ${LZ4_LIBRARY}) - list (APPEND ROCKSDB_LIBRARY ${BZIP2_LIBRARY}) - list (APPEND ROCKSDB_LIBRARY ${ZSTD_LIBRARY}) - list (APPEND ROCKSDB_LIBRARY ${GFLAGS_LIBRARY}) - else() - message (${RECONFIGURE_MESSAGE_LEVEL} - "Can't find system rocksdb: snappy=${SNAPPY_LIBRARY} ;" - " zlib=${ZLIB_LIBRARY} ;" - " lz4=${LZ4_LIBRARY} ;" - " bz2=${BZIP2_LIBRARY} ;" - " zstd=${ZSTD_LIBRARY} ;" - " gflags=${GFLAGS_LIBRARY} ;") - endif() -endif () - -if(ROCKSDB_LIBRARY AND ROCKSDB_INCLUDE_DIR) - set(USE_ROCKSDB 1) -elseif (NOT MISSING_INTERNAL_ROCKSDB) - set (USE_INTERNAL_ROCKSDB_LIBRARY 1) - - set (ROCKSDB_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/rocksdb/include") - set (ROCKSDB_LIBRARY "rocksdb") - set (USE_ROCKSDB 1) -endif () - -message (STATUS "Using ROCKSDB=${USE_ROCKSDB}: ${ROCKSDB_INCLUDE_DIR} : ${ROCKSDB_LIBRARY}") diff --git a/cmake/find/s2geometry.cmake b/cmake/find/s2geometry.cmake deleted file mode 100644 index 348805b342eb..000000000000 --- a/cmake/find/s2geometry.cmake +++ /dev/null @@ -1,24 +0,0 @@ - -option(ENABLE_S2_GEOMETRY "Enable S2 geometry library" 
${ENABLE_LIBRARIES}) - -if (ENABLE_S2_GEOMETRY) - if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/s2geometry") - message (WARNING "submodule contrib/s2geometry is missing. to fix try run: \n git submodule update --init") - set (ENABLE_S2_GEOMETRY 0) - set (USE_S2_GEOMETRY 0) - else() - if (OPENSSL_FOUND) - set (S2_GEOMETRY_LIBRARY s2) - set (S2_GEOMETRY_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/s2geometry/src/s2) - set (USE_S2_GEOMETRY 1) - else() - message (WARNING "S2 uses OpenSSL, but the latter is absent.") - endif() - endif() - - if (NOT USE_S2_GEOMETRY) - message (${RECONFIGURE_MESSAGE_LEVEL} "Can't enable S2 geometry library") - endif() -endif() - -message (STATUS "Using s2geometry=${USE_S2_GEOMETRY} : ${S2_GEOMETRY_INCLUDE_DIR}") diff --git a/cmake/find/s3.cmake b/cmake/find/s3.cmake deleted file mode 100644 index 9a10c3f13efe..000000000000 --- a/cmake/find/s3.cmake +++ /dev/null @@ -1,45 +0,0 @@ -if(NOT OS_FREEBSD) - option(ENABLE_S3 "Enable S3" ${ENABLE_LIBRARIES}) -elseif(ENABLE_S3 OR USE_INTERNAL_AWS_S3_LIBRARY) - message (${RECONFIGURE_MESSAGE_LEVEL} "Can't use S3 on FreeBSD") -endif() - -if(NOT ENABLE_S3) - if(USE_INTERNAL_AWS_S3_LIBRARY) - message (${RECONFIGURE_MESSAGE_LEVEL} "Can't use internal S3 library with ENABLE_S3=OFF") - endif() - return() -endif() - -option(USE_INTERNAL_AWS_S3_LIBRARY "Set to FALSE to use system S3 instead of bundled (experimental set to OFF on your own risk)" - ON) - -if (NOT USE_INTERNAL_POCO_LIBRARY AND USE_INTERNAL_AWS_S3_LIBRARY) - message (FATAL_ERROR "Currently S3 support can be built only with internal POCO library") -endif() - -if (NOT USE_INTERNAL_AWS_S3_LIBRARY) - message (${RECONFIGURE_MESSAGE_LEVEL} "Compilation with external S3 library is not supported yet") -endif() - -if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/aws/aws-cpp-sdk-s3") - message (WARNING "submodule contrib/aws is missing. to fix try run: \n git submodule update --init") - if (USE_INTERNAL_AWS_S3_LIBRARY) - message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal S3 library") - endif () - set (MISSING_AWS_S3 1) -endif () - -if (USE_INTERNAL_AWS_S3_LIBRARY AND NOT MISSING_AWS_S3) - set(AWS_S3_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/aws/aws-cpp-sdk-s3/include") - set(AWS_S3_CORE_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/aws/aws-cpp-sdk-core/include") - set(AWS_S3_LIBRARY aws_s3) - set(USE_INTERNAL_AWS_S3_LIBRARY 1) - set(USE_AWS_S3 1) -else() - message (${RECONFIGURE_MESSAGE_LEVEL} "Can't enable S3") - set(USE_INTERNAL_AWS_S3_LIBRARY 0) - set(USE_AWS_S3 0) -endif () - -message (STATUS "Using aws_s3=${USE_AWS_S3}: ${AWS_S3_INCLUDE_DIR} : ${AWS_S3_LIBRARY}") diff --git a/cmake/find/sentry.cmake b/cmake/find/sentry.cmake deleted file mode 100644 index e08cbad17295..000000000000 --- a/cmake/find/sentry.cmake +++ /dev/null @@ -1,23 +0,0 @@ -set (SENTRY_LIBRARY "sentry") - -set (SENTRY_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/sentry-native/include") -if (NOT EXISTS "${SENTRY_INCLUDE_DIR}/sentry.h") - message (WARNING "submodule contrib/sentry-native is missing. 
to fix try run: \n git submodule update --init") - if (USE_SENTRY) - message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal sentry library") - endif() - return() -endif () - -if (NOT OS_FREEBSD AND NOT SPLIT_SHARED_LIBRARIES AND NOT (OS_DARWIN AND COMPILER_CLANG)) - option (USE_SENTRY "Use Sentry" ${ENABLE_LIBRARIES}) - set (SENTRY_TRANSPORT "curl" CACHE STRING "") - set (SENTRY_BACKEND "none" CACHE STRING "") - set (SENTRY_EXPORT_SYMBOLS OFF CACHE BOOL "") - set (SENTRY_LINK_PTHREAD OFF CACHE BOOL "") - set (SENTRY_PIC OFF CACHE BOOL "") - set (BUILD_SHARED_LIBS OFF) - message (STATUS "Using sentry=${USE_SENTRY}: ${SENTRY_LIBRARY}") -elseif (USE_SENTRY) - message (${RECONFIGURE_MESSAGE_LEVEL} "Sentry is not supported in current configuration") -endif () diff --git a/cmake/find/simdjson.cmake b/cmake/find/simdjson.cmake deleted file mode 100644 index bf22a331f04c..000000000000 --- a/cmake/find/simdjson.cmake +++ /dev/null @@ -1,11 +0,0 @@ -option (USE_SIMDJSON "Use simdjson" ${ENABLE_LIBRARIES}) - -if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/simdjson/include/simdjson.h") - message (WARNING "submodule contrib/simdjson is missing. to fix try run: \n git submodule update --init") - if (USE_SIMDJSON) - message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal simdjson library") - endif() - return() -endif () - -message(STATUS "Using simdjson=${USE_SIMDJSON}") diff --git a/cmake/find/snappy.cmake b/cmake/find/snappy.cmake deleted file mode 100644 index 245b3a9a2ff5..000000000000 --- a/cmake/find/snappy.cmake +++ /dev/null @@ -1,21 +0,0 @@ -option(USE_SNAPPY "Enable snappy library" ON) - -if(NOT USE_SNAPPY) - if (USE_INTERNAL_SNAPPY_LIBRARY) - message (${RECONFIGURE_MESSAGE_LEVEL} "Can't use internal snappy library with USE_SNAPPY=OFF") - endif() - return() -endif() - -option (USE_INTERNAL_SNAPPY_LIBRARY "Set to FALSE to use system snappy library instead of bundled" ON) - -if(NOT USE_INTERNAL_SNAPPY_LIBRARY) - find_library(SNAPPY_LIBRARY snappy) - if (NOT SNAPPY_LIBRARY) - message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find system snappy library") - endif() -else () - set(SNAPPY_LIBRARY snappy) -endif() - -message (STATUS "Using snappy: ${SNAPPY_LIBRARY}") diff --git a/cmake/find/sparsehash.cmake b/cmake/find/sparsehash.cmake deleted file mode 100644 index f258f6c1c5b0..000000000000 --- a/cmake/find/sparsehash.cmake +++ /dev/null @@ -1,17 +0,0 @@ -option (USE_INTERNAL_SPARSEHASH_LIBRARY "Set to FALSE to use system sparsehash library instead of bundled" - ON) # ON by default as we are not aware of any system providing package for sparsehash-c11 - -if (NOT USE_INTERNAL_SPARSEHASH_LIBRARY) - find_path (SPARSEHASH_INCLUDE_DIR NAMES sparsehash/sparse_hash_map PATHS ${SPARSEHASH_INCLUDE_PATHS}) - if (NOT SPARSEHASH_INCLUDE_DIR) - message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find system sparsehash library") - endif () -endif () - -if (SPARSEHASH_INCLUDE_DIR) -else () - set (USE_INTERNAL_SPARSEHASH_LIBRARY 1) - set (SPARSEHASH_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/sparsehash-c11") -endif () - -message (STATUS "Using sparsehash: ${SPARSEHASH_INCLUDE_DIR}") diff --git a/cmake/find/sqlite.cmake b/cmake/find/sqlite.cmake deleted file mode 100644 index 083a9faea592..000000000000 --- a/cmake/find/sqlite.cmake +++ /dev/null @@ -1,16 +0,0 @@ -option(ENABLE_SQLITE "Enable sqlite" ${ENABLE_LIBRARIES}) - -if (NOT ENABLE_SQLITE) - return() -endif() - -if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/sqlite-amalgamation/sqlite3.c") - message (WARNING "submodule contrib/sqlite3-amalgamation is 
missing. to fix try run: \n git submodule update --init") - message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal sqlite library") - set (USE_SQLITE 0) - return() -endif() - -set (USE_SQLITE 1) -set (SQLITE_LIBRARY sqlite) -message (STATUS "Using sqlite=${USE_SQLITE}") diff --git a/cmake/find/ssl.cmake b/cmake/find/ssl.cmake deleted file mode 100644 index 1ac6a54ed205..000000000000 --- a/cmake/find/ssl.cmake +++ /dev/null @@ -1,133 +0,0 @@ -# Needed when securely connecting to an external server, e.g. -# clickhouse-client --host ... --secure -option(ENABLE_SSL "Enable ssl" ${ENABLE_LIBRARIES}) - -if(NOT ENABLE_SSL) - if (USE_INTERNAL_SSL_LIBRARY) - message (${RECONFIGURE_MESSAGE_LEVEL} "Can't use internal ssl library with ENABLE_SSL=OFF") - endif() - return() -endif() - -option(USE_INTERNAL_SSL_LIBRARY "Set to FALSE to use system *ssl library instead of bundled" ON) - -if(NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/boringssl/README.md") - if(USE_INTERNAL_SSL_LIBRARY) - message(WARNING "submodule contrib/boringssl is missing. to fix try run: \n git submodule update --init") - message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal ssl library") - endif() - set(USE_INTERNAL_SSL_LIBRARY 0) - set(MISSING_INTERNAL_SSL_LIBRARY 1) -endif() - -set (OPENSSL_USE_STATIC_LIBS ${USE_STATIC_LIBRARIES}) - -if (NOT USE_INTERNAL_SSL_LIBRARY) - if (APPLE) - set (OPENSSL_ROOT_DIR "/usr/local/opt/openssl" CACHE INTERNAL "") - # https://rt.openssl.org/Ticket/Display.html?user=guest&pass=guest&id=2232 - if (USE_STATIC_LIBRARIES) - message(WARNING "Disable USE_STATIC_LIBRARIES if you have linking problems with OpenSSL on MacOS") - endif () - endif () - find_package (OpenSSL) - - if (NOT OPENSSL_FOUND) - # Try to find manually. - set (OPENSSL_INCLUDE_PATHS "/usr/local/opt/openssl/include") - set (OPENSSL_PATHS "/usr/local/opt/openssl/lib") - find_path (OPENSSL_INCLUDE_DIR NAMES openssl/ssl.h PATHS ${OPENSSL_INCLUDE_PATHS}) - find_library (OPENSSL_SSL_LIBRARY ssl PATHS ${OPENSSL_PATHS}) - find_library (OPENSSL_CRYPTO_LIBRARY crypto PATHS ${OPENSSL_PATHS}) - if (OPENSSL_SSL_LIBRARY AND OPENSSL_CRYPTO_LIBRARY AND OPENSSL_INCLUDE_DIR) - set (OPENSSL_LIBRARIES ${OPENSSL_SSL_LIBRARY} ${OPENSSL_CRYPTO_LIBRARY}) - set (OPENSSL_FOUND 1) - endif () - endif () - - if (NOT OPENSSL_FOUND) - message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find system ssl") - endif() -endif () - -if (NOT OPENSSL_FOUND AND NOT MISSING_INTERNAL_SSL_LIBRARY) - set (USE_INTERNAL_SSL_LIBRARY 1) - set (OPENSSL_ROOT_DIR "${ClickHouse_SOURCE_DIR}/contrib/boringssl") - set (OPENSSL_INCLUDE_DIR "${OPENSSL_ROOT_DIR}/include") - set (OPENSSL_CRYPTO_LIBRARY crypto) - set (OPENSSL_SSL_LIBRARY ssl) - set (OPENSSL_FOUND 1) - set (OPENSSL_LIBRARIES ${OPENSSL_SSL_LIBRARY} ${OPENSSL_CRYPTO_LIBRARY}) -endif () - -if(OPENSSL_FOUND) - # we need keep OPENSSL_FOUND for many libs in contrib - set(USE_SSL 1) -endif() - -# used by new poco -# part from /usr/share/cmake-*/Modules/FindOpenSSL.cmake, with removed all "EXISTS " -if(OPENSSL_FOUND AND NOT USE_INTERNAL_SSL_LIBRARY) - if(NOT TARGET OpenSSL::Crypto AND - (OPENSSL_CRYPTO_LIBRARY OR - LIB_EAY_LIBRARY_DEBUG OR - LIB_EAY_LIBRARY_RELEASE) - ) - add_library(OpenSSL::Crypto UNKNOWN IMPORTED) - set_target_properties(OpenSSL::Crypto PROPERTIES - INTERFACE_INCLUDE_DIRECTORIES "${OPENSSL_INCLUDE_DIR}") - if(OPENSSL_CRYPTO_LIBRARY) - set_target_properties(OpenSSL::Crypto PROPERTIES - IMPORTED_LINK_INTERFACE_LANGUAGES "C" - IMPORTED_LOCATION "${OPENSSL_CRYPTO_LIBRARY}") - endif() - if(LIB_EAY_LIBRARY_RELEASE) - 
set_property(TARGET OpenSSL::Crypto APPEND PROPERTY - IMPORTED_CONFIGURATIONS RELEASE) - set_target_properties(OpenSSL::Crypto PROPERTIES - IMPORTED_LINK_INTERFACE_LANGUAGES_RELEASE "C" - IMPORTED_LOCATION_RELEASE "${LIB_EAY_LIBRARY_RELEASE}") - endif() - if(LIB_EAY_LIBRARY_DEBUG) - set_property(TARGET OpenSSL::Crypto APPEND PROPERTY - IMPORTED_CONFIGURATIONS DEBUG) - set_target_properties(OpenSSL::Crypto PROPERTIES - IMPORTED_LINK_INTERFACE_LANGUAGES_DEBUG "C" - IMPORTED_LOCATION_DEBUG "${LIB_EAY_LIBRARY_DEBUG}") - endif() - endif() - if(NOT TARGET OpenSSL::SSL AND - (OPENSSL_SSL_LIBRARY OR - SSL_EAY_LIBRARY_DEBUG OR - SSL_EAY_LIBRARY_RELEASE) - ) - add_library(OpenSSL::SSL UNKNOWN IMPORTED) - set_target_properties(OpenSSL::SSL PROPERTIES - INTERFACE_INCLUDE_DIRECTORIES "${OPENSSL_INCLUDE_DIR}") - if(OPENSSL_SSL_LIBRARY) - set_target_properties(OpenSSL::SSL PROPERTIES - IMPORTED_LINK_INTERFACE_LANGUAGES "C" - IMPORTED_LOCATION "${OPENSSL_SSL_LIBRARY}") - endif() - if(SSL_EAY_LIBRARY_RELEASE) - set_property(TARGET OpenSSL::SSL APPEND PROPERTY - IMPORTED_CONFIGURATIONS RELEASE) - set_target_properties(OpenSSL::SSL PROPERTIES - IMPORTED_LINK_INTERFACE_LANGUAGES_RELEASE "C" - IMPORTED_LOCATION_RELEASE "${SSL_EAY_LIBRARY_RELEASE}") - endif() - if(SSL_EAY_LIBRARY_DEBUG) - set_property(TARGET OpenSSL::SSL APPEND PROPERTY - IMPORTED_CONFIGURATIONS DEBUG) - set_target_properties(OpenSSL::SSL PROPERTIES - IMPORTED_LINK_INTERFACE_LANGUAGES_DEBUG "C" - IMPORTED_LOCATION_DEBUG "${SSL_EAY_LIBRARY_DEBUG}") - endif() - if(TARGET OpenSSL::Crypto) - set_target_properties(OpenSSL::SSL PROPERTIES - INTERFACE_LINK_LIBRARIES OpenSSL::Crypto) - endif() - endif() -endif() - -message (STATUS "Using ssl=${USE_SSL}: ${OPENSSL_INCLUDE_DIR} : ${OPENSSL_LIBRARIES}") diff --git a/cmake/find/thrift.cmake b/cmake/find/thrift.cmake deleted file mode 100644 index 08eeb60915e8..000000000000 --- a/cmake/find/thrift.cmake +++ /dev/null @@ -1,34 +0,0 @@ -option(ENABLE_THRIFT "Enable Thrift" ${ENABLE_LIBRARIES}) - -if (NOT ENABLE_THRIFT) - message (STATUS "thrift disabled") - set(USE_INTERNAL_THRIFT_LIBRARY 0) - return() -endif() - -option(USE_INTERNAL_THRIFT_LIBRARY "Set to FALSE to use system thrift library instead of bundled" ON) -if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/thrift") - if (USE_INTERNAL_THRIFT_LIBRARY) - message (WARNING "submodule contrib/thrift is missing. to fix try run: \n git submodule update --init --recursive") - set(USE_INTERNAL_THRIFT_LIBRARY 0) - endif () -endif() - -if (USE_INTERNAL_THRIFT_LIBRARY) - if (MAKE_STATIC_LIBRARIES) - set(THRIFT_LIBRARY thrift_static) - else() - set(THRIFT_LIBRARY thrift) - endif() - set (THRIFT_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/thrift/lib/cpp/src") - set(USE_THRIFT 1) -else() - find_library(THRIFT_LIBRARY thrift) - if (NOT THRIFT_LIBRARY) - set(USE_THRIFT 0) - else() - set(USE_THRIFT 1) - endif() -endif () - -message (STATUS "Using thrift=${USE_THRIFT}: ${THRIFT_INCLUDE_DIR} : ${THRIFT_LIBRARY}") diff --git a/cmake/find/xz.cmake b/cmake/find/xz.cmake deleted file mode 100644 index f25937fe87d1..000000000000 --- a/cmake/find/xz.cmake +++ /dev/null @@ -1,27 +0,0 @@ -option (USE_INTERNAL_XZ_LIBRARY "Set to OFF to use system xz (lzma) library instead of bundled" ON) - -if(NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/xz/src/liblzma/api/lzma.h") - if(USE_INTERNAL_XZ_LIBRARY) - message(WARNING "submodule contrib/xz is missing. 
to fix try run: \n git submodule update --init") - message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal xz (lzma) library") - set(USE_INTERNAL_XZ_LIBRARY 0) - endif() - set(MISSING_INTERNAL_XZ_LIBRARY 1) -endif() - -if (NOT USE_INTERNAL_XZ_LIBRARY) - find_library (XZ_LIBRARY lzma) - find_path (XZ_INCLUDE_DIR NAMES lzma.h PATHS ${XZ_INCLUDE_PATHS}) - if (NOT XZ_LIBRARY OR NOT XZ_INCLUDE_DIR) - message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find system xz (lzma) library") - endif () -endif () - -if (XZ_LIBRARY AND XZ_INCLUDE_DIR) -elseif (NOT MISSING_INTERNAL_XZ_LIBRARY) - set (USE_INTERNAL_XZ_LIBRARY 1) - set (XZ_LIBRARY liblzma) - set (XZ_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/xz/src/liblzma/api) -endif () - -message (STATUS "Using xz (lzma): ${XZ_INCLUDE_DIR} : ${XZ_LIBRARY}") diff --git a/cmake/find/yaml-cpp.cmake b/cmake/find/yaml-cpp.cmake deleted file mode 100644 index 2aba6808e31b..000000000000 --- a/cmake/find/yaml-cpp.cmake +++ /dev/null @@ -1,9 +0,0 @@ -option(USE_YAML_CPP "Enable yaml-cpp" ${ENABLE_LIBRARIES}) - -if (NOT USE_YAML_CPP) - return() -endif() - -if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/yaml-cpp/README.md") - message (ERROR "submodule contrib/yaml-cpp is missing. to fix try run: \n git submodule update --init") -endif() diff --git a/cmake/find/zlib.cmake b/cmake/find/zlib.cmake deleted file mode 100644 index c2ee8217afad..000000000000 --- a/cmake/find/zlib.cmake +++ /dev/null @@ -1,42 +0,0 @@ -option (USE_INTERNAL_ZLIB_LIBRARY "Set to FALSE to use system zlib library instead of bundled" ON) - -if (NOT MSVC) - set (INTERNAL_ZLIB_NAME "zlib-ng" CACHE INTERNAL "") -else () - set (INTERNAL_ZLIB_NAME "zlib" CACHE INTERNAL "") - if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/${INTERNAL_ZLIB_NAME}") - message (WARNING "Will use standard zlib, please clone manually:\n git clone https://github.com/madler/zlib.git ${ClickHouse_SOURCE_DIR}/contrib/${INTERNAL_ZLIB_NAME}") - message (${RECONFIGURE_MESSAGE_LEVEL} "Can't use internal zlib library") - endif () -endif () - -if(NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/${INTERNAL_ZLIB_NAME}/zlib.h") - if(USE_INTERNAL_ZLIB_LIBRARY) - message(WARNING "submodule contrib/${INTERNAL_ZLIB_NAME} is missing. 
to fix try run: \n git submodule update --init") - message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal zlib library") - endif() - set(USE_INTERNAL_ZLIB_LIBRARY 0) - set(MISSING_INTERNAL_ZLIB_LIBRARY 1) -endif() - -if (NOT USE_INTERNAL_ZLIB_LIBRARY) - find_package (ZLIB) - if (NOT ZLIB_FOUND) - message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find system zlib library") - else() - set (ZLIB_NAME "libz") - endif() -endif () - -if (NOT ZLIB_FOUND AND NOT MISSING_INTERNAL_ZLIB_LIBRARY) - set (USE_INTERNAL_ZLIB_LIBRARY 1) - set (ZLIB_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/${INTERNAL_ZLIB_NAME}" "${ClickHouse_BINARY_DIR}/contrib/${INTERNAL_ZLIB_NAME}" CACHE INTERNAL "") # generated zconf.h - set (ZLIB_INCLUDE_DIRS ${ZLIB_INCLUDE_DIR}) # for poco - set (ZLIB_INCLUDE_DIRECTORIES ${ZLIB_INCLUDE_DIR}) # for protobuf - set (ZLIB_FOUND 1) # for poco - set (ZLIB_LIBRARIES zlib CACHE INTERNAL "") - set (ZLIB_LIBRARY_NAME ${ZLIB_LIBRARIES}) # for cassandra - set (ZLIB_NAME "${INTERNAL_ZLIB_NAME}") -endif () - -message (STATUS "Using ${ZLIB_NAME}: ${ZLIB_INCLUDE_DIR} : ${ZLIB_LIBRARIES}") diff --git a/cmake/find/zstd.cmake b/cmake/find/zstd.cmake deleted file mode 100644 index 2b8dd53fbc31..000000000000 --- a/cmake/find/zstd.cmake +++ /dev/null @@ -1,27 +0,0 @@ -option (USE_INTERNAL_ZSTD_LIBRARY "Set to FALSE to use system zstd library instead of bundled" ON) - -if(NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/zstd/lib/zstd.h") - if(USE_INTERNAL_ZSTD_LIBRARY) - message(WARNING "submodule contrib/zstd is missing. to fix try run: \n git submodule update --init") - message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal zstd library") - set(USE_INTERNAL_ZSTD_LIBRARY 0) - endif() - set(MISSING_INTERNAL_ZSTD_LIBRARY 1) -endif() - -if (NOT USE_INTERNAL_ZSTD_LIBRARY) - find_library (ZSTD_LIBRARY zstd) - find_path (ZSTD_INCLUDE_DIR NAMES zstd.h PATHS ${ZSTD_INCLUDE_PATHS}) - if (NOT ZSTD_LIBRARY OR NOT ZSTD_INCLUDE_DIR) - message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find system zstd library") - endif () -endif () - -if (ZSTD_LIBRARY AND ZSTD_INCLUDE_DIR) -elseif (NOT MISSING_INTERNAL_ZSTD_LIBRARY) - set (USE_INTERNAL_ZSTD_LIBRARY 1) - set (ZSTD_LIBRARY zstd) - set (ZSTD_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/zstd/lib) -endif () - -message (STATUS "Using zstd: ${ZSTD_INCLUDE_DIR} : ${ZSTD_LIBRARY}") diff --git a/cmake/freebsd/default_libs.cmake b/cmake/freebsd/default_libs.cmake index a5847c95387c..65d5f0511d93 100644 --- a/cmake/freebsd/default_libs.cmake +++ b/cmake/freebsd/default_libs.cmake @@ -22,17 +22,13 @@ set(CMAKE_C_STANDARD_LIBRARIES ${DEFAULT_LIBS}) set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) -include (cmake/find/unwind.cmake) -include (cmake/find/cxx.cmake) - -add_library(global-group INTERFACE) +include (cmake/unwind.cmake) +include (cmake/cxx.cmake) target_link_libraries(global-group INTERFACE $<TARGET_PROPERTY:global-libs,INTERFACE_LINK_LIBRARIES> ) -link_libraries(global-group) - # FIXME: remove when all contribs will get custom cmake lists install( TARGETS global-group global-libs diff --git a/cmake/linux/default_libs.cmake b/cmake/linux/default_libs.cmake index 426ae482ea3a..21bead7020c4 100644 --- a/cmake/linux/default_libs.cmake +++ b/cmake/linux/default_libs.cmake @@ -42,18 +42,15 @@ if (NOT OS_ANDROID) add_subdirectory(base/harmful) endif () -include (cmake/find/unwind.cmake) -include (cmake/find/cxx.cmake) +include (cmake/unwind.cmake) +include (cmake/cxx.cmake) -add_library(global-group INTERFACE) target_link_libraries(global-group INTERFACE -Wl,--start-group $<TARGET_PROPERTY:global-libs,INTERFACE_LINK_LIBRARIES> -Wl,--end-group )
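The -Wl,--start-group/-Wl,--end-group wrapper above makes the linker re-scan the grouped static archives until no unresolved references remain, which matters when the default libraries reference each other's symbols. A minimal sketch of the same pattern, with hypothetical targets liba and libb that call into each other:

# Sketch only (hypothetical targets): two static archives with a symbol
# cycle between them, linked through a re-scanned group.
add_library(liba STATIC a.c)   # a.c calls b_func() defined in b.c
add_library(libb STATIC b.c)   # b.c calls a_func() defined in a.c
add_library(cycle-group INTERFACE)
target_link_libraries(cycle-group INTERFACE
    -Wl,--start-group
    liba
    libb
    -Wl,--end-group
)
link_libraries(cycle-group)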
-link_libraries(global-group) - # FIXME: remove when all contribs will get custom cmake lists install( TARGETS global-group global-libs diff --git a/cmake/print_include_directories.cmake b/cmake/print_include_directories.cmake deleted file mode 100644 index cc2098cb3975..000000000000 --- a/cmake/print_include_directories.cmake +++ /dev/null @@ -1,29 +0,0 @@ - -# TODO? Maybe recursive collect on all deps - -get_property (dirs1 TARGET dbms PROPERTY INCLUDE_DIRECTORIES) -list(APPEND dirs ${dirs1}) - -get_property (dirs1 TARGET clickhouse_common_io PROPERTY INCLUDE_DIRECTORIES) -list(APPEND dirs ${dirs1}) - -get_property (dirs1 TARGET common PROPERTY INCLUDE_DIRECTORIES) -list(APPEND dirs ${dirs1}) - -get_property (dirs1 TARGET cityhash PROPERTY INCLUDE_DIRECTORIES) -list(APPEND dirs ${dirs1}) - -get_property (dirs1 TARGET roaring PROPERTY INCLUDE_DIRECTORIES) -list(APPEND dirs ${dirs1}) - -if (TARGET double-conversion) - get_property (dirs1 TARGET double-conversion PROPERTY INCLUDE_DIRECTORIES) - list(APPEND dirs ${dirs1}) -endif () - -list(REMOVE_DUPLICATES dirs) -file (WRITE ${CMAKE_CURRENT_BINARY_DIR}/include_directories.txt "") -foreach (dir ${dirs}) - string (REPLACE "${ClickHouse_SOURCE_DIR}" "." dir "${dir}") - file (APPEND ${CMAKE_CURRENT_BINARY_DIR}/include_directories.txt "-I ${dir} ") -endforeach () diff --git a/cmake/sanitize.cmake b/cmake/sanitize.cmake index f052948e7318..736105450092 100644 --- a/cmake/sanitize.cmake +++ b/cmake/sanitize.cmake @@ -23,7 +23,7 @@ if (SANITIZE) if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${ASAN_FLAGS}") endif() - if (MAKE_STATIC_LIBRARIES AND CMAKE_CXX_COMPILER_ID STREQUAL "GNU") + if (USE_STATIC_LIBRARIES AND CMAKE_CXX_COMPILER_ID STREQUAL "GNU") set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libasan") endif () if (COMPILER_GCC) @@ -48,7 +48,7 @@ if (SANITIZE) if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=memory") endif() - if (MAKE_STATIC_LIBRARIES AND CMAKE_CXX_COMPILER_ID STREQUAL "GNU") + if (USE_STATIC_LIBRARIES AND CMAKE_CXX_COMPILER_ID STREQUAL "GNU") set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libmsan") endif () @@ -69,7 +69,7 @@ if (SANITIZE) if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=thread") endif() - if (MAKE_STATIC_LIBRARIES AND CMAKE_CXX_COMPILER_ID STREQUAL "GNU") + if (USE_STATIC_LIBRARIES AND CMAKE_CXX_COMPILER_ID STREQUAL "GNU") set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libtsan") endif () if (COMPILER_GCC) @@ -101,7 +101,7 @@ if (SANITIZE) if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=undefined") endif() - if (MAKE_STATIC_LIBRARIES AND CMAKE_CXX_COMPILER_ID STREQUAL "GNU") + if (USE_STATIC_LIBRARIES AND CMAKE_CXX_COMPILER_ID STREQUAL "GNU") set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libubsan") endif () if (COMPILER_GCC) diff --git a/cmake/target.cmake b/cmake/target.cmake index 4b109d165e70..ff216f866187 100644 --- a/cmake/target.cmake +++ b/cmake/target.cmake @@ -27,10 +27,10 @@ if (CMAKE_CROSSCOMPILING) if (ARCH_AARCH64) # FIXME: broken dependencies set (ENABLE_GRPC OFF CACHE INTERNAL "") - set (USE_SENTRY OFF CACHE INTERNAL "") + set (ENABLE_SENTRY OFF CACHE INTERNAL "") elseif (ARCH_PPC64LE) set (ENABLE_GRPC OFF CACHE INTERNAL "") - set (USE_SENTRY OFF CACHE INTERNAL "") + set (ENABLE_SENTRY OFF CACHE INTERNAL 
"") endif () elseif (OS_FREEBSD) # FIXME: broken dependencies @@ -43,7 +43,7 @@ if (CMAKE_CROSSCOMPILING) endif () if (USE_MUSL) - set (USE_SENTRY OFF CACHE INTERNAL "") + set (ENABLE_SENTRY OFF CACHE INTERNAL "") set (ENABLE_ODBC OFF CACHE INTERNAL "") set (ENABLE_GRPC OFF CACHE INTERNAL "") set (ENABLE_HDFS OFF CACHE INTERNAL "") diff --git a/cmake/find/unwind.cmake b/cmake/unwind.cmake similarity index 100% rename from cmake/find/unwind.cmake rename to cmake/unwind.cmake diff --git a/cmake/warnings.cmake b/cmake/warnings.cmake index ecc31529dc4e..2093d3dcc870 100644 --- a/cmake/warnings.cmake +++ b/cmake/warnings.cmake @@ -55,11 +55,6 @@ if (COMPILER_CLANG) no_warning(weak-template-vtables) no_warning(weak-vtables) - # XXX: libstdc++ has some of these for 3way compare - if (NOT USE_LIBCXX) - no_warning(zero-as-null-pointer-constant) - endif() - # TODO Enable conversion, sign-conversion, double-promotion warnings. else () add_warning(comma) @@ -98,10 +93,7 @@ if (COMPILER_CLANG) add_warning(tautological-bitwise-compare) # XXX: libstdc++ has some of these for 3way compare - if (USE_LIBCXX) - add_warning(zero-as-null-pointer-constant) - endif() - + add_warning(zero-as-null-pointer-constant) endif () elseif (COMPILER_GCC) # Add compiler options only to c++ compiler @@ -183,11 +175,8 @@ elseif (COMPILER_GCC) add_cxx_compile_options(-Wundef) # Warn if vector operation is not implemented via SIMD capabilities of the architecture add_cxx_compile_options(-Wvector-operation-performance) - # XXX: libstdc++ has some of these for 3way compare - if (USE_LIBCXX) - # Warn when a literal 0 is used as null pointer constant. - add_cxx_compile_options(-Wzero-as-null-pointer-constant) - endif() + # Warn when a literal 0 is used as null pointer constant. + add_cxx_compile_options(-Wzero-as-null-pointer-constant) if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 10) # XXX: gcc10 stuck with this option while compiling GatherUtils code diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 17954159b3a2..9cf307c473ee 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -1,4 +1,4 @@ -# Third-party libraries may have substandard code. +#"${folder}/CMakeLists.txt" Third-party libraries may have substandard code. set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -w") set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -w") @@ -21,290 +21,132 @@ endif() set_property(DIRECTORY PROPERTY EXCLUDE_FROM_ALL 1) -add_subdirectory (abseil-cpp-cmake) -add_subdirectory (magic-enum-cmake) -add_subdirectory (boost-cmake) -add_subdirectory (cctz-cmake) -add_subdirectory (consistent-hashing) -add_subdirectory (dragonbox-cmake) -add_subdirectory (hyperscan-cmake) -add_subdirectory (jemalloc-cmake) -add_subdirectory (libcpuid-cmake) -add_subdirectory (libdivide) -add_subdirectory (libmetrohash) -add_subdirectory (lz4-cmake) -add_subdirectory (murmurhash) -add_subdirectory (replxx-cmake) -add_subdirectory (unixodbc-cmake) -add_subdirectory (nanodbc-cmake) - -if (USE_INTERNAL_CAPNP_LIBRARY AND NOT MISSING_INTERNAL_CAPNP_LIBRARY) - add_subdirectory(capnproto-cmake) -endif () - -if (ENABLE_FUZZING) - add_subdirectory (libprotobuf-mutator-cmake) -endif() - -if (USE_YAML_CPP) - add_subdirectory (yaml-cpp-cmake) -endif() - -if (USE_INTERNAL_XZ_LIBRARY) - add_subdirectory (xz-cmake) -endif() - -add_subdirectory (poco-cmake) -add_subdirectory (croaring-cmake) - -# TODO: refactor the contrib libraries below this comment. 
- -if (USE_INTERNAL_ZSTD_LIBRARY) - add_subdirectory (zstd-cmake) -endif () - -if (USE_INTERNAL_RE2_LIBRARY) - add_subdirectory (re2-cmake) -endif () - -if (USE_INTERNAL_DOUBLE_CONVERSION_LIBRARY) - add_subdirectory (double-conversion-cmake) -endif () - -if (USE_INTERNAL_CITYHASH_LIBRARY) - add_subdirectory (cityhash102) -endif () - -if (USE_INTERNAL_FARMHASH_LIBRARY) - add_subdirectory (libfarmhash) -endif () - -if (USE_INTERNAL_ZLIB_LIBRARY) - if (INTERNAL_ZLIB_NAME STREQUAL "zlib-ng") - add_subdirectory (zlib-ng-cmake) - else () - add_subdirectory (${INTERNAL_ZLIB_NAME}) - endif () -endif () - -if (USE_INTERNAL_H3_LIBRARY) - add_subdirectory(h3-cmake) -endif () - -if (USE_INTERNAL_SSL_LIBRARY) - add_subdirectory (boringssl-cmake) - - add_library(OpenSSL::Crypto ALIAS crypto) - add_library(OpenSSL::SSL ALIAS ssl) -endif () - -if (USE_INTERNAL_LDAP_LIBRARY) - add_subdirectory (openldap-cmake) -endif () - -if (USE_INTERNAL_MYSQL_LIBRARY) - add_subdirectory (mariadb-connector-c-cmake) -endif () - -if (USE_INTERNAL_RDKAFKA_LIBRARY) - add_subdirectory (librdkafka-cmake) - target_include_directories(rdkafka BEFORE PRIVATE ${ZLIB_INCLUDE_DIR}) - if(OPENSSL_INCLUDE_DIR) - target_include_directories(rdkafka BEFORE PRIVATE ${OPENSSL_INCLUDE_DIR}) +# add_contrib cmake_folder[ base_folder1[, ...base_folderN]] +function(add_contrib cmake_folder) + if (ARGN) + set(base_folders ${ARGN}) + else() + set(base_folders ${cmake_folder}) endif() -endif () -if (USE_RDKAFKA) - add_subdirectory (cppkafka-cmake) -endif() - -if (ENABLE_ICU AND USE_INTERNAL_ICU_LIBRARY) - add_subdirectory (icu-cmake) -endif () - -if(USE_INTERNAL_SNAPPY_LIBRARY) - set(SNAPPY_BUILD_TESTS 0 CACHE INTERNAL "") - - add_subdirectory(snappy-cmake) - - set (SNAPPY_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/snappy") -endif() - -if (USE_INTERNAL_PARQUET_LIBRARY) - # We dont use arrow's cmakefiles because they uses too many depends and download some libs in compile time - # But you can update auto-generated parquet files manually: - # cd {BUILD_DIR}/contrib/arrow/cpp/src/parquet && mkdir -p build && cd build - # cmake .. -DARROW_COMPUTE=ON -DARROW_PARQUET=ON -DARROW_SIMD_LEVEL=NONE -DARROW_VERBOSE_THIRDPARTY_BUILD=ON - # -DARROW_BUILD_SHARED=1 -DARROW_BUILD_UTILITIES=OFF -DARROW_BUILD_INTEGRATION=OFF - # -DBoost_FOUND=1 -DARROW_TEST_LINKAGE="shared" - # make -j8 - # copy {BUILD_DIR}/contrib/arrow/cpp/src/parquet/*.cpp,*.h -> {BUILD_DIR}/contrib/arrow-cmake/cpp/src/parquet/ - - # Also useful parquet reader: - # cd {BUILD_DIR}/contrib/arrow/cpp && mkdir -p build && cd build - # cmake .. -DARROW_PARQUET=1 -DARROW_WITH_SNAPPY=1 -DPARQUET_BUILD_EXECUTABLES=1 - # make -j8 - # {BUILD_DIR}/contrib/arrow/cpp/build/release/parquet-reader some_file.parquet - - add_subdirectory(arrow-cmake) - - # The library is large - avoid bloat. 
- target_compile_options (${ARROW_LIBRARY} PRIVATE -g0) - target_compile_options (${PARQUET_LIBRARY} PRIVATE -g0) -endif() - -if (USE_INTERNAL_AVRO_LIBRARY) - add_subdirectory(avro-cmake) -endif() - -if(USE_INTERNAL_GTEST_LIBRARY) - add_subdirectory(googletest-cmake) -elseif(GTEST_SRC_DIR) - add_subdirectory(${GTEST_SRC_DIR}/googletest ${CMAKE_CURRENT_BINARY_DIR}/googletest) - target_compile_definitions(gtest INTERFACE GTEST_HAS_POSIX_RE=0) -endif() - -function(add_llvm) - # ld: unknown option: --color-diagnostics - if (APPLE) - set (LINKER_SUPPORTS_COLOR_DIAGNOSTICS 0 CACHE INTERNAL "") - endif () - - # Do not adjust RPATH in llvm, since then it will not be able to find libcxx/libcxxabi/libunwind - set (CMAKE_INSTALL_RPATH "ON") - set (LLVM_COMPILER_CHECKED 1 CACHE INTERNAL "") - set (LLVM_ENABLE_EH 1 CACHE INTERNAL "") - set (LLVM_ENABLE_RTTI 1 CACHE INTERNAL "") - set (LLVM_ENABLE_PIC 0 CACHE INTERNAL "") - set (LLVM_TARGETS_TO_BUILD "X86;AArch64" CACHE STRING "") - - # Need to use C++17 since the compilation is not possible with C++20 currently, due to ambiguous operator != etc. - # LLVM project will set its default value for the -std=... but our global setting from CMake will override it. - set (CMAKE_CXX_STANDARD 17) - - add_subdirectory (llvm/llvm) + foreach (base_folder ${base_folders}) + # some typos in the code + if (NOT IS_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/${base_folder}") + message(FATAL_ERROR "No such base folder '${base_folder}' (for '${cmake_folder}' cmake folder). Typo in the base folder name?") + endif() + + file(GLOB contrib_files "${base_folder}/*") + if (NOT contrib_files) + message(STATUS "submodule ${base_folder} is missing or empty. to fix try run:") + message(STATUS " git submodule update --init") + return() + endif() + endforeach() + + message(STATUS "Adding contrib module ${base_folders} (configuring with ${cmake_folder})") + add_subdirectory (${cmake_folder}) endfunction() -if (USE_EMBEDDED_COMPILER) - add_llvm() -endif () - -if (USE_INTERNAL_LIBGSASL_LIBRARY) - add_subdirectory(libgsasl-cmake) -endif() - -if (USE_INTERNAL_LIBXML2_LIBRARY) - add_subdirectory(libxml2-cmake) -endif () - -if (USE_INTERNAL_BROTLI_LIBRARY) - add_subdirectory(brotli-cmake) - target_compile_definitions(brotli PRIVATE BROTLI_BUILD_PORTABLE=1) -endif () -if (USE_INTERNAL_PROTOBUF_LIBRARY) - add_subdirectory(protobuf-cmake) -endif () +add_contrib (miniselect-cmake miniselect) +add_contrib (pdqsort-cmake pdqsort) +add_contrib (sparsehash-c11-cmake sparsehash-c11) +add_contrib (abseil-cpp-cmake abseil-cpp) +add_contrib (magic-enum-cmake magic_enum) +add_contrib (boost-cmake boost) +add_contrib (cctz-cmake cctz) +add_contrib (consistent-hashing) +add_contrib (dragonbox-cmake dragonbox) +add_contrib (hyperscan-cmake hyperscan) +add_contrib (jemalloc-cmake jemalloc) +add_contrib (libcpuid-cmake libcpuid) +add_contrib (libdivide) +add_contrib (libmetrohash) +add_contrib (lz4-cmake lz4) +add_contrib (murmurhash) +add_contrib (replxx-cmake replxx) +add_contrib (unixodbc-cmake unixodbc) +add_contrib (nanodbc-cmake nanodbc) +add_contrib (capnproto-cmake capnproto) +add_contrib (yaml-cpp-cmake yaml-cpp) +add_contrib (re2-cmake re2) +add_contrib (xz-cmake xz) +add_contrib (brotli-cmake brotli) +add_contrib (double-conversion-cmake double-conversion) +add_contrib (boringssl-cmake boringssl) +add_contrib (poco-cmake poco) +add_contrib (croaring-cmake croaring) +add_contrib (zstd-cmake zstd) +add_contrib (zlib-ng-cmake zlib-ng) +add_contrib (bzip2-cmake bzip2) +add_contrib (minizip-ng-cmake 
minizip-ng) +add_contrib (snappy-cmake snappy) +add_contrib (rocksdb-cmake rocksdb) +add_contrib (thrift-cmake thrift) +# parquet/arrow/orc +add_contrib (arrow-cmake arrow) # requires: snappy, thrift, double-conversion +add_contrib (avro-cmake avro) # requires: snappy +add_contrib (protobuf-cmake protobuf) +add_contrib (openldap-cmake openldap) +add_contrib (grpc-cmake grpc) +add_contrib (msgpack-c-cmake msgpack-c) -if (USE_INTERNAL_THRIFT_LIBRARY) - add_subdirectory(thrift-cmake) -endif () - -if (USE_INTERNAL_HDFS3_LIBRARY) - add_subdirectory(libhdfs3-cmake) -endif () - -if (USE_INTERNAL_GRPC_LIBRARY) - add_subdirectory(grpc-cmake) -endif () - -if (USE_INTERNAL_AWS_S3_LIBRARY) - add_subdirectory(aws-s3-cmake) - - # The library is large - avoid bloat. - target_compile_options (aws_s3 PRIVATE -g0) - target_compile_options (aws_s3_checksums PRIVATE -g0) - -endif () - -if (USE_BASE64) - add_subdirectory (base64-cmake) -endif() - -if (USE_SIMDJSON) - add_subdirectory (simdjson-cmake) -endif() - -if (USE_FASTOPS) - add_subdirectory (fastops-cmake) -endif() - -if (USE_AMQPCPP OR USE_CASSANDRA) - add_subdirectory (libuv-cmake) -endif() -if (USE_AMQPCPP) - add_subdirectory (amqpcpp-cmake) -endif() -if (USE_CASSANDRA) - add_subdirectory (cassandra-cmake) -endif() - -# Should go before: -# - sentry-native -add_subdirectory (curl-cmake) - -if (USE_INTERNAL_AZURE_BLOB_STORAGE_LIBRARY) - add_subdirectory(azure-cmake) -endif() - -if (USE_SENTRY) - add_subdirectory (sentry-native-cmake) -endif() - -add_subdirectory (fmtlib-cmake) - -if (USE_KRB5) - add_subdirectory (krb5-cmake) - if (USE_CYRUS_SASL) - add_subdirectory (cyrus-sasl-cmake) - endif() -endif() - -if (USE_INTERNAL_ROCKSDB_LIBRARY) - add_subdirectory(rocksdb-cmake) -endif() - -if (USE_LIBPQXX) - add_subdirectory (libpq-cmake) - add_subdirectory (libpqxx-cmake) -endif() - -if (USE_NURAFT) - add_subdirectory(nuraft-cmake) -endif() - -add_subdirectory(fast_float-cmake) - -if (USE_NLP) - add_subdirectory(libstemmer-c-cmake) - add_subdirectory(wordnet-blast-cmake) - add_subdirectory(lemmagen-c-cmake) -endif() - -if (USE_BZIP2) - add_subdirectory(bzip2-cmake) -endif() - -if (USE_SQLITE) - add_subdirectory(sqlite-cmake) -endif() - -if (USE_S2_GEOMETRY) - add_subdirectory(s2geometry-cmake) -endif() - -if (USE_HIVE) - add_subdirectory (hive-metastore-cmake) -endif() +if (ENABLE_FUZZING) + add_contrib (libprotobuf-mutator-cmake libprotobuf-mutator) +endif() + +add_contrib (cityhash102) +add_contrib (libfarmhash) +add_contrib (icu-cmake icu) +add_contrib (h3-cmake h3) +add_contrib (mariadb-connector-c-cmake mariadb-connector-c) + +if (ENABLE_TESTS) + add_contrib (googletest-cmake googletest) +endif() + +add_contrib (llvm-cmake llvm) +add_contrib (libxml2-cmake libxml2) +add_contrib (aws-s3-cmake + aws + aws-c-common + aws-c-event-stream + aws-checksums +) +add_contrib (base64-cmake base64) +add_contrib (simdjson-cmake simdjson) +add_contrib (rapidjson-cmake rapidjson) +add_contrib (fastops-cmake fastops) +add_contrib (libuv-cmake libuv) +add_contrib (amqpcpp-cmake AMQP-CPP) # requires: libuv +add_contrib (cassandra-cmake cassandra) # requires: libuv +add_contrib (curl-cmake curl) +add_contrib (azure-cmake azure) +add_contrib (sentry-native-cmake sentry-native) # requires: curl +add_contrib (fmtlib-cmake fmtlib) +add_contrib (krb5-cmake krb5) +add_contrib (cyrus-sasl-cmake cyrus-sasl) # for krb5 +add_contrib (libgsasl-cmake libgsasl) # requires krb5 +add_contrib (librdkafka-cmake librdkafka) # requires: libgsasl +add_contrib (libhdfs3-cmake libhdfs3) # 
requires: protobuf, krb5 +add_contrib (hive-metastore-cmake hive-metastore) # requires: thrift/avro/arrow/libhdfs3 +add_contrib (cppkafka-cmake cppkafka) +add_contrib (libpqxx-cmake libpqxx) +add_contrib (libpq-cmake libpq) +add_contrib (nuraft-cmake NuRaft) +add_contrib (fast_float-cmake fast_float) +add_contrib (datasketches-cpp-cmake datasketches-cpp) + +option(ENABLE_NLP "Enable NLP functions support" ${ENABLE_LIBRARIES}) +if (ENABLE_NLP) + add_contrib (libstemmer-c-cmake libstemmer_c) + add_contrib (wordnet-blast-cmake wordnet-blast) + add_contrib (lemmagen-c-cmake lemmagen-c) + add_contrib (nlp-data-cmake nlp-data) + add_contrib (cld2-cmake cld2) +endif() + +add_contrib (sqlite-cmake sqlite-amalgamation) +add_contrib (s2geometry-cmake s2geometry) # Put all targets defined here and in subdirectories under "contrib/" folders in GUI-based IDEs. # Some of third-party projects may override CMAKE_FOLDER or FOLDER property of their targets, so they would not appear diff --git a/contrib/NuRaft b/contrib/NuRaft index c2043aa250e5..1707a7572aa6 160000 --- a/contrib/NuRaft +++ b/contrib/NuRaft @@ -1 +1 @@ -Subproject commit c2043aa250e53ad5cf75e596e319d587af4dcb3c +Subproject commit 1707a7572aa66ec5d0a2dbe2bf5effa3352e6b2d diff --git a/contrib/abseil-cpp-cmake/CMakeLists.txt b/contrib/abseil-cpp-cmake/CMakeLists.txt index 65e4c24ff5aa..4fb02327d17c 100644 --- a/contrib/abseil-cpp-cmake/CMakeLists.txt +++ b/contrib/abseil-cpp-cmake/CMakeLists.txt @@ -6,15 +6,17 @@ set(BUILD_TESTING OFF) set(ABSL_PROPAGATE_CXX_STD ON) add_subdirectory("${ABSL_ROOT_DIR}" "${ClickHouse_BINARY_DIR}/contrib/abseil-cpp") -add_library(abseil_swiss_tables INTERFACE) +add_library(_abseil_swiss_tables INTERFACE) -target_link_libraries(abseil_swiss_tables INTERFACE +target_link_libraries(_abseil_swiss_tables INTERFACE absl::flat_hash_map absl::flat_hash_set ) get_target_property(FLAT_HASH_MAP_INCLUDE_DIR absl::flat_hash_map INTERFACE_INCLUDE_DIRECTORIES) -target_include_directories (abseil_swiss_tables SYSTEM BEFORE INTERFACE ${FLAT_HASH_MAP_INCLUDE_DIR}) +target_include_directories (_abseil_swiss_tables SYSTEM BEFORE INTERFACE ${FLAT_HASH_MAP_INCLUDE_DIR}) get_target_property(FLAT_HASH_SET_INCLUDE_DIR absl::flat_hash_set INTERFACE_INCLUDE_DIRECTORIES) -target_include_directories (abseil_swiss_tables SYSTEM BEFORE INTERFACE ${FLAT_HASH_SET_INCLUDE_DIR}) +target_include_directories (_abseil_swiss_tables SYSTEM BEFORE INTERFACE ${FLAT_HASH_SET_INCLUDE_DIR}) + +add_library(ch_contrib::abseil_swiss_tables ALIAS _abseil_swiss_tables) diff --git a/contrib/amqpcpp-cmake/CMakeLists.txt b/contrib/amqpcpp-cmake/CMakeLists.txt index faef7bd4a1c6..974d097e06fa 100644 --- a/contrib/amqpcpp-cmake/CMakeLists.txt +++ b/contrib/amqpcpp-cmake/CMakeLists.txt @@ -1,3 +1,10 @@ +option(ENABLE_AMQPCPP "Enable AMQP-CPP" ${ENABLE_LIBRARIES}) + +if (NOT ENABLE_AMQPCPP) + message(STATUS "Not using AMQP-CPP") + return() +endif() + set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/AMQP-CPP") set (SRCS @@ -23,9 +30,9 @@ set (SRCS "${LIBRARY_DIR}/src/watchable.cpp" ) -add_library(amqp-cpp ${SRCS}) +add_library(_amqp-cpp ${SRCS}) -target_compile_options (amqp-cpp +target_compile_options (_amqp-cpp PRIVATE -Wno-old-style-cast -Wno-inconsistent-missing-destructor-override @@ -40,5 +47,6 @@ target_compile_options (amqp-cpp -w ) -target_include_directories (amqp-cpp SYSTEM PUBLIC "${LIBRARY_DIR}/include") -target_link_libraries(amqp-cpp PUBLIC ${OPENSSL_SSL_LIBRARY} ${OPENSSL_CRYPTO_LIBRARY}) +target_include_directories (_amqp-cpp SYSTEM BEFORE PUBLIC 
"${LIBRARY_DIR}/include" "${LIBRARY_DIR}") +target_link_libraries (_amqp-cpp PUBLIC OpenSSL::Crypto OpenSSL::SSL ch_contrib::uv) +add_library (ch_contrib::amqp_cpp ALIAS _amqp-cpp) diff --git a/contrib/arrow-cmake/CMakeLists.txt b/contrib/arrow-cmake/CMakeLists.txt index a28a83a87c53..2e4059efc170 100644 --- a/contrib/arrow-cmake/CMakeLists.txt +++ b/contrib/arrow-cmake/CMakeLists.txt @@ -1,3 +1,34 @@ +# We dont use arrow's cmakefiles because they uses too many depends and download some libs in compile time +# But you can update auto-generated parquet files manually: +# cd {BUILD_DIR}/contrib/arrow/cpp/src/parquet && mkdir -p build && cd build +# cmake .. -DARROW_COMPUTE=ON -DARROW_PARQUET=ON -DARROW_SIMD_LEVEL=NONE -DARROW_VERBOSE_THIRDPARTY_BUILD=ON +# -DARROW_BUILD_SHARED=1 -DARROW_BUILD_UTILITIES=OFF -DARROW_BUILD_INTEGRATION=OFF +# -DBoost_FOUND=1 -DARROW_TEST_LINKAGE="shared" +# make -j8 +# copy {BUILD_DIR}/contrib/arrow/cpp/src/parquet/*.cpp,*.h -> {BUILD_DIR}/contrib/arrow-cmake/cpp/src/parquet/ + +# Also useful parquet reader: +# cd {BUILD_DIR}/contrib/arrow/cpp && mkdir -p build && cd build +# cmake .. -DARROW_PARQUET=1 -DARROW_WITH_SNAPPY=1 -DPARQUET_BUILD_EXECUTABLES=1 +# make -j8 +# {BUILD_DIR}/contrib/arrow/cpp/build/release/parquet-reader some_file.parquet + +set (ENABLE_PARQUET_DEFAULT ${ENABLE_LIBRARIES}) +if (OS_FREEBSD) + set (ENABLE_PARQUET_DEFAULT OFF) +endif() +option (ENABLE_PARQUET "Enable parquet" ${ENABLE_PARQUET_DEFAULT}) + +if (NOT ENABLE_PARQUET) + message(STATUS "Building without Parquet support") + return() +endif() + +# Freebsd: ../contrib/arrow/cpp/src/arrow/util/bit-util.h:27:10: fatal error: endian.h: No such file or directory +if (OS_FREEBSD) + message (FATAL_ERROR "Using internal parquet library on FreeBSD is not supported") +endif() + set (CMAKE_CXX_STANDARD 17) set(ARROW_VERSION "6.0.1") @@ -27,11 +58,10 @@ set(ORC_SOURCE_SRC_DIR "${ORC_SOURCE_DIR}/src") set(ORC_BUILD_SRC_DIR "${CMAKE_CURRENT_BINARY_DIR}/../orc/c++/src") set(ORC_BUILD_INCLUDE_DIR "${CMAKE_CURRENT_BINARY_DIR}/../orc/c++/include") -set(GOOGLE_PROTOBUF_DIR "${Protobuf_INCLUDE_DIR}/") set(ORC_ADDITION_SOURCE_DIR ${CMAKE_CURRENT_BINARY_DIR}) set(ARROW_SRC_DIR "${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/src") -set(PROTOBUF_EXECUTABLE ${Protobuf_PROTOC_EXECUTABLE}) +set(PROTOBUF_EXECUTABLE $) set(PROTO_DIR "${ORC_SOURCE_DIR}/../proto") @@ -48,7 +78,7 @@ set(FLATBUFFERS_BINARY_DIR "${ClickHouse_BINARY_DIR}/contrib/flatbuffers") set(FLATBUFFERS_INCLUDE_DIR "${FLATBUFFERS_SRC_DIR}/include") # set flatbuffers CMake options -if (MAKE_STATIC_LIBRARIES) +if (USE_STATIC_LIBRARIES) set(FLATBUFFERS_BUILD_FLATLIB ON CACHE BOOL "Enable the build of the flatbuffers library") set(FLATBUFFERS_BUILD_SHAREDLIB OFF CACHE BOOL "Disable the build of the flatbuffers shared library") else () @@ -59,9 +89,16 @@ set(FLATBUFFERS_BUILD_TESTS OFF CACHE BOOL "Skip flatbuffers tests") add_subdirectory(${FLATBUFFERS_SRC_DIR} "${FLATBUFFERS_BINARY_DIR}") -message(STATUS "FLATBUFFERS_LIBRARY: ${FLATBUFFERS_LIBRARY}") +add_library(_flatbuffers INTERFACE) +if(USE_STATIC_LIBRARIES) + target_link_libraries(_flatbuffers INTERFACE flatbuffers) +else() + target_link_libraries(_flatbuffers INTERFACE flatbuffers_shared) +endif() +target_include_directories(_flatbuffers INTERFACE ${FLATBUFFERS_INCLUDE_DIR}) # === hdfs +# NOTE: cannot use ch_contrib::hdfs since it's INCLUDE_DIRECTORIES does not includes trailing "hdfs/" set(HDFS_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/libhdfs3/include/hdfs/") # arrow-cmake cmake file calling 
orc cmake subroutine which detects certain compiler features. @@ -87,8 +124,6 @@ configure_file("${ORC_SOURCE_SRC_DIR}/Adaptor.hh.in" "${ORC_BUILD_INCLUDE_DIR}/A # ARROW_ORC + adapters/orc/CMakefiles set(ORC_SRCS - "${ARROW_SRC_DIR}/arrow/adapters/orc/adapter.cc" - "${ARROW_SRC_DIR}/arrow/adapters/orc/adapter_util.cc" "${ORC_SOURCE_SRC_DIR}/Exceptions.cc" "${ORC_SOURCE_SRC_DIR}/OrcFile.cc" "${ORC_SOURCE_SRC_DIR}/Reader.cc" @@ -115,6 +150,22 @@ set(ORC_SRCS "${ORC_ADDITION_SOURCE_DIR}/orc_proto.pb.cc" ) +add_library(_orc ${ORC_SRCS}) +target_link_libraries(_orc PRIVATE + ch_contrib::protobuf + ch_contrib::lz4 + ch_contrib::snappy + ch_contrib::zlib + ch_contrib::zstd) +target_include_directories(_orc SYSTEM BEFORE PUBLIC ${ORC_INCLUDE_DIR}) +target_include_directories(_orc SYSTEM BEFORE PUBLIC ${ORC_BUILD_INCLUDE_DIR}) +target_include_directories(_orc SYSTEM PRIVATE + ${ORC_SOURCE_SRC_DIR} + ${ORC_SOURCE_WRAP_DIR} + ${ORC_BUILD_SRC_DIR} + ${ORC_ADDITION_SOURCE_DIR} + ${ARROW_SRC_DIR}) + # === arrow @@ -300,33 +351,18 @@ set(ARROW_SRCS "${LIBRARY_DIR}/ipc/reader.cc" "${LIBRARY_DIR}/ipc/writer.cc" - ${ORC_SRCS} + "${ARROW_SRC_DIR}/arrow/adapters/orc/adapter.cc" + "${ARROW_SRC_DIR}/arrow/adapters/orc/adapter_util.cc" ) -if (SNAPPY_INCLUDE_DIR AND SNAPPY_LIBRARY) - set(ARROW_WITH_SNAPPY 1) -endif () - -if (ZLIB_INCLUDE_DIR AND ZLIB_LIBRARIES) - set(ARROW_WITH_ZLIB 1) -endif () - -if (ZSTD_INCLUDE_DIR AND ZSTD_LIBRARY) - set(ARROW_WITH_ZSTD 1) -endif () - add_definitions(-DARROW_WITH_LZ4) SET(ARROW_SRCS "${LIBRARY_DIR}/util/compression_lz4.cc" ${ARROW_SRCS}) -if (ARROW_WITH_SNAPPY) - add_definitions(-DARROW_WITH_SNAPPY) - SET(ARROW_SRCS "${LIBRARY_DIR}/util/compression_snappy.cc" ${ARROW_SRCS}) -endif () +add_definitions(-DARROW_WITH_SNAPPY) +SET(ARROW_SRCS "${LIBRARY_DIR}/util/compression_snappy.cc" ${ARROW_SRCS}) -if (ARROW_WITH_ZLIB) - add_definitions(-DARROW_WITH_ZLIB) - SET(ARROW_SRCS "${LIBRARY_DIR}/util/compression_zlib.cc" ${ARROW_SRCS}) -endif () +add_definitions(-DARROW_WITH_ZLIB) +SET(ARROW_SRCS "${LIBRARY_DIR}/util/compression_zlib.cc" ${ARROW_SRCS}) if (ARROW_WITH_ZSTD) add_definitions(-DARROW_WITH_ZSTD) @@ -334,41 +370,30 @@ if (ARROW_WITH_ZSTD) endif () -add_library(${ARROW_LIBRARY} ${ARROW_SRCS}) +add_library(_arrow ${ARROW_SRCS}) -# Arrow dependencies -add_dependencies(${ARROW_LIBRARY} ${FLATBUFFERS_LIBRARY}) +target_link_libraries(_arrow PRIVATE + boost::filesystem -target_link_libraries(${ARROW_LIBRARY} PRIVATE ${FLATBUFFERS_LIBRARY} boost::filesystem) + _flatbuffers -if (USE_INTERNAL_PROTOBUF_LIBRARY) - add_dependencies(${ARROW_LIBRARY} protoc) -endif () + ch_contrib::double_conversion -target_include_directories(${ARROW_LIBRARY} SYSTEM PUBLIC ${ARROW_SRC_DIR}) -target_include_directories(${ARROW_LIBRARY} SYSTEM PUBLIC "${CMAKE_CURRENT_BINARY_DIR}/cpp/src") -target_link_libraries(${ARROW_LIBRARY} PRIVATE ${DOUBLE_CONVERSION_LIBRARIES} ${Protobuf_LIBRARY}) -target_link_libraries(${ARROW_LIBRARY} PRIVATE lz4) -if (ARROW_WITH_SNAPPY) - target_link_libraries(${ARROW_LIBRARY} PRIVATE ${SNAPPY_LIBRARY}) -endif () -if (ARROW_WITH_ZLIB) - target_link_libraries(${ARROW_LIBRARY} PRIVATE ${ZLIB_LIBRARIES}) -endif () -if (ARROW_WITH_ZSTD) - target_link_libraries(${ARROW_LIBRARY} PRIVATE ${ZSTD_LIBRARY}) - target_include_directories(${ARROW_LIBRARY} SYSTEM BEFORE PRIVATE ${ZLIB_INCLUDE_DIR}) -endif () + ch_contrib::lz4 + ch_contrib::snappy + ch_contrib::zlib + ch_contrib::zstd +) +target_link_libraries(_arrow PUBLIC _orc) -target_include_directories(${ARROW_LIBRARY}
SYSTEM PRIVATE ${ORC_INCLUDE_DIR}) -target_include_directories(${ARROW_LIBRARY} SYSTEM PRIVATE ${ORC_SOURCE_SRC_DIR}) -target_include_directories(${ARROW_LIBRARY} SYSTEM PRIVATE ${ORC_SOURCE_WRAP_DIR}) -target_include_directories(${ARROW_LIBRARY} SYSTEM PRIVATE ${GOOGLE_PROTOBUF_DIR}) -target_include_directories(${ARROW_LIBRARY} SYSTEM PRIVATE ${ORC_BUILD_SRC_DIR}) -target_include_directories(${ARROW_LIBRARY} SYSTEM PRIVATE ${ORC_BUILD_INCLUDE_DIR}) -target_include_directories(${ARROW_LIBRARY} SYSTEM PRIVATE ${ORC_ADDITION_SOURCE_DIR}) -target_include_directories(${ARROW_LIBRARY} SYSTEM PRIVATE ${FLATBUFFERS_INCLUDE_DIR}) -target_include_directories(${ARROW_LIBRARY} SYSTEM PRIVATE ${HDFS_INCLUDE_DIR}) +add_dependencies(_arrow protoc) + +target_include_directories(_arrow SYSTEM BEFORE PUBLIC ${ARROW_SRC_DIR}) +target_include_directories(_arrow SYSTEM BEFORE PUBLIC "${CMAKE_CURRENT_BINARY_DIR}/cpp/src") + +target_include_directories(_arrow SYSTEM PRIVATE ${ARROW_SRC_DIR}) +target_include_directories(_arrow SYSTEM PRIVATE ${HDFS_INCLUDE_DIR}) # === parquet @@ -411,13 +436,23 @@ set(PARQUET_SRCS "${GEN_LIBRARY_DIR}/parquet_types.cpp" ) #list(TRANSFORM PARQUET_SRCS PREPEND "${LIBRARY_DIR}/") # cmake 3.12 -add_library(${PARQUET_LIBRARY} ${PARQUET_SRCS}) -target_include_directories(${PARQUET_LIBRARY} SYSTEM PUBLIC "${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/src" "${CMAKE_CURRENT_SOURCE_DIR}/cpp/src" PRIVATE ${OPENSSL_INCLUDE_DIR}) -target_link_libraries(${PARQUET_LIBRARY} PUBLIC ${ARROW_LIBRARY} PRIVATE ${THRIFT_LIBRARY} boost::headers_only boost::regex ${OPENSSL_LIBRARIES}) +add_library(_parquet ${PARQUET_SRCS}) +add_library(ch_contrib::parquet ALIAS _parquet) +target_include_directories(_parquet SYSTEM BEFORE + PUBLIC + "${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/src" + "${CMAKE_CURRENT_SOURCE_DIR}/cpp/src") +target_link_libraries(_parquet + PUBLIC _arrow + PRIVATE + ch_contrib::thrift + boost::headers_only + boost::regex + OpenSSL::Crypto OpenSSL::SSL) if (SANITIZE STREQUAL "undefined") - target_compile_options(${PARQUET_LIBRARY} PRIVATE -fno-sanitize=undefined) - target_compile_options(${ARROW_LIBRARY} PRIVATE -fno-sanitize=undefined) + target_compile_options(_parquet PRIVATE -fno-sanitize=undefined) + target_compile_options(_arrow PRIVATE -fno-sanitize=undefined) endif () # === tools @@ -426,5 +461,9 @@ set(TOOLS_DIR "${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/tools/parquet") set(PARQUET_TOOLS parquet_dump_schema parquet_reader parquet_scan) foreach (TOOL ${PARQUET_TOOLS}) add_executable(${TOOL} "${TOOLS_DIR}/${TOOL}.cc") - target_link_libraries(${TOOL} PRIVATE ${PARQUET_LIBRARY}) + target_link_libraries(${TOOL} PRIVATE _parquet) endforeach () + +# The library is large - avoid bloat. 
+target_compile_options (_arrow PRIVATE -g0) +target_compile_options (_parquet PRIVATE -g0) diff --git a/contrib/avro-cmake/CMakeLists.txt b/contrib/avro-cmake/CMakeLists.txt index b56afd1598c6..d91ce40dd54d 100644 --- a/contrib/avro-cmake/CMakeLists.txt +++ b/contrib/avro-cmake/CMakeLists.txt @@ -1,3 +1,11 @@ +# Needed when using Apache Avro serialization format +option (ENABLE_AVRO "Enable Avro" ${ENABLE_LIBRARIES}) + +if (NOT ENABLE_AVRO) + message(STATUS "Not using avro") + return() +endif() + set(AVROCPP_ROOT_DIR "${CMAKE_SOURCE_DIR}/contrib/avro/lang/c++") set(AVROCPP_INCLUDE_DIR "${AVROCPP_ROOT_DIR}/api") set(AVROCPP_SOURCE_DIR "${AVROCPP_ROOT_DIR}/impl") @@ -40,18 +48,17 @@ set (AVROCPP_SOURCE_FILES "${AVROCPP_SOURCE_DIR}/Validator.cc" ) -add_library (avrocpp ${AVROCPP_SOURCE_FILES}) -set_target_properties (avrocpp PROPERTIES VERSION ${AVRO_VERSION_MAJOR}.${AVRO_VERSION_MINOR}) +add_library (_avrocpp ${AVROCPP_SOURCE_FILES}) +add_library (ch_contrib::avrocpp ALIAS _avrocpp) +set_target_properties (_avrocpp PROPERTIES VERSION ${AVRO_VERSION_MAJOR}.${AVRO_VERSION_MINOR}) -target_include_directories(avrocpp SYSTEM PUBLIC ${AVROCPP_INCLUDE_DIR}) +target_include_directories(_avrocpp SYSTEM PUBLIC ${AVROCPP_INCLUDE_DIR}) -target_link_libraries (avrocpp PRIVATE boost::headers_only boost::iostreams) +target_link_libraries (_avrocpp PRIVATE boost::headers_only boost::iostreams) -if (SNAPPY_INCLUDE_DIR AND SNAPPY_LIBRARY) - target_compile_definitions (avrocpp PUBLIC SNAPPY_CODEC_AVAILABLE) - target_include_directories (avrocpp PRIVATE ${SNAPPY_INCLUDE_DIR}) - target_link_libraries (avrocpp PRIVATE ${SNAPPY_LIBRARY}) -endif () +target_compile_definitions (_avrocpp PUBLIC SNAPPY_CODEC_AVAILABLE) +target_include_directories (_avrocpp PRIVATE ${SNAPPY_INCLUDE_DIR}) +target_link_libraries (_avrocpp PRIVATE ch_contrib::snappy) if (COMPILER_GCC) set (SUPPRESS_WARNINGS -Wno-non-virtual-dtor) @@ -59,11 +66,12 @@ elseif (COMPILER_CLANG) set (SUPPRESS_WARNINGS -Wno-non-virtual-dtor) endif () -target_compile_options(avrocpp PRIVATE ${SUPPRESS_WARNINGS}) +target_compile_options(_avrocpp PRIVATE ${SUPPRESS_WARNINGS}) # create a symlink to include headers with ADD_CUSTOM_TARGET(avro_symlink_headers ALL COMMAND ${CMAKE_COMMAND} -E make_directory "${AVROCPP_ROOT_DIR}/include" COMMAND ${CMAKE_COMMAND} -E create_symlink "${AVROCPP_ROOT_DIR}/api" "${AVROCPP_ROOT_DIR}/include/avro" ) -add_dependencies(avrocpp avro_symlink_headers) +add_dependencies(_avrocpp avro_symlink_headers) +target_include_directories(_avrocpp SYSTEM BEFORE PUBLIC "${AVROCPP_ROOT_DIR}/include") diff --git a/contrib/aws-s3-cmake/CMakeLists.txt b/contrib/aws-s3-cmake/CMakeLists.txt index 50f9482ef54f..de6486e58fd9 100644 --- a/contrib/aws-s3-cmake/CMakeLists.txt +++ b/contrib/aws-s3-cmake/CMakeLists.txt @@ -1,3 +1,14 @@ +if(NOT OS_FREEBSD) + option(ENABLE_S3 "Enable S3" ${ENABLE_LIBRARIES}) +elseif(ENABLE_S3) + message (${RECONFIGURE_MESSAGE_LEVEL} "Can't use S3 on FreeBSD") +endif() + +if(NOT ENABLE_S3) + message(STATUS "Not using S3") + return() +endif() + SET(AWS_S3_LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/aws/aws-cpp-sdk-s3") SET(AWS_CORE_LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/aws/aws-cpp-sdk-core") SET(AWS_CHECKSUMS_LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/aws-checksums") @@ -80,24 +91,30 @@ set(S3_INCLUDES "${CMAKE_CURRENT_BINARY_DIR}/include/" ) -add_library(aws_s3_checksums ${AWS_CHECKSUMS_SOURCES}) -target_include_directories(aws_s3_checksums SYSTEM PUBLIC "${AWS_CHECKSUMS_LIBRARY_DIR}/include/") 
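The renames that follow apply the target-naming convention used throughout this change: the concrete target gets a leading underscore and consumers link only a ch_contrib:: alias, keeping contrib targets clearly namespaced. A minimal sketch with a hypothetical library foo:

# Sketch of the convention (hypothetical library 'foo'):
add_library(_foo ${FOO_SRCS})             # real target, underscore-prefixed
add_library(ch_contrib::foo ALIAS _foo)   # namespaced alias for consumers
# elsewhere in the tree: target_link_libraries(dbms PRIVATE ch_contrib::foo)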
+add_library(_aws_s3_checksums ${AWS_CHECKSUMS_SOURCES}) +target_include_directories(_aws_s3_checksums SYSTEM PUBLIC "${AWS_CHECKSUMS_LIBRARY_DIR}/include/") if(CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG") - target_compile_definitions(aws_s3_checksums PRIVATE "-DDEBUG_BUILD") + target_compile_definitions(_aws_s3_checksums PRIVATE "-DDEBUG_BUILD") endif() -set_target_properties(aws_s3_checksums PROPERTIES LINKER_LANGUAGE C) -set_property(TARGET aws_s3_checksums PROPERTY C_STANDARD 99) +set_target_properties(_aws_s3_checksums PROPERTIES LINKER_LANGUAGE C) +set_property(TARGET _aws_s3_checksums PROPERTY C_STANDARD 99) -add_library(aws_s3 ${S3_UNIFIED_SRC}) +add_library(_aws_s3 ${S3_UNIFIED_SRC}) -target_compile_definitions(aws_s3 PUBLIC "AWS_SDK_VERSION_MAJOR=1") -target_compile_definitions(aws_s3 PUBLIC "AWS_SDK_VERSION_MINOR=7") -target_compile_definitions(aws_s3 PUBLIC "AWS_SDK_VERSION_PATCH=231") -target_include_directories(aws_s3 SYSTEM PUBLIC ${S3_INCLUDES}) +target_compile_definitions(_aws_s3 PUBLIC "AWS_SDK_VERSION_MAJOR=1") +target_compile_definitions(_aws_s3 PUBLIC "AWS_SDK_VERSION_MINOR=7") +target_compile_definitions(_aws_s3 PUBLIC "AWS_SDK_VERSION_PATCH=231") +target_include_directories(_aws_s3 SYSTEM BEFORE PUBLIC ${S3_INCLUDES}) -if (OPENSSL_FOUND) - target_compile_definitions(aws_s3 PUBLIC -DENABLE_OPENSSL_ENCRYPTION) - target_link_libraries(aws_s3 PRIVATE ${OPENSSL_LIBRARIES}) +if (TARGET OpenSSL::SSL) + target_compile_definitions(_aws_s3 PUBLIC -DENABLE_OPENSSL_ENCRYPTION) + target_link_libraries(_aws_s3 PRIVATE OpenSSL::Crypto OpenSSL::SSL) endif() -target_link_libraries(aws_s3 PRIVATE aws_s3_checksums) +target_link_libraries(_aws_s3 PRIVATE _aws_s3_checksums) + +# The library is large - avoid bloat. +target_compile_options (_aws_s3 PRIVATE -g0) +target_compile_options (_aws_s3_checksums PRIVATE -g0) + +add_library(ch_contrib::aws_s3 ALIAS _aws_s3) diff --git a/contrib/azure-cmake/CMakeLists.txt b/contrib/azure-cmake/CMakeLists.txt index 527503b85a2c..031d8dc9a0bf 100644 --- a/contrib/azure-cmake/CMakeLists.txt +++ b/contrib/azure-cmake/CMakeLists.txt @@ -1,3 +1,10 @@ +option (ENABLE_AZURE_BLOB_STORAGE "Enable Azure blob storage" ${ENABLE_LIBRARIES}) + +if (NOT ENABLE_AZURE_BLOB_STORAGE) + message(STATUS "Not using Azure blob storage") + return() +endif() + set(AZURE_DIR "${ClickHouse_SOURCE_DIR}/contrib/azure") set(AZURE_SDK_LIBRARY_DIR "${AZURE_DIR}/sdk") @@ -43,10 +50,10 @@ set(AZURE_SDK_INCLUDES include("${AZURE_DIR}/cmake-modules/AzureTransportAdapters.cmake") -add_library(azure_sdk ${AZURE_SDK_UNIFIED_SRC}) +add_library(_azure_sdk ${AZURE_SDK_UNIFIED_SRC}) if (COMPILER_CLANG) - target_compile_options(azure_sdk PRIVATE + target_compile_options(_azure_sdk PRIVATE -Wno-deprecated-copy-dtor -Wno-extra-semi -Wno-suggest-destructor-override @@ -55,20 +62,22 @@ if (COMPILER_CLANG) ) if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 13) - target_compile_options(azure_sdk PRIVATE -Wno-reserved-identifier) + target_compile_options(_azure_sdk PRIVATE -Wno-reserved-identifier) endif() endif() # Originally, on Windows azure-core is built with bcrypt and crypt32 by default -if (OPENSSL_FOUND) - target_link_libraries(azure_sdk PRIVATE ${OPENSSL_LIBRARIES}) +if (TARGET OpenSSL::SSL) + target_link_libraries(_azure_sdk PRIVATE OpenSSL::Crypto OpenSSL::SSL) endif() # Originally, on Windows azure-core is built with winhttp by default -if (CURL_FOUND) - target_link_libraries(azure_sdk PRIVATE ${CURL_LIBRARY}) +if (TARGET ch_contrib::curl) + target_link_libraries(_azure_sdk PRIVATE 
ch_contrib::curl) endif() -target_link_libraries(azure_sdk PRIVATE ${LIBXML2_LIBRARIES}) +target_link_libraries(_azure_sdk PRIVATE ch_contrib::libxml2) + +target_include_directories(_azure_sdk SYSTEM BEFORE PUBLIC ${AZURE_SDK_INCLUDES}) -target_include_directories(azure_sdk SYSTEM PUBLIC ${AZURE_SDK_INCLUDES}) +add_library(ch_contrib::azure_sdk ALIAS _azure_sdk) diff --git a/contrib/base64-cmake/CMakeLists.txt b/contrib/base64-cmake/CMakeLists.txt index 4ebb4e687282..69040a9bedc7 100644 --- a/contrib/base64-cmake/CMakeLists.txt +++ b/contrib/base64-cmake/CMakeLists.txt @@ -1,36 +1,47 @@ +if(ARCH_AMD64 OR ARCH_ARM) + option (ENABLE_BASE64 "Enable base64" ${ENABLE_LIBRARIES}) +elseif(ENABLE_BASE64) + message (${RECONFIGURE_MESSAGE_LEVEL} "base64 library is only supported on x86_64 and aarch64") +endif() + +if (NOT ENABLE_BASE64) + message(STATUS "Not using base64") + return() +endif() + SET(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/base64") -add_library(base64_scalar OBJECT "${LIBRARY_DIR}/turbob64c.c" "${LIBRARY_DIR}/turbob64d.c") -add_library(base64_ssse3 OBJECT "${LIBRARY_DIR}/turbob64sse.c") # This file also contains code for ARM NEON +add_library(_base64_scalar OBJECT "${LIBRARY_DIR}/turbob64c.c" "${LIBRARY_DIR}/turbob64d.c") +add_library(_base64_ssse3 OBJECT "${LIBRARY_DIR}/turbob64sse.c") # This file also contains code for ARM NEON if (ARCH_AMD64) - add_library(base64_avx OBJECT "${LIBRARY_DIR}/turbob64sse.c") # This is not a mistake. One file is compiled twice. - add_library(base64_avx2 OBJECT "${LIBRARY_DIR}/turbob64avx2.c") + add_library(_base64_avx OBJECT "${LIBRARY_DIR}/turbob64sse.c") # This is not a mistake. One file is compiled twice. + add_library(_base64_avx2 OBJECT "${LIBRARY_DIR}/turbob64avx2.c") endif () -target_compile_options(base64_scalar PRIVATE -falign-loops) +target_compile_options(_base64_scalar PRIVATE -falign-loops) if (ARCH_AMD64) - target_compile_options(base64_ssse3 PRIVATE -mno-avx -mno-avx2 -mssse3 -falign-loops) - target_compile_options(base64_avx PRIVATE -falign-loops -mavx) - target_compile_options(base64_avx2 PRIVATE -falign-loops -mavx2) + target_compile_options(_base64_ssse3 PRIVATE -mno-avx -mno-avx2 -mssse3 -falign-loops) + target_compile_options(_base64_avx PRIVATE -falign-loops -mavx) + target_compile_options(_base64_avx2 PRIVATE -falign-loops -mavx2) else () - target_compile_options(base64_ssse3 PRIVATE -falign-loops) + target_compile_options(_base64_ssse3 PRIVATE -falign-loops) endif () if (ARCH_AMD64) - add_library(base64 - $<TARGET_OBJECTS:base64_scalar> - $<TARGET_OBJECTS:base64_ssse3> - $<TARGET_OBJECTS:base64_avx> - $<TARGET_OBJECTS:base64_avx2>) + add_library(_base64 + $<TARGET_OBJECTS:_base64_scalar> + $<TARGET_OBJECTS:_base64_ssse3> + $<TARGET_OBJECTS:_base64_avx> + $<TARGET_OBJECTS:_base64_avx2>) else () - add_library(base64 - $<TARGET_OBJECTS:base64_scalar> - $<TARGET_OBJECTS:base64_ssse3>) + add_library(_base64 + $<TARGET_OBJECTS:_base64_scalar> + $<TARGET_OBJECTS:_base64_ssse3>) endif () -target_include_directories(base64 SYSTEM PUBLIC ${LIBRARY_DIR}) +target_include_directories(_base64 SYSTEM PUBLIC ${LIBRARY_DIR}) if (XCODE OR XCODE_VERSION) # https://gitlab.kitware.com/cmake/cmake/issues/17457 @@ -39,5 +50,7 @@ if (XCODE OR XCODE_VERSION) if (NOT EXISTS "${CMAKE_CURRENT_BINARY_DIR}/dummy.c") file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/dummy.c" "") endif () - target_sources(base64 PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/dummy.c") + target_sources(_base64 PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/dummy.c") endif () + +add_library(ch_contrib::base64 ALIAS _base64) diff --git a/contrib/boost-cmake/CMakeLists.txt b/contrib/boost-cmake/CMakeLists.txt index 4a21b8a0e2d5..0215c68e683f 100644 --- a/contrib/boost-cmake/CMakeLists.txt +++ b/contrib/boost-cmake/CMakeLists.txt @@ -1,243 +1,181 @@ -option (USE_INTERNAL_BOOST_LIBRARY "Use internal Boost library" ON) - -if (NOT
USE_INTERNAL_BOOST_LIBRARY) - set(BOOST_VERSION 1.78) - - find_package(Boost ${BOOST_VERSION} COMPONENTS - system - filesystem - iostreams - program_options - regex - context - coroutine - graph - ) - - if(Boost_INCLUDE_DIR AND Boost_FILESYSTEM_LIBRARY AND - Boost_PROGRAM_OPTIONS_LIBRARY AND Boost_REGEX_LIBRARY AND Boost_SYSTEM_LIBRARY AND Boost_CONTEXT_LIBRARY AND - Boost_COROUTINE_LIBRARY AND Boost_GRAPH_LIBRARY) - - set(EXTERNAL_BOOST_FOUND 1) - - add_library (_boost_headers_only INTERFACE) - add_library (boost::headers_only ALIAS _boost_headers_only) - target_include_directories (_boost_headers_only SYSTEM BEFORE INTERFACE ${Boost_INCLUDE_DIR}) - - add_library (_boost_filesystem INTERFACE) - add_library (_boost_iostreams INTERFACE) - add_library (_boost_program_options INTERFACE) - add_library (_boost_regex INTERFACE) - add_library (_boost_system INTERFACE) - add_library (_boost_context INTERFACE) - add_library (_boost_coroutine INTERFACE) - add_library (_boost_graph INTERFACE) - - target_link_libraries (_boost_filesystem INTERFACE ${Boost_FILESYSTEM_LIBRARY}) - target_link_libraries (_boost_iostreams INTERFACE ${Boost_IOSTREAMS_LIBRARY}) - target_link_libraries (_boost_program_options INTERFACE ${Boost_PROGRAM_OPTIONS_LIBRARY}) - target_link_libraries (_boost_regex INTERFACE ${Boost_REGEX_LIBRARY}) - target_link_libraries (_boost_system INTERFACE ${Boost_SYSTEM_LIBRARY}) - target_link_libraries (_boost_context INTERFACE ${Boost_CONTEXT_LIBRARY}) - target_link_libraries (_boost_coroutine INTERFACE ${Boost_COROUTINE_LIBRARY}) - target_link_libraries (_boost_graph INTERFACE ${Boost_GRAPH_LIBRARY}) - - add_library (boost::filesystem ALIAS _boost_filesystem) - add_library (boost::iostreams ALIAS _boost_iostreams) - add_library (boost::program_options ALIAS _boost_program_options) - add_library (boost::regex ALIAS _boost_regex) - add_library (boost::system ALIAS _boost_system) - add_library (boost::context ALIAS _boost_context) - add_library (boost::coroutine ALIAS _boost_coroutine) - add_library (boost::graph ALIAS _boost_graph) - else() - set(EXTERNAL_BOOST_FOUND 0) - message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find system boost") +set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/boost") + +# filesystem + +set (SRCS_FILESYSTEM + "${LIBRARY_DIR}/libs/filesystem/src/codecvt_error_category.cpp" + "${LIBRARY_DIR}/libs/filesystem/src/directory.cpp" + "${LIBRARY_DIR}/libs/filesystem/src/exception.cpp" + "${LIBRARY_DIR}/libs/filesystem/src/operations.cpp" + "${LIBRARY_DIR}/libs/filesystem/src/path.cpp" + "${LIBRARY_DIR}/libs/filesystem/src/path_traits.cpp" + "${LIBRARY_DIR}/libs/filesystem/src/portability.cpp" + "${LIBRARY_DIR}/libs/filesystem/src/unique_path.cpp" + "${LIBRARY_DIR}/libs/filesystem/src/utf8_codecvt_facet.cpp" + "${LIBRARY_DIR}/libs/filesystem/src/windows_file_codecvt.cpp" +) + +add_library (_boost_filesystem ${SRCS_FILESYSTEM}) +add_library (boost::filesystem ALIAS _boost_filesystem) +target_include_directories (_boost_filesystem SYSTEM BEFORE PUBLIC ${LIBRARY_DIR}) + +# headers-only + +add_library (_boost_headers_only INTERFACE) +add_library (boost::headers_only ALIAS _boost_headers_only) +target_include_directories (_boost_headers_only SYSTEM BEFORE INTERFACE ${LIBRARY_DIR}) + +# asio + +target_compile_definitions (_boost_headers_only INTERFACE BOOST_ASIO_STANDALONE=1) + +# iostreams + +set (SRCS_IOSTREAMS + "${LIBRARY_DIR}/libs/iostreams/src/file_descriptor.cpp" + "${LIBRARY_DIR}/libs/iostreams/src/gzip.cpp" + "${LIBRARY_DIR}/libs/iostreams/src/mapped_file.cpp" + 
"${LIBRARY_DIR}/libs/iostreams/src/zlib.cpp" +) + +add_library (_boost_iostreams ${SRCS_IOSTREAMS}) +add_library (boost::iostreams ALIAS _boost_iostreams) +target_include_directories (_boost_iostreams PRIVATE ${LIBRARY_DIR}) +target_link_libraries (_boost_iostreams PRIVATE ch_contrib::zlib) + +# program_options + +set (SRCS_PROGRAM_OPTIONS + "${LIBRARY_DIR}/libs/program_options/src/cmdline.cpp" + "${LIBRARY_DIR}/libs/program_options/src/config_file.cpp" + "${LIBRARY_DIR}/libs/program_options/src/convert.cpp" + "${LIBRARY_DIR}/libs/program_options/src/options_description.cpp" + "${LIBRARY_DIR}/libs/program_options/src/parsers.cpp" + "${LIBRARY_DIR}/libs/program_options/src/positional_options.cpp" + "${LIBRARY_DIR}/libs/program_options/src/split.cpp" + "${LIBRARY_DIR}/libs/program_options/src/utf8_codecvt_facet.cpp" + "${LIBRARY_DIR}/libs/program_options/src/value_semantic.cpp" + "${LIBRARY_DIR}/libs/program_options/src/variables_map.cpp" + "${LIBRARY_DIR}/libs/program_options/src/winmain.cpp" +) + +add_library (_boost_program_options ${SRCS_PROGRAM_OPTIONS}) +add_library (boost::program_options ALIAS _boost_program_options) +target_include_directories (_boost_program_options SYSTEM BEFORE PUBLIC ${LIBRARY_DIR}) + +# regex + +set (SRCS_REGEX + "${LIBRARY_DIR}/libs/regex/src/posix_api.cpp" + "${LIBRARY_DIR}/libs/regex/src/regex_debug.cpp" + "${LIBRARY_DIR}/libs/regex/src/regex.cpp" + "${LIBRARY_DIR}/libs/regex/src/static_mutex.cpp" + "${LIBRARY_DIR}/libs/regex/src/wide_posix_api.cpp" +) + +add_library (_boost_regex ${SRCS_REGEX}) +add_library (boost::regex ALIAS _boost_regex) +target_include_directories (_boost_regex PRIVATE ${LIBRARY_DIR}) + +# system + +set (SRCS_SYSTEM + "${LIBRARY_DIR}/libs/system/src/error_code.cpp" +) + +add_library (_boost_system ${SRCS_SYSTEM}) +add_library (boost::system ALIAS _boost_system) +target_include_directories (_boost_system PRIVATE ${LIBRARY_DIR}) + +# context +enable_language(ASM) +SET(ASM_OPTIONS "-x assembler-with-cpp") + +set (SRCS_CONTEXT + "${LIBRARY_DIR}/libs/context/src/dummy.cpp" + "${LIBRARY_DIR}/libs/context/src/posix/stack_traits.cpp" +) + +if (SANITIZE AND (SANITIZE STREQUAL "address" OR SANITIZE STREQUAL "thread")) + add_compile_definitions(BOOST_USE_UCONTEXT) + + if (SANITIZE STREQUAL "address") + add_compile_definitions(BOOST_USE_ASAN) + elseif (SANITIZE STREQUAL "thread") + add_compile_definitions(BOOST_USE_TSAN) endif() -endif() -if (NOT EXTERNAL_BOOST_FOUND) - set (USE_INTERNAL_BOOST_LIBRARY 1) - set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/boost") - - # filesystem - - set (SRCS_FILESYSTEM - "${LIBRARY_DIR}/libs/filesystem/src/codecvt_error_category.cpp" - "${LIBRARY_DIR}/libs/filesystem/src/directory.cpp" - "${LIBRARY_DIR}/libs/filesystem/src/exception.cpp" - "${LIBRARY_DIR}/libs/filesystem/src/operations.cpp" - "${LIBRARY_DIR}/libs/filesystem/src/path.cpp" - "${LIBRARY_DIR}/libs/filesystem/src/path_traits.cpp" - "${LIBRARY_DIR}/libs/filesystem/src/portability.cpp" - "${LIBRARY_DIR}/libs/filesystem/src/unique_path.cpp" - "${LIBRARY_DIR}/libs/filesystem/src/utf8_codecvt_facet.cpp" - "${LIBRARY_DIR}/libs/filesystem/src/windows_file_codecvt.cpp" + set (SRCS_CONTEXT ${SRCS_CONTEXT} + "${LIBRARY_DIR}/libs/context/src/fiber.cpp" + "${LIBRARY_DIR}/libs/context/src/continuation.cpp" ) - - add_library (_boost_filesystem ${SRCS_FILESYSTEM}) - add_library (boost::filesystem ALIAS _boost_filesystem) - target_include_directories (_boost_filesystem SYSTEM BEFORE PUBLIC ${LIBRARY_DIR}) - - # headers-only - - add_library (_boost_headers_only 
INTERFACE) - add_library (boost::headers_only ALIAS _boost_headers_only) - target_include_directories (_boost_headers_only SYSTEM BEFORE INTERFACE ${LIBRARY_DIR}) - - # asio - - target_compile_definitions (_boost_headers_only INTERFACE BOOST_ASIO_STANDALONE=1) - - # iostreams - - set (SRCS_IOSTREAMS - "${LIBRARY_DIR}/libs/iostreams/src/file_descriptor.cpp" - "${LIBRARY_DIR}/libs/iostreams/src/gzip.cpp" - "${LIBRARY_DIR}/libs/iostreams/src/mapped_file.cpp" - "${LIBRARY_DIR}/libs/iostreams/src/zlib.cpp" +endif() +if (ARCH_ARM) + set (SRCS_CONTEXT ${SRCS_CONTEXT} + "${LIBRARY_DIR}/libs/context/src/asm/jump_arm64_aapcs_elf_gas.S" + "${LIBRARY_DIR}/libs/context/src/asm/make_arm64_aapcs_elf_gas.S" + "${LIBRARY_DIR}/libs/context/src/asm/ontop_arm64_aapcs_elf_gas.S" ) - - add_library (_boost_iostreams ${SRCS_IOSTREAMS}) - add_library (boost::iostreams ALIAS _boost_iostreams) - target_include_directories (_boost_iostreams PRIVATE ${LIBRARY_DIR}) - target_link_libraries (_boost_iostreams PRIVATE ${ZLIB_LIBRARIES}) - - # program_options - - set (SRCS_PROGRAM_OPTIONS - "${LIBRARY_DIR}/libs/program_options/src/cmdline.cpp" - "${LIBRARY_DIR}/libs/program_options/src/config_file.cpp" - "${LIBRARY_DIR}/libs/program_options/src/convert.cpp" - "${LIBRARY_DIR}/libs/program_options/src/options_description.cpp" - "${LIBRARY_DIR}/libs/program_options/src/parsers.cpp" - "${LIBRARY_DIR}/libs/program_options/src/positional_options.cpp" - "${LIBRARY_DIR}/libs/program_options/src/split.cpp" - "${LIBRARY_DIR}/libs/program_options/src/utf8_codecvt_facet.cpp" - "${LIBRARY_DIR}/libs/program_options/src/value_semantic.cpp" - "${LIBRARY_DIR}/libs/program_options/src/variables_map.cpp" - "${LIBRARY_DIR}/libs/program_options/src/winmain.cpp" +elseif (ARCH_PPC64LE) + set (SRCS_CONTEXT ${SRCS_CONTEXT} + "${LIBRARY_DIR}/libs/context/src/asm/jump_ppc64_sysv_elf_gas.S" + "${LIBRARY_DIR}/libs/context/src/asm/make_ppc64_sysv_elf_gas.S" + "${LIBRARY_DIR}/libs/context/src/asm/ontop_ppc64_sysv_elf_gas.S" ) - - add_library (_boost_program_options ${SRCS_PROGRAM_OPTIONS}) - add_library (boost::program_options ALIAS _boost_program_options) - target_include_directories (_boost_program_options SYSTEM BEFORE PUBLIC ${LIBRARY_DIR}) - - # regex - - set (SRCS_REGEX - "${LIBRARY_DIR}/libs/regex/src/posix_api.cpp" - "${LIBRARY_DIR}/libs/regex/src/regex_debug.cpp" - "${LIBRARY_DIR}/libs/regex/src/regex.cpp" - "${LIBRARY_DIR}/libs/regex/src/static_mutex.cpp" - "${LIBRARY_DIR}/libs/regex/src/wide_posix_api.cpp" +elseif (ARCH_RISCV64) + set (SRCS_CONTEXT ${SRCS_CONTEXT} + "${LIBRARY_DIR}/libs/context/src/asm/jump_riscv64_sysv_elf_gas.S" + "${LIBRARY_DIR}/libs/context/src/asm/make_riscv64_sysv_elf_gas.S" + "${LIBRARY_DIR}/libs/context/src/asm/ontop_riscv64_sysv_elf_gas.S" ) - - add_library (_boost_regex ${SRCS_REGEX}) - add_library (boost::regex ALIAS _boost_regex) - target_include_directories (_boost_regex PRIVATE ${LIBRARY_DIR}) - - # system - - set (SRCS_SYSTEM - "${LIBRARY_DIR}/libs/system/src/error_code.cpp" +elseif(OS_DARWIN) + set (SRCS_CONTEXT ${SRCS_CONTEXT} + "${LIBRARY_DIR}/libs/context/src/asm/jump_x86_64_sysv_macho_gas.S" + "${LIBRARY_DIR}/libs/context/src/asm/make_x86_64_sysv_macho_gas.S" + "${LIBRARY_DIR}/libs/context/src/asm/ontop_x86_64_sysv_macho_gas.S" ) - - add_library (_boost_system ${SRCS_SYSTEM}) - add_library (boost::system ALIAS _boost_system) - target_include_directories (_boost_system PRIVATE ${LIBRARY_DIR}) - - # context - enable_language(ASM) - SET(ASM_OPTIONS "-x assembler-with-cpp") - - set (SRCS_CONTEXT - 
"${LIBRARY_DIR}/libs/context/src/dummy.cpp" - "${LIBRARY_DIR}/libs/context/src/posix/stack_traits.cpp" - ) - - if (SANITIZE AND (SANITIZE STREQUAL "address" OR SANITIZE STREQUAL "thread")) - add_compile_definitions(BOOST_USE_UCONTEXT) - - if (SANITIZE STREQUAL "address") - add_compile_definitions(BOOST_USE_ASAN) - elseif (SANITIZE STREQUAL "thread") - add_compile_definitions(BOOST_USE_TSAN) - endif() - - set (SRCS_CONTEXT ${SRCS_CONTEXT} - "${LIBRARY_DIR}/libs/context/src/fiber.cpp" - "${LIBRARY_DIR}/libs/context/src/continuation.cpp" - ) - endif() - if (ARCH_ARM) - set (SRCS_CONTEXT ${SRCS_CONTEXT} - "${LIBRARY_DIR}/libs/context/src/asm/jump_arm64_aapcs_elf_gas.S" - "${LIBRARY_DIR}/libs/context/src/asm/make_arm64_aapcs_elf_gas.S" - "${LIBRARY_DIR}/libs/context/src/asm/ontop_arm64_aapcs_elf_gas.S" - ) - elseif (ARCH_PPC64LE) - set (SRCS_CONTEXT ${SRCS_CONTEXT} - "${LIBRARY_DIR}/libs/context/src/asm/jump_ppc64_sysv_elf_gas.S" - "${LIBRARY_DIR}/libs/context/src/asm/make_ppc64_sysv_elf_gas.S" - "${LIBRARY_DIR}/libs/context/src/asm/ontop_ppc64_sysv_elf_gas.S" - ) - elseif (ARCH_RISCV64) - set (SRCS_CONTEXT ${SRCS_CONTEXT} - "${LIBRARY_DIR}/libs/context/src/asm/jump_riscv64_sysv_elf_gas.S" - "${LIBRARY_DIR}/libs/context/src/asm/make_riscv64_sysv_elf_gas.S" - "${LIBRARY_DIR}/libs/context/src/asm/ontop_riscv64_sysv_elf_gas.S" - ) - elseif(OS_DARWIN) - set (SRCS_CONTEXT ${SRCS_CONTEXT} - "${LIBRARY_DIR}/libs/context/src/asm/jump_x86_64_sysv_macho_gas.S" - "${LIBRARY_DIR}/libs/context/src/asm/make_x86_64_sysv_macho_gas.S" - "${LIBRARY_DIR}/libs/context/src/asm/ontop_x86_64_sysv_macho_gas.S" - ) - else() - set (SRCS_CONTEXT ${SRCS_CONTEXT} - "${LIBRARY_DIR}/libs/context/src/asm/jump_x86_64_sysv_elf_gas.S" - "${LIBRARY_DIR}/libs/context/src/asm/make_x86_64_sysv_elf_gas.S" - "${LIBRARY_DIR}/libs/context/src/asm/ontop_x86_64_sysv_elf_gas.S" - ) - endif() - - add_library (_boost_context ${SRCS_CONTEXT}) - add_library (boost::context ALIAS _boost_context) - target_include_directories (_boost_context PRIVATE ${LIBRARY_DIR}) - - # coroutine - - set (SRCS_COROUTINE - "${LIBRARY_DIR}/libs/coroutine/detail/coroutine_context.cpp" - "${LIBRARY_DIR}/libs/coroutine/exceptions.cpp" - "${LIBRARY_DIR}/libs/coroutine/posix/stack_traits.cpp" - ) - add_library (_boost_coroutine ${SRCS_COROUTINE}) - add_library (boost::coroutine ALIAS _boost_coroutine) - target_include_directories (_boost_coroutine PRIVATE ${LIBRARY_DIR}) - target_link_libraries(_boost_coroutine PRIVATE _boost_context) - - # graph - - set (SRCS_GRAPH - "${LIBRARY_DIR}/libs/graph/src/graphml.cpp" - "${LIBRARY_DIR}/libs/graph/src/read_graphviz_new.cpp" +else() + set (SRCS_CONTEXT ${SRCS_CONTEXT} + "${LIBRARY_DIR}/libs/context/src/asm/jump_x86_64_sysv_elf_gas.S" + "${LIBRARY_DIR}/libs/context/src/asm/make_x86_64_sysv_elf_gas.S" + "${LIBRARY_DIR}/libs/context/src/asm/ontop_x86_64_sysv_elf_gas.S" ) +endif() - add_library (_boost_graph ${SRCS_GRAPH}) - add_library (boost::graph ALIAS _boost_graph) - target_include_directories (_boost_graph PRIVATE ${LIBRARY_DIR}) - target_link_libraries(_boost_graph PRIVATE _boost_regex) - - # circular buffer - add_library(_boost_circular_buffer INTERFACE) - add_library(boost::circular_buffer ALIAS _boost_circular_buffer) - target_include_directories(_boost_circular_buffer SYSTEM BEFORE INTERFACE ${LIBRARY_DIR}) - - # heap - add_library(_boost_heap INTERFACE) - add_library(boost::heap ALIAS _boost_heap) - target_include_directories(_boost_heap SYSTEM BEFORE INTERFACE ${LIBRARY_DIR}) - -endif () +add_library (_boost_context 
${SRCS_CONTEXT}) +add_library (boost::context ALIAS _boost_context) +target_include_directories (_boost_context PRIVATE ${LIBRARY_DIR}) + +# coroutine + +set (SRCS_COROUTINE + "${LIBRARY_DIR}/libs/coroutine/detail/coroutine_context.cpp" + "${LIBRARY_DIR}/libs/coroutine/exceptions.cpp" + "${LIBRARY_DIR}/libs/coroutine/posix/stack_traits.cpp" +) +add_library (_boost_coroutine ${SRCS_COROUTINE}) +add_library (boost::coroutine ALIAS _boost_coroutine) +target_include_directories (_boost_coroutine PRIVATE ${LIBRARY_DIR}) +target_link_libraries(_boost_coroutine PRIVATE _boost_context) + +# graph + +set (SRCS_GRAPH + "${LIBRARY_DIR}/libs/graph/src/graphml.cpp" + "${LIBRARY_DIR}/libs/graph/src/read_graphviz_new.cpp" +) + +add_library (_boost_graph ${SRCS_GRAPH}) +add_library (boost::graph ALIAS _boost_graph) +target_include_directories (_boost_graph PRIVATE ${LIBRARY_DIR}) +target_link_libraries(_boost_graph PRIVATE _boost_regex) + +# circular buffer +add_library(_boost_circular_buffer INTERFACE) +add_library(boost::circular_buffer ALIAS _boost_circular_buffer) +target_include_directories(_boost_circular_buffer SYSTEM BEFORE INTERFACE ${LIBRARY_DIR}) + +# heap +add_library(_boost_heap INTERFACE) +add_library(boost::heap ALIAS _boost_heap) +target_include_directories(_boost_heap SYSTEM BEFORE INTERFACE ${LIBRARY_DIR}) diff --git a/contrib/boringssl-cmake/CMakeLists.txt b/contrib/boringssl-cmake/CMakeLists.txt index d599351fd5c1..dd3332d70be0 100644 --- a/contrib/boringssl-cmake/CMakeLists.txt +++ b/contrib/boringssl-cmake/CMakeLists.txt @@ -1,3 +1,13 @@ +# Needed for: +# - securely connecting to an external server, e.g. clickhouse-client --host ... --secure +# - lots of thirdparty libraries +option(ENABLE_SSL "Enable ssl" ${ENABLE_LIBRARIES}) + +if(NOT ENABLE_SSL) + message(STATUS "Not using openssl") + return() +endif() + # Copyright (c) 2019 The Chromium Authors. All rights reserved. # Use of this source code is governed by a BSD-style license that can be # found in the LICENSE file. @@ -358,7 +368,7 @@ elseif(WIN32) endif() add_library( - crypto + _crypto ${CRYPTO_ARCH_SOURCES} err_data.c @@ -595,7 +605,7 @@ add_library( ) add_library( - ssl + _ssl "${BORINGSSL_SOURCE_DIR}/ssl/bio_ssl.cc" "${BORINGSSL_SOURCE_DIR}/ssl/d1_both.cc" @@ -662,18 +672,22 @@ add_executable( "${BORINGSSL_SOURCE_DIR}/tool/transport_common.cc" ) -target_link_libraries(ssl crypto) -target_link_libraries(bssl ssl) +target_link_libraries(_ssl _crypto) +target_link_libraries(bssl _ssl) if(NOT WIN32 AND NOT ANDROID) - target_link_libraries(crypto pthread) + target_link_libraries(_crypto pthread) endif() +# NOTE: that ClickHouse does not support WIN32 anyway. 
if(WIN32) target_link_libraries(bssl ws2_32) endif() -target_include_directories(crypto SYSTEM PUBLIC "${BORINGSSL_SOURCE_DIR}/include") -target_include_directories(ssl SYSTEM PUBLIC "${BORINGSSL_SOURCE_DIR}/include") +target_include_directories(_crypto SYSTEM PUBLIC "${BORINGSSL_SOURCE_DIR}/include") +target_include_directories(_ssl SYSTEM PUBLIC "${BORINGSSL_SOURCE_DIR}/include") + +target_compile_options(_crypto PRIVATE -Wno-gnu-anonymous-struct) -target_compile_options(crypto PRIVATE -Wno-gnu-anonymous-struct) +add_library(OpenSSL::Crypto ALIAS _crypto) +add_library(OpenSSL::SSL ALIAS _ssl) diff --git a/contrib/brotli-cmake/CMakeLists.txt b/contrib/brotli-cmake/CMakeLists.txt index 7293cae0665f..c81a6bf9076c 100644 --- a/contrib/brotli-cmake/CMakeLists.txt +++ b/contrib/brotli-cmake/CMakeLists.txt @@ -1,3 +1,10 @@ +option (ENABLE_BROTLI "Enable brotli" ${ENABLE_LIBRARIES}) + +if (NOT ENABLE_BROTLI) + message(STATUS "Not using brotli") + return() +endif() + set(BROTLI_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/brotli/c") set(BROTLI_BINARY_DIR "${ClickHouse_BINARY_DIR}/contrib/brotli/c") @@ -33,10 +40,12 @@ set(SRCS "${BROTLI_SOURCE_DIR}/common/constants.c" ) -add_library(brotli ${SRCS}) +add_library(_brotli ${SRCS}) +add_library(ch_contrib::brotli ALIAS _brotli) -target_include_directories(brotli PUBLIC "${BROTLI_SOURCE_DIR}/include") +target_include_directories(_brotli SYSTEM BEFORE PUBLIC "${BROTLI_SOURCE_DIR}/include") if(M_LIBRARY) - target_link_libraries(brotli PRIVATE ${M_LIBRARY}) + target_link_libraries(_brotli PRIVATE ${M_LIBRARY}) endif() +target_compile_definitions(_brotli PRIVATE BROTLI_BUILD_PORTABLE=1) diff --git a/contrib/bzip2-cmake/CMakeLists.txt b/contrib/bzip2-cmake/CMakeLists.txt index a9d2efa43c15..2e01a6240007 100644 --- a/contrib/bzip2-cmake/CMakeLists.txt +++ b/contrib/bzip2-cmake/CMakeLists.txt @@ -1,3 +1,9 @@ +option(ENABLE_BZIP2 "Enable bzip2 compression support" ${ENABLE_LIBRARIES}) +if (NOT ENABLE_BZIP2) + message (STATUS "bzip2 compression disabled") + return() +endif() + set(BZIP2_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/bzip2") set(BZIP2_BINARY_DIR "${ClickHouse_BINARY_DIR}/contrib/bzip2") @@ -18,6 +24,10 @@ configure_file ( "${BZIP2_BINARY_DIR}/bz_version.h" ) -add_library(bzip2 ${SRCS}) - -target_include_directories(bzip2 PUBLIC "${BZIP2_SOURCE_DIR}" "${BZIP2_BINARY_DIR}") +add_library(_bzip2 ${SRCS}) +add_library(ch_contrib::bzip2 ALIAS _bzip2) +# To avoid -Wreserved-id-macro we use SYSTEM: +# +# clickhouse/contrib/bzip2/bzlib.h:23:9: error: macro name is a reserved identifier [-Werror,-Wreserved-id-macro] +# #define _BZLIB_H +target_include_directories(_bzip2 SYSTEM BEFORE PUBLIC "${BZIP2_SOURCE_DIR}" "${BZIP2_BINARY_DIR}") diff --git a/contrib/capnproto-cmake/CMakeLists.txt b/contrib/capnproto-cmake/CMakeLists.txt index 054463555350..297b847cd585 100644 --- a/contrib/capnproto-cmake/CMakeLists.txt +++ b/contrib/capnproto-cmake/CMakeLists.txt @@ -1,3 +1,10 @@ +option (ENABLE_CAPNP "Enable Cap'n Proto" ${ENABLE_LIBRARIES}) + +if (NOT ENABLE_CAPNP) + message(STATUS "Not using Cap'n Proto library") + return() +endif() + set (CAPNPROTO_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/capnproto/c++/src") set (CMAKE_CXX_STANDARD 17) @@ -29,8 +36,8 @@ set (KJ_SRCS "${CAPNPROTO_SOURCE_DIR}/kj/parse/char.c++" ) -add_library(kj ${KJ_SRCS}) -target_include_directories(kj SYSTEM PUBLIC ${CAPNPROTO_SOURCE_DIR}) +add_library(_kj ${KJ_SRCS}) +target_include_directories(_kj SYSTEM PUBLIC ${CAPNPROTO_SOURCE_DIR}) set (CAPNP_SRCS 
"${CAPNPROTO_SOURCE_DIR}/capnp/c++.capnp.c++" @@ -51,11 +58,11 @@ set (CAPNP_SRCS "${CAPNPROTO_SOURCE_DIR}/capnp/stringify.c++" ) -add_library(capnp ${CAPNP_SRCS}) -set_target_properties(capnp +add_library(_capnp ${CAPNP_SRCS}) +set_target_properties(_capnp PROPERTIES LINKER_LANGUAGE CXX ) -target_link_libraries(capnp PUBLIC kj) +target_link_libraries(_capnp PUBLIC _kj) set (CAPNPC_SRCS "${CAPNPROTO_SOURCE_DIR}/capnp/compiler/type-id.c++" @@ -71,8 +78,8 @@ set (CAPNPC_SRCS "${CAPNPROTO_SOURCE_DIR}/capnp/serialize-text.c++" ) -add_library(capnpc ${CAPNPC_SRCS}) -target_link_libraries(capnpc PUBLIC capnp) +add_library(_capnpc ${CAPNPC_SRCS}) +target_link_libraries(_capnpc PUBLIC _capnp) # The library has substandard code if (COMPILER_GCC) @@ -82,6 +89,8 @@ elseif (COMPILER_CLANG) set (CAPNP_PRIVATE_CXX_FLAGS -fno-char8_t) endif () -target_compile_options(kj PRIVATE ${SUPPRESS_WARNINGS} ${CAPNP_PRIVATE_CXX_FLAGS}) -target_compile_options(capnp PRIVATE ${SUPPRESS_WARNINGS} ${CAPNP_PRIVATE_CXX_FLAGS}) -target_compile_options(capnpc PRIVATE ${SUPPRESS_WARNINGS} ${CAPNP_PRIVATE_CXX_FLAGS}) +target_compile_options(_kj PRIVATE ${SUPPRESS_WARNINGS} ${CAPNP_PRIVATE_CXX_FLAGS}) +target_compile_options(_capnp PRIVATE ${SUPPRESS_WARNINGS} ${CAPNP_PRIVATE_CXX_FLAGS}) +target_compile_options(_capnpc PRIVATE ${SUPPRESS_WARNINGS} ${CAPNP_PRIVATE_CXX_FLAGS}) + +add_library(ch_contrib::capnp ALIAS _capnpc) diff --git a/contrib/cassandra-cmake/CMakeLists.txt b/contrib/cassandra-cmake/CMakeLists.txt index a8f2bec5e2b3..81c1fab38824 100644 --- a/contrib/cassandra-cmake/CMakeLists.txt +++ b/contrib/cassandra-cmake/CMakeLists.txt @@ -1,3 +1,14 @@ +option(ENABLE_CASSANDRA "Enable Cassandra" ${ENABLE_LIBRARIES}) + +if (NOT ENABLE_CASSANDRA) + message(STATUS "Not using cassandra") + return() +endif() + +if (APPLE) + set(CMAKE_MACOSX_RPATH ON) +endif() + # Need to use C++17 since the compilation is not possible with C++20 currently. 
set (CMAKE_CXX_STANDARD 17) @@ -42,22 +53,14 @@ endif() list(APPEND SOURCES ${CASS_SRC_DIR}/atomic/atomic_std.hpp) -add_library(curl_hostcheck OBJECT ${CASS_SRC_DIR}/third_party/curl/hostcheck.cpp) -add_library(hdr_histogram OBJECT ${CASS_SRC_DIR}/third_party/hdr_histogram/hdr_histogram.cpp) -add_library(http-parser OBJECT ${CASS_SRC_DIR}/third_party/http-parser/http_parser.c) -add_library(minizip OBJECT - ${CASS_SRC_DIR}/third_party/minizip/ioapi.c - ${CASS_SRC_DIR}/third_party/minizip/zip.c - ${CASS_SRC_DIR}/third_party/minizip/unzip.c) - -target_link_libraries(minizip zlib) -target_compile_definitions(minizip PRIVATE "-Dz_crc_t=unsigned long") +add_library(_curl_hostcheck OBJECT ${CASS_SRC_DIR}/third_party/curl/hostcheck.cpp) +add_library(_hdr_histogram OBJECT ${CASS_SRC_DIR}/third_party/hdr_histogram/hdr_histogram.cpp) +add_library(_http-parser OBJECT ${CASS_SRC_DIR}/third_party/http-parser/http_parser.c) list(APPEND INCLUDE_DIRS ${CASS_SRC_DIR}/third_party/curl ${CASS_SRC_DIR}/third_party/hdr_histogram ${CASS_SRC_DIR}/third_party/http-parser - ${CASS_SRC_DIR}/third_party/minizip ${CASS_SRC_DIR}/third_party/mt19937_64 ${CASS_SRC_DIR}/third_party/rapidjson/rapidjson ${CASS_SRC_DIR}/third_party/sparsehash/src) @@ -108,20 +111,21 @@ configure_file( ${CMAKE_CURRENT_BINARY_DIR}/driver_config.hpp) -add_library(cassandra +add_library(_cassandra ${SOURCES} - $<TARGET_OBJECTS:curl_hostcheck> - $<TARGET_OBJECTS:hdr_histogram> - $<TARGET_OBJECTS:http-parser> - $<TARGET_OBJECTS:minizip>) + $<TARGET_OBJECTS:_curl_hostcheck> + $<TARGET_OBJECTS:_hdr_histogram> + $<TARGET_OBJECTS:_http-parser>) -target_link_libraries(cassandra zlib) -add_library(cassandra_static ALIAS cassandra) -target_include_directories(cassandra PRIVATE ${CMAKE_CURRENT_BINARY_DIR} ${INCLUDE_DIRS}) -target_compile_definitions(cassandra PRIVATE CASS_BUILDING) +target_link_libraries(_cassandra ch_contrib::zlib ch_contrib::minizip) +target_include_directories(_cassandra PRIVATE ${CMAKE_CURRENT_BINARY_DIR} ${INCLUDE_DIRS}) +target_include_directories(_cassandra SYSTEM BEFORE PUBLIC ${CASS_INCLUDE_DIR}) +target_compile_definitions(_cassandra PRIVATE CASS_BUILDING) -target_link_libraries(cassandra uv) +target_link_libraries(_cassandra ch_contrib::uv) if(CASS_USE_OPENSSL) - target_link_libraries(cassandra ssl) + target_link_libraries(_cassandra OpenSSL::SSL) endif() + +add_library(ch_contrib::cassandra ALIAS _cassandra) diff --git a/contrib/cctz-cmake/CMakeLists.txt b/contrib/cctz-cmake/CMakeLists.txt index 2248ba8b6127..f1ef9b53f7d7 100644 --- a/contrib/cctz-cmake/CMakeLists.txt +++ b/contrib/cctz-cmake/CMakeLists.txt @@ -1,106 +1,63 @@ -option (USE_INTERNAL_CCTZ_LIBRARY "Use internal cctz library" ON) - -if (NOT USE_INTERNAL_CCTZ_LIBRARY) - find_library (LIBRARY_CCTZ cctz) - find_path (INCLUDE_CCTZ NAMES cctz/civil_time.h) - - if (LIBRARY_CCTZ AND INCLUDE_CCTZ) - set (EXTERNAL_CCTZ_LIBRARY_FOUND 1) - - set(CMAKE_REQUIRED_LIBRARIES ${LIBRARY_CCTZ}) - set(CMAKE_REQUIRED_INCLUDES ${INCLUDE_CCTZ}) - check_cxx_source_compiles( - " - #include <cctz/civil_time.h> - int main() { - cctz::civil_day date; - } - " - EXTERNAL_CCTZ_LIBRARY_WORKS - ) - - if (NOT EXTERNAL_CCTZ_LIBRARY_WORKS) - message (${RECONFIGURE_MESSAGE_LEVEL} "External cctz is not working: ${LIBRARY_CCTZ} ${INCLUDE_CCTZ}") - else() - add_library (cctz UNKNOWN IMPORTED) - set_property (TARGET cctz PROPERTY IMPORTED_LOCATION ${LIBRARY_CCTZ}) - set_property (TARGET cctz PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${INCLUDE_CCTZ}) - endif() - - set(SYSTEM_STORAGE_TZ_FILE "${CMAKE_BINARY_DIR}/src/Storages/System/StorageSystemTimeZones.generated.cpp") - file(REMOVE ${SYSTEM_STORAGE_TZ_FILE}) - file(APPEND ${SYSTEM_STORAGE_TZ_FILE} "// autogenerated by ClickHouse/contrib/cctz-cmake/CMakeLists.txt\n") -
file(APPEND ${SYSTEM_STORAGE_TZ_FILE} "const char * auto_time_zones[] {nullptr};\n" ) - - else() - set (EXTERNAL_CCTZ_LIBRARY_FOUND 0) - message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find system cctz") - endif() -endif() - -if (NOT EXTERNAL_CCTZ_LIBRARY_FOUND OR NOT EXTERNAL_CCTZ_LIBRARY_WORKS) - include(${ClickHouse_SOURCE_DIR}/cmake/embed_binary.cmake) - set(USE_INTERNAL_CCTZ_LIBRARY 1) - set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/cctz") - - set (SRCS - "${LIBRARY_DIR}/src/civil_time_detail.cc" - "${LIBRARY_DIR}/src/time_zone_fixed.cc" - "${LIBRARY_DIR}/src/time_zone_format.cc" - "${LIBRARY_DIR}/src/time_zone_if.cc" - "${LIBRARY_DIR}/src/time_zone_impl.cc" - "${LIBRARY_DIR}/src/time_zone_info.cc" - "${LIBRARY_DIR}/src/time_zone_libc.cc" - "${LIBRARY_DIR}/src/time_zone_lookup.cc" - "${LIBRARY_DIR}/src/time_zone_posix.cc" - "${LIBRARY_DIR}/src/zone_info_source.cc" - ) - - add_library (cctz ${SRCS}) - target_include_directories (cctz SYSTEM PUBLIC "${LIBRARY_DIR}/include") - - if (OS_FREEBSD) - # yes, need linux, because bsd check inside linux in time_zone_libc.cc:24 - target_compile_definitions (cctz PRIVATE __USE_BSD linux _XOPEN_SOURCE=600) - endif () - - # Related to time_zones table: - # StorageSystemTimeZones.generated.cpp is autogenerated each time during a build - # data in this file will be used to populate the system.time_zones table, this is specific to OS_LINUX - # as the library that's built using embedded tzdata is also specific to OS_LINUX - set(SYSTEM_STORAGE_TZ_FILE "${CMAKE_BINARY_DIR}/src/Storages/System/StorageSystemTimeZones.generated.cpp") - # remove existing copies so that its generated fresh on each build. - file(REMOVE ${SYSTEM_STORAGE_TZ_FILE}) - - # get the list of timezones from tzdata shipped with cctz - set(TZDIR "${LIBRARY_DIR}/testdata/zoneinfo") - file(STRINGS "${LIBRARY_DIR}/testdata/version" TZDATA_VERSION) - set_property(GLOBAL PROPERTY TZDATA_VERSION_PROP "${TZDATA_VERSION}") - message(STATUS "Packaging with tzdata version: ${TZDATA_VERSION}") - - set(TIMEZONE_RESOURCE_FILES) - - # each file in that dir (except of tab and localtime) store the info about timezone - execute_process(COMMAND - bash -c "cd ${TZDIR} && find * -type f -and ! -name '*.tab' -and ! 
-name 'localtime' | LC_ALL=C sort | paste -sd ';' -" - OUTPUT_STRIP_TRAILING_WHITESPACE - OUTPUT_VARIABLE TIMEZONES) - - file(APPEND ${SYSTEM_STORAGE_TZ_FILE} "// autogenerated by ClickHouse/contrib/cctz-cmake/CMakeLists.txt\n") - file(APPEND ${SYSTEM_STORAGE_TZ_FILE} "const char * auto_time_zones[] {\n" ) - - foreach(TIMEZONE ${TIMEZONES}) - file(APPEND ${SYSTEM_STORAGE_TZ_FILE} " \"${TIMEZONE}\",\n") - list(APPEND TIMEZONE_RESOURCE_FILES "${TIMEZONE}") - endforeach(TIMEZONE) - file(APPEND ${SYSTEM_STORAGE_TZ_FILE} " nullptr};\n") - clickhouse_embed_binaries( - TARGET tzdata - RESOURCE_DIR "${TZDIR}" - RESOURCES ${TIMEZONE_RESOURCE_FILES} - ) - add_dependencies(cctz tzdata) - target_link_libraries(cctz INTERFACE "-Wl,${WHOLE_ARCHIVE} $<TARGET_FILE:tzdata> -Wl,${NO_WHOLE_ARCHIVE}") +include(${ClickHouse_SOURCE_DIR}/cmake/embed_binary.cmake) +set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/cctz") + +set (SRCS + "${LIBRARY_DIR}/src/civil_time_detail.cc" + "${LIBRARY_DIR}/src/time_zone_fixed.cc" + "${LIBRARY_DIR}/src/time_zone_format.cc" + "${LIBRARY_DIR}/src/time_zone_if.cc" + "${LIBRARY_DIR}/src/time_zone_impl.cc" + "${LIBRARY_DIR}/src/time_zone_info.cc" + "${LIBRARY_DIR}/src/time_zone_libc.cc" + "${LIBRARY_DIR}/src/time_zone_lookup.cc" + "${LIBRARY_DIR}/src/time_zone_posix.cc" + "${LIBRARY_DIR}/src/zone_info_source.cc" +) + +add_library (_cctz ${SRCS}) +target_include_directories (_cctz PUBLIC "${LIBRARY_DIR}/include") + +if (OS_FREEBSD) + # yes, need linux, because bsd check inside linux in time_zone_libc.cc:24 + target_compile_definitions (_cctz PRIVATE __USE_BSD linux _XOPEN_SOURCE=600) endif () -message (STATUS "Using cctz") +# Related to time_zones table: +# StorageSystemTimeZones.generated.cpp is autogenerated each time during a build +# data in this file will be used to populate the system.time_zones table, this is specific to OS_LINUX +# as the library that's built using embedded tzdata is also specific to OS_LINUX +set(SYSTEM_STORAGE_TZ_FILE "${CMAKE_BINARY_DIR}/src/Storages/System/StorageSystemTimeZones.generated.cpp") +# remove existing copies so that it's generated fresh on each build. +file(REMOVE ${SYSTEM_STORAGE_TZ_FILE}) + +# get the list of timezones from tzdata shipped with cctz +set(TZDIR "${LIBRARY_DIR}/testdata/zoneinfo") +file(STRINGS "${LIBRARY_DIR}/testdata/version" TZDATA_VERSION) +set_property(GLOBAL PROPERTY TZDATA_VERSION_PROP "${TZDATA_VERSION}") +message(STATUS "Packaging with tzdata version: ${TZDATA_VERSION}") + +set(TIMEZONE_RESOURCE_FILES) + +# each file in that dir (except for tab and localtime) stores the info about one timezone +execute_process(COMMAND + bash -c "cd ${TZDIR} && find * -type f -and ! -name '*.tab' -and !
-name 'localtime' | LC_ALL=C sort | paste -sd ';' -" + OUTPUT_STRIP_TRAILING_WHITESPACE + OUTPUT_VARIABLE TIMEZONES) + +file(APPEND ${SYSTEM_STORAGE_TZ_FILE} "// autogenerated by ClickHouse/contrib/cctz-cmake/CMakeLists.txt\n") +file(APPEND ${SYSTEM_STORAGE_TZ_FILE} "const char * auto_time_zones[] {\n" ) + +foreach(TIMEZONE ${TIMEZONES}) + file(APPEND ${SYSTEM_STORAGE_TZ_FILE} " \"${TIMEZONE}\",\n") + list(APPEND TIMEZONE_RESOURCE_FILES "${TIMEZONE}") +endforeach(TIMEZONE) +file(APPEND ${SYSTEM_STORAGE_TZ_FILE} " nullptr};\n") +clickhouse_embed_binaries( + TARGET tzdata + RESOURCE_DIR "${TZDIR}" + RESOURCES ${TIMEZONE_RESOURCE_FILES} +) +add_dependencies(_cctz tzdata) +target_link_libraries(_cctz INTERFACE "-Wl,${WHOLE_ARCHIVE} $<TARGET_FILE:tzdata> -Wl,${NO_WHOLE_ARCHIVE}") + +add_library(ch_contrib::cctz ALIAS _cctz) diff --git a/contrib/cityhash102/CMakeLists.txt b/contrib/cityhash102/CMakeLists.txt index f40a6d2408b0..744fa29f3b0f 100644 --- a/contrib/cityhash102/CMakeLists.txt +++ b/contrib/cityhash102/CMakeLists.txt @@ -1,8 +1,10 @@ -add_library(cityhash +add_library(_cityhash src/city.cc include/citycrc.h include/city.h src/config.h) -target_include_directories(cityhash SYSTEM BEFORE PUBLIC include) -target_include_directories(cityhash SYSTEM PRIVATE src) +target_include_directories(_cityhash SYSTEM BEFORE PUBLIC include) +target_include_directories(_cityhash SYSTEM PRIVATE src) + +add_library(ch_contrib::cityhash ALIAS _cityhash) diff --git a/contrib/cld2 b/contrib/cld2 new file mode 160000 index 000000000000..bc6d493a2f64 --- /dev/null +++ b/contrib/cld2 @@ -0,0 +1 @@ +Subproject commit bc6d493a2f64ed1fc1c4c4b4294a542a04e04217 diff --git a/contrib/cld2-cmake/CMakeLists.txt b/contrib/cld2-cmake/CMakeLists.txt new file mode 100644 index 000000000000..8600856ea36a --- /dev/null +++ b/contrib/cld2-cmake/CMakeLists.txt @@ -0,0 +1,33 @@ +set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/cld2") + +set (SRCS + "${LIBRARY_DIR}/internal/cldutil.cc" + "${LIBRARY_DIR}/internal/compact_lang_det.cc" + "${LIBRARY_DIR}/internal/cldutil_shared.cc" + "${LIBRARY_DIR}/internal/compact_lang_det_hint_code.cc" + "${LIBRARY_DIR}/internal/compact_lang_det_impl.cc" + "${LIBRARY_DIR}/internal/debug.cc" + "${LIBRARY_DIR}/internal/fixunicodevalue.cc" + "${LIBRARY_DIR}/internal/generated_entities.cc" + "${LIBRARY_DIR}/internal/generated_language.cc" + "${LIBRARY_DIR}/internal/generated_ulscript.cc" + "${LIBRARY_DIR}/internal/getonescriptspan.cc" + "${LIBRARY_DIR}/internal/lang_script.cc" + "${LIBRARY_DIR}/internal/offsetmap.cc" + "${LIBRARY_DIR}/internal/scoreonescriptspan.cc" + "${LIBRARY_DIR}/internal/tote.cc" + "${LIBRARY_DIR}/internal/utf8statetable.cc" + "${LIBRARY_DIR}/internal/cld_generated_cjk_uni_prop_80.cc" + "${LIBRARY_DIR}/internal/cld2_generated_cjk_compatible.cc" + "${LIBRARY_DIR}/internal/cld_generated_cjk_delta_bi_4.cc" + "${LIBRARY_DIR}/internal/generated_distinct_bi_0.cc" + "${LIBRARY_DIR}/internal/cld2_generated_quadchrome_2.cc" + "${LIBRARY_DIR}/internal/cld2_generated_deltaoctachrome.cc" + "${LIBRARY_DIR}/internal/cld2_generated_distinctoctachrome.cc" + "${LIBRARY_DIR}/internal/cld_generated_score_quad_octa_2.cc" +) +add_library(_cld2 ${SRCS}) +set_property(TARGET _cld2 PROPERTY POSITION_INDEPENDENT_CODE ON) +target_compile_options (_cld2 PRIVATE -Wno-reserved-id-macro -Wno-c++11-narrowing) +target_include_directories(_cld2 SYSTEM BEFORE PUBLIC "${LIBRARY_DIR}/public") +add_library(ch_contrib::cld2 ALIAS _cld2) diff --git a/contrib/consistent-hashing/CMakeLists.txt b/contrib/consistent-hashing/CMakeLists.txt
index 7543022df462..5d979824434a 100644 --- a/contrib/consistent-hashing/CMakeLists.txt +++ b/contrib/consistent-hashing/CMakeLists.txt @@ -1,2 +1,3 @@ -add_library(consistent-hashing consistent_hashing.cpp popcount.cpp) -target_include_directories(consistent-hashing SYSTEM PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) +add_library(_consistent_hashing consistent_hashing.cpp popcount.cpp) +target_include_directories(_consistent_hashing SYSTEM PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) +add_library(ch_contrib::consistent_hashing ALIAS _consistent_hashing) diff --git a/contrib/cppkafka-cmake/CMakeLists.txt b/contrib/cppkafka-cmake/CMakeLists.txt index 0bc33ada5296..87bf2356a807 100644 --- a/contrib/cppkafka-cmake/CMakeLists.txt +++ b/contrib/cppkafka-cmake/CMakeLists.txt @@ -1,3 +1,8 @@ +if (NOT ENABLE_KAFKA) + message(STATUS "Not using librdkafka (skip cppkafka)") + return() +endif() + set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/cppkafka") set(SRCS @@ -22,12 +27,13 @@ set(SRCS "${LIBRARY_DIR}/src/topic.cpp" ) -add_library(cppkafka ${SRCS}) +add_library(_cppkafka ${SRCS}) +add_library(ch_contrib::cppkafka ALIAS _cppkafka) -target_link_libraries(cppkafka +target_link_libraries(_cppkafka PRIVATE - ${RDKAFKA_LIBRARY} + ch_contrib::rdkafka boost::headers_only ) -target_include_directories(cppkafka PRIVATE "${LIBRARY_DIR}/include/cppkafka") -target_include_directories(cppkafka SYSTEM BEFORE PUBLIC "${LIBRARY_DIR}/include") +target_include_directories(_cppkafka PRIVATE "${LIBRARY_DIR}/include/cppkafka") +target_include_directories(_cppkafka SYSTEM BEFORE PUBLIC "${LIBRARY_DIR}/include") diff --git a/contrib/croaring-cmake/CMakeLists.txt b/contrib/croaring-cmake/CMakeLists.txt index 3d327d068c1e..0bb7d0bd221a 100644 --- a/contrib/croaring-cmake/CMakeLists.txt +++ b/contrib/croaring-cmake/CMakeLists.txt @@ -19,15 +19,15 @@ set(SRCS "${LIBRARY_DIR}/src/roaring_priority_queue.c" "${LIBRARY_DIR}/src/roaring_array.c") -add_library(roaring ${SRCS}) +add_library(_roaring ${SRCS}) -target_include_directories(roaring PRIVATE "${LIBRARY_DIR}/include/roaring") -target_include_directories(roaring SYSTEM BEFORE PUBLIC "${LIBRARY_DIR}/include") -target_include_directories(roaring SYSTEM BEFORE PUBLIC "${LIBRARY_DIR}/cpp") +target_include_directories(_roaring PRIVATE "${LIBRARY_DIR}/include/roaring") +target_include_directories(_roaring SYSTEM BEFORE PUBLIC "${LIBRARY_DIR}/include") +target_include_directories(_roaring SYSTEM BEFORE PUBLIC "${LIBRARY_DIR}/cpp") # We redirect malloc/free family of functions to different functions that will track memory in ClickHouse. # Also note that we exploit implicit function declarations. 
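# Expanding on the comment above: the definitions in the next hunk are plain
# preprocessor renames, i.e. every textual call to malloc/calloc/realloc/free
# inside CRoaring compiles into a call to the clickhouse_* counterpart, and the
# symbol is resolved at link time against clickhouse_common_io, so these
# allocations are visible to ClickHouse's memory tracker. A minimal sketch with
# a single definition:
#
#     target_compile_definitions(_roaring PRIVATE -Dmalloc=clickhouse_malloc)
#     # => `void * p = malloc(n);` in the sources compiles as
#     #    `void * p = clickhouse_malloc(n);`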
-target_compile_definitions(roaring PRIVATE +target_compile_definitions(_roaring PRIVATE -Dmalloc=clickhouse_malloc -Dcalloc=clickhouse_calloc -Drealloc=clickhouse_realloc @@ -35,4 +35,6 @@ target_compile_definitions(roaring PRIVATE -Dfree=clickhouse_free -Dposix_memalign=clickhouse_posix_memalign) -target_link_libraries(roaring PUBLIC clickhouse_common_io) +target_link_libraries(_roaring PUBLIC clickhouse_common_io) + +add_library(ch_contrib::roaring ALIAS _roaring) diff --git a/contrib/curl-cmake/CMakeLists.txt b/contrib/curl-cmake/CMakeLists.txt index 63ac8da24b19..589f40384e38 100644 --- a/contrib/curl-cmake/CMakeLists.txt +++ b/contrib/curl-cmake/CMakeLists.txt @@ -1,4 +1,7 @@ -if (NOT USE_INTERNAL_CURL) +option (ENABLE_CURL "Enable curl" ${ENABLE_LIBRARIES}) + +if (NOT ENABLE_CURL) + message(STATUS "Not using curl") return() endif() @@ -144,36 +147,24 @@ set (SRCS "${LIBRARY_DIR}/lib/vssh/libssh.c" ) -add_library (curl ${SRCS}) +add_library (_curl ${SRCS}) -target_compile_definitions (curl PRIVATE +target_compile_definitions (_curl PRIVATE HAVE_CONFIG_H BUILDING_LIBCURL CURL_HIDDEN_SYMBOLS libcurl_EXPORTS OS="${CMAKE_SYSTEM_NAME}" ) -target_include_directories (curl SYSTEM PUBLIC +target_include_directories (_curl SYSTEM PUBLIC "${LIBRARY_DIR}/include" "${LIBRARY_DIR}/lib" . # curl_config.h ) -target_link_libraries (curl PRIVATE ssl) +target_link_libraries (_curl PRIVATE OpenSSL::SSL) # The library is large - avoid bloat (XXX: is it?) -target_compile_options (curl PRIVATE -g0) +target_compile_options (_curl PRIVATE -g0) -# find_package(CURL) compatibility for the following packages that uses -# find_package(CURL)/include(FindCURL): -# - mariadb-connector-c -# - aws-s3-cmake -# - sentry-native -set (CURL_FOUND ON CACHE BOOL "") -set (CURL_ROOT_DIR ${LIBRARY_DIR} CACHE PATH "") -set (CURL_INCLUDE_DIR "${LIBRARY_DIR}/include" CACHE PATH "") -set (CURL_INCLUDE_DIRS "${LIBRARY_DIR}/include" CACHE PATH "") -set (CURL_LIBRARY curl CACHE STRING "") -set (CURL_LIBRARIES ${CURL_LIBRARY} CACHE STRING "") -set (CURL_VERSION_STRING 7.67.0 CACHE STRING "") -add_library (CURL::libcurl ALIAS ${CURL_LIBRARY}) +add_library (ch_contrib::curl ALIAS _curl) diff --git a/contrib/cyrus-sasl-cmake/CMakeLists.txt b/contrib/cyrus-sasl-cmake/CMakeLists.txt index aa25a0787187..41deaae19a78 100644 --- a/contrib/cyrus-sasl-cmake/CMakeLists.txt +++ b/contrib/cyrus-sasl-cmake/CMakeLists.txt @@ -1,8 +1,20 @@ +if (${ENABLE_LIBRARIES} AND ${ENABLE_KRB5}) + set (DEFAULT_ENABLE_CYRUS_SASL 1) +else() + set (DEFAULT_ENABLE_CYRUS_SASL 0) +endif() +option(ENABLE_CYRUS_SASL "Enable cyrus-sasl" ${DEFAULT_ENABLE_CYRUS_SASL}) + +if (NOT ENABLE_CYRUS_SASL) + message(STATUS "Not using cyrus-sasl") + return() +endif() + set(CYRUS_SASL_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/cyrus-sasl") -add_library(${CYRUS_SASL_LIBRARY}) +add_library(_sasl2) -target_sources(${CYRUS_SASL_LIBRARY} PRIVATE +target_sources(_sasl2 PRIVATE "${CYRUS_SASL_SOURCE_DIR}/plugins/gssapi.c" # "${CYRUS_SASL_SOURCE_DIR}/plugins/gssapiv2_init.c" "${CYRUS_SASL_SOURCE_DIR}/common/plugin_common.c" @@ -20,11 +32,11 @@ target_sources(${CYRUS_SASL_LIBRARY} PRIVATE "${CYRUS_SASL_SOURCE_DIR}/lib/checkpw.c" ) -target_include_directories(${CYRUS_SASL_LIBRARY} PUBLIC +target_include_directories(_sasl2 PUBLIC ${CMAKE_CURRENT_BINARY_DIR} ) -target_include_directories(${CYRUS_SASL_LIBRARY} PRIVATE +target_include_directories(_sasl2 PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} # for config.h "${CYRUS_SASL_SOURCE_DIR}/plugins" ${CYRUS_SASL_SOURCE_DIR} @@ -38,7 +50,7 @@ 
target_include_directories(${CYRUS_SASL_LIBRARY} PRIVATE "${CYRUS_SASL_SOURCE_DIR}/tests" ) -target_compile_definitions(${CYRUS_SASL_LIBRARY} PUBLIC +target_compile_definitions(_sasl2 PUBLIC HAVE_CONFIG_H # PLUGINDIR="/usr/local/lib/sasl2" PLUGINDIR="" @@ -64,6 +76,6 @@ file(COPY DESTINATION ${CMAKE_CURRENT_BINARY_DIR} ) -target_link_libraries(${CYRUS_SASL_LIBRARY} - PUBLIC ${KRB5_LIBRARY} -) +target_link_libraries(_sasl2 PUBLIC ch_contrib::krb5) + +add_library(ch_contrib::sasl2 ALIAS _sasl2) diff --git a/contrib/datasketches-cpp-cmake/CMakeLists.txt b/contrib/datasketches-cpp-cmake/CMakeLists.txt new file mode 100644 index 000000000000..b12a88ad57b1 --- /dev/null +++ b/contrib/datasketches-cpp-cmake/CMakeLists.txt @@ -0,0 +1,14 @@ +option (ENABLE_DATASKETCHES "Enable DataSketches" ${ENABLE_LIBRARIES}) + +if (NOT ENABLE_DATASKETCHES) + message(STATUS "Not using DataSketches") + return() +endif() + +set(DATASKETCHES_LIBRARY theta) +add_library(_datasketches INTERFACE) +target_include_directories(_datasketches SYSTEM BEFORE INTERFACE + "${ClickHouse_SOURCE_DIR}/contrib/datasketches-cpp/common/include" + "${ClickHouse_SOURCE_DIR}/contrib/datasketches-cpp/theta/include") + +add_library(ch_contrib::datasketches ALIAS _datasketches) diff --git a/contrib/double-conversion-cmake/CMakeLists.txt b/contrib/double-conversion-cmake/CMakeLists.txt index c8bf1b34b8f6..dc5b1719abfe 100644 --- a/contrib/double-conversion-cmake/CMakeLists.txt +++ b/contrib/double-conversion-cmake/CMakeLists.txt @@ -1,6 +1,6 @@ SET(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/double-conversion") -add_library(double-conversion +add_library(_double-conversion "${LIBRARY_DIR}/double-conversion/bignum.cc" "${LIBRARY_DIR}/double-conversion/bignum-dtoa.cc" "${LIBRARY_DIR}/double-conversion/cached-powers.cc" @@ -10,4 +10,6 @@ add_library(double-conversion "${LIBRARY_DIR}/double-conversion/fixed-dtoa.cc" "${LIBRARY_DIR}/double-conversion/strtod.cc") -target_include_directories(double-conversion SYSTEM BEFORE PUBLIC "${LIBRARY_DIR}") +target_include_directories(_double-conversion SYSTEM BEFORE PUBLIC "${LIBRARY_DIR}") + +add_library(ch_contrib::double_conversion ALIAS _double-conversion) diff --git a/contrib/dragonbox-cmake/CMakeLists.txt b/contrib/dragonbox-cmake/CMakeLists.txt index 604394c6dcec..6644ac3c313f 100644 --- a/contrib/dragonbox-cmake/CMakeLists.txt +++ b/contrib/dragonbox-cmake/CMakeLists.txt @@ -1,5 +1,5 @@ set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/dragonbox") -add_library(dragonbox_to_chars "${LIBRARY_DIR}/source/dragonbox_to_chars.cpp") - -target_include_directories(dragonbox_to_chars SYSTEM BEFORE PUBLIC "${LIBRARY_DIR}/include/") +add_library(_dragonbox_to_chars "${LIBRARY_DIR}/source/dragonbox_to_chars.cpp") +target_include_directories(_dragonbox_to_chars SYSTEM BEFORE PUBLIC "${LIBRARY_DIR}/include/") +add_library(ch_contrib::dragonbox_to_chars ALIAS _dragonbox_to_chars) diff --git a/contrib/fast_float-cmake/CMakeLists.txt b/contrib/fast_float-cmake/CMakeLists.txt index cd945f79a20b..4ddd11c6d371 100644 --- a/contrib/fast_float-cmake/CMakeLists.txt +++ b/contrib/fast_float-cmake/CMakeLists.txt @@ -1,2 +1,3 @@ -add_library(fast_float INTERFACE) -target_include_directories(fast_float INTERFACE "${ClickHouse_SOURCE_DIR}/contrib/fast_float/include/") +add_library(_fast_float INTERFACE) +target_include_directories(_fast_float SYSTEM BEFORE INTERFACE "${ClickHouse_SOURCE_DIR}/contrib/fast_float/include/") +add_library(ch_contrib::fast_float ALIAS _fast_float) diff --git a/contrib/fastops-cmake/CMakeLists.txt 
b/contrib/fastops-cmake/CMakeLists.txt index fe7293c614b1..17d6a7f5fcbf 100644 --- a/contrib/fastops-cmake/CMakeLists.txt +++ b/contrib/fastops-cmake/CMakeLists.txt @@ -1,3 +1,14 @@ +if(ARCH_AMD64 AND NOT OS_FREEBSD AND NOT OS_DARWIN) + option(ENABLE_FASTOPS "Enable fast vectorized mathematical functions library by Mikhail Parakhin" ${ENABLE_LIBRARIES}) +elseif(ENABLE_FASTOPS) + message (${RECONFIGURE_MESSAGE_LEVEL} "Fastops library is supported on x86_64 only, and not FreeBSD or Darwin") +endif() + +if(NOT ENABLE_FASTOPS) + message(STATUS "Not using fast vectorized mathematical functions library by Mikhail Parakhin") + return() +endif() + set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/fastops") set(SRCS "") @@ -14,6 +25,8 @@ endif() set (SRCS ${SRCS} "${LIBRARY_DIR}/fastops/plain/ops_plain.cpp" "${LIBRARY_DIR}/fastops/core/avx_id.cpp" "${LIBRARY_DIR}/fastops/fastops.cpp") -add_library(fastops ${SRCS}) +add_library(_fastops ${SRCS}) + +target_include_directories(_fastops SYSTEM PUBLIC "${LIBRARY_DIR}") -target_include_directories(fastops SYSTEM PUBLIC "${LIBRARY_DIR}") +add_library(ch_contrib::fastops ALIAS _fastops) diff --git a/contrib/fmtlib b/contrib/fmtlib index c108ee1d5900..b6f4ceaed0a0 160000 --- a/contrib/fmtlib +++ b/contrib/fmtlib @@ -1 +1 @@ -Subproject commit c108ee1d590089ccf642fc85652b845924067af2 +Subproject commit b6f4ceaed0a0a24ccf575fab6c56dd50ccf6f1a9 diff --git a/contrib/fmtlib-cmake/CMakeLists.txt b/contrib/fmtlib-cmake/CMakeLists.txt index f3bf73d7dbc5..fecec5f3e439 100644 --- a/contrib/fmtlib-cmake/CMakeLists.txt +++ b/contrib/fmtlib-cmake/CMakeLists.txt @@ -1,7 +1,10 @@ set (SRCS + # NOTE: do not build module for now: + # ../fmtlib/src/fmt.cc ../fmtlib/src/format.cc ../fmtlib/src/os.cc + ../fmtlib/include/fmt/args.h ../fmtlib/include/fmt/chrono.h ../fmtlib/include/fmt/color.h ../fmtlib/include/fmt/compile.h @@ -11,10 +14,11 @@ set (SRCS ../fmtlib/include/fmt/locale.h ../fmtlib/include/fmt/os.h ../fmtlib/include/fmt/ostream.h - ../fmtlib/include/fmt/posix.h ../fmtlib/include/fmt/printf.h ../fmtlib/include/fmt/ranges.h + ../fmtlib/include/fmt/xchar.h ) -add_library(fmt ${SRCS}) -target_include_directories(fmt SYSTEM PUBLIC ../fmtlib/include) +add_library(_fmt ${SRCS}) +target_include_directories(_fmt SYSTEM PUBLIC ../fmtlib/include) +add_library(ch_contrib::fmt ALIAS _fmt) diff --git a/contrib/googletest-cmake/CMakeLists.txt b/contrib/googletest-cmake/CMakeLists.txt index ec7ac91c4711..f116eddc337f 100644 --- a/contrib/googletest-cmake/CMakeLists.txt +++ b/contrib/googletest-cmake/CMakeLists.txt @@ -1,11 +1,15 @@ set (SRC_DIR "${ClickHouse_SOURCE_DIR}/contrib/googletest/googletest") -add_library(gtest "${SRC_DIR}/src/gtest-all.cc") -set_target_properties(gtest PROPERTIES VERSION "1.0.0") -target_compile_definitions (gtest INTERFACE GTEST_HAS_POSIX_RE=0) -target_include_directories(gtest SYSTEM PUBLIC "${SRC_DIR}/include") -target_include_directories(gtest PRIVATE "${SRC_DIR}") +add_library(_gtest "${SRC_DIR}/src/gtest-all.cc") +set_target_properties(_gtest PROPERTIES VERSION "1.0.0") +target_compile_definitions (_gtest INTERFACE GTEST_HAS_POSIX_RE=0) +target_include_directories(_gtest SYSTEM PUBLIC "${SRC_DIR}/include") +target_include_directories(_gtest PRIVATE "${SRC_DIR}") -add_library(gtest_main "${SRC_DIR}/src/gtest_main.cc") -set_target_properties(gtest_main PROPERTIES VERSION "1.0.0") -target_link_libraries(gtest_main PUBLIC gtest) +add_library(_gtest_main "${SRC_DIR}/src/gtest_main.cc") +set_target_properties(_gtest_main PROPERTIES VERSION "1.0.0") 
+target_link_libraries(_gtest_main PUBLIC _gtest) + +add_library(_gtest_all INTERFACE) +target_link_libraries(_gtest_all INTERFACE _gtest _gtest_main) +add_library(ch_contrib::gtest_all ALIAS _gtest_all) diff --git a/contrib/grpc-cmake/CMakeLists.txt b/contrib/grpc-cmake/CMakeLists.txt index b93968f62f96..520e04d198e2 100644 --- a/contrib/grpc-cmake/CMakeLists.txt +++ b/contrib/grpc-cmake/CMakeLists.txt @@ -1,47 +1,52 @@ +# disable grpc due to conflicts between the dynamic annotations of abseil (required by grpc) and libtsan.a +if (SANITIZE STREQUAL "thread" AND COMPILER_GCC) + set(ENABLE_GRPC_DEFAULT OFF) +else() + set(ENABLE_GRPC_DEFAULT ${ENABLE_LIBRARIES}) +endif() +option(ENABLE_GRPC "Use gRPC" ${ENABLE_GRPC_DEFAULT}) + +if(NOT ENABLE_GRPC) + message(STATUS "Not using gRPC") + return() +endif() + set(_gRPC_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/grpc") set(_gRPC_BINARY_DIR "${ClickHouse_BINARY_DIR}/contrib/grpc") # Use re2 from ClickHouse contrib, not from gRPC third_party. -if(NOT RE2_INCLUDE_DIR) - message(FATAL_ERROR " grpc: The location of the \"re2\" library is unknown") -endif() set(gRPC_RE2_PROVIDER "clickhouse" CACHE STRING "" FORCE) -set(_gRPC_RE2_INCLUDE_DIR "${RE2_INCLUDE_DIR}") -set(_gRPC_RE2_LIBRARIES "${RE2_LIBRARY}") +set(_gRPC_RE2_INCLUDE_DIR "") +set(_gRPC_RE2_LIBRARIES ch_contrib::re2) # Use zlib from ClickHouse contrib, not from gRPC third_party. -if(NOT ZLIB_INCLUDE_DIRS) - message(FATAL_ERROR " grpc: The location of the \"zlib\" library is unknown") -endif() set(gRPC_ZLIB_PROVIDER "clickhouse" CACHE STRING "" FORCE) -set(_gRPC_ZLIB_INCLUDE_DIR "${ZLIB_INCLUDE_DIRS}") -set(_gRPC_ZLIB_LIBRARIES "${ZLIB_LIBRARIES}") +set(_gRPC_ZLIB_INCLUDE_DIR "") +set(_gRPC_ZLIB_LIBRARIES ch_contrib::zlib) # Use protobuf from ClickHouse contrib, not from gRPC third_party. -if(NOT Protobuf_INCLUDE_DIR OR NOT Protobuf_LIBRARY) - message(FATAL_ERROR " grpc: The location of the \"protobuf\" library is unknown") -elseif (NOT Protobuf_PROTOC_EXECUTABLE) - message(FATAL_ERROR " grpc: The location of the protobuf compiler is unknown") -elseif (NOT Protobuf_PROTOC_LIBRARY) - message(FATAL_ERROR " grpc: The location of the protobuf compiler's library is unknown") -endif() set(gRPC_PROTOBUF_PROVIDER "clickhouse" CACHE STRING "" FORCE) -set(_gRPC_PROTOBUF_WELLKNOWN_INCLUDE_DIR "${Protobuf_INCLUDE_DIR}") -set(_gRPC_PROTOBUF_LIBRARIES "${Protobuf_LIBRARY}") +set(_gRPC_PROTOBUF_LIBRARIES ch_contrib::protobuf) set(_gRPC_PROTOBUF_PROTOC "protoc") -set(_gRPC_PROTOBUF_PROTOC_EXECUTABLE "${Protobuf_PROTOC_EXECUTABLE}") -set(_gRPC_PROTOBUF_PROTOC_LIBRARIES "${Protobuf_PROTOC_LIBRARY}") +set(_gRPC_PROTOBUF_PROTOC_EXECUTABLE $<TARGET_FILE:protoc>) +set(_gRPC_PROTOBUF_PROTOC_LIBRARIES ch_contrib::protoc) + +if(TARGET OpenSSL::SSL) + set(gRPC_USE_UNSECURE_LIBRARIES FALSE) +else() + set(gRPC_USE_UNSECURE_LIBRARIES TRUE) +endif() # Use OpenSSL from ClickHouse contrib, not from gRPC third_party. set(gRPC_SSL_PROVIDER "clickhouse" CACHE STRING "" FORCE) -set(_gRPC_SSL_INCLUDE_DIR ${OPENSSL_INCLUDE_DIR}) -set(_gRPC_SSL_LIBRARIES ${OPENSSL_LIBRARIES}) +set(_gRPC_SSL_INCLUDE_DIR "") +set(_gRPC_SSL_LIBRARIES OpenSSL::Crypto OpenSSL::SSL) # Use abseil-cpp from ClickHouse contrib, not from gRPC third_party. set(gRPC_ABSL_PROVIDER "clickhouse" CACHE STRING "" FORCE) # Choose to build static or shared library for c-ares.
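# A remark on the c-ares choice below: it relies on CMake's cache-override
# idiom, where set(... CACHE BOOL "" FORCE) makes the value written here win
# over the default that c-ares' own CMakeLists.txt would install when the
# subproject is configured via add_subdirectory() (c-ares is pulled in through
# gRPC's build). A minimal sketch, assuming the static flavor is wanted:
#
#     set(CARES_STATIC ON  CACHE BOOL "" FORCE)
#     set(CARES_SHARED OFF CACHE BOOL "" FORCE)
#     add_subdirectory(c-ares)    # sees the forced values instead of its defaults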
-if (MAKE_STATIC_LIBRARIES) +if (USE_STATIC_LIBRARIES) set(CARES_STATIC ON CACHE BOOL "" FORCE) set(CARES_SHARED OFF CACHE BOOL "" FORCE) else () @@ -77,3 +82,17 @@ add_subdirectory("${_gRPC_SOURCE_DIR}" "${_gRPC_BINARY_DIR}") # The contrib/grpc/CMakeLists.txt redefined the PROTOBUF_GENERATE_GRPC_CPP() function for its own purposes, # so we need to redefine it back. include("${ClickHouse_SOURCE_DIR}/contrib/grpc-cmake/protobuf_generate_grpc.cmake") + +set(gRPC_CPP_PLUGIN $<TARGET_FILE:grpc_cpp_plugin>) +set(gRPC_PYTHON_PLUGIN $<TARGET_FILE:grpc_python_plugin>) + +set(gRPC_INCLUDE_DIRS "${ClickHouse_SOURCE_DIR}/contrib/grpc/include") +if(gRPC_USE_UNSECURE_LIBRARIES) + set(gRPC_LIBRARIES grpc_unsecure grpc++_unsecure) +else() + set(gRPC_LIBRARIES grpc grpc++) +endif() +add_library(_ch_contrib_grpc INTERFACE) +target_link_libraries(_ch_contrib_grpc INTERFACE ${gRPC_LIBRARIES}) +target_include_directories(_ch_contrib_grpc SYSTEM INTERFACE ${gRPC_INCLUDE_DIRS}) +add_library(ch_contrib::grpc ALIAS _ch_contrib_grpc) diff --git a/contrib/h3-cmake/CMakeLists.txt b/contrib/h3-cmake/CMakeLists.txt index f4c70dc476f8..984d1b1ae7c0 100644 --- a/contrib/h3-cmake/CMakeLists.txt +++ b/contrib/h3-cmake/CMakeLists.txt @@ -1,3 +1,10 @@ +option (ENABLE_H3 "Enable H3" ${ENABLE_LIBRARIES}) + +if(NOT ENABLE_H3) + message(STATUS "Not using H3") + return() +endif() + set(H3_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/h3/src/h3lib") set(H3_BINARY_DIR "${ClickHouse_BINARY_DIR}/contrib/h3/src/h3lib") @@ -23,10 +30,12 @@ set(SRCS configure_file("${H3_SOURCE_DIR}/include/h3api.h.in" "${H3_BINARY_DIR}/include/h3api.h") -add_library(h3 ${SRCS}) -target_include_directories(h3 SYSTEM PUBLIC "${H3_SOURCE_DIR}/include") -target_include_directories(h3 SYSTEM PUBLIC "${H3_BINARY_DIR}/include") -target_compile_definitions(h3 PRIVATE H3_HAVE_VLA) +add_library(_h3 ${SRCS}) +target_include_directories(_h3 SYSTEM PUBLIC "${H3_SOURCE_DIR}/include") +target_include_directories(_h3 SYSTEM PUBLIC "${H3_BINARY_DIR}/include") +target_compile_definitions(_h3 PRIVATE H3_HAVE_VLA) if(M_LIBRARY) - target_link_libraries(h3 PRIVATE ${M_LIBRARY}) + target_link_libraries(_h3 PRIVATE ${M_LIBRARY}) endif() + +add_library(ch_contrib::h3 ALIAS _h3) diff --git a/contrib/hive-metastore-cmake/CMakeLists.txt b/contrib/hive-metastore-cmake/CMakeLists.txt index c92405fa4e80..9069d46cea7b 100644 --- a/contrib/hive-metastore-cmake/CMakeLists.txt +++ b/contrib/hive-metastore-cmake/CMakeLists.txt @@ -1,9 +1,21 @@ +if (TARGET ch_contrib::hdfs) + option(ENABLE_HIVE "Enable Hive" ${ENABLE_LIBRARIES}) +elseif(ENABLE_HIVE) + message (${RECONFIGURE_MESSAGE_LEVEL} "Cannot use Hive without HDFS") +endif() + +if (NOT ENABLE_HIVE) + message("Hive disabled") + return() +endif() + set (SRCS ${ClickHouse_SOURCE_DIR}/contrib/hive-metastore/hive_metastore_constants.cpp ${ClickHouse_SOURCE_DIR}/contrib/hive-metastore/hive_metastore_types.cpp ${ClickHouse_SOURCE_DIR}/contrib/hive-metastore/ThriftHiveMetastore.cpp ) -add_library(${HIVE_METASTORE_LIBRARY} ${SRCS}) -target_link_libraries(${HIVE_METASTORE_LIBRARY} PUBLIC ${THRIFT_LIBRARY}) -target_include_directories(${HIVE_METASTORE_LIBRARY} SYSTEM PUBLIC ${HIVE_METASTORE_INCLUDE_DIR}) +add_library(_hivemetastore ${SRCS}) +add_library(ch_contrib::hivemetastore ALIAS _hivemetastore) +target_link_libraries(_hivemetastore PUBLIC ch_contrib::thrift) +target_include_directories(_hivemetastore SYSTEM BEFORE PUBLIC ${ClickHouse_SOURCE_DIR}/contrib/hive-metastore) diff --git a/contrib/hyperscan-cmake/CMakeLists.txt b/contrib/hyperscan-cmake/CMakeLists.txt index 248551d0b0c1..02c823a3a426 100644
--- a/contrib/hyperscan-cmake/CMakeLists.txt +++ b/contrib/hyperscan-cmake/CMakeLists.txt @@ -6,268 +6,234 @@ elseif(ENABLE_HYPERSCAN) endif () if (NOT ENABLE_HYPERSCAN) - if (USE_INTERNAL_HYPERSCAN_LIBRARY) - message (${RECONFIGURE_MESSAGE_LEVEL} "Can't use internal hyperscan with ENABLE_HYPERSCAN=OFF") - endif() - - add_library (hyperscan INTERFACE) - target_compile_definitions (hyperscan INTERFACE USE_HYPERSCAN=0) - message (STATUS "Not using hyperscan") return() endif() -option (USE_INTERNAL_HYPERSCAN_LIBRARY "Use internal hyperscan library" ON) - -if (NOT USE_INTERNAL_HYPERSCAN_LIBRARY) - find_library (LIBRARY_HYPERSCAN hs) - find_path (INCLUDE_HYPERSCAN NAMES hs.h HINTS /usr/include/hs) # Ubuntu puts headers in this folder - - if (LIBRARY_HYPERSCAN AND INCLUDE_HYPERSCAN) - set (EXTERNAL_HYPERSCAN_LIBRARY_FOUND 1) - - add_library (hyperscan INTERFACE) - set_target_properties (hyperscan PROPERTIES INTERFACE_LINK_LIBRARIES ${LIBRARY_HYPERSCAN}) - set_target_properties (hyperscan PROPERTIES INTERFACE_INCLUDE_DIRECTORIES ${INCLUDE_HYPERSCAN}) - set_property(TARGET hyperscan APPEND PROPERTY INTERFACE_COMPILE_DEFINITIONS USE_HYPERSCAN=1) - else () - set (EXTERNAL_HYPERSCAN_LIBRARY_FOUND 0) - message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find system hyperscan library") - endif () - -endif () - -if (NOT EXTERNAL_HYPERSCAN_LIBRARY_FOUND) - set (USE_INTERNAL_HYPERSCAN_LIBRARY 1) - - set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/hyperscan") - - set (SRCS - "${LIBRARY_DIR}/src/alloc.c" - "${LIBRARY_DIR}/src/compiler/asserts.cpp" - "${LIBRARY_DIR}/src/compiler/compiler.cpp" - "${LIBRARY_DIR}/src/compiler/error.cpp" - "${LIBRARY_DIR}/src/crc32.c" - "${LIBRARY_DIR}/src/database.c" - "${LIBRARY_DIR}/src/fdr/engine_description.cpp" - "${LIBRARY_DIR}/src/fdr/fdr_compile_util.cpp" - "${LIBRARY_DIR}/src/fdr/fdr_compile.cpp" - "${LIBRARY_DIR}/src/fdr/fdr_confirm_compile.cpp" - "${LIBRARY_DIR}/src/fdr/fdr_engine_description.cpp" - "${LIBRARY_DIR}/src/fdr/fdr.c" - "${LIBRARY_DIR}/src/fdr/flood_compile.cpp" - "${LIBRARY_DIR}/src/fdr/teddy_compile.cpp" - "${LIBRARY_DIR}/src/fdr/teddy_engine_description.cpp" - "${LIBRARY_DIR}/src/fdr/teddy.c" - "${LIBRARY_DIR}/src/grey.cpp" - "${LIBRARY_DIR}/src/hs_valid_platform.c" - "${LIBRARY_DIR}/src/hs_version.c" - "${LIBRARY_DIR}/src/hs.cpp" - "${LIBRARY_DIR}/src/hwlm/hwlm_build.cpp" - "${LIBRARY_DIR}/src/hwlm/hwlm_literal.cpp" - "${LIBRARY_DIR}/src/hwlm/hwlm.c" - "${LIBRARY_DIR}/src/hwlm/noodle_build.cpp" - "${LIBRARY_DIR}/src/hwlm/noodle_engine.c" - "${LIBRARY_DIR}/src/nfa/accel_dfa_build_strat.cpp" - "${LIBRARY_DIR}/src/nfa/accel.c" - "${LIBRARY_DIR}/src/nfa/accelcompile.cpp" - "${LIBRARY_DIR}/src/nfa/castle.c" - "${LIBRARY_DIR}/src/nfa/castlecompile.cpp" - "${LIBRARY_DIR}/src/nfa/dfa_build_strat.cpp" - "${LIBRARY_DIR}/src/nfa/dfa_min.cpp" - "${LIBRARY_DIR}/src/nfa/gough.c" - "${LIBRARY_DIR}/src/nfa/goughcompile_accel.cpp" - "${LIBRARY_DIR}/src/nfa/goughcompile_reg.cpp" - "${LIBRARY_DIR}/src/nfa/goughcompile.cpp" - "${LIBRARY_DIR}/src/nfa/lbr.c" - "${LIBRARY_DIR}/src/nfa/limex_64.c" - "${LIBRARY_DIR}/src/nfa/limex_accel.c" - "${LIBRARY_DIR}/src/nfa/limex_compile.cpp" - "${LIBRARY_DIR}/src/nfa/limex_native.c" - "${LIBRARY_DIR}/src/nfa/limex_simd128.c" - "${LIBRARY_DIR}/src/nfa/limex_simd256.c" - "${LIBRARY_DIR}/src/nfa/limex_simd384.c" - "${LIBRARY_DIR}/src/nfa/limex_simd512.c" - "${LIBRARY_DIR}/src/nfa/mcclellan.c" - "${LIBRARY_DIR}/src/nfa/mcclellancompile_util.cpp" - "${LIBRARY_DIR}/src/nfa/mcclellancompile.cpp" - "${LIBRARY_DIR}/src/nfa/mcsheng_compile.cpp" - 
"${LIBRARY_DIR}/src/nfa/mcsheng_data.c" - "${LIBRARY_DIR}/src/nfa/mcsheng.c" - "${LIBRARY_DIR}/src/nfa/mpv.c" - "${LIBRARY_DIR}/src/nfa/mpvcompile.cpp" - "${LIBRARY_DIR}/src/nfa/nfa_api_dispatch.c" - "${LIBRARY_DIR}/src/nfa/nfa_build_util.cpp" - "${LIBRARY_DIR}/src/nfa/rdfa_graph.cpp" - "${LIBRARY_DIR}/src/nfa/rdfa_merge.cpp" - "${LIBRARY_DIR}/src/nfa/rdfa.cpp" - "${LIBRARY_DIR}/src/nfa/repeat.c" - "${LIBRARY_DIR}/src/nfa/repeatcompile.cpp" - "${LIBRARY_DIR}/src/nfa/sheng.c" - "${LIBRARY_DIR}/src/nfa/shengcompile.cpp" - "${LIBRARY_DIR}/src/nfa/shufti.c" - "${LIBRARY_DIR}/src/nfa/shufticompile.cpp" - "${LIBRARY_DIR}/src/nfa/tamarama.c" - "${LIBRARY_DIR}/src/nfa/tamaramacompile.cpp" - "${LIBRARY_DIR}/src/nfa/truffle.c" - "${LIBRARY_DIR}/src/nfa/trufflecompile.cpp" - "${LIBRARY_DIR}/src/nfagraph/ng_anchored_acyclic.cpp" - "${LIBRARY_DIR}/src/nfagraph/ng_anchored_dots.cpp" - "${LIBRARY_DIR}/src/nfagraph/ng_asserts.cpp" - "${LIBRARY_DIR}/src/nfagraph/ng_builder.cpp" - "${LIBRARY_DIR}/src/nfagraph/ng_calc_components.cpp" - "${LIBRARY_DIR}/src/nfagraph/ng_cyclic_redundancy.cpp" - "${LIBRARY_DIR}/src/nfagraph/ng_depth.cpp" - "${LIBRARY_DIR}/src/nfagraph/ng_dominators.cpp" - "${LIBRARY_DIR}/src/nfagraph/ng_edge_redundancy.cpp" - "${LIBRARY_DIR}/src/nfagraph/ng_equivalence.cpp" - "${LIBRARY_DIR}/src/nfagraph/ng_execute.cpp" - "${LIBRARY_DIR}/src/nfagraph/ng_expr_info.cpp" - "${LIBRARY_DIR}/src/nfagraph/ng_extparam.cpp" - "${LIBRARY_DIR}/src/nfagraph/ng_fixed_width.cpp" - "${LIBRARY_DIR}/src/nfagraph/ng_fuzzy.cpp" - "${LIBRARY_DIR}/src/nfagraph/ng_haig.cpp" - "${LIBRARY_DIR}/src/nfagraph/ng_holder.cpp" - "${LIBRARY_DIR}/src/nfagraph/ng_is_equal.cpp" - "${LIBRARY_DIR}/src/nfagraph/ng_lbr.cpp" - "${LIBRARY_DIR}/src/nfagraph/ng_limex_accel.cpp" - "${LIBRARY_DIR}/src/nfagraph/ng_limex.cpp" - "${LIBRARY_DIR}/src/nfagraph/ng_literal_analysis.cpp" - "${LIBRARY_DIR}/src/nfagraph/ng_literal_component.cpp" - "${LIBRARY_DIR}/src/nfagraph/ng_literal_decorated.cpp" - "${LIBRARY_DIR}/src/nfagraph/ng_mcclellan.cpp" - "${LIBRARY_DIR}/src/nfagraph/ng_misc_opt.cpp" - "${LIBRARY_DIR}/src/nfagraph/ng_netflow.cpp" - "${LIBRARY_DIR}/src/nfagraph/ng_prefilter.cpp" - "${LIBRARY_DIR}/src/nfagraph/ng_prune.cpp" - "${LIBRARY_DIR}/src/nfagraph/ng_puff.cpp" - "${LIBRARY_DIR}/src/nfagraph/ng_redundancy.cpp" - "${LIBRARY_DIR}/src/nfagraph/ng_region_redundancy.cpp" - "${LIBRARY_DIR}/src/nfagraph/ng_region.cpp" - "${LIBRARY_DIR}/src/nfagraph/ng_repeat.cpp" - "${LIBRARY_DIR}/src/nfagraph/ng_reports.cpp" - "${LIBRARY_DIR}/src/nfagraph/ng_restructuring.cpp" - "${LIBRARY_DIR}/src/nfagraph/ng_revacc.cpp" - "${LIBRARY_DIR}/src/nfagraph/ng_sep.cpp" - "${LIBRARY_DIR}/src/nfagraph/ng_small_literal_set.cpp" - "${LIBRARY_DIR}/src/nfagraph/ng_som_add_redundancy.cpp" - "${LIBRARY_DIR}/src/nfagraph/ng_som_util.cpp" - "${LIBRARY_DIR}/src/nfagraph/ng_som.cpp" - "${LIBRARY_DIR}/src/nfagraph/ng_split.cpp" - "${LIBRARY_DIR}/src/nfagraph/ng_squash.cpp" - "${LIBRARY_DIR}/src/nfagraph/ng_stop.cpp" - "${LIBRARY_DIR}/src/nfagraph/ng_uncalc_components.cpp" - "${LIBRARY_DIR}/src/nfagraph/ng_utf8.cpp" - "${LIBRARY_DIR}/src/nfagraph/ng_util.cpp" - "${LIBRARY_DIR}/src/nfagraph/ng_vacuous.cpp" - "${LIBRARY_DIR}/src/nfagraph/ng_violet.cpp" - "${LIBRARY_DIR}/src/nfagraph/ng_width.cpp" - "${LIBRARY_DIR}/src/nfagraph/ng.cpp" - "${LIBRARY_DIR}/src/parser/AsciiComponentClass.cpp" - "${LIBRARY_DIR}/src/parser/buildstate.cpp" - "${LIBRARY_DIR}/src/parser/check_refs.cpp" - "${LIBRARY_DIR}/src/parser/Component.cpp" - "${LIBRARY_DIR}/src/parser/ComponentAlternation.cpp" - 
"${LIBRARY_DIR}/src/parser/ComponentAssertion.cpp" - "${LIBRARY_DIR}/src/parser/ComponentAtomicGroup.cpp" - "${LIBRARY_DIR}/src/parser/ComponentBackReference.cpp" - "${LIBRARY_DIR}/src/parser/ComponentBoundary.cpp" - "${LIBRARY_DIR}/src/parser/ComponentByte.cpp" - "${LIBRARY_DIR}/src/parser/ComponentClass.cpp" - "${LIBRARY_DIR}/src/parser/ComponentCondReference.cpp" - "${LIBRARY_DIR}/src/parser/ComponentEmpty.cpp" - "${LIBRARY_DIR}/src/parser/ComponentEUS.cpp" - "${LIBRARY_DIR}/src/parser/ComponentRepeat.cpp" - "${LIBRARY_DIR}/src/parser/ComponentSequence.cpp" - "${LIBRARY_DIR}/src/parser/ComponentVisitor.cpp" - "${LIBRARY_DIR}/src/parser/ComponentWordBoundary.cpp" - "${LIBRARY_DIR}/src/parser/ConstComponentVisitor.cpp" - "${LIBRARY_DIR}/src/parser/control_verbs.cpp" - "${LIBRARY_DIR}/src/parser/logical_combination.cpp" - "${LIBRARY_DIR}/src/parser/parse_error.cpp" - "${LIBRARY_DIR}/src/parser/parser_util.cpp" - "${LIBRARY_DIR}/src/parser/Parser.cpp" - "${LIBRARY_DIR}/src/parser/prefilter.cpp" - "${LIBRARY_DIR}/src/parser/shortcut_literal.cpp" - "${LIBRARY_DIR}/src/parser/ucp_table.cpp" - "${LIBRARY_DIR}/src/parser/unsupported.cpp" - "${LIBRARY_DIR}/src/parser/utf8_validate.cpp" - "${LIBRARY_DIR}/src/parser/Utf8ComponentClass.cpp" - "${LIBRARY_DIR}/src/rose/block.c" - "${LIBRARY_DIR}/src/rose/catchup.c" - "${LIBRARY_DIR}/src/rose/init.c" - "${LIBRARY_DIR}/src/rose/match.c" - "${LIBRARY_DIR}/src/rose/program_runtime.c" - "${LIBRARY_DIR}/src/rose/rose_build_add_mask.cpp" - "${LIBRARY_DIR}/src/rose/rose_build_add.cpp" - "${LIBRARY_DIR}/src/rose/rose_build_anchored.cpp" - "${LIBRARY_DIR}/src/rose/rose_build_bytecode.cpp" - "${LIBRARY_DIR}/src/rose/rose_build_castle.cpp" - "${LIBRARY_DIR}/src/rose/rose_build_compile.cpp" - "${LIBRARY_DIR}/src/rose/rose_build_convert.cpp" - "${LIBRARY_DIR}/src/rose/rose_build_dedupe.cpp" - "${LIBRARY_DIR}/src/rose/rose_build_engine_blob.cpp" - "${LIBRARY_DIR}/src/rose/rose_build_exclusive.cpp" - "${LIBRARY_DIR}/src/rose/rose_build_groups.cpp" - "${LIBRARY_DIR}/src/rose/rose_build_infix.cpp" - "${LIBRARY_DIR}/src/rose/rose_build_instructions.cpp" - "${LIBRARY_DIR}/src/rose/rose_build_lit_accel.cpp" - "${LIBRARY_DIR}/src/rose/rose_build_long_lit.cpp" - "${LIBRARY_DIR}/src/rose/rose_build_lookaround.cpp" - "${LIBRARY_DIR}/src/rose/rose_build_matchers.cpp" - "${LIBRARY_DIR}/src/rose/rose_build_merge.cpp" - "${LIBRARY_DIR}/src/rose/rose_build_misc.cpp" - "${LIBRARY_DIR}/src/rose/rose_build_program.cpp" - "${LIBRARY_DIR}/src/rose/rose_build_role_aliasing.cpp" - "${LIBRARY_DIR}/src/rose/rose_build_scatter.cpp" - "${LIBRARY_DIR}/src/rose/rose_build_width.cpp" - "${LIBRARY_DIR}/src/rose/rose_in_util.cpp" - "${LIBRARY_DIR}/src/rose/stream.c" - "${LIBRARY_DIR}/src/runtime.c" - "${LIBRARY_DIR}/src/scratch.c" - "${LIBRARY_DIR}/src/smallwrite/smallwrite_build.cpp" - "${LIBRARY_DIR}/src/som/slot_manager.cpp" - "${LIBRARY_DIR}/src/som/som_runtime.c" - "${LIBRARY_DIR}/src/som/som_stream.c" - "${LIBRARY_DIR}/src/stream_compress.c" - "${LIBRARY_DIR}/src/util/alloc.cpp" - "${LIBRARY_DIR}/src/util/charreach.cpp" - "${LIBRARY_DIR}/src/util/clique.cpp" - "${LIBRARY_DIR}/src/util/compile_context.cpp" - "${LIBRARY_DIR}/src/util/compile_error.cpp" - "${LIBRARY_DIR}/src/util/cpuid_flags.c" - "${LIBRARY_DIR}/src/util/depth.cpp" - "${LIBRARY_DIR}/src/util/fatbit_build.cpp" - "${LIBRARY_DIR}/src/util/multibit_build.cpp" - "${LIBRARY_DIR}/src/util/multibit.c" - "${LIBRARY_DIR}/src/util/report_manager.cpp" - "${LIBRARY_DIR}/src/util/simd_utils.c" - "${LIBRARY_DIR}/src/util/state_compress.c" - 
"${LIBRARY_DIR}/src/util/target_info.cpp" - "${LIBRARY_DIR}/src/util/ue2string.cpp" - ) +set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/hyperscan") - add_library (hyperscan ${SRCS}) +set (SRCS + "${LIBRARY_DIR}/src/alloc.c" + "${LIBRARY_DIR}/src/compiler/asserts.cpp" + "${LIBRARY_DIR}/src/compiler/compiler.cpp" + "${LIBRARY_DIR}/src/compiler/error.cpp" + "${LIBRARY_DIR}/src/crc32.c" + "${LIBRARY_DIR}/src/database.c" + "${LIBRARY_DIR}/src/fdr/engine_description.cpp" + "${LIBRARY_DIR}/src/fdr/fdr_compile_util.cpp" + "${LIBRARY_DIR}/src/fdr/fdr_compile.cpp" + "${LIBRARY_DIR}/src/fdr/fdr_confirm_compile.cpp" + "${LIBRARY_DIR}/src/fdr/fdr_engine_description.cpp" + "${LIBRARY_DIR}/src/fdr/fdr.c" + "${LIBRARY_DIR}/src/fdr/flood_compile.cpp" + "${LIBRARY_DIR}/src/fdr/teddy_compile.cpp" + "${LIBRARY_DIR}/src/fdr/teddy_engine_description.cpp" + "${LIBRARY_DIR}/src/fdr/teddy.c" + "${LIBRARY_DIR}/src/grey.cpp" + "${LIBRARY_DIR}/src/hs_valid_platform.c" + "${LIBRARY_DIR}/src/hs_version.c" + "${LIBRARY_DIR}/src/hs.cpp" + "${LIBRARY_DIR}/src/hwlm/hwlm_build.cpp" + "${LIBRARY_DIR}/src/hwlm/hwlm_literal.cpp" + "${LIBRARY_DIR}/src/hwlm/hwlm.c" + "${LIBRARY_DIR}/src/hwlm/noodle_build.cpp" + "${LIBRARY_DIR}/src/hwlm/noodle_engine.c" + "${LIBRARY_DIR}/src/nfa/accel_dfa_build_strat.cpp" + "${LIBRARY_DIR}/src/nfa/accel.c" + "${LIBRARY_DIR}/src/nfa/accelcompile.cpp" + "${LIBRARY_DIR}/src/nfa/castle.c" + "${LIBRARY_DIR}/src/nfa/castlecompile.cpp" + "${LIBRARY_DIR}/src/nfa/dfa_build_strat.cpp" + "${LIBRARY_DIR}/src/nfa/dfa_min.cpp" + "${LIBRARY_DIR}/src/nfa/gough.c" + "${LIBRARY_DIR}/src/nfa/goughcompile_accel.cpp" + "${LIBRARY_DIR}/src/nfa/goughcompile_reg.cpp" + "${LIBRARY_DIR}/src/nfa/goughcompile.cpp" + "${LIBRARY_DIR}/src/nfa/lbr.c" + "${LIBRARY_DIR}/src/nfa/limex_64.c" + "${LIBRARY_DIR}/src/nfa/limex_accel.c" + "${LIBRARY_DIR}/src/nfa/limex_compile.cpp" + "${LIBRARY_DIR}/src/nfa/limex_native.c" + "${LIBRARY_DIR}/src/nfa/limex_simd128.c" + "${LIBRARY_DIR}/src/nfa/limex_simd256.c" + "${LIBRARY_DIR}/src/nfa/limex_simd384.c" + "${LIBRARY_DIR}/src/nfa/limex_simd512.c" + "${LIBRARY_DIR}/src/nfa/mcclellan.c" + "${LIBRARY_DIR}/src/nfa/mcclellancompile_util.cpp" + "${LIBRARY_DIR}/src/nfa/mcclellancompile.cpp" + "${LIBRARY_DIR}/src/nfa/mcsheng_compile.cpp" + "${LIBRARY_DIR}/src/nfa/mcsheng_data.c" + "${LIBRARY_DIR}/src/nfa/mcsheng.c" + "${LIBRARY_DIR}/src/nfa/mpv.c" + "${LIBRARY_DIR}/src/nfa/mpvcompile.cpp" + "${LIBRARY_DIR}/src/nfa/nfa_api_dispatch.c" + "${LIBRARY_DIR}/src/nfa/nfa_build_util.cpp" + "${LIBRARY_DIR}/src/nfa/rdfa_graph.cpp" + "${LIBRARY_DIR}/src/nfa/rdfa_merge.cpp" + "${LIBRARY_DIR}/src/nfa/rdfa.cpp" + "${LIBRARY_DIR}/src/nfa/repeat.c" + "${LIBRARY_DIR}/src/nfa/repeatcompile.cpp" + "${LIBRARY_DIR}/src/nfa/sheng.c" + "${LIBRARY_DIR}/src/nfa/shengcompile.cpp" + "${LIBRARY_DIR}/src/nfa/shufti.c" + "${LIBRARY_DIR}/src/nfa/shufticompile.cpp" + "${LIBRARY_DIR}/src/nfa/tamarama.c" + "${LIBRARY_DIR}/src/nfa/tamaramacompile.cpp" + "${LIBRARY_DIR}/src/nfa/truffle.c" + "${LIBRARY_DIR}/src/nfa/trufflecompile.cpp" + "${LIBRARY_DIR}/src/nfagraph/ng_anchored_acyclic.cpp" + "${LIBRARY_DIR}/src/nfagraph/ng_anchored_dots.cpp" + "${LIBRARY_DIR}/src/nfagraph/ng_asserts.cpp" + "${LIBRARY_DIR}/src/nfagraph/ng_builder.cpp" + "${LIBRARY_DIR}/src/nfagraph/ng_calc_components.cpp" + "${LIBRARY_DIR}/src/nfagraph/ng_cyclic_redundancy.cpp" + "${LIBRARY_DIR}/src/nfagraph/ng_depth.cpp" + "${LIBRARY_DIR}/src/nfagraph/ng_dominators.cpp" + "${LIBRARY_DIR}/src/nfagraph/ng_edge_redundancy.cpp" + 
"${LIBRARY_DIR}/src/nfagraph/ng_equivalence.cpp" + "${LIBRARY_DIR}/src/nfagraph/ng_execute.cpp" + "${LIBRARY_DIR}/src/nfagraph/ng_expr_info.cpp" + "${LIBRARY_DIR}/src/nfagraph/ng_extparam.cpp" + "${LIBRARY_DIR}/src/nfagraph/ng_fixed_width.cpp" + "${LIBRARY_DIR}/src/nfagraph/ng_fuzzy.cpp" + "${LIBRARY_DIR}/src/nfagraph/ng_haig.cpp" + "${LIBRARY_DIR}/src/nfagraph/ng_holder.cpp" + "${LIBRARY_DIR}/src/nfagraph/ng_is_equal.cpp" + "${LIBRARY_DIR}/src/nfagraph/ng_lbr.cpp" + "${LIBRARY_DIR}/src/nfagraph/ng_limex_accel.cpp" + "${LIBRARY_DIR}/src/nfagraph/ng_limex.cpp" + "${LIBRARY_DIR}/src/nfagraph/ng_literal_analysis.cpp" + "${LIBRARY_DIR}/src/nfagraph/ng_literal_component.cpp" + "${LIBRARY_DIR}/src/nfagraph/ng_literal_decorated.cpp" + "${LIBRARY_DIR}/src/nfagraph/ng_mcclellan.cpp" + "${LIBRARY_DIR}/src/nfagraph/ng_misc_opt.cpp" + "${LIBRARY_DIR}/src/nfagraph/ng_netflow.cpp" + "${LIBRARY_DIR}/src/nfagraph/ng_prefilter.cpp" + "${LIBRARY_DIR}/src/nfagraph/ng_prune.cpp" + "${LIBRARY_DIR}/src/nfagraph/ng_puff.cpp" + "${LIBRARY_DIR}/src/nfagraph/ng_redundancy.cpp" + "${LIBRARY_DIR}/src/nfagraph/ng_region_redundancy.cpp" + "${LIBRARY_DIR}/src/nfagraph/ng_region.cpp" + "${LIBRARY_DIR}/src/nfagraph/ng_repeat.cpp" + "${LIBRARY_DIR}/src/nfagraph/ng_reports.cpp" + "${LIBRARY_DIR}/src/nfagraph/ng_restructuring.cpp" + "${LIBRARY_DIR}/src/nfagraph/ng_revacc.cpp" + "${LIBRARY_DIR}/src/nfagraph/ng_sep.cpp" + "${LIBRARY_DIR}/src/nfagraph/ng_small_literal_set.cpp" + "${LIBRARY_DIR}/src/nfagraph/ng_som_add_redundancy.cpp" + "${LIBRARY_DIR}/src/nfagraph/ng_som_util.cpp" + "${LIBRARY_DIR}/src/nfagraph/ng_som.cpp" + "${LIBRARY_DIR}/src/nfagraph/ng_split.cpp" + "${LIBRARY_DIR}/src/nfagraph/ng_squash.cpp" + "${LIBRARY_DIR}/src/nfagraph/ng_stop.cpp" + "${LIBRARY_DIR}/src/nfagraph/ng_uncalc_components.cpp" + "${LIBRARY_DIR}/src/nfagraph/ng_utf8.cpp" + "${LIBRARY_DIR}/src/nfagraph/ng_util.cpp" + "${LIBRARY_DIR}/src/nfagraph/ng_vacuous.cpp" + "${LIBRARY_DIR}/src/nfagraph/ng_violet.cpp" + "${LIBRARY_DIR}/src/nfagraph/ng_width.cpp" + "${LIBRARY_DIR}/src/nfagraph/ng.cpp" + "${LIBRARY_DIR}/src/parser/AsciiComponentClass.cpp" + "${LIBRARY_DIR}/src/parser/buildstate.cpp" + "${LIBRARY_DIR}/src/parser/check_refs.cpp" + "${LIBRARY_DIR}/src/parser/Component.cpp" + "${LIBRARY_DIR}/src/parser/ComponentAlternation.cpp" + "${LIBRARY_DIR}/src/parser/ComponentAssertion.cpp" + "${LIBRARY_DIR}/src/parser/ComponentAtomicGroup.cpp" + "${LIBRARY_DIR}/src/parser/ComponentBackReference.cpp" + "${LIBRARY_DIR}/src/parser/ComponentBoundary.cpp" + "${LIBRARY_DIR}/src/parser/ComponentByte.cpp" + "${LIBRARY_DIR}/src/parser/ComponentClass.cpp" + "${LIBRARY_DIR}/src/parser/ComponentCondReference.cpp" + "${LIBRARY_DIR}/src/parser/ComponentEmpty.cpp" + "${LIBRARY_DIR}/src/parser/ComponentEUS.cpp" + "${LIBRARY_DIR}/src/parser/ComponentRepeat.cpp" + "${LIBRARY_DIR}/src/parser/ComponentSequence.cpp" + "${LIBRARY_DIR}/src/parser/ComponentVisitor.cpp" + "${LIBRARY_DIR}/src/parser/ComponentWordBoundary.cpp" + "${LIBRARY_DIR}/src/parser/ConstComponentVisitor.cpp" + "${LIBRARY_DIR}/src/parser/control_verbs.cpp" + "${LIBRARY_DIR}/src/parser/logical_combination.cpp" + "${LIBRARY_DIR}/src/parser/parse_error.cpp" + "${LIBRARY_DIR}/src/parser/parser_util.cpp" + "${LIBRARY_DIR}/src/parser/Parser.cpp" + "${LIBRARY_DIR}/src/parser/prefilter.cpp" + "${LIBRARY_DIR}/src/parser/shortcut_literal.cpp" + "${LIBRARY_DIR}/src/parser/ucp_table.cpp" + "${LIBRARY_DIR}/src/parser/unsupported.cpp" + "${LIBRARY_DIR}/src/parser/utf8_validate.cpp" + 
"${LIBRARY_DIR}/src/parser/Utf8ComponentClass.cpp" + "${LIBRARY_DIR}/src/rose/block.c" + "${LIBRARY_DIR}/src/rose/catchup.c" + "${LIBRARY_DIR}/src/rose/init.c" + "${LIBRARY_DIR}/src/rose/match.c" + "${LIBRARY_DIR}/src/rose/program_runtime.c" + "${LIBRARY_DIR}/src/rose/rose_build_add_mask.cpp" + "${LIBRARY_DIR}/src/rose/rose_build_add.cpp" + "${LIBRARY_DIR}/src/rose/rose_build_anchored.cpp" + "${LIBRARY_DIR}/src/rose/rose_build_bytecode.cpp" + "${LIBRARY_DIR}/src/rose/rose_build_castle.cpp" + "${LIBRARY_DIR}/src/rose/rose_build_compile.cpp" + "${LIBRARY_DIR}/src/rose/rose_build_convert.cpp" + "${LIBRARY_DIR}/src/rose/rose_build_dedupe.cpp" + "${LIBRARY_DIR}/src/rose/rose_build_engine_blob.cpp" + "${LIBRARY_DIR}/src/rose/rose_build_exclusive.cpp" + "${LIBRARY_DIR}/src/rose/rose_build_groups.cpp" + "${LIBRARY_DIR}/src/rose/rose_build_infix.cpp" + "${LIBRARY_DIR}/src/rose/rose_build_instructions.cpp" + "${LIBRARY_DIR}/src/rose/rose_build_lit_accel.cpp" + "${LIBRARY_DIR}/src/rose/rose_build_long_lit.cpp" + "${LIBRARY_DIR}/src/rose/rose_build_lookaround.cpp" + "${LIBRARY_DIR}/src/rose/rose_build_matchers.cpp" + "${LIBRARY_DIR}/src/rose/rose_build_merge.cpp" + "${LIBRARY_DIR}/src/rose/rose_build_misc.cpp" + "${LIBRARY_DIR}/src/rose/rose_build_program.cpp" + "${LIBRARY_DIR}/src/rose/rose_build_role_aliasing.cpp" + "${LIBRARY_DIR}/src/rose/rose_build_scatter.cpp" + "${LIBRARY_DIR}/src/rose/rose_build_width.cpp" + "${LIBRARY_DIR}/src/rose/rose_in_util.cpp" + "${LIBRARY_DIR}/src/rose/stream.c" + "${LIBRARY_DIR}/src/runtime.c" + "${LIBRARY_DIR}/src/scratch.c" + "${LIBRARY_DIR}/src/smallwrite/smallwrite_build.cpp" + "${LIBRARY_DIR}/src/som/slot_manager.cpp" + "${LIBRARY_DIR}/src/som/som_runtime.c" + "${LIBRARY_DIR}/src/som/som_stream.c" + "${LIBRARY_DIR}/src/stream_compress.c" + "${LIBRARY_DIR}/src/util/alloc.cpp" + "${LIBRARY_DIR}/src/util/charreach.cpp" + "${LIBRARY_DIR}/src/util/clique.cpp" + "${LIBRARY_DIR}/src/util/compile_context.cpp" + "${LIBRARY_DIR}/src/util/compile_error.cpp" + "${LIBRARY_DIR}/src/util/cpuid_flags.c" + "${LIBRARY_DIR}/src/util/depth.cpp" + "${LIBRARY_DIR}/src/util/fatbit_build.cpp" + "${LIBRARY_DIR}/src/util/multibit_build.cpp" + "${LIBRARY_DIR}/src/util/multibit.c" + "${LIBRARY_DIR}/src/util/report_manager.cpp" + "${LIBRARY_DIR}/src/util/simd_utils.c" + "${LIBRARY_DIR}/src/util/state_compress.c" + "${LIBRARY_DIR}/src/util/target_info.cpp" + "${LIBRARY_DIR}/src/util/ue2string.cpp" +) - target_compile_definitions (hyperscan PUBLIC USE_HYPERSCAN=1) - target_compile_options (hyperscan - PRIVATE -g0 # Library has too much debug information - -mno-avx -mno-avx2 # The library is using dynamic dispatch and is confused if AVX is enabled globally - -march=corei7 -O2 -fno-strict-aliasing -fno-omit-frame-pointer -fvisibility=hidden # The options from original build system - -fno-sanitize=undefined # Assume the library takes care of itself - ) - target_include_directories (hyperscan - PRIVATE - common - "${LIBRARY_DIR}/include" - ) - target_include_directories (hyperscan SYSTEM PUBLIC "${LIBRARY_DIR}/src") - if (ARCH_AMD64) - target_include_directories (hyperscan PRIVATE x86_64) - endif () - target_link_libraries (hyperscan PRIVATE boost::headers_only) +add_library (_hyperscan ${SRCS}) - set (USE_INTERNAL_HYPERSCAN_LIBRARY 1) +target_compile_options (_hyperscan + PRIVATE -g0 # Library has too much debug information + -mno-avx -mno-avx2 # The library is using dynamic dispatch and is confused if AVX is enabled globally + -march=corei7 -O2 -fno-strict-aliasing -fno-omit-frame-pointer 
-fvisibility=hidden # The options from original build system + -fno-sanitize=undefined # Assume the library takes care of itself +) +target_include_directories (_hyperscan + PRIVATE + common + "${LIBRARY_DIR}/include" +) +target_include_directories (_hyperscan SYSTEM PUBLIC "${LIBRARY_DIR}/src") +if (ARCH_AMD64) + target_include_directories (_hyperscan PRIVATE x86_64) endif () +target_link_libraries (_hyperscan PRIVATE boost::headers_only) -message (STATUS "Using hyperscan") +add_library (ch_contrib::hyperscan ALIAS _hyperscan) diff --git a/contrib/icu-cmake/CMakeLists.txt b/contrib/icu-cmake/CMakeLists.txt index 26f3bb11006b..ae19ef20e38f 100644 --- a/contrib/icu-cmake/CMakeLists.txt +++ b/contrib/icu-cmake/CMakeLists.txt @@ -1,3 +1,14 @@ +if (OS_LINUX) + option(ENABLE_ICU "Enable ICU" ${ENABLE_LIBRARIES}) +else () + option(ENABLE_ICU "Enable ICU" 0) +endif () + +if (NOT ENABLE_ICU) + message(STATUS "Not using icu") + return() +endif() + set(ICU_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/icu/icu4c/source") set(ICUDATA_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/icudata/") @@ -447,19 +458,23 @@ set(ICUDATA_SOURCES # Note that we don't like any kind of binary plugins (because of runtime dependencies, vulnerabilities, ABI incompatibilities). add_definitions(-D_REENTRANT -DU_HAVE_ELF_H=1 -DU_HAVE_STRTOD_L=1 -DU_HAVE_XLOCALE_H=0 -DDEFAULT_ICU_PLUGINS="/dev/null") -add_library(icuuc ${ICUUC_SOURCES}) -add_library(icui18n ${ICUI18N_SOURCES}) -add_library(icudata ${ICUDATA_SOURCES}) +add_library(_icuuc ${ICUUC_SOURCES}) +add_library(_icui18n ${ICUI18N_SOURCES}) +add_library(_icudata ${ICUDATA_SOURCES}) -target_link_libraries(icuuc PRIVATE icudata) -target_link_libraries(icui18n PRIVATE icuuc) +target_link_libraries(_icuuc PRIVATE _icudata) +target_link_libraries(_icui18n PRIVATE _icuuc) -target_include_directories(icuuc SYSTEM PUBLIC "${ICU_SOURCE_DIR}/common/") -target_include_directories(icui18n SYSTEM PUBLIC "${ICU_SOURCE_DIR}/i18n/") +target_include_directories(_icuuc SYSTEM PUBLIC "${ICU_SOURCE_DIR}/common/") +target_include_directories(_icui18n SYSTEM PUBLIC "${ICU_SOURCE_DIR}/i18n/") -target_compile_definitions(icuuc PRIVATE -DU_COMMON_IMPLEMENTATION) -target_compile_definitions(icui18n PRIVATE -DU_I18N_IMPLEMENTATION) +target_compile_definitions(_icuuc PRIVATE -DU_COMMON_IMPLEMENTATION) +target_compile_definitions(_icui18n PRIVATE -DU_I18N_IMPLEMENTATION) if (COMPILER_CLANG) - target_compile_options(icudata PRIVATE -Wno-unused-command-line-argument) + target_compile_options(_icudata PRIVATE -Wno-unused-command-line-argument) endif () + +add_library(_icu INTERFACE) +target_link_libraries(_icu INTERFACE _icui18n _icuuc _icudata) +add_library(ch_contrib::icu ALIAS _icu) diff --git a/contrib/jemalloc-cmake/CMakeLists.txt b/contrib/jemalloc-cmake/CMakeLists.txt index fb11879fb216..b3845c7d56b5 100644 --- a/contrib/jemalloc-cmake/CMakeLists.txt +++ b/contrib/jemalloc-cmake/CMakeLists.txt @@ -12,9 +12,6 @@ else () endif () if (NOT ENABLE_JEMALLOC) - add_library(jemalloc INTERFACE) - target_compile_definitions(jemalloc INTERFACE USE_JEMALLOC=0) - message (STATUS "Not using jemalloc") return() endif () @@ -90,9 +87,9 @@ if (OS_DARWIN) list(APPEND SRCS "${LIBRARY_DIR}/src/zone.c") endif () -add_library(jemalloc ${SRCS}) -target_include_directories(jemalloc PRIVATE "${LIBRARY_DIR}/include") -target_include_directories(jemalloc SYSTEM PUBLIC include) +add_library(_jemalloc ${SRCS}) +target_include_directories(_jemalloc PRIVATE "${LIBRARY_DIR}/include") +target_include_directories(_jemalloc SYSTEM 
PUBLIC include) set (JEMALLOC_INCLUDE_PREFIX) # OS_ @@ -120,26 +117,24 @@ endif () configure_file(${JEMALLOC_INCLUDE_PREFIX}/jemalloc/internal/jemalloc_internal_defs.h.in ${JEMALLOC_INCLUDE_PREFIX}/jemalloc/internal/jemalloc_internal_defs.h) -target_include_directories(jemalloc SYSTEM PRIVATE +target_include_directories(_jemalloc SYSTEM PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/${JEMALLOC_INCLUDE_PREFIX}/jemalloc/internal") -target_compile_definitions(jemalloc PRIVATE -DJEMALLOC_NO_PRIVATE_NAMESPACE) +target_compile_definitions(_jemalloc PRIVATE -DJEMALLOC_NO_PRIVATE_NAMESPACE) if (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG") - target_compile_definitions(jemalloc PRIVATE -DJEMALLOC_DEBUG=1) + target_compile_definitions(_jemalloc PRIVATE -DJEMALLOC_DEBUG=1) endif () -target_compile_definitions(jemalloc PRIVATE -DJEMALLOC_PROF=1) +target_compile_definitions(_jemalloc PRIVATE -DJEMALLOC_PROF=1) if (USE_UNWIND) - target_compile_definitions (jemalloc PRIVATE -DJEMALLOC_PROF_LIBUNWIND=1) - target_link_libraries (jemalloc PRIVATE unwind) + target_compile_definitions (_jemalloc PRIVATE -DJEMALLOC_PROF_LIBUNWIND=1) + target_link_libraries (_jemalloc PRIVATE unwind) endif () -target_compile_options(jemalloc PRIVATE -Wno-redundant-decls) +target_compile_options(_jemalloc PRIVATE -Wno-redundant-decls) # for RTLD_NEXT -target_compile_options(jemalloc PRIVATE -D_GNU_SOURCE) - -set_property(TARGET jemalloc APPEND PROPERTY INTERFACE_COMPILE_DEFINITIONS USE_JEMALLOC=1) +target_compile_options(_jemalloc PRIVATE -D_GNU_SOURCE) -message (STATUS "Using jemalloc") +add_library(ch_contrib::jemalloc ALIAS _jemalloc) diff --git a/contrib/krb5-cmake/CMakeLists.txt b/contrib/krb5-cmake/CMakeLists.txt index f7318a5bf8a0..685e8737ef0b 100644 --- a/contrib/krb5-cmake/CMakeLists.txt +++ b/contrib/krb5-cmake/CMakeLists.txt @@ -1,3 +1,15 @@ +set (ENABLE_KRB5_DEFAULT 1) +if (NOT CMAKE_SYSTEM_NAME MATCHES "Linux" AND NOT (CMAKE_SYSTEM_NAME MATCHES "Darwin" AND NOT CMAKE_CROSSCOMPILING)) + message (WARNING "krb5 disabled in non-Linux and non-native-Darwin environments") + set (ENABLE_KRB5_DEFAULT 0) +endif () +OPTION(ENABLE_KRB5 "Enable krb5" ${ENABLE_KRB5_DEFAULT}) + +if (NOT ENABLE_KRB5) + message(STATUS "Not using krb5") + return() +endif () + find_program(AWK_PROGRAM awk) if(NOT AWK_PROGRAM) message(FATAL_ERROR "You need the awk program to build ClickHouse with krb5 enabled.") @@ -546,10 +558,10 @@ add_custom_target( VERBATIM ) -add_library(${KRB5_LIBRARY}) +add_library(_krb5) add_dependencies( - ${KRB5_LIBRARY} + _krb5 ERRMAP_H ERROR_MAP_H KRB_5_H @@ -567,7 +579,7 @@ if(CMAKE_SYSTEM_NAME MATCHES "Darwin") list(APPEND ALL_SRCS "${CMAKE_CURRENT_BINARY_DIR}/include_private/kcmrpc.c") endif() -target_sources(${KRB5_LIBRARY} PRIVATE +target_sources(_krb5 PRIVATE ${ALL_SRCS} ) @@ -639,12 +651,12 @@ add_custom_command( -target_include_directories(${KRB5_LIBRARY} PUBLIC +target_include_directories(_krb5 SYSTEM BEFORE PUBLIC "${KRB5_SOURCE_DIR}/include" "${CMAKE_CURRENT_BINARY_DIR}/include" ) -target_include_directories(${KRB5_LIBRARY} PRIVATE +target_include_directories(_krb5 PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/include_private" # For autoconf.h and other generated headers. 
${KRB5_SOURCE_DIR} "${KRB5_SOURCE_DIR}/include" @@ -664,10 +676,9 @@ target_include_directories(${KRB5_LIBRARY} PRIVATE "${KRB5_SOURCE_DIR}/lib/krb5/rcache" "${KRB5_SOURCE_DIR}/lib/krb5/unicode" "${KRB5_SOURCE_DIR}/lib/krb5/os" - # ${OPENSSL_INCLUDE_DIR} ) -target_compile_definitions(${KRB5_LIBRARY} PRIVATE +target_compile_definitions(_krb5 PRIVATE KRB5_PRIVATE _GSS_STATIC_LINK=1 KRB5_DEPRECATED=1 @@ -677,6 +688,6 @@ target_compile_definitions(${KRB5_LIBRARY} PRIVATE LIBDIR="/usr/local/lib" ) -target_link_libraries(${KRB5_LIBRARY} - PRIVATE ${OPENSSL_CRYPTO_LIBRARY} -) +target_link_libraries(_krb5 PRIVATE OpenSSL::Crypto OpenSSL::SSL) + +add_library(ch_contrib::krb5 ALIAS _krb5) diff --git a/contrib/lemmagen-c-cmake/CMakeLists.txt b/contrib/lemmagen-c-cmake/CMakeLists.txt index 3a067916bf66..67e1e5791caf 100644 --- a/contrib/lemmagen-c-cmake/CMakeLists.txt +++ b/contrib/lemmagen-c-cmake/CMakeLists.txt @@ -5,5 +5,6 @@ set(SRCS "${LIBRARY_DIR}/src/RdrLemmatizer.cpp" ) -add_library(lemmagen STATIC ${SRCS}) -target_include_directories(lemmagen SYSTEM PUBLIC "${LEMMAGEN_INCLUDE_DIR}") +add_library(_lemmagen STATIC ${SRCS}) +target_include_directories(_lemmagen SYSTEM PUBLIC "${LEMMAGEN_INCLUDE_DIR}") +add_library(ch_contrib::lemmagen ALIAS _lemmagen) diff --git a/contrib/libcpuid-cmake/CMakeLists.txt b/contrib/libcpuid-cmake/CMakeLists.txt index 9baebb3ba1be..1940b39b6aa6 100644 --- a/contrib/libcpuid-cmake/CMakeLists.txt +++ b/contrib/libcpuid-cmake/CMakeLists.txt @@ -6,10 +6,7 @@ elseif(ENABLE_CPUID) endif() if (NOT ENABLE_CPUID) - add_library (cpuid INTERFACE) - - target_compile_definitions (cpuid INTERFACE USE_CPUID=0) - + message("Not using cpuid") return() endif() @@ -26,13 +23,12 @@ set (SRCS "${LIBRARY_DIR}/libcpuid/recog_intel.c" ) -add_library (cpuid ${SRCS}) +add_library (_cpuid ${SRCS}) -target_include_directories (cpuid SYSTEM PUBLIC "${LIBRARY_DIR}") -target_compile_definitions (cpuid PUBLIC USE_CPUID=1) -target_compile_definitions (cpuid PRIVATE VERSION="v0.4.1") +target_include_directories (_cpuid SYSTEM PUBLIC "${LIBRARY_DIR}") +target_compile_definitions (_cpuid PRIVATE VERSION="v0.4.1") if (COMPILER_CLANG) - target_compile_options (cpuid PRIVATE -Wno-reserved-id-macro) + target_compile_options (_cpuid PRIVATE -Wno-reserved-id-macro) endif () -message (STATUS "Using cpuid") +add_library(ch_contrib::cpuid ALIAS _cpuid) diff --git a/contrib/libdivide/CMakeLists.txt b/contrib/libdivide/CMakeLists.txt index 57e9f254db51..45cbc0a584b5 100644 --- a/contrib/libdivide/CMakeLists.txt +++ b/contrib/libdivide/CMakeLists.txt @@ -1,2 +1,3 @@ -add_library (libdivide INTERFACE) -target_include_directories (libdivide SYSTEM BEFORE INTERFACE .) +add_library (_libdivide INTERFACE) +target_include_directories (_libdivide SYSTEM BEFORE INTERFACE .) 
+add_library (ch_contrib::libdivide ALIAS _libdivide) diff --git a/contrib/libfarmhash/CMakeLists.txt b/contrib/libfarmhash/CMakeLists.txt index 20bba58cde7c..a0533a93f173 100644 --- a/contrib/libfarmhash/CMakeLists.txt +++ b/contrib/libfarmhash/CMakeLists.txt @@ -1,9 +1,11 @@ -add_library(farmhash - farmhash.cc - farmhash.h) +add_library(_farmhash + farmhash.cc + farmhash.h) if (MSVC) - target_compile_definitions (farmhash PRIVATE FARMHASH_NO_BUILTIN_EXPECT=1) + target_compile_definitions (_farmhash PRIVATE FARMHASH_NO_BUILTIN_EXPECT=1) endif () -target_include_directories (farmhash PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) +target_include_directories (_farmhash BEFORE PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) + +add_library(ch_contrib::farmhash ALIAS _farmhash) diff --git a/contrib/libgsasl-cmake/CMakeLists.txt b/contrib/libgsasl-cmake/CMakeLists.txt index 102ef12b9f5a..4bb4ca9dc33b 100644 --- a/contrib/libgsasl-cmake/CMakeLists.txt +++ b/contrib/libgsasl-cmake/CMakeLists.txt @@ -1,3 +1,10 @@ +option(ENABLE_GSASL_LIBRARY "Enable gsasl library" ${ENABLE_LIBRARIES}) + +if (NOT ENABLE_GSASL_LIBRARY) + message(STATUS "Not using gsasl library") + return() +endif() + set (SRC_DIR "${ClickHouse_SOURCE_DIR}/contrib/libgsasl") set(SRCS @@ -84,24 +91,26 @@ set(SRCS ${SRC_DIR}/login/server.c ) -if (USE_KRB5) +if (TARGET ch_contrib::krb5) set(SRCS ${SRCS} ${SRC_DIR}/gssapi/client.c ${SRC_DIR}/gssapi/mechinfo.c ${SRC_DIR}/gssapi/server.c) endif() -add_library(gsasl ${SRCS}) +add_library(_gsasl ${SRCS}) -target_include_directories(gsasl PUBLIC ${SRC_DIR}) -target_include_directories(gsasl PUBLIC ${SRC_DIR}/gl) -target_include_directories(gsasl PUBLIC ${SRC_DIR}/src) -target_include_directories(gsasl PUBLIC ${SRC_DIR}/digest-md5) -target_include_directories(gsasl PRIVATE "${ClickHouse_SOURCE_DIR}/contrib/libgsasl-cmake/linux_x86_64/include") +target_include_directories(_gsasl PUBLIC ${SRC_DIR}) +target_include_directories(_gsasl PUBLIC ${SRC_DIR}/gl) +target_include_directories(_gsasl PUBLIC ${SRC_DIR}/src) +target_include_directories(_gsasl PUBLIC ${SRC_DIR}/digest-md5) +target_include_directories(_gsasl PUBLIC "${ClickHouse_SOURCE_DIR}/contrib/libgsasl-cmake/linux_x86_64/include") -target_compile_definitions (gsasl PRIVATE HAVE_CONFIG_H=1) +target_compile_definitions(_gsasl PRIVATE HAVE_CONFIG_H=1) -if (USE_KRB5) - target_link_libraries(gsasl PUBLIC ${KRB5_LIBRARY}) - target_compile_definitions (gsasl PRIVATE HAVE_GSSAPI_H=1 USE_GSSAPI=1) +if (TARGET ch_contrib::krb5) + target_link_libraries(_gsasl PUBLIC ch_contrib::krb5) + target_compile_definitions(_gsasl PRIVATE HAVE_GSSAPI_H=1 USE_GSSAPI=1) endif() + +add_library(ch_contrib::gsasl ALIAS _gsasl) diff --git a/contrib/libhdfs3-cmake/CMake/FindGSasl.cmake b/contrib/libhdfs3-cmake/CMake/FindGSasl.cmake deleted file mode 100644 index 19ca7c30d1e3..000000000000 --- a/contrib/libhdfs3-cmake/CMake/FindGSasl.cmake +++ /dev/null @@ -1,26 +0,0 @@ -# - Try to find the GNU sasl library (gsasl) -# -# Once done this will define -# -# GSASL_FOUND - System has gnutls -# GSASL_INCLUDE_DIR - The gnutls include directory -# GSASL_LIBRARIES - The libraries needed to use gnutls -# GSASL_DEFINITIONS - Compiler switches required for using gnutls - - -IF (GSASL_INCLUDE_DIR AND GSASL_LIBRARIES) - # in cache already - SET(GSasl_FIND_QUIETLY TRUE) -ENDIF (GSASL_INCLUDE_DIR AND GSASL_LIBRARIES) - -FIND_PATH(GSASL_INCLUDE_DIR gsasl.h) - -FIND_LIBRARY(GSASL_LIBRARIES gsasl) - -INCLUDE(FindPackageHandleStandardArgs) - -# handle the QUIETLY and REQUIRED arguments and set GSASL_FOUND to TRUE 
if -# all listed variables are TRUE -FIND_PACKAGE_HANDLE_STANDARD_ARGS(GSASL DEFAULT_MSG GSASL_LIBRARIES GSASL_INCLUDE_DIR) - -MARK_AS_ADVANCED(GSASL_INCLUDE_DIR GSASL_LIBRARIES) \ No newline at end of file diff --git a/contrib/libhdfs3-cmake/CMake/FindGoogleTest.cmake b/contrib/libhdfs3-cmake/CMake/FindGoogleTest.cmake deleted file mode 100644 index fd57c1e2abde..000000000000 --- a/contrib/libhdfs3-cmake/CMake/FindGoogleTest.cmake +++ /dev/null @@ -1,65 +0,0 @@ -include(CheckCXXSourceRuns) - -find_path(GTest_INCLUDE_DIR gtest/gtest.h - NO_DEFAULT_PATH - PATHS - "${PROJECT_SOURCE_DIR}/../thirdparty/googletest/googletest/include" - "/usr/local/include" - "/usr/include") - -find_path(GMock_INCLUDE_DIR gmock/gmock.h - NO_DEFAULT_PATH - PATHS - "${PROJECT_SOURCE_DIR}/../thirdparty/googletest/googlemock/include" - "/usr/local/include" - "/usr/include") - -find_library(Gtest_LIBRARY - NAMES libgtest.a - HINTS - "${PROJECT_SOURCE_DIR}/../thirdparty/googletest/build/googlemock/gtest" - "/usr/local/lib" - "/usr/lib") - -find_library(Gmock_LIBRARY - NAMES libgmock.a - HINTS - "${PROJECT_SOURCE_DIR}/../thirdparty/googletest/build/googlemock" - "/usr/local/lib" - "/usr/lib") - -message(STATUS "Find GoogleTest include path: ${GTest_INCLUDE_DIR}") -message(STATUS "Find GoogleMock include path: ${GMock_INCLUDE_DIR}") -message(STATUS "Find Gtest library path: ${Gtest_LIBRARY}") -message(STATUS "Find Gmock library path: ${Gmock_LIBRARY}") - -set(CMAKE_REQUIRED_INCLUDES ${GTest_INCLUDE_DIR} ${GMock_INCLUDE_DIR}) -set(CMAKE_REQUIRED_LIBRARIES ${Gtest_LIBRARY} ${Gmock_LIBRARY} -lpthread) -set(CMAKE_REQUIRED_FLAGS) -check_cxx_source_runs(" -#include <gtest/gtest.h> -#include <gmock/gmock.h> -int main(int argc, char *argv[]) -{ - double pi = 3.14; - EXPECT_EQ(pi, 3.14); - return 0; -} -" GoogleTest_CHECK_FINE) -message(STATUS "GoogleTest check: ${GoogleTest_CHECK_FINE}") - -include(FindPackageHandleStandardArgs) -find_package_handle_standard_args( - GoogleTest - REQUIRED_VARS - GTest_INCLUDE_DIR - GMock_INCLUDE_DIR - Gtest_LIBRARY - Gmock_LIBRARY - GoogleTest_CHECK_FINE) - -set(GoogleTest_INCLUDE_DIR ${GTest_INCLUDE_DIR} ${GMock_INCLUDE_DIR}) -set(GoogleTest_LIBRARIES ${Gtest_LIBRARY} ${Gmock_LIBRARY}) -mark_as_advanced( - GoogleTest_INCLUDE_DIR - GoogleTest_LIBRARIES) diff --git a/contrib/libhdfs3-cmake/CMake/FindKERBEROS.cmake b/contrib/libhdfs3-cmake/CMake/FindKERBEROS.cmake deleted file mode 100644 index 5fc58235a3fa..000000000000 --- a/contrib/libhdfs3-cmake/CMake/FindKERBEROS.cmake +++ /dev/null @@ -1,23 +0,0 @@ -# - Find kerberos -# Find the native KERBEROS includes and library -# -# KERBEROS_INCLUDE_DIRS - where to find krb5.h, etc. -# KERBEROS_LIBRARIES - List of libraries when using krb5. -# KERBEROS_FOUND - True if krb5 found.
- -IF (KERBEROS_INCLUDE_DIRS) - # Already in cache, be silent - SET(KERBEROS_FIND_QUIETLY TRUE) -ENDIF (KERBEROS_INCLUDE_DIRS) - -FIND_PATH(KERBEROS_INCLUDE_DIRS krb5.h) - -SET(KERBEROS_NAMES krb5 k5crypto com_err) -FIND_LIBRARY(KERBEROS_LIBRARIES NAMES ${KERBEROS_NAMES}) - -# handle the QUIETLY and REQUIRED arguments and set KERBEROS_FOUND to TRUE if -# all listed variables are TRUE -INCLUDE(FindPackageHandleStandardArgs) -FIND_PACKAGE_HANDLE_STANDARD_ARGS(KERBEROS DEFAULT_MSG KERBEROS_LIBRARIES KERBEROS_INCLUDE_DIRS) - -MARK_AS_ADVANCED(KERBEROS_LIBRARIES KERBEROS_INCLUDE_DIRS) diff --git a/contrib/libhdfs3-cmake/CMake/FindSSL.cmake b/contrib/libhdfs3-cmake/CMake/FindSSL.cmake deleted file mode 100644 index bcbc5d89653c..000000000000 --- a/contrib/libhdfs3-cmake/CMake/FindSSL.cmake +++ /dev/null @@ -1,26 +0,0 @@ -# - Try to find the Open ssl library (ssl) -# -# Once done this will define -# -# SSL_FOUND - System has gnutls -# SSL_INCLUDE_DIR - The gnutls include directory -# SSL_LIBRARIES - The libraries needed to use gnutls -# SSL_DEFINITIONS - Compiler switches required for using gnutls - - -IF (SSL_INCLUDE_DIR AND SSL_LIBRARIES) - # in cache already - SET(SSL_FIND_QUIETLY TRUE) -ENDIF (SSL_INCLUDE_DIR AND SSL_LIBRARIES) - -FIND_PATH(SSL_INCLUDE_DIR openssl/opensslv.h) - -FIND_LIBRARY(SSL_LIBRARIES crypto) - -INCLUDE(FindPackageHandleStandardArgs) - -# handle the QUIETLY and REQUIRED arguments and set SSL_FOUND to TRUE if -# all listed variables are TRUE -FIND_PACKAGE_HANDLE_STANDARD_ARGS(SSL DEFAULT_MSG SSL_LIBRARIES SSL_INCLUDE_DIR) - -MARK_AS_ADVANCED(SSL_INCLUDE_DIR SSL_LIBRARIES) \ No newline at end of file diff --git a/contrib/libhdfs3-cmake/CMakeLists.txt b/contrib/libhdfs3-cmake/CMakeLists.txt index fcc4a15666c5..b2f785fa06f9 100644 --- a/contrib/libhdfs3-cmake/CMakeLists.txt +++ b/contrib/libhdfs3-cmake/CMakeLists.txt @@ -1,4 +1,16 @@ -if (${ENABLE_KRB5}) +if(NOT ARCH_ARM AND NOT OS_FREEBSD AND NOT APPLE AND NOT ARCH_PPC64LE) + option(ENABLE_HDFS "Enable HDFS" ${ENABLE_LIBRARIES}) +elseif(ENABLE_HDFS) + message (${RECONFIGURE_MESSAGE_LEVEL} "Cannot use HDFS3 with current configuration") +endif() + +if(NOT ENABLE_HDFS) + message(STATUS "Not using hdfs") + return() +endif() + +if (TARGET ch_contrib::krb5) + message(STATUS "Enable kerberos for HDFS") SET(WITH_KERBEROS 1) else() SET(WITH_KERBEROS 0) @@ -27,7 +39,7 @@ set(PROTO_FILES "${HDFS3_SOURCE_DIR}/proto/datatransfer.proto" ) -PROTOBUF_GENERATE_CPP(PROTO_SOURCES PROTO_HEADERS ${PROTO_FILES}) +PROTOBUF_GENERATE_CPP(PROTO_SOURCES PROTO_HEADERS ${PROTO_FILES} APPEND_PATH) configure_file("${HDFS3_SOURCE_DIR}/platform.h.in" "${CMAKE_CURRENT_BINARY_DIR}/platform.h") @@ -94,30 +106,26 @@ set(SRCS set_source_files_properties("${HDFS3_SOURCE_DIR}/rpc/RpcClient.cpp" PROPERTIES COMPILE_FLAGS "-DBOOST_UUID_RANDOM_PROVIDER_FORCE_POSIX=1") # target -add_library(hdfs3 ${SRCS}) +add_library(_hdfs3 ${SRCS}) -if(USE_INTERNAL_PROTOBUF_LIBRARY) - add_dependencies(hdfs3 protoc) -endif() +add_dependencies(_hdfs3 protoc) -target_include_directories(hdfs3 PRIVATE ${HDFS3_SOURCE_DIR}) -target_include_directories(hdfs3 PRIVATE ${HDFS3_COMMON_DIR}) -target_include_directories(hdfs3 PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) +target_include_directories(_hdfs3 PRIVATE ${HDFS3_SOURCE_DIR}) +target_include_directories(_hdfs3 PRIVATE ${HDFS3_COMMON_DIR}) +target_include_directories(_hdfs3 PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) -target_include_directories(hdfs3 PRIVATE ${LIBGSASL_INCLUDE_DIR}) -target_include_directories(hdfs3 PRIVATE ${LIBXML2_INCLUDE_DIR}) 
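# Illustrative sketch, not lines of the patch: the libhdfs3 hunk here shows the pattern this change applies
# across contrib/. Raw variables such as ${LIBGSASL_INCLUDE_DIR} and ${LIBXML2_INCLUDE_DIR}, which had to be
# wired up by separate find-modules like the ones deleted above, give way to namespaced alias targets that
# carry their own include paths and link flags. A minimal sketch of the convention; the names `_mylib` and
# `example` are hypothetical:
#
#   add_library(_mylib mylib.c)                  # internal target, underscore-prefixed
#   add_library(ch_contrib::mylib ALIAS _mylib)  # public namespaced alias
#
#   target_link_libraries(example PRIVATE ch_contrib::mylib)
#   if (TARGET ch_contrib::krb5)                 # optional features are probed via TARGET
#       target_link_libraries(example PRIVATE ch_contrib::krb5)
#   endif()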
+target_include_directories(_hdfs3 SYSTEM BEFORE INTERFACE "${ClickHouse_SOURCE_DIR}/contrib/libhdfs3/include") -target_link_libraries(hdfs3 PRIVATE ${LIBGSASL_LIBRARY}) +target_link_libraries(_hdfs3 PRIVATE ch_contrib::gsasl) if (WITH_KERBEROS) - target_link_libraries(hdfs3 PRIVATE ${KRB5_LIBRARY}) + target_link_libraries(_hdfs3 PRIVATE ch_contrib::krb5) endif() -target_link_libraries(hdfs3 PRIVATE ${LIBXML2_LIBRARIES}) +target_link_libraries(_hdfs3 PRIVATE ch_contrib::libxml2) # inherit from parent cmake -target_include_directories(hdfs3 PRIVATE ${Protobuf_INCLUDE_DIR}) -target_link_libraries(hdfs3 PRIVATE ${Protobuf_LIBRARY} boost::headers_only) - -if(OPENSSL_INCLUDE_DIR AND OPENSSL_LIBRARIES) - target_include_directories(hdfs3 PRIVATE ${OPENSSL_INCLUDE_DIR}) - target_link_libraries(hdfs3 PRIVATE ${OPENSSL_LIBRARIES}) +target_link_libraries(_hdfs3 PRIVATE ch_contrib::protobuf boost::headers_only) +if (TARGET OpenSSL::SSL) + target_link_libraries(_hdfs3 PRIVATE OpenSSL::Crypto OpenSSL::SSL) endif() + +add_library(ch_contrib::hdfs ALIAS _hdfs3) diff --git a/contrib/libmetrohash/CMakeLists.txt b/contrib/libmetrohash/CMakeLists.txt index 4ec5a58717d4..9f7984acf8b0 100644 --- a/contrib/libmetrohash/CMakeLists.txt +++ b/contrib/libmetrohash/CMakeLists.txt @@ -2,5 +2,6 @@ set (SRCS src/metrohash64.cpp src/metrohash128.cpp ) -add_library(metrohash ${SRCS}) -target_include_directories(metrohash PUBLIC src) +add_library(_metrohash ${SRCS}) +target_include_directories(_metrohash PUBLIC src) +add_library(ch_contrib::metrohash ALIAS _metrohash) diff --git a/contrib/libpq-cmake/CMakeLists.txt b/contrib/libpq-cmake/CMakeLists.txt index 2d2e0c428feb..280c0381393a 100644 --- a/contrib/libpq-cmake/CMakeLists.txt +++ b/contrib/libpq-cmake/CMakeLists.txt @@ -1,3 +1,7 @@ +if (NOT ENABLE_LIBPQXX) + return() +endif() + set(LIBPQ_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/libpq") set(SRCS @@ -53,10 +57,12 @@ set(SRCS "${LIBPQ_SOURCE_DIR}/port/explicit_bzero.c" ) -add_library(libpq ${SRCS}) +add_library(_libpq ${SRCS}) + +target_include_directories (_libpq SYSTEM PUBLIC ${LIBPQ_SOURCE_DIR}) +target_include_directories (_libpq SYSTEM PUBLIC "${LIBPQ_SOURCE_DIR}/include") +target_include_directories (_libpq SYSTEM PRIVATE "${LIBPQ_SOURCE_DIR}/configs") -target_include_directories (libpq SYSTEM PUBLIC ${LIBPQ_SOURCE_DIR}) -target_include_directories (libpq SYSTEM PUBLIC "${LIBPQ_SOURCE_DIR}/include") -target_include_directories (libpq SYSTEM PRIVATE "${LIBPQ_SOURCE_DIR}/configs") +target_link_libraries (_libpq PRIVATE OpenSSL::SSL) -target_link_libraries (libpq PRIVATE ssl) +add_library(ch_contrib::libpq ALIAS _libpq) diff --git a/contrib/libpqxx-cmake/CMakeLists.txt b/contrib/libpqxx-cmake/CMakeLists.txt index 2804a875436c..a3317404f958 100644 --- a/contrib/libpqxx-cmake/CMakeLists.txt +++ b/contrib/libpqxx-cmake/CMakeLists.txt @@ -1,3 +1,10 @@ +option(ENABLE_LIBPQXX "Enable libpqxx" ${ENABLE_LIBRARIES}) + +if (NOT ENABLE_LIBPQXX) + message(STATUS "Not using libpqxx") + return() +endif() + set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/libpqxx") set (SRCS @@ -63,8 +70,9 @@ set (HDRS "${LIBRARY_DIR}/include/pqxx/zview.hxx" ) -add_library(libpqxx ${SRCS} ${HDRS}) +add_library(_libpqxx ${SRCS} ${HDRS}) -target_link_libraries(libpqxx PUBLIC ${LIBPQ_LIBRARY}) -target_include_directories (libpqxx SYSTEM PRIVATE "${LIBRARY_DIR}/include") +target_link_libraries(_libpqxx PUBLIC ch_contrib::libpq) +target_include_directories (_libpqxx SYSTEM BEFORE PUBLIC "${LIBRARY_DIR}/include") +add_library(ch_contrib::libpqxx
ALIAS _libpqxx) diff --git a/contrib/libprotobuf-mutator-cmake/CMakeLists.txt b/contrib/libprotobuf-mutator-cmake/CMakeLists.txt index 978b1e732bad..a623f95c418c 100644 --- a/contrib/libprotobuf-mutator-cmake/CMakeLists.txt +++ b/contrib/libprotobuf-mutator-cmake/CMakeLists.txt @@ -1,6 +1,12 @@ +option(USE_LIBPROTOBUF_MUTATOR "Enable libprotobuf-mutator" ${ENABLE_FUZZING}) + +if (NOT USE_LIBPROTOBUF_MUTATOR) + return() +endif() + set(LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/libprotobuf-mutator) -add_library(protobuf-mutator +add_library(_protobuf-mutator ${LIBRARY_DIR}/src/libfuzzer/libfuzzer_macro.cc ${LIBRARY_DIR}/src/libfuzzer/libfuzzer_mutator.cc ${LIBRARY_DIR}/src/binary_format.cc @@ -8,7 +14,9 @@ add_library(protobuf-mutator ${LIBRARY_DIR}/src/text_format.cc ${LIBRARY_DIR}/src/utf8_fix.cc) -target_include_directories(protobuf-mutator BEFORE PRIVATE "${LIBRARY_DIR}") -target_include_directories(protobuf-mutator BEFORE PRIVATE "${ClickHouse_SOURCE_DIR}/contrib/protobuf/src") +target_include_directories(_protobuf-mutator BEFORE INTERFACE "${LIBRARY_DIR}") +target_include_directories(_protobuf-mutator BEFORE INTERFACE "${ClickHouse_SOURCE_DIR}/contrib/protobuf/src") + +target_link_libraries(_protobuf-mutator ch_contrib::protobuf) -target_link_libraries(protobuf-mutator ${Protobuf_LIBRARY}) +add_library(ch_contrib::protobuf_mutator ALIAS _protobuf-mutator) diff --git a/contrib/librdkafka-cmake/CMakeLists.txt b/contrib/librdkafka-cmake/CMakeLists.txt index 97b6a7e1ec53..d84abd06dec0 100644 --- a/contrib/librdkafka-cmake/CMakeLists.txt +++ b/contrib/librdkafka-cmake/CMakeLists.txt @@ -1,3 +1,10 @@ +option (ENABLE_KAFKA "Enable kafka" ${ENABLE_LIBRARIES}) + +if (NOT ENABLE_KAFKA) + message(STATUS "Not using librdkafka") + return() +endif() + set(RDKAFKA_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/librdkafka/src") set(SRCS @@ -80,24 +87,18 @@ set(SRCS "${RDKAFKA_SOURCE_DIR}/tinycthread_extra.c" ) -if(${ENABLE_CYRUS_SASL}) +if(TARGET ch_contrib::sasl2) message (STATUS "librdkafka with SASL support") set(WITH_SASL_CYRUS 1) endif() -if(OPENSSL_FOUND) - message (STATUS "librdkafka with SSL support") - set(WITH_SSL 1) - - if(${ENABLE_CYRUS_SASL}) - set(WITH_SASL_SCRAM 1) - set(WITH_SASL_OAUTHBEARER 1) - endif() -endif() - -if(WITH_SSL) - list(APPEND SRCS "${RDKAFKA_SOURCE_DIR}/rdkafka_ssl.c") +message (STATUS "librdkafka with SSL support") +set(WITH_SSL 1) +if(WITH_SASL_CYRUS) + set(WITH_SASL_SCRAM 1) + set(WITH_SASL_OAUTHBEARER 1) endif() +list(APPEND SRCS "${RDKAFKA_SOURCE_DIR}/rdkafka_ssl.c") if(WITH_SASL_CYRUS) list(APPEND SRCS "${RDKAFKA_SOURCE_DIR}/rdkafka_sasl_cyrus.c") # needed to support Kerberos, requires cyrus-sasl @@ -111,19 +112,23 @@ if(WITH_SASL_OAUTHBEARER) list(APPEND SRCS "${RDKAFKA_SOURCE_DIR}/rdkafka_sasl_oauthbearer.c") endif() -add_library(rdkafka ${SRCS}) -target_compile_options(rdkafka PRIVATE -fno-sanitize=undefined) -# target_include_directories(rdkafka SYSTEM PUBLIC include) -target_include_directories(rdkafka SYSTEM PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/include") # for "librdkafka/rdkafka.h" -target_include_directories(rdkafka SYSTEM PUBLIC ${RDKAFKA_SOURCE_DIR}) # Because weird logic with "include_next" is used. -target_include_directories(rdkafka SYSTEM PUBLIC "${CMAKE_CURRENT_BINARY_DIR}/auxdir") # for "../config.h" -target_include_directories(rdkafka SYSTEM PRIVATE "${ZSTD_INCLUDE_DIR}/common") # Because wrong path to "zstd_errors.h" is used. 
-target_link_libraries(rdkafka PRIVATE lz4 ${ZLIB_LIBRARIES} ${ZSTD_LIBRARY}) -if(OPENSSL_SSL_LIBRARY AND OPENSSL_CRYPTO_LIBRARY) - target_link_libraries(rdkafka PRIVATE ${OPENSSL_SSL_LIBRARY} ${OPENSSL_CRYPTO_LIBRARY}) -endif() -if(${ENABLE_CYRUS_SASL}) - target_link_libraries(rdkafka PRIVATE ${CYRUS_SASL_LIBRARY}) +add_library(_rdkafka ${SRCS}) +add_library(ch_contrib::rdkafka ALIAS _rdkafka) + +target_compile_options(_rdkafka PRIVATE -fno-sanitize=undefined) +# target_include_directories(_rdkafka SYSTEM PUBLIC include) +target_include_directories(_rdkafka SYSTEM PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/include") # for "librdkafka/rdkafka.h" +target_include_directories(_rdkafka SYSTEM PUBLIC ${RDKAFKA_SOURCE_DIR}) # Because weird logic with "include_next" is used. +target_include_directories(_rdkafka SYSTEM PUBLIC "${CMAKE_CURRENT_BINARY_DIR}/auxdir") # for "../config.h" +target_link_libraries(_rdkafka + PRIVATE + ch_contrib::lz4 + ch_contrib::zlib + ch_contrib::zstd + OpenSSL::Crypto OpenSSL::SSL +) +if(WITH_SASL_CYRUS) + target_link_libraries(_rdkafka PRIVATE ch_contrib::sasl2) endif() file(MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auxdir") diff --git a/contrib/libstemmer-c-cmake/CMakeLists.txt b/contrib/libstemmer-c-cmake/CMakeLists.txt index b5cd59e46334..6dc65c56c6c8 100644 --- a/contrib/libstemmer-c-cmake/CMakeLists.txt +++ b/contrib/libstemmer-c-cmake/CMakeLists.txt @@ -27,5 +27,6 @@ FOREACH ( LINE ${_CONTENT} ) endforeach () # all the sources parsed. Now just add the lib -add_library ( stemmer STATIC ${_SOURCES} ${_HEADERS} ) -target_include_directories (stemmer SYSTEM PUBLIC "${STEMMER_INCLUDE_DIR}") +add_library(_stemmer STATIC ${_SOURCES} ${_HEADERS} ) +target_include_directories(_stemmer SYSTEM PUBLIC "${STEMMER_INCLUDE_DIR}") +add_library(ch_contrib::stemmer ALIAS _stemmer) diff --git a/contrib/libuv-cmake/CMakeLists.txt b/contrib/libuv-cmake/CMakeLists.txt index dc47b0bf496c..45f6d8e20832 100644 --- a/contrib/libuv-cmake/CMakeLists.txt +++ b/contrib/libuv-cmake/CMakeLists.txt @@ -1,3 +1,8 @@ +if (OS_DARWIN AND COMPILER_GCC) + message (WARNING "libuv cannot be built with GCC in macOS due to a bug: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=93082") + return() +endif() + # This file is a modified version of contrib/libuv/CMakeLists.txt set (SOURCE_DIR "${CMAKE_SOURCE_DIR}/contrib/libuv") @@ -122,19 +127,17 @@ set(uv_sources "${uv_sources_tmp}") list(APPEND uv_defines CLICKHOUSE_GLIBC_COMPATIBILITY) -add_library(uv ${uv_sources}) -target_compile_definitions(uv - INTERFACE USING_UV_SHARED=1 - PRIVATE ${uv_defines} BUILDING_UV_SHARED=1) -target_compile_options(uv PRIVATE ${uv_cflags}) -target_include_directories(uv PUBLIC ${SOURCE_DIR}/include PRIVATE ${SOURCE_DIR}/src) -target_link_libraries(uv ${uv_libraries}) +add_library(_uv ${uv_sources}) +add_library(ch_contrib::uv ALIAS _uv) -add_library(uv_a STATIC ${uv_sources}) -target_compile_definitions(uv_a PRIVATE ${uv_defines}) -target_compile_options(uv_a PRIVATE ${uv_cflags}) -target_include_directories(uv_a PUBLIC ${SOURCE_DIR}/include PRIVATE ${SOURCE_DIR}/src) -target_link_libraries(uv_a ${uv_libraries}) +target_compile_definitions(_uv PRIVATE ${uv_defines}) +target_include_directories(_uv SYSTEM PUBLIC ${SOURCE_DIR}/include PRIVATE ${SOURCE_DIR}/src) +target_link_libraries(_uv ${uv_libraries}) +if (NOT USE_STATIC_LIBRARIES) + target_compile_definitions(_uv + INTERFACE USING_UV_SHARED=1 + PRIVATE BUILDING_UV_SHARED=1) +endif() if(UNIX) # Now for some gibbering horrors from beyond the stars... 
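# Illustrative sketch, not lines of the patch: the libuv hunk above folds the former shared/static pair
# `uv`/`uv_a` into the single target `_uv`, exporting USING_UV_SHARED=1 only when USE_STATIC_LIBRARIES is
# OFF. A consumer therefore links one alias regardless of linkage; the target name `example` below is
# hypothetical:
#
#   target_link_libraries(example PRIVATE ch_contrib::uv)
#   # In a shared build, USING_UV_SHARED=1 propagates to `example` automatically
#   # through the INTERFACE compile definitions, so no per-consumer special-casing is needed.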
@@ -145,7 +148,6 @@ if(UNIX) string(REGEX MATCH [0-9]+[.][0-9]+[.][0-9]+ PACKAGE_VERSION "${configure_ac}") string(REGEX MATCH ^[0-9]+ UV_VERSION_MAJOR "${PACKAGE_VERSION}") # The version in the filename is mirroring the behaviour of autotools. - set_target_properties(uv PROPERTIES VERSION ${UV_VERSION_MAJOR}.0.0 - SOVERSION ${UV_VERSION_MAJOR}) + set_target_properties(_uv PROPERTIES VERSION ${UV_VERSION_MAJOR}.0.0 + SOVERSION ${UV_VERSION_MAJOR}) endif() - diff --git a/contrib/libxml2-cmake/CMakeLists.txt b/contrib/libxml2-cmake/CMakeLists.txt index 8fda0399ea32..e9c4641c1616 100644 --- a/contrib/libxml2-cmake/CMakeLists.txt +++ b/contrib/libxml2-cmake/CMakeLists.txt @@ -50,13 +50,14 @@ set(SRCS "${LIBXML2_SOURCE_DIR}/schematron.c" "${LIBXML2_SOURCE_DIR}/xzlib.c" ) -add_library(libxml2 ${SRCS}) +add_library(_libxml2 ${SRCS}) -target_link_libraries(libxml2 PRIVATE ${ZLIB_LIBRARIES}) +target_link_libraries(_libxml2 PRIVATE ch_contrib::zlib) if(M_LIBRARY) - target_link_libraries(libxml2 PRIVATE ${M_LIBRARY}) + target_link_libraries(_libxml2 PRIVATE ${M_LIBRARY}) endif() -target_include_directories(libxml2 PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/linux_x86_64/include") -target_include_directories(libxml2 PUBLIC "${LIBXML2_SOURCE_DIR}/include") -target_include_directories(libxml2 SYSTEM BEFORE PRIVATE ${ZLIB_INCLUDE_DIR}) +target_include_directories(_libxml2 BEFORE PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/linux_x86_64/include") +target_include_directories(_libxml2 BEFORE PUBLIC "${LIBXML2_SOURCE_DIR}/include") + +add_library(ch_contrib::libxml2 ALIAS _libxml2) diff --git a/contrib/llvm-cmake/CMakeLists.txt b/contrib/llvm-cmake/CMakeLists.txt new file mode 100644 index 000000000000..d240924cac3e --- /dev/null +++ b/contrib/llvm-cmake/CMakeLists.txt @@ -0,0 +1,98 @@ +if (APPLE OR NOT ARCH_AMD64 OR SANITIZE STREQUAL "undefined") + set (ENABLE_EMBEDDED_COMPILER_DEFAULT OFF) +else() + set (ENABLE_EMBEDDED_COMPILER_DEFAULT ON) +endif() +option (ENABLE_EMBEDDED_COMPILER "Enable support for 'compile_expressions' option for query execution" ${ENABLE_EMBEDDED_COMPILER_DEFAULT}) + +if (NOT ENABLE_EMBEDDED_COMPILER) + set (USE_EMBEDDED_COMPILER 0) + return() +endif() + +set (LLVM_FOUND 1) +set (LLVM_VERSION "12.0.0bundled") +set (LLVM_INCLUDE_DIRS + "${ClickHouse_SOURCE_DIR}/contrib/llvm/llvm/include" + "${ClickHouse_BINARY_DIR}/contrib/llvm/llvm/include" +) +set (LLVM_LIBRARY_DIRS "${ClickHouse_BINARY_DIR}/contrib/llvm/llvm") + +# This list was generated by listing all LLVM libraries, compiling the binary and removing all libraries while it still compiles. 
+set (REQUIRED_LLVM_LIBRARIES + LLVMExecutionEngine + LLVMRuntimeDyld + LLVMX86CodeGen + LLVMX86Desc + LLVMX86Info + LLVMAsmPrinter + LLVMDebugInfoDWARF + LLVMGlobalISel + LLVMSelectionDAG + LLVMMCDisassembler + LLVMPasses + LLVMCodeGen + LLVMipo + LLVMBitWriter + LLVMInstrumentation + LLVMScalarOpts + LLVMAggressiveInstCombine + LLVMInstCombine + LLVMVectorize + LLVMTransformUtils + LLVMTarget + LLVMAnalysis + LLVMProfileData + LLVMObject + LLVMBitReader + LLVMCore + LLVMRemarks + LLVMBitstreamReader + LLVMMCParser + LLVMMC + LLVMBinaryFormat + LLVMDebugInfoCodeView + LLVMSupport + LLVMDemangle +) + +#function(llvm_libs_all REQUIRED_LLVM_LIBRARIES) +# llvm_map_components_to_libnames (result all) +# if (USE_STATIC_LIBRARIES OR NOT "LLVM" IN_LIST result) +# list (REMOVE_ITEM result "LTO" "LLVM") +# else() +# set (result "LLVM") +# endif () +# list (APPEND result ${CMAKE_DL_LIBS} ch_contrib::zlib) +# set (${REQUIRED_LLVM_LIBRARIES} ${result} PARENT_SCOPE) +#endfunction() + +message (STATUS "LLVM include Directory: ${LLVM_INCLUDE_DIRS}") +message (STATUS "LLVM library Directory: ${LLVM_LIBRARY_DIRS}") +message (STATUS "LLVM C++ compiler flags: ${LLVM_CXXFLAGS}") + +# ld: unknown option: --color-diagnostics +if (APPLE) + set (LINKER_SUPPORTS_COLOR_DIAGNOSTICS 0 CACHE INTERNAL "") +endif () + +# Do not adjust RPATH in llvm, since then it will not be able to find libcxx/libcxxabi/libunwind +set (CMAKE_INSTALL_RPATH "ON") +set (LLVM_COMPILER_CHECKED 1 CACHE INTERNAL "") +set (LLVM_ENABLE_EH 1 CACHE INTERNAL "") +set (LLVM_ENABLE_RTTI 1 CACHE INTERNAL "") +set (LLVM_ENABLE_PIC 0 CACHE INTERNAL "") +set (LLVM_TARGETS_TO_BUILD "X86;AArch64" CACHE STRING "") + +# Need to use C++17 since the compilation is not possible with C++20 currently, due to ambiguous operator != etc. +# LLVM project will set its default value for the -std=... but our global setting from CMake will override it. +set (CMAKE_CXX_STANDARD 17) + +set (LLVM_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/llvm/llvm") +set (LLVM_BINARY_DIR "${ClickHouse_BINARY_DIR}/contrib/llvm/llvm") +add_subdirectory ("${LLVM_SOURCE_DIR}" "${LLVM_BINARY_DIR}") + +add_library (_llvm INTERFACE) +target_link_libraries (_llvm INTERFACE ${REQUIRED_LLVM_LIBRARIES}) +target_include_directories (_llvm SYSTEM BEFORE INTERFACE ${LLVM_INCLUDE_DIRS}) +add_library(ch_contrib::llvm ALIAS _llvm) diff --git a/contrib/lz4 b/contrib/lz4 index f39b79fb0296..4c9431e9af59 160000 --- a/contrib/lz4 +++ b/contrib/lz4 @@ -1 +1 @@ -Subproject commit f39b79fb02962a1cd880bbdecb6dffba4f754a11 +Subproject commit 4c9431e9af596af0556e5da0ae99305bafb2b10b diff --git a/contrib/lz4-cmake/CMakeLists.txt b/contrib/lz4-cmake/CMakeLists.txt index 2c412d6e36a1..94def0294102 100644 --- a/contrib/lz4-cmake/CMakeLists.txt +++ b/contrib/lz4-cmake/CMakeLists.txt @@ -1,37 +1,18 @@ -option (USE_INTERNAL_LZ4_LIBRARY "Use internal lz4 library" ON) +# lz4 is the main compression method, cannot be disabled. 
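# Illustrative sketch, not lines of the patch: unlike the optional contribs above, lz4 gets no ENABLE_*
# gate and no system fallback; the bundled sources are always compiled, so a consumer may link the alias
# unconditionally (the target name `example` is hypothetical):
#
#   target_link_libraries(example PRIVATE ch_contrib::lz4)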
-if (NOT USE_INTERNAL_LZ4_LIBRARY) - find_library (LIBRARY_LZ4 lz4) - find_path (INCLUDE_LZ4 lz4.h) +set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/lz4") +set (SRCS + "${LIBRARY_DIR}/lib/lz4.c" + "${LIBRARY_DIR}/lib/lz4hc.c" + "${LIBRARY_DIR}/lib/lz4frame.c" + "${LIBRARY_DIR}/lib/xxhash.c" +) - if (LIBRARY_LZ4 AND INCLUDE_LZ4) - set(EXTERNAL_LZ4_LIBRARY_FOUND 1) - add_library (lz4 INTERFACE) - set_property (TARGET lz4 PROPERTY INTERFACE_LINK_LIBRARIES ${LIBRARY_LZ4}) - set_property (TARGET lz4 PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${INCLUDE_LZ4}) - set_property (TARGET lz4 APPEND PROPERTY INTERFACE_COMPILE_DEFINITIONS USE_XXHASH=0) - else() - set(EXTERNAL_LZ4_LIBRARY_FOUND 0) - message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find system lz4") - endif() -endif() +add_library (_lz4 ${SRCS}) +add_library (ch_contrib::lz4 ALIAS _lz4) -if (NOT EXTERNAL_LZ4_LIBRARY_FOUND) - set (USE_INTERNAL_LZ4_LIBRARY 1) - set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/lz4") - - set (SRCS - "${LIBRARY_DIR}/lib/lz4.c" - "${LIBRARY_DIR}/lib/lz4hc.c" - "${LIBRARY_DIR}/lib/lz4frame.c" - "${LIBRARY_DIR}/lib/xxhash.c" - ) - - add_library (lz4 ${SRCS}) - - target_compile_definitions (lz4 PUBLIC LZ4_DISABLE_DEPRECATE_WARNINGS=1 USE_XXHASH=1) - if (SANITIZE STREQUAL "undefined") - target_compile_options (lz4 PRIVATE -fno-sanitize=undefined) - endif () - target_include_directories(lz4 PUBLIC "${LIBRARY_DIR}/lib") +target_compile_definitions (_lz4 PUBLIC LZ4_DISABLE_DEPRECATE_WARNINGS=1) +if (SANITIZE STREQUAL "undefined") + target_compile_options (_lz4 PRIVATE -fno-sanitize=undefined) endif () +target_include_directories(_lz4 PUBLIC "${LIBRARY_DIR}/lib") diff --git a/contrib/magic-enum-cmake/CMakeLists.txt b/contrib/magic-enum-cmake/CMakeLists.txt index fae2c9c2d05e..f1face02de1a 100644 --- a/contrib/magic-enum-cmake/CMakeLists.txt +++ b/contrib/magic-enum-cmake/CMakeLists.txt @@ -1,3 +1,4 @@ -set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/magic_enum") -add_library (magic_enum INTERFACE) -target_include_directories(magic_enum SYSTEM INTERFACE ${LIBRARY_DIR}/include) +set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/magic_enum") +add_library(_magic_enum INTERFACE) +target_include_directories(_magic_enum SYSTEM INTERFACE ${LIBRARY_DIR}/include) +add_library(ch_contrib::magic_enum ALIAS _magic_enum) diff --git a/contrib/mariadb-connector-c-cmake/CMakeLists.txt b/contrib/mariadb-connector-c-cmake/CMakeLists.txt index 7c3f25cdf877..1d9f750aceea 100644 --- a/contrib/mariadb-connector-c-cmake/CMakeLists.txt +++ b/contrib/mariadb-connector-c-cmake/CMakeLists.txt @@ -1,3 +1,14 @@ +if(OS_LINUX AND TARGET OpenSSL::SSL) + option(ENABLE_MYSQL "Enable MySQL" ${ENABLE_LIBRARIES}) +else () + option(ENABLE_MYSQL "Enable MySQL" FALSE) +endif () + +if(NOT ENABLE_MYSQL) + message (STATUS "Build without mysqlclient (support for MYSQL dictionary source will be disabled)") + return() +endif() + if (GLIBC_COMPATIBILITY) set(LIBM glibc-compatibility) endif() @@ -80,7 +91,7 @@ set(SIZEOF_SIZE_T 8) set(SOCKET_SIZE_TYPE socklen_t) -set(SYSTEM_LIBS ${SYSTEM_LIBS} zlib) +set(SYSTEM_LIBS ${SYSTEM_LIBS} ch_contrib::zlib) if(CMAKE_HAVE_PTHREAD_H) set(CMAKE_REQUIRED_INCLUDES pthread.h) @@ -93,8 +104,7 @@ set(HAVE_THREADS 1) set(DEFAULT_CHARSET "utf8mb4") add_definitions(-DHAVE_OPENSSL -DHAVE_TLS) -set(SSL_LIBRARIES ${OPENSSL_SSL_LIBRARY} ${OPENSSL_CRYPTO_LIBRARY}) -include_directories(BEFORE ${OPENSSL_INCLUDE_DIR}) +set(SSL_LIBRARIES OpenSSL::Crypto OpenSSL::SSL) set(TLS_LIBRARY_VERSION "OpenSSL ${OPENSSL_VERSION}") set(ENABLED_LOCAL_INFILE 
OFF) @@ -222,10 +232,6 @@ if(ICONV_INCLUDE_DIR) endif() add_definitions(-DLIBICONV_PLUG) -if(ZLIB_FOUND AND WITH_EXTERNAL_ZLIB) - include_directories(${ZLIB_INCLUDE_DIR}) -endif() - if(WITH_DYNCOL) set(LIBMARIADB_SOURCES ${LIBMARIADB_SOURCES} ${CC_SOURCE_DIR}/libmariadb/mariadb_dyncol.c) endif() @@ -233,10 +239,12 @@ endif() set(LIBMARIADB_SOURCES ${LIBMARIADB_SOURCES} ${CC_SOURCE_DIR}/libmariadb/mariadb_async.c ${CC_SOURCE_DIR}/libmariadb/ma_context.c) -add_library(mariadbclient STATIC ${LIBMARIADB_SOURCES}) -target_link_libraries(mariadbclient ${SYSTEM_LIBS}) +add_library(_mariadbclient STATIC ${LIBMARIADB_SOURCES}) +target_link_libraries(_mariadbclient ${SYSTEM_LIBS}) + +target_include_directories(_mariadbclient PRIVATE ${CC_BINARY_DIR}/include-private) +target_include_directories(_mariadbclient SYSTEM PUBLIC ${CC_BINARY_DIR}/include-public ${CC_SOURCE_DIR}/include ${CC_SOURCE_DIR}/libmariadb) -target_include_directories(mariadbclient PRIVATE ${CC_BINARY_DIR}/include-private) -target_include_directories(mariadbclient SYSTEM PUBLIC ${CC_BINARY_DIR}/include-public ${CC_SOURCE_DIR}/include ${CC_SOURCE_DIR}/libmariadb) +set_target_properties(_mariadbclient PROPERTIES IMPORTED_INTERFACE_LINK_LIBRARIES "${SYSTEM_LIBS}") -set_target_properties(mariadbclient PROPERTIES IMPORTED_INTERFACE_LINK_LIBRARIES "${SYSTEM_LIBS}") +add_library(ch_contrib::mariadbclient ALIAS _mariadbclient) diff --git a/contrib/miniselect-cmake/CMakeLists.txt b/contrib/miniselect-cmake/CMakeLists.txt new file mode 100644 index 000000000000..f6dda7a14745 --- /dev/null +++ b/contrib/miniselect-cmake/CMakeLists.txt @@ -0,0 +1,3 @@ +add_library(_miniselect INTERFACE) +target_include_directories(_miniselect BEFORE INTERFACE ${ClickHouse_SOURCE_DIR}/contrib/miniselect/include) +add_library(ch_contrib::miniselect ALIAS _miniselect) diff --git a/contrib/minizip-ng b/contrib/minizip-ng new file mode 160000 index 000000000000..6cffc9518516 --- /dev/null +++ b/contrib/minizip-ng @@ -0,0 +1 @@ +Subproject commit 6cffc951851620e0fac1993be75e4713c334de03 diff --git a/contrib/minizip-ng-cmake/CMakeLists.txt b/contrib/minizip-ng-cmake/CMakeLists.txt new file mode 100644 index 000000000000..4aabbd3c9fb3 --- /dev/null +++ b/contrib/minizip-ng-cmake/CMakeLists.txt @@ -0,0 +1,168 @@ +option(ENABLE_MINIZIP "Enable minizip-ng the zip manipulation library" ${ENABLE_LIBRARIES}) +if (NOT ENABLE_MINIZIP) + message (STATUS "minizip-ng disabled") + return() +endif() + +set(_MINIZIP_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/minizip-ng") + +# Initial source files +set(MINIZIP_SRC + ${_MINIZIP_SOURCE_DIR}/mz_crypt.c + ${_MINIZIP_SOURCE_DIR}/mz_os.c + ${_MINIZIP_SOURCE_DIR}/mz_strm.c + ${_MINIZIP_SOURCE_DIR}/mz_strm_buf.c + ${_MINIZIP_SOURCE_DIR}/mz_strm_mem.c + ${_MINIZIP_SOURCE_DIR}/mz_strm_split.c + ${_MINIZIP_SOURCE_DIR}/mz_zip.c + ${_MINIZIP_SOURCE_DIR}/mz_zip_rw.c) + +# Initial header files +set(MINIZIP_HDR + ${_MINIZIP_SOURCE_DIR}/mz.h + ${_MINIZIP_SOURCE_DIR}/mz_os.h + ${_MINIZIP_SOURCE_DIR}/mz_crypt.h + ${_MINIZIP_SOURCE_DIR}/mz_strm.h + ${_MINIZIP_SOURCE_DIR}/mz_strm_buf.h + ${_MINIZIP_SOURCE_DIR}/mz_strm_mem.h + ${_MINIZIP_SOURCE_DIR}/mz_strm_split.h + ${_MINIZIP_SOURCE_DIR}/mz_strm_os.h + ${_MINIZIP_SOURCE_DIR}/mz_zip.h + ${_MINIZIP_SOURCE_DIR}/mz_zip_rw.h) + +set(MINIZIP_INC ${_MINIZIP_SOURCE_DIR}) + +set(MINIZIP_DEF) +set(MINIZIP_PUBLIC_DEF) +set(MINIZIP_LIB) + +# Check if zlib is present +set(MZ_ZLIB ON) +if(MZ_ZLIB) + # Use zlib from ClickHouse contrib + list(APPEND MINIZIP_LIB ch_contrib::zlib) + + list(APPEND MINIZIP_SRC + 
${_MINIZIP_SOURCE_DIR}/mz_strm_zlib.c) + + list(APPEND MINIZIP_HDR + ${_MINIZIP_SOURCE_DIR}/mz_strm_zlib.h) + + list(APPEND MINIZIP_DEF "-DHAVE_ZLIB") +endif() + +# Check if bzip2 is present +set(MZ_BZIP2 ${ENABLE_BZIP2}) +if(MZ_BZIP2) + # Use bzip2 from ClickHouse contrib + list(APPEND MINIZIP_LIB ch_contrib::bzip2) + + list(APPEND MINIZIP_SRC + ${_MINIZIP_SOURCE_DIR}/mz_strm_bzip.c) + + list(APPEND MINIZIP_HDR + ${_MINIZIP_SOURCE_DIR}/mz_strm_bzip.h) + + list(APPEND MINIZIP_DEF "-DHAVE_BZIP2") +endif() + +# Check if liblzma is present +set(MZ_LZMA ON) +if(MZ_LZMA) + # Use liblzma from ClickHouse contrib + list(APPEND MINIZIP_LIB ch_contrib::xz) + + list(APPEND MINIZIP_SRC + ${_MINIZIP_SOURCE_DIR}/mz_strm_lzma.c) + + list(APPEND MINIZIP_HDR + ${_MINIZIP_SOURCE_DIR}/mz_strm_lzma.h) + + list(APPEND MINIZIP_DEF "-DHAVE_LZMA") +endif() + +# Check if zstd is present +set(MZ_ZSTD ON) +if(MZ_ZSTD) + # Use zstd from ClickHouse contrib + list(APPEND MINIZIP_LIB ch_contrib::zstd) + + list(APPEND MINIZIP_SRC + ${_MINIZIP_SOURCE_DIR}/mz_strm_zstd.c) + + list(APPEND MINIZIP_HDR + ${_MINIZIP_SOURCE_DIR}/mz_strm_zstd.h) + + list(APPEND MINIZIP_DEF "-DHAVE_ZSTD") +endif() + +if(NOT MZ_ZLIB AND NOT MZ_ZSTD AND NOT MZ_BZIP2 AND NOT MZ_LZMA) + message(STATUS "Compression not supported due to missing libraries") + + list(APPEND MINIZIP_DEF -DMZ_ZIP_NO_DECOMPRESSION) + list(APPEND MINIZIP_DEF -DMZ_ZIP_NO_COMPRESSION) +endif() + +# Check to see if openssl installation is present +set(MZ_OPENSSL ${ENABLE_SSL}) +if(MZ_OPENSSL) + # Use openssl from ClickHouse contrib + list(APPEND MINIZIP_LIB OpenSSL::SSL OpenSSL::Crypto) + + list(APPEND MINIZIP_SRC + ${_MINIZIP_SOURCE_DIR}/mz_crypt_openssl.c) +endif() + +# Include WinZIP AES encryption +set(MZ_WZAES ${ENABLE_SSL}) +if(MZ_WZAES) + list(APPEND MINIZIP_DEF -DHAVE_WZAES) + + list(APPEND MINIZIP_SRC + ${_MINIZIP_SOURCE_DIR}/mz_strm_wzaes.c) + + list(APPEND MINIZIP_HDR + ${_MINIZIP_SOURCE_DIR}/mz_strm_wzaes.h) +endif() + +# Include traditional PKWare encryption +set(MZ_PKCRYPT ON) +if(MZ_PKCRYPT) + list(APPEND MINIZIP_DEF -DHAVE_PKCRYPT) + + list(APPEND MINIZIP_SRC + ${_MINIZIP_SOURCE_DIR}/mz_strm_pkcrypt.c) + + list(APPEND MINIZIP_HDR + ${_MINIZIP_SOURCE_DIR}/mz_strm_pkcrypt.h) +endif() + +# Unix specific +if(UNIX) + list(APPEND MINIZIP_SRC + ${_MINIZIP_SOURCE_DIR}/mz_os_posix.c + ${_MINIZIP_SOURCE_DIR}/mz_strm_os_posix.c) +endif() + +# Include compatibility layer +set(MZ_COMPAT ON) +if(MZ_COMPAT) + list(APPEND MINIZIP_SRC + ${_MINIZIP_SOURCE_DIR}/mz_compat.c) + + list(APPEND MINIZIP_HDR + ${_MINIZIP_SOURCE_DIR}/mz_compat.h + zip.h + unzip.h) + + list(APPEND MINIZIP_INC "${CMAKE_CURRENT_SOURCE_DIR}") + list(APPEND MINIZIP_PUBLIC_DEF "-DMZ_COMPAT_VERSION=110") +endif() + +add_library(_minizip ${MINIZIP_SRC} ${MINIZIP_HDR}) +target_include_directories(_minizip PUBLIC ${MINIZIP_INC}) +target_compile_definitions(_minizip PUBLIC ${MINIZIP_PUBLIC_DEF}) +target_compile_definitions(_minizip PRIVATE ${MINIZIP_DEF}) +target_link_libraries(_minizip PRIVATE ${MINIZIP_LIB}) + +add_library(ch_contrib::minizip ALIAS _minizip) diff --git a/contrib/minizip-ng-cmake/unzip.h b/contrib/minizip-ng-cmake/unzip.h new file mode 100644 index 000000000000..61cbd974e31f --- /dev/null +++ b/contrib/minizip-ng-cmake/unzip.h @@ -0,0 +1,13 @@ +/* unzip.h -- Compatibility layer shim + part of the minizip-ng project + + This program is distributed under the terms of the same license as zlib. + See the accompanying LICENSE file for the full text of the license. 
+*/ + +#ifndef MZ_COMPAT_UNZIP +#define MZ_COMPAT_UNZIP + +#include "mz_compat.h" + +#endif diff --git a/contrib/minizip-ng-cmake/zip.h b/contrib/minizip-ng-cmake/zip.h new file mode 100644 index 000000000000..cf38ac91a04f --- /dev/null +++ b/contrib/minizip-ng-cmake/zip.h @@ -0,0 +1,13 @@ +/* zip.h -- Compatibility layer shim + part of the minizip-ng project + + This program is distributed under the terms of the same license as zlib. + See the accompanying LICENSE file for the full text of the license. +*/ + +#ifndef MZ_COMPAT_ZIP +#define MZ_COMPAT_ZIP + +#include "mz_compat.h" + +#endif diff --git a/contrib/msgpack-c-cmake/CMakeLists.txt b/contrib/msgpack-c-cmake/CMakeLists.txt new file mode 100644 index 000000000000..3232b0a9534e --- /dev/null +++ b/contrib/msgpack-c-cmake/CMakeLists.txt @@ -0,0 +1,10 @@ +option (ENABLE_MSGPACK "Enable msgpack library" ${ENABLE_LIBRARIES}) + +if(NOT ENABLE_MSGPACK) + message(STATUS "Not using msgpack") + return() +endif() + +add_library(_msgpack INTERFACE) +target_include_directories(_msgpack SYSTEM BEFORE INTERFACE "${ClickHouse_SOURCE_DIR}/contrib/msgpack-c/include") +add_library(ch_contrib::msgpack ALIAS _msgpack) diff --git a/contrib/murmurhash/CMakeLists.txt b/contrib/murmurhash/CMakeLists.txt index 2d9cb3e63824..5b04974d3c53 100644 --- a/contrib/murmurhash/CMakeLists.txt +++ b/contrib/murmurhash/CMakeLists.txt @@ -1,7 +1,8 @@ -add_library(murmurhash +add_library(_murmurhash src/MurmurHash2.cpp src/MurmurHash3.cpp include/MurmurHash2.h include/MurmurHash3.h) -target_include_directories (murmurhash PUBLIC include) +target_include_directories(_murmurhash PUBLIC include) +add_library(ch_contrib::murmurhash ALIAS _murmurhash) diff --git a/contrib/nanodbc-cmake/CMakeLists.txt b/contrib/nanodbc-cmake/CMakeLists.txt index 26a030c3995a..9ed6c9525b67 100644 --- a/contrib/nanodbc-cmake/CMakeLists.txt +++ b/contrib/nanodbc-cmake/CMakeLists.txt @@ -1,10 +1,10 @@ -if (NOT USE_INTERNAL_NANODBC_LIBRARY) +if (NOT ENABLE_ODBC) return () endif () set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/nanodbc") -if (NOT TARGET unixodbc) +if (NOT TARGET ch_contrib::unixodbc) message(FATAL_ERROR "Configuration error: unixodbc is not a target") endif() @@ -12,7 +12,7 @@ set (SRCS "${LIBRARY_DIR}/nanodbc/nanodbc.cpp" ) -add_library(nanodbc ${SRCS}) - -target_link_libraries (nanodbc PUBLIC unixodbc) -target_include_directories (nanodbc SYSTEM PUBLIC "${LIBRARY_DIR}/") +add_library(_nanodbc ${SRCS}) +target_link_libraries(_nanodbc PUBLIC ch_contrib::unixodbc) +target_include_directories(_nanodbc SYSTEM PUBLIC "${LIBRARY_DIR}/") +add_library(ch_contrib::nanodbc ALIAS _nanodbc) diff --git a/contrib/nlp-data b/contrib/nlp-data new file mode 160000 index 000000000000..5591f91f5e74 --- /dev/null +++ b/contrib/nlp-data @@ -0,0 +1 @@ +Subproject commit 5591f91f5e748cba8fb9ef81564176feae774853 diff --git a/contrib/nlp-data-cmake/CMakeLists.txt b/contrib/nlp-data-cmake/CMakeLists.txt new file mode 100644 index 000000000000..5380269c4799 --- /dev/null +++ b/contrib/nlp-data-cmake/CMakeLists.txt @@ -0,0 +1,15 @@ +include(${ClickHouse_SOURCE_DIR}/cmake/embed_binary.cmake) + +set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/nlp-data") + +add_library (_nlp_data INTERFACE) + +clickhouse_embed_binaries( + TARGET nlp_dictionaries + RESOURCE_DIR "${LIBRARY_DIR}" + RESOURCES charset.zst tonality_ru.zst programming.zst +) + +add_dependencies(_nlp_data nlp_dictionaries) +target_link_libraries(_nlp_data INTERFACE "-Wl,${WHOLE_ARCHIVE} $<TARGET_FILE:nlp_dictionaries> -Wl,${NO_WHOLE_ARCHIVE}") +add_library(ch_contrib::nlp_data
ALIAS _nlp_data) diff --git a/contrib/nuraft-cmake/CMakeLists.txt b/contrib/nuraft-cmake/CMakeLists.txt index d9e0aa6efc71..eaca00566d61 100644 --- a/contrib/nuraft-cmake/CMakeLists.txt +++ b/contrib/nuraft-cmake/CMakeLists.txt @@ -1,3 +1,15 @@ +set(ENABLE_NURAFT_DEFAULT ${ENABLE_LIBRARIES}) +if (OS_FREEBSD) + set(ENABLE_NURAFT_DEFAULT OFF) + message (STATUS "Using internal NuRaft library on FreeBSD and Darwin is not supported") +endif() +option(ENABLE_NURAFT "Enable NuRaft" ${ENABLE_NURAFT_DEFAULT}) + +if (NOT ENABLE_NURAFT) + message(STATUS "Not using NuRaft") + return() +endif() + set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/NuRaft") set(SRCS @@ -29,23 +41,25 @@ set(SRCS ) -add_library(nuraft ${SRCS}) +add_library(_nuraft ${SRCS}) -if (NOT OPENSSL_SSL_LIBRARY OR NOT OPENSSL_CRYPTO_LIBRARY) - target_compile_definitions(nuraft PRIVATE USE_BOOST_ASIO=1 BOOST_ASIO_STANDALONE=1 SSL_LIBRARY_NOT_FOUND=1) +if(NOT TARGET OpenSSL::Crypto) + target_compile_definitions(_nuraft PRIVATE USE_BOOST_ASIO=1 BOOST_ASIO_STANDALONE=1 SSL_LIBRARY_NOT_FOUND=1) else() - target_compile_definitions(nuraft PRIVATE USE_BOOST_ASIO=1 BOOST_ASIO_STANDALONE=1) + target_compile_definitions(_nuraft PRIVATE USE_BOOST_ASIO=1 BOOST_ASIO_STANDALONE=1) endif() -target_include_directories (nuraft SYSTEM PRIVATE "${LIBRARY_DIR}/include/libnuraft") +target_include_directories (_nuraft SYSTEM PRIVATE "${LIBRARY_DIR}/include/libnuraft") # for some reason include "asio.h" directly without "boost/" prefix. -target_include_directories (nuraft SYSTEM PRIVATE "${ClickHouse_SOURCE_DIR}/contrib/boost/boost") +target_include_directories (_nuraft SYSTEM PRIVATE "${ClickHouse_SOURCE_DIR}/contrib/boost/boost") -target_link_libraries (nuraft PRIVATE boost::headers_only boost::coroutine) +target_link_libraries (_nuraft PRIVATE boost::headers_only boost::coroutine) -if(OPENSSL_SSL_LIBRARY AND OPENSSL_CRYPTO_LIBRARY) - target_link_libraries (nuraft PRIVATE ${OPENSSL_SSL_LIBRARY} ${OPENSSL_CRYPTO_LIBRARY}) +if(TARGET OpenSSL::Crypto) + target_link_libraries (_nuraft PRIVATE OpenSSL::Crypto OpenSSL::SSL) endif() -target_include_directories (nuraft SYSTEM PUBLIC "${LIBRARY_DIR}/include") +target_include_directories (_nuraft SYSTEM PUBLIC "${LIBRARY_DIR}/include") + +add_library(ch_contrib::nuraft ALIAS _nuraft) diff --git a/contrib/openldap-cmake/CMakeLists.txt b/contrib/openldap-cmake/CMakeLists.txt index 0892403bb625..f5966474b0d4 100644 --- a/contrib/openldap-cmake/CMakeLists.txt +++ b/contrib/openldap-cmake/CMakeLists.txt @@ -1,13 +1,37 @@ +option (ENABLE_LDAP "Enable LDAP" ${ENABLE_LIBRARIES}) + +if (NOT ENABLE_LDAP) + message(STATUS "Not using ldap") + return() +endif() + +string (TOLOWER "${CMAKE_SYSTEM_NAME}" _system_name) +string (TOLOWER "${CMAKE_SYSTEM_PROCESSOR}" _system_processor) +if ( + "${_system_processor}" STREQUAL "amd64" OR + "${_system_processor}" STREQUAL "x64" +) + set (_system_processor "x86_64") +elseif ("${_system_processor}" STREQUAL "arm64") + set (_system_processor "aarch64") +endif () +if (NOT( + ( "${_system_name}" STREQUAL "linux" AND "${_system_processor}" STREQUAL "x86_64" ) OR + ( "${_system_name}" STREQUAL "linux" AND "${_system_processor}" STREQUAL "aarch64" ) OR + ( "${_system_name}" STREQUAL "linux" AND "${_system_processor}" STREQUAL "ppc64le" ) OR + ( "${_system_name}" STREQUAL "freebsd" AND "${_system_processor}" STREQUAL "x86_64" ) OR + ( "${_system_name}" STREQUAL "freebsd" AND "${_system_processor}" STREQUAL "aarch64" ) OR + ( "${_system_name}" STREQUAL "darwin" AND "${_system_processor}" STREQUAL 
"x86_64" ) OR + ( "${_system_name}" STREQUAL "darwin" AND "${_system_processor}" STREQUAL "aarch64" ) +)) + message (${RECONFIGURE_MESSAGE_LEVEL} "LDAP support using the bundled library is not implemented for ${CMAKE_SYSTEM_NAME} ${CMAKE_SYSTEM_PROCESSOR} platform.") +endif () + set(OPENLDAP_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/openldap") # How these lists were generated? # I compiled the original OpenLDAP with it's original build system and copied the list of source files from build commands. -set(_libs_type SHARED) -if(OPENLDAP_USE_STATIC_LIBS) - set(_libs_type STATIC) -endif() - set(OPENLDAP_VERSION_STRING "2.5.X") macro(mkversion _lib_name) @@ -51,23 +75,22 @@ set(_lber_srcs mkversion(lber) -add_library(lber ${_libs_type} +add_library(_lber ${_lber_srcs} "${CMAKE_CURRENT_BINARY_DIR}/lber-version.c" ) -target_link_libraries(lber - PRIVATE ${OPENSSL_LIBRARIES} +target_link_libraries(_lber + PRIVATE OpenSSL::Crypto OpenSSL::SSL ) -target_include_directories(lber - PRIVATE ${_extra_build_dir}/include - PRIVATE "${OPENLDAP_SOURCE_DIR}/include" +target_include_directories(_lber SYSTEM + PUBLIC ${_extra_build_dir}/include + PUBLIC "${OPENLDAP_SOURCE_DIR}/include" PRIVATE "${OPENLDAP_SOURCE_DIR}/libraries/liblber" - PRIVATE ${OPENSSL_INCLUDE_DIR} ) -target_compile_definitions(lber +target_compile_definitions(_lber PRIVATE LBER_LIBRARY ) @@ -141,24 +164,23 @@ set(_ldap_srcs mkversion(ldap) -add_library(ldap ${_libs_type} +add_library(_ldap ${_ldap_srcs} "${CMAKE_CURRENT_BINARY_DIR}/ldap-version.c" ) -target_link_libraries(ldap - PRIVATE lber - PRIVATE ${OPENSSL_LIBRARIES} +target_link_libraries(_ldap + PRIVATE _lber + PRIVATE OpenSSL::Crypto OpenSSL::SSL ) -target_include_directories(ldap - PRIVATE ${_extra_build_dir}/include - PRIVATE "${OPENLDAP_SOURCE_DIR}/include" +target_include_directories(_ldap SYSTEM + PUBLIC ${_extra_build_dir}/include + PUBLIC "${OPENLDAP_SOURCE_DIR}/include" PRIVATE "${OPENLDAP_SOURCE_DIR}/libraries/libldap" - PRIVATE ${OPENSSL_INCLUDE_DIR} ) -target_compile_definitions(ldap +target_compile_definitions(_ldap PRIVATE LDAP_LIBRARY ) @@ -177,26 +199,28 @@ set(_ldap_r_specific_srcs mkversion(ldap_r) -add_library(ldap_r ${_libs_type} +add_library(_ldap_r ${_ldap_r_specific_srcs} ${_ldap_srcs} "${CMAKE_CURRENT_BINARY_DIR}/ldap_r-version.c" ) -target_link_libraries(ldap_r - PRIVATE lber - PRIVATE ${OPENSSL_LIBRARIES} +target_link_libraries(_ldap_r + PRIVATE _lber + PRIVATE OpenSSL::Crypto OpenSSL::SSL ) -target_include_directories(ldap_r - PRIVATE ${_extra_build_dir}/include - PRIVATE "${OPENLDAP_SOURCE_DIR}/include" +target_include_directories(_ldap_r SYSTEM + PUBLIC ${_extra_build_dir}/include + PUBLIC "${OPENLDAP_SOURCE_DIR}/include" PRIVATE "${OPENLDAP_SOURCE_DIR}/libraries/libldap_r" PRIVATE "${OPENLDAP_SOURCE_DIR}/libraries/libldap" - PRIVATE ${OPENSSL_INCLUDE_DIR} ) -target_compile_definitions(ldap_r +target_compile_definitions(_ldap_r PRIVATE LDAP_R_COMPILE PRIVATE LDAP_LIBRARY ) + +add_library(ch_contrib::ldap ALIAS _ldap_r) +add_library(ch_contrib::lber ALIAS _lber) diff --git a/contrib/orc b/contrib/orc index 0a936f6bbdb9..f9a393ed2433 160000 --- a/contrib/orc +++ b/contrib/orc @@ -1 +1 @@ -Subproject commit 0a936f6bbdb9303308973073f8623b5a8d82eae1 +Subproject commit f9a393ed2433a60034795284f82d093b348f2102 diff --git a/contrib/pdqsort-cmake/CMakeLists.txt b/contrib/pdqsort-cmake/CMakeLists.txt new file mode 100644 index 000000000000..485f345807e6 --- /dev/null +++ b/contrib/pdqsort-cmake/CMakeLists.txt @@ -0,0 +1,3 @@ +add_library(_pdqsort INTERFACE) 
+target_include_directories(_pdqsort SYSTEM BEFORE INTERFACE ${ClickHouse_SOURCE_DIR}/contrib/pdqsort) +add_library(ch_contrib::pdqsort ALIAS _pdqsort) diff --git a/contrib/poco-cmake/Crypto/CMakeLists.txt b/contrib/poco-cmake/Crypto/CMakeLists.txt index e93ed5cf17d9..9886a05b21b9 100644 --- a/contrib/poco-cmake/Crypto/CMakeLists.txt +++ b/contrib/poco-cmake/Crypto/CMakeLists.txt @@ -1,46 +1,35 @@ if (ENABLE_SSL) - if (USE_INTERNAL_POCO_LIBRARY) - set (SRCS - "${LIBRARY_DIR}/Crypto/src/Cipher.cpp" - "${LIBRARY_DIR}/Crypto/src/CipherFactory.cpp" - "${LIBRARY_DIR}/Crypto/src/CipherImpl.cpp" - "${LIBRARY_DIR}/Crypto/src/CipherKey.cpp" - "${LIBRARY_DIR}/Crypto/src/CipherKeyImpl.cpp" - "${LIBRARY_DIR}/Crypto/src/CryptoException.cpp" - "${LIBRARY_DIR}/Crypto/src/CryptoStream.cpp" - "${LIBRARY_DIR}/Crypto/src/CryptoTransform.cpp" - "${LIBRARY_DIR}/Crypto/src/DigestEngine.cpp" - "${LIBRARY_DIR}/Crypto/src/ECDSADigestEngine.cpp" - "${LIBRARY_DIR}/Crypto/src/ECKey.cpp" - "${LIBRARY_DIR}/Crypto/src/ECKeyImpl.cpp" - "${LIBRARY_DIR}/Crypto/src/EVPPKey.cpp" - "${LIBRARY_DIR}/Crypto/src/KeyPair.cpp" - "${LIBRARY_DIR}/Crypto/src/KeyPairImpl.cpp" - "${LIBRARY_DIR}/Crypto/src/OpenSSLInitializer.cpp" - "${LIBRARY_DIR}/Crypto/src/PKCS12Container.cpp" - "${LIBRARY_DIR}/Crypto/src/RSACipherImpl.cpp" - "${LIBRARY_DIR}/Crypto/src/RSADigestEngine.cpp" - "${LIBRARY_DIR}/Crypto/src/RSAKey.cpp" - "${LIBRARY_DIR}/Crypto/src/RSAKeyImpl.cpp" - "${LIBRARY_DIR}/Crypto/src/X509Certificate.cpp" - ) + set (SRCS + "${LIBRARY_DIR}/Crypto/src/Cipher.cpp" + "${LIBRARY_DIR}/Crypto/src/CipherFactory.cpp" + "${LIBRARY_DIR}/Crypto/src/CipherImpl.cpp" + "${LIBRARY_DIR}/Crypto/src/CipherKey.cpp" + "${LIBRARY_DIR}/Crypto/src/CipherKeyImpl.cpp" + "${LIBRARY_DIR}/Crypto/src/CryptoException.cpp" + "${LIBRARY_DIR}/Crypto/src/CryptoStream.cpp" + "${LIBRARY_DIR}/Crypto/src/CryptoTransform.cpp" + "${LIBRARY_DIR}/Crypto/src/DigestEngine.cpp" + "${LIBRARY_DIR}/Crypto/src/ECDSADigestEngine.cpp" + "${LIBRARY_DIR}/Crypto/src/ECKey.cpp" + "${LIBRARY_DIR}/Crypto/src/ECKeyImpl.cpp" + "${LIBRARY_DIR}/Crypto/src/EVPPKey.cpp" + "${LIBRARY_DIR}/Crypto/src/KeyPair.cpp" + "${LIBRARY_DIR}/Crypto/src/KeyPairImpl.cpp" + "${LIBRARY_DIR}/Crypto/src/OpenSSLInitializer.cpp" + "${LIBRARY_DIR}/Crypto/src/PKCS12Container.cpp" + "${LIBRARY_DIR}/Crypto/src/RSACipherImpl.cpp" + "${LIBRARY_DIR}/Crypto/src/RSADigestEngine.cpp" + "${LIBRARY_DIR}/Crypto/src/RSAKey.cpp" + "${LIBRARY_DIR}/Crypto/src/RSAKeyImpl.cpp" + "${LIBRARY_DIR}/Crypto/src/X509Certificate.cpp" + ) - add_library (_poco_crypto ${SRCS}) - add_library (Poco::Crypto ALIAS _poco_crypto) - - target_compile_options (_poco_crypto PRIVATE -Wno-newline-eof) - target_include_directories (_poco_crypto SYSTEM PUBLIC "${LIBRARY_DIR}/Crypto/include") - target_link_libraries (_poco_crypto PUBLIC Poco::Foundation ssl crypto) - else () - add_library (Poco::Crypto UNKNOWN IMPORTED GLOBAL) - - find_library(LIBRARY_POCO_CRYPTO PocoCrypto) - find_path(INCLUDE_POCO_CRYPTO Poco/Crypto/Crypto.h) - set_target_properties (Poco::Crypto PROPERTIES IMPORTED_LOCATION ${LIBRARY_POCO_CRYPTO}) - set_target_properties (Poco::Crypto PROPERTIES INTERFACE_INCLUDE_DIRECTORIES ${INCLUDE_POCO_CRYPTO}) + add_library (_poco_crypto ${SRCS}) + add_library (Poco::Crypto ALIAS _poco_crypto) - target_link_libraries (Poco::Crypto INTERFACE Poco::Foundation) - endif () + target_compile_options (_poco_crypto PRIVATE -Wno-newline-eof) + target_include_directories (_poco_crypto SYSTEM PUBLIC "${LIBRARY_DIR}/Crypto/include") + target_link_libraries 
(_poco_crypto PUBLIC Poco::Foundation OpenSSL::SSL OpenSSL::Crypto) message (STATUS "Using Poco::Crypto") else () diff --git a/contrib/poco-cmake/Data/CMakeLists.txt b/contrib/poco-cmake/Data/CMakeLists.txt index 4fdd755b45dc..b13c07583ad3 100644 --- a/contrib/poco-cmake/Data/CMakeLists.txt +++ b/contrib/poco-cmake/Data/CMakeLists.txt @@ -1,60 +1,46 @@ -if (USE_INTERNAL_POCO_LIBRARY) - set (SRCS - "${LIBRARY_DIR}/Data/src/AbstractBinder.cpp" - "${LIBRARY_DIR}/Data/src/AbstractBinding.cpp" - "${LIBRARY_DIR}/Data/src/AbstractExtraction.cpp" - "${LIBRARY_DIR}/Data/src/AbstractExtractor.cpp" - "${LIBRARY_DIR}/Data/src/AbstractPreparation.cpp" - "${LIBRARY_DIR}/Data/src/AbstractPreparator.cpp" - "${LIBRARY_DIR}/Data/src/ArchiveStrategy.cpp" - "${LIBRARY_DIR}/Data/src/Bulk.cpp" - "${LIBRARY_DIR}/Data/src/Connector.cpp" - "${LIBRARY_DIR}/Data/src/DataException.cpp" - "${LIBRARY_DIR}/Data/src/Date.cpp" - "${LIBRARY_DIR}/Data/src/DynamicLOB.cpp" - "${LIBRARY_DIR}/Data/src/Limit.cpp" - "${LIBRARY_DIR}/Data/src/MetaColumn.cpp" - "${LIBRARY_DIR}/Data/src/PooledSessionHolder.cpp" - "${LIBRARY_DIR}/Data/src/PooledSessionImpl.cpp" - "${LIBRARY_DIR}/Data/src/Position.cpp" - "${LIBRARY_DIR}/Data/src/Range.cpp" - "${LIBRARY_DIR}/Data/src/RecordSet.cpp" - "${LIBRARY_DIR}/Data/src/Row.cpp" - "${LIBRARY_DIR}/Data/src/RowFilter.cpp" - "${LIBRARY_DIR}/Data/src/RowFormatter.cpp" - "${LIBRARY_DIR}/Data/src/RowIterator.cpp" - "${LIBRARY_DIR}/Data/src/Session.cpp" - "${LIBRARY_DIR}/Data/src/SessionFactory.cpp" - "${LIBRARY_DIR}/Data/src/SessionImpl.cpp" - "${LIBRARY_DIR}/Data/src/SessionPool.cpp" - "${LIBRARY_DIR}/Data/src/SessionPoolContainer.cpp" - "${LIBRARY_DIR}/Data/src/SimpleRowFormatter.cpp" - "${LIBRARY_DIR}/Data/src/SQLChannel.cpp" - "${LIBRARY_DIR}/Data/src/Statement.cpp" - "${LIBRARY_DIR}/Data/src/StatementCreator.cpp" - "${LIBRARY_DIR}/Data/src/StatementImpl.cpp" - "${LIBRARY_DIR}/Data/src/Time.cpp" - "${LIBRARY_DIR}/Data/src/Transaction.cpp" - ) +set (SRCS + "${LIBRARY_DIR}/Data/src/AbstractBinder.cpp" + "${LIBRARY_DIR}/Data/src/AbstractBinding.cpp" + "${LIBRARY_DIR}/Data/src/AbstractExtraction.cpp" + "${LIBRARY_DIR}/Data/src/AbstractExtractor.cpp" + "${LIBRARY_DIR}/Data/src/AbstractPreparation.cpp" + "${LIBRARY_DIR}/Data/src/AbstractPreparator.cpp" + "${LIBRARY_DIR}/Data/src/ArchiveStrategy.cpp" + "${LIBRARY_DIR}/Data/src/Bulk.cpp" + "${LIBRARY_DIR}/Data/src/Connector.cpp" + "${LIBRARY_DIR}/Data/src/DataException.cpp" + "${LIBRARY_DIR}/Data/src/Date.cpp" + "${LIBRARY_DIR}/Data/src/DynamicLOB.cpp" + "${LIBRARY_DIR}/Data/src/Limit.cpp" + "${LIBRARY_DIR}/Data/src/MetaColumn.cpp" + "${LIBRARY_DIR}/Data/src/PooledSessionHolder.cpp" + "${LIBRARY_DIR}/Data/src/PooledSessionImpl.cpp" + "${LIBRARY_DIR}/Data/src/Position.cpp" + "${LIBRARY_DIR}/Data/src/Range.cpp" + "${LIBRARY_DIR}/Data/src/RecordSet.cpp" + "${LIBRARY_DIR}/Data/src/Row.cpp" + "${LIBRARY_DIR}/Data/src/RowFilter.cpp" + "${LIBRARY_DIR}/Data/src/RowFormatter.cpp" + "${LIBRARY_DIR}/Data/src/RowIterator.cpp" + "${LIBRARY_DIR}/Data/src/Session.cpp" + "${LIBRARY_DIR}/Data/src/SessionFactory.cpp" + "${LIBRARY_DIR}/Data/src/SessionImpl.cpp" + "${LIBRARY_DIR}/Data/src/SessionPool.cpp" + "${LIBRARY_DIR}/Data/src/SessionPoolContainer.cpp" + "${LIBRARY_DIR}/Data/src/SimpleRowFormatter.cpp" + "${LIBRARY_DIR}/Data/src/SQLChannel.cpp" + "${LIBRARY_DIR}/Data/src/Statement.cpp" + "${LIBRARY_DIR}/Data/src/StatementCreator.cpp" + "${LIBRARY_DIR}/Data/src/StatementImpl.cpp" + "${LIBRARY_DIR}/Data/src/Time.cpp" + "${LIBRARY_DIR}/Data/src/Transaction.cpp" +) - 
add_library (_poco_data ${SRCS}) - add_library (Poco::Data ALIAS _poco_data) +add_library (_poco_data ${SRCS}) +add_library (Poco::Data ALIAS _poco_data) - if (COMPILER_GCC) - target_compile_options (_poco_data PRIVATE -Wno-deprecated-copy) - endif () - target_include_directories (_poco_data SYSTEM PUBLIC "${LIBRARY_DIR}/Data/include") - target_link_libraries (_poco_data PUBLIC Poco::Foundation) -else () - # NOTE: don't know why, but the GLOBAL is required here. - add_library (Poco::Data UNKNOWN IMPORTED GLOBAL) - - find_library(LIBRARY_POCO_DATA PocoData) - find_path(INCLUDE_POCO_DATA Poco/Data/Data.h) - set_target_properties (Poco::Data PROPERTIES IMPORTED_LOCATION ${LIBRARY_POCO_DATA}) - set_target_properties (Poco::Data PROPERTIES INTERFACE_INCLUDE_DIRECTORIES ${INCLUDE_POCO_DATA}) - - target_link_libraries (Poco::Data INTERFACE Poco::Foundation) - - message (STATUS "Using Poco::Data: ${LIBRARY_POCO_DATA} ${INCLUDE_POCO_DATA}") +if (COMPILER_GCC) + target_compile_options (_poco_data PRIVATE -Wno-deprecated-copy) endif () +target_include_directories (_poco_data SYSTEM PUBLIC "${LIBRARY_DIR}/Data/include") +target_link_libraries (_poco_data PUBLIC Poco::Foundation) diff --git a/contrib/poco-cmake/Data/ODBC/CMakeLists.txt b/contrib/poco-cmake/Data/ODBC/CMakeLists.txt index a35613045412..7de77cdacf7e 100644 --- a/contrib/poco-cmake/Data/ODBC/CMakeLists.txt +++ b/contrib/poco-cmake/Data/ODBC/CMakeLists.txt @@ -1,48 +1,39 @@ if (ENABLE_ODBC) - if (NOT TARGET unixodbc) + if (NOT TARGET ch_contrib::unixodbc) message(FATAL_ERROR "Configuration error: unixodbc is not a target") endif() - if (USE_INTERNAL_POCO_LIBRARY) - set (SRCS - "${LIBRARY_DIR}/Data/ODBC/src/Binder.cpp" - "${LIBRARY_DIR}/Data/ODBC/src/ConnectionHandle.cpp" - "${LIBRARY_DIR}/Data/ODBC/src/Connector.cpp" - "${LIBRARY_DIR}/Data/ODBC/src/EnvironmentHandle.cpp" - "${LIBRARY_DIR}/Data/ODBC/src/Extractor.cpp" - "${LIBRARY_DIR}/Data/ODBC/src/ODBCException.cpp" - "${LIBRARY_DIR}/Data/ODBC/src/ODBCMetaColumn.cpp" - "${LIBRARY_DIR}/Data/ODBC/src/ODBCStatementImpl.cpp" - "${LIBRARY_DIR}/Data/ODBC/src/Parameter.cpp" - "${LIBRARY_DIR}/Data/ODBC/src/Preparator.cpp" - "${LIBRARY_DIR}/Data/ODBC/src/SessionImpl.cpp" - "${LIBRARY_DIR}/Data/ODBC/src/TypeInfo.cpp" - "${LIBRARY_DIR}/Data/ODBC/src/Unicode.cpp" - "${LIBRARY_DIR}/Data/ODBC/src/Utility.cpp" - ) + set (SRCS + "${LIBRARY_DIR}/Data/ODBC/src/Binder.cpp" + "${LIBRARY_DIR}/Data/ODBC/src/ConnectionHandle.cpp" + "${LIBRARY_DIR}/Data/ODBC/src/Connector.cpp" + "${LIBRARY_DIR}/Data/ODBC/src/EnvironmentHandle.cpp" + "${LIBRARY_DIR}/Data/ODBC/src/Extractor.cpp" + "${LIBRARY_DIR}/Data/ODBC/src/ODBCException.cpp" + "${LIBRARY_DIR}/Data/ODBC/src/ODBCMetaColumn.cpp" + "${LIBRARY_DIR}/Data/ODBC/src/ODBCStatementImpl.cpp" + "${LIBRARY_DIR}/Data/ODBC/src/Parameter.cpp" + "${LIBRARY_DIR}/Data/ODBC/src/Preparator.cpp" + "${LIBRARY_DIR}/Data/ODBC/src/SessionImpl.cpp" + "${LIBRARY_DIR}/Data/ODBC/src/TypeInfo.cpp" + "${LIBRARY_DIR}/Data/ODBC/src/Unicode.cpp" + "${LIBRARY_DIR}/Data/ODBC/src/Utility.cpp" + ) - add_library (_poco_data_odbc ${SRCS}) - add_library (Poco::Data::ODBC ALIAS _poco_data_odbc) - - target_compile_options (_poco_data_odbc PRIVATE -Wno-unused-variable) - target_include_directories (_poco_data_odbc SYSTEM PUBLIC "${LIBRARY_DIR}/Data/ODBC/include") - target_link_libraries (_poco_data_odbc PUBLIC Poco::Data unixodbc) - else () - add_library (Poco::Data::ODBC UNKNOWN IMPORTED GLOBAL) - - find_library(LIBRARY_POCO_DATA_ODBC PocoDataODBC) - find_path(INCLUDE_POCO_DATA_ODBC Poco/Data/ODBC/ODBC.h) 
- set_target_properties (Poco::Data::ODBC PROPERTIES IMPORTED_LOCATION ${LIBRARY_POCO_DATA_ODBC}) - set_target_properties (Poco::Data::ODBC PROPERTIES INTERFACE_INCLUDE_DIRECTORIES ${INCLUDE_POCO_DATA_ODBC}) + add_library (_poco_data_odbc ${SRCS}) + add_library (Poco::Data::ODBC ALIAS _poco_data_odbc) - target_link_libraries (Poco::Data::ODBC INTERFACE Poco::Data) - endif () + target_compile_options (_poco_data_odbc PRIVATE -Wno-unused-variable) + target_include_directories (_poco_data_odbc SYSTEM PUBLIC "${LIBRARY_DIR}/Data/ODBC/include") + target_link_libraries (_poco_data_odbc PUBLIC Poco::Data ch_contrib::unixodbc) message (STATUS "Using Poco::Data::ODBC") else () add_library (_poco_data_odbc INTERFACE) add_library (Poco::Data::ODBC ALIAS _poco_data_odbc) - target_link_libraries (_poco_data_odbc INTERFACE unixodbc) + if (TARGET ch_contrib::unixodbc) + target_link_libraries (_poco_data_odbc INTERFACE ch_contrib::unixodbc) + endif() message (STATUS "Not using Poco::Data::ODBC") endif () diff --git a/contrib/poco-cmake/Foundation/CMakeLists.txt b/contrib/poco-cmake/Foundation/CMakeLists.txt index 0c13d1093441..795ec985cb4e 100644 --- a/contrib/poco-cmake/Foundation/CMakeLists.txt +++ b/contrib/poco-cmake/Foundation/CMakeLists.txt @@ -1,239 +1,232 @@ -if (USE_INTERNAL_POCO_LIBRARY) - # Foundation (pcre) +# Foundation (pcre) - set (SRCS_PCRE - "${LIBRARY_DIR}/Foundation/src/pcre_config.c" - "${LIBRARY_DIR}/Foundation/src/pcre_byte_order.c" - "${LIBRARY_DIR}/Foundation/src/pcre_chartables.c" - "${LIBRARY_DIR}/Foundation/src/pcre_compile.c" - "${LIBRARY_DIR}/Foundation/src/pcre_exec.c" - "${LIBRARY_DIR}/Foundation/src/pcre_fullinfo.c" - "${LIBRARY_DIR}/Foundation/src/pcre_globals.c" - "${LIBRARY_DIR}/Foundation/src/pcre_maketables.c" - "${LIBRARY_DIR}/Foundation/src/pcre_newline.c" - "${LIBRARY_DIR}/Foundation/src/pcre_ord2utf8.c" - "${LIBRARY_DIR}/Foundation/src/pcre_study.c" - "${LIBRARY_DIR}/Foundation/src/pcre_tables.c" - "${LIBRARY_DIR}/Foundation/src/pcre_dfa_exec.c" - "${LIBRARY_DIR}/Foundation/src/pcre_get.c" - "${LIBRARY_DIR}/Foundation/src/pcre_jit_compile.c" - "${LIBRARY_DIR}/Foundation/src/pcre_refcount.c" - "${LIBRARY_DIR}/Foundation/src/pcre_string_utils.c" - "${LIBRARY_DIR}/Foundation/src/pcre_version.c" - "${LIBRARY_DIR}/Foundation/src/pcre_ucd.c" - "${LIBRARY_DIR}/Foundation/src/pcre_valid_utf8.c" - "${LIBRARY_DIR}/Foundation/src/pcre_xclass.c" - ) +set (SRCS_PCRE + "${LIBRARY_DIR}/Foundation/src/pcre_config.c" + "${LIBRARY_DIR}/Foundation/src/pcre_byte_order.c" + "${LIBRARY_DIR}/Foundation/src/pcre_chartables.c" + "${LIBRARY_DIR}/Foundation/src/pcre_compile.c" + "${LIBRARY_DIR}/Foundation/src/pcre_exec.c" + "${LIBRARY_DIR}/Foundation/src/pcre_fullinfo.c" + "${LIBRARY_DIR}/Foundation/src/pcre_globals.c" + "${LIBRARY_DIR}/Foundation/src/pcre_maketables.c" + "${LIBRARY_DIR}/Foundation/src/pcre_newline.c" + "${LIBRARY_DIR}/Foundation/src/pcre_ord2utf8.c" + "${LIBRARY_DIR}/Foundation/src/pcre_study.c" + "${LIBRARY_DIR}/Foundation/src/pcre_tables.c" + "${LIBRARY_DIR}/Foundation/src/pcre_dfa_exec.c" + "${LIBRARY_DIR}/Foundation/src/pcre_get.c" + "${LIBRARY_DIR}/Foundation/src/pcre_jit_compile.c" + "${LIBRARY_DIR}/Foundation/src/pcre_refcount.c" + "${LIBRARY_DIR}/Foundation/src/pcre_string_utils.c" + "${LIBRARY_DIR}/Foundation/src/pcre_version.c" + "${LIBRARY_DIR}/Foundation/src/pcre_ucd.c" + "${LIBRARY_DIR}/Foundation/src/pcre_valid_utf8.c" + "${LIBRARY_DIR}/Foundation/src/pcre_xclass.c" +) - add_library (_poco_foundation_pcre ${SRCS_PCRE}) - add_library (Poco::Foundation::PCRE 
ALIAS _poco_foundation_pcre) +add_library (_poco_foundation_pcre ${SRCS_PCRE}) +add_library (Poco::Foundation::PCRE ALIAS _poco_foundation_pcre) - target_compile_options (_poco_foundation_pcre PRIVATE -Wno-sign-compare) +target_compile_options (_poco_foundation_pcre PRIVATE -Wno-sign-compare) - # Foundation +# Foundation - set (SRCS - "${LIBRARY_DIR}/Foundation/src/AbstractObserver.cpp" - "${LIBRARY_DIR}/Foundation/src/ActiveDispatcher.cpp" - "${LIBRARY_DIR}/Foundation/src/ArchiveStrategy.cpp" - "${LIBRARY_DIR}/Foundation/src/Ascii.cpp" - "${LIBRARY_DIR}/Foundation/src/ASCIIEncoding.cpp" - "${LIBRARY_DIR}/Foundation/src/AsyncChannel.cpp" - "${LIBRARY_DIR}/Foundation/src/AtomicCounter.cpp" - "${LIBRARY_DIR}/Foundation/src/Base32Decoder.cpp" - "${LIBRARY_DIR}/Foundation/src/Base32Encoder.cpp" - "${LIBRARY_DIR}/Foundation/src/Base64Decoder.cpp" - "${LIBRARY_DIR}/Foundation/src/Base64Encoder.cpp" - "${LIBRARY_DIR}/Foundation/src/BinaryReader.cpp" - "${LIBRARY_DIR}/Foundation/src/BinaryWriter.cpp" - "${LIBRARY_DIR}/Foundation/src/Bugcheck.cpp" - "${LIBRARY_DIR}/Foundation/src/ByteOrder.cpp" - "${LIBRARY_DIR}/Foundation/src/Channel.cpp" - "${LIBRARY_DIR}/Foundation/src/Checksum.cpp" - "${LIBRARY_DIR}/Foundation/src/Clock.cpp" - "${LIBRARY_DIR}/Foundation/src/CompressedLogFile.cpp" - "${LIBRARY_DIR}/Foundation/src/Condition.cpp" - "${LIBRARY_DIR}/Foundation/src/Configurable.cpp" - "${LIBRARY_DIR}/Foundation/src/ConsoleChannel.cpp" - "${LIBRARY_DIR}/Foundation/src/CountingStream.cpp" - "${LIBRARY_DIR}/Foundation/src/DateTime.cpp" - "${LIBRARY_DIR}/Foundation/src/DateTimeFormat.cpp" - "${LIBRARY_DIR}/Foundation/src/DateTimeFormatter.cpp" - "${LIBRARY_DIR}/Foundation/src/DateTimeParser.cpp" - "${LIBRARY_DIR}/Foundation/src/Debugger.cpp" - "${LIBRARY_DIR}/Foundation/src/DeflatingStream.cpp" - "${LIBRARY_DIR}/Foundation/src/DigestEngine.cpp" - "${LIBRARY_DIR}/Foundation/src/DigestStream.cpp" - "${LIBRARY_DIR}/Foundation/src/DirectoryIterator.cpp" - "${LIBRARY_DIR}/Foundation/src/DirectoryIteratorStrategy.cpp" - "${LIBRARY_DIR}/Foundation/src/DirectoryWatcher.cpp" - "${LIBRARY_DIR}/Foundation/src/Environment.cpp" - "${LIBRARY_DIR}/Foundation/src/Error.cpp" - "${LIBRARY_DIR}/Foundation/src/ErrorHandler.cpp" - "${LIBRARY_DIR}/Foundation/src/Event.cpp" - "${LIBRARY_DIR}/Foundation/src/EventArgs.cpp" - "${LIBRARY_DIR}/Foundation/src/EventChannel.cpp" - "${LIBRARY_DIR}/Foundation/src/Exception.cpp" - "${LIBRARY_DIR}/Foundation/src/FIFOBufferStream.cpp" - "${LIBRARY_DIR}/Foundation/src/File.cpp" - "${LIBRARY_DIR}/Foundation/src/FileChannel.cpp" - "${LIBRARY_DIR}/Foundation/src/FileStream.cpp" - "${LIBRARY_DIR}/Foundation/src/FileStreamFactory.cpp" - "${LIBRARY_DIR}/Foundation/src/Format.cpp" - "${LIBRARY_DIR}/Foundation/src/Formatter.cpp" - "${LIBRARY_DIR}/Foundation/src/FormattingChannel.cpp" - "${LIBRARY_DIR}/Foundation/src/FPEnvironment.cpp" - "${LIBRARY_DIR}/Foundation/src/Glob.cpp" - "${LIBRARY_DIR}/Foundation/src/Hash.cpp" - "${LIBRARY_DIR}/Foundation/src/HashStatistic.cpp" - "${LIBRARY_DIR}/Foundation/src/HexBinaryDecoder.cpp" - "${LIBRARY_DIR}/Foundation/src/HexBinaryEncoder.cpp" - "${LIBRARY_DIR}/Foundation/src/InflatingStream.cpp" - "${LIBRARY_DIR}/Foundation/src/JSONString.cpp" - "${LIBRARY_DIR}/Foundation/src/Latin1Encoding.cpp" - "${LIBRARY_DIR}/Foundation/src/Latin2Encoding.cpp" - "${LIBRARY_DIR}/Foundation/src/Latin9Encoding.cpp" - "${LIBRARY_DIR}/Foundation/src/LineEndingConverter.cpp" - "${LIBRARY_DIR}/Foundation/src/LocalDateTime.cpp" - "${LIBRARY_DIR}/Foundation/src/LogFile.cpp" - 
"${LIBRARY_DIR}/Foundation/src/Logger.cpp" - "${LIBRARY_DIR}/Foundation/src/LoggingFactory.cpp" - "${LIBRARY_DIR}/Foundation/src/LoggingRegistry.cpp" - "${LIBRARY_DIR}/Foundation/src/LogStream.cpp" - "${LIBRARY_DIR}/Foundation/src/Manifest.cpp" - "${LIBRARY_DIR}/Foundation/src/MD4Engine.cpp" - "${LIBRARY_DIR}/Foundation/src/MD5Engine.cpp" - "${LIBRARY_DIR}/Foundation/src/MemoryPool.cpp" - "${LIBRARY_DIR}/Foundation/src/MemoryStream.cpp" - "${LIBRARY_DIR}/Foundation/src/Message.cpp" - "${LIBRARY_DIR}/Foundation/src/Mutex.cpp" - "${LIBRARY_DIR}/Foundation/src/NamedEvent.cpp" - "${LIBRARY_DIR}/Foundation/src/NamedMutex.cpp" - "${LIBRARY_DIR}/Foundation/src/NestedDiagnosticContext.cpp" - "${LIBRARY_DIR}/Foundation/src/Notification.cpp" - "${LIBRARY_DIR}/Foundation/src/NotificationCenter.cpp" - "${LIBRARY_DIR}/Foundation/src/NotificationQueue.cpp" - "${LIBRARY_DIR}/Foundation/src/NullChannel.cpp" - "${LIBRARY_DIR}/Foundation/src/NullStream.cpp" - "${LIBRARY_DIR}/Foundation/src/NumberFormatter.cpp" - "${LIBRARY_DIR}/Foundation/src/NumberParser.cpp" - "${LIBRARY_DIR}/Foundation/src/NumericString.cpp" - "${LIBRARY_DIR}/Foundation/src/Path.cpp" - "${LIBRARY_DIR}/Foundation/src/PatternFormatter.cpp" - "${LIBRARY_DIR}/Foundation/src/Pipe.cpp" - "${LIBRARY_DIR}/Foundation/src/PipeImpl.cpp" - "${LIBRARY_DIR}/Foundation/src/PipeStream.cpp" - "${LIBRARY_DIR}/Foundation/src/PriorityNotificationQueue.cpp" - "${LIBRARY_DIR}/Foundation/src/Process.cpp" - "${LIBRARY_DIR}/Foundation/src/PurgeStrategy.cpp" - "${LIBRARY_DIR}/Foundation/src/Random.cpp" - "${LIBRARY_DIR}/Foundation/src/RandomStream.cpp" - "${LIBRARY_DIR}/Foundation/src/RefCountedObject.cpp" - "${LIBRARY_DIR}/Foundation/src/RegularExpression.cpp" - "${LIBRARY_DIR}/Foundation/src/RotateStrategy.cpp" - "${LIBRARY_DIR}/Foundation/src/Runnable.cpp" - "${LIBRARY_DIR}/Foundation/src/RWLock.cpp" - "${LIBRARY_DIR}/Foundation/src/Semaphore.cpp" - "${LIBRARY_DIR}/Foundation/src/SHA1Engine.cpp" - "${LIBRARY_DIR}/Foundation/src/SharedLibrary.cpp" - "${LIBRARY_DIR}/Foundation/src/SharedMemory.cpp" - "${LIBRARY_DIR}/Foundation/src/SignalHandler.cpp" - "${LIBRARY_DIR}/Foundation/src/SimpleFileChannel.cpp" - "${LIBRARY_DIR}/Foundation/src/SortedDirectoryIterator.cpp" - "${LIBRARY_DIR}/Foundation/src/SplitterChannel.cpp" - "${LIBRARY_DIR}/Foundation/src/Stopwatch.cpp" - "${LIBRARY_DIR}/Foundation/src/StreamChannel.cpp" - "${LIBRARY_DIR}/Foundation/src/StreamConverter.cpp" - "${LIBRARY_DIR}/Foundation/src/StreamCopier.cpp" - "${LIBRARY_DIR}/Foundation/src/StreamTokenizer.cpp" - "${LIBRARY_DIR}/Foundation/src/String.cpp" - "${LIBRARY_DIR}/Foundation/src/StringTokenizer.cpp" - "${LIBRARY_DIR}/Foundation/src/SynchronizedObject.cpp" - "${LIBRARY_DIR}/Foundation/src/SyslogChannel.cpp" - "${LIBRARY_DIR}/Foundation/src/Task.cpp" - "${LIBRARY_DIR}/Foundation/src/TaskManager.cpp" - "${LIBRARY_DIR}/Foundation/src/TaskNotification.cpp" - "${LIBRARY_DIR}/Foundation/src/TeeStream.cpp" - "${LIBRARY_DIR}/Foundation/src/TemporaryFile.cpp" - "${LIBRARY_DIR}/Foundation/src/TextBufferIterator.cpp" - "${LIBRARY_DIR}/Foundation/src/TextConverter.cpp" - "${LIBRARY_DIR}/Foundation/src/TextEncoding.cpp" - "${LIBRARY_DIR}/Foundation/src/TextIterator.cpp" - "${LIBRARY_DIR}/Foundation/src/Thread.cpp" - "${LIBRARY_DIR}/Foundation/src/ThreadLocal.cpp" - "${LIBRARY_DIR}/Foundation/src/ThreadPool.cpp" - "${LIBRARY_DIR}/Foundation/src/ThreadTarget.cpp" - "${LIBRARY_DIR}/Foundation/src/TimedNotificationQueue.cpp" - "${LIBRARY_DIR}/Foundation/src/Timer.cpp" - 
"${LIBRARY_DIR}/Foundation/src/Timespan.cpp" - "${LIBRARY_DIR}/Foundation/src/Timestamp.cpp" - "${LIBRARY_DIR}/Foundation/src/Timezone.cpp" - "${LIBRARY_DIR}/Foundation/src/Token.cpp" - "${LIBRARY_DIR}/Foundation/src/Unicode.cpp" - "${LIBRARY_DIR}/Foundation/src/UnicodeConverter.cpp" - "${LIBRARY_DIR}/Foundation/src/URI.cpp" - "${LIBRARY_DIR}/Foundation/src/URIStreamFactory.cpp" - "${LIBRARY_DIR}/Foundation/src/URIStreamOpener.cpp" - "${LIBRARY_DIR}/Foundation/src/UTF16Encoding.cpp" - "${LIBRARY_DIR}/Foundation/src/UTF32Encoding.cpp" - "${LIBRARY_DIR}/Foundation/src/UTF8Encoding.cpp" - "${LIBRARY_DIR}/Foundation/src/UTF8String.cpp" - "${LIBRARY_DIR}/Foundation/src/UUID.cpp" - "${LIBRARY_DIR}/Foundation/src/UUIDGenerator.cpp" - "${LIBRARY_DIR}/Foundation/src/Var.cpp" - "${LIBRARY_DIR}/Foundation/src/VarHolder.cpp" - "${LIBRARY_DIR}/Foundation/src/VarIterator.cpp" - "${LIBRARY_DIR}/Foundation/src/Void.cpp" - "${LIBRARY_DIR}/Foundation/src/Windows1250Encoding.cpp" - "${LIBRARY_DIR}/Foundation/src/Windows1251Encoding.cpp" - "${LIBRARY_DIR}/Foundation/src/Windows1252Encoding.cpp" - ) +set (SRCS + "${LIBRARY_DIR}/Foundation/src/AbstractObserver.cpp" + "${LIBRARY_DIR}/Foundation/src/ActiveDispatcher.cpp" + "${LIBRARY_DIR}/Foundation/src/ArchiveStrategy.cpp" + "${LIBRARY_DIR}/Foundation/src/Ascii.cpp" + "${LIBRARY_DIR}/Foundation/src/ASCIIEncoding.cpp" + "${LIBRARY_DIR}/Foundation/src/AsyncChannel.cpp" + "${LIBRARY_DIR}/Foundation/src/AtomicCounter.cpp" + "${LIBRARY_DIR}/Foundation/src/Base32Decoder.cpp" + "${LIBRARY_DIR}/Foundation/src/Base32Encoder.cpp" + "${LIBRARY_DIR}/Foundation/src/Base64Decoder.cpp" + "${LIBRARY_DIR}/Foundation/src/Base64Encoder.cpp" + "${LIBRARY_DIR}/Foundation/src/BinaryReader.cpp" + "${LIBRARY_DIR}/Foundation/src/BinaryWriter.cpp" + "${LIBRARY_DIR}/Foundation/src/Bugcheck.cpp" + "${LIBRARY_DIR}/Foundation/src/ByteOrder.cpp" + "${LIBRARY_DIR}/Foundation/src/Channel.cpp" + "${LIBRARY_DIR}/Foundation/src/Checksum.cpp" + "${LIBRARY_DIR}/Foundation/src/Clock.cpp" + "${LIBRARY_DIR}/Foundation/src/CompressedLogFile.cpp" + "${LIBRARY_DIR}/Foundation/src/Condition.cpp" + "${LIBRARY_DIR}/Foundation/src/Configurable.cpp" + "${LIBRARY_DIR}/Foundation/src/ConsoleChannel.cpp" + "${LIBRARY_DIR}/Foundation/src/CountingStream.cpp" + "${LIBRARY_DIR}/Foundation/src/DateTime.cpp" + "${LIBRARY_DIR}/Foundation/src/DateTimeFormat.cpp" + "${LIBRARY_DIR}/Foundation/src/DateTimeFormatter.cpp" + "${LIBRARY_DIR}/Foundation/src/DateTimeParser.cpp" + "${LIBRARY_DIR}/Foundation/src/Debugger.cpp" + "${LIBRARY_DIR}/Foundation/src/DeflatingStream.cpp" + "${LIBRARY_DIR}/Foundation/src/DigestEngine.cpp" + "${LIBRARY_DIR}/Foundation/src/DigestStream.cpp" + "${LIBRARY_DIR}/Foundation/src/DirectoryIterator.cpp" + "${LIBRARY_DIR}/Foundation/src/DirectoryIteratorStrategy.cpp" + "${LIBRARY_DIR}/Foundation/src/DirectoryWatcher.cpp" + "${LIBRARY_DIR}/Foundation/src/Environment.cpp" + "${LIBRARY_DIR}/Foundation/src/Error.cpp" + "${LIBRARY_DIR}/Foundation/src/ErrorHandler.cpp" + "${LIBRARY_DIR}/Foundation/src/Event.cpp" + "${LIBRARY_DIR}/Foundation/src/EventArgs.cpp" + "${LIBRARY_DIR}/Foundation/src/EventChannel.cpp" + "${LIBRARY_DIR}/Foundation/src/Exception.cpp" + "${LIBRARY_DIR}/Foundation/src/FIFOBufferStream.cpp" + "${LIBRARY_DIR}/Foundation/src/File.cpp" + "${LIBRARY_DIR}/Foundation/src/FileChannel.cpp" + "${LIBRARY_DIR}/Foundation/src/FileStream.cpp" + "${LIBRARY_DIR}/Foundation/src/FileStreamFactory.cpp" + "${LIBRARY_DIR}/Foundation/src/Format.cpp" + "${LIBRARY_DIR}/Foundation/src/Formatter.cpp" + 
"${LIBRARY_DIR}/Foundation/src/FormattingChannel.cpp" + "${LIBRARY_DIR}/Foundation/src/FPEnvironment.cpp" + "${LIBRARY_DIR}/Foundation/src/Glob.cpp" + "${LIBRARY_DIR}/Foundation/src/Hash.cpp" + "${LIBRARY_DIR}/Foundation/src/HashStatistic.cpp" + "${LIBRARY_DIR}/Foundation/src/HexBinaryDecoder.cpp" + "${LIBRARY_DIR}/Foundation/src/HexBinaryEncoder.cpp" + "${LIBRARY_DIR}/Foundation/src/InflatingStream.cpp" + "${LIBRARY_DIR}/Foundation/src/JSONString.cpp" + "${LIBRARY_DIR}/Foundation/src/Latin1Encoding.cpp" + "${LIBRARY_DIR}/Foundation/src/Latin2Encoding.cpp" + "${LIBRARY_DIR}/Foundation/src/Latin9Encoding.cpp" + "${LIBRARY_DIR}/Foundation/src/LineEndingConverter.cpp" + "${LIBRARY_DIR}/Foundation/src/LocalDateTime.cpp" + "${LIBRARY_DIR}/Foundation/src/LogFile.cpp" + "${LIBRARY_DIR}/Foundation/src/Logger.cpp" + "${LIBRARY_DIR}/Foundation/src/LoggingFactory.cpp" + "${LIBRARY_DIR}/Foundation/src/LoggingRegistry.cpp" + "${LIBRARY_DIR}/Foundation/src/LogStream.cpp" + "${LIBRARY_DIR}/Foundation/src/Manifest.cpp" + "${LIBRARY_DIR}/Foundation/src/MD4Engine.cpp" + "${LIBRARY_DIR}/Foundation/src/MD5Engine.cpp" + "${LIBRARY_DIR}/Foundation/src/MemoryPool.cpp" + "${LIBRARY_DIR}/Foundation/src/MemoryStream.cpp" + "${LIBRARY_DIR}/Foundation/src/Message.cpp" + "${LIBRARY_DIR}/Foundation/src/Mutex.cpp" + "${LIBRARY_DIR}/Foundation/src/NamedEvent.cpp" + "${LIBRARY_DIR}/Foundation/src/NamedMutex.cpp" + "${LIBRARY_DIR}/Foundation/src/NestedDiagnosticContext.cpp" + "${LIBRARY_DIR}/Foundation/src/Notification.cpp" + "${LIBRARY_DIR}/Foundation/src/NotificationCenter.cpp" + "${LIBRARY_DIR}/Foundation/src/NotificationQueue.cpp" + "${LIBRARY_DIR}/Foundation/src/NullChannel.cpp" + "${LIBRARY_DIR}/Foundation/src/NullStream.cpp" + "${LIBRARY_DIR}/Foundation/src/NumberFormatter.cpp" + "${LIBRARY_DIR}/Foundation/src/NumberParser.cpp" + "${LIBRARY_DIR}/Foundation/src/NumericString.cpp" + "${LIBRARY_DIR}/Foundation/src/Path.cpp" + "${LIBRARY_DIR}/Foundation/src/PatternFormatter.cpp" + "${LIBRARY_DIR}/Foundation/src/Pipe.cpp" + "${LIBRARY_DIR}/Foundation/src/PipeImpl.cpp" + "${LIBRARY_DIR}/Foundation/src/PipeStream.cpp" + "${LIBRARY_DIR}/Foundation/src/PriorityNotificationQueue.cpp" + "${LIBRARY_DIR}/Foundation/src/Process.cpp" + "${LIBRARY_DIR}/Foundation/src/PurgeStrategy.cpp" + "${LIBRARY_DIR}/Foundation/src/Random.cpp" + "${LIBRARY_DIR}/Foundation/src/RandomStream.cpp" + "${LIBRARY_DIR}/Foundation/src/RefCountedObject.cpp" + "${LIBRARY_DIR}/Foundation/src/RegularExpression.cpp" + "${LIBRARY_DIR}/Foundation/src/RotateStrategy.cpp" + "${LIBRARY_DIR}/Foundation/src/Runnable.cpp" + "${LIBRARY_DIR}/Foundation/src/RWLock.cpp" + "${LIBRARY_DIR}/Foundation/src/Semaphore.cpp" + "${LIBRARY_DIR}/Foundation/src/SHA1Engine.cpp" + "${LIBRARY_DIR}/Foundation/src/SharedLibrary.cpp" + "${LIBRARY_DIR}/Foundation/src/SharedMemory.cpp" + "${LIBRARY_DIR}/Foundation/src/SignalHandler.cpp" + "${LIBRARY_DIR}/Foundation/src/SimpleFileChannel.cpp" + "${LIBRARY_DIR}/Foundation/src/SortedDirectoryIterator.cpp" + "${LIBRARY_DIR}/Foundation/src/SplitterChannel.cpp" + "${LIBRARY_DIR}/Foundation/src/Stopwatch.cpp" + "${LIBRARY_DIR}/Foundation/src/StreamChannel.cpp" + "${LIBRARY_DIR}/Foundation/src/StreamConverter.cpp" + "${LIBRARY_DIR}/Foundation/src/StreamCopier.cpp" + "${LIBRARY_DIR}/Foundation/src/StreamTokenizer.cpp" + "${LIBRARY_DIR}/Foundation/src/String.cpp" + "${LIBRARY_DIR}/Foundation/src/StringTokenizer.cpp" + "${LIBRARY_DIR}/Foundation/src/SynchronizedObject.cpp" + "${LIBRARY_DIR}/Foundation/src/SyslogChannel.cpp" + 
"${LIBRARY_DIR}/Foundation/src/Task.cpp" + "${LIBRARY_DIR}/Foundation/src/TaskManager.cpp" + "${LIBRARY_DIR}/Foundation/src/TaskNotification.cpp" + "${LIBRARY_DIR}/Foundation/src/TeeStream.cpp" + "${LIBRARY_DIR}/Foundation/src/TemporaryFile.cpp" + "${LIBRARY_DIR}/Foundation/src/TextBufferIterator.cpp" + "${LIBRARY_DIR}/Foundation/src/TextConverter.cpp" + "${LIBRARY_DIR}/Foundation/src/TextEncoding.cpp" + "${LIBRARY_DIR}/Foundation/src/TextIterator.cpp" + "${LIBRARY_DIR}/Foundation/src/Thread.cpp" + "${LIBRARY_DIR}/Foundation/src/ThreadLocal.cpp" + "${LIBRARY_DIR}/Foundation/src/ThreadPool.cpp" + "${LIBRARY_DIR}/Foundation/src/ThreadTarget.cpp" + "${LIBRARY_DIR}/Foundation/src/TimedNotificationQueue.cpp" + "${LIBRARY_DIR}/Foundation/src/Timer.cpp" + "${LIBRARY_DIR}/Foundation/src/Timespan.cpp" + "${LIBRARY_DIR}/Foundation/src/Timestamp.cpp" + "${LIBRARY_DIR}/Foundation/src/Timezone.cpp" + "${LIBRARY_DIR}/Foundation/src/Token.cpp" + "${LIBRARY_DIR}/Foundation/src/Unicode.cpp" + "${LIBRARY_DIR}/Foundation/src/UnicodeConverter.cpp" + "${LIBRARY_DIR}/Foundation/src/URI.cpp" + "${LIBRARY_DIR}/Foundation/src/URIStreamFactory.cpp" + "${LIBRARY_DIR}/Foundation/src/URIStreamOpener.cpp" + "${LIBRARY_DIR}/Foundation/src/UTF16Encoding.cpp" + "${LIBRARY_DIR}/Foundation/src/UTF32Encoding.cpp" + "${LIBRARY_DIR}/Foundation/src/UTF8Encoding.cpp" + "${LIBRARY_DIR}/Foundation/src/UTF8String.cpp" + "${LIBRARY_DIR}/Foundation/src/UUID.cpp" + "${LIBRARY_DIR}/Foundation/src/UUIDGenerator.cpp" + "${LIBRARY_DIR}/Foundation/src/Var.cpp" + "${LIBRARY_DIR}/Foundation/src/VarHolder.cpp" + "${LIBRARY_DIR}/Foundation/src/VarIterator.cpp" + "${LIBRARY_DIR}/Foundation/src/Void.cpp" + "${LIBRARY_DIR}/Foundation/src/Windows1250Encoding.cpp" + "${LIBRARY_DIR}/Foundation/src/Windows1251Encoding.cpp" + "${LIBRARY_DIR}/Foundation/src/Windows1252Encoding.cpp" +) - add_library (_poco_foundation ${SRCS}) - add_library (Poco::Foundation ALIAS _poco_foundation) +add_library (_poco_foundation ${SRCS}) +add_library (Poco::Foundation ALIAS _poco_foundation) - if (COMPILER_GCC) - target_compile_options (_poco_foundation - PRIVATE - -Wno-suggest-override - ) - elseif (COMPILER_CLANG) - target_compile_options (_poco_foundation - PRIVATE - -Wno-atomic-implicit-seq-cst - -Wno-deprecated - -Wno-extra-semi-stmt - -Wno-zero-as-null-pointer-constant - -Wno-implicit-int-float-conversion - -Wno-thread-safety-analysis - -Wno-thread-safety-negative - ) - endif () +if (COMPILER_GCC) target_compile_options (_poco_foundation PRIVATE - -Wno-sign-compare - -Wno-unused-parameter + -Wno-suggest-override ) - target_compile_definitions (_poco_foundation +elseif (COMPILER_CLANG) + target_compile_options (_poco_foundation PRIVATE - POCO_UNBUNDLED - POCO_UNBUNDLED_ZLIB - PUBLIC - POCO_ENABLE_CPP11 - POCO_OS_FAMILY_UNIX + -Wno-atomic-implicit-seq-cst + -Wno-deprecated + -Wno-extra-semi-stmt + -Wno-zero-as-null-pointer-constant + -Wno-implicit-int-float-conversion + -Wno-thread-safety-analysis + -Wno-thread-safety-negative ) - target_include_directories (_poco_foundation SYSTEM PUBLIC "${LIBRARY_DIR}/Foundation/include") - target_link_libraries (_poco_foundation PRIVATE Poco::Foundation::PCRE ${ZLIB_LIBRARIES} lz4) -else () - add_library (Poco::Foundation UNKNOWN IMPORTED GLOBAL) - - find_library (LIBRARY_POCO_FOUNDATION PocoFoundation) - find_path (INCLUDE_POCO_FOUNDATION Poco/Foundation.h) - set_target_properties (Poco::Foundation PROPERTIES IMPORTED_LOCATION ${LIBRARY_POCO_FOUNDATION}) - set_target_properties (Poco::Foundation PROPERTIES 
INTERFACE_INCLUDE_DIRECTORIES ${INCLUDE_POCO_FOUNDATION}) - - message (STATUS "Using Poco::Foundation: ${LIBRARY_POCO_FOUNDATION} ${INCLUDE_POCO_FOUNDATION}") endif () +target_compile_options (_poco_foundation + PRIVATE + -Wno-sign-compare + -Wno-unused-parameter +) +target_compile_definitions (_poco_foundation + PRIVATE + POCO_UNBUNDLED + POCO_UNBUNDLED_ZLIB + PUBLIC + POCO_ENABLE_CPP11 + POCO_OS_FAMILY_UNIX +) +target_include_directories (_poco_foundation SYSTEM PUBLIC "${LIBRARY_DIR}/Foundation/include") +target_link_libraries (_poco_foundation + PRIVATE + Poco::Foundation::PCRE + ch_contrib::zlib + ch_contrib::lz4) if(OS_DARWIN AND ARCH_AARCH64) target_compile_definitions (_poco_foundation diff --git a/contrib/poco-cmake/JSON/CMakeLists.txt b/contrib/poco-cmake/JSON/CMakeLists.txt index 7033b800d5d5..e138dd046a8a 100644 --- a/contrib/poco-cmake/JSON/CMakeLists.txt +++ b/contrib/poco-cmake/JSON/CMakeLists.txt @@ -1,42 +1,31 @@ -if (USE_INTERNAL_POCO_LIBRARY) - # Poco::JSON (pdjson) - - set (SRCS_PDJSON - "${LIBRARY_DIR}/JSON/src/pdjson.c" - ) - - add_library (_poco_json_pdjson ${SRCS_PDJSON}) - add_library (Poco::JSON::Pdjson ALIAS _poco_json_pdjson) - - # Poco::JSON - - set (SRCS - "${LIBRARY_DIR}/JSON/src/Array.cpp" - "${LIBRARY_DIR}/JSON/src/Handler.cpp" - "${LIBRARY_DIR}/JSON/src/JSONException.cpp" - "${LIBRARY_DIR}/JSON/src/Object.cpp" - "${LIBRARY_DIR}/JSON/src/ParseHandler.cpp" - "${LIBRARY_DIR}/JSON/src/Parser.cpp" - "${LIBRARY_DIR}/JSON/src/ParserImpl.cpp" - "${LIBRARY_DIR}/JSON/src/PrintHandler.cpp" - "${LIBRARY_DIR}/JSON/src/Query.cpp" - "${LIBRARY_DIR}/JSON/src/Stringifier.cpp" - "${LIBRARY_DIR}/JSON/src/Template.cpp" - "${LIBRARY_DIR}/JSON/src/TemplateCache.cpp" - ) - - add_library (_poco_json ${SRCS}) - add_library (Poco::JSON ALIAS _poco_json) - - target_include_directories (_poco_json SYSTEM PUBLIC "${LIBRARY_DIR}/JSON/include") - target_link_libraries (_poco_json PUBLIC Poco::Foundation Poco::JSON::Pdjson) -else () - add_library (Poco::JSON UNKNOWN IMPORTED GLOBAL) - - find_library (LIBRARY_POCO_JSON PocoJSON) - find_path (INCLUDE_POCO_JSON Poco/JSON/JSON.h) - set_target_properties (Poco::JSON PROPERTIES IMPORTED_LOCATION ${LIBRARY_POCO_JSON}) - set_target_properties (Poco::JSON PROPERTIES INTERFACE_INCLUDE_DIRECTORIES ${INCLUDE_POCO_JSON}) - - message (STATUS "Using Poco::JSON: ${LIBRARY_POCO_JSON} ${INCLUDE_POCO_JSON}") -endif () +# Poco::JSON (pdjson) + +set (SRCS_PDJSON + "${LIBRARY_DIR}/JSON/src/pdjson.c" +) + +add_library (_poco_json_pdjson ${SRCS_PDJSON}) +add_library (Poco::JSON::Pdjson ALIAS _poco_json_pdjson) + +# Poco::JSON + +set (SRCS + "${LIBRARY_DIR}/JSON/src/Array.cpp" + "${LIBRARY_DIR}/JSON/src/Handler.cpp" + "${LIBRARY_DIR}/JSON/src/JSONException.cpp" + "${LIBRARY_DIR}/JSON/src/Object.cpp" + "${LIBRARY_DIR}/JSON/src/ParseHandler.cpp" + "${LIBRARY_DIR}/JSON/src/Parser.cpp" + "${LIBRARY_DIR}/JSON/src/ParserImpl.cpp" + "${LIBRARY_DIR}/JSON/src/PrintHandler.cpp" + "${LIBRARY_DIR}/JSON/src/Query.cpp" + "${LIBRARY_DIR}/JSON/src/Stringifier.cpp" + "${LIBRARY_DIR}/JSON/src/Template.cpp" + "${LIBRARY_DIR}/JSON/src/TemplateCache.cpp" +) + +add_library (_poco_json ${SRCS}) +add_library (Poco::JSON ALIAS _poco_json) + +target_include_directories (_poco_json SYSTEM PUBLIC "${LIBRARY_DIR}/JSON/include") +target_link_libraries (_poco_json PUBLIC Poco::Foundation Poco::JSON::Pdjson) diff --git a/contrib/poco-cmake/MongoDB/CMakeLists.txt b/contrib/poco-cmake/MongoDB/CMakeLists.txt index e3dce7ac5cdb..fec256b4dcd8 100644 --- a/contrib/poco-cmake/MongoDB/CMakeLists.txt 
+++ b/contrib/poco-cmake/MongoDB/CMakeLists.txt @@ -1,40 +1,29 @@ -if (USE_INTERNAL_POCO_LIBRARY) - set (SRCS - "${LIBRARY_DIR}/MongoDB/src/Array.cpp" - "${LIBRARY_DIR}/MongoDB/src/Binary.cpp" - "${LIBRARY_DIR}/MongoDB/src/Connection.cpp" - "${LIBRARY_DIR}/MongoDB/src/Cursor.cpp" - "${LIBRARY_DIR}/MongoDB/src/Database.cpp" - "${LIBRARY_DIR}/MongoDB/src/DeleteRequest.cpp" - "${LIBRARY_DIR}/MongoDB/src/Document.cpp" - "${LIBRARY_DIR}/MongoDB/src/Element.cpp" - "${LIBRARY_DIR}/MongoDB/src/GetMoreRequest.cpp" - "${LIBRARY_DIR}/MongoDB/src/InsertRequest.cpp" - "${LIBRARY_DIR}/MongoDB/src/JavaScriptCode.cpp" - "${LIBRARY_DIR}/MongoDB/src/KillCursorsRequest.cpp" - "${LIBRARY_DIR}/MongoDB/src/Message.cpp" - "${LIBRARY_DIR}/MongoDB/src/MessageHeader.cpp" - "${LIBRARY_DIR}/MongoDB/src/ObjectId.cpp" - "${LIBRARY_DIR}/MongoDB/src/QueryRequest.cpp" - "${LIBRARY_DIR}/MongoDB/src/RegularExpression.cpp" - "${LIBRARY_DIR}/MongoDB/src/ReplicaSet.cpp" - "${LIBRARY_DIR}/MongoDB/src/RequestMessage.cpp" - "${LIBRARY_DIR}/MongoDB/src/ResponseMessage.cpp" - "${LIBRARY_DIR}/MongoDB/src/UpdateRequest.cpp" - ) +set (SRCS + "${LIBRARY_DIR}/MongoDB/src/Array.cpp" + "${LIBRARY_DIR}/MongoDB/src/Binary.cpp" + "${LIBRARY_DIR}/MongoDB/src/Connection.cpp" + "${LIBRARY_DIR}/MongoDB/src/Cursor.cpp" + "${LIBRARY_DIR}/MongoDB/src/Database.cpp" + "${LIBRARY_DIR}/MongoDB/src/DeleteRequest.cpp" + "${LIBRARY_DIR}/MongoDB/src/Document.cpp" + "${LIBRARY_DIR}/MongoDB/src/Element.cpp" + "${LIBRARY_DIR}/MongoDB/src/GetMoreRequest.cpp" + "${LIBRARY_DIR}/MongoDB/src/InsertRequest.cpp" + "${LIBRARY_DIR}/MongoDB/src/JavaScriptCode.cpp" + "${LIBRARY_DIR}/MongoDB/src/KillCursorsRequest.cpp" + "${LIBRARY_DIR}/MongoDB/src/Message.cpp" + "${LIBRARY_DIR}/MongoDB/src/MessageHeader.cpp" + "${LIBRARY_DIR}/MongoDB/src/ObjectId.cpp" + "${LIBRARY_DIR}/MongoDB/src/QueryRequest.cpp" + "${LIBRARY_DIR}/MongoDB/src/RegularExpression.cpp" + "${LIBRARY_DIR}/MongoDB/src/ReplicaSet.cpp" + "${LIBRARY_DIR}/MongoDB/src/RequestMessage.cpp" + "${LIBRARY_DIR}/MongoDB/src/ResponseMessage.cpp" + "${LIBRARY_DIR}/MongoDB/src/UpdateRequest.cpp" +) - add_library (_poco_mongodb ${SRCS}) - add_library (Poco::MongoDB ALIAS _poco_mongodb) +add_library (_poco_mongodb ${SRCS}) +add_library (Poco::MongoDB ALIAS _poco_mongodb) - target_include_directories (_poco_mongodb SYSTEM PUBLIC "${LIBRARY_DIR}/MongoDB/include") - target_link_libraries (_poco_mongodb PUBLIC Poco::Net) -else () - add_library (Poco::MongoDB UNKNOWN IMPORTED GLOBAL) - - find_library (LIBRARY_POCO_MONGODB PocoMongoDB) - find_path (INCLUDE_POCO_MONGODB Poco/MongoDB/MongoDB.h) - set_target_properties (Poco::MongoDB PROPERTIES IMPORTED_LOCATION ${LIBRARY_POCO_MONGODB}) - set_target_properties (Poco::MongoDB PROPERTIES INTERFACE_INCLUDE_DIRECTORIES ${INCLUDE_POCO_MONGODB}) - - message (STATUS "Using Poco::MongoDB: ${LIBRARY_POCO_MONGODB} ${INCLUDE_POCO_MONGODB}") -endif () +target_include_directories (_poco_mongodb SYSTEM PUBLIC "${LIBRARY_DIR}/MongoDB/include") +target_link_libraries (_poco_mongodb PUBLIC Poco::Net) diff --git a/contrib/poco-cmake/Net/CMakeLists.txt b/contrib/poco-cmake/Net/CMakeLists.txt index 45989af8d451..30ff799ccfc4 100644 --- a/contrib/poco-cmake/Net/CMakeLists.txt +++ b/contrib/poco-cmake/Net/CMakeLists.txt @@ -1,139 +1,128 @@ -if (USE_INTERNAL_POCO_LIBRARY) - set (SRCS - "${LIBRARY_DIR}/Net/src/AbstractHTTPRequestHandler.cpp" - "${LIBRARY_DIR}/Net/src/DatagramSocket.cpp" - "${LIBRARY_DIR}/Net/src/DatagramSocketImpl.cpp" - "${LIBRARY_DIR}/Net/src/DialogSocket.cpp" - 
"${LIBRARY_DIR}/Net/src/DNS.cpp" - "${LIBRARY_DIR}/Net/src/FilePartSource.cpp" - "${LIBRARY_DIR}/Net/src/FTPClientSession.cpp" - "${LIBRARY_DIR}/Net/src/FTPStreamFactory.cpp" - "${LIBRARY_DIR}/Net/src/HostEntry.cpp" - "${LIBRARY_DIR}/Net/src/HTMLForm.cpp" - "${LIBRARY_DIR}/Net/src/HTTPAuthenticationParams.cpp" - "${LIBRARY_DIR}/Net/src/HTTPBasicCredentials.cpp" - "${LIBRARY_DIR}/Net/src/HTTPBufferAllocator.cpp" - "${LIBRARY_DIR}/Net/src/HTTPChunkedStream.cpp" - "${LIBRARY_DIR}/Net/src/HTTPClientSession.cpp" - "${LIBRARY_DIR}/Net/src/HTTPCookie.cpp" - "${LIBRARY_DIR}/Net/src/HTTPCredentials.cpp" - "${LIBRARY_DIR}/Net/src/HTTPDigestCredentials.cpp" - "${LIBRARY_DIR}/Net/src/HTTPFixedLengthStream.cpp" - "${LIBRARY_DIR}/Net/src/HTTPHeaderStream.cpp" - "${LIBRARY_DIR}/Net/src/HTTPIOStream.cpp" - "${LIBRARY_DIR}/Net/src/HTTPMessage.cpp" - "${LIBRARY_DIR}/Net/src/HTTPRequest.cpp" - "${LIBRARY_DIR}/Net/src/HTTPRequestHandler.cpp" - "${LIBRARY_DIR}/Net/src/HTTPRequestHandlerFactory.cpp" - "${LIBRARY_DIR}/Net/src/HTTPResponse.cpp" - "${LIBRARY_DIR}/Net/src/HTTPServer.cpp" - "${LIBRARY_DIR}/Net/src/HTTPServerConnection.cpp" - "${LIBRARY_DIR}/Net/src/HTTPServerConnectionFactory.cpp" - "${LIBRARY_DIR}/Net/src/HTTPServerParams.cpp" - "${LIBRARY_DIR}/Net/src/HTTPServerRequest.cpp" - "${LIBRARY_DIR}/Net/src/HTTPServerRequestImpl.cpp" - "${LIBRARY_DIR}/Net/src/HTTPServerResponse.cpp" - "${LIBRARY_DIR}/Net/src/HTTPServerResponseImpl.cpp" - "${LIBRARY_DIR}/Net/src/HTTPServerSession.cpp" - "${LIBRARY_DIR}/Net/src/HTTPSession.cpp" - "${LIBRARY_DIR}/Net/src/HTTPSessionFactory.cpp" - "${LIBRARY_DIR}/Net/src/HTTPSessionInstantiator.cpp" - "${LIBRARY_DIR}/Net/src/HTTPStream.cpp" - "${LIBRARY_DIR}/Net/src/HTTPStreamFactory.cpp" - "${LIBRARY_DIR}/Net/src/ICMPClient.cpp" - "${LIBRARY_DIR}/Net/src/ICMPEventArgs.cpp" - "${LIBRARY_DIR}/Net/src/ICMPPacket.cpp" - "${LIBRARY_DIR}/Net/src/ICMPPacketImpl.cpp" - "${LIBRARY_DIR}/Net/src/ICMPSocket.cpp" - "${LIBRARY_DIR}/Net/src/ICMPSocketImpl.cpp" - "${LIBRARY_DIR}/Net/src/ICMPv4PacketImpl.cpp" - "${LIBRARY_DIR}/Net/src/IPAddress.cpp" - "${LIBRARY_DIR}/Net/src/IPAddressImpl.cpp" - "${LIBRARY_DIR}/Net/src/MailMessage.cpp" - "${LIBRARY_DIR}/Net/src/MailRecipient.cpp" - "${LIBRARY_DIR}/Net/src/MailStream.cpp" - "${LIBRARY_DIR}/Net/src/MediaType.cpp" - "${LIBRARY_DIR}/Net/src/MessageHeader.cpp" - "${LIBRARY_DIR}/Net/src/MulticastSocket.cpp" - "${LIBRARY_DIR}/Net/src/MultipartReader.cpp" - "${LIBRARY_DIR}/Net/src/MultipartWriter.cpp" - "${LIBRARY_DIR}/Net/src/NameValueCollection.cpp" - "${LIBRARY_DIR}/Net/src/Net.cpp" - "${LIBRARY_DIR}/Net/src/NetException.cpp" - "${LIBRARY_DIR}/Net/src/NetworkInterface.cpp" - "${LIBRARY_DIR}/Net/src/NTPClient.cpp" - "${LIBRARY_DIR}/Net/src/NTPEventArgs.cpp" - "${LIBRARY_DIR}/Net/src/NTPPacket.cpp" - "${LIBRARY_DIR}/Net/src/NullPartHandler.cpp" - "${LIBRARY_DIR}/Net/src/OAuth10Credentials.cpp" - "${LIBRARY_DIR}/Net/src/OAuth20Credentials.cpp" - "${LIBRARY_DIR}/Net/src/PartHandler.cpp" - "${LIBRARY_DIR}/Net/src/PartSource.cpp" - "${LIBRARY_DIR}/Net/src/PartStore.cpp" - "${LIBRARY_DIR}/Net/src/PollSet.cpp" - "${LIBRARY_DIR}/Net/src/POP3ClientSession.cpp" - "${LIBRARY_DIR}/Net/src/QuotedPrintableDecoder.cpp" - "${LIBRARY_DIR}/Net/src/QuotedPrintableEncoder.cpp" - "${LIBRARY_DIR}/Net/src/RawSocket.cpp" - "${LIBRARY_DIR}/Net/src/RawSocketImpl.cpp" - "${LIBRARY_DIR}/Net/src/RemoteSyslogChannel.cpp" - "${LIBRARY_DIR}/Net/src/RemoteSyslogListener.cpp" - "${LIBRARY_DIR}/Net/src/ServerSocket.cpp" - "${LIBRARY_DIR}/Net/src/ServerSocketImpl.cpp" - 
"${LIBRARY_DIR}/Net/src/SMTPChannel.cpp" - "${LIBRARY_DIR}/Net/src/SMTPClientSession.cpp" - "${LIBRARY_DIR}/Net/src/Socket.cpp" - "${LIBRARY_DIR}/Net/src/SocketAddress.cpp" - "${LIBRARY_DIR}/Net/src/SocketAddressImpl.cpp" - "${LIBRARY_DIR}/Net/src/SocketImpl.cpp" - "${LIBRARY_DIR}/Net/src/SocketNotification.cpp" - "${LIBRARY_DIR}/Net/src/SocketNotifier.cpp" - "${LIBRARY_DIR}/Net/src/SocketReactor.cpp" - "${LIBRARY_DIR}/Net/src/SocketStream.cpp" - "${LIBRARY_DIR}/Net/src/StreamSocket.cpp" - "${LIBRARY_DIR}/Net/src/StreamSocketImpl.cpp" - "${LIBRARY_DIR}/Net/src/StringPartSource.cpp" - "${LIBRARY_DIR}/Net/src/TCPServer.cpp" - "${LIBRARY_DIR}/Net/src/TCPServerConnection.cpp" - "${LIBRARY_DIR}/Net/src/TCPServerConnectionFactory.cpp" - "${LIBRARY_DIR}/Net/src/TCPServerDispatcher.cpp" - "${LIBRARY_DIR}/Net/src/TCPServerParams.cpp" - "${LIBRARY_DIR}/Net/src/WebSocket.cpp" - "${LIBRARY_DIR}/Net/src/WebSocketImpl.cpp" - ) +set (SRCS + "${LIBRARY_DIR}/Net/src/AbstractHTTPRequestHandler.cpp" + "${LIBRARY_DIR}/Net/src/DatagramSocket.cpp" + "${LIBRARY_DIR}/Net/src/DatagramSocketImpl.cpp" + "${LIBRARY_DIR}/Net/src/DialogSocket.cpp" + "${LIBRARY_DIR}/Net/src/DNS.cpp" + "${LIBRARY_DIR}/Net/src/FilePartSource.cpp" + "${LIBRARY_DIR}/Net/src/FTPClientSession.cpp" + "${LIBRARY_DIR}/Net/src/FTPStreamFactory.cpp" + "${LIBRARY_DIR}/Net/src/HostEntry.cpp" + "${LIBRARY_DIR}/Net/src/HTMLForm.cpp" + "${LIBRARY_DIR}/Net/src/HTTPAuthenticationParams.cpp" + "${LIBRARY_DIR}/Net/src/HTTPBasicCredentials.cpp" + "${LIBRARY_DIR}/Net/src/HTTPBufferAllocator.cpp" + "${LIBRARY_DIR}/Net/src/HTTPChunkedStream.cpp" + "${LIBRARY_DIR}/Net/src/HTTPClientSession.cpp" + "${LIBRARY_DIR}/Net/src/HTTPCookie.cpp" + "${LIBRARY_DIR}/Net/src/HTTPCredentials.cpp" + "${LIBRARY_DIR}/Net/src/HTTPDigestCredentials.cpp" + "${LIBRARY_DIR}/Net/src/HTTPFixedLengthStream.cpp" + "${LIBRARY_DIR}/Net/src/HTTPHeaderStream.cpp" + "${LIBRARY_DIR}/Net/src/HTTPIOStream.cpp" + "${LIBRARY_DIR}/Net/src/HTTPMessage.cpp" + "${LIBRARY_DIR}/Net/src/HTTPRequest.cpp" + "${LIBRARY_DIR}/Net/src/HTTPRequestHandler.cpp" + "${LIBRARY_DIR}/Net/src/HTTPRequestHandlerFactory.cpp" + "${LIBRARY_DIR}/Net/src/HTTPResponse.cpp" + "${LIBRARY_DIR}/Net/src/HTTPServer.cpp" + "${LIBRARY_DIR}/Net/src/HTTPServerConnection.cpp" + "${LIBRARY_DIR}/Net/src/HTTPServerConnectionFactory.cpp" + "${LIBRARY_DIR}/Net/src/HTTPServerParams.cpp" + "${LIBRARY_DIR}/Net/src/HTTPServerRequest.cpp" + "${LIBRARY_DIR}/Net/src/HTTPServerRequestImpl.cpp" + "${LIBRARY_DIR}/Net/src/HTTPServerResponse.cpp" + "${LIBRARY_DIR}/Net/src/HTTPServerResponseImpl.cpp" + "${LIBRARY_DIR}/Net/src/HTTPServerSession.cpp" + "${LIBRARY_DIR}/Net/src/HTTPSession.cpp" + "${LIBRARY_DIR}/Net/src/HTTPSessionFactory.cpp" + "${LIBRARY_DIR}/Net/src/HTTPSessionInstantiator.cpp" + "${LIBRARY_DIR}/Net/src/HTTPStream.cpp" + "${LIBRARY_DIR}/Net/src/HTTPStreamFactory.cpp" + "${LIBRARY_DIR}/Net/src/ICMPClient.cpp" + "${LIBRARY_DIR}/Net/src/ICMPEventArgs.cpp" + "${LIBRARY_DIR}/Net/src/ICMPPacket.cpp" + "${LIBRARY_DIR}/Net/src/ICMPPacketImpl.cpp" + "${LIBRARY_DIR}/Net/src/ICMPSocket.cpp" + "${LIBRARY_DIR}/Net/src/ICMPSocketImpl.cpp" + "${LIBRARY_DIR}/Net/src/ICMPv4PacketImpl.cpp" + "${LIBRARY_DIR}/Net/src/IPAddress.cpp" + "${LIBRARY_DIR}/Net/src/IPAddressImpl.cpp" + "${LIBRARY_DIR}/Net/src/MailMessage.cpp" + "${LIBRARY_DIR}/Net/src/MailRecipient.cpp" + "${LIBRARY_DIR}/Net/src/MailStream.cpp" + "${LIBRARY_DIR}/Net/src/MediaType.cpp" + "${LIBRARY_DIR}/Net/src/MessageHeader.cpp" + "${LIBRARY_DIR}/Net/src/MulticastSocket.cpp" + 
"${LIBRARY_DIR}/Net/src/MultipartReader.cpp" + "${LIBRARY_DIR}/Net/src/MultipartWriter.cpp" + "${LIBRARY_DIR}/Net/src/NameValueCollection.cpp" + "${LIBRARY_DIR}/Net/src/Net.cpp" + "${LIBRARY_DIR}/Net/src/NetException.cpp" + "${LIBRARY_DIR}/Net/src/NetworkInterface.cpp" + "${LIBRARY_DIR}/Net/src/NTPClient.cpp" + "${LIBRARY_DIR}/Net/src/NTPEventArgs.cpp" + "${LIBRARY_DIR}/Net/src/NTPPacket.cpp" + "${LIBRARY_DIR}/Net/src/NullPartHandler.cpp" + "${LIBRARY_DIR}/Net/src/OAuth10Credentials.cpp" + "${LIBRARY_DIR}/Net/src/OAuth20Credentials.cpp" + "${LIBRARY_DIR}/Net/src/PartHandler.cpp" + "${LIBRARY_DIR}/Net/src/PartSource.cpp" + "${LIBRARY_DIR}/Net/src/PartStore.cpp" + "${LIBRARY_DIR}/Net/src/PollSet.cpp" + "${LIBRARY_DIR}/Net/src/POP3ClientSession.cpp" + "${LIBRARY_DIR}/Net/src/QuotedPrintableDecoder.cpp" + "${LIBRARY_DIR}/Net/src/QuotedPrintableEncoder.cpp" + "${LIBRARY_DIR}/Net/src/RawSocket.cpp" + "${LIBRARY_DIR}/Net/src/RawSocketImpl.cpp" + "${LIBRARY_DIR}/Net/src/RemoteSyslogChannel.cpp" + "${LIBRARY_DIR}/Net/src/RemoteSyslogListener.cpp" + "${LIBRARY_DIR}/Net/src/ServerSocket.cpp" + "${LIBRARY_DIR}/Net/src/ServerSocketImpl.cpp" + "${LIBRARY_DIR}/Net/src/SMTPChannel.cpp" + "${LIBRARY_DIR}/Net/src/SMTPClientSession.cpp" + "${LIBRARY_DIR}/Net/src/Socket.cpp" + "${LIBRARY_DIR}/Net/src/SocketAddress.cpp" + "${LIBRARY_DIR}/Net/src/SocketAddressImpl.cpp" + "${LIBRARY_DIR}/Net/src/SocketImpl.cpp" + "${LIBRARY_DIR}/Net/src/SocketNotification.cpp" + "${LIBRARY_DIR}/Net/src/SocketNotifier.cpp" + "${LIBRARY_DIR}/Net/src/SocketReactor.cpp" + "${LIBRARY_DIR}/Net/src/SocketStream.cpp" + "${LIBRARY_DIR}/Net/src/StreamSocket.cpp" + "${LIBRARY_DIR}/Net/src/StreamSocketImpl.cpp" + "${LIBRARY_DIR}/Net/src/StringPartSource.cpp" + "${LIBRARY_DIR}/Net/src/TCPServer.cpp" + "${LIBRARY_DIR}/Net/src/TCPServerConnection.cpp" + "${LIBRARY_DIR}/Net/src/TCPServerConnectionFactory.cpp" + "${LIBRARY_DIR}/Net/src/TCPServerDispatcher.cpp" + "${LIBRARY_DIR}/Net/src/TCPServerParams.cpp" + "${LIBRARY_DIR}/Net/src/WebSocket.cpp" + "${LIBRARY_DIR}/Net/src/WebSocketImpl.cpp" +) - add_library (_poco_net ${SRCS}) - add_library (Poco::Net ALIAS _poco_net) +add_library (_poco_net ${SRCS}) +add_library (Poco::Net ALIAS _poco_net) - if (OS_LINUX) - target_compile_definitions (_poco_net PUBLIC POCO_HAVE_FD_EPOLL) - elseif (OS_DARWIN OR OS_FREEBSD) - target_compile_definitions (_poco_net PUBLIC POCO_HAVE_FD_POLL) - endif () +if (OS_LINUX) + target_compile_definitions (_poco_net PUBLIC POCO_HAVE_FD_EPOLL) +elseif (OS_DARWIN OR OS_FREEBSD) + target_compile_definitions (_poco_net PUBLIC POCO_HAVE_FD_POLL) +endif () - if (COMPILER_CLANG) - # clang-specific warnings - target_compile_options (_poco_net - PRIVATE - -Wno-atomic-implicit-seq-cst - -Wno-extra-semi-stmt - -Wno-extra-semi - ) - endif () +if (COMPILER_CLANG) + # clang-specific warnings target_compile_options (_poco_net PRIVATE - -Wno-deprecated + -Wno-atomic-implicit-seq-cst + -Wno-extra-semi-stmt -Wno-extra-semi ) - target_include_directories (_poco_net SYSTEM PUBLIC "${LIBRARY_DIR}/Net/include") - target_link_libraries (_poco_net PUBLIC Poco::Foundation) -else () - add_library (Poco::Net UNKNOWN IMPORTED GLOBAL) - - find_library (LIBRARY_POCO_NET PocoNet) - find_path (INCLUDE_POCO_NET Poco/Net/Net.h) - set_target_properties (Poco::Net PROPERTIES IMPORTED_LOCATION ${LIBRARY_POCO_NET}) - set_target_properties (Poco::Net PROPERTIES INTERFACE_INCLUDE_DIRECTORIES ${INCLUDE_POCO_NET}) - - message (STATUS "Using Poco::Net: ${LIBRARY_POCO_NET} ${INCLUDE_POCO_NET}") endif () 
+target_compile_options (_poco_net + PRIVATE + -Wno-deprecated + -Wno-extra-semi +) +target_include_directories (_poco_net SYSTEM PUBLIC "${LIBRARY_DIR}/Net/include") +target_link_libraries (_poco_net PUBLIC Poco::Foundation) diff --git a/contrib/poco-cmake/Net/SSL/CMakeLists.txt b/contrib/poco-cmake/Net/SSL/CMakeLists.txt index 4b3adacfb8f0..de2bb624a8bd 100644 --- a/contrib/poco-cmake/Net/SSL/CMakeLists.txt +++ b/contrib/poco-cmake/Net/SSL/CMakeLists.txt @@ -1,50 +1,39 @@ if (ENABLE_SSL) - if (USE_INTERNAL_POCO_LIBRARY) - set (SRCS - "${LIBRARY_DIR}/NetSSL_OpenSSL/src/AcceptCertificateHandler.cpp" - "${LIBRARY_DIR}/NetSSL_OpenSSL/src/CertificateHandlerFactory.cpp" - "${LIBRARY_DIR}/NetSSL_OpenSSL/src/CertificateHandlerFactoryMgr.cpp" - "${LIBRARY_DIR}/NetSSL_OpenSSL/src/ConsoleCertificateHandler.cpp" - "${LIBRARY_DIR}/NetSSL_OpenSSL/src/Context.cpp" - "${LIBRARY_DIR}/NetSSL_OpenSSL/src/HTTPSClientSession.cpp" - "${LIBRARY_DIR}/NetSSL_OpenSSL/src/HTTPSSessionInstantiator.cpp" - "${LIBRARY_DIR}/NetSSL_OpenSSL/src/HTTPSStreamFactory.cpp" - "${LIBRARY_DIR}/NetSSL_OpenSSL/src/InvalidCertificateHandler.cpp" - "${LIBRARY_DIR}/NetSSL_OpenSSL/src/KeyConsoleHandler.cpp" - "${LIBRARY_DIR}/NetSSL_OpenSSL/src/KeyFileHandler.cpp" - "${LIBRARY_DIR}/NetSSL_OpenSSL/src/PrivateKeyFactory.cpp" - "${LIBRARY_DIR}/NetSSL_OpenSSL/src/PrivateKeyFactoryMgr.cpp" - "${LIBRARY_DIR}/NetSSL_OpenSSL/src/PrivateKeyPassphraseHandler.cpp" - "${LIBRARY_DIR}/NetSSL_OpenSSL/src/RejectCertificateHandler.cpp" - "${LIBRARY_DIR}/NetSSL_OpenSSL/src/SecureServerSocket.cpp" - "${LIBRARY_DIR}/NetSSL_OpenSSL/src/SecureServerSocketImpl.cpp" - "${LIBRARY_DIR}/NetSSL_OpenSSL/src/SecureSMTPClientSession.cpp" - "${LIBRARY_DIR}/NetSSL_OpenSSL/src/SecureSocketImpl.cpp" - "${LIBRARY_DIR}/NetSSL_OpenSSL/src/SecureStreamSocket.cpp" - "${LIBRARY_DIR}/NetSSL_OpenSSL/src/SecureStreamSocketImpl.cpp" - "${LIBRARY_DIR}/NetSSL_OpenSSL/src/Session.cpp" - "${LIBRARY_DIR}/NetSSL_OpenSSL/src/SSLException.cpp" - "${LIBRARY_DIR}/NetSSL_OpenSSL/src/SSLManager.cpp" - "${LIBRARY_DIR}/NetSSL_OpenSSL/src/Utility.cpp" - "${LIBRARY_DIR}/NetSSL_OpenSSL/src/VerificationErrorArgs.cpp" - "${LIBRARY_DIR}/NetSSL_OpenSSL/src/X509Certificate.cpp" - ) + set (SRCS + "${LIBRARY_DIR}/NetSSL_OpenSSL/src/AcceptCertificateHandler.cpp" + "${LIBRARY_DIR}/NetSSL_OpenSSL/src/CertificateHandlerFactory.cpp" + "${LIBRARY_DIR}/NetSSL_OpenSSL/src/CertificateHandlerFactoryMgr.cpp" + "${LIBRARY_DIR}/NetSSL_OpenSSL/src/ConsoleCertificateHandler.cpp" + "${LIBRARY_DIR}/NetSSL_OpenSSL/src/Context.cpp" + "${LIBRARY_DIR}/NetSSL_OpenSSL/src/HTTPSClientSession.cpp" + "${LIBRARY_DIR}/NetSSL_OpenSSL/src/HTTPSSessionInstantiator.cpp" + "${LIBRARY_DIR}/NetSSL_OpenSSL/src/HTTPSStreamFactory.cpp" + "${LIBRARY_DIR}/NetSSL_OpenSSL/src/InvalidCertificateHandler.cpp" + "${LIBRARY_DIR}/NetSSL_OpenSSL/src/KeyConsoleHandler.cpp" + "${LIBRARY_DIR}/NetSSL_OpenSSL/src/KeyFileHandler.cpp" + "${LIBRARY_DIR}/NetSSL_OpenSSL/src/PrivateKeyFactory.cpp" + "${LIBRARY_DIR}/NetSSL_OpenSSL/src/PrivateKeyFactoryMgr.cpp" + "${LIBRARY_DIR}/NetSSL_OpenSSL/src/PrivateKeyPassphraseHandler.cpp" + "${LIBRARY_DIR}/NetSSL_OpenSSL/src/RejectCertificateHandler.cpp" + "${LIBRARY_DIR}/NetSSL_OpenSSL/src/SecureServerSocket.cpp" + "${LIBRARY_DIR}/NetSSL_OpenSSL/src/SecureServerSocketImpl.cpp" + "${LIBRARY_DIR}/NetSSL_OpenSSL/src/SecureSMTPClientSession.cpp" + "${LIBRARY_DIR}/NetSSL_OpenSSL/src/SecureSocketImpl.cpp" + "${LIBRARY_DIR}/NetSSL_OpenSSL/src/SecureStreamSocket.cpp" + 
"${LIBRARY_DIR}/NetSSL_OpenSSL/src/SecureStreamSocketImpl.cpp" + "${LIBRARY_DIR}/NetSSL_OpenSSL/src/Session.cpp" + "${LIBRARY_DIR}/NetSSL_OpenSSL/src/SSLException.cpp" + "${LIBRARY_DIR}/NetSSL_OpenSSL/src/SSLManager.cpp" + "${LIBRARY_DIR}/NetSSL_OpenSSL/src/Utility.cpp" + "${LIBRARY_DIR}/NetSSL_OpenSSL/src/VerificationErrorArgs.cpp" + "${LIBRARY_DIR}/NetSSL_OpenSSL/src/X509Certificate.cpp" + ) - add_library (_poco_net_ssl ${SRCS}) - add_library (Poco::Net::SSL ALIAS _poco_net_ssl) - - target_include_directories (_poco_net_ssl SYSTEM PUBLIC "${LIBRARY_DIR}/NetSSL_OpenSSL/include") - target_link_libraries (_poco_net_ssl PUBLIC Poco::Crypto Poco::Net Poco::Util) - else () - add_library (Poco::Net::SSL UNKNOWN IMPORTED GLOBAL) - - find_library (LIBRARY_POCO_NET_SSL PocoNetSSL) - find_path (INCLUDE_POCO_NET_SSL Poco/Net/NetSSL.h) - set_target_properties (Poco::Net::SSL PROPERTIES IMPORTED_LOCATION ${LIBRARY_POCO_NET_SSL}) - set_target_properties (Poco::Net::SSL PROPERTIES INTERFACE_INCLUDE_DIRECTORIES ${INCLUDE_POCO_NET_SSL}) + add_library (_poco_net_ssl ${SRCS}) + add_library (Poco::Net::SSL ALIAS _poco_net_ssl) - message (STATUS "Using Poco::Net::SSL: ${LIBRARY_POCO_NET_SSL} ${INCLUDE_POCO_NET_SSL}") - endif () + target_include_directories (_poco_net_ssl SYSTEM PUBLIC "${LIBRARY_DIR}/NetSSL_OpenSSL/include") + target_link_libraries (_poco_net_ssl PUBLIC Poco::Crypto Poco::Net Poco::Util) else () add_library (_poco_net_ssl INTERFACE) add_library (Poco::Net::SSL ALIAS _poco_net_ssl) diff --git a/contrib/poco-cmake/Redis/CMakeLists.txt b/contrib/poco-cmake/Redis/CMakeLists.txt index b5892addd852..98e86a8592bf 100644 --- a/contrib/poco-cmake/Redis/CMakeLists.txt +++ b/contrib/poco-cmake/Redis/CMakeLists.txt @@ -1,34 +1,21 @@ -if (USE_INTERNAL_POCO_LIBRARY) - set (SRCS - "${LIBRARY_DIR}/Redis/src/Array.cpp" - "${LIBRARY_DIR}/Redis/src/AsyncReader.cpp" - "${LIBRARY_DIR}/Redis/src/Client.cpp" - "${LIBRARY_DIR}/Redis/src/Command.cpp" - "${LIBRARY_DIR}/Redis/src/Error.cpp" - "${LIBRARY_DIR}/Redis/src/Exception.cpp" - "${LIBRARY_DIR}/Redis/src/RedisEventArgs.cpp" - "${LIBRARY_DIR}/Redis/src/RedisStream.cpp" - "${LIBRARY_DIR}/Redis/src/Type.cpp" - ) +set (SRCS + "${LIBRARY_DIR}/Redis/src/Array.cpp" + "${LIBRARY_DIR}/Redis/src/AsyncReader.cpp" + "${LIBRARY_DIR}/Redis/src/Client.cpp" + "${LIBRARY_DIR}/Redis/src/Command.cpp" + "${LIBRARY_DIR}/Redis/src/Error.cpp" + "${LIBRARY_DIR}/Redis/src/Exception.cpp" + "${LIBRARY_DIR}/Redis/src/RedisEventArgs.cpp" + "${LIBRARY_DIR}/Redis/src/RedisStream.cpp" + "${LIBRARY_DIR}/Redis/src/Type.cpp" +) - add_library (_poco_redis ${SRCS}) - add_library (Poco::Redis ALIAS _poco_redis) +add_library (_poco_redis ${SRCS}) +add_library (Poco::Redis ALIAS _poco_redis) - if (COMPILER_GCC) - target_compile_options (_poco_redis PRIVATE -Wno-deprecated-copy) - endif () - target_compile_options (_poco_redis PRIVATE -Wno-shadow) - target_include_directories (_poco_redis SYSTEM PUBLIC "${LIBRARY_DIR}/Redis/include") - target_link_libraries (_poco_redis PUBLIC Poco::Net) -else () - add_library (Poco::Redis UNKNOWN IMPORTED GLOBAL) - - find_library (LIBRARY_POCO_REDIS PocoRedis) - find_path (INCLUDE_POCO_REDIS Poco/Redis/Redis.h) - set_target_properties (Poco::Redis PROPERTIES IMPORTED_LOCATION ${LIBRARY_POCO_REDIS}) - set_target_properties (Poco::Redis PROPERTIES INTERFACE_INCLUDE_DIRECTORIES ${INCLUDE_POCO_REDIS}) - - target_link_libraries (Poco::Redis INTERFACE Poco::Net) - - message (STATUS "Using Poco::Redis: ${LIBRARY_POCO_REDIS} ${INCLUDE_POCO_REDIS}") +if (COMPILER_GCC) + 
target_compile_options (_poco_redis PRIVATE -Wno-deprecated-copy) endif () +target_compile_options (_poco_redis PRIVATE -Wno-shadow) +target_include_directories (_poco_redis SYSTEM PUBLIC "${LIBRARY_DIR}/Redis/include") +target_link_libraries (_poco_redis PUBLIC Poco::Net) diff --git a/contrib/poco-cmake/Util/CMakeLists.txt b/contrib/poco-cmake/Util/CMakeLists.txt index e233e65cfeac..dc355e476583 100644 --- a/contrib/poco-cmake/Util/CMakeLists.txt +++ b/contrib/poco-cmake/Util/CMakeLists.txt @@ -1,46 +1,35 @@ -if (USE_INTERNAL_POCO_LIBRARY) - set (SRCS - "${LIBRARY_DIR}/Util/src/AbstractConfiguration.cpp" - "${LIBRARY_DIR}/Util/src/Application.cpp" - "${LIBRARY_DIR}/Util/src/ConfigurationMapper.cpp" - "${LIBRARY_DIR}/Util/src/ConfigurationView.cpp" - "${LIBRARY_DIR}/Util/src/FilesystemConfiguration.cpp" - "${LIBRARY_DIR}/Util/src/HelpFormatter.cpp" - "${LIBRARY_DIR}/Util/src/IniFileConfiguration.cpp" - "${LIBRARY_DIR}/Util/src/IntValidator.cpp" - "${LIBRARY_DIR}/Util/src/JSONConfiguration.cpp" - "${LIBRARY_DIR}/Util/src/LayeredConfiguration.cpp" - "${LIBRARY_DIR}/Util/src/LoggingConfigurator.cpp" - "${LIBRARY_DIR}/Util/src/LoggingSubsystem.cpp" - "${LIBRARY_DIR}/Util/src/MapConfiguration.cpp" - "${LIBRARY_DIR}/Util/src/Option.cpp" - "${LIBRARY_DIR}/Util/src/OptionCallback.cpp" - "${LIBRARY_DIR}/Util/src/OptionException.cpp" - "${LIBRARY_DIR}/Util/src/OptionProcessor.cpp" - "${LIBRARY_DIR}/Util/src/OptionSet.cpp" - "${LIBRARY_DIR}/Util/src/PropertyFileConfiguration.cpp" - "${LIBRARY_DIR}/Util/src/RegExpValidator.cpp" - "${LIBRARY_DIR}/Util/src/ServerApplication.cpp" - "${LIBRARY_DIR}/Util/src/Subsystem.cpp" - "${LIBRARY_DIR}/Util/src/SystemConfiguration.cpp" - "${LIBRARY_DIR}/Util/src/Timer.cpp" - "${LIBRARY_DIR}/Util/src/TimerTask.cpp" - "${LIBRARY_DIR}/Util/src/Validator.cpp" - "${LIBRARY_DIR}/Util/src/XMLConfiguration.cpp" - ) +set (SRCS + "${LIBRARY_DIR}/Util/src/AbstractConfiguration.cpp" + "${LIBRARY_DIR}/Util/src/Application.cpp" + "${LIBRARY_DIR}/Util/src/ConfigurationMapper.cpp" + "${LIBRARY_DIR}/Util/src/ConfigurationView.cpp" + "${LIBRARY_DIR}/Util/src/FilesystemConfiguration.cpp" + "${LIBRARY_DIR}/Util/src/HelpFormatter.cpp" + "${LIBRARY_DIR}/Util/src/IniFileConfiguration.cpp" + "${LIBRARY_DIR}/Util/src/IntValidator.cpp" + "${LIBRARY_DIR}/Util/src/JSONConfiguration.cpp" + "${LIBRARY_DIR}/Util/src/LayeredConfiguration.cpp" + "${LIBRARY_DIR}/Util/src/LoggingConfigurator.cpp" + "${LIBRARY_DIR}/Util/src/LoggingSubsystem.cpp" + "${LIBRARY_DIR}/Util/src/MapConfiguration.cpp" + "${LIBRARY_DIR}/Util/src/Option.cpp" + "${LIBRARY_DIR}/Util/src/OptionCallback.cpp" + "${LIBRARY_DIR}/Util/src/OptionException.cpp" + "${LIBRARY_DIR}/Util/src/OptionProcessor.cpp" + "${LIBRARY_DIR}/Util/src/OptionSet.cpp" + "${LIBRARY_DIR}/Util/src/PropertyFileConfiguration.cpp" + "${LIBRARY_DIR}/Util/src/RegExpValidator.cpp" + "${LIBRARY_DIR}/Util/src/ServerApplication.cpp" + "${LIBRARY_DIR}/Util/src/Subsystem.cpp" + "${LIBRARY_DIR}/Util/src/SystemConfiguration.cpp" + "${LIBRARY_DIR}/Util/src/Timer.cpp" + "${LIBRARY_DIR}/Util/src/TimerTask.cpp" + "${LIBRARY_DIR}/Util/src/Validator.cpp" + "${LIBRARY_DIR}/Util/src/XMLConfiguration.cpp" +) - add_library (_poco_util ${SRCS}) - add_library (Poco::Util ALIAS _poco_util) +add_library (_poco_util ${SRCS}) +add_library (Poco::Util ALIAS _poco_util) - target_include_directories (_poco_util SYSTEM PUBLIC "${LIBRARY_DIR}/Util/include") - target_link_libraries (_poco_util PUBLIC Poco::JSON Poco::XML) -else () - add_library (Poco::Util UNKNOWN IMPORTED GLOBAL) - - 
find_library (LIBRARY_POCO_UTIL PocoUtil) - find_path (INCLUDE_POCO_UTIL Poco/Util/Util.h) - set_target_properties (Poco::Util PROPERTIES IMPORTED_LOCATION ${LIBRARY_POCO_UTIL}) - set_target_properties (Poco::Util PROPERTIES INTERFACE_INCLUDE_DIRECTORIES ${INCLUDE_POCO_UTIL}) - - message (STATUS "Using Poco::Util: ${LIBRARY_POCO_UTIL} ${INCLUDE_POCO_UTIL}") -endif () +target_include_directories (_poco_util SYSTEM PUBLIC "${LIBRARY_DIR}/Util/include") +target_link_libraries (_poco_util PUBLIC Poco::JSON Poco::XML) diff --git a/contrib/poco-cmake/XML/CMakeLists.txt b/contrib/poco-cmake/XML/CMakeLists.txt index af801a65f033..45100f11eb73 100644 --- a/contrib/poco-cmake/XML/CMakeLists.txt +++ b/contrib/poco-cmake/XML/CMakeLists.txt @@ -1,110 +1,99 @@ -if (USE_INTERNAL_POCO_LIBRARY) - # Poco::XML (expat) +# Poco::XML (expat) - set (SRCS_EXPAT - "${LIBRARY_DIR}/XML/src/xmlrole.c" - "${LIBRARY_DIR}/XML/src/xmltok_impl.c" - "${LIBRARY_DIR}/XML/src/xmltok_ns.c" - "${LIBRARY_DIR}/XML/src/xmltok.c" - ) +set (SRCS_EXPAT + "${LIBRARY_DIR}/XML/src/xmlrole.c" + "${LIBRARY_DIR}/XML/src/xmltok_impl.c" + "${LIBRARY_DIR}/XML/src/xmltok_ns.c" + "${LIBRARY_DIR}/XML/src/xmltok.c" +) - add_library (_poco_xml_expat ${SRCS_EXPAT}) - add_library (Poco::XML::Expat ALIAS _poco_xml_expat) +add_library (_poco_xml_expat ${SRCS_EXPAT}) +add_library (Poco::XML::Expat ALIAS _poco_xml_expat) - target_include_directories (_poco_xml_expat PUBLIC "${LIBRARY_DIR}/XML/include") +target_include_directories (_poco_xml_expat PUBLIC "${LIBRARY_DIR}/XML/include") - # Poco::XML +# Poco::XML - set (SRCS - "${LIBRARY_DIR}/XML/src/AbstractContainerNode.cpp" - "${LIBRARY_DIR}/XML/src/AbstractNode.cpp" - "${LIBRARY_DIR}/XML/src/Attr.cpp" - "${LIBRARY_DIR}/XML/src/Attributes.cpp" - "${LIBRARY_DIR}/XML/src/AttributesImpl.cpp" - "${LIBRARY_DIR}/XML/src/AttrMap.cpp" - "${LIBRARY_DIR}/XML/src/CDATASection.cpp" - "${LIBRARY_DIR}/XML/src/CharacterData.cpp" - "${LIBRARY_DIR}/XML/src/ChildNodesList.cpp" - "${LIBRARY_DIR}/XML/src/Comment.cpp" - "${LIBRARY_DIR}/XML/src/ContentHandler.cpp" - "${LIBRARY_DIR}/XML/src/DeclHandler.cpp" - "${LIBRARY_DIR}/XML/src/DefaultHandler.cpp" - "${LIBRARY_DIR}/XML/src/Document.cpp" - "${LIBRARY_DIR}/XML/src/DocumentEvent.cpp" - "${LIBRARY_DIR}/XML/src/DocumentFragment.cpp" - "${LIBRARY_DIR}/XML/src/DocumentType.cpp" - "${LIBRARY_DIR}/XML/src/DOMBuilder.cpp" - "${LIBRARY_DIR}/XML/src/DOMException.cpp" - "${LIBRARY_DIR}/XML/src/DOMImplementation.cpp" - "${LIBRARY_DIR}/XML/src/DOMObject.cpp" - "${LIBRARY_DIR}/XML/src/DOMParser.cpp" - "${LIBRARY_DIR}/XML/src/DOMSerializer.cpp" - "${LIBRARY_DIR}/XML/src/DOMWriter.cpp" - "${LIBRARY_DIR}/XML/src/DTDHandler.cpp" - "${LIBRARY_DIR}/XML/src/DTDMap.cpp" - "${LIBRARY_DIR}/XML/src/Element.cpp" - "${LIBRARY_DIR}/XML/src/ElementsByTagNameList.cpp" - "${LIBRARY_DIR}/XML/src/Entity.cpp" - "${LIBRARY_DIR}/XML/src/EntityReference.cpp" - "${LIBRARY_DIR}/XML/src/EntityResolver.cpp" - "${LIBRARY_DIR}/XML/src/EntityResolverImpl.cpp" - "${LIBRARY_DIR}/XML/src/ErrorHandler.cpp" - "${LIBRARY_DIR}/XML/src/Event.cpp" - "${LIBRARY_DIR}/XML/src/EventDispatcher.cpp" - "${LIBRARY_DIR}/XML/src/EventException.cpp" - "${LIBRARY_DIR}/XML/src/EventListener.cpp" - "${LIBRARY_DIR}/XML/src/EventTarget.cpp" - "${LIBRARY_DIR}/XML/src/InputSource.cpp" - "${LIBRARY_DIR}/XML/src/LexicalHandler.cpp" - "${LIBRARY_DIR}/XML/src/Locator.cpp" - "${LIBRARY_DIR}/XML/src/LocatorImpl.cpp" - "${LIBRARY_DIR}/XML/src/MutationEvent.cpp" - "${LIBRARY_DIR}/XML/src/Name.cpp" - "${LIBRARY_DIR}/XML/src/NamedNodeMap.cpp" - 
"${LIBRARY_DIR}/XML/src/NamePool.cpp" - "${LIBRARY_DIR}/XML/src/NamespaceStrategy.cpp" - "${LIBRARY_DIR}/XML/src/NamespaceSupport.cpp" - "${LIBRARY_DIR}/XML/src/Node.cpp" - "${LIBRARY_DIR}/XML/src/NodeAppender.cpp" - "${LIBRARY_DIR}/XML/src/NodeFilter.cpp" - "${LIBRARY_DIR}/XML/src/NodeIterator.cpp" - "${LIBRARY_DIR}/XML/src/NodeList.cpp" - "${LIBRARY_DIR}/XML/src/Notation.cpp" - "${LIBRARY_DIR}/XML/src/ParserEngine.cpp" - "${LIBRARY_DIR}/XML/src/ProcessingInstruction.cpp" - "${LIBRARY_DIR}/XML/src/QName.cpp" - "${LIBRARY_DIR}/XML/src/SAXException.cpp" - "${LIBRARY_DIR}/XML/src/SAXParser.cpp" - "${LIBRARY_DIR}/XML/src/Text.cpp" - "${LIBRARY_DIR}/XML/src/TreeWalker.cpp" - "${LIBRARY_DIR}/XML/src/ValueTraits.cpp" - "${LIBRARY_DIR}/XML/src/WhitespaceFilter.cpp" - "${LIBRARY_DIR}/XML/src/XMLException.cpp" - "${LIBRARY_DIR}/XML/src/XMLFilter.cpp" - "${LIBRARY_DIR}/XML/src/XMLFilterImpl.cpp" - "${LIBRARY_DIR}/XML/src/XMLReader.cpp" - "${LIBRARY_DIR}/XML/src/XMLStreamParser.cpp" - "${LIBRARY_DIR}/XML/src/XMLStreamParserException.cpp" - "${LIBRARY_DIR}/XML/src/XMLString.cpp" - "${LIBRARY_DIR}/XML/src/XMLWriter.cpp" +set (SRCS + "${LIBRARY_DIR}/XML/src/AbstractContainerNode.cpp" + "${LIBRARY_DIR}/XML/src/AbstractNode.cpp" + "${LIBRARY_DIR}/XML/src/Attr.cpp" + "${LIBRARY_DIR}/XML/src/Attributes.cpp" + "${LIBRARY_DIR}/XML/src/AttributesImpl.cpp" + "${LIBRARY_DIR}/XML/src/AttrMap.cpp" + "${LIBRARY_DIR}/XML/src/CDATASection.cpp" + "${LIBRARY_DIR}/XML/src/CharacterData.cpp" + "${LIBRARY_DIR}/XML/src/ChildNodesList.cpp" + "${LIBRARY_DIR}/XML/src/Comment.cpp" + "${LIBRARY_DIR}/XML/src/ContentHandler.cpp" + "${LIBRARY_DIR}/XML/src/DeclHandler.cpp" + "${LIBRARY_DIR}/XML/src/DefaultHandler.cpp" + "${LIBRARY_DIR}/XML/src/Document.cpp" + "${LIBRARY_DIR}/XML/src/DocumentEvent.cpp" + "${LIBRARY_DIR}/XML/src/DocumentFragment.cpp" + "${LIBRARY_DIR}/XML/src/DocumentType.cpp" + "${LIBRARY_DIR}/XML/src/DOMBuilder.cpp" + "${LIBRARY_DIR}/XML/src/DOMException.cpp" + "${LIBRARY_DIR}/XML/src/DOMImplementation.cpp" + "${LIBRARY_DIR}/XML/src/DOMObject.cpp" + "${LIBRARY_DIR}/XML/src/DOMParser.cpp" + "${LIBRARY_DIR}/XML/src/DOMSerializer.cpp" + "${LIBRARY_DIR}/XML/src/DOMWriter.cpp" + "${LIBRARY_DIR}/XML/src/DTDHandler.cpp" + "${LIBRARY_DIR}/XML/src/DTDMap.cpp" + "${LIBRARY_DIR}/XML/src/Element.cpp" + "${LIBRARY_DIR}/XML/src/ElementsByTagNameList.cpp" + "${LIBRARY_DIR}/XML/src/Entity.cpp" + "${LIBRARY_DIR}/XML/src/EntityReference.cpp" + "${LIBRARY_DIR}/XML/src/EntityResolver.cpp" + "${LIBRARY_DIR}/XML/src/EntityResolverImpl.cpp" + "${LIBRARY_DIR}/XML/src/ErrorHandler.cpp" + "${LIBRARY_DIR}/XML/src/Event.cpp" + "${LIBRARY_DIR}/XML/src/EventDispatcher.cpp" + "${LIBRARY_DIR}/XML/src/EventException.cpp" + "${LIBRARY_DIR}/XML/src/EventListener.cpp" + "${LIBRARY_DIR}/XML/src/EventTarget.cpp" + "${LIBRARY_DIR}/XML/src/InputSource.cpp" + "${LIBRARY_DIR}/XML/src/LexicalHandler.cpp" + "${LIBRARY_DIR}/XML/src/Locator.cpp" + "${LIBRARY_DIR}/XML/src/LocatorImpl.cpp" + "${LIBRARY_DIR}/XML/src/MutationEvent.cpp" + "${LIBRARY_DIR}/XML/src/Name.cpp" + "${LIBRARY_DIR}/XML/src/NamedNodeMap.cpp" + "${LIBRARY_DIR}/XML/src/NamePool.cpp" + "${LIBRARY_DIR}/XML/src/NamespaceStrategy.cpp" + "${LIBRARY_DIR}/XML/src/NamespaceSupport.cpp" + "${LIBRARY_DIR}/XML/src/Node.cpp" + "${LIBRARY_DIR}/XML/src/NodeAppender.cpp" + "${LIBRARY_DIR}/XML/src/NodeFilter.cpp" + "${LIBRARY_DIR}/XML/src/NodeIterator.cpp" + "${LIBRARY_DIR}/XML/src/NodeList.cpp" + "${LIBRARY_DIR}/XML/src/Notation.cpp" + "${LIBRARY_DIR}/XML/src/ParserEngine.cpp" + 
"${LIBRARY_DIR}/XML/src/ProcessingInstruction.cpp" + "${LIBRARY_DIR}/XML/src/QName.cpp" + "${LIBRARY_DIR}/XML/src/SAXException.cpp" + "${LIBRARY_DIR}/XML/src/SAXParser.cpp" + "${LIBRARY_DIR}/XML/src/Text.cpp" + "${LIBRARY_DIR}/XML/src/TreeWalker.cpp" + "${LIBRARY_DIR}/XML/src/ValueTraits.cpp" + "${LIBRARY_DIR}/XML/src/WhitespaceFilter.cpp" + "${LIBRARY_DIR}/XML/src/XMLException.cpp" + "${LIBRARY_DIR}/XML/src/XMLFilter.cpp" + "${LIBRARY_DIR}/XML/src/XMLFilterImpl.cpp" + "${LIBRARY_DIR}/XML/src/XMLReader.cpp" + "${LIBRARY_DIR}/XML/src/XMLStreamParser.cpp" + "${LIBRARY_DIR}/XML/src/XMLStreamParserException.cpp" + "${LIBRARY_DIR}/XML/src/XMLString.cpp" + "${LIBRARY_DIR}/XML/src/XMLWriter.cpp" - # expat - "${LIBRARY_DIR}/XML/src/xmlparse.cpp" - ) + # expat + "${LIBRARY_DIR}/XML/src/xmlparse.cpp" +) - add_library (_poco_xml ${SRCS}) - add_library (Poco::XML ALIAS _poco_xml) +add_library (_poco_xml ${SRCS}) +add_library (Poco::XML ALIAS _poco_xml) - target_compile_options (_poco_xml PRIVATE -Wno-old-style-cast) - target_include_directories (_poco_xml SYSTEM PUBLIC "${LIBRARY_DIR}/XML/include") - target_link_libraries (_poco_xml PUBLIC Poco::Foundation Poco::XML::Expat) -else () - add_library (Poco::XML UNKNOWN IMPORTED GLOBAL) - - find_library (LIBRARY_POCO_XML PocoXML) - find_path (INCLUDE_POCO_XML Poco/XML/XML.h) - set_target_properties (Poco::XML PROPERTIES IMPORTED_LOCATION ${LIBRARY_POCO_XML}) - set_target_properties (Poco::XML PROPERTIES INTERFACE_INCLUDE_DIRECTORIES ${INCLUDE_POCO_XML}) - - message (STATUS "Using Poco::XML: ${LIBRARY_POCO_XML} ${INCLUDE_POCO_XML}") -endif () +target_compile_options (_poco_xml PRIVATE -Wno-old-style-cast) +target_include_directories (_poco_xml SYSTEM PUBLIC "${LIBRARY_DIR}/XML/include") +target_link_libraries (_poco_xml PUBLIC Poco::Foundation Poco::XML::Expat) diff --git a/contrib/protobuf-cmake/CMakeLists.txt b/contrib/protobuf-cmake/CMakeLists.txt index 92eec444e44e..5e22136fc1f3 100644 --- a/contrib/protobuf-cmake/CMakeLists.txt +++ b/contrib/protobuf-cmake/CMakeLists.txt @@ -1,3 +1,22 @@ +option(ENABLE_PROTOBUF "Enable protobuf" ${ENABLE_LIBRARIES}) + +if(NOT ENABLE_PROTOBUF) + message(STATUS "Not using protobuf") + return() +endif() + +set(Protobuf_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/protobuf/src") +if(OS_FREEBSD AND SANITIZE STREQUAL "address") + # ../contrib/protobuf/src/google/protobuf/arena_impl.h:45:10: fatal error: 'sanitizer/asan_interface.h' file not found + # #include + if(LLVM_INCLUDE_DIRS) + set(Protobuf_INCLUDE_DIR "${Protobuf_INCLUDE_DIR}" ${LLVM_INCLUDE_DIRS}) + else() + message(${RECONFIGURE_MESSAGE_LEVEL} "Can't use protobuf on FreeBSD with address sanitizer without LLVM") + return() + endif() +endif() + set(protobuf_source_dir "${ClickHouse_SOURCE_DIR}/contrib/protobuf") set(protobuf_binary_dir "${ClickHouse_BINARY_DIR}/contrib/protobuf") @@ -8,7 +27,6 @@ add_definitions(-DHAVE_PTHREAD) add_definitions(-DHAVE_ZLIB) include_directories( - ${ZLIB_INCLUDE_DIRECTORIES} ${protobuf_binary_dir} ${protobuf_source_dir}/src) @@ -45,13 +63,13 @@ set(libprotobuf_lite_files ${protobuf_source_dir}/src/google/protobuf/wire_format_lite.cc ) -add_library(libprotobuf-lite ${libprotobuf_lite_files}) -target_link_libraries(libprotobuf-lite pthread) +add_library(_libprotobuf-lite ${libprotobuf_lite_files}) +target_link_libraries(_libprotobuf-lite pthread) if(${CMAKE_SYSTEM_NAME} STREQUAL "Android") - target_link_libraries(libprotobuf-lite log) + target_link_libraries(_libprotobuf-lite log) endif() -target_include_directories(libprotobuf-lite 
SYSTEM PUBLIC ${protobuf_source_dir}/src) -add_library(protobuf::libprotobuf-lite ALIAS libprotobuf-lite) +target_include_directories(_libprotobuf-lite SYSTEM PUBLIC ${protobuf_source_dir}/src) +add_library(protobuf::libprotobuf-lite ALIAS _libprotobuf-lite) set(libprotobuf_files @@ -109,17 +127,17 @@ set(libprotobuf_files ${protobuf_source_dir}/src/google/protobuf/wrappers.pb.cc ) -add_library(libprotobuf ${libprotobuf_lite_files} ${libprotobuf_files}) +add_library(_libprotobuf ${libprotobuf_lite_files} ${libprotobuf_files}) if (ENABLE_FUZZING) - target_compile_options(libprotobuf PRIVATE "-fsanitize-recover=all") + target_compile_options(_libprotobuf PRIVATE "-fsanitize-recover=all") endif() -target_link_libraries(libprotobuf pthread) -target_link_libraries(libprotobuf ${ZLIB_LIBRARIES}) +target_link_libraries(_libprotobuf pthread) +target_link_libraries(_libprotobuf ch_contrib::zlib) if(${CMAKE_SYSTEM_NAME} STREQUAL "Android") - target_link_libraries(libprotobuf log) + target_link_libraries(_libprotobuf log) endif() -target_include_directories(libprotobuf SYSTEM PUBLIC ${protobuf_source_dir}/src) -add_library(protobuf::libprotobuf ALIAS libprotobuf) +target_include_directories(_libprotobuf SYSTEM PUBLIC ${protobuf_source_dir}/src) +add_library(protobuf::libprotobuf ALIAS _libprotobuf) set(libprotoc_files @@ -208,9 +226,9 @@ set(libprotoc_files ${protobuf_source_dir}/src/google/protobuf/compiler/zip_writer.cc ) -add_library(libprotoc ${libprotoc_files}) -target_link_libraries(libprotoc libprotobuf) -add_library(protobuf::libprotoc ALIAS libprotoc) +add_library(_libprotoc ${libprotoc_files}) +target_link_libraries(_libprotoc _libprotobuf) +add_library(protobuf::libprotoc ALIAS _libprotoc) set(protoc_files ${protobuf_source_dir}/src/google/protobuf/compiler/main.cc) @@ -218,7 +236,7 @@ if (CMAKE_HOST_SYSTEM_NAME STREQUAL CMAKE_SYSTEM_NAME AND CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL CMAKE_SYSTEM_PROCESSOR) add_executable(protoc ${protoc_files}) - target_link_libraries(protoc libprotoc libprotobuf pthread) + target_link_libraries(protoc _libprotoc _libprotobuf pthread) add_executable(protobuf::protoc ALIAS protoc) if (ENABLE_FUZZING) @@ -297,3 +315,15 @@ else () set_target_properties (protoc PROPERTIES IMPORTED_LOCATION "${PROTOC_BUILD_DIR}/protoc") add_dependencies(protoc "${PROTOC_BUILD_DIR}/protoc") endif () + +include("${ClickHouse_SOURCE_DIR}/contrib/protobuf-cmake/protobuf_generate.cmake") + +add_library(_protobuf INTERFACE) +target_link_libraries(_protobuf INTERFACE _libprotobuf) +target_include_directories(_protobuf INTERFACE "${Protobuf_INCLUDE_DIR}") +add_library(ch_contrib::protobuf ALIAS _protobuf) + +add_library(_protoc INTERFACE) +target_link_libraries(_protoc INTERFACE _libprotoc _libprotobuf) +target_include_directories(_protoc INTERFACE "${Protobuf_INCLUDE_DIR}") +add_library(ch_contrib::protoc ALIAS _protoc) diff --git a/contrib/rapidjson-cmake/CMakeLists.txt b/contrib/rapidjson-cmake/CMakeLists.txt new file mode 100644 index 000000000000..0d7ba74a399d --- /dev/null +++ b/contrib/rapidjson-cmake/CMakeLists.txt @@ -0,0 +1,11 @@ +option(ENABLE_RAPIDJSON "Use rapidjson" ${ENABLE_LIBRARIES}) + +if(NOT ENABLE_RAPIDJSON) + message(STATUS "Not using rapidjson") + return() +endif() + +set(RAPIDJSON_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/rapidjson/include") +add_library(_rapidjson INTERFACE) +target_include_directories(_rapidjson SYSTEM BEFORE INTERFACE ${RAPIDJSON_INCLUDE_DIR}) +add_library(ch_contrib::rapidjson ALIAS _rapidjson) diff --git a/contrib/re2-cmake/CMakeLists.txt 
b/contrib/re2-cmake/CMakeLists.txt index ff8b3c434727..e74f488643d8 100644 --- a/contrib/re2-cmake/CMakeLists.txt +++ b/contrib/re2-cmake/CMakeLists.txt @@ -4,6 +4,11 @@ # This file was edited for ClickHouse +string(FIND ${CMAKE_CURRENT_BINARY_DIR} " " _have_space) +if(_have_space GREATER -1) + message(FATAL_ERROR "Using spaces in the build path [${CMAKE_CURRENT_BINARY_DIR}] is not supported: generation of the re2_st headers would break.") +endif() + set(SRC_DIR "${ClickHouse_SOURCE_DIR}/contrib/re2") set(RE2_SOURCES @@ -30,11 +35,9 @@ set(RE2_SOURCES ${SRC_DIR}/util/rune.cc ${SRC_DIR}/util/strutil.cc ) - add_library(re2 ${RE2_SOURCES}) target_include_directories(re2 PUBLIC "${SRC_DIR}") - # Building re2 which is thread-safe and re2_st which is not. # re2 changes its state during matching of regular expression, e.g. creates temporary DFA. # It uses RWLock to process the same regular expression object from different threads. @@ -43,7 +46,8 @@ target_include_directories(re2 PUBLIC "${SRC_DIR}") add_library(re2_st ${RE2_SOURCES}) target_compile_definitions (re2_st PRIVATE NDEBUG NO_THREADS re2=re2_st) target_include_directories (re2_st PRIVATE .) -target_include_directories (re2_st SYSTEM PUBLIC ${CMAKE_CURRENT_BINARY_DIR} ${SRC_DIR}) +target_include_directories (re2_st SYSTEM PUBLIC ${CMAKE_CURRENT_BINARY_DIR}) +target_include_directories (re2_st SYSTEM BEFORE PUBLIC ${SRC_DIR}) file (MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/re2_st) foreach (FILENAME filtered_re2.h re2.h set.h stringpiece.h) @@ -66,3 +70,8 @@ foreach (FILENAME mutex.h) add_custom_target (transform_${FILENAME} DEPENDS "${CMAKE_CURRENT_BINARY_DIR}/util/${FILENAME}") add_dependencies (re2_st transform_${FILENAME}) endforeach () + +# NOTE: do not change the library names here, since they are used for PVS +# (see docker/test/pvs/Dockerfile) to generate the required headers (see above) +add_library(ch_contrib::re2 ALIAS re2) +add_library(ch_contrib::re2_st ALIAS re2_st) diff --git a/contrib/replxx b/contrib/replxx index f019cba7ea1b..9460e5e0fc10 160000 --- a/contrib/replxx +++ b/contrib/replxx @@ -1 +1 @@ -Subproject commit f019cba7ea1bcd1b4feb7826f28ed57fb581b04c +Subproject commit 9460e5e0fc10f78f460af26a6bd928798cac864d diff --git a/contrib/replxx-cmake/CMakeLists.txt b/contrib/replxx-cmake/CMakeLists.txt index 222a38095cb8..8487ad520bc1 100644 --- a/contrib/replxx-cmake/CMakeLists.txt +++ b/contrib/replxx-cmake/CMakeLists.txt @@ -1,74 +1,30 @@ option (ENABLE_REPLXX "Enable replxx support" ${ENABLE_LIBRARIES}) if (NOT ENABLE_REPLXX) - if (USE_INTERNAL_REPLXX_LIBRARY) - message (${RECONFIGURE_MESSAGE_LEVEL} "Can't use internal replxx with ENABLE_REPLXX=OFF") - endif() - - add_library(replxx INTERFACE) - target_compile_definitions(replxx INTERFACE USE_REPLXX=0) - message (STATUS "Not using replxx") return() endif() -option (USE_INTERNAL_REPLXX_LIBRARY "Use internal replxx library (Experimental: set to OFF on your own risk)" ON) - -if (NOT USE_INTERNAL_REPLXX_LIBRARY) - find_library(LIBRARY_REPLXX NAMES replxx replxx-static) - find_path(INCLUDE_REPLXX replxx.hxx) - - if (LIBRARY_REPLXX AND INCLUDE_REPLXX) - set(CMAKE_REQUIRED_LIBRARIES ${LIBRARY_REPLXX}) - set(CMAKE_REQUIRED_INCLUDES ${INCLUDE_REPLXX}) - check_cxx_source_compiles( - " - #include <replxx.hxx> - int main() { - replxx::Replxx rx; - } - " - EXTERNAL_REPLXX_WORKS - ) - - if (NOT EXTERNAL_REPLXX_WORKS) - message (${RECONFIGURE_MESSAGE_LEVEL} "replxx is unusable: ${LIBRARY_REPLXX} ${INCLUDE_REPLXX}") - else() - add_library(replxx UNKNOWN IMPORTED) - set_property(TARGET replxx PROPERTY
IMPORTED_LOCATION ${LIBRARY_REPLXX}) - target_include_directories(replxx SYSTEM PUBLIC ${INCLUDE_REPLXX}) - endif() - else() - message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find system replxx") - endif() -endif() - +set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/replxx") -if (NOT LIBRARY_REPLXX OR NOT INCLUDE_REPLXX OR NOT EXTERNAL_REPLXX_WORKS) - set(USE_INTERNAL_REPLXX_LIBRARY 1) - set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/replxx") +set(SRCS + "${LIBRARY_DIR}/src/conversion.cxx" + "${LIBRARY_DIR}/src/ConvertUTF.cpp" + "${LIBRARY_DIR}/src/escape.cxx" + "${LIBRARY_DIR}/src/history.cxx" + "${LIBRARY_DIR}/src/terminal.cxx" + "${LIBRARY_DIR}/src/prompt.cxx" + "${LIBRARY_DIR}/src/replxx_impl.cxx" + "${LIBRARY_DIR}/src/replxx.cxx" + "${LIBRARY_DIR}/src/util.cxx" + "${LIBRARY_DIR}/src/wcwidth.cpp" +) - set(SRCS - "${LIBRARY_DIR}/src/conversion.cxx" - "${LIBRARY_DIR}/src/ConvertUTF.cpp" - "${LIBRARY_DIR}/src/escape.cxx" - "${LIBRARY_DIR}/src/history.cxx" - "${LIBRARY_DIR}/src/terminal.cxx" - "${LIBRARY_DIR}/src/prompt.cxx" - "${LIBRARY_DIR}/src/replxx_impl.cxx" - "${LIBRARY_DIR}/src/replxx.cxx" - "${LIBRARY_DIR}/src/util.cxx" - "${LIBRARY_DIR}/src/wcwidth.cpp" - ) - - add_library (replxx ${SRCS}) - target_include_directories(replxx SYSTEM PUBLIC "${LIBRARY_DIR}/include") -endif () +add_library (_replxx ${SRCS}) +target_include_directories(_replxx SYSTEM PUBLIC "${LIBRARY_DIR}/include") if (COMPILER_CLANG) - target_compile_options(replxx PRIVATE -Wno-documentation) + target_compile_options(_replxx PRIVATE -Wno-documentation) endif () -target_compile_definitions(replxx PUBLIC USE_REPLXX=1) - -message (STATUS "Using replxx") +add_library(ch_contrib::replxx ALIAS _replxx) diff --git a/contrib/rocksdb-cmake/CMakeLists.txt b/contrib/rocksdb-cmake/CMakeLists.txt index db0b3942b791..c35009ba10ae 100644 --- a/contrib/rocksdb-cmake/CMakeLists.txt +++ b/contrib/rocksdb-cmake/CMakeLists.txt @@ -1,3 +1,10 @@ +option (ENABLE_ROCKSDB "Enable rocksdb library" ${ENABLE_LIBRARIES}) + +if (NOT ENABLE_ROCKSDB) + message (STATUS "Not using rocksdb") + return() +endif() + ## this file is extracted from `contrib/rocksdb/CMakeLists.txt` set(ROCKSDB_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/rocksdb") list(APPEND CMAKE_MODULE_PATH "${ROCKSDB_SOURCE_DIR}/cmake/modules/") @@ -6,6 +13,10 @@ set(PORTABLE ON) ## always disable jemalloc for rocksdb by default ## because it introduces non-standard jemalloc APIs option(WITH_JEMALLOC "build with JeMalloc" OFF) +set(USE_SNAPPY OFF) +if (TARGET ch_contrib::snappy) + set(USE_SNAPPY ON) +endif() option(WITH_SNAPPY "build with SNAPPY" ${USE_SNAPPY}) ## lz4, zlib, zstd is enabled in ClickHouse by default option(WITH_LZ4 "build with lz4" ON) @@ -38,35 +49,30 @@ else() # but it does not have all the jemalloc files in include/... 
set(WITH_JEMALLOC ON) else() - if(WITH_JEMALLOC) + if(WITH_JEMALLOC AND TARGET ch_contrib::jemalloc) add_definitions(-DROCKSDB_JEMALLOC -DJEMALLOC_NO_DEMANGLE) - list(APPEND THIRDPARTY_LIBS jemalloc) + list(APPEND THIRDPARTY_LIBS ch_contrib::jemalloc) endif() endif() if(WITH_SNAPPY) add_definitions(-DSNAPPY) - list(APPEND THIRDPARTY_LIBS snappy) + list(APPEND THIRDPARTY_LIBS ch_contrib::snappy) endif() if(WITH_ZLIB) add_definitions(-DZLIB) - list(APPEND THIRDPARTY_LIBS zlib) + list(APPEND THIRDPARTY_LIBS ch_contrib::zlib) endif() if(WITH_LZ4) add_definitions(-DLZ4) - list(APPEND THIRDPARTY_LIBS lz4) + list(APPEND THIRDPARTY_LIBS ch_contrib::lz4) endif() if(WITH_ZSTD) add_definitions(-DZSTD) - include_directories(${ZSTD_INCLUDE_DIR}) - include_directories("${ZSTD_INCLUDE_DIR}/common") - include_directories("${ZSTD_INCLUDE_DIR}/dictBuilder") - include_directories("${ZSTD_INCLUDE_DIR}/deprecated") - - list(APPEND THIRDPARTY_LIBS zstd) + list(APPEND THIRDPARTY_LIBS ch_contrib::zstd) endif() endif() @@ -538,8 +544,8 @@ if(WITH_FOLLY_DISTRIBUTED_MUTEX) "${ROCKSDB_SOURCE_DIR}/third-party/folly/folly/synchronization/WaitOptions.cpp") endif() -set(ROCKSDB_STATIC_LIB rocksdb) - -add_library(${ROCKSDB_STATIC_LIB} STATIC ${SOURCES}) -target_link_libraries(${ROCKSDB_STATIC_LIB} PRIVATE - ${THIRDPARTY_LIBS} ${SYSTEM_LIBS}) +add_library(_rocksdb STATIC ${SOURCES}) +add_library(ch_contrib::rocksdb ALIAS _rocksdb) +target_link_libraries(_rocksdb PRIVATE ${THIRDPARTY_LIBS} ${SYSTEM_LIBS}) +# SYSTEM is required to overcome some issues +target_include_directories(_rocksdb SYSTEM BEFORE INTERFACE "${ROCKSDB_SOURCE_DIR}/include") diff --git a/contrib/s2geometry-cmake/CMakeLists.txt b/contrib/s2geometry-cmake/CMakeLists.txt index e2b0f20f4089..49c80e45b18a 100644 --- a/contrib/s2geometry-cmake/CMakeLists.txt +++ b/contrib/s2geometry-cmake/CMakeLists.txt @@ -1,3 +1,10 @@ +option(ENABLE_S2_GEOMETRY "Enable S2 geometry library" ${ENABLE_LIBRARIES}) + +if (NOT ENABLE_S2_GEOMETRY) + message(STATUS "Not using S2 geometry") + return() +endif() + set(S2_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/s2geometry/src") set(ABSL_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/abseil-cpp") @@ -108,15 +115,17 @@ set(S2_SRCS ) -add_library(s2 ${S2_SRCS}) -set_property(TARGET s2 PROPERTY CXX_STANDARD 17) +add_library(_s2 ${S2_SRCS}) +add_library(ch_contrib::s2 ALIAS _s2) + +set_property(TARGET _s2 PROPERTY CXX_STANDARD 17) -if (OPENSSL_FOUND) - target_link_libraries(s2 PRIVATE ${OPENSSL_LIBRARIES}) +if (TARGET OpenSSL::SSL) + target_link_libraries(_s2 PRIVATE OpenSSL::Crypto OpenSSL::SSL) endif() # Copied from contrib/s2geometry/CMakeLists -target_link_libraries(s2 PRIVATE +target_link_libraries(_s2 PRIVATE absl::base absl::btree absl::config @@ -138,9 +147,9 @@ target_link_libraries(s2 PRIVATE absl::utility ) -target_include_directories(s2 SYSTEM BEFORE PUBLIC "${S2_SOURCE_DIR}/") -target_include_directories(s2 SYSTEM PUBLIC "${ABSL_SOURCE_DIR}") +target_include_directories(_s2 SYSTEM BEFORE PUBLIC "${S2_SOURCE_DIR}/") +target_include_directories(_s2 SYSTEM PUBLIC "${ABSL_SOURCE_DIR}") if(M_LIBRARY) - target_link_libraries(s2 PRIVATE ${M_LIBRARY}) + target_link_libraries(_s2 PRIVATE ${M_LIBRARY}) endif() diff --git a/contrib/sentry-native-cmake/CMakeLists.txt b/contrib/sentry-native-cmake/CMakeLists.txt index f4e946cf7970..520fa176b91a 100644 --- a/contrib/sentry-native-cmake/CMakeLists.txt +++ b/contrib/sentry-native-cmake/CMakeLists.txt @@ -1,3 +1,14 @@ +if (NOT OS_FREEBSD AND NOT SPLIT_SHARED_LIBRARIES AND NOT (OS_DARWIN AND 
COMPILER_CLANG)) + option (ENABLE_SENTRY "Enable Sentry" ${ENABLE_LIBRARIES}) +else() + option (ENABLE_SENTRY "Enable Sentry" OFF) +endif() + +if (NOT ENABLE_SENTRY) + message(STATUS "Not using sentry") + return() +endif() + set (SRC_DIR "${ClickHouse_SOURCE_DIR}/contrib/sentry-native") set (SRCS @@ -38,15 +49,16 @@ else() list(APPEND SRCS ${SRC_DIR}/src/modulefinder/sentry_modulefinder_linux.c) endif() -add_library(sentry ${SRCS}) -add_library(sentry::sentry ALIAS sentry) +add_library(_sentry ${SRCS}) if(BUILD_SHARED_LIBS) - target_compile_definitions(sentry PRIVATE SENTRY_BUILD_SHARED) + target_compile_definitions(_sentry PRIVATE SENTRY_BUILD_SHARED) else() - target_compile_definitions(sentry PUBLIC SENTRY_BUILD_STATIC) + target_compile_definitions(_sentry PUBLIC SENTRY_BUILD_STATIC) endif() -target_link_libraries(sentry PRIVATE curl pthread) -target_include_directories(sentry PUBLIC "${SRC_DIR}/include" PRIVATE "${SRC_DIR}/src") -target_compile_definitions(sentry PRIVATE SENTRY_WITH_INPROC_BACKEND SIZEOF_LONG=8) +target_link_libraries(_sentry PRIVATE ch_contrib::curl pthread) +target_include_directories(_sentry PUBLIC "${SRC_DIR}/include" PRIVATE "${SRC_DIR}/src") +target_compile_definitions(_sentry PRIVATE SENTRY_WITH_INPROC_BACKEND SIZEOF_LONG=8) + +add_library(ch_contrib::sentry ALIAS _sentry) diff --git a/contrib/simdjson-cmake/CMakeLists.txt b/contrib/simdjson-cmake/CMakeLists.txt index bb9a5844def8..ab2840f5b7fe 100644 --- a/contrib/simdjson-cmake/CMakeLists.txt +++ b/contrib/simdjson-cmake/CMakeLists.txt @@ -1,11 +1,20 @@ +option (ENABLE_SIMDJSON "Use simdjson" ${ENABLE_LIBRARIES}) + +if (NOT ENABLE_SIMDJSON) + message(STATUS "Not using simdjson") + return() +endif() + set(SIMDJSON_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/simdjson/include") set(SIMDJSON_SRC_DIR "${ClickHouse_SOURCE_DIR}/contrib/simdjson/src") set(SIMDJSON_SRC "${SIMDJSON_SRC_DIR}/simdjson.cpp") -add_library(simdjson ${SIMDJSON_SRC}) -target_include_directories(simdjson SYSTEM PUBLIC "${SIMDJSON_INCLUDE_DIR}" PRIVATE "${SIMDJSON_SRC_DIR}") +add_library(_simdjson ${SIMDJSON_SRC}) +target_include_directories(_simdjson SYSTEM PUBLIC "${SIMDJSON_INCLUDE_DIR}" PRIVATE "${SIMDJSON_SRC_DIR}") # simdjson uses its own CPU dispatching and gets confused if we enable AVX/AVX2 flags.
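# (Presumably the point: simdjson builds several kernels and probes the CPU at
# run time to pick one, so globally enabled AVX flags would let the compiler
# emit AVX instructions on paths the dispatcher expects to guard.)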
if(ARCH_AMD64) - target_compile_options(simdjson PRIVATE -mno-avx -mno-avx2) + target_compile_options(_simdjson PRIVATE -mno-avx -mno-avx2) endif() + +add_library(ch_contrib::simdjson ALIAS _simdjson) diff --git a/contrib/snappy-cmake/CMakeLists.txt b/contrib/snappy-cmake/CMakeLists.txt index 289f8908436c..0997ea207e0d 100644 --- a/contrib/snappy-cmake/CMakeLists.txt +++ b/contrib/snappy-cmake/CMakeLists.txt @@ -30,8 +30,9 @@ configure_file( "${SOURCE_DIR}/snappy-stubs-public.h.in" "${CMAKE_CURRENT_BINARY_DIR}/snappy-stubs-public.h") -add_library(snappy "") -target_sources(snappy +add_library(_snappy "") +add_library(ch_contrib::snappy ALIAS _snappy) +target_sources(_snappy PRIVATE "${SOURCE_DIR}/snappy-internal.h" "${SOURCE_DIR}/snappy-stubs-internal.h" @@ -40,5 +41,5 @@ target_sources(snappy "${SOURCE_DIR}/snappy-stubs-internal.cc" "${SOURCE_DIR}/snappy.cc") -target_include_directories(snappy SYSTEM PUBLIC ${SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR}) -target_compile_definitions(snappy PRIVATE -DHAVE_CONFIG_H) +target_include_directories(_snappy SYSTEM BEFORE PUBLIC ${SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR}) +target_compile_definitions(_snappy PRIVATE -DHAVE_CONFIG_H) diff --git a/contrib/sparsehash-c11-cmake/CMakeLists.txt b/contrib/sparsehash-c11-cmake/CMakeLists.txt new file mode 100644 index 000000000000..af588c9484f4 --- /dev/null +++ b/contrib/sparsehash-c11-cmake/CMakeLists.txt @@ -0,0 +1,3 @@ +add_library(_sparsehash INTERFACE) +target_include_directories(_sparsehash SYSTEM BEFORE INTERFACE "${ClickHouse_SOURCE_DIR}/contrib/sparsehash-c11") +add_library(ch_contrib::sparsehash ALIAS _sparsehash) diff --git a/contrib/sqlite-cmake/CMakeLists.txt b/contrib/sqlite-cmake/CMakeLists.txt index 495cb63798d5..7559dd4c1849 100644 --- a/contrib/sqlite-cmake/CMakeLists.txt +++ b/contrib/sqlite-cmake/CMakeLists.txt @@ -1,6 +1,14 @@ +option(ENABLE_SQLITE "Enable sqlite" ${ENABLE_LIBRARIES}) + +if (NOT ENABLE_SQLITE) + message(STATUS "Not using sqlite") + return() +endif() + set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/sqlite-amalgamation") set(SRCS ${LIBRARY_DIR}/sqlite3.c) -add_library(sqlite ${SRCS}) -target_include_directories(sqlite SYSTEM PUBLIC "${LIBRARY_DIR}") +add_library(_sqlite ${SRCS}) +target_include_directories(_sqlite SYSTEM PUBLIC "${LIBRARY_DIR}") +add_library(ch_contrib::sqlite ALIAS _sqlite) diff --git a/contrib/stats-cmake/CMakeLists.txt b/contrib/stats-cmake/CMakeLists.txt deleted file mode 100644 index 8279e49c3f06..000000000000 --- a/contrib/stats-cmake/CMakeLists.txt +++ /dev/null @@ -1,9 +0,0 @@ -# The stats is a header-only library of probability density functions, -# cumulative distribution functions, quantile functions, and random sampling methods. 
-set(STATS_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/stats/include") -set(GCEM_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/gcem/include") - -add_library(stats INTERFACE) - -target_include_directories(stats SYSTEM INTERFACE ${STATS_INCLUDE_DIR}) -target_include_directories(stats SYSTEM INTERFACE ${GCEM_INCLUDE_DIR}) diff --git a/contrib/thrift-cmake/CMakeLists.txt b/contrib/thrift-cmake/CMakeLists.txt index 088dd0a969b5..2a62a6fe7ab1 100644 --- a/contrib/thrift-cmake/CMakeLists.txt +++ b/contrib/thrift-cmake/CMakeLists.txt @@ -1,3 +1,10 @@ +option(ENABLE_THRIFT "Enable Thrift" ${ENABLE_LIBRARIES}) + +if (NOT ENABLE_THRIFT) + message (STATUS "thrift disabled") + return() +endif() + set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/thrift/lib/cpp") set(thriftcpp_SOURCES "${LIBRARY_DIR}/src/thrift/TApplicationException.cpp" @@ -82,6 +89,7 @@ configure_file("${CMAKE_CURRENT_SOURCE_DIR}/build/cmake/config.h.in" "${CMAKE_CU include_directories("${CMAKE_CURRENT_BINARY_DIR}") -add_library(${THRIFT_LIBRARY} ${thriftcpp_SOURCES} ${thriftcpp_threads_SOURCES}) -target_include_directories(${THRIFT_LIBRARY} SYSTEM PUBLIC "${THRIFT_INCLUDE_DIR}" ${CMAKE_CURRENT_BINARY_DIR}) -target_link_libraries (${THRIFT_LIBRARY} PUBLIC boost::headers_only) +add_library(_thrift ${thriftcpp_SOURCES} ${thriftcpp_threads_SOURCES}) +add_library(ch_contrib::thrift ALIAS _thrift) +target_include_directories(_thrift SYSTEM PUBLIC "${ClickHouse_SOURCE_DIR}/contrib/thrift/lib/cpp/src" ${CMAKE_CURRENT_BINARY_DIR}) +target_link_libraries (_thrift PUBLIC boost::headers_only) diff --git a/contrib/unixodbc-cmake/CMakeLists.txt b/contrib/unixodbc-cmake/CMakeLists.txt index e03f6313a317..b594ead3ba0b 100644 --- a/contrib/unixodbc-cmake/CMakeLists.txt +++ b/contrib/unixodbc-cmake/CMakeLists.txt @@ -1,4 +1,13 @@ -if (NOT USE_INTERNAL_ODBC_LIBRARY) +option (ENABLE_ODBC "Enable ODBC library" ${ENABLE_LIBRARIES}) +if (NOT OS_LINUX) + if (ENABLE_ODBC) + message(STATUS "ODBC is only supported on Linux") + endif() + set (ENABLE_ODBC OFF CACHE INTERNAL "") +endif () + +if (NOT ENABLE_ODBC) + message(STATUS "Not using ODBC") return() endif() @@ -20,9 +29,9 @@ set (SRCS_LTDL "${LIBRARY_DIR}/libltdl/loaders/preopen.c" ) -add_library (ltdl ${SRCS_LTDL}) +add_library (_ltdl ${SRCS_LTDL}) -target_include_directories(ltdl +target_include_directories(_ltdl SYSTEM PRIVATE linux_x86_64/libltdl @@ -30,8 +39,8 @@ target_include_directories(ltdl "${LIBRARY_DIR}/libltdl" "${LIBRARY_DIR}/libltdl/libltdl" ) -target_compile_definitions(ltdl PRIVATE -DHAVE_CONFIG_H -DLTDL -DLTDLOPEN=libltdlc) -target_compile_options(ltdl PRIVATE -Wno-constant-logical-operand -Wno-unknown-warning-option -O2) +target_compile_definitions(_ltdl PRIVATE -DHAVE_CONFIG_H -DLTDL -DLTDLOPEN=libltdlc) +target_compile_options(_ltdl PRIVATE -Wno-constant-logical-operand -Wno-unknown-warning-option -O2) # odbc @@ -270,13 +279,13 @@ set (SRCS "${LIBRARY_DIR}/odbcinst/SQLWritePrivateProfileString.c" ) -add_library (unixodbc ${SRCS}) +add_library (_unixodbc ${SRCS}) -target_link_libraries (unixodbc PRIVATE ltdl) +target_link_libraries (_unixodbc PRIVATE _ltdl) # SYSTEM_FILE_PATH was changed to /etc -target_include_directories (unixodbc +target_include_directories (_unixodbc SYSTEM PRIVATE linux_x86_64/private @@ -284,8 +293,8 @@ target_include_directories (unixodbc linux_x86_64 "${LIBRARY_DIR}/include" ) -target_compile_definitions (unixodbc PRIVATE -DHAVE_CONFIG_H) -target_compile_options (unixodbc +target_compile_definitions (_unixodbc PRIVATE -DHAVE_CONFIG_H) +target_compile_options 
(_unixodbc PRIVATE -Wno-dangling-else -Wno-parentheses @@ -294,4 +303,5 @@ target_compile_options (unixodbc -Wno-reserved-id-macro -O2 ) -target_compile_definitions (unixodbc INTERFACE USE_ODBC=1) + +add_library (ch_contrib::unixodbc ALIAS _unixodbc) diff --git a/contrib/wordnet-blast-cmake/CMakeLists.txt b/contrib/wordnet-blast-cmake/CMakeLists.txt index 37e4e9825caa..40712ecd2c57 100644 --- a/contrib/wordnet-blast-cmake/CMakeLists.txt +++ b/contrib/wordnet-blast-cmake/CMakeLists.txt @@ -6,8 +6,7 @@ set(SRCS "${LIBRARY_DIR}/wnb/core/wordnet.cc" ) -add_library(wnb ${SRCS}) - -target_link_libraries(wnb PRIVATE boost::headers_only boost::graph) - -target_include_directories(wnb SYSTEM PUBLIC "${LIBRARY_DIR}") +add_library(_wnb ${SRCS}) +target_link_libraries(_wnb PRIVATE boost::headers_only boost::graph) +target_include_directories(_wnb SYSTEM PUBLIC "${LIBRARY_DIR}") +add_library(ch_contrib::wnb ALIAS _wnb) diff --git a/contrib/xz-cmake/CMakeLists.txt b/contrib/xz-cmake/CMakeLists.txt index 5d70199413f7..9d08adc9c7ac 100644 --- a/contrib/xz-cmake/CMakeLists.txt +++ b/contrib/xz-cmake/CMakeLists.txt @@ -97,7 +97,7 @@ endif () find_package(Threads REQUIRED) -add_library(liblzma +add_library(_liblzma ${SRC_DIR}/src/common/mythread.h ${SRC_DIR}/src/common/sysdefs.h ${SRC_DIR}/src/common/tuklib_common.h @@ -241,7 +241,7 @@ add_library(liblzma ${SRC_DIR}/src/liblzma/simple/x86.c ) -target_include_directories(liblzma SYSTEM PUBLIC +target_include_directories(_liblzma SYSTEM PRIVATE ${SRC_DIR}/src/liblzma/api ${SRC_DIR}/src/liblzma/common ${SRC_DIR}/src/liblzma/check @@ -252,12 +252,15 @@ target_include_directories(liblzma SYSTEM PUBLIC ${SRC_DIR}/src/liblzma/simple ${SRC_DIR}/src/common ) +target_include_directories(_liblzma SYSTEM BEFORE PUBLIC ${SRC_DIR}/src/liblzma/api) -target_link_libraries(liblzma Threads::Threads) +target_link_libraries(_liblzma Threads::Threads) # Put the tuklib functions under the lzma_ namespace. 
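# (With TUKLIB_SYMBOL_PREFIX=lzma_, the bundled tuklib helper functions get an
# lzma_ prefix on their symbol names, so they cannot collide with another copy
# of tuklib linked into the same binary.)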
-target_compile_definitions(liblzma PRIVATE TUKLIB_SYMBOL_PREFIX=lzma_) +target_compile_definitions(_liblzma PRIVATE TUKLIB_SYMBOL_PREFIX=lzma_) if (ENABLE_SSE2) - target_compile_definitions(liblzma PRIVATE HAVE_IMMINTRIN_H HAVE__MM_MOVEMASK_EPI8) + target_compile_definitions(_liblzma PRIVATE HAVE_IMMINTRIN_H HAVE__MM_MOVEMASK_EPI8) endif() + +add_library(ch_contrib::xz ALIAS _liblzma) diff --git a/contrib/yaml-cpp-cmake/CMakeLists.txt b/contrib/yaml-cpp-cmake/CMakeLists.txt index ed0287de1103..00e85f90932b 100644 --- a/contrib/yaml-cpp-cmake/CMakeLists.txt +++ b/contrib/yaml-cpp-cmake/CMakeLists.txt @@ -1,39 +1,47 @@ +option(ENABLE_YAML_CPP "Enable yaml-cpp" ${ENABLE_LIBRARIES}) + +if (NOT ENABLE_YAML_CPP) + message(STATUS "Not using yaml") + return() +endif() + set (LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/yaml-cpp) set (SRCS - ${LIBRARY_DIR}/src/binary.cpp - ${LIBRARY_DIR}/src/emitterutils.cpp - ${LIBRARY_DIR}/src/null.cpp - ${LIBRARY_DIR}/src/scantoken.cpp - ${LIBRARY_DIR}/src/convert.cpp - ${LIBRARY_DIR}/src/exceptions.cpp - ${LIBRARY_DIR}/src/ostream_wrapper.cpp - ${LIBRARY_DIR}/src/simplekey.cpp - ${LIBRARY_DIR}/src/depthguard.cpp - ${LIBRARY_DIR}/src/exp.cpp - ${LIBRARY_DIR}/src/parse.cpp - ${LIBRARY_DIR}/src/singledocparser.cpp - ${LIBRARY_DIR}/src/directives.cpp - ${LIBRARY_DIR}/src/memory.cpp - ${LIBRARY_DIR}/src/parser.cpp - ${LIBRARY_DIR}/src/stream.cpp - ${LIBRARY_DIR}/src/emit.cpp - ${LIBRARY_DIR}/src/nodebuilder.cpp - ${LIBRARY_DIR}/src/regex_yaml.cpp - ${LIBRARY_DIR}/src/tag.cpp - ${LIBRARY_DIR}/src/emitfromevents.cpp - ${LIBRARY_DIR}/src/node.cpp - ${LIBRARY_DIR}/src/scanner.cpp - ${LIBRARY_DIR}/src/emitter.cpp - ${LIBRARY_DIR}/src/node_data.cpp - ${LIBRARY_DIR}/src/scanscalar.cpp - ${LIBRARY_DIR}/src/emitterstate.cpp - ${LIBRARY_DIR}/src/nodeevents.cpp - ${LIBRARY_DIR}/src/scantag.cpp + ${LIBRARY_DIR}/src/binary.cpp + ${LIBRARY_DIR}/src/emitterutils.cpp + ${LIBRARY_DIR}/src/null.cpp + ${LIBRARY_DIR}/src/scantoken.cpp + ${LIBRARY_DIR}/src/convert.cpp + ${LIBRARY_DIR}/src/exceptions.cpp + ${LIBRARY_DIR}/src/ostream_wrapper.cpp + ${LIBRARY_DIR}/src/simplekey.cpp + ${LIBRARY_DIR}/src/depthguard.cpp + ${LIBRARY_DIR}/src/exp.cpp + ${LIBRARY_DIR}/src/parse.cpp + ${LIBRARY_DIR}/src/singledocparser.cpp + ${LIBRARY_DIR}/src/directives.cpp + ${LIBRARY_DIR}/src/memory.cpp + ${LIBRARY_DIR}/src/parser.cpp + ${LIBRARY_DIR}/src/stream.cpp + ${LIBRARY_DIR}/src/emit.cpp + ${LIBRARY_DIR}/src/nodebuilder.cpp + ${LIBRARY_DIR}/src/regex_yaml.cpp + ${LIBRARY_DIR}/src/tag.cpp + ${LIBRARY_DIR}/src/emitfromevents.cpp + ${LIBRARY_DIR}/src/node.cpp + ${LIBRARY_DIR}/src/scanner.cpp + ${LIBRARY_DIR}/src/emitter.cpp + ${LIBRARY_DIR}/src/node_data.cpp + ${LIBRARY_DIR}/src/scanscalar.cpp + ${LIBRARY_DIR}/src/emitterstate.cpp + ${LIBRARY_DIR}/src/nodeevents.cpp + ${LIBRARY_DIR}/src/scantag.cpp ) -add_library (yaml-cpp ${SRCS}) +add_library (_yaml_cpp ${SRCS}) +target_include_directories(_yaml_cpp PRIVATE ${LIBRARY_DIR}/include/yaml-cpp) +target_include_directories(_yaml_cpp SYSTEM BEFORE PUBLIC ${LIBRARY_DIR}/include) -target_include_directories(yaml-cpp PRIVATE ${LIBRARY_DIR}/include/yaml-cpp) -target_include_directories(yaml-cpp SYSTEM BEFORE PUBLIC ${LIBRARY_DIR}/include) +add_library (ch_contrib::yaml_cpp ALIAS _yaml_cpp) diff --git a/contrib/zlib-ng-cmake/CMakeLists.txt b/contrib/zlib-ng-cmake/CMakeLists.txt index bf5bc0d7f1c9..371a07dd31a7 100644 --- a/contrib/zlib-ng-cmake/CMakeLists.txt +++ b/contrib/zlib-ng-cmake/CMakeLists.txt @@ -130,8 +130,8 @@ set(ZLIB_SRCS set(ZLIB_ALL_SRCS ${ZLIB_SRCS} 
${ZLIB_ARCH_SRCS}) -add_library(zlib ${ZLIB_ALL_SRCS}) -add_library(zlibstatic ALIAS zlib) +add_library(_zlib ${ZLIB_ALL_SRCS}) +add_library(ch_contrib::zlib ALIAS _zlib) # https://github.com/zlib-ng/zlib-ng/pull/733 # This is disabled by default @@ -153,9 +153,9 @@ configure_file(${SOURCE_DIR}/zlib.pc.cmakein ${ZLIB_PC} @ONLY) configure_file(${CMAKE_CURRENT_BINARY_DIR}/zconf.h.cmakein ${CMAKE_CURRENT_BINARY_DIR}/zconf.h @ONLY) # We should use the same defines when including zlib.h as were used when zlib was compiled -target_compile_definitions (zlib PUBLIC ZLIB_COMPAT WITH_GZFILEOP) +target_compile_definitions (_zlib PUBLIC ZLIB_COMPAT WITH_GZFILEOP) if (ARCH_AMD64 OR ARCH_AARCH64) - target_compile_definitions (zlib PUBLIC X86_64 UNALIGNED_OK) + target_compile_definitions (_zlib PUBLIC X86_64 UNALIGNED_OK) endif () -target_include_directories(zlib SYSTEM PUBLIC ${SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR}) +target_include_directories(_zlib SYSTEM BEFORE PUBLIC ${SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR}) diff --git a/contrib/zstd-cmake/CMakeLists.txt b/contrib/zstd-cmake/CMakeLists.txt index 226ee1a80676..4949c3f30d53 100644 --- a/contrib/zstd-cmake/CMakeLists.txt +++ b/contrib/zstd-cmake/CMakeLists.txt @@ -148,7 +148,7 @@ IF (ZSTD_LEGACY_SUPPORT) "${LIBRARY_LEGACY_DIR}/zstd_v07.h") ENDIF (ZSTD_LEGACY_SUPPORT) -ADD_LIBRARY(zstd ${Sources} ${Headers}) - -target_include_directories (zstd PUBLIC ${LIBRARY_DIR}) -target_compile_options(zstd PRIVATE -fno-sanitize=undefined) +add_library(_zstd ${Sources} ${Headers}) +add_library(ch_contrib::zstd ALIAS _zstd) +target_include_directories(_zstd BEFORE PUBLIC ${LIBRARY_DIR}) +target_compile_options(_zstd PRIVATE -fno-sanitize=undefined) diff --git a/debian/clickhouse-server.service b/debian/clickhouse-server.service index bc19235cb3ad..a9400b24270f 100644 --- a/debian/clickhouse-server.service +++ b/debian/clickhouse-server.service @@ -16,6 +16,8 @@ Restart=always RestartSec=30 RuntimeDirectory=clickhouse-server ExecStart=/usr/bin/clickhouse-server --config=/etc/clickhouse-server/config.xml --pid-file=/run/clickhouse-server/clickhouse-server.pid +# The leading minus means the file is optional: systemd will not fail if it is missing. +EnvironmentFile=-/etc/default/clickhouse LimitCORE=infinity LimitNOFILE=500000 CapabilityBoundingSet=CAP_NET_ADMIN CAP_IPC_LOCK CAP_SYS_NICE diff --git a/docker/packager/other/fuzzer.sh b/docker/packager/other/fuzzer.sh index 431352f11269..ac820d9e6894 100755 --- a/docker/packager/other/fuzzer.sh +++ b/docker/packager/other/fuzzer.sh @@ -14,7 +14,7 @@ read -ra CMAKE_FLAGS <<< "${CMAKE_FLAGS:-}" # Hopefully, most of the files will be in the cache, so we just link the new executables # Please add or change flags directly in cmake cmake --debug-trycompile --verbose=1 -DCMAKE_VERBOSE_MAKEFILE=1 -LA -DCMAKE_C_COMPILER="$CC" -DCMAKE_CXX_COMPILER="$CXX" \ - -DSANITIZE="$SANITIZER" -DENABLE_FUZZING=1 -DFUZZER='libfuzzer' -DENABLE_PROTOBUF=1 -DUSE_INTERNAL_PROTOBUF_LIBRARY=1 "${CMAKE_FLAGS[@]}" .. + -DSANITIZE="$SANITIZER" -DENABLE_FUZZING=1 -DFUZZER='libfuzzer' -DENABLE_PROTOBUF=1 "${CMAKE_FLAGS[@]}" ..
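# Collect the fuzzer target names: every src/**/*_fuzzer.cpp corresponds to a
# CMake target named after its basename (foo_fuzzer.cpp -> foo_fuzzer).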
FUZZER_TARGETS=$(find ../src -name '*_fuzzer.cpp' -execdir basename {} .cpp ';' | tr '\n' ' ') diff --git a/docker/packager/packager b/docker/packager/packager index 4e3e26d215f6..05b2e02df969 100755 --- a/docker/packager/packager +++ b/docker/packager/packager @@ -156,7 +156,6 @@ def parse_env_variables(build_type, compiler, sanitizer, package_type, image_typ result.append('ENABLE_TESTS=1') result.append('BINARY_OUTPUT=tests') cmake_flags.append('-DENABLE_TESTS=1') - cmake_flags.append('-DUSE_GTEST=1') if split_binary: cmake_flags.append('-DUSE_STATIC_LIBRARIES=0 -DSPLIT_SHARED_LIBRARIES=1 -DCLICKHOUSE_SPLIT_BINARY=1') @@ -168,7 +167,6 @@ def parse_env_variables(build_type, compiler, sanitizer, package_type, image_typ if clang_tidy: cmake_flags.append('-DENABLE_CLANG_TIDY=1') cmake_flags.append('-DENABLE_UTILS=1') - cmake_flags.append('-DUSE_GTEST=1') cmake_flags.append('-DENABLE_TESTS=1') cmake_flags.append('-DENABLE_EXAMPLES=1') # Don't stop on first error to find more clang-tidy errors in one run. diff --git a/docker/server/entrypoint.sh b/docker/server/entrypoint.sh index 103f2fdb47fe..20ad0e03bfe0 100755 --- a/docker/server/entrypoint.sh +++ b/docker/server/entrypoint.sh @@ -65,7 +65,12 @@ do # check if variable not empty [ -z "$dir" ] && continue # ensure directories exist - if ! mkdir -p "$dir"; then + if [ "$DO_CHOWN" = "1" ]; then + mkdir="mkdir" + else + mkdir="$gosu mkdir" + fi + if ! $mkdir -p "$dir"; then echo "Couldn't create necessary directory: $dir" exit 1 fi diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index d6d9f189e89a..bd0fc4948265 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -12,7 +12,11 @@ dpkg -i package_folder/clickhouse-common-static_*.deb dpkg -i package_folder/clickhouse-common-static-dbg_*.deb dpkg -i package_folder/clickhouse-server_*.deb dpkg -i package_folder/clickhouse-client_*.deb -dpkg -i package_folder/clickhouse-test_*.deb +if [[ -n "$TEST_CASES_FROM_DEB" ]] && [[ "$TEST_CASES_FROM_DEB" -eq 1 ]]; then + dpkg -i package_folder/clickhouse-test_*.deb +else + ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test +fi # install test configs /usr/share/clickhouse-test/config/install.sh diff --git a/docker/test/style/Dockerfile b/docker/test/style/Dockerfile index a68b52170e01..85c751edfbed 100644 --- a/docker/test/style/Dockerfile +++ b/docker/test/style/Dockerfile @@ -11,6 +11,7 @@ RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \ curl \ git \ libxml2-utils \ + moreutils \ pylint \ python3-pip \ shellcheck \ diff --git a/docker/test/style/process_style_check_result.py b/docker/test/style/process_style_check_result.py index b7e00c49e06e..655b7d702435 100755 --- a/docker/test/style/process_style_check_result.py +++ b/docker/test/style/process_style_check_result.py @@ -10,72 +10,26 @@ def process_result(result_folder): status = "success" description = "" test_results = [] + checks = ( + ("header duplicates", "duplicate_output.txt"), + ("shellcheck", "shellcheck_output.txt"), + ("style", "style_output.txt"), + ("typos", "typos_output.txt"), + ("whitespaces", "whitespaces_output.txt"), + ("workflows", "workflows_output.txt"), + ) - duplicate_log_path = "{}/duplicate_output.txt".format(result_folder) - if not os.path.exists(duplicate_log_path): - logging.info("No header duplicates check log on path %s", duplicate_log_path) - return "exception", "No header duplicates check log", [] - elif os.stat(duplicate_log_path).st_size != 0: - description += " 
Header duplicates check failed. " - test_results.append(("Header duplicates check", "FAIL")) - status = "failure" - else: - test_results.append(("Header duplicates check", "OK")) - - shellcheck_log_path = "{}/shellcheck_output.txt".format(result_folder) - if not os.path.exists(shellcheck_log_path): - logging.info("No shellcheck log on path %s", shellcheck_log_path) - return "exception", "No shellcheck log", [] - elif os.stat(shellcheck_log_path).st_size != 0: - description += " Shellcheck check failed. " - test_results.append(("Shellcheck ", "FAIL")) - status = "failure" - else: - test_results.append(("Shellcheck", "OK")) - - style_log_path = "{}/style_output.txt".format(result_folder) - if not os.path.exists(style_log_path): - logging.info("No style check log on path %s", style_log_path) - return "exception", "No style check log", [] - elif os.stat(style_log_path).st_size != 0: - description += "Style check failed. " - test_results.append(("Style check", "FAIL")) - status = "failure" - else: - test_results.append(("Style check", "OK")) - - typos_log_path = "{}/typos_output.txt".format(result_folder) - if not os.path.exists(typos_log_path): - logging.info("No typos check log on path %s", typos_log_path) - return "exception", "No typos check log", [] - elif os.stat(typos_log_path).st_size != 0: - description += "Typos check failed. " - test_results.append(("Typos check", "FAIL")) - status = "failure" - else: - test_results.append(("Typos check", "OK")) - - whitespaces_log_path = "{}/whitespaces_output.txt".format(result_folder) - if not os.path.exists(whitespaces_log_path): - logging.info("No whitespaces check log on path %s", whitespaces_log_path) - return "exception", "No whitespaces check log", [] - elif os.stat(whitespaces_log_path).st_size != 0: - description += "Whitespaces check failed. " - test_results.append(("Whitespaces check", "FAIL")) - status = "failure" - else: - test_results.append(("Whitespaces check", "OK")) - - workflows_log_path = "{}/workflows_output.txt".format(result_folder) - if not os.path.exists(workflows_log_path): - logging.info("No workflows check log on path %s", style_log_path) - return "exception", "No workflows check log", [] - elif os.stat(whitespaces_log_path).st_size != 0: - description += "Workflows check failed. " - test_results.append(("Workflows check", "FAIL")) - status = "failure" - else: - test_results.append(("Workflows check", "OK")) + for name, out_file in checks: + full_path = os.path.join(result_folder, out_file) + if not os.path.exists(full_path): + logging.info("No %s check log on path %s", name, full_path) + return "exception", f"No {name} check log", [] + elif os.stat(full_path).st_size != 0: + description += f"Check {name} failed. 
" + test_results.append((f"Check {name}", "FAIL")) + status = "failure" + else: + test_results.append((f"Check {name}", "OK")) if not description: description += "Style check success" diff --git a/docker/test/style/run.sh b/docker/test/style/run.sh index 98bc0053ab98..ce3ea4e50a6d 100755 --- a/docker/test/style/run.sh +++ b/docker/test/style/run.sh @@ -3,10 +3,16 @@ # yaml check is not the best one cd /ClickHouse/utils/check-style || echo -e "failure\tRepo not found" > /test_output/check_status.tsv +echo "Check duplicates" | ts ./check-duplicate-includes.sh |& tee /test_output/duplicate_output.txt +echo "Check style" | ts ./check-style -n |& tee /test_output/style_output.txt +echo "Check typos" | ts ./check-typos |& tee /test_output/typos_output.txt +echo "Check whitespaces" | ts ./check-whitespaces -n |& tee /test_output/whitespaces_output.txt +echo "Check sorkflows" | ts ./check-workflows |& tee /test_output/workflows_output.txt +echo "Check shell scripts with shellcheck" | ts ./shellcheck-run.sh |& tee /test_output/shellcheck_output.txt /process_style_check_result.py || echo -e "failure\tCannot parse results" > /test_output/check_status.tsv diff --git a/docs/_includes/cmake_in_clickhouse_header.md b/docs/_includes/cmake_in_clickhouse_header.md index f950cdcc6db0..02019f13964d 100644 --- a/docs/_includes/cmake_in_clickhouse_header.md +++ b/docs/_includes/cmake_in_clickhouse_header.md @@ -22,7 +22,7 @@ cmake .. \ 1. ClickHouse's source CMake files (located in the root directory and in `/src`). 2. Arch-dependent CMake files (located in `/cmake/*os_name*`). -3. Libraries finders (search for contrib libraries, located in `/cmake/find`). +3. Libraries finders (search for contrib libraries, located in `/contrib/*/CMakeLists.txt`). 3. Contrib build CMake files (used instead of libraries' own CMake files, located in `/cmake/modules`) ## List of CMake flags diff --git a/docs/_includes/install/deb.sh b/docs/_includes/install/deb.sh index 7dcca601d33b..21106e9fc47c 100644 --- a/docs/_includes/install/deb.sh +++ b/docs/_includes/install/deb.sh @@ -8,4 +8,4 @@ sudo apt-get update sudo apt-get install -y clickhouse-server clickhouse-client sudo service clickhouse-server start -clickhouse-client +clickhouse-client # or "clickhouse-client --password" if you set up a password. diff --git a/docs/_includes/install/rpm.sh b/docs/_includes/install/rpm.sh index de4a07420f7c..e3fd12320470 100644 --- a/docs/_includes/install/rpm.sh +++ b/docs/_includes/install/rpm.sh @@ -4,4 +4,4 @@ sudo yum-config-manager --add-repo https://repo.clickhouse.com/rpm/clickhouse.re sudo yum install clickhouse-server clickhouse-client sudo /etc/init.d/clickhouse-server start -clickhouse-client +clickhouse-client # or "clickhouse-client --password" if you set up a password. diff --git a/docs/en/development/build-cross-riscv.md b/docs/en/development/build-cross-riscv.md index 977387af2078..5cdce710b413 100644 --- a/docs/en/development/build-cross-riscv.md +++ b/docs/en/development/build-cross-riscv.md @@ -23,7 +23,7 @@ sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)" ``` bash cd ClickHouse mkdir build-riscv64 -CC=clang-13 CXX=clang++-13 cmake . 
-Bbuild-riscv64 -G Ninja -DCMAKE_TOOLCHAIN_FILE=cmake/linux/toolchain-riscv64.cmake -DGLIBC_COMPATIBILITY=OFF -DENABLE_LDAP=OFF -DOPENSSL_NO_ASM=ON -DENABLE_JEMALLOC=ON -DENABLE_PARQUET=OFF -DUSE_INTERNAL_PARQUET_LIBRARY=OFF -DENABLE_ORC=OFF -DUSE_INTERNAL_ORC_LIBRARY=OFF -DUSE_UNWIND=OFF -DUSE_INTERNAL_PROTOBUF_LIBRARY=ON -DENABLE_GRPC=OFF -DUSE_INTERNAL_GRPC_LIBRARY=OFF -DENABLE_HDFS=OFF -DUSE_INTERNAL_HDFS3_LIBRARY=OFF -DENABLE_MYSQL=OFF -DUSE_INTERNAL_MYSQL_LIBRARY=OFF +CC=clang-13 CXX=clang++-13 cmake . -Bbuild-riscv64 -G Ninja -DCMAKE_TOOLCHAIN_FILE=cmake/linux/toolchain-riscv64.cmake -DGLIBC_COMPATIBILITY=OFF -DENABLE_LDAP=OFF -DOPENSSL_NO_ASM=ON -DENABLE_JEMALLOC=ON -DENABLE_PARQUET=OFF -DENABLE_ORC=OFF -DUSE_UNWIND=OFF -DENABLE_GRPC=OFF -DENABLE_HDFS=OFF -DENABLE_MYSQL=OFF ninja -C build-riscv64 ``` diff --git a/docs/en/development/developer-instruction.md b/docs/en/development/developer-instruction.md index ccf6da355b92..f7d7100d1814 100644 --- a/docs/en/development/developer-instruction.md +++ b/docs/en/development/developer-instruction.md @@ -125,10 +125,6 @@ For installing CMake and Ninja on Mac OS X first install Homebrew and then insta Next, check the version of CMake: `cmake --version`. If it is below 3.12, you should install a newer version from the website: https://cmake.org/download/. -## Optional External Libraries {#optional-external-libraries} - -ClickHouse uses several external libraries for building. All of them do not need to be installed separately as they are built together with ClickHouse from the sources located in the submodules. You can check the list in `contrib`. - ## C++ Compiler {#c-compiler} Clang compilers starting from version 11 are supported for building ClickHouse. diff --git a/docs/en/engines/database-engines/materialized-mysql.md b/docs/en/engines/database-engines/materialized-mysql.md index cdc904f1e949..bcb026aa0dc5 100644 --- a/docs/en/engines/database-engines/materialized-mysql.md +++ b/docs/en/engines/database-engines/materialized-mysql.md @@ -78,15 +78,21 @@ When working with the `MaterializedMySQL` database engine, [ReplacingMergeTree]( | DATE, NEWDATE | [Date](../../sql-reference/data-types/date.md) | | DATETIME, TIMESTAMP | [DateTime](../../sql-reference/data-types/datetime.md) | | DATETIME2, TIMESTAMP2 | [DateTime64](../../sql-reference/data-types/datetime64.md) | +| YEAR | [UInt16](../../sql-reference/data-types/int-uint.md) | +| TIME | [Int64](../../sql-reference/data-types/int-uint.md) | | ENUM | [Enum](../../sql-reference/data-types/enum.md) | | STRING | [String](../../sql-reference/data-types/string.md) | | VARCHAR, VAR_STRING | [String](../../sql-reference/data-types/string.md) | | BLOB | [String](../../sql-reference/data-types/string.md) | +| GEOMETRY | [String](../../sql-reference/data-types/string.md) | | BINARY | [FixedString](../../sql-reference/data-types/fixedstring.md) | | BIT | [UInt64](../../sql-reference/data-types/int-uint.md) | +| SET | [UInt64](../../sql-reference/data-types/int-uint.md) | [Nullable](../../sql-reference/data-types/nullable.md) is supported. +Data of the MySQL TIME type is converted to microseconds in ClickHouse. + Other types are not supported. If a MySQL table contains a column of such a type, ClickHouse throws the exception "Unhandled data type" and stops replication. 
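For instance, a MySQL `TIME` value holding 1.5 seconds surfaces on the ClickHouse side as `Int64` microseconds. A minimal illustrative sketch (the `mysql_db` database, `timings` table, and `t` column are hypothetical, not part of this patch):

``` sql
-- `mysql_db` is a MaterializedMySQL database; on the MySQL side the column is `t TIME`,
-- holding '00:00:01.500000'.
SELECT t FROM mysql_db.timings;

-- ┌───────t─┐
-- │ 1500000 │   -- 1.5 seconds expressed as microseconds
-- └─────────┘
```
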
## Specifics and Recommendations {#specifics-and-recommendations} diff --git a/docs/en/engines/table-engines/mergetree-family/graphitemergetree.md b/docs/en/engines/table-engines/mergetree-family/graphitemergetree.md index 79ba06096b23..e1d571c909c0 100644 --- a/docs/en/engines/table-engines/mergetree-family/graphitemergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/graphitemergetree.md @@ -97,13 +97,16 @@ Structure of the `patterns` section: ``` text pattern + rule_type regexp function pattern + rule_type regexp age + precision ... pattern + rule_type regexp function age + precision @@ -127,12 +130,20 @@ When processing a row, ClickHouse checks the rules in the `pattern` sections. Ea Fields for `pattern` and `default` sections: -- `regexp`– A pattern for the metric name. +- `rule_type` - a rule's type. It is applied only to particular metrics. The engine uses it to separate plain and tagged metrics. Optional parameter. Default value: `all`. +It is unnecessary when performance is not critical, or only one metric type is used, e.g. plain metrics. By default, only one set of rules is created. Otherwise, if any of the special types is defined, two different sets are created: one for plain metrics (root.branch.leaf) and one for tagged metrics (root.branch.leaf;tag1=value1). +The default rules end up in both sets. +Valid values: + - `all` (default) - a universal rule, used when `rule_type` is omitted. + - `plain` - a rule for plain metrics. The field `regexp` is processed as a regular expression. + - `tagged` - a rule for tagged metrics (metrics are stored in the DB in the format `someName?tag1=value1&tag2=value2&tag3=value3`). The regular expression must be sorted by tags' names, and the first tag must be `__name__` if it exists. The field `regexp` is processed as a regular expression. + - `tag_list` - a rule for tagged metrics, a simple DSL for easier metric description in graphite format: `someName;tag1=value1;tag2=value2`, `someName`, or `tag1=value1;tag2=value2`. The field `regexp` is translated into a `tagged` rule. Sorting by tags' names is unnecessary, it will be done automatically. A tag's value (but not a name) can be set as a regular expression, e.g. `env=(dev|staging)`. +- `regexp` – A pattern for the metric name (a regular expression or DSL). - `age` – The minimum age of the data in seconds. - `precision`– How precisely to define the age of the data in seconds. Should be a divisor for 86400 (seconds in a day). - `function` – The name of the aggregating function to apply to data whose age falls within the range `[age, age + precision]`. Accepted functions: min / max / any / avg. The average is calculated imprecisely, like the average of the averages. -### Configuration Example {#configuration-example} +### Configuration Example without rule types {#configuration-example} ``` xml @@ -167,6 +178,81 @@ Fields for `pattern` and `default` sections: ``` +### Configuration Example with rule types {#configuration-typed-example} + +``` xml
+<graphite_rollup>
+    <version_column_name>Version</version_column_name>
+    <pattern>
+        <rule_type>plain</rule_type>
+        <regexp>click_cost</regexp>
+        <function>any</function>
+        <retention>
+            <age>0</age>
+            <precision>5</precision>
+        </retention>
+        <retention>
+            <age>86400</age>
+            <precision>60</precision>
+        </retention>
+    </pattern>
+    <pattern>
+        <rule_type>tagged</rule_type>
+        <regexp>^((.*)|.)min\?</regexp>
+        <function>min</function>
+        <retention>
+            <age>0</age>
+            <precision>5</precision>
+        </retention>
+        <retention>
+            <age>86400</age>
+            <precision>60</precision>
+        </retention>
+    </pattern>
+    <pattern>
+        <rule_type>tagged</rule_type>
+
+        <function>min</function>
+        <retention>
+            <age>0</age>
+            <precision>5</precision>
+        </retention>
+        <retention>
+            <age>86400</age>
+            <precision>60</precision>
+        </retention>
+    </pattern>
+    <pattern>
+        <rule_type>tag_list</rule_type>
+        <regexp>someName;tag2=value2</regexp>
+        <retention>
+            <age>0</age>
+            <precision>5</precision>
+        </retention>
+        <retention>
+            <age>86400</age>
+            <precision>60</precision>
+        </retention>
+    </pattern>
+    <default>
+        <function>max</function>
+        <retention>
+            <age>0</age>
+            <precision>60</precision>
+        </retention>
+        <retention>
+            <age>3600</age>
+            <precision>300</precision>
+        </retention>
+        <retention>
+            <age>86400</age>
+            <precision>3600</precision>
+        </retention>
+    </default>
+</graphite_rollup>
+``` + + !!! warning "Warning" Data rollup is performed during merges. Usually, for old partitions, merges are not started, so for rollup it is necessary to trigger an unscheduled merge using [optimize](../../../sql-reference/statements/optimize.md). 
Or use additional tools, for example [graphite-ch-optimizer](https://github.com/innogames/graphite-ch-optimizer). diff --git a/docs/en/engines/table-engines/special/buffer.md b/docs/en/engines/table-engines/special/buffer.md index 884774cbfaed..d1f92d347a44 100644 --- a/docs/en/engines/table-engines/special/buffer.md +++ b/docs/en/engines/table-engines/special/buffer.md @@ -54,10 +54,8 @@ If the set of columns in the Buffer table does not match the set of columns in a If the types do not match for one of the columns in the Buffer table and a subordinate table, an error message is entered in the server log, and the buffer is cleared. The same thing happens if the subordinate table does not exist when the buffer is flushed. -If you need to run ALTER for a subordinate table, and the Buffer table, we recommend first deleting the Buffer table, running ALTER for the subordinate table, then creating the Buffer table again. - !!! attention "Attention" - Running ALTER on the Buffer table in releases made before 28 Sep 2020 will cause a `Block structure mismatch` error (see [#15117](https://github.com/ClickHouse/ClickHouse/issues/15117)), so deleting the Buffer table and then recreating is the only option. It is advisable to check that this error is fixed in your release before trying to run ALTER on the Buffer table. + Running ALTER on the Buffer table in releases made before 26 Oct 2021 will cause a `Block structure mismatch` error (see [#15117](https://github.com/ClickHouse/ClickHouse/issues/15117) and [#30565](https://github.com/ClickHouse/ClickHouse/pull/30565)), so deleting the Buffer table and then recreating is the only option. It is advisable to check that this error is fixed in your release before trying to run ALTER on the Buffer table. If the server is restarted abnormally, the data in the buffer is lost. diff --git a/docs/en/faq/index.md b/docs/en/faq/index.md index d845b8c5898f..891e1ea464ef 100644 --- a/docs/en/faq/index.md +++ b/docs/en/faq/index.md @@ -25,6 +25,7 @@ Categories: - **[Operations](../faq/operations/index.md)** - [Which ClickHouse version to use in production?](../faq/operations/production.md) - [Is it possible to delete old records from a ClickHouse table?](../faq/operations/delete-old-data.md) + - [Does ClickHouse support multi-region replication?](../faq/operations/multi-region-replication.md) - **[Integration](../faq/integration/index.md)** - [How do I export data from ClickHouse to a file?](../faq/integration/file-export.md) - [What if I have a problem with encodings when connecting to Oracle via ODBC?](../faq/integration/oracle-odbc.md) diff --git a/docs/en/interfaces/http.md b/docs/en/interfaces/http.md index f8f6f26d2085..d72fb4d6f176 100644 --- a/docs/en/interfaces/http.md +++ b/docs/en/interfaces/http.md @@ -23,11 +23,13 @@ Web UI can be accessed here: `http://localhost:8123/play`. ![Web UI](../images/play.png) -In health-check scripts use `GET /ping` request. This handler always returns “Ok.” (with a line feed at the end). Available from version 18.12.13. +In health-check scripts use `GET /ping` request. This handler always returns “Ok.” (with a line feed at the end). Available from version 18.12.13. See also `/replicas_status` to check replica's delay. ``` bash $ curl 'http://localhost:8123/ping' Ok. +$ curl 'http://localhost:8123/replicas_status' +Ok. ``` Send the request as a URL ‘query’ parameter, or as a POST. Or send the beginning of the query in the ‘query’ parameter, and the rest in the POST (we’ll explain later why this is necessary). 
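A minimal sketch of the two transports just described (plain `curl`, outputs omitted):

``` bash
# Whole query in the URL 'query' parameter
curl 'http://localhost:8123/?query=SELECT%201'

# Beginning of the query in the 'query' parameter, the rest in the POST body
curl 'http://localhost:8123/?query=SELECT' --data-binary ' 1'
```
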
The size of the URL is limited to 16 KB, so keep this in mind when sending large queries. diff --git a/docs/en/interfaces/third-party/client-libraries.md b/docs/en/interfaces/third-party/client-libraries.md index a116c8e22222..8d1ff12cf0ad 100644 --- a/docs/en/interfaces/third-party/client-libraries.md +++ b/docs/en/interfaces/third-party/client-libraries.md @@ -27,6 +27,7 @@ toc_title: Client Libraries - Go - [clickhouse](https://github.com/kshvakov/clickhouse/) - [go-clickhouse](https://github.com/roistat/go-clickhouse) + - [chconn](https://github.com/vahid-sohrabloo/chconn) - [mailrugo-clickhouse](https://github.com/mailru/go-clickhouse) - [golang-clickhouse](https://github.com/leprosus/golang-clickhouse) - Swift diff --git a/docs/en/introduction/adopters.md b/docs/en/introduction/adopters.md index b67e373be354..5efa1b971bcb 100644 --- a/docs/en/introduction/adopters.md +++ b/docs/en/introduction/adopters.md @@ -105,10 +105,13 @@ toc_title: Adopters | MindsDB | Machine Learning | Main Product | — | — | [Official Website](https://www.mindsdb.com/blog/machine-learning-models-as-tables-in-ch) | | MUX | Online Video | Video Analytics | — | — | [Talk in English, August 2019](https://altinity.com/presentations/2019/8/13/how-clickhouse-became-the-default-analytics-database-for-mux/) | | MGID | Ad network | Web-analytics | — | — | [Blog post in Russian, April 2020](http://gs-studio.com/news-about-it/32777----clickhouse---c) | +| Muse Group | Music Software | Performance Monitoring | — | — | [Blog post in Russian, January 2021](https://habr.com/en/post/647079/) | | Netskope | Network Security | — | — | — | [Job advertisement, March 2021](https://www.mendeley.com/careers/job/senior-software-developer-backend-developer-1346348) | | NIC Labs | Network Monitoring | RaTA-DNS | — | — | [Blog post, March 2021](https://niclabs.cl/ratadns/2021/03/Clickhouse) | +| NLMK | Steel | Monitoring | — | — | [Article in Russian, Jan 2022](https://habr.com/en/company/nlmk/blog/645943/) | | NOC Project | Network Monitoring | Analytics | Main Product | — | [Official Website](https://getnoc.com/features/big-data/) | | Noction | Network Technology | Main Product | — | — | [Official Website](https://www.noction.com/news/irp-3-11-remote-triggered-blackholing-capability) +| ntop | Network Monitoring | Monitoring | — | — | [Official website, Jan 2022](https://www.ntop.org/ntop/historical-traffic-analysis-at-scale-using-clickhouse-with-ntopng/) | | Nuna Inc. 
| Health Data Analytics | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=170) | | Ok.ru | Social Network | — | 72 servers | 810 TB compressed, 50bn rows/day, 1.5 TB/day | [SmartData conference, October 2021](https://assets.ctfassets.net/oxjq45e8ilak/4JPHkbJenLgZhBGGyyonFP/57472ec6987003ec4078d0941740703b/____________________ClickHouse_______________________.pdf) | | Omnicomm | Transportation Monitoring | — | — | — | [Facebook post, October 2021](https://www.facebook.com/OmnicommTeam/posts/2824479777774500) | @@ -190,5 +193,6 @@ toc_title: Adopters | Цифровой Рабочий | Industrial IoT, Analytics | — | — | — | [Blog post in Russian, March 2021](https://habr.com/en/company/croc/blog/548018/) | | ООО «МПЗ Богородский» | Agriculture | — | — | — | [Article in Russian, November 2020](https://cloud.yandex.ru/cases/okraina) | | ДомКлик | Real Estate | — | — | — | [Article in Russian, October 2021](https://habr.com/ru/company/domclick/blog/585936/) | +| АС "Стрела" | Transportation | — | — | — | [Job posting, Jan 2022](https://vk.com/topic-111905078_35689124?post=3553) | [Original article](https://clickhouse.com/docs/en/introduction/adopters/) diff --git a/docs/en/operations/clickhouse-keeper.md b/docs/en/operations/clickhouse-keeper.md index fcfc675f9d7d..35ec5d858f57 100644 --- a/docs/en/operations/clickhouse-keeper.md +++ b/docs/en/operations/clickhouse-keeper.md @@ -108,7 +108,13 @@ Examples of configuration for quorum with three nodes can be found in [integrati ClickHouse Keeper is bundled into the ClickHouse server package, just add configuration of `` and start ClickHouse server as always. If you want to run standalone ClickHouse Keeper you can start it in a similar way with: ```bash -clickhouse-keeper --config /etc/your_path_to_config/config.xml --daemon +clickhouse-keeper --config /etc/your_path_to_config/config.xml +``` + +If you don't have the symlink (`clickhouse-keeper`) you can create it or specify `keeper` as argument: + +```bash +clickhouse keeper --config /etc/your_path_to_config/config.xml ``` ## Four Letter Word Commands {#four-letter-word-commands} diff --git a/docs/en/operations/external-authenticators/kerberos.md b/docs/en/operations/external-authenticators/kerberos.md index f326762a6107..da84c1f6a89e 100644 --- a/docs/en/operations/external-authenticators/kerberos.md +++ b/docs/en/operations/external-authenticators/kerberos.md @@ -51,6 +51,9 @@ With filtering by realm: ``` +!!! warning "Note" + You can define only one `kerberos` section. The presence of multiple `kerberos` sections will force ClickHouse to disable Kerberos authentication. + !!! warning "Note" `principal` and `realm` sections cannot be specified at the same time. The presence of both `principal` and `realm` sections will force ClickHouse to disable Kerberos authentication. diff --git a/docs/en/operations/optimizing-performance/sampling-query-profiler.md b/docs/en/operations/optimizing-performance/sampling-query-profiler.md index 9244592d515d..72cfa59b8b21 100644 --- a/docs/en/operations/optimizing-performance/sampling-query-profiler.md +++ b/docs/en/operations/optimizing-performance/sampling-query-profiler.md @@ -27,7 +27,7 @@ To analyze the `trace_log` system table: For security reasons, introspection functions are disabled by default. -- Use the `addressToLine`, `addressToSymbol` and `demangle` [introspection functions](../../sql-reference/functions/introspection.md) to get function names and their positions in ClickHouse code. 
To get a profile for some query, you need to aggregate data from the `trace_log` table. You can aggregate data by individual functions or by the whole stack traces. +- Use the `addressToLine`, `addressToLineWithInlines`, `addressToSymbol` and `demangle` [introspection functions](../../sql-reference/functions/introspection.md) to get function names and their positions in ClickHouse code. To get a profile for some query, you need to aggregate data from the `trace_log` table. You can aggregate data by individual functions or by the whole stack traces. If you need to visualize `trace_log` info, try [flamegraph](../../interfaces/third-party/gui/#clickhouse-flamegraph) and [speedscope](https://github.com/laplab/clickhouse-speedscope). diff --git a/docs/en/operations/system-tables/stack_trace.md b/docs/en/operations/system-tables/stack_trace.md index eb1824a6f666..e2135e4beb6d 100644 --- a/docs/en/operations/system-tables/stack_trace.md +++ b/docs/en/operations/system-tables/stack_trace.md @@ -2,7 +2,7 @@ Contains stack traces of all server threads. Allows developers to introspect the server state. -To analyze stack frames, use the `addressToLine`, `addressToSymbol` and `demangle` [introspection functions](../../sql-reference/functions/introspection.md). +To analyze stack frames, use the `addressToLine`, `addressToLineWithInlines`, `addressToSymbol` and `demangle` [introspection functions](../../sql-reference/functions/introspection.md). Columns: diff --git a/docs/en/operations/system-tables/trace_log.md b/docs/en/operations/system-tables/trace_log.md index 4902b09004d6..ab08ef7415c8 100644 --- a/docs/en/operations/system-tables/trace_log.md +++ b/docs/en/operations/system-tables/trace_log.md @@ -4,7 +4,7 @@ Contains stack traces collected by the sampling query profiler. ClickHouse creates this table when the [trace_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-trace_log) server configuration section is set. Also the [query_profiler_real_time_period_ns](../../operations/settings/settings.md#query_profiler_real_time_period_ns) and [query_profiler_cpu_time_period_ns](../../operations/settings/settings.md#query_profiler_cpu_time_period_ns) settings should be set. -To analyze logs, use the `addressToLine`, `addressToSymbol` and `demangle` introspection functions. +To analyze logs, use the `addressToLine`, `addressToLineWithInlines`, `addressToSymbol` and `demangle` introspection functions. Columns: diff --git a/docs/en/operations/tips.md b/docs/en/operations/tips.md index 477d3b529652..64e65575f3f3 100644 --- a/docs/en/operations/tips.md +++ b/docs/en/operations/tips.md @@ -129,6 +129,10 @@ If you want to divide an existing ZooKeeper cluster into two, the correct way is Do not run ZooKeeper on the same servers as ClickHouse: ZooKeeper is very sensitive to latency, and ClickHouse may utilize all available system resources. +You can have ZooKeeper observers in an ensemble, but ClickHouse servers should not interact with observers. + +Do not change the `minSessionTimeout` setting; large values may affect ClickHouse restart stability. + With the default settings, ZooKeeper is a time bomb: > The ZooKeeper server won’t delete files from old snapshots and logs when using the default configuration (see autopurge), and this is the responsibility of the operator. 
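For reference, the `autopurge` settings mentioned in the quote above live in ZooKeeper's `zoo.cfg`; a minimal sketch with illustrative values (not a tuning recommendation):

``` text
autopurge.snapRetainCount=3
autopurge.purgeInterval=1
```
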
diff --git a/docs/en/sql-reference/aggregate-functions/reference/meanztest.md b/docs/en/sql-reference/aggregate-functions/reference/meanztest.md new file mode 100644 index 000000000000..7d016f428191 --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/meanztest.md @@ -0,0 +1,70 @@ +--- +toc_priority: 303 +toc_title: meanZTest +--- + +# meanZTest {#meanztest} + +Applies mean z-test to samples from two populations. + +**Syntax** + +``` sql +meanZTest(population_variance_x, population_variance_y, confidence_level)(sample_data, sample_index) +``` + +Values of both samples are in the `sample_data` column. If `sample_index` equals to 0 then the value in that row belongs to the sample from the first population. Otherwise it belongs to the sample from the second population. +The null hypothesis is that means of populations are equal. Normal distribution is assumed. Populations may have unequal variance and the variances are known. + +**Arguments** + +- `sample_data` — Sample data. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md). +- `sample_index` — Sample index. [Integer](../../../sql-reference/data-types/int-uint.md). + +**Parameters** + +- `population_variance_x` — Variance for population x. [Float](../../../sql-reference/data-types/float.md). +- `population_variance_y` — Variance for population y. [Float](../../../sql-reference/data-types/float.md). +- `confidence_level` — Confidence level in order to calculate confidence intervals. [Float](../../../sql-reference/data-types/float.md). + +**Returned values** + +[Tuple](../../../sql-reference/data-types/tuple.md) with four elements: + +- calculated z-statistic. [Float64](../../../sql-reference/data-types/float.md). +- calculated p-value. [Float64](../../../sql-reference/data-types/float.md). +- calculated confidence-interval-low. [Float64](../../../sql-reference/data-types/float.md). +- calculated confidence-interval-high. [Float64](../../../sql-reference/data-types/float.md). + + +**Example** + +Input table: + +``` text +┌─sample_data─┬─sample_index─┐ +│ 20.3 │ 0 │ +│ 21.9 │ 0 │ +│ 22.1 │ 0 │ +│ 18.9 │ 1 │ +│ 19 │ 1 │ +│ 20.3 │ 1 │ +└─────────────┴──────────────┘ +``` + +Query: + +``` sql +SELECT meanZTest(0.7, 0.45, 0.95)(sample_data, sample_index) FROM mean_ztest +``` + +Result: + +``` text +┌─meanZTest(0.7, 0.45, 0.95)(sample_data, sample_index)────────────────────────────┐ +│ (3.2841296025548123,0.0010229786769086013,0.8198428246768334,3.2468238419898365) │ +└──────────────────────────────────────────────────────────────────────────────────┘ +``` + + +[Original article](https://clickhouse.com/docs/en/sql-reference/aggregate-functions/reference/meanZTest/) diff --git a/docs/en/sql-reference/aggregate-functions/reference/studentttest.md b/docs/en/sql-reference/aggregate-functions/reference/studentttest.md index fd391298bc3a..7d8d255e15be 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/studentttest.md +++ b/docs/en/sql-reference/aggregate-functions/reference/studentttest.md @@ -10,7 +10,7 @@ Applies Student's t-test to samples from two populations. **Syntax** ``` sql -studentTTest(sample_data, sample_index) +studentTTest([confidence_level])(sample_data, sample_index) ``` Values of both samples are in the `sample_data` column. If `sample_index` equals to 0 then the value in that row belongs to the sample from the first population. 
Otherwise it belongs to the sample from the second population. @@ -21,12 +21,19 @@ The null hypothesis is that means of populations are equal. Normal distribution - `sample_data` — Sample data. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md). - `sample_index` — Sample index. [Integer](../../../sql-reference/data-types/int-uint.md). +**Parameters** + +- `confidence_level` — Confidence level in order to calculate confidence intervals. [Float](../../../sql-reference/data-types/float.md). + + **Returned values** -[Tuple](../../../sql-reference/data-types/tuple.md) with two elements: +[Tuple](../../../sql-reference/data-types/tuple.md) with two or four elements (if the optional `confidence_level` is specified): - calculated t-statistic. [Float64](../../../sql-reference/data-types/float.md). - calculated p-value. [Float64](../../../sql-reference/data-types/float.md). +- [calculated confidence-interval-low.] [Float64](../../../sql-reference/data-types/float.md). +- [calculated confidence-interval-high.] [Float64](../../../sql-reference/data-types/float.md). **Example** diff --git a/docs/en/sql-reference/aggregate-functions/reference/welchttest.md b/docs/en/sql-reference/aggregate-functions/reference/welchttest.md index 62f5761b32e5..282d5c04d602 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/welchttest.md +++ b/docs/en/sql-reference/aggregate-functions/reference/welchttest.md @@ -10,7 +10,7 @@ Applies Welch's t-test to samples from two populations. **Syntax** ``` sql -welchTTest(sample_data, sample_index) +welchTTest([confidence_level])(sample_data, sample_index) ``` Values of both samples are in the `sample_data` column. If `sample_index` equals to 0 then the value in that row belongs to the sample from the first population. Otherwise it belongs to the sample from the second population. @@ -21,12 +21,18 @@ The null hypothesis is that means of populations are equal. Normal distribution - `sample_data` — Sample data. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md). - `sample_index` — Sample index. [Integer](../../../sql-reference/data-types/int-uint.md). +**Parameters** + +- `confidence_level` — Confidence level in order to calculate confidence intervals. [Float](../../../sql-reference/data-types/float.md). + **Returned values** -[Tuple](../../../sql-reference/data-types/tuple.md) with two elements: +[Tuple](../../../sql-reference/data-types/tuple.md) with two or four elements (if the optional `confidence_level` is specified): - calculated t-statistic. [Float64](../../../sql-reference/data-types/float.md). - calculated p-value. [Float64](../../../sql-reference/data-types/float.md). +- [calculated confidence-interval-low.] [Float64](../../../sql-reference/data-types/float.md). +- [calculated confidence-interval-high.] [Float64](../../../sql-reference/data-types/float.md). 
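For context, the statistic behind `welchTTest` follows the standard Welch definitions with the Welch–Satterthwaite degrees of freedom (this formula is textbook background, not taken from this patch):

``` text
t = \frac{\bar{x}_1 - \bar{x}_2}{\sqrt{s_1^2/n_1 + s_2^2/n_2}},
\qquad
\nu \approx \frac{(s_1^2/n_1 + s_2^2/n_2)^2}{\frac{(s_1^2/n_1)^2}{n_1 - 1} + \frac{(s_2^2/n_2)^2}{n_2 - 1}}
```
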
**Example** diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md index bee77a382d78..0d1c4535b282 100644 --- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md +++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md @@ -159,7 +159,7 @@ Configuration fields: | Tag | Description | Required | |------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------| | `name` | Column name. | Yes | -| `type` | ClickHouse data type: [UInt8](../../../sql-reference/data-types/int-uint.md), [UInt16](../../../sql-reference/data-types/int-uint.md), [UInt32](../../../sql-reference/data-types/int-uint.md), [UInt64](../../../sql-reference/data-types/int-uint.md), [Int8](../../../sql-reference/data-types/int-uint.md), [Int16](../../../sql-reference/data-types/int-uint.md), [Int32](../../../sql-reference/data-types/int-uint.md), [Int64](../../../sql-reference/data-types/int-uint.md), [Float32](../../../sql-reference/data-types/float.md), [Float64](../../../sql-reference/data-types/float.md), [UUID](../../../sql-reference/data-types/uuid.md), [Decimal32](../../../sql-reference/data-types/decimal.md), [Decimal64](../../../sql-reference/data-types/decimal.md), [Decimal128](../../../sql-reference/data-types/decimal.md), [Decimal256](../../../sql-reference/data-types/decimal.md), [String](../../../sql-reference/data-types/string.md), [Array](../../../sql-reference/data-types/array.md).
ClickHouse tries to cast value from dictionary to the specified data type. For example, for MySQL, the field might be `TEXT`, `VARCHAR`, or `BLOB` in the MySQL source table, but it can be uploaded as `String` in ClickHouse.
[Nullable](../../../sql-reference/data-types/nullable.md) is currently supported for [Flat](external-dicts-dict-layout.md#flat), [Hashed](external-dicts-dict-layout.md#dicts-external_dicts_dict_layout-hashed), [ComplexKeyHashed](external-dicts-dict-layout.md#complex-key-hashed), [Direct](external-dicts-dict-layout.md#direct), [ComplexKeyDirect](external-dicts-dict-layout.md#complex-key-direct), [RangeHashed](external-dicts-dict-layout.md#range-hashed), [Polygon](external-dicts-dict-polygon.md), [Cache](external-dicts-dict-layout.md#cache), [ComplexKeyCache](external-dicts-dict-layout.md#complex-key-cache), [SSDCache](external-dicts-dict-layout.md#ssd-cache), [SSDComplexKeyCache](external-dicts-dict-layout.md#complex-key-ssd-cache) dictionaries. In [IPTrie](external-dicts-dict-layout.md#ip-trie) dictionaries `Nullable` types are not supported. | Yes | +| `type` | ClickHouse data type: [UInt8](../../../sql-reference/data-types/int-uint.md), [UInt16](../../../sql-reference/data-types/int-uint.md), [UInt32](../../../sql-reference/data-types/int-uint.md), [UInt64](../../../sql-reference/data-types/int-uint.md), [Int8](../../../sql-reference/data-types/int-uint.md), [Int16](../../../sql-reference/data-types/int-uint.md), [Int32](../../../sql-reference/data-types/int-uint.md), [Int64](../../../sql-reference/data-types/int-uint.md), [Float32](../../../sql-reference/data-types/float.md), [Float64](../../../sql-reference/data-types/float.md), [UUID](../../../sql-reference/data-types/uuid.md), [Decimal32](../../../sql-reference/data-types/decimal.md), [Decimal64](../../../sql-reference/data-types/decimal.md), [Decimal128](../../../sql-reference/data-types/decimal.md), [Decimal256](../../../sql-reference/data-types/decimal.md), [Date](../../../sql-reference/data-types/date.md), [Date32](../../../sql-reference/data-types/date32.md), [DateTime](../../../sql-reference/data-types/datetime.md), [DateTime64](../../../sql-reference/data-types/datetime64.md), [String](../../../sql-reference/data-types/string.md), [Array](../../../sql-reference/data-types/array.md).
ClickHouse tries to cast value from dictionary to the specified data type. For example, for MySQL, the field might be `TEXT`, `VARCHAR`, or `BLOB` in the MySQL source table, but it can be uploaded as `String` in ClickHouse.
[Nullable](../../../sql-reference/data-types/nullable.md) is currently supported for [Flat](external-dicts-dict-layout.md#flat), [Hashed](external-dicts-dict-layout.md#dicts-external_dicts_dict_layout-hashed), [ComplexKeyHashed](external-dicts-dict-layout.md#complex-key-hashed), [Direct](external-dicts-dict-layout.md#direct), [ComplexKeyDirect](external-dicts-dict-layout.md#complex-key-direct), [RangeHashed](external-dicts-dict-layout.md#range-hashed), [Polygon](external-dicts-dict-polygon.md), [Cache](external-dicts-dict-layout.md#cache), [ComplexKeyCache](external-dicts-dict-layout.md#complex-key-cache), [SSDCache](external-dicts-dict-layout.md#ssd-cache), [SSDComplexKeyCache](external-dicts-dict-layout.md#complex-key-ssd-cache) dictionaries. In [IPTrie](external-dicts-dict-layout.md#ip-trie) dictionaries `Nullable` types are not supported. | Yes | | `null_value` | Default value for a non-existing element.
In the example, it is an empty string. [NULL](../../syntax.md#null-literal) value can be used only for the `Nullable` types (see the previous line with types description). | Yes | | `expression` | [Expression](../../../sql-reference/syntax.md#syntax-expressions) that ClickHouse executes on the value.
The expression can be a column name in the remote SQL database. Thus, you can use it to create an alias for the remote column.

Default value: no expression. | No | | `hierarchical` | If `true`, the attribute contains the value of a parent key for the current key. See [Hierarchical Dictionaries](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-hierarchical.md).

Default value: `false`. | No | diff --git a/docs/en/sql-reference/functions/bit-functions.md b/docs/en/sql-reference/functions/bit-functions.md index 899b6385a3e4..24adb362c987 100644 --- a/docs/en/sql-reference/functions/bit-functions.md +++ b/docs/en/sql-reference/functions/bit-functions.md @@ -117,6 +117,59 @@ Result: ## bitRotateRight(a, b) {#bitrotaterighta-b} +## bitSlice(s, offset, length) + +Returns a substring starting with the bit from the ‘offset’ index that is ‘length’ bits long. Bit indexing starts from 1. + +**Syntax** + +``` sql +bitSlice(s, offset[, length]) +``` + +**Arguments** + +- `s` — [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). +- `offset` — The start index in bits. A positive value indicates an offset from the left, and a negative value indicates an indent from the right. Numbering of the bits begins with 1. +- `length` — The length of the substring in bits. If you specify a negative value, the function returns the open substring [offset, array_length - length). If you omit the value, the function returns the substring [offset, the_end_string]. If `length` exceeds `s`, it is truncated. If `length` is not a multiple of 8, the result is zero-filled on the right. + +**Returned value** + +- The substring. [String](../../sql-reference/data-types/string.md). + +**Example** + +Query: + +``` sql +select bin('Hello'), bin(bitSlice('Hello', 1, 8)) +select bin('Hello'), bin(bitSlice('Hello', 1, 2)) +select bin('Hello'), bin(bitSlice('Hello', 1, 9)) +select bin('Hello'), bin(bitSlice('Hello', -4, 8)) +``` + +Result: + +``` text +┌─bin('Hello')─────────────────────────────┬─bin(bitSlice('Hello', 1, 8))─┐ +│ 0100100001100101011011000110110001101111 │ 01001000 │ +└──────────────────────────────────────────┴──────────────────────────────┘ +┌─bin('Hello')─────────────────────────────┬─bin(bitSlice('Hello', 1, 2))─┐ +│ 0100100001100101011011000110110001101111 │ 01000000 │ +└──────────────────────────────────────────┴──────────────────────────────┘ +┌─bin('Hello')─────────────────────────────┬─bin(bitSlice('Hello', 1, 9))─┐ +│ 0100100001100101011011000110110001101111 │ 0100100000000000 │ +└──────────────────────────────────────────┴──────────────────────────────┘ +┌─bin('Hello')─────────────────────────────┬─bin(bitSlice('Hello', -4, 8))─┐ +│ 0100100001100101011011000110110001101111 │ 11110000 │ +└──────────────────────────────────────────┴───────────────────────────────┘ +``` + ## bitTest {#bittest} Takes any integer and converts it into [binary form](https://en.wikipedia.org/wiki/Binary_number), returns the value of a bit at specified position. The countdown starts from 0 from the right to the left. diff --git a/docs/en/sql-reference/functions/encoding-functions.md b/docs/en/sql-reference/functions/encoding-functions.md index 69dd14da1bfc..ec1524f1fa38 100644 --- a/docs/en/sql-reference/functions/encoding-functions.md +++ b/docs/en/sql-reference/functions/encoding-functions.md @@ -93,6 +93,8 @@ For [String](../../sql-reference/data-types/string.md) and [FixedString](../../s Values of [Float](../../sql-reference/data-types/float.md) and [Decimal](../../sql-reference/data-types/decimal.md) types are encoded as their representation in memory. As we support little-endian architecture, they are encoded in little-endian. Zero leading/trailing bytes are not omitted. +Values of [UUID](../data-types/uuid.md) type are encoded as a big-endian order string. + **Arguments** - `arg` — A value to convert to hexadecimal. 
Types: [String](../../sql-reference/data-types/string.md), [UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md), [Decimal](../../sql-reference/data-types/decimal.md), [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md). @@ -147,6 +149,21 @@ Result: └──────────────────┘ ``` +Query: + +``` sql +SELECT lower(hex(toUUID('61f0c404-5cb3-11e7-907b-a6006ad3dba0'))) as uuid_hex +``` + +Result: + +``` text +┌─uuid_hex─────────────────────────┐ +│ 61f0c4045cb311e7907ba6006ad3dba0 │ +└──────────────────────────────────┘ +``` + + ## unhex {#unhexstr} Performs the opposite operation of [hex](#hex). It interprets each pair of hexadecimal digits (in the argument) as a number and converts it to the byte represented by the number. The return value is a binary string (BLOB). @@ -224,6 +241,8 @@ For [String](../../sql-reference/data-types/string.md) and [FixedString](../../s Values of [Float](../../sql-reference/data-types/float.md) and [Decimal](../../sql-reference/data-types/decimal.md) types are encoded as their representation in memory. As we support little-endian architecture, they are encoded in little-endian. Zero leading/trailing bytes are not omitted. +Values of [UUID](../data-types/uuid.md) type are encoded as big-endian order string. + **Arguments** - `arg` — A value to convert to binary. [String](../../sql-reference/data-types/string.md), [FixedString](../../sql-reference/data-types/fixedstring.md), [UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md), [Decimal](../../sql-reference/data-types/decimal.md), [Date](../../sql-reference/data-types/date.md), or [DateTime](../../sql-reference/data-types/datetime.md). @@ -280,6 +299,21 @@ Result: └──────────────────────────────────────────────────────────────────┘ ``` +Query: + +``` sql +SELECT bin(toUUID('61f0c404-5cb3-11e7-907b-a6006ad3dba0')) as bin_uuid +``` + +Result: + +``` text +┌─bin_uuid─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ +│ 01100001111100001100010000000100010111001011001100010001111001111001000001111011101001100000000001101010110100111101101110100000 │ +└──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ +``` + + ## unbin {#unbinstr} Interprets each pair of binary digits (in the argument) as a number and converts it to the byte represented by the number. The functions performs the opposite operation to [bin](#bin). diff --git a/docs/en/sql-reference/functions/geo/h3.md b/docs/en/sql-reference/functions/geo/h3.md index 19246ee94fea..ecbe00adfd78 100644 --- a/docs/en/sql-reference/functions/geo/h3.md +++ b/docs/en/sql-reference/functions/geo/h3.md @@ -156,6 +156,40 @@ Result: └─────────────┘ ``` +## h3EdgeLengthKm {#h3edgelengthkm} + +Calculates the average length of the [H3](#h3index) hexagon edge in kilometers. + +**Syntax** + +``` sql +h3EdgeLengthKm(resolution) +``` + +**Parameter** + +- `resolution` — Index resolution. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). Range: `[0, 15]`. + +**Returned values** + +- The average length of the [H3](#h3index) hexagon edge in kilometers. Type: [Float64](../../../sql-reference/data-types/float.md). 
+ +**Example** + +Query: + +``` sql +SELECT h3EdgeLengthKm(15) AS edgeLengthKm; +``` + +Result: + +``` text +┌─edgeLengthKm─┐ +│ 0.000509713 │ +└──────────────┘ +``` + ## geoToH3 {#geotoh3} Returns [H3](#h3index) point index `(lon, lat)` with specified resolution. @@ -197,7 +231,7 @@ Result: ## h3ToGeo {#h3togeo} -Returns the geographical coordinates of longitude and latitude corresponding to the provided [H3](#h3index) index. +Returns the centroid longitude and latitude corresponding to the provided [H3](#h3index) index. **Syntax** @@ -278,7 +312,7 @@ h3kRing(h3index, k) **Arguments** - `h3index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). -- `k` — Raduis. Type: [integer](../../../sql-reference/data-types/int-uint.md) +- `k` — Radius. Type: [integer](../../../sql-reference/data-types/int-uint.md) **Returned values** @@ -849,4 +883,147 @@ Result: └────────────────────┘ ``` +## h3ExactEdgeLengthM {#h3exactedgelengthm} + +Returns the exact edge length of the unidirectional edge represented by the input H3 index in meters. + +**Syntax** + +``` sql +h3ExactEdgeLengthM(index) +``` + +**Parameter** + +- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). + +**Returned value** + +- Exact edge length in meters. + +Type: [Float64](../../../sql-reference/data-types/float.md). + +**Example** + +Query: + +``` sql +SELECT h3ExactEdgeLengthM(1310277011704381439) AS exactEdgeLengthM; +``` + +Result: + +``` text +┌───exactEdgeLengthM─┐ +│ 195449.63163407316 │ +└────────────────────┘ +``` + +## h3ExactEdgeLengthKm {#h3exactedgelengthkm} + +Returns the exact edge length of the unidirectional edge represented by the input H3 index in kilometers. + +**Syntax** + +``` sql +h3ExactEdgeLengthKm(index) +``` + +**Parameter** + +- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). + +**Returned value** + +- Exact edge length in kilometers. + +Type: [Float64](../../../sql-reference/data-types/float.md). + +**Example** + +Query: + +``` sql +SELECT h3ExactEdgeLengthKm(1310277011704381439) AS exactEdgeLengthKm; +``` + +Result: + +``` text +┌──exactEdgeLengthKm─┐ +│ 195.44963163407317 │ +└────────────────────┘ +``` + +## h3ExactEdgeLengthRads {#h3exactedgelengthrads} + +Returns the exact edge length of the unidirectional edge represented by the input H3 index in radians. + +**Syntax** + +``` sql +h3ExactEdgeLengthRads(index) +``` + +**Parameter** + +- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). + +**Returned value** + +- Exact edge length in radians. + +Type: [Float64](../../../sql-reference/data-types/float.md). + +**Example** + +Query: + +``` sql +SELECT h3ExactEdgeLengthRads(1310277011704381439) AS exactEdgeLengthRads; +``` + +Result: + +``` text +┌──exactEdgeLengthRads─┐ +│ 0.030677980118976447 │ +└──────────────────────┘ +``` + +## h3NumHexagons {#h3numhexagons} + +Returns the number of unique H3 indices at the given resolution. + +**Syntax** + +``` sql +h3NumHexagons(resolution) +``` + +**Parameter** + +- `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). + +**Returned value** + +- Number of H3 indices. + +Type: [Int64](../../../sql-reference/data-types/int-uint.md). 
+ +**Example** + +Query: + +``` sql +SELECT h3NumHexagons(3) AS numHexagons; +``` + +Result: + +``` text +┌─numHexagons─┐ +│ 41162 │ +└─────────────┘ +``` [Original article](https://clickhouse.com/docs/en/sql-reference/functions/geo/h3) diff --git a/docs/en/sql-reference/functions/introspection.md b/docs/en/sql-reference/functions/introspection.md index 21b570c65d4e..1be68c6bdd42 100644 --- a/docs/en/sql-reference/functions/introspection.md +++ b/docs/en/sql-reference/functions/introspection.md @@ -113,6 +113,111 @@ trace_source_code_lines: /lib/x86_64-linux-gnu/libpthread-2.27.so /build/glibc-OTsEL5/glibc-2.27/misc/../sysdeps/unix/sysv/linux/x86_64/clone.S:97 +## addressToLineWithInlines {#addresstolinewithinlines} + +Similar to `addressToLine`, but it returns an Array with all inline functions; as a price, it is much slower. + +If you use official ClickHouse packages, you need to install the `clickhouse-common-static-dbg` package. + +**Syntax** + +``` sql +addressToLineWithInlines(address_of_binary_instruction) +``` + +**Arguments** + +- `address_of_binary_instruction` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Address of an instruction in a running process. + +**Returned value** + +- Array whose first element is the source code filename and the line number in this file, delimited by a colon. Starting from the second element, inline functions' source code filenames, line numbers, and function names are listed. + +- Array with a single element which is the name of the binary, if the function couldn’t find the debug information. + +- Empty array, if the address is not valid. + +Type: [Array(String)](../../sql-reference/data-types/array.md). + +**Example** + +Enabling introspection functions: + +``` sql +SET allow_introspection_functions=1; +``` + +Applying the function to an address: + +```sql +SELECT addressToLineWithInlines(531055181::UInt64); +``` + +``` text +┌─addressToLineWithInlines(CAST('531055181', 'UInt64'))────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ +│ ['./src/Functions/addressToLineWithInlines.cpp:98','./build_normal_debug/./src/Functions/addressToLineWithInlines.cpp:176:DB::(anonymous namespace)::FunctionAddressToLineWithInlines::implCached(unsigned long) const'] │ +└───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ +``` + +Applying the function to the whole stack trace: + +``` sql +SELECT + ta, addressToLineWithInlines(arrayJoin(trace) as ta) +FROM system.trace_log +WHERE + query_id = '5e173544-2020-45de-b645-5deebe2aae54'; +``` + +The [arrayJoin](../../sql-reference/functions/array-functions.md#array-functions-join) function will split the array into rows. 
+ +``` text +┌────────ta─┬─addressToLineWithInlines(arrayJoin(trace))───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ +│ 365497529 │ ['./build_normal_debug/./contrib/libcxx/include/string_view:252'] │ +│ 365593602 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:191'] │ +│ 365593866 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:0'] │ +│ 365592528 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:0'] │ +│ 365591003 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:477'] │ +│ 365590479 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:442'] │ +│ 365590600 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:457'] │ +│ 365598941 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:0'] │ +│ 365607098 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:0'] │ +│ 365590571 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:451'] │ +│ 365598941 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:0'] │ +│ 365607098 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:0'] │ +│ 365590571 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:451'] │ +│ 365598941 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:0'] │ +│ 365607098 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:0'] │ +│ 365590571 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:451'] │ +│ 365598941 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:0'] │ +│ 365597289 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:807'] │ +│ 365599840 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:1118'] │ +│ 531058145 │ ['./build_normal_debug/./src/Functions/addressToLineWithInlines.cpp:152'] │ +│ 531055181 │ ['./src/Functions/addressToLineWithInlines.cpp:98','./build_normal_debug/./src/Functions/addressToLineWithInlines.cpp:176:DB::(anonymous namespace)::FunctionAddressToLineWithInlines::implCached(unsigned long) const'] │ +│ 422333613 │ ['./build_normal_debug/./src/Functions/IFunctionAdaptors.h:21'] │ +│ 586866022 │ ['./build_normal_debug/./src/Functions/IFunction.cpp:216'] │ +│ 586869053 │ ['./build_normal_debug/./src/Functions/IFunction.cpp:264'] │ +│ 586873237 │ ['./build_normal_debug/./src/Functions/IFunction.cpp:334'] │ +│ 597901620 │ ['./build_normal_debug/./src/Interpreters/ExpressionActions.cpp:601'] │ +│ 597898534 │ ['./build_normal_debug/./src/Interpreters/ExpressionActions.cpp:718'] │ +│ 630442912 │ ['./build_normal_debug/./src/Processors/Transforms/ExpressionTransform.cpp:23'] │ +│ 546354050 │ ['./build_normal_debug/./src/Processors/ISimpleTransform.h:38'] │ +│ 626026993 │ ['./build_normal_debug/./src/Processors/ISimpleTransform.cpp:89'] │ +│ 626294022 │ ['./build_normal_debug/./src/Processors/Executors/ExecutionThreadContext.cpp:45'] │ +│ 626293730 │ ['./build_normal_debug/./src/Processors/Executors/ExecutionThreadContext.cpp:63'] │ +│ 626169525 │ ['./build_normal_debug/./src/Processors/Executors/PipelineExecutor.cpp:213'] │ +│ 626170308 │ ['./build_normal_debug/./src/Processors/Executors/PipelineExecutor.cpp:178'] │ +│ 626166348 │ ['./build_normal_debug/./src/Processors/Executors/PipelineExecutor.cpp:329'] │ +│ 626163461 │ ['./build_normal_debug/./src/Processors/Executors/PipelineExecutor.cpp:84'] │ +│ 626323536 │ ['./build_normal_debug/./src/Processors/Executors/PullingAsyncPipelineExecutor.cpp:85'] │ +│ 626323277 │ ['./build_normal_debug/./src/Processors/Executors/PullingAsyncPipelineExecutor.cpp:112'] │ +│ 626323133 │ ['./build_normal_debug/./contrib/libcxx/include/type_traits:3682'] │ +│ 626323041 │ 
['./build_normal_debug/./contrib/libcxx/include/tuple:1415'] │ +└───────────┴──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ + +``` + + ## addressToSymbol {#addresstosymbol} Converts virtual memory address inside ClickHouse server process to the symbol from ClickHouse object files. diff --git a/docs/en/sql-reference/functions/math-functions.md b/docs/en/sql-reference/functions/math-functions.md index 2b3c000bc19e..a5fc07cf6879 100644 --- a/docs/en/sql-reference/functions/math-functions.md +++ b/docs/en/sql-reference/functions/math-functions.md @@ -477,3 +477,74 @@ Result: └──────────┘ ``` +## degrees(x) {#degreesx} + +Converts the input value in radians to degrees. + +**Syntax** + +``` sql +degrees(x) +``` + +**Arguments** + +- `x` — Input in radians. [Float64](../../sql-reference/data-types/float.md#float32-float64). + +**Returned value** + +- Value in degrees. + +Type: [Float64](../../sql-reference/data-types/float.md#float32-float64). + +**Example** + +Query: + +``` sql +SELECT degrees(3.141592653589793); +``` + +Result: + +``` text +┌─degrees(3.141592653589793)─┐ +│ 180 │ +└────────────────────────────┘ +``` + +## radians(x) {#radiansx} + +Converts the input value in degrees to radians. + +**Syntax** + +``` sql +radians(x) +``` + +**Arguments** + +- `x` — Input in degrees. [Float64](../../sql-reference/data-types/float.md#float32-float64). + +**Returned value** + +- Value in radians. + +Type: [Float64](../../sql-reference/data-types/float.md#float32-float64). + +**Example** + +Query: + +``` sql +SELECT radians(180); +``` + +Result: + +``` text +┌──────radians(180)─┐ +│ 3.141592653589793 │ +└───────────────────┘ +``` diff --git a/docs/en/sql-reference/statements/check-table.md b/docs/en/sql-reference/statements/check-table.md index bc89b11ae4d8..c9ad40860f70 100644 --- a/docs/en/sql-reference/statements/check-table.md +++ b/docs/en/sql-reference/statements/check-table.md @@ -46,7 +46,7 @@ CHECK TABLE test_table; └───────────┴───────────┴─────────┘ ``` -If `check_query_single_value_result` = 0, the `CHECK TABLE` query shows the general table check status. +If `check_query_single_value_result` = 1, the `CHECK TABLE` query shows the general table check status. ```sql SET check_query_single_value_result = 1; diff --git a/docs/en/sql-reference/statements/create/table.md b/docs/en/sql-reference/statements/create/table.md index c3e54545549e..7bbbb6f32bd1 100644 --- a/docs/en/sql-reference/statements/create/table.md +++ b/docs/en/sql-reference/statements/create/table.md @@ -252,7 +252,6 @@ CREATE TABLE codec_example ENGINE = MergeTree() ``` - ### Encryption Codecs {#create-query-encryption-codecs} These codecs don't actually compress data, but instead encrypt data on disk. These are only available when an encryption key is specified by [encryption](../../../operations/server-configuration-parameters/settings.md#server-settings-encryption) settings. Note that encryption only makes sense at the end of codec pipelines, because encrypted data usually can't be compressed in any meaningful way. @@ -260,6 +259,7 @@ These codecs don't actually compress data, but instead encrypt data on disk. The Encryption codecs: - `CODEC('AES-128-GCM-SIV')` — Encrypts data with AES-128 in [RFC 8452](https://tools.ietf.org/html/rfc8452) GCM-SIV mode. + - `CODEC('AES-256-GCM-SIV')` — Encrypts data with AES-256 in GCM-SIV mode. 
These codecs use a fixed nonce and encryption is therefore deterministic. This makes it compatible with deduplicating engines such as [ReplicatedMergeTree](../../../engines/table-engines/mergetree-family/replication.md) but has a weakness: when the same data block is encrypted twice, the resulting ciphertext will be exactly the same so an adversary who can read the disk can see this equivalence (although only the equivalence, without getting its content). @@ -269,7 +269,7 @@ These codecs use a fixed nonce and encryption is therefore deterministic. This m !!! attention "Attention" If you perform a SELECT query mentioning a specific value in an encrypted column (such as in its WHERE clause), the value may appear in [system.query_log](../../../operations/system-tables/query_log.md). You may want to disable the logging. - + **Example** ```sql diff --git a/docs/en/sql-reference/statements/create/user.md b/docs/en/sql-reference/statements/create/user.md index dfa065f5d0a1..5dfcf8914399 100644 --- a/docs/en/sql-reference/statements/create/user.md +++ b/docs/en/sql-reference/statements/create/user.md @@ -43,7 +43,7 @@ User host is a host from which a connection to ClickHouse server could be establ - `HOST ANY` — User can connect from any location. This is a default option. - `HOST LOCAL` — User can connect only locally. - `HOST NAME 'fqdn'` — User host can be specified as FQDN. For example, `HOST NAME 'mysite.com'`. -- `HOST NAME REGEXP 'regexp'` — You can use [pcre](http://www.pcre.org/) regular expressions when specifying user hosts. For example, `HOST NAME REGEXP '.*\.mysite\.com'`. +- `HOST REGEXP 'regexp'` — You can use [pcre](http://www.pcre.org/) regular expressions when specifying user hosts. For example, `HOST REGEXP '.*\.mysite\.com'`. - `HOST LIKE 'template'` — Allows you to use the [LIKE](../../../sql-reference/functions/string-search-functions.md#function-like) operator to filter the user hosts. For example, `HOST LIKE '%'` is equivalent to `HOST ANY`, `HOST LIKE '%.mysite.com'` filters all the hosts in the `mysite.com` domain. Another way of specifying host is to use `@` syntax following the username. Examples: diff --git a/docs/en/sql-reference/statements/grant.md b/docs/en/sql-reference/statements/grant.md index 2b1262f7d3c9..1b2b63ba0e79 100644 --- a/docs/en/sql-reference/statements/grant.md +++ b/docs/en/sql-reference/statements/grant.md @@ -172,6 +172,7 @@ Hierarchy of privileges: - `SYSTEM FLUSH LOGS` - [INTROSPECTION](#grant-introspection) - `addressToLine` + - `addressToLineWithInlines` - `addressToSymbol` - `demangle` - [SOURCES](#grant-sources) @@ -430,6 +431,7 @@ Allows using [introspection](../../operations/optimizing-performance/sampling-qu - `INTROSPECTION`. Level: `GROUP`. Aliases: `INTROSPECTION FUNCTIONS` - `addressToLine`. Level: `GLOBAL` + - `addressToLineWithInlines`. Level: `GLOBAL` - `addressToSymbol`. Level: `GLOBAL` - `demangle`. Level: `GLOBAL` diff --git a/docs/en/sql-reference/statements/select/order-by.md b/docs/en/sql-reference/statements/select/order-by.md index ee6893812cc3..b24f0213e4e3 100644 --- a/docs/en/sql-reference/statements/select/order-by.md +++ b/docs/en/sql-reference/statements/select/order-by.md @@ -285,7 +285,7 @@ ORDER BY expr [WITH FILL] [FROM const_expr] [TO const_expr] [STEP const_numeric_ `WITH FILL` can be applied for fields with Numeric (all kinds of float, decimal, int) or Date/DateTime types. When applied for `String` fields, missed values are filled with empty strings. 
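+
+As a quick illustration, a minimal sketch over synthetic data (a numeric `STEP` applied to a `Date` column is counted in days, as described below; the filled rows get an empty string in `source`):
+
+``` sql
+SELECT toDate('2021-01-01') + number * 3 AS d, 'original' AS source
+FROM numbers(3)
+ORDER BY d WITH FILL STEP 1;
+```
+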
 When `FROM const_expr` not defined sequence of filling use minimal `expr` field value from `ORDER BY`.
 When `TO const_expr` not defined sequence of filling use maximum `expr` field value from `ORDER BY`.
-When `STEP const_numeric_expr` defined then `const_numeric_expr` interprets `as is` for numeric types as `days` for Date type and as `seconds` for DateTime type.
+When `STEP const_numeric_expr` is defined, then `const_numeric_expr` is interpreted `as is` for numeric types, as `days` for the Date type, and as `seconds` for the DateTime type. It also supports the [INTERVAL](https://clickhouse.com/docs/en/sql-reference/data-types/special-data-types/interval/) data type representing time and date intervals.
 When `STEP const_numeric_expr` omitted then sequence of filling use `1.0` for numeric type, `1 day` for Date type and `1 second` for DateTime type.
 
 Example of a query without `WITH FILL`:
@@ -402,4 +402,85 @@ Result:
 └────────────┴────────────┴──────────┘
 ```
 
+The following query uses an `INTERVAL` of 1 day as the fill step for column `d1`:
+
+``` sql
+SELECT
+    toDate((number * 10) * 86400) AS d1,
+    toDate(number * 86400) AS d2,
+    'original' AS source
+FROM numbers(10)
+WHERE (number % 3) = 1
+ORDER BY
+    d1 WITH FILL STEP INTERVAL 1 DAY,
+    d2 WITH FILL;
+```
+
+Result:
+``` text
+┌─────────d1─┬─────────d2─┬─source───┐
+│ 1970-01-11 │ 1970-01-02 │ original │
+│ 1970-01-12 │ 1970-01-01 │          │
+│ 1970-01-13 │ 1970-01-01 │          │
+│ 1970-01-14 │ 1970-01-01 │          │
+│ 1970-01-15 │ 1970-01-01 │          │
+│ 1970-01-16 │ 1970-01-01 │          │
+│ 1970-01-17 │ 1970-01-01 │          │
+│ 1970-01-18 │ 1970-01-01 │          │
+│ 1970-01-19 │ 1970-01-01 │          │
+│ 1970-01-20 │ 1970-01-01 │          │
+│ 1970-01-21 │ 1970-01-01 │          │
+│ 1970-01-22 │ 1970-01-01 │          │
+│ 1970-01-23 │ 1970-01-01 │          │
+│ 1970-01-24 │ 1970-01-01 │          │
+│ 1970-01-25 │ 1970-01-01 │          │
+│ 1970-01-26 │ 1970-01-01 │          │
+│ 1970-01-27 │ 1970-01-01 │          │
+│ 1970-01-28 │ 1970-01-01 │          │
+│ 1970-01-29 │ 1970-01-01 │          │
+│ 1970-01-30 │ 1970-01-01 │          │
+│ 1970-01-31 │ 1970-01-01 │          │
+│ 1970-02-01 │ 1970-01-01 │          │
+│ 1970-02-02 │ 1970-01-01 │          │
+│ 1970-02-03 │ 1970-01-01 │          │
+│ 1970-02-04 │ 1970-01-01 │          │
+│ 1970-02-05 │ 1970-01-01 │          │
+│ 1970-02-06 │ 1970-01-01 │          │
+│ 1970-02-07 │ 1970-01-01 │          │
+│ 1970-02-08 │ 1970-01-01 │          │
+│ 1970-02-09 │ 1970-01-01 │          │
+│ 1970-02-10 │ 1970-01-05 │ original │
+│ 1970-02-11 │ 1970-01-01 │          │
+│ 1970-02-12 │ 1970-01-01 │          │
+│ 1970-02-13 │ 1970-01-01 │          │
+│ 1970-02-14 │ 1970-01-01 │          │
+│ 1970-02-15 │ 1970-01-01 │          │
+│ 1970-02-16 │ 1970-01-01 │          │
+│ 1970-02-17 │ 1970-01-01 │          │
+│ 1970-02-18 │ 1970-01-01 │          │
+│ 1970-02-19 │ 1970-01-01 │          │
+│ 1970-02-20 │ 1970-01-01 │          │
+│ 1970-02-21 │ 1970-01-01 │          │
+│ 1970-02-22 │ 1970-01-01 │          │
+│ 1970-02-23 │ 1970-01-01 │          │
+│ 1970-02-24 │ 1970-01-01 │          │
+│ 1970-02-25 │ 1970-01-01 │          │
+│ 1970-02-26 │ 1970-01-01 │          │
+│ 1970-02-27 │ 1970-01-01 │          │
+│ 1970-02-28 │ 1970-01-01 │          │
+│ 1970-03-01 │ 1970-01-01 │          │
+│ 1970-03-02 │ 1970-01-01 │          │
+│ 1970-03-03 │ 1970-01-01 │          │
+│ 1970-03-04 │ 1970-01-01 │          │
+│ 1970-03-05 │ 1970-01-01 │          │
+│ 1970-03-06 │ 1970-01-01 │          │
+│ 1970-03-07 │ 1970-01-01 │          │
+│ 1970-03-08 │ 1970-01-01 │          │
+│ 1970-03-09 │ 1970-01-01 │          │
+│ 1970-03-10 │ 1970-01-01 │          │
+│ 1970-03-11 │ 1970-01-01 │          │
+│ 1970-03-12 │ 1970-01-08 │ original │
+└────────────┴────────────┴──────────┘
+```
+
 [Original article](https://clickhouse.com/docs/en/sql-reference/statements/select/order-by/)
diff --git a/docs/en/sql-reference/statements/system.md b/docs/en/sql-reference/statements/system.md
index 23d57c225866..b71853f29dd5 100644
--- a/docs/en/sql-reference/statements/system.md
+++ b/docs/en/sql-reference/statements/system.md
@@ -72,7 +72,7 @@ Reloads all [CatBoost](../../guides/apply-catboost-model.md#applying-catboost-mo
 
 **Syntax**
 
 ```sql
-SYSTEM RELOAD MODELS
+SYSTEM RELOAD MODELS [ON CLUSTER cluster_name]
 ```
 
 ## RELOAD MODEL {#query_language-system-reload-model}
 
@@ -82,7 +82,7 @@ Completely reloads a CatBoost model `model_name` if the configuration was update
 
 **Syntax**
 
 ```sql
-SYSTEM RELOAD MODEL
+SYSTEM RELOAD MODEL [ON CLUSTER cluster_name]
 ```
 
 ## RELOAD FUNCTIONS {#query_language-system-reload-functions}
 
@@ -92,8 +92,8 @@ Reloads all registered [executable user defined functions](../functions/index.md
 
 **Syntax**
 
 ```sql
-RELOAD FUNCTIONS
-RELOAD FUNCTION function_name
+RELOAD FUNCTIONS [ON CLUSTER cluster_name]
+RELOAD FUNCTION [ON CLUSTER cluster_name] function_name
 ```
 
 ## DROP DNS CACHE {#query_language-system-drop-dns-cache}
diff --git a/docs/en/sql-reference/statements/use.md b/docs/en/sql-reference/statements/use.md
index 41cba58bb9dd..841c23d333dd 100644
--- a/docs/en/sql-reference/statements/use.md
+++ b/docs/en/sql-reference/statements/use.md
@@ -3,14 +3,14 @@ toc_priority: 53
 toc_title: USE
 ---
 
-# USE 语句 {#use}
+# USE Statement {#use}
 
 ``` sql
 USE db
 ```
 
-用于设置会话的当前数据库。
+Lets you set the current database for the session.
 
-如果查询语句中没有在表名前面以加点的方式指明数据库名, 则用当前数据库进行搜索。
+The current database is used for searching for tables if the database is not explicitly defined in the query with a dot before the table name.
 
-使用 HTTP 协议时无法进行此查询,因为没有会话的概念。
+This query can’t be made when using the HTTP protocol, since there is no concept of a session.
diff --git a/docs/en/sql-reference/syntax.md b/docs/en/sql-reference/syntax.md
index 207b2b82cd2b..19efef3dc6a4 100644
--- a/docs/en/sql-reference/syntax.md
+++ b/docs/en/sql-reference/syntax.md
@@ -30,7 +30,7 @@ There may be any number of space symbols between syntactical constructions (incl
 
 ClickHouse supports either SQL-style and C-style comments:
 
-- SQL-style comments start with `--` and continue to the end of the line, a space after `--` can be omitted.
+- SQL-style comments start with `--`, `#!`, or `# ` and continue to the end of the line; the space after `--` and `#!` can be omitted.
 - C-style are from `/*` to `*/`and can be multiline, spaces are not required either.
 
 ## Keywords {#syntax-keywords}
@@ -106,9 +106,9 @@ In queries, you can check `NULL` using the [IS NULL](../sql-reference/operators/
 
 ### Heredoc {#heredeoc}
 
-A [heredoc](https://en.wikipedia.org/wiki/Here_document) is a way to define a string (often multiline), while maintaining the original formatting. A heredoc is defined as a custom string literal, placed between two `$` symbols, for example `$heredoc$`. A value between two heredocs is processed "as-is".
+A [heredoc](https://en.wikipedia.org/wiki/Here_document) is a way to define a string (often multiline), while maintaining the original formatting. A heredoc is defined as a custom string literal, placed between two `$` symbols, for example `$heredoc$`. A value between two heredocs is processed "as-is".
 
-You can use a heredoc to embed snippets of SQL, HTML, or XML code, etc.
+You can use a heredoc to embed snippets of SQL, HTML, or XML code, etc.
 **Example**
diff --git a/docs/ko/images/column-oriented.gif b/docs/ko/images/column-oriented.gif
new file mode 100644
index 000000000000..d5ac7c82848c
Binary files /dev/null and b/docs/ko/images/column-oriented.gif differ
diff --git a/docs/ko/images/logo.svg b/docs/ko/images/logo.svg
new file mode 100644
index 000000000000..b5ab923ff653
--- /dev/null
+++ b/docs/ko/images/logo.svg
@@ -0,0 +1 @@
+ 
\ No newline at end of file
diff --git a/docs/ko/images/play.png b/docs/ko/images/play.png
new file mode 100644
index 000000000000..b75aebe40895
Binary files /dev/null and b/docs/ko/images/play.png differ
diff --git a/docs/ko/images/row-oriented.gif b/docs/ko/images/row-oriented.gif
new file mode 100644
index 000000000000..41395b5693e9
Binary files /dev/null and b/docs/ko/images/row-oriented.gif differ
diff --git a/docs/ko/index.md b/docs/ko/index.md
new file mode 100644
index 000000000000..f2a6396c0697
--- /dev/null
+++ b/docs/ko/index.md
@@ -0,0 +1,94 @@
+---
+toc_priority: 0
+toc_title: 목차
+---
+
+# ClickHouse란? {#what-is-clickhouse}
+
+ClickHouse®는 쿼리의 온라인 분석 처리(OLAP)를 위한 열 지향(column-oriented) 데이터베이스 관리 시스템(DBMS)입니다.
+
+"보통의" 행 지향(row-oriented) DBMS에서는 데이터가 다음과 같은 순서로 저장됩니다.
+
+| row | WatchID     | JavaEnable | Title              | GoodEvent | EventTime           |
+|-----|-------------|------------|--------------------|-----------|---------------------|
+| #0  | 89354350662 | 1          | Investor Relations | 1         | 2016-05-18 05:19:20 |
+| #1  | 90329509958 | 0          | Contact us         | 1         | 2016-05-18 08:10:20 |
+| #2  | 89953706054 | 1          | Mission            | 1         | 2016-05-18 07:38:00 |
+| #N  | …           | …          | …                  | …         | …                   |
+
+즉, 행과 관련된 모든 값들은 물리적으로 나란히 저장됩니다.
+
+행 지향(row-oriented) DBMS의 예시로는 MySQL, Postgres, 그리고 MS SQL 서버 등이 있습니다.
+
+열 지향 (column-oriented) DBMS에서는 데이터가 아래와 같은 방식으로 저장됩니다:
+
+| Row:        | #0                  | #1                  | #2                  | #N  |
+|-------------|---------------------|---------------------|---------------------|-----|
+| WatchID:    | 89354350662         | 90329509958         | 89953706054         | …   |
+| JavaEnable: | 1                   | 0                   | 1                   | …   |
+| Title:      | Investor Relations  | Contact us          | Mission             | …   |
+| GoodEvent:  | 1                   | 1                   | 1                   | …   |
+| EventTime:  | 2016-05-18 05:19:20 | 2016-05-18 08:10:20 | 2016-05-18 07:38:00 | …   |
+
+이 예에서는 데이터가 정렬된 순서만을 보여줍니다. 다른 열의 값들은 서로 분리되어 저장되고, 같은 열의 정보들은 함께 저장됩니다.
+
+열 지향(column-oriented) DBMS 의 종류는 Vertica, Paraccel (Actian Matrix and Amazon Redshift), Sybase IQ, Exasol, Infobright, InfiniDB, MonetDB (VectorWise and Actian Vector), LucidDB, SAP HANA, Google Dremel, Google PowerDrill, Druid, 그리고 kdb+ 등이 있습니다.
+
+데이터를 저장하기 위한 서로 다른 순서는 다른 시나리오에 더 적합합니다. 데이터 접근 시나리오에는 쿼리가 수행되는 빈도와 비율, 각 쿼리 유형(행, 열 및 바이트)에 대해 읽는 데이터의 양, 데이터 읽기와 업데이트 사이의 관계, 데이터의 작업 크기 및 로컬에서 사용되는 방법, 트랜잭션이 사용되는지 여부, 트랜잭션이 얼마나 격리되어 있는지, 데이터 복제 및 논리적 무결성에 대한 요구 사항, 각 쿼리 유형에 대한 대기 시간 및 처리량 요구 사항 등이 포함됩니다.
+
+시스템의 부하가 높을수록 사용 시나리오의 요구 사항에 맞게 시스템 설정을 사용자 지정하는 것이 더 중요하며 이 사용자 지정은 더욱 세분화됩니다. 상당히 다른 시나리오에 똑같이 적합한 시스템은 없습니다. 만약 높은 부하에서 시스템이 넓은 시나리오 집합에 대해 적응한다면 시스템은 모든 시나리오를 모두 제대로 처리하지 못하거나 가능한 시나리오 중 하나 또는 몇 개에 대해서만 잘 작동할 것입니다.
+
+## OLAP 시나리오의 중요 속성들 {#key-properties-of-olap-scenario}
+
+- 요청(request)의 대부분은 읽기 접근에 관한 것입니다.
+- 데이터는 단일 행이 아니라 상당히 큰 일괄 처리(\> 1000개 행)로 업데이트됩니다. 또는 전혀 업데이트되지 않습니다.
+- 데이터는 DB에 추가되지만 수정되지는 않습니다.
+- 읽기의 경우 DB에서 상당히 많은 수의 행이 추출되지만 열은 일부만 추출됩니다.
+- 테이블은 "넓습니다". 이는 열의 수가 많다는 것을 의미합니다.
+- 쿼리는 상대적으로 드뭅니다(일반적으로 서버당 수백 또는 초당 쿼리 미만).
+- 간단한 쿼리의 경우 약 50ms의 대기 시간이 허용됩니다.
+- 열 값은 숫자와 짧은 문자열(예: URL당 60바이트)과 같이 상당히 작습니다.
+- 단일 쿼리를 처리할 때 높은 처리량이 필요합니다(서버당 초당 최대 수십억 행).
+- 트랜잭션이 필요하지 않습니다.
+- 데이터 일관성에 대한 요구 사항이 낮습니다.
+- 쿼리당 하나의 큰 테이블이 존재하고 하나를 제외한 모든 테이블은 작습니다.
+- 쿼리 결과가 원본 데이터보다 훨씬 작습니다. 
즉, 데이터가 필터링되거나 집계되므로 결과가 단일 서버의 RAM에 꼭 들어맞습니다. + +OLAP 시나리오가 다른 일반적인 시나리오(OLTP 또는 키-값 액세스와 같은)와 매우 다르다는 것을 쉽게 알 수 있습니다. 따라서 적절한 성능을 얻으려면 분석 쿼리를 처리하기 위해 OLTP 또는 키-값 DB를 사용하는 것은 의미가 없습니다. 예를 들어 분석에 MongoDB나 Redis를 사용하려고 하면 OLAP 데이터베이스에 비해 성능이 매우 저하됩니다. + +## 왜 열 지향 데이터베이스가 OLAP 시나리오에 적합한가{#why-column-oriented-databases-work-better-in-the-olap-scenario} + +열 지향(column-oriented) 데이터베이스는 OLAP 시나리오에 더 적합합니다. 대부분의 쿼리를 처리하는 데 있어서 행 지향(row-oriented) 데이터베이스보다 100배 이상 빠릅니다. 그 이유는 아래에 자세히 설명되어 있지만 사실은 시각적으로 더 쉽게 설명할 수 있습니다. + +**행 지향 DBMS** + +![Row-oriented](images/row-oriented.gif#) + +**열 지향 DBMS** + +![Column-oriented](images/column-oriented.gif#) + +차이가 보이시나요? + +### 입출력 {#inputoutput} + +1. 분석 쿼리의 경우 적은 수의 테이블 열만 읽어야 합니다. 열 지향 데이터베이스에서는 필요한 데이터만 읽을 수 있습니다. 예를 들어 100개 중 5개의 열이 필요한 경우 I/O가 20배 감소할 것으로 예상할 수 있습니다. +2. 데이터는 패킷으로 읽히므로 압축하기가 더 쉽습니다. 열의 데이터도 압축하기 쉽습니다. 이것은 I/O의 볼륨을 더욱 감소시킵니다. +3. 감소된 I/O로 인해 시스템 캐시에 더 많은 데이터가 들어갑니다. + +예를 들어, "각 광고 플랫폼에 대한 레코드 수 계산" 쿼리는 압축되지 않은 1바이트를 차지하는 하나의 "광고 플랫폼 ID" 열을 읽어야 합니다. 트래픽의 대부분이 광고 플랫폼에서 발생하지 않은 경우 이 열의 최소 10배 압축을 기대할 수 있습니다. 빠른 압축 알고리즘을 사용하면 초당 최소 몇 기가바이트의 압축되지 않은 데이터의 속도로 데이터 압축 해제가 가능합니다. 즉, 이 쿼리는 단일 서버에서 초당 약 수십억 행의 속도로 처리될 수 있습니다. 이 속도는 정말 실제로 달성됩니다. + +### CPU {#cpu} + +쿼리를 수행하려면 많은 행을 처리해야 하므로 별도의 행이 아닌 전체 벡터에 대한 모든 연산을 디스패치하거나 쿼리 엔진을 구현하여 디스패치 비용이 거의 들지 않습니다. 반쯤 괜찮은 디스크 하위 시스템에서 이렇게 하지 않으면 쿼리 인터프리터가 불가피하게 CPU를 정지시킵니다. 데이터를 열에 저장하고 가능한 경우 열별로 처리하는 것이 좋습니다. + +이를 수행하기위한 두가지 방법이 있습니다. + +1. 벡터 엔진. 모든 연산은 별도의 값 대신 벡터에 대해 작성됩니다. 즉, 작업을 자주 호출할 필요가 없으며 파견 비용도 무시할 수 있습니다. 작업 코드에는 최적화된 내부 주기가 포함되어 있습니다. +2. 코드 생성. 쿼리에 대해 생성된 코드에는 모든 간접 호출이 있습니다. + +이것은 단순한 쿼리를 실행할 때 의미가 없기 때문에 "일반" 데이터베이스에서는 수행되지 않습니다. 그러나 예외가 있습니다. 예를 들어 MemSQL은 코드 생성을 사용하여 SQL 쿼리를 처리할 때 대기 시간을 줄입니다. (비교되게, 분석 DBMS는 대기 시간이 아닌 처리량 최적화가 필요합니다.) + +CPU 효율성을 위해 쿼리 언어는 선언적(SQL 또는 MDX)이거나 최소한 벡터(J, K)여야 합니다. 쿼리는 최적화를 허용하는 암시적 루프만 포함해야 합니다. + +{## [원문](https://clickhouse.com/docs/en/) ##} diff --git a/docs/ru/development/build-osx.md b/docs/ru/development/build-osx.md index a1192b509df0..48d92501f062 100644 --- a/docs/ru/development/build-osx.md +++ b/docs/ru/development/build-osx.md @@ -2,8 +2,13 @@ toc_priority: 65 toc_title: Сборка на Mac OS X --- + # Как собрать ClickHouse на Mac OS X {#how-to-build-clickhouse-on-mac-os-x} +!!! info "Вам не нужно собирать ClickHouse самостоятельно" + Вы можете установить предварительно собранный ClickHouse, как описано в [Быстром старте](https://clickhouse.com/#quick-start). + Следуйте инструкциям по установке для `macOS (Intel)` или `macOS (Apple Silicon)`. + Сборка должна запускаться с x86_64 (Intel) на macOS версии 10.15 (Catalina) и выше в последней версии компилятора Xcode's native AppleClang, Homebrew's vanilla Clang или в GCC-компиляторах. ## Установка Homebrew {#install-homebrew} diff --git a/docs/ru/engines/table-engines/mergetree-family/graphitemergetree.md b/docs/ru/engines/table-engines/mergetree-family/graphitemergetree.md index 9cd8eda0b876..117223127c05 100644 --- a/docs/ru/engines/table-engines/mergetree-family/graphitemergetree.md +++ b/docs/ru/engines/table-engines/mergetree-family/graphitemergetree.md @@ -99,13 +99,16 @@ patterns ``` text pattern + rule_type regexp function pattern + rule_type regexp age + precision ... pattern + rule_type regexp function age + precision @@ -129,12 +132,20 @@ default Поля для разделов `pattern` и `default`: -- `regexp` – шаблон имени метрики. +- `rule_type` - тип правила (применяется только к метрикам указанных типов), используется для разделения правил проверки плоских/теггированных метрик. 
Опциональное поле. Значение по умолчанию: `all`.
+Если используются метрики только одного типа или производительность проверки правил некритична, можно не использовать. По умолчанию создается только один тип правил для проверки. Иначе, если хотя бы для одного правила указано отличное от умолчания значение, создаются 2 независимых типа правил - для обычных (классические root.branch.leaf) и теггированных метрик (root.branch.leaf;tag1=value1).
+Правила по умолчанию попадают в оба правила обоих типов.
+Возможные значения:
+ - `all` (default) - универсальное правило, назначается также по умолчанию, если поле не задано
+ - `plain` - правило для плоских метрик (без тегов). Поле `regexp` обрабатывается как регулярное выражение.
+ - `tagged` - правило для теггированных метрик (метрика хранится в БД в формате `someName?tag1=value1&tag2=value2&tag3=value3`), регулярное выражение должно быть отсортировано по именам тегов, первым - значение тега `__name__`, если есть. Поле `regexp` обрабатывается как регулярное выражение.
 - `tag_list` - правило для теггированных метрик, простой DSL для упрощения задания регулярного выражения в формате тегов graphite `someName;tag1=value1;tag2=value2`, `someName` или `tag1=value1;tag2=value2`. Поле `regexp` транслируется в правило `tagged`. Сортировать по именам тегов не обязательно, оно отсортируется автоматически. Значение тега (но не имя) может быть регулярным выражением (например `env=(dev|staging)`).
+- `regexp` – шаблон имени метрики (регулярное выражение или DSL).
 - `age` – минимальный возраст данных в секундах.
 - `precision` – точность определения возраста данных в секундах. Должен быть делителем для 86400 (количество секунд в сутках).
 - `function` – имя агрегирующей функции, которую следует применить к данным, чей возраст оказался в интервале `[age, age + precision]`. Допустимые функции: min/max/any/avg. Avg вычисляется неточно, как среднее от средних.
 
-### Пример конфигурации {#configuration-example}
+### Пример конфигурации без разделения типа правил {#configuration-example}
 
 ``` xml
 
@@ -169,6 +180,80 @@ default
 ```
 
+### Пример конфигурации c разделением типа правил {#configuration-typed-example}
+
+``` xml
+
+    Version
+    
+        plain
+        click_cost
+        any
+        
+            0
+            5
+        
+        
+            86400
+            60
+        
+    
+    
+        tagged
+        ^((.*)|.)min\?
+        min
+        
+            0
+            5
+        
+        
+            86400
+            60
+        
+    
+    
+        tagged
+        
+        min
+        
+            0
+            5
+        
+        
+            86400
+            60
+        
+    
+    
+        tag_list
+        someName;tag2=value2
+        
+            0
+            5
+        
+        
+            86400
+            60
+        
+    
+    
+        max
+        
+            0
+            60
+        
+        
+            3600
+            300
+        
+        
+            86400
+            3600
+        
+    
+
+```
+
 !!! warning "Внимание"
     Прореживание данных производится во время слияний. Обычно для старых партиций слияния не запускаются, поэтому для прореживания надо инициировать незапланированное слияние используя [optimize](../../../sql-reference/statements/optimize.md). Или использовать дополнительные инструменты, например [graphite-ch-optimizer](https://github.com/innogames/graphite-ch-optimizer).
diff --git a/docs/ru/engines/table-engines/special/buffer.md b/docs/ru/engines/table-engines/special/buffer.md
index 0c1ae591ae3d..10b4b9645a23 100644
--- a/docs/ru/engines/table-engines/special/buffer.md
+++ b/docs/ru/engines/table-engines/special/buffer.md
@@ -48,10 +48,8 @@ CREATE TABLE merge.hits_buffer AS merge.hits ENGINE = Buffer(merge, hits, 16, 10
 
 Если у одного из столбцов таблицы Buffer и подчинённой таблицы не совпадает тип, то в лог сервера будет записано сообщение об ошибке и буфер будет очищен. То же самое происходит, если подчинённая таблица не существует в момент сброса буфера.
-Если есть необходимость выполнить ALTER для подчинённой таблицы и для таблицы Buffer, то рекомендуется удалить таблицу Buffer, затем выполнить ALTER подчинённой таблицы, а после создать таблицу Buffer заново.
-
 !!! attention "Внимание"
-    В релизах до 28 сентября 2020 года выполнение ALTER на таблице Buffer ломает структуру блоков и вызывает ошибку (см. [#15117](https://github.com/ClickHouse/ClickHouse/issues/15117)), поэтому удаление буфера и его пересоздание — единственный вариант миграции для данного движка. Перед выполнением ALTER на таблице Buffer убедитесь, что в вашей версии эта ошибка устранена.
+    В релизах до 26 октября 2021 года выполнение ALTER на таблице Buffer ломает структуру блоков и вызывает ошибку (см. [#15117](https://github.com/ClickHouse/ClickHouse/issues/15117) и [#30565](https://github.com/ClickHouse/ClickHouse/pull/30565)), поэтому удаление буфера и его пересоздание — единственный вариант миграции для данного движка. Перед выполнением ALTER на таблице Buffer убедитесь, что в вашей версии эта ошибка устранена.
 
 При нештатном перезапуске сервера, данные, находящиеся в буфере, будут потеряны.
diff --git a/docs/ru/operations/server-configuration-parameters/settings.md b/docs/ru/operations/server-configuration-parameters/settings.md
index d2cc133e0c91..5d667ef82388 100644
--- a/docs/ru/operations/server-configuration-parameters/settings.md
+++ b/docs/ru/operations/server-configuration-parameters/settings.md
@@ -105,7 +105,7 @@ ClickHouse проверяет условия для `min_part_size` и `min_part
 
 ```xml
 
-    
+    
 
 ```
@@ -118,7 +118,7 @@
 
     00112233445566778899aabbccddeeff
 
-    
+    
 
     1
diff --git a/docs/ru/sql-reference/functions/geo/h3.md b/docs/ru/sql-reference/functions/geo/h3.md
index 8f7b98f0a459..78e7bf2fa863 100644
--- a/docs/ru/sql-reference/functions/geo/h3.md
+++ b/docs/ru/sql-reference/functions/geo/h3.md
@@ -4,11 +4,11 @@ toc_title: "Функции для работы с индексами H3"
 
 # Функции для работы с индексами H3 {#h3index}
 
-[H3](https://eng.uber.com/h3/) — это система геокодирования, которая делит поверхность Земли на равные шестигранные ячейки. Система поддерживает иерархию (вложенность) ячеек, т.е. каждый "родительский" шестигранник может быть поделен на семь одинаковых вложенных "дочерних" шестигранников, и так далее.
+[H3](https://eng.uber.com/h3/) — это система геокодирования, которая делит поверхность Земли на равные шестиугольные ячейки. Система поддерживает иерархию (вложенность) ячеек, т.е. каждый "родительский" шестиугольник может быть поделен на семь одинаковых вложенных "дочерних" шестиугольников, и так далее.
 
 Уровень вложенности называется "разрешением" и может принимать значение от `0` до `15`, где `0` соответствует "базовым" ячейкам самого верхнего уровня (наиболее крупным).
 
-Для каждой точки, имеющей широту и долготу, можно получить 64-битный индекс H3, соответствующий номеру шестигранной ячейки, где эта точка находится.
+Для каждой точки, имеющей широту и долготу, можно получить 64-битный индекс H3, соответствующий номеру шестиугольной ячейки, где эта точка находится.
 
 Индексы H3 используются, в основном, для геопозиционирования и расчета расстояний.
 
@@ -24,7 +24,7 @@ h3IsValid(h3index)
 
 **Параметр**
 
-- `h3index` — идентификатор шестигранника. Тип данных: [UInt64](../../../sql-reference/data-types/int-uint.md).
+- `h3index` — идентификатор шестиугольника. Тип данных: [UInt64](../../../sql-reference/data-types/int-uint.md).
 
 **Возвращаемые значения**
 
@@ -61,7 +61,7 @@ h3GetResolution(h3index)
 
 **Параметр**
 
-- `h3index` — идентификатор шестигранника. Тип данных: [UInt64](../../../sql-reference/data-types/int-uint.md).
+- `h3index` — идентификатор шестиугольника. Тип данных: [UInt64](../../../sql-reference/data-types/int-uint.md).
 
 **Возвращаемые значения**
 
@@ -88,7 +88,7 @@ SELECT h3GetResolution(639821929606596015) AS resolution;
 
 ## h3EdgeAngle {#h3edgeangle}
 
-Рассчитывает средний размер стороны шестигранника [H3](#h3index) в градусах.
+Рассчитывает средний размер стороны шестиугольника [H3](#h3index) в градусах.
 
 **Синтаксис**
 
@@ -102,7 +102,7 @@ h3EdgeAngle(resolution)
 
 **Возвращаемое значение**
 
-- Средняя длина стороны шестигранника [H3](#h3index) в градусах. Тип данных: [Float64](../../../sql-reference/data-types/float.md).
+- Средняя длина стороны шестиугольника [H3](#h3index) в градусах. Тип данных: [Float64](../../../sql-reference/data-types/float.md).
 
 **Пример**
 
@@ -122,7 +122,7 @@ SELECT h3EdgeAngle(10) AS edgeAngle;
 
 ## h3EdgeLengthM {#h3edgelengthm}
 
-Рассчитывает средний размер стороны шестигранника [H3](#h3index) в метрах.
+Рассчитывает средний размер стороны шестиугольника [H3](#h3index) в метрах.
 
 **Синтаксис**
 
@@ -136,7 +136,7 @@ h3EdgeLengthM(resolution)
 
 **Возвращаемое значение**
 
-- Средняя длина стороны шестигранника H3 в метрах, тип — [Float64](../../../sql-reference/data-types/float.md).
+- Средняя длина стороны шестиугольника H3 в метрах, тип — [Float64](../../../sql-reference/data-types/float.md).
 
 **Пример**
 
@@ -172,7 +172,7 @@ geoToH3(lon, lat, resolution)
 
 **Возвращаемые значения**
 
-- Порядковый номер шестигранника.
+- Порядковый номер шестиугольника.
 - 0 в случае ошибки.
 
 Тип данных: [UInt64](../../../sql-reference/data-types/int-uint.md).
@@ -195,7 +195,7 @@ SELECT geoToH3(37.79506683, 55.71290588, 15) AS h3Index;
 
 ## h3ToGeo {#h3togeo}
 
-Возвращает географические координаты долготы и широты, соответствующие указанному [H3](#h3index)-индексу.
+Возвращает географические координаты долготы и широты центра шестиугольника, соответствующие указанному [H3](#h3index)-индексу.
 
 **Синтаксис**
 
@@ -265,7 +265,7 @@ SELECT h3ToGeoBoundary(644325524701193974) AS coordinates;
 
 ## h3kRing {#h3kring}
 
-Возвращает [H3](#h3index)-индексы шестигранников в радиусе `k` от данного в произвольном порядке.
+Возвращает [H3](#h3index)-индексы шестиугольников в радиусе `k` от данного в произвольном порядке.
 
 **Синтаксис**
 
@@ -275,7 +275,7 @@ h3kRing(h3index, k)
 
 **Аргументы**
 
-- `h3index` — идентификатор шестигранника. Тип данных: [UInt64](../../../sql-reference/data-types/int-uint.md).
+- `h3index` — идентификатор шестиугольника. Тип данных: [UInt64](../../../sql-reference/data-types/int-uint.md).
 - `k` — радиус. Тип данных: [целое число](../../../sql-reference/data-types/int-uint.md)
 
 **Возвращаемые значения**
@@ -607,7 +607,7 @@ h3IsResClassIII(index)
 
 **Параметр**
 
-- `index` — порядковый номер шестигранника. Тип: [UInt64](../../../sql-reference/data-types/int-uint.md).
+- `index` — порядковый номер шестиугольника. Тип: [UInt64](../../../sql-reference/data-types/int-uint.md).
 
 **Возвращаемые значения**
 
@@ -644,7 +644,7 @@ h3IsPentagon(index)
 
 **Параметр**
 
-- `index` — порядковый номер шестигранника. Тип: [UInt64](../../../sql-reference/data-types/int-uint.md).
+- `index` — порядковый номер шестиугольника. Тип: [UInt64](../../../sql-reference/data-types/int-uint.md).
**Возвращаемые значения** diff --git a/docs/ru/sql-reference/functions/logical-functions.md b/docs/ru/sql-reference/functions/logical-functions.md index 6ba55dca30ff..ac4e226b2d2c 100644 --- a/docs/ru/sql-reference/functions/logical-functions.md +++ b/docs/ru/sql-reference/functions/logical-functions.md @@ -70,7 +70,7 @@ SELECT and(NULL, 1, 10, -2); **Синтаксис** ``` sql -and(val1, val2...) +or(val1, val2...) ``` Чтобы вычислять функцию `or` по короткой схеме, используйте настройку [short_circuit_function_evaluation](../../operations/settings/settings.md#short-circuit-function-evaluation). Если настройка включена, то выражение `vali` вычисляется только для строк, где условие `((NOT val1) AND (NOT val2) AND ... AND (NOT val{i-1}))` верно. Например, при выполнении запроса `SELECT or(number = 0, intDiv(1, number) != 0) FROM numbers(10)` не будет сгенерировано исключение из-за деления на ноль. diff --git a/docs/ru/sql-reference/statements/create/table.md b/docs/ru/sql-reference/statements/create/table.md index 6601276d5739..8567a0ff2db9 100644 --- a/docs/ru/sql-reference/statements/create/table.md +++ b/docs/ru/sql-reference/statements/create/table.md @@ -246,6 +246,46 @@ CREATE TABLE codec_example ENGINE = MergeTree() ``` +### Кодеки шифрования {#create-query-encryption-codecs} + +Эти кодеки не сжимают данные, вместо этого они зашифровывают данные на диске. Воспользоваться кодеками можно, только когда ключ шифрования задан параметрами [шифрования](../../../operations/server-configuration-parameters/settings.md#server-settings-encryption). Обратите внимание: ставить кодеки шифрования имеет смысл в самый конец цепочки кодеков, потому что зашифрованные данные, как правило, нельзя сжать релевантным образом. + +Кодеки шифрования: + +- `CODEC('AES-128-GCM-SIV')` — Зашифровывает данные с помощью AES-128 в режиме [RFC 8452](https://tools.ietf.org/html/rfc8452) GCM-SIV. +- `CODEC('AES-256-GCM-SIV')` — Зашифровывает данные с помощью AES-256 в режиме GCM-SIV. + +Эти кодеки используют фиксированный одноразовый ключ шифрования. Таким образом, это детерминированное шифрование. Оно совместимо с поддерживающими дедупликацию движками, в частности, [ReplicatedMergeTree](../../../engines/table-engines/mergetree-family/replication.md). Однако у шифрования имеется недостаток: если дважды зашифровать один и тот же блок данных, текст на выходе получится одинаковым, и злоумышленник, у которого есть доступ к диску, заметит эту эквивалентность (при этом доступа к содержимому он не получит). + +!!! attention "Внимание" + Большинство движков, включая семейство `MergeTree`, создают на диске индексные файлы, не применяя кодеки. А значит, в том случае, если зашифрованный столбец индексирован, на диске отобразится незашифрованный текст. + +!!! attention "Внимание" + Если вы выполняете запрос SELECT с упоминанием конкретного значения в зашифрованном столбце (например, при использовании секции WHERE), это значение может появиться в [system.query_log](../../../operations/system-tables/query_log.md). Рекомендуем отключить логирование. + +**Пример** + +```sql +CREATE TABLE mytable +( + x String Codec(AES_128_GCM_SIV) +) +ENGINE = MergeTree ORDER BY x; +``` + +!!!note "Замечание" + Если необходимо применить сжатие, это нужно явно прописать в запросе. Без этого будет выполнено только шифрование данных. 
+ +**Пример** + +```sql +CREATE TABLE mytable +( + x String Codec(Delta, LZ4, AES_128_GCM_SIV) +) +ENGINE = MergeTree ORDER BY x; +``` + ## Временные таблицы {#temporary-tables} ClickHouse поддерживает временные таблицы со следующими характеристиками: diff --git a/docs/ru/sql-reference/statements/create/user.md b/docs/ru/sql-reference/statements/create/user.md index f6248d97ba90..5c82424892b0 100644 --- a/docs/ru/sql-reference/statements/create/user.md +++ b/docs/ru/sql-reference/statements/create/user.md @@ -43,7 +43,7 @@ CREATE USER [IF NOT EXISTS | OR REPLACE] name1 [ON CLUSTER cluster_name1] - `HOST ANY` — Пользователь может подключиться с любого хоста. Используется по умолчанию. - `HOST LOCAL` — Пользователь может подключиться только локально. - `HOST NAME 'fqdn'` — Хост задается через FQDN. Например, `HOST NAME 'mysite.com'`. -- `HOST NAME REGEXP 'regexp'` — Позволяет использовать регулярные выражения [pcre](http://www.pcre.org/), чтобы задать хосты. Например, `HOST NAME REGEXP '.*\.mysite\.com'`. +- `HOST REGEXP 'regexp'` — Позволяет использовать регулярные выражения [pcre](http://www.pcre.org/), чтобы задать хосты. Например, `HOST REGEXP '.*\.mysite\.com'`. - `HOST LIKE 'template'` — Позволяет использовать оператор [LIKE](../../functions/string-search-functions.md#function-like) для фильтрации хостов. Например, `HOST LIKE '%'` эквивалентен `HOST ANY`; `HOST LIKE '%.mysite.com'` разрешает подключение со всех хостов в домене `mysite.com`. Также, чтобы задать хост, вы можете использовать `@` вместе с именем пользователя. Примеры: diff --git a/docs/ru/sql-reference/syntax.md b/docs/ru/sql-reference/syntax.md index 6705b1068fed..7e9260915a89 100644 --- a/docs/ru/sql-reference/syntax.md +++ b/docs/ru/sql-reference/syntax.md @@ -28,7 +28,7 @@ INSERT INTO t VALUES (1, 'Hello, world'), (2, 'abc'), (3, 'def') ## Комментарии {#comments} Поддерживаются комментарии в SQL-стиле и C-стиле. -Комментарии в SQL-стиле: от `--` до конца строки. Пробел после `--` может не ставиться. +Комментарии в SQL-стиле: от `--`, `#!` или `# ` до конца строки. Пробел после `--` и `#!` может не ставиться. Комментарии в C-стиле: от `/*` до `*/`. Такие комментарии могут быть многострочными. Пробелы тоже не обязательны. ## Ключевые слова {#syntax-keywords} @@ -104,9 +104,9 @@ INSERT INTO t VALUES (1, 'Hello, world'), (2, 'abc'), (3, 'def') ### Heredoc {#heredeoc} -Синтаксис [heredoc](https://ru.wikipedia.org/wiki/Heredoc-синтаксис) — это способ определения строк с сохранением исходного формата (часто с переносом строки). `Heredoc` задается как произвольный строковый литерал между двумя символами `$`, например `$heredoc$`. Значение между двумя `heredoc` обрабатывается "как есть". +Синтаксис [heredoc](https://ru.wikipedia.org/wiki/Heredoc-синтаксис) — это способ определения строк с сохранением исходного формата (часто с переносом строки). `Heredoc` задается как произвольный строковый литерал между двумя символами `$`, например `$heredoc$`. Значение между двумя `heredoc` обрабатывается "как есть". -Синтаксис `heredoc` часто используют для вставки кусков кода SQL, HTML, XML и т.п. +Синтаксис `heredoc` часто используют для вставки кусков кода SQL, HTML, XML и т.п. 
**Пример** diff --git a/docs/tools/blog.py b/docs/tools/blog.py index bfc8c0908e95..e4fb6f77865f 100644 --- a/docs/tools/blog.py +++ b/docs/tools/blog.py @@ -62,7 +62,7 @@ def build_for_lang(lang, args): strict=True, theme=theme_cfg, nav=blog_nav, - copyright='©2016–2021 ClickHouse, Inc.', + copyright='©2016–2022 ClickHouse, Inc.', use_directory_urls=True, repo_name='ClickHouse/ClickHouse', repo_url='https://github.com/ClickHouse/ClickHouse/', @@ -97,10 +97,6 @@ def build_for_lang(lang, args): with open(os.path.join(args.blog_output_dir, lang, 'rss.xml'), 'w') as f: f.write(rss_template.render({'config': raw_config})) - # TODO: AMP for blog - # if not args.skip_amp: - # amp.build_amp(lang, args, cfg) - logging.info(f'Finished building {lang} blog') except exceptions.ConfigurationError as e: diff --git a/docs/tools/cmake_in_clickhouse_generator.py b/docs/tools/cmake_in_clickhouse_generator.py index e66915d4a397..aa4cbbddd189 100644 --- a/docs/tools/cmake_in_clickhouse_generator.py +++ b/docs/tools/cmake_in_clickhouse_generator.py @@ -39,11 +39,6 @@ def build_entity(path: str, entity: Entity, line_comment: Tuple[int, str]) -> No if name in entities: return - # cannot escape the { in macro option description -> invalid AMP html - # Skipping "USE_INTERNAL_${LIB_NAME_UC}_LIBRARY" - if "LIB_NAME_UC" in name: - return - if len(default) == 0: formatted_default: str = "`OFF`" elif default[0] == "$": @@ -140,13 +135,6 @@ def generate_cmake_flags_files() -> None: f.write(entities[k][1] + "\n") ignored_keys.append(k) - f.write("\n\n### External libraries system/bundled mode\n" + table_header) - - for k in sorted_keys: - if k.startswith("USE_INTERNAL_"): - f.write(entities[k][1] + "\n") - ignored_keys.append(k) - f.write("\n\n### Other flags\n" + table_header) for k in sorted(set(sorted_keys).difference(set(ignored_keys))): diff --git a/docs/tools/single_page.py b/docs/tools/single_page.py index cf41e2b78c24..3d32ba30a21a 100644 --- a/docs/tools/single_page.py +++ b/docs/tools/single_page.py @@ -90,7 +90,10 @@ def concatenate(lang, docs_path, single_page_file, nav): line) # If failed to replace the relative link, print to log - if '../' in line: + # But with some exceptions: + # - "../src/" -- for cmake-in-clickhouse.md (link to sources) + # - "../usr/share" -- changelog entry that has "../usr/share/zoneinfo" + if '../' in line and (not '../usr/share' in line) and (not '../src/' in line): logging.info('Failed to resolve relative link:') logging.info(path) logging.info(line) diff --git a/docs/zh/engines/database-engines/index.md b/docs/zh/engines/database-engines/index.md index 10be2e0f0416..e4647da154d5 100644 --- a/docs/zh/engines/database-engines/index.md +++ b/docs/zh/engines/database-engines/index.md @@ -26,4 +26,6 @@ toc_title: Introduction - [Replicated](../../engines/database-engines/replicated.md) +- [SQLite](../../engines/database-engines/sqlite.md) + [来源文章](https://clickhouse.com/docs/en/database_engines/) diff --git a/docs/zh/engines/database-engines/sqlite.md b/docs/zh/engines/database-engines/sqlite.md deleted file mode 120000 index 776734647c22..000000000000 --- a/docs/zh/engines/database-engines/sqlite.md +++ /dev/null @@ -1 +0,0 @@ -../../../en/engines/database-engines/sqlite.md \ No newline at end of file diff --git a/docs/zh/engines/database-engines/sqlite.md b/docs/zh/engines/database-engines/sqlite.md new file mode 100644 index 000000000000..48dca38e4afa --- /dev/null +++ b/docs/zh/engines/database-engines/sqlite.md @@ -0,0 +1,80 @@ +--- +toc_priority: 32 +toc_title: SQLite +--- + 
+# SQLite {#sqlite} + +允许连接到[SQLite](https://www.sqlite.org/index.html)数据库,并支持ClickHouse和SQLite交换数据, 执行 `INSERT` 和 `SELECT` 查询。 + +## 创建一个数据库 {#creating-a-database} + +``` sql + CREATE DATABASE sqlite_database + ENGINE = SQLite('db_path') +``` + +**引擎参数** + +- `db_path` — SQLite 数据库文件的路径. + +## 数据类型的支持 {#data_types-support} + +| SQLite | ClickHouse | +|---------------|---------------------------------------------------------| +| INTEGER | [Int32](../../sql-reference/data-types/int-uint.md) | +| REAL | [Float32](../../sql-reference/data-types/float.md) | +| TEXT | [String](../../sql-reference/data-types/string.md) | +| BLOB | [String](../../sql-reference/data-types/string.md) | + +## 技术细节和建议 {#specifics-and-recommendations} + +SQLite将整个数据库(定义、表、索引和数据本身)存储为主机上的单个跨平台文件。在写入过程中,SQLite会锁定整个数据库文件,因此写入操作是顺序执行的。读操作可以是多任务的。 +SQLite不需要服务管理(如启动脚本)或基于`GRANT`和密码的访问控制。访问控制是通过授予数据库文件本身的文件系统权限来处理的。 + +## 使用示例 {#usage-example} + +数据库在ClickHouse,连接到SQLite: + +``` sql +CREATE DATABASE sqlite_db ENGINE = SQLite('sqlite.db'); +SHOW TABLES FROM sqlite_db; +``` + +``` text +┌──name───┐ +│ table1 │ +│ table2 │ +└─────────┘ +``` + +展示数据表中的内容: + +``` sql +SELECT * FROM sqlite_db.table1; +``` + +``` text +┌─col1──┬─col2─┐ +│ line1 │ 1 │ +│ line2 │ 2 │ +│ line3 │ 3 │ +└───────┴──────┘ +``` +从ClickHouse表插入数据到SQLite表: + +``` sql +CREATE TABLE clickhouse_table(`col1` String,`col2` Int16) ENGINE = MergeTree() ORDER BY col2; +INSERT INTO clickhouse_table VALUES ('text',10); +INSERT INTO sqlite_db.table1 SELECT * FROM clickhouse_table; +SELECT * FROM sqlite_db.table1; +``` + +``` text +┌─col1──┬─col2─┐ +│ line1 │ 1 │ +│ line2 │ 2 │ +│ line3 │ 3 │ +│ text │ 10 │ +└───────┴──────┘ +``` diff --git a/docs/zh/engines/table-engines/integrations/hive.md b/docs/zh/engines/table-engines/integrations/hive.md new file mode 100644 index 000000000000..aa2c82d902aa --- /dev/null +++ b/docs/zh/engines/table-engines/integrations/hive.md @@ -0,0 +1,416 @@ +--- +toc_priority: 4 +toc_title: Hive +--- + +# Hive {#hive} + +Hive引擎允许对HDFS Hive表执行 `SELECT` 查询。目前它支持如下输入格式: + +-文本:只支持简单的标量列类型,除了 `Binary` + +- ORC:支持简单的标量列类型,除了`char`; 只支持 `array` 这样的复杂类型 + +- Parquet:支持所有简单标量列类型;只支持 `array` 这样的复杂类型 + +## 创建表 {#creating-a-table} + +``` sql +CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] +( + name1 [type1] [ALIAS expr1], + name2 [type2] [ALIAS expr2], + ... +) ENGINE = Hive('thrift://host:port', 'database', 'table'); +PARTITION BY expr +``` +查看[CREATE TABLE](../../../sql-reference/statements/create/table.md#create-table-query)查询的详细描述。 + +表的结构可以与原来的Hive表结构有所不同: +- 列名应该与原来的Hive表相同,但你可以使用这些列中的一些,并以任何顺序,你也可以使用一些从其他列计算的别名列。 +- 列类型与原Hive表的列类型保持一致。 +- “Partition by expression”应与原Hive表保持一致,“Partition by expression”中的列应在表结构中。 + +**引擎参数** + +- `thrift://host:port` — Hive Metastore 地址 + +- `database` — 远程数据库名. + +- `table` — 远程数据表名. 
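+
+作为参考,下面是一个最小的建表示意(示意性示例:其中的 Metastore 地址、库名、表名和列均为假设值,列需与 Hive 表中的同名列对应):
+
+``` sql
+CREATE TABLE hive_example
+(
+    `f_int` Int32,
+    `f_string` String,
+    `day` String
+)
+ENGINE = Hive('thrift://localhost:9083', 'test', 'test_orc')
+PARTITION BY day
+```
+
+完整的端到端示例见下文的使用示例。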
+ +## 使用示例 {#usage-example} + +### 如何使用HDFS文件系统的本地缓存 +我们强烈建议您为远程文件系统启用本地缓存。基准测试显示,如果使用缓存,它的速度会快两倍。 + +在使用缓存之前,请将其添加到 `config.xml` +``` xml + + true + local_cache + 559096952 + 1048576 + +``` + + +- enable: 开启后,ClickHouse将为HDFS (远程文件系统)维护本地缓存。 +- root_dir: 必需的。用于存储远程文件系统的本地缓存文件的根目录。 +- limit_size: 必需的。本地缓存文件的最大大小(单位为字节)。 +- bytes_read_before_flush: 从远程文件系统下载文件时,刷新到本地文件系统前的控制字节数。缺省值为1MB。 + +当ClickHouse为远程文件系统启用了本地缓存时,用户仍然可以选择不使用缓存,并在查询中设置`use_local_cache_for_remote_fs = 0 `, `use_local_cache_for_remote_fs` 默认为 `false`。 + +### 查询 ORC 输入格式的Hive 表 + +#### 在 Hive 中建表 +``` text +hive > CREATE TABLE `test`.`test_orc`( + `f_tinyint` tinyint, + `f_smallint` smallint, + `f_int` int, + `f_integer` int, + `f_bigint` bigint, + `f_float` float, + `f_double` double, + `f_decimal` decimal(10,0), + `f_timestamp` timestamp, + `f_date` date, + `f_string` string, + `f_varchar` varchar(100), + `f_bool` boolean, + `f_binary` binary, + `f_array_int` array, + `f_array_string` array, + `f_array_float` array, + `f_array_array_int` array>, + `f_array_array_string` array>, + `f_array_array_float` array>) +PARTITIONED BY ( + `day` string) +ROW FORMAT SERDE + 'org.apache.hadoop.hive.ql.io.orc.OrcSerde' +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat' +LOCATION + 'hdfs://testcluster/data/hive/test.db/test_orc' + +OK +Time taken: 0.51 seconds + +hive > insert into test.test_orc partition(day='2021-09-18') select 1, 2, 3, 4, 5, 6.11, 7.22, 8.333, current_timestamp(), current_date(), 'hello world', 'hello world', 'hello world', true, 'hello world', array(1, 2, 3), array('hello world', 'hello world'), array(float(1.1), float(1.2)), array(array(1, 2), array(3, 4)), array(array('a', 'b'), array('c', 'd')), array(array(float(1.11), float(2.22)), array(float(3.33), float(4.44))); +OK +Time taken: 36.025 seconds + +hive > select * from test.test_orc; +OK +1 2 3 4 5 6.11 7.22 8 2021-11-05 12:38:16.314 2021-11-05 hello world hello world hello world true hello world [1,2,3] ["hello world","hello world"] [1.1,1.2] [[1,2],[3,4]] [["a","b"],["c","d"]] [[1.11,2.22],[3.33,4.44]] 2021-09-18 +Time taken: 0.295 seconds, Fetched: 1 row(s) +``` + +#### 在 ClickHouse 中建表 + +ClickHouse中的表,从上面创建的Hive表中获取数据: + +``` sql +CREATE TABLE test.test_orc +( + `f_tinyint` Int8, + `f_smallint` Int16, + `f_int` Int32, + `f_integer` Int32, + `f_bigint` Int64, + `f_float` Float32, + `f_double` Float64, + `f_decimal` Float64, + `f_timestamp` DateTime, + `f_date` Date, + `f_string` String, + `f_varchar` String, + `f_bool` Bool, + `f_binary` String, + `f_array_int` Array(Int32), + `f_array_string` Array(String), + `f_array_float` Array(Float32), + `f_array_array_int` Array(Array(Int32)), + `f_array_array_string` Array(Array(String)), + `f_array_array_float` Array(Array(Float32)), + `day` String +) +ENGINE = Hive('thrift://202.168.117.26:9083', 'test', 'test_orc') +PARTITION BY day + +``` + +``` sql +SELECT * FROM test.test_orc settings input_format_orc_allow_missing_columns = 1\G +``` + +``` text +SELECT * +FROM test.test_orc +SETTINGS input_format_orc_allow_missing_columns = 1 + +Query id: c3eaffdc-78ab-43cd-96a4-4acc5b480658 + +Row 1: +────── +f_tinyint: 1 +f_smallint: 2 +f_int: 3 +f_integer: 4 +f_bigint: 5 +f_float: 6.11 +f_double: 7.22 +f_decimal: 8 +f_timestamp: 2021-12-04 04:00:44 +f_date: 2021-12-03 +f_string: hello world +f_varchar: hello world +f_bool: true +f_binary: hello world +f_array_int: [1,2,3] +f_array_string: ['hello world','hello world'] +f_array_float: 
[1.1,1.2]
+f_array_array_int:    [[1,2],[3,4]]
+f_array_array_string: [['a','b'],['c','d']]
+f_array_array_float:  [[1.11,2.22],[3.33,4.44]]
+day:                  2021-09-18
+
+
+1 rows in set. Elapsed: 0.078 sec.
+```
+
+### 查询 Parquet 输入格式的Hive 表
+
+#### 在 Hive 中建表
+``` text
+hive >
+CREATE TABLE `test`.`test_parquet`(
+  `f_tinyint` tinyint,
+  `f_smallint` smallint,
+  `f_int` int,
+  `f_integer` int,
+  `f_bigint` bigint,
+  `f_float` float,
+  `f_double` double,
+  `f_decimal` decimal(10,0),
+  `f_timestamp` timestamp,
+  `f_date` date,
+  `f_string` string,
+  `f_varchar` varchar(100),
+  `f_char` char(100),
+  `f_bool` boolean,
+  `f_binary` binary,
+  `f_array_int` array,
+  `f_array_string` array,
+  `f_array_float` array,
+  `f_array_array_int` array>,
+  `f_array_array_string` array>,
+  `f_array_array_float` array>)
+PARTITIONED BY (
+  `day` string)
+ROW FORMAT SERDE
+  'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
+STORED AS INPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
+OUTPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
+LOCATION
+  'hdfs://testcluster/data/hive/test.db/test_parquet'
+OK
+Time taken: 0.51 seconds
+
+hive > insert into test.test_parquet partition(day='2021-09-18') select 1, 2, 3, 4, 5, 6.11, 7.22, 8.333, current_timestamp(), current_date(), 'hello world', 'hello world', 'hello world', true, 'hello world', array(1, 2, 3), array('hello world', 'hello world'), array(float(1.1), float(1.2)), array(array(1, 2), array(3, 4)), array(array('a', 'b'), array('c', 'd')), array(array(float(1.11), float(2.22)), array(float(3.33), float(4.44)));
+OK
+Time taken: 36.025 seconds
+
+hive > select * from test.test_parquet;
+OK
+1	2	3	4	5	6.11	7.22	8	2021-12-14 17:54:56.743	2021-12-14	hello world	hello world	hello world	true	hello world	[1,2,3]	["hello world","hello world"]	[1.1,1.2]	[[1,2],[3,4]]	[["a","b"],["c","d"]]	[[1.11,2.22],[3.33,4.44]]	2021-09-18
+Time taken: 0.766 seconds, Fetched: 1 row(s)
+```
+
+#### 在 ClickHouse 中建表
+
+ClickHouse 中的表, 从上面创建的Hive表中获取数据:
+
+``` sql
+CREATE TABLE test.test_parquet
+(
+    `f_tinyint` Int8,
+    `f_smallint` Int16,
+    `f_int` Int32,
+    `f_integer` Int32,
+    `f_bigint` Int64,
+    `f_float` Float32,
+    `f_double` Float64,
+    `f_decimal` Float64,
+    `f_timestamp` DateTime,
+    `f_date` Date,
+    `f_string` String,
+    `f_varchar` String,
+    `f_char` String,
+    `f_bool` Bool,
+    `f_binary` String,
+    `f_array_int` Array(Int32),
+    `f_array_string` Array(String),
+    `f_array_float` Array(Float32),
+    `f_array_array_int` Array(Array(Int32)),
+    `f_array_array_string` Array(Array(String)),
+    `f_array_array_float` Array(Array(Float32)),
+    `day` String
+)
+ENGINE = Hive('thrift://localhost:9083', 'test', 'test_parquet')
+PARTITION BY day
+```
+
+``` sql
+SELECT * FROM test.test_parquet settings input_format_parquet_allow_missing_columns = 1\G
+```
+
+``` text
+SELECT *
+FROM test_parquet
+SETTINGS input_format_parquet_allow_missing_columns = 1
+
+Query id: 4e35cf02-c7b2-430d-9b81-16f438e5fca9
+
+Row 1:
+──────
+f_tinyint:            1
+f_smallint:           2
+f_int:                3
+f_integer:            4
+f_bigint:             5
+f_float:              6.11
+f_double:             7.22
+f_decimal:            8
+f_timestamp:          2021-12-14 17:54:56
+f_date:               2021-12-14
+f_string:             hello world
+f_varchar:            hello world
+f_char:               hello world
+f_bool:               true
+f_binary:             hello world
+f_array_int:          [1,2,3]
+f_array_string:       ['hello world','hello world']
+f_array_float:        [1.1,1.2]
+f_array_array_int:    [[1,2],[3,4]]
+f_array_array_string: [['a','b'],['c','d']]
+f_array_array_float:  [[1.11,2.22],[3.33,4.44]]
+day:                  2021-09-18
+
+1 rows in set.
Elapsed: 0.357 sec. +``` + +### 查询文本输入格式的Hive表 + +#### 在Hive 中建表 + +``` text +hive > +CREATE TABLE `test`.`test_text`( + `f_tinyint` tinyint, + `f_smallint` smallint, + `f_int` int, + `f_integer` int, + `f_bigint` bigint, + `f_float` float, + `f_double` double, + `f_decimal` decimal(10,0), + `f_timestamp` timestamp, + `f_date` date, + `f_string` string, + `f_varchar` varchar(100), + `f_char` char(100), + `f_bool` boolean, + `f_binary` binary, + `f_array_int` array, + `f_array_string` array, + `f_array_float` array, + `f_array_array_int` array>, + `f_array_array_string` array>, + `f_array_array_float` array>) +PARTITIONED BY ( + `day` string) +ROW FORMAT SERDE + 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.mapred.TextInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' +LOCATION + 'hdfs://testcluster/data/hive/test.db/test_text' +Time taken: 0.1 seconds, Fetched: 34 row(s) + + +hive > insert into test.test_text partition(day='2021-09-18') select 1, 2, 3, 4, 5, 6.11, 7.22, 8.333, current_timestamp(), current_date(), 'hello world', 'hello world', 'hello world', true, 'hello world', array(1, 2, 3), array('hello world', 'hello world'), array(float(1.1), float(1.2)), array(array(1, 2), array(3, 4)), array(array('a', 'b'), array('c', 'd')), array(array(float(1.11), float(2.22)), array(float(3.33), float(4.44))); +OK +Time taken: 36.025 seconds + +hive > select * from test.test_text; +OK +1 2 3 4 5 6.11 7.22 8 2021-12-14 18:11:17.239 2021-12-14 hello world hello world hello world true hello world [1,2,3] ["hello world","hello world"] [1.1,1.2] [[1,2],[3,4]] [["a","b"],["c","d"]] [[1.11,2.22],[3.33,4.44]] 2021-09-18 +Time taken: 0.624 seconds, Fetched: 1 row(s) +``` + +#### 在 ClickHouse 中建表 + + +ClickHouse中的表, 从上面创建的Hive表中获取数据: +``` sql +CREATE TABLE test.test_text +( + `f_tinyint` Int8, + `f_smallint` Int16, + `f_int` Int32, + `f_integer` Int32, + `f_bigint` Int64, + `f_float` Float32, + `f_double` Float64, + `f_decimal` Float64, + `f_timestamp` DateTime, + `f_date` Date, + `f_string` String, + `f_varchar` String, + `f_char` String, + `f_bool` Bool, + `day` String +) +ENGINE = Hive('thrift://localhost:9083', 'test', 'test_text') +PARTITION BY day +``` + +``` sql +SELECT * FROM test.test_text settings input_format_skip_unknown_fields = 1, input_format_with_names_use_header = 1, date_time_input_format = 'best_effort'\G +``` + +``` text +SELECT * +FROM test.test_text +SETTINGS input_format_skip_unknown_fields = 1, input_format_with_names_use_header = 1, date_time_input_format = 'best_effort' + +Query id: 55b79d35-56de-45b9-8be6-57282fbf1f44 + +Row 1: +────── +f_tinyint: 1 +f_smallint: 2 +f_int: 3 +f_integer: 4 +f_bigint: 5 +f_float: 6.11 +f_double: 7.22 +f_decimal: 8 +f_timestamp: 2021-12-14 18:11:17 +f_date: 2021-12-14 +f_string: hello world +f_varchar: hello world +f_char: hello world +f_bool: true +day: 2021-09-18 +``` diff --git a/docs/zh/engines/table-engines/integrations/index.md b/docs/zh/engines/table-engines/integrations/index.md index 0c34ae078a01..5ed4a555f9ca 100644 --- a/docs/zh/engines/table-engines/integrations/index.md +++ b/docs/zh/engines/table-engines/integrations/index.md @@ -19,3 +19,5 @@ ClickHouse 提供了多种方式来与外部系统集成,包括表引擎。像 - [EmbeddedRocksDB](../../../engines/table-engines/integrations/embedded-rocksdb.md) - [RabbitMQ](../../../engines/table-engines/integrations/rabbitmq.md) - [PostgreSQL](../../../engines/table-engines/integrations/postgresql.md) +- 
[SQLite](../../../engines/table-engines/integrations/sqlite.md)
+- [Hive](../../../engines/table-engines/integrations/hive.md)
diff --git a/docs/zh/faq/general/columnar-database.md b/docs/zh/faq/general/columnar-database.md
deleted file mode 120000
index b7557b620101..000000000000
--- a/docs/zh/faq/general/columnar-database.md
+++ /dev/null
@@ -1 +0,0 @@
-../../../en/faq/general/columnar-database.md
\ No newline at end of file
diff --git a/docs/zh/faq/general/columnar-database.md b/docs/zh/faq/general/columnar-database.md
new file mode 100644
index 000000000000..185deaa7406b
--- /dev/null
+++ b/docs/zh/faq/general/columnar-database.md
@@ -0,0 +1,25 @@
+---
+title: 什么是列存储数据库?
+toc_hidden: true
+toc_priority: 101
+---
+
+# 什么是列存储数据库? {#what-is-a-columnar-database}
+
+列存储数据库独立存储每个列的数据。这样,执行查询时只需从磁盘读取该查询用到的列的数据。其代价是,影响整行的操作会按比例变得更昂贵。列存储数据库的同义词是面向列的数据库管理系统。ClickHouse就是这样一个典型的例子。
+
+列存储数据库的主要优点是:
+
+- 查询往往只使用许多列中的少数几列。
+- 针对大量数据的聚合查询。
+- 按列压缩数据。
+
+下面是构建报表时传统的面向行系统和柱状数据库之间的区别:
+
+**传统行存储**
+![传统行存储](https://clickhouse.com/docs/en/images/row-oriented.gif)
+
+**列存储**
+![列存储](https://clickhouse.com/docs/en/images/column-oriented.gif)
+
+列存储数据库是分析应用程序的首选,因为它允许在一个表中有许多列以防万一,但不会在读取查询执行时为未使用的列付出代价。面向列的数据库是为大数据处理而设计的,因为和数据仓库一样,它们通常使用分布式的低成本硬件集群来提高吞吐量。ClickHouse结合了[分布式](../../engines/table-engines/special/distributed.md)和[复制式](../../engines/table-engines/mergetree-family/replication.md)两类表。
\ No newline at end of file
diff --git a/docs/zh/faq/general/dbms-naming.md b/docs/zh/faq/general/dbms-naming.md
deleted file mode 120000
index 0df856af0cad..000000000000
--- a/docs/zh/faq/general/dbms-naming.md
+++ /dev/null
@@ -1 +0,0 @@
-../../../en/faq/general/dbms-naming.md
\ No newline at end of file
diff --git a/docs/zh/faq/general/dbms-naming.md b/docs/zh/faq/general/dbms-naming.md
new file mode 100644
index 000000000000..8d4353f9322e
--- /dev/null
+++ b/docs/zh/faq/general/dbms-naming.md
@@ -0,0 +1,17 @@
+---
+title: "\u201CClickHouse\u201D 有什么含义?"
+toc_hidden: true
+toc_priority: 10
+---
+
+# “ClickHouse” 有什么含义? {#what-does-clickhouse-mean}
+
+它是“**点击**流”和“数据**仓库**”的组合。它来自于Yandex最初的用例。在Metrica网站上,ClickHouse本应该保存人们在互联网上的所有点击记录,现在它仍然在做这项工作。你可以在[ClickHouse history](../../introduction/history.md)页面上阅读更多关于这个用例的信息。
+
+这种由两部分组成的含义带来了两个结果:
+
+- 唯一正确的写“Click**H**ouse”的方式是用大写H。
+- 如果需要缩写,请使用“**CH**”。由于一些历史原因,缩写CK在中国也很流行,主要是因为中文中最早的一个关于ClickHouse的演讲使用了这种形式。
+
+!!! info "有趣的事实"
+    在ClickHouse闻名于世的多年之后,这种结合两个各有含义的词来命名的方法,在卡内基梅隆大学数据库副教授[Andy Pavlo做的研究](https://www.cs.cmu.edu/~pavlo/blog/2020/03/on-naming-a-database-management-system.html)中被评为最佳的数据库命名方式,ClickHouse与Postgres共同获得“史上最佳数据库名”奖。
diff --git a/docs/zh/faq/general/how-do-i-contribute-code-to-clickhouse.md b/docs/zh/faq/general/how-do-i-contribute-code-to-clickhouse.md
deleted file mode 120000
index 5ac9a6153864..000000000000
--- a/docs/zh/faq/general/how-do-i-contribute-code-to-clickhouse.md
+++ /dev/null
@@ -1 +0,0 @@
-../../../en/faq/general/how-do-i-contribute-code-to-clickhouse.md
\ No newline at end of file
diff --git a/docs/zh/faq/general/how-do-i-contribute-code-to-clickhouse.md b/docs/zh/faq/general/how-do-i-contribute-code-to-clickhouse.md
new file mode 100644
index 000000000000..39d2d6392292
--- /dev/null
+++ b/docs/zh/faq/general/how-do-i-contribute-code-to-clickhouse.md
@@ -0,0 +1,17 @@
+---
+title: 我如何为ClickHouse贡献代码?
+toc_hidden: true
+toc_priority: 120
+---
+
+# 我如何为ClickHouse贡献代码? 
{#how-do-i-contribute-code-to-clickhouse}
+
+ClickHouse是一个[在GitHub上开发](https://github.com/ClickHouse/ClickHouse)的开源项目。
+
+按照惯例,贡献指南发布在源代码库根目录的 [CONTRIBUTING.md](https://github.com/ClickHouse/ClickHouse/blob/master/CONTRIBUTING.md) 文件中。
+
+如果你想对ClickHouse提出实质性的改动建议,建议先[在GitHub上创建一个issue](https://github.com/ClickHouse/ClickHouse/issues/new/choose),说明你打算做什么,与维护者和社区讨论之后再动手。这里有[此类RFC issue的例子](https://github.com/ClickHouse/ClickHouse/issues?q=is%3Aissue+is%3Aopen+rfc)。
+
+如果您的贡献与安全相关,也请查看[我们的安全政策](https://github.com/ClickHouse/ClickHouse/security/policy/)。
+
diff --git a/docs/zh/faq/general/index.md b/docs/zh/faq/general/index.md
deleted file mode 120000
index 5ff33ccb3600..000000000000
--- a/docs/zh/faq/general/index.md
+++ /dev/null
@@ -1 +0,0 @@
-../../../en/faq/general/index.md
\ No newline at end of file
diff --git a/docs/zh/faq/general/index.md b/docs/zh/faq/general/index.md
new file mode 100644
index 000000000000..65312a48d8fb
--- /dev/null
+++ b/docs/zh/faq/general/index.md
@@ -0,0 +1,27 @@
+---
+title: ClickHouse 有关常见问题
+toc_hidden_folder: true
+toc_priority: 1
+toc_title: General
+---
+
+# ClickHouse 有关常见问题 {#general-questions}
+
+常见问题:
+
+- [什么是 ClickHouse?](../../index.md#what-is-clickhouse)
+- [为何 ClickHouse 如此迅捷?](../../faq/general/why-clickhouse-is-so-fast.md)
+- [谁在使用 ClickHouse?](../../faq/general/who-is-using-clickhouse.md)
+- [“ClickHouse” 有什么含义?](../../faq/general/dbms-naming.md)
+- [“Не тормозит” 有什么含义?](../../faq/general/ne-tormozit.md)
+- [什么是 OLAP?](../../faq/general/olap.md)
+- [什么是列存储数据库?](../../faq/general/columnar-database.md)
+- [为何不使用 MapReduce等技术?](../../faq/general/mapreduce.md)
+- [我如何为 ClickHouse贡献代码?](../../faq/general/how-do-i-contribute-code-to-clickhouse.md)
+
+!!! info "没找到您需要的内容?"
+    请查阅 [其他 F.A.Q. 类别](../../faq/index.md) 或者从左侧导航栏浏览其他文档
+
+{## [原始文档](https://clickhouse.com/docs/en/faq/general/) ##}
diff --git a/docs/zh/faq/general/mapreduce.md b/docs/zh/faq/general/mapreduce.md
deleted file mode 120000
index 49b79ad4841f..000000000000
--- a/docs/zh/faq/general/mapreduce.md
+++ /dev/null
@@ -1 +0,0 @@
-../../../en/faq/general/mapreduce.md
\ No newline at end of file
diff --git a/docs/zh/faq/general/mapreduce.md b/docs/zh/faq/general/mapreduce.md
new file mode 100644
index 000000000000..f70ca8a25836
--- /dev/null
+++ b/docs/zh/faq/general/mapreduce.md
@@ -0,0 +1,13 @@
+---
+title: 为何不使用 MapReduce等技术?
+toc_hidden: true
+toc_priority: 110
+---
+
+# 为何不使用 MapReduce等技术? {#why-not-use-something-like-mapreduce}
+
+我们可以把MapReduce这样的系统看作分布式计算系统,其中的reduce操作基于分布式排序实现。这个领域最常见的开源解决方案是[Apache Hadoop](http://hadoop.apache.org),Yandex则使用其内部解决方案YT。
+
+这类系统因延迟很高而不适合在线查询,也就是说,它们无法被用作网页界面的后端;它们对实时数据更新也不是很有用。如果操作的结果和所有中间结果(如果有的话)都能放进单台服务器的内存中(在线查询通常如此),那么分布式排序并不是执行reduce操作的最佳方式,此时哈希表才是。优化map-reduce任务的一种常见方法,是用内存中的哈希表做预聚合(部分reduce),这一优化需要用户手动完成。在运行简单的map-reduce任务时,分布式排序是导致性能下降的主要原因之一。
+
+大多数MapReduce实现允许你在集群上执行任意代码,但声明式查询语言更适合OLAP,可以快速地进行实验。例如,Hadoop有Hive和Pig;也可以考虑Cloudera Impala、基于Spark的Shark(已过时),以及Spark SQL、Presto和Apache Drill。与专门的系统相比,这类方案运行分析查询的性能相当不理想;此外,相对较高的延迟也使它们难以用作Web界面的后端。
diff --git a/docs/zh/faq/index.md b/docs/zh/faq/index.md
index a44dbb31e897..1ba1b792fbd7 100644
--- a/docs/zh/faq/index.md
+++ b/docs/zh/faq/index.md
@@ -1,8 +1,48 @@
 ---
-machine_translated: true
-machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd
 toc_folder_title: F.A.Q. 
+toc_hidden: true toc_priority: 76 --- +# ClickHouse 问答 F.A.Q {#clickhouse-f-a-q} +本节文档是一个收集经常出现的ClickHouse相关问题的答案的地方。 + +类别: + +- **[常见问题](../faq/general/index.md)** + - [什么是 ClickHouse?](../index.md#what-is-clickhouse) + - [为何 ClickHouse 如此迅捷?](../faq/general/why-clickhouse-is-so-fast.md) + - [谁在使用 ClickHouse?](../faq/general/who-is-using-clickhouse.md) + - [“ClickHouse” 有什么含义?](../faq/general/dbms-naming.md) + - [ “Не тормозит” 有什么含义?](../faq/general/ne-tormozit.md) + - [什么是 OLAP?](../faq/general/olap.md) + - [什么是列存储数据库?](../faq/general/columnar-database.md) + - [为何不使用 MapReduce等技术?](../faq/general/mapreduce.md) + - [我如何为 ClickHouse贡献代码?](../faq/general/how-do-i-contribute-code-to-clickhouse.md) +- **[应用案例](../faq/use-cases/index.md)** + - [我能把 ClickHouse 作为时序数据库来使用吗?](../faq/use-cases/time-series.md) + - [我能把 ClickHouse 作为 key-value 键值存储吗?](../faq/use-cases/key-value.md) +- **[运维操作](../faq/operations/index.md)** + - [如果想在生产环境部署,需要用哪个版本的 ClickHouse 呢?](../faq/operations/production.md) + - [是否可能从 ClickHouse 数据表中删除所有旧的数据记录?](../faq/operations/delete-old-data.md) + - [ClickHouse支持多区域复制吗?](../faq/operations/multi-region-replication.md) +- **[集成开发](../faq/integration/index.md)** + - [如何从 ClickHouse 导出数据到一个文件?](../faq/integration/file-export.md) + - [如果我用ODBC链接Oracle数据库出现编码问题该怎么办?](../faq/integration/oracle-odbc.md) + +{## TODO +Question candidates: +- How to choose a primary key? +- How to add a column in ClickHouse? +- Too many parts +- How to filter ClickHouse table by an array column contents? +- How to insert all rows from one table to another of identical structure? +- How to kill a process (query) in ClickHouse? +- How to implement pivot (like in pandas)? +- How to remove the default ClickHouse user through users.d? +- Importing MySQL dump to ClickHouse +- Window function workarounds (row_number, lag/lead, running diff/sum/average) +##} + +{## [原始文档](https://clickhouse.com/docs/en/faq) ##} diff --git a/docs/zh/faq/integration/index.md b/docs/zh/faq/integration/index.md deleted file mode 120000 index 8323d6218a30..000000000000 --- a/docs/zh/faq/integration/index.md +++ /dev/null @@ -1 +0,0 @@ -../../../en/faq/integration/index.md \ No newline at end of file diff --git a/docs/zh/faq/integration/index.md b/docs/zh/faq/integration/index.md new file mode 100644 index 000000000000..2bfd728ec8ce --- /dev/null +++ b/docs/zh/faq/integration/index.md @@ -0,0 +1,21 @@ +--- +title: 关于集成ClickHouse和其他系统的问题 +toc_hidden_folder: true +toc_priority: 4 +toc_title: Integration +--- + +# 关于集成ClickHouse和其他系统的问题 {#question-about-integrating-clickhouse-and-other-systems} + +问题: + +- [如何从 ClickHouse 导出数据到一个文件?](../../faq/integration/file-export.md) +- [如何导入JSON到ClickHouse?](../../faq/integration/json-import.md) +- [如果我用ODBC链接Oracle数据库出现编码问题该怎么办?](../../faq/integration/oracle-odbc.md) + + + +!!! info "没看到你要找的东西吗?" 
+ 查看[其他faq类别](../../faq/index.md)或浏览左边栏中的主要文档文章。 + +{## [原文](https://clickhouse.com/docs/en/faq/integration/) ##} \ No newline at end of file diff --git a/docs/zh/faq/operations/index.md b/docs/zh/faq/operations/index.md deleted file mode 120000 index fd141164fdce..000000000000 --- a/docs/zh/faq/operations/index.md +++ /dev/null @@ -1 +0,0 @@ -../../../en/faq/operations/index.md \ No newline at end of file diff --git a/docs/zh/faq/operations/index.md b/docs/zh/faq/operations/index.md new file mode 100644 index 000000000000..cdf4b9622ec1 --- /dev/null +++ b/docs/zh/faq/operations/index.md @@ -0,0 +1,20 @@ +--- +title: 关于操作ClickHouse服务器和集群的问题 +toc_hidden_folder: true +toc_priority: 3 +toc_title: Operations +--- + +# 关于操作ClickHouse服务器和集群的问题 {#question-about-operating-clickhouse-servers-and-clusters} + +问题: + +- [如果想在生产环境部署,需要用哪个版本的 ClickHouse 呢?](../../faq/operations/production.md) +- [是否可能从 ClickHouse 数据表中删除所有旧的数据记录?](../../faq/operations/delete-old-data.md) +- [ClickHouse支持多区域复制吗?](../../faq/operations/multi-region-replication.md) + + +!!! info "没看到你要找的东西吗?" + 查看[其他faq类别](../../faq/index.md)或浏览左边栏中的主要文档文章。 + +{## [原文](https://clickhouse.com/docs/en/faq/production/) ##} diff --git a/docs/zh/faq/operations/multi-region-replication.md b/docs/zh/faq/operations/multi-region-replication.md deleted file mode 120000 index dbc985ee1fb9..000000000000 --- a/docs/zh/faq/operations/multi-region-replication.md +++ /dev/null @@ -1 +0,0 @@ -../../../en/faq/operations/multi-region-replication.md \ No newline at end of file diff --git a/docs/zh/faq/operations/multi-region-replication.md b/docs/zh/faq/operations/multi-region-replication.md new file mode 100644 index 000000000000..f5ab147bde6e --- /dev/null +++ b/docs/zh/faq/operations/multi-region-replication.md @@ -0,0 +1,14 @@ +--- +title: ClickHouse支持多区域复制吗? +toc_hidden: true +toc_priority: 30 +--- + +# ClickHouse支持多区域复制吗? {#does-clickhouse-support-multi-region-replication} + +简短的回答是“是的”。然而,我们建议将所有区域/数据中心之间的延迟保持在两位数字范围内,否则,在通过分布式共识协议时,写性能将受到影响。例如,美国海岸之间的复制可能会很好,但美国和欧洲之间就不行。 + +在配置方面,这与单区域复制没有区别,只是使用位于不同位置的主机作为副本。 + +更多信息,请参见[关于数据复制的完整文章](../../engines/table-engines/mergetree-family/replication.md)。 + diff --git a/docs/zh/faq/use-cases/index.md b/docs/zh/faq/use-cases/index.md deleted file mode 120000 index cc545acb0000..000000000000 --- a/docs/zh/faq/use-cases/index.md +++ /dev/null @@ -1 +0,0 @@ -../../../en/faq/use-cases/index.md \ No newline at end of file diff --git a/docs/zh/faq/use-cases/index.md b/docs/zh/faq/use-cases/index.md new file mode 100644 index 000000000000..cfd3270e3f2e --- /dev/null +++ b/docs/zh/faq/use-cases/index.md @@ -0,0 +1,18 @@ +--- +title: 关于ClickHouse使用案例的问题 +toc_hidden_folder: true +toc_priority: 2 +toc_title: 使用案例 +--- + +# 关于ClickHouse使用案例的问题 {#questions-about-clickhouse-use-cases} + +问题: + +- [我能把 ClickHouse 当做时序数据库来使用吗?](../../faq/use-cases/time-series.md) +- [我能把 ClickHouse 当做Key-value 键值存储来使用吗?](../../faq/use-cases/key-value.md) + +!!! info "没找到您所需要的内容?" 
请查看[其他常见问题类别](../../faq/index.md)或浏览左侧边栏中的主要文档文章。
+
+{## [原始文档](https://clickhouse.com/docs/en/faq/use-cases/) ##}
diff --git a/docs/zh/faq/use-cases/key-value.md b/docs/zh/faq/use-cases/key-value.md
deleted file mode 120000
index 63140458d129..000000000000
--- a/docs/zh/faq/use-cases/key-value.md
+++ /dev/null
@@ -1 +0,0 @@
-../../../en/faq/use-cases/key-value.md
\ No newline at end of file
diff --git a/docs/zh/faq/use-cases/key-value.md b/docs/zh/faq/use-cases/key-value.md
new file mode 100644
index 000000000000..ae47a9a8b256
--- /dev/null
+++ b/docs/zh/faq/use-cases/key-value.md
@@ -0,0 +1,16 @@
+---
+title: 我能把 ClickHouse 当做Key-value 键值存储来使用吗?
+toc_hidden: true
+toc_priority: 101
+---
+# 我能把 ClickHouse 当做Key-value 键值存储来使用吗? {#can-i-use-clickhouse-as-a-key-value-storage}
+
+简短的回答是**不能**。键值类工作负载在**不适合**{.text-danger}使用ClickHouse的场景清单中名列前茅。ClickHouse毕竟是一个[OLAP](../../faq/general/olap.md)系统,而市面上已有许多优秀的专用键值存储系统。
+
+然而,在某些场景下,用ClickHouse处理类似键值的查询仍然有意义。这通常出现在一些预算有限的产品中:主要工作负载是分析型的,很适合ClickHouse,但还有一些次要流程需要键值访问模式,其请求吞吐量不高,也没有严格的延迟要求。如果预算不受限制,你会为这种次要负载再部署一个专门的键值数据库;但实际上,多维护一套存储系统(监控、备份等)意味着额外的成本,往往值得避免。
+
+如果你决定不遵循上述建议,仍要在ClickHouse上运行类似键值的查询,这里有一些提示:
+
+- ClickHouse中点查询开销大的主要原因,是[MergeTree表引擎家族](../../engines/table-engines/mergetree-family/mergetree.md)所用的稀疏主索引。这个索引无法指向每一行具体的数据,而是每N行记录一个位置,系统必须从相邻的标记扫描到所需的行,途中会读取多余的数据。在键值场景中,通过`index_granularity`设置减小N的值可能会有帮助。
+- ClickHouse把每一列保存在一组单独的文件中,因此组装一整行需要逐一访问这些文件,文件数量随列数线性增长。所以在键值场景中,或许应该避免使用很多列,而是把全部有效数据放进单个`String`列,用某种序列化格式(如JSON、Protobuf或任何高效的格式)编码。
+- 还有一种替代方案:使用[Join](../../engines/table-engines/special/join.md)表引擎代替常规的`MergeTree`表,并用[joinGet](../../sql-reference/functions/other-functions.md#joinget)函数检索数据。它可以提供更好的查询性能,但可能存在一些易用性和可靠性问题。这里有一个[使用示例](https://github.com/ClickHouse/ClickHouse/blob/master/tests/queries/0_stateless/00800_versatile_storage_join.sql#L49-L51)。
diff --git a/docs/zh/faq/use-cases/time-series.md b/docs/zh/faq/use-cases/time-series.md
deleted file mode 120000
index 55cbcfc243fb..000000000000
--- a/docs/zh/faq/use-cases/time-series.md
+++ /dev/null
@@ -1 +0,0 @@
-../../../en/faq/use-cases/time-series.md
\ No newline at end of file
diff --git a/docs/zh/faq/use-cases/time-series.md b/docs/zh/faq/use-cases/time-series.md
new file mode 100644
index 000000000000..045179c8c6e7
--- /dev/null
+++ b/docs/zh/faq/use-cases/time-series.md
@@ -0,0 +1,21 @@
+---
+title: 我能把 ClickHouse 当做时序数据库来使用吗?
+toc_hidden: true
+toc_priority: 101
+---
+
+# 我能把 ClickHouse 当做时序数据库来使用吗? 
{#can-i-use-clickhouse-as-a-time-series-database}
+
+ClickHouse是面向[OLAP](../../faq/general/olap.md)工作负载的通用数据存储解决方案,而市面上有许多专门的时间序列数据库管理系统。然而,ClickHouse[对查询执行速度的专注](../../faq/general/why-clickhouse-is-so-fast.md)使得它在许多场景下性能优于这些专门的系统。关于这个话题已有很多独立的基准测试,这里不再赘述;如果时间序列正是你的用例,让我们把重点放在ClickHouse中对此重要的功能上。
+
+首先,ClickHouse提供了适用于典型时间序列数据的 **[专用编解码器](../../sql-reference/statements/create/table.md#create-query-specialized-codecs)**,既有常见的算法如`DoubleDelta`和`Gorilla`,也有ClickHouse特有的如`T64`。
+
+其次,时间序列查询通常只访问最近的数据,比如一天或一周以前的数据。使用同时具备高速NVMe/SSD驱动器和大容量HDD驱动器的服务器是有意义的:ClickHouse的[TTL](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes)特性允许把新鲜的热数据保存在快速磁盘上,并随着数据老化逐渐移动到较慢的磁盘;如果有需要,还可以对更旧的数据进行汇总或删除。
+
+尽管这与ClickHouse存储和处理原始数据的理念相违背,但你可以使用[materialized views](../../sql-reference/statements/create/view.md)来满足更苛刻的延迟或成本需求。
\ No newline at end of file
diff --git a/docs/zh/guides/apply-catboost-model.md b/docs/zh/guides/apply-catboost-model.md
index 72f5fa38e848..adc5b48eb555 100644
--- a/docs/zh/guides/apply-catboost-model.md
+++ b/docs/zh/guides/apply-catboost-model.md
@@ -1,6 +1,4 @@
 ---
-machine_translated: true
-machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd
 toc_priority: 41
 toc_title: "\u5E94\u7528CatBoost\u6A21\u578B"
 ---
@@ -10,10 +8,10 @@ toc_title: "\u5E94\u7528CatBoost\u6A21\u578B"
 
 [CatBoost](https://catboost.ai) 是一个由[Yandex](https://yandex.com/company/)开发的开源免费机器学习库。
 
-通过这篇指导,您将学会如何用SQL建模,使用ClickHouse预先训练好的模型来推断数据。
+通过本篇文档,您将学会如何用SQL语句调用已经存放在Clickhouse中的预训练模型来预测数据。
 
-在ClickHouse中应用CatBoost模型的一般过程:
+为了在ClickHouse中应用CatBoost模型,需要进行如下步骤:
 
 1. [创建数据表](#create-table).
 2. [将数据插入到表中](#insert-data-to-table).
@@ -22,24 +20,26 @@ toc_title: "\u5E94\u7528CatBoost\u6A21\u578B"
 
 有关训练CatBoost模型的详细信息,请参阅 [训练和模型应用](https://catboost.ai/docs/features/training.html#training).
 
+您可以通过[RELOAD MODEL](https://clickhouse.com/docs/en/sql-reference/statements/system/#query_language-system-reload-model)与[RELOAD MODELS](https://clickhouse.com/docs/en/sql-reference/statements/system/#query_language-system-reload-models)语句来重载CatBoost模型。
+
 ## 先决条件 {#prerequisites}
 
 请先安装 [Docker](https://docs.docker.com/install/)。
 
 !!! note "注"
-    [Docker](https://www.docker.com) 是一个软件平台,用户可以用来创建独立于其余系统、集成CatBoost和ClickHouse的容器。
+    [Docker](https://www.docker.com) 是一个软件平台,用户可以用Docker来创建独立于已有系统并集成了CatBoost和ClickHouse的容器。
 
 在应用CatBoost模型之前:
 
-**1.** 从容器仓库拉取docker映像 (https://hub.docker.com/r/yandex/tutorial-catboost-clickhouse) :
+**1.** 从容器仓库拉取示例docker镜像 (https://hub.docker.com/r/yandex/tutorial-catboost-clickhouse) :
 
 ``` bash
 $ docker pull yandex/tutorial-catboost-clickhouse
 ```
 
-此Docker映像包含运行CatBoost和ClickHouse所需的所有内容:代码、运行环境、库、环境变量和配置文件。
+此示例Docker镜像包含运行CatBoost和ClickHouse所需的所有内容:代码、运行时、库、环境变量和配置文件。
 
-**2.** 确保已成功拉取Docker映像:
+**2.** 确保已成功拉取Docker镜像:
 
 ``` bash
 $ docker image ls
@@ -47,7 +47,7 @@ REPOSITORY                            TAG                 IMAGE ID            CR
 yandex/tutorial-catboost-clickhouse   latest              622e4d17945b        22 hours ago        1.37GB
 ```
 
-**3.** 基于此映像启动一个Docker容器:
+**3.** 基于此镜像启动一个Docker容器:
 
 ``` bash
 $ docker run -it -p 8888:8888 yandex/tutorial-catboost-clickhouse
@@ -124,9 +124,9 @@ FROM amazon_train
 ## 3. 将CatBoost集成到ClickHouse中 {#integrate-catboost-into-clickhouse}
 
 !!! note "注"
-    **可跳过。** Docker映像包含运行CatBoost和ClickHouse所需的所有内容。
+    **可跳过。** 示例Docker镜像已经包含了运行CatBoost和ClickHouse所需的所有内容。
 
-CatBoost集成到ClickHouse步骤:
+为了将CatBoost集成进ClickHouse,需要进行如下步骤:
 
 **1.** 构建评估库。
 
 有关如何构建库文件的详细信息,请参阅 [CatBoost文件](https://catboost.ai/docs/concepts/c-plus-plus-api_dynamic-c-pluplus-wrapper.html). 
-**2.** 创建一个新目录(位置与名称可随意指定), 如 `data` 并将创建的库文件放入其中。 Docker映像已经包含了库 `data/libcatboostmodel.so`. +**2.** 创建一个新目录(位置与名称可随意指定), 如 `data` 并将创建的库文件放入其中。 示例Docker镜像已经包含了库 `data/libcatboostmodel.so`. **3.** 创建一个新目录来放配置模型, 如 `models`. **4.** 创建一个模型配置文件,如 `models/amazon_model.xml`. -**5.** 描述模型配置: +**5.** 修改模型配置: ``` xml @@ -165,9 +165,9 @@ CatBoost集成到ClickHouse步骤: /home/catboost/models/*_model.xml ``` -## 4. 运行从SQL推断的模型 {#run-model-inference} +## 4. 使用SQL调用预测模型 {#run-model-inference} -测试模型是否正常,运行ClickHouse客户端 `$ clickhouse client`. +为了测试模型是否正常,可以使用ClickHouse客户端 `$ clickhouse client`. 让我们确保模型能正常工作: @@ -189,7 +189,7 @@ LIMIT 10 ``` !!! note "注" - 函数 [modelEvaluate](../sql-reference/functions/other-functions.md#function-modelevaluate) 返回带有多类模型的每类原始预测的元组。 + 函数 [modelEvaluate](../sql-reference/functions/other-functions.md#function-modelevaluate) 会对多类别模型返回一个元组,其中包含每一类别的原始预测值。 执行预测: diff --git a/docs/zh/interfaces/http.md b/docs/zh/interfaces/http.md index 738b0365f466..16f51eac9a88 100644 --- a/docs/zh/interfaces/http.md +++ b/docs/zh/interfaces/http.md @@ -18,6 +18,17 @@ $ curl 'http://localhost:8123/' Ok. ``` +Web UI 可以通过这个地址访问: `http://localhost:8123/play`. +在运行状况检查脚本中,使用`GET /ping`请求。这个处理方法总是返回 "Ok"。(以换行结尾)。可从18.12.13版获得。请参见' /replicas_status '检查复制集的延迟。 + + +``` bash +$ curl 'http://localhost:8123/ping' +Ok. +$ curl 'http://localhost:8123/replicas_status' +Ok. +``` + 通过URL中的 `query` 参数来发送请求,或者发送POST请求,或者将查询的开头部分放在URL的`query`参数中,其他部分放在POST中(我们会在后面解释为什么这样做是有必要的)。URL的大小会限制在16KB,所以发送大型查询时要时刻记住这点。 如果请求成功,将会收到200的响应状态码和响应主体中的结果。 diff --git a/docs/zh/operations/requirements.md b/docs/zh/operations/requirements.md index c3013f738a28..964d7aa34f4c 100644 --- a/docs/zh/operations/requirements.md +++ b/docs/zh/operations/requirements.md @@ -1,59 +1,59 @@ --- toc_priority: 44 -toc_title: "要求" +toc_title: "必备条件" --- -# 要求 {#requirements} +# 必备条件 {#requirements} ## CPU {#cpu} -对于从预构建的deb包进行安装,请使用具有x86_64架构并支持SSE4.2指令的CPU。 要使用不支持SSE4.2或具有AArch64或PowerPC64LE体系结构的处理器运行ClickHouse,您应该从源代码构建ClickHouse。 +如果您使用预编译的DEB/RPM包安装ClickHouse,请使用支持SSE4.2指令集的x86_64架构的CPU。如果需要在不支持SSE4.2指令集的CPU上,或者在AArch64(ARM)和PowerPC64LE(IBM Power)架构上运行ClickHouse,您应该从源码编译ClickHouse。 -ClickHouse实现并行数据处理并使用所有可用的硬件资源。 在选择处理器时,考虑到ClickHouse在具有大量内核但时钟速率较低的配置中的工作效率要高于具有较少内核和较高时钟速率的配置。 例如,具有2600MHz的16核心优于具有3600MHz的8核心。 +ClickHouse实现了并行数据处理,处理时会使用所有的可用资源。在选择处理器时,请注意:ClickHouse在具有大量计算核、时钟频率稍低的平台上比计算核少、时钟频率高的平台上效率更高。例如,ClickHouse在16核 2.6GHz的CPU上运行速度高于8核 3.6GHz的CPU。 -建议使用 **睿频加速** 和 **超线程** 技术。 它显着提高了典型工作负载的性能。 +建议使用 **睿频加速** 和 **超线程** 技术。 它显着提高了正常工作负载的性能。 ## RAM {#ram} -我们建议使用至少4GB的RAM来执行重要的查询。 ClickHouse服务器可以使用少得多的RAM运行,但它需要处理查询的内存。 +我们建议使用至少4GB的内存来执行重要的查询。 ClickHouse服务器可以使用很少的内存运行,但它需要一定量的内存用于处理查询。 -RAM所需的体积取决于: +ClickHouse所需内存取决于: -- 查询的复杂性。 -- 查询中处理的数据量。 +- 查询的复杂程度。 +- 查询处理的数据量。 -要计算所需的RAM体积,您应该估计临时数据的大小 [GROUP BY](../sql-reference/statements/select/group-by.md#select-group-by-clause), [DISTINCT](../sql-reference/statements/select/distinct.md#select-distinct), [JOIN](../sql-reference/statements/select/join.md#select-join) 和您使用的其他操作。 +要计算所需的内存大小,您应该考虑用于[GROUP BY](../sql-reference/statements/select/group-by.md#select-group-by-clause)、[DISTINCT](../sql-reference/statements/select/distinct.md#select-distinct)、[JOIN](../sql-reference/statements/select/join.md#select-join) 和其他操作所需的临时数据量。 -ClickHouse可以使用外部存储器来存储临时数据。看 [在外部存储器中分组](../sql-reference/statements/select/group-by.md#select-group-by-in-external-memory) 有关详细信息。 +ClickHouse可以使用外部存储器来存储临时数据。详情请见[在外部存储器中分组](../sql-reference/statements/select/group-by.md#select-group-by-in-external-memory)。 ## 交换文件 
{#swap-file} -禁用生产环境的交换文件。 +请在生产环境禁用交换文件。 ## 存储子系统 {#storage-subsystem} 您需要有2GB的可用磁盘空间来安装ClickHouse。 -数据所需的存储量应单独计算。 评估应包括: +数据所需的存储空间应单独计算。预估存储容量时请考虑: -- 估计数据量。 +- 数据量 - 您可以采取数据的样本并从中获取行的平均大小。 然后将该值乘以计划存储的行数。 + 您可以对数据进行采样并计算每行的平均占用空间。然后将该值乘以计划存储的行数。 -- 数据压缩系数。 +- 数据压缩比 - 要估计数据压缩系数,请将数据的样本加载到ClickHouse中,并将数据的实际大小与存储的表的大小进行比较。 例如,点击流数据通常被压缩6-10倍。 + 要计算数据压缩比,请将样本数据写入ClickHouse,并将原始数据大小与ClickHouse实际存储的数据进行比较。例如,用户点击行为的原始数据压缩比通常为6-10。 -要计算要存储的最终数据量,请将压缩系数应用于估计的数据量。 如果计划将数据存储在多个副本中,则将估计的量乘以副本数。 +请将原始数据的大小除以压缩比来获得实际所需存储的大小。如果您打算将数据存放于几个副本中,请将存储容量乘上副本数。 ## 网络 {#network} -如果可能的话,使用10G或更高级别的网络。 +如果可能的话,请使用10G或更高级别的网络。 -网络带宽对于处理具有大量中间结果数据的分布式查询至关重要。 此外,网络速度会影响复制过程。 +网络带宽对于处理具有大量中间结果数据的分布式查询至关重要。此外,网络速度会影响复制过程。 ## 软件 {#software} -ClickHouse主要是为Linux系列操作系统开发的。 推荐的Linux发行版是Ubuntu。 `tzdata` 软件包应安装在系统中。 +ClickHouse主要是为Linux系列操作系统开发的。推荐的Linux发行版是Ubuntu。您需要检查`tzdata`(对于Ubuntu)软件包是否在安装ClickHouse之前已经安装。 -ClickHouse也可以在其他操作系统系列中工作。 查看详细信息 [开始](../getting-started/index.md) 文档的部分。 +ClickHouse也可以在其他操作系统系列中工作。详情请查看[开始](../getting-started/index.md)。 diff --git a/docs/zh/operations/settings/settings-users.md b/docs/zh/operations/settings/settings-users.md index ae75dddab581..d89b880328a9 100644 --- a/docs/zh/operations/settings/settings-users.md +++ b/docs/zh/operations/settings/settings-users.md @@ -1,5 +1,5 @@ --- -machine_translated: true +machine_translated: false machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd toc_priority: 63 toc_title: "\u7528\u6237\u8BBE\u7F6E" @@ -7,12 +7,12 @@ toc_title: "\u7528\u6237\u8BBE\u7F6E" # 用户设置 {#user-settings} -该 `users` 一节 `user.xml` 配置文件包含用户设置。 +`user.xml` 中的 `users` 配置段包含了用户配置 -!!! note "信息" +!!! note "提示" ClickHouse还支持 [SQL驱动的工作流](../access-rights.md#access-control) 用于管理用户。 我们建议使用它。 -的结构 `users` 科: +`users` 配置段的结构: ``` xml @@ -43,21 +43,21 @@ toc_title: "\u7528\u6237\u8BBE\u7F6E" ``` -### 用户名称/密码 {#user-namepassword} +### user_name/password {#user-namepassword} 密码可以以明文或SHA256(十六进制格式)指定。 -- 以明文形式分配密码 (**不推荐**),把它放在一个 `password` 元素。 +- 以明文形式分配密码 (**不推荐**),把它放在一个 `password` 配置段中。 例如, `qwerty`. 密码可以留空。 -- 要使用其SHA256散列分配密码,请将其放置在 `password_sha256_hex` 元素。 +- 要使用SHA256加密后的密码,请将其放置在 `password_sha256_hex` 配置段。 例如, `65e84be33532fb784c48129675f9eff3a682b27168c0ea744b2cf58ee02337c5`. - 如何从shell生成密码的示例: + 从shell生成加密密码的示例: PASSWORD=$(base64 < /dev/urandom | head -c8); echo "$PASSWORD"; echo -n "$PASSWORD" | sha256sum | tr -d '-' @@ -65,19 +65,19 @@ toc_title: "\u7528\u6237\u8BBE\u7F6E" -- 为了与MySQL客户端兼容,密码可以在双SHA1哈希中指定。 放进去 `password_double_sha1_hex` 元素。 +- 为了与MySQL客户端兼容,密码可以设置为双SHA1哈希加密, 请将其放置在 `password_double_sha1_hex` 配置段。 例如, `08b4a0f1de6ad37da17359e592c8d74788a83eb0`. - 如何从shell生成密码的示例: + 从shell生成密码的示例: PASSWORD=$(base64 < /dev/urandom | head -c8); echo "$PASSWORD"; echo -n "$PASSWORD" | sha1sum | tr -d '-' | xxd -r -p | sha1sum | tr -d '-' 结果的第一行是密码。 第二行是相应的双SHA1哈希。 -### 访问管理 {#access_management-user-setting} +### access_management {#access_management-user-setting} -此设置启用禁用使用SQL驱动 [访问控制和帐户管理](../access-rights.md#access-control) 对于用户。 +此设置可为用户启用或禁用 SQL-driven [访问控制和帐户管理](../access-rights.md#access-control) 。 可能的值: @@ -86,42 +86,42 @@ toc_title: "\u7528\u6237\u8BBE\u7F6E" 默认值:0。 -### 用户名称/网络 {#user-namenetworks} +### user_name/networks {#user-namenetworks} -用户可以从中连接到ClickHouse服务器的网络列表。 +用户访问来源列表 列表中的每个元素都可以具有以下形式之一: -- `` — IP address or network mask. +- `` — IP地址或网络掩码 例: `213.180.204.3`, `10.0.0.1/8`, `10.0.0.1/255.255.255.0`, `2a02:6b8::3`, `2a02:6b8::3/64`, `2a02:6b8::3/ffff:ffff:ffff:ffff::`. -- `` — Hostname. +- `` — 域名 示例: `example01.host.ru`. 
- 要检查访问,将执行DNS查询,并将所有返回的IP地址与对等地址进行比较。 + 为检查访问,将执行DNS查询,并将所有返回的IP地址与对端地址进行比较。 -- `` — Regular expression for hostnames. +- `` — 域名的正则表达式. 示例, `^example\d\d-\d\d-\d\.host\.ru$` - 要检查访问,a [DNS PTR查询](https://en.wikipedia.org/wiki/Reverse_DNS_lookup) 对对等体地址执行,然后应用指定的正则表达式。 然后,对PTR查询的结果执行另一个DNS查询,并将所有接收到的地址与对等地址进行比较。 我们强烈建议正则表达式以$结尾。 + 为检查访问,[DNS PTR查询](https://en.wikipedia.org/wiki/Reverse_DNS_lookup) 对对端地址执行,然后应用指定的正则表达式。 然后,以PTR查询的结果执行另一个DNS查询,并将所有接收到的地址与对端地址进行比较. 我们强烈建议正则表达式以$结尾. DNS请求的所有结果都将被缓存,直到服务器重新启动。 **例** -要从任何网络打开用户的访问权限,请指定: +要开启任意来源网络的访问, 请指定: ``` xml ::/0 ``` !!! warning "警告" - 从任何网络开放访问是不安全的,除非你有一个防火墙正确配置或服务器没有直接连接到互联网。 + 从任何网络开放访问是不安全的,除非你有一个正确配置的防火墙, 或者服务器没有直接连接到互联网。 -若要仅从本地主机打开访问权限,请指定: +若要限定本机访问, 请指定: ``` xml ::1 @@ -130,22 +130,21 @@ DNS请求的所有结果都将被缓存,直到服务器重新启动。 ### user_name/profile {#user-nameprofile} -您可以为用户分配设置配置文件。 设置配置文件在单独的部分配置 `users.xml` 文件 有关详细信息,请参阅 [设置配置文件](settings-profiles.md). +您可以为用户分配设置配置文件。 设置配置文件在`users.xml` 中有单独的配置段. 有关详细信息,请参阅 [设置配置文件](settings-profiles.md). -### 用户名称/配额 {#user-namequota} +### user_name/quota {#user-namequota} -配额允许您在一段时间内跟踪或限制资源使用情况。 配额在配置 `quotas` -一节 `users.xml` 配置文件。 +配额允许您在一段时间内跟踪或限制资源使用情况。 配额在`users.xml` 中的 `quotas` 配置段下. 您可以为用户分配配额。 有关配额配置的详细说明,请参阅 [配额](../quotas.md#quotas). -### 用户名/数据库 {#user-namedatabases} +### user_name/databases {#user-namedatabases} -在本节中,您可以限制ClickHouse返回的行 `SELECT` 由当前用户进行的查询,从而实现基本的行级安全性。 +在本配置段中,您可以限制ClickHouse中由当前用户进行的 `SELECT` 查询所返回的行,从而实现基本的行级安全性。 **示例** -以下配置强制该用户 `user1` 只能看到的行 `table1` 作为结果 `SELECT` 查询,其中的值 `id` 场是1000。 +以下配置使用户 `user1` 通过SELECT查询只能得到table1中id为1000的行 ``` xml @@ -159,6 +158,6 @@ DNS请求的所有结果都将被缓存,直到服务器重新启动。 ``` -该 `filter` 可以是导致任何表达式 [UInt8](../../sql-reference/data-types/int-uint.md)-键入值。 它通常包含比较和逻辑运算符。 从行 `database_name.table1` 其中,不会为此用户返回为0的筛选结果。 过滤是不兼容的 `PREWHERE` 操作和禁用 `WHERE→PREWHERE` 优化。 +该 `filter` 可以是[UInt8](../../sql-reference/data-types/int-uint.md)编码的任何表达式。 它通常包含比较和逻辑运算符, 当filter返回0时, database_name.table1 的该行结果将不会返回给用户.过滤不兼容 `PREWHERE` 操作并禁用 `WHERE→PREWHERE` 优化。 [原始文章](https://clickhouse.com/docs/en/operations/settings/settings_users/) diff --git a/docs/zh/sql-reference/ansi.md b/docs/zh/sql-reference/ansi.md index 0e7fa1d06c3c..5aad2cf52a8e 100644 --- a/docs/zh/sql-reference/ansi.md +++ b/docs/zh/sql-reference/ansi.md @@ -1,180 +1,189 @@ --- -machine_translated: true -machine_translated_rev: ad252bbb4f7e2899c448eb42ecc39ff195c8faa1 toc_priority: 40 toc_title: "ANSI\u517C\u5BB9\u6027" --- -# Ansi Sql兼容性的ClickHouse SQL方言 {#ansi-sql-compatibility-of-clickhouse-sql-dialect} +# ClickHouse SQL方言 与ANSI SQL的兼容性{#ansi-sql-compatibility-of-clickhouse-sql-dialect} !!! note "注" - 本文依赖于表38, “Feature taxonomy and definition for mandatory features”, Annex F of ISO/IEC CD 9075-2:2013. + 本文参考Annex G所著的[ISO/IEC CD 9075-2:2011](https://www.iso.org/obp/ui/#iso:std:iso-iec:9075:-2:ed-4:v1:en:sec:8)标准. 
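+
+在阅读下面的差异表之前,这里有一个可以直接验证其中第一条差异(数值字面量的类型)的最小示例;查询假设在默认设置下运行:
+
+``` sql
+-- 带小数点的字面量被解析为 Float64,而不是 ANSI SQL 规定的精确 Decimal
+SELECT toTypeName(1.0);                  -- 返回 Float64
+SELECT toTypeName(toDecimal32(1.0, 2));  -- 显式转换后得到 Decimal(9, 2)
+```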
## 行为差异 {#differences-in-behaviour} -下表列出了查询功能在ClickHouse中有效但不符合ANSI SQL标准的情况。 +下表列出了ClickHouse能够使用,但与ANSI SQL规定有差异的查询特性。 -| Feature ID | 功能名称 | 差异 | -|------------|--------------------|---------------------------------------------------------------------| -| E011 | 数值(Numeric)数据类型 | 带小数点的数值文字被解释为近似值 (`Float64`)而不是精确值 (`Decimal`) | -| E051-05 | SELECT字段可以重命名 | 字段不仅仅在SELECT结果中可被重命名 | -| E141-01 | 非空约束 | 表中每一列默认为`NOT NULL` | -| E011-04 | 算术运算符 | ClickHouse不会检查算法,并根据自定义规则更改结果数据类型,而是会溢出 | +| 功能ID | 功能名称 | 差异 | +| ------- | --------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| E011 | 数值型数据类型 | 带小数点的数字被视为近似值 (`Float64`)而不是精确值 (`Decimal`) | +| E051-05 | SELECT 的列可以重命名 | 字段重命名的作用范围不限于进行重命名的SELECT子查询(参考[表达式别名](https://clickhouse.com/docs/zh/sql-reference/syntax/#notes-on-usage)) | +| E141-01 | NOT NULL(非空)约束 | ClickHouse表中每一列默认为`NOT NULL` | +| E011-04 | 算术运算符 | ClickHouse在运算时会进行溢出,而不是四舍五入。此外会根据自定义规则修改结果数据类型(参考[溢出检查](https://clickhouse.com/docs/zh/sql-reference/data-types/decimal/#yi-chu-jian-cha)) | -## 功能匹配 {#feature-status} +## 功能状态 {#feature-status} -| Feature ID | 功能名称 | 匹配 | 评论 | -|------------|----------------------------------------------------------------|--------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| **E011** | **数字数据类型** | **部分**{.text-warning} | | -| E011-01 | 整型和小型数据类型 | 是 {.text-success} | | -| E011-02 | 真实、双精度和浮点数据类型数据类型 | 部分 {.text-warning} | `FLOAT()`, `REAL` 和 `DOUBLE PRECISION` 不支持 | -| E011-03 | 十进制和数值数据类型 | 部分 {.text-warning} | 只有 `DECIMAL(p,s)` 支持,而不是 `NUMERIC` | -| E011-04 | 算术运算符 | 是 {.text-success} | | -| E011-05 | 数字比较 | 是 {.text-success} | | -| E011-06 | 数字数据类型之间的隐式转换 | 否。 {.text-danger} | ANSI SQL允许在数值类型之间进行任意隐式转换,而ClickHouse依赖于具有多个重载的函数而不是隐式转换 | -| **E021** | **字符串类型** | **部分**{.text-warning} | | -| E021-01 | 字符数据类型 | 否。 {.text-danger} | | -| E021-02 | 字符变化数据类型 | 否。 {.text-danger} | `String` 行为类似,但括号中没有长度限制 | -| E021-03 | 字符文字 | 部分 {.text-warning} | 不自动连接连续文字和字符集支持 | -| E021-04 | 字符长度函数 | 部分 {.text-warning} | 非也。 `USING` 条款 | -| E021-05 | OCTET_LENGTH函数 | 非也。 {.text-danger} | `LENGTH` 表现类似 | -| E021-06 | SUBSTRING | 部分 {.text-warning} | 不支持 `SIMILAR` 和 `ESCAPE` 条款,否 `SUBSTRING_REGEX` 备选案文 | -| E021-07 | 字符串联 | 部分 {.text-warning} | 非也。 `COLLATE` 条款 | -| E021-08 | 上下功能 | 是 {.text-success} | | -| E021-09 | 修剪功能 | 是 {.text-success} | | -| E021-10 | 固定长度和可变长度字符串类型之间的隐式转换 | 否。 {.text-danger} | ANSI SQL允许在字符串类型之间进行任意隐式转换,而ClickHouse依赖于具有多个重载的函数而不是隐式转换 | -| E021-11 | 职位功能 | 部分 {.text-warning} | 不支持 `IN` 和 `USING` 条款,否 `POSITION_REGEX` 备选案文 | -| E021-12 | 字符比较 | 是 {.text-success} | | -| **E031** | **标识符** | **部分**{.text-warning} | | -| E031-01 | 分隔标识符 | 部分 {.text-warning} | Unicode文字支持有限 | -| E031-02 | 小写标识符 | 是 {.text-success} | | -| E031-03 | 尾部下划线 | 是 {.text-success} | | -| **E051** | **基本查询规范** | **部分**{.text-warning} | | -| E051-01 | SELECT DISTINCT | 是 {.text-success} | | -| E051-02 | GROUP BY子句 | 是 {.text-success} | | -| E051-04 | 分组依据可以包含不在列 ``中出现的列 | 是 {.text-success} | | +| E051-05 | SELECT 的列可以重命名 | 是 {.text-success} | | +| E051-06 | HAVING 从句 | 是 {.text-success} | | +| E051-07 | SELECT 选择的列中允许出现\* | 是 {.text-success} | | +| E051-08 | FROM 从句中的关联名称 | 是 {.text-success} | | +| E051-09 | 重命名 FROM 从句中的列 | 否 {.text-danger} | | +| **E061** | **基本谓词和搜索条件** | 
**部分**{.text-warning} | | +| E061-01 | 比较谓词 | 是 {.text-success} | | +| E061-02 | BETWEEN 谓词 | 部分 {.text-warning} | 不支持 `SYMMETRIC` 和 `ASYMMETRIC` 从句 | +| E061-03 | IN 谓词后可接值列表 | 是 {.text-success} | | +| E061-04 | LIKE 谓词 | 是 {.text-success} | | +| E061-05 | LIKE 谓词后接 ESCAPE 从句 | 否 {.text-danger} | | +| E061-06 | NULL 谓词 | 是 {.text-success} | | +| E061-07 | 量化比较谓词(ALL、SOME、ANY) | 否 {.text-danger} | | +| E061-08 | EXISTS 谓词 | 否 {.text-danger} | | +| E061-09 | 比较谓词中的子查询 | 是 {.text-success} | | +| E061-11 | IN 谓词中的子查询 | 是 {.text-success} | | +| E061-12 | 量化比较谓词(BETWEEN、IN、LIKE)中的子查询 | 否 {.text-danger} | | +| E061-13 | 相关子查询 | 否 {.text-danger} | | +| E061-14 | 搜索条件 | 是 {.text-success} | | +| **E071** | **基本查询表达式** | **部分**{.text-warning} | | +| E071-01 | UNION DISTINCT 表运算符 | 是 {.text-success} | | +| E071-02 | UNION ALL 表运算符 | 是 {.text-success} | | +| E071-03 | EXCEPT DISTINCT 表运算符 | 否 {.text-danger} | | +| E071-05 | 通过表运算符组合的列不必具有完全相同的数据类型 | 是 {.text-success} | | +| E071-06 | 子查询中的表运算符 | 是 {.text-success} | | +| **E081** | **基本权限** | **是**{.text-success} | | +| E081-01 | 表级别的SELECT(查询)权限 | 是 {.text-success} | | +| E081-02 | DELETE(删除)权限 | 是 {.text-success} | | +| E081-03 | 表级别的INSERT(插入)权限 | 是 {.text-success} | | +| E081-04 | 表级别的UPDATE(更新)权限 | 是 {.text-success} | | +| E081-05 | 列级别的UPDATE(更新)权限 | 是 {.text-success} | | +| E081-06 | 表级别的REFERENCES(引用)权限 | 是 {.text-success} | | +| E081-07 | 列级别的REFERENCES(引用)权限 | 是 {.text-success} | | +| E081-08 | WITH GRANT OPTION | 是 {.text-success} | | +| E081-09 | USAGE(使用)权限 | 是 {.text-success} | | +| E081-10 | EXECUTE(执行)权限 | 是 {.text-success} | | +| **E091** | **集合函数** | **是**{.text-success} | | +| E091-01 | AVG | 是 {.text-success} | | +| E091-02 | COUNT | 是 {.text-success} | | +| E091-03 | MAX | 是 {.text-success} | | +| E091-04 | MIN | 是 {.text-success} | | +| E091-05 | SUM | 是 {.text-success} | | +| E091-06 | ALL修饰词 | 否。 {.text-danger} | | +| E091-07 | DISTINCT修饰词 | 是 {.text-success} | 并非所有聚合函数都支持该修饰词 | +| **E101** | **基本数据操作** | **部分**{.text-warning} | | +| E101-01 | INSERT(插入)语句 | 是 {.text-success} | 注:ClickHouse中的主键并不隐含`UNIQUE` 约束 | +| E101-03 | 可指定范围的UPDATE(更新)语句 | 部分 {.text-warning} | `ALTER UPDATE` 语句用来批量更新数据 | +| E101-04 | 可指定范围的DELETE(删除)语句 | 部分 {.text-warning} | `ALTER DELETE` 语句用来批量删除数据 | +| **E111** | **返回一行的SELECT语句** | **否**{.text-danger} | | +| **E121** | **基本游标支持** | **否**{.text-danger} | | +| E121-01 | DECLARE CURSOR | 否 {.text-danger} | | +| E121-02 | ORDER BY 涉及的列不需要出现在SELECT的列中 | 是 {.text-success} | | +| E121-03 | ORDER BY 从句中的表达式 | 是 {.text-success} | | +| E121-04 | OPEN 语句 | 否 {.text-danger} | | +| E121-06 | 受游标位置控制的 UPDATE 语句 | 否 {.text-danger} | | +| E121-07 | 受游标位置控制的 DELETE 语句 | 否 {.text-danger} | | +| E121-08 | CLOSE 语句 | 否 {.text-danger} | | +| E121-10 | FETCH 语句中包含隐式NEXT | 否 {.text-danger} | | +| E121-17 | WITH HOLD 游标 | 否 {.text-danger} | | +| **E131** | **空值支持** | **是**{.text-success} | 有部分限制 | +| **E141** | **基本完整性约束** | **部分**{.text-warning} | | +| E141-01 | NOT NULL(非空)约束 | 是 {.text-success} | 注: 默认情况下ClickHouse表中的列隐含`NOT NULL`约束 | +| E141-02 | NOT NULL(非空)列的UNIQUE(唯一)约束 | 否 {.text-danger} | | +| E141-03 | 主键约束 | 部分 {.text-warning} | | +| E141-04 | 对于引用删除和引用更新操作,基本的FOREIGN KEY(外键)约束默认不进行任何操作(NO ACTION) | 否 {.text-danger} | | +| E141-06 | CHECK(检查)约束 | 是 {.text-success} | | +| E141-07 | 列默认值 | 是 {.text-success} | | +| E141-08 | 在主键上推断非空 | 是 {.text-success} | | +| E141-10 | 可以按任何顺序指定外键中的名称 | 否 {.text-danger} | | +| **E151** | **事务支持** | **否**{.text-danger} | | +| E151-01 | COMMIT(提交)语句 | 否 {.text-danger} | | +| E151-02 | 
ROLLBACK(回滚)语句 | 否 {.text-danger} | | +| **E152** | **基本的SET TRANSACTION(设置事务隔离级别)语句** | **否**{.text-danger} | | +| E152-01 | SET TRANSACTION语句:ISOLATION LEVEL SERIALIZABLE(隔离级别为串行化)从句 | 否 {.text-danger} | | +| E152-02 | SET TRANSACTION语句:READ ONLY(只读)和READ WRITE(读写)从句 | 否 {.text-danger} | | +| **E153** | **具有子查询的可更新查询** | **是**{.text-success} | | +| **E161** | **使用“--”符号作为SQL注释** | **是**{.text-success} | | +| **E171** | **SQLSTATE支持** | **否**{.text-danger} | | +| **E182** | **主机语言绑定** | **否**{.text-danger} | | +| **F031** | **基本架构操作** | **部分**{.text-warning} | | +| F031-01 | 使用 CREATE TABLE 语句创建持久表 | 部分 {.text-warning} | 不支持 `SYSTEM VERSIONING`, `ON COMMIT`, `GLOBAL`, `LOCAL`, `PRESERVE`, `DELETE`, `REF IS`, `WITH OPTIONS`, `UNDER`, `LIKE`, `PERIOD FOR` 从句,不支持用户解析的数据类型 | +| F031-02 | CREATE VIEW(创建视图)语句 | 部分 {.text-warning} | 不支持 `RECURSIVE`, `CHECK`, `UNDER`, `WITH OPTIONS` 从句,不支持用户解析的数据类型 | +| F031-03 | GRANT(授权)语句 | 是 {.text-success} | | +| F031-04 | ALTER TABLE语句:ADD COLUMN从句 | 是 {.text-success} | 不支持 `GENERATED` 从句和以系统时间做参数 | +| F031-13 | DROP TABLE语句:RESTRICT从句 | 否 {.text-danger} | | +| F031-16 | DROP VIEW语句:RESTRICT子句 | 否 {.text-danger} | | +| F031-19 | REVOKE语句:RESTRICT子句 | 否 {.text-danger} | | +| **F041** | **基本连接关系** | **部分**{.text-warning} | | +| F041-01 | Inner join(但不一定是INNER关键字) | 是 {.text-success} | | +| F041-02 | INNER 关键字 | 是 {.text-success} | | +| F041-03 | LEFT OUTER JOIN | 是 {.text-success} | | +| F041-04 | RIGHT OUTER JOIN | 是 {.text-success} | | +| F041-05 | 外连接可嵌套 | 是 {.text-success} | | +| F041-07 | 左外部连接或右外连接中的内部表也可用于内部联接 | 是 {.text-success} | | +| F041-08 | 支持所有比较运算符(而不仅仅是=) | 否 {.text-danger} | | +| **F051** | **基本日期和时间** | **部分**{.text-warning} | | +| F051-01 | DATE(日期)数据类型(并支持用于表达日期的字面量) | 是 {.text-success} | | +| F051-02 | TIME(时间)数据类型(并支持用于表达时间的字面量),小数秒精度至少为0 | 否 {.text-danger} | | +| F051-03 | 时间戳数据类型(并支持用于表达时间戳的字面量),小数秒精度至少为0和6 | 是 {.text-danger} | | +| F051-04 | 日期、时间和时间戳数据类型的比较谓词 | 是 {.text-success} | | +| F051-05 | Datetime 类型和字符串形式表达的时间之间的显式转换 | 是 {.text-success} | | +| F051-06 | CURRENT_DATE | 否 {.text-danger} | 使用`today()`替代 | +| F051-07 | LOCALTIME | 否 {.text-danger} | 使用`now()`替代 | +| F051-08 | LOCALTIMESTAMP | 否 {.text-danger} | | +| **F081** | **视图的UNION和EXCEPT操作** | **部分**{.text-warning} | | +| **F131** | **分组操作** | **部分**{.text-warning} | | +| F131-01 | 在具有分组视图的查询中支持 WHERE、GROUP BY 和 HAVING 子句 | 是 {.text-success} | | +| F131-02 | 在分组视图中支持多张表 | 是 {.text-success} | | +| F131-03 | 分组视图的查询中支持集合函数 | 是 {.text-success} | | +| F131-04 | 带有 `GROUP BY` 和 `HAVING` 从句,以及分组视图的子查询 | 是 {.text-success} | | +| F131-05 | 带有 `GROUP BY` 和 `HAVING` 从句,以及分组视图的仅返回1条记录的SELECT查询 | 否 {.text-danger} | | +| **F181** | **多模块支持** | **否**{.text-danger} | | +| **F201** | **CAST 函数** | **是**{.text-success} | | +| **F221** | **显式默认值** | **否**{.text-danger} | | +| **F261** | **CASE 表达式** | **是**{.text-success} | | +| F261-01 | 简单 CASE 表达式 | 是 {.text-success} | | +| F261-02 | 搜索型 CASE 表达式 | 是 {.text-success} | | +| F261-03 | NULLIF | 是 {.text-success} | | +| F261-04 | COALESCE | 是 {.text-success} | | +| **F311** | **架构定义语句** | **部分**{.text-warning} | | +| F311-01 | CREATE SCHEMA | 部分 {.text-warning} | 见`CREATE DATABASE` | +| F311-02 | 用于创建持久表的 CREATE TABLE | 是 {.text-success} | | +| F311-03 | CREATE VIEW | 是 {.text-success} | | +| F311-04 | CREATE VIEW: WITH CHECK OPTION | 否 {.text-danger} | | +| F311-05 | GRANT 语句 | 是 {.text-success} | | +| **F471** | **标量子查询** | **是**{.text-success} | | +| **F481** | **扩展 NULL 谓词** | **是**{.text-success} | | +| **F812** | **基本标志位** | 
**否**{.text-danger} | +| **S011** | **用于不重复数据的数据类型** | **否**{.text-danger} | +| **T321** | **基本的SQL调用例程** | **否**{.text-danger} | | +| T321-01 | 没有重载的用户定义函数 | 否{.text-danger} | | +| T321-02 | 没有重载的用户定义存储过程 | 否{.text-danger} | | +| T321-03 | 功能调用 | 否 {.text-danger} | | +| T321-04 | CALL 语句 | 否 {.text-danger} | | +| T321-05 | RETURN 语句 | 否 {.text-danger} | | +| **T631** | **IN 谓词后接一个列表** | **是**{.text-success} | | diff --git a/docs/zh/sql-reference/statements/alter/index.md b/docs/zh/sql-reference/statements/alter/index.md index 2f60dbb262e7..f7d983cab4e5 100644 --- a/docs/zh/sql-reference/statements/alter/index.md +++ b/docs/zh/sql-reference/statements/alter/index.md @@ -1,23 +1,74 @@ --- -toc_hidden_folder: true -toc_priority: 42 -toc_title: INDEX +toc_priority: 35 +toc_title: ALTER --- -# 操作数据跳过索引 {#manipulations-with-data-skipping-indices} +## ALTER {#query_language_queries_alter} -可以使用以下操作: +大多数 `ALTER TABLE` 查询修改表设置或数据: -- `ALTER TABLE [db].name ADD INDEX name expression TYPE type GRANULARITY value [FIRST|AFTER name]` - 向表元数据添加索引描述。 +- [COLUMN](../../../sql-reference/statements/alter/column.md) +- [PARTITION](../../../sql-reference/statements/alter/partition.md) +- [DELETE](../../../sql-reference/statements/alter/delete.md) +- [UPDATE](../../../sql-reference/statements/alter/update.md) +- [ORDER BY](../../../sql-reference/statements/alter/order-by.md) +- [INDEX](../../../sql-reference/statements/alter/index/index.md) +- [CONSTRAINT](../../../sql-reference/statements/alter/constraint.md) +- [TTL](../../../sql-reference/statements/alter/ttl.md) -- `ALTER TABLE [db].name DROP INDEX name` - 从表元数据中删除索引描述并从磁盘中删除索引文件。 +!!! note "备注" + 大多数 `ALTER TABLE` 查询只支持[\*MergeTree](../../../engines/table-engines/mergetree-family/index.md)表,以及[Merge](../../../engines/table-engines/special/merge.md)和[Distributed](../../../engines/table-engines/special/distributed.md)。 -- `ALTER TABLE [db.]table MATERIALIZE INDEX name IN PARTITION partition_name` - 查询在分区`partition_name`中重建二级索引`name`。 操作为[mutation](../../../sql-reference/statements/alter/index.md#mutations). +这些 `ALTER` 语句操作视图: -前两个命令是轻量级的,它们只更改元数据或删除文件。 +- [ALTER TABLE ... MODIFY QUERY](../../../sql-reference/statements/alter/view.md) — 修改一个 [Materialized view](../create/view.md#materialized) 结构. +- [ALTER LIVE VIEW](../../../sql-reference/statements/alter/view.md#alter-live-view) — 刷新一个 [Live view](../create/view.md#live-view). -Also, they are replicated, syncing indices metadata via ZooKeeper. -此外,它们会被复制,会通过ZooKeeper同步索引元数据。 +这些 `ALTER` 语句修改与基于角色的访问控制相关的实体: -!!! note "注意" -索引操作仅支持具有以下特征的表 [`*MergeTree`](../../../engines/table-engines/mergetree-family/mergetree.md)引擎 (包括[replicated](../../../engines/table-engines/mergetree-family/replication.md)). +- [USER](../../../sql-reference/statements/alter/user.md) +- [ROLE](../../../sql-reference/statements/alter/role.md) +- [QUOTA](../../../sql-reference/statements/alter/quota.md) +- [ROW POLICY](../../../sql-reference/statements/alter/row-policy.md) +- [SETTINGS PROFILE](../../../sql-reference/statements/alter/settings-profile.md) + +[ALTER TABLE ... 
MODIFY COMMENT](../../../sql-reference/statements/alter/comment.md) 语句添加、修改或删除表中的注释,无论之前是否设置过。 + +## Mutations 突变 {#mutations} + +用来操作表数据的ALTER查询是通过一种叫做“突变”的机制来实现的,最明显的是[ALTER TABLE … DELETE](../../../sql-reference/statements/alter/delete.md)和[ALTER TABLE … UPDATE](../../../sql-reference/statements/alter/update.md)。它们是异步的后台进程,类似于[MergeTree](../../../engines/table-engines/mergetree-family/index.md)表的合并,产生新的“突变”版本的部件。 + + + +对于 `*MergeTree` 表,通过重写整个数据部分来执行突变。没有原子性——一旦突变的部件准备好,部件就会被替换,并且在突变期间开始执行的 `SELECT` 查询将看到来自已经突变的部件的数据,以及来自尚未突变的部件的数据。 + + + +突变完全按照它们的产生顺序排列,并按此顺序应用于每个部分。突变还与“INSERT INTO”查询进行部分排序:在提交突变之前插入表中的数据将被突变,而在此之后插入的数据将不会被突变。注意,突变不会以任何方式阻止插入。 + + + +突变查询在添加突变条目后立即返回(对于复制表到ZooKeeper,对于非复制表到文件系统)。突变本身使用系统配置文件设置异步执行。要跟踪突变的进程,可以使用[`system.mutations`](../../../operations/system-tables/mutations.md#system_tables-mutations) 表。成功提交的变异将继续执行,即使ClickHouse服务器重新启动。没有办法回滚突变一旦提交,但如果突变卡住了,它可以取消与[`KILL MUTATION`](../../../sql-reference/statements/misc.md#kill-mutation) 查询。 + + + +完成突变的条目不会立即删除(保留条目的数量由 `finished_mutations_to_keep` 存储引擎参数决定)。删除旧的突变条目。 + +## ALTER 查询的同步性 {#synchronicity-of-alter-queries} + + +对于非复制表,所有的 `ALTER` 查询都是同步执行的。对于复制表,查询只是向“ZooKeeper”添加相应动作的指令,动作本身会尽快执行。但是,查询可以等待所有副本上的这些操作完成。 + +对于所有的“ALTER”查询,您可以使用[replication_alter_partitions_sync](../../../operations/settings/settings.md#replication-alter-partitions-sync)设置等待。 + +通过[replication_wait_for_inactive_replica_timeout](../../../operations/settings/settings.md#replication-wait-for-inactive-replica-timeout]设置,可以指定不活动的副本执行所有 `ALTER` 查询的等待时间(以秒为单位)。 + + + +!!! info "备注" + + 对于所有的 `ALTER` 查询,如果 `replication_alter_partitions_sync = 2` 和一些副本的不激活时间超过时间(在 `replication_wait_for_inactive_replica_timeout` 设置中指定),那么将抛出一个异常 `UNFINISHED`。 + + + +对于 `ALTER TABLE ... UPDATE|DELETE` 查询由 [mutations_sync](../../../operations/settings/settings.md#mutations_sync) 设置定义的同步度。 diff --git a/docs/zh/sql-reference/statements/alter/index/index.md b/docs/zh/sql-reference/statements/alter/index/index.md deleted file mode 120000 index b754fa71b83e..000000000000 --- a/docs/zh/sql-reference/statements/alter/index/index.md +++ /dev/null @@ -1 +0,0 @@ -../../../../../en/sql-reference/statements/alter/index/index.md \ No newline at end of file diff --git a/docs/zh/sql-reference/statements/alter/index/index.md b/docs/zh/sql-reference/statements/alter/index/index.md new file mode 100644 index 000000000000..16f48e55b2fd --- /dev/null +++ b/docs/zh/sql-reference/statements/alter/index/index.md @@ -0,0 +1,23 @@ +--- +toc_hidden_folder: true +toc_priority: 42 +toc_title: INDEX +--- + +# 操作数据跳过索引 {#manipulations-with-data-skipping-indices} + +可以使用以下操作: + +- `ALTER TABLE [db].name ADD INDEX name expression TYPE type GRANULARITY value [FIRST|AFTER name]` - 向表元数据添加索引描述。 + +- `ALTER TABLE [db].name DROP INDEX name` - 从表元数据中删除索引描述并从磁盘中删除索引文件。 + +- `ALTER TABLE [db.]table MATERIALIZE INDEX name IN PARTITION partition_name` - 查询在分区`partition_name`中重建二级索引`name`。 操作为[mutation](../../../../sql-reference/statements/alter/index.md#mutations). + +前两个命令是轻量级的,它们只更改元数据或删除文件。 + +Also, they are replicated, syncing indices metadata via ZooKeeper. +此外,它们会被复制,会通过ZooKeeper同步索引元数据。 + +!!! note "注意" + 索引操作仅支持具有以下特征的表 [`*MergeTree`](../../../../engines/table-engines/mergetree-family/mergetree.md)引擎 (包括[replicated](../../../../engines/table-engines/mergetree-family/replication.md)). 
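+
+下面是一个简单的使用示意(其中的表名 `db.hits`、列 `counter_id` 与分区名 `202101` 均为假设的示例值):
+
+``` sql
+-- 向表元数据添加一个跳数索引描述
+ALTER TABLE db.hits ADD INDEX idx_counter counter_id TYPE minmax GRANULARITY 4;
+
+-- 仅对指定分区中的已有数据重建该索引(作为突变异步执行)
+ALTER TABLE db.hits MATERIALIZE INDEX idx_counter IN PARTITION 202101;
+
+-- 从表元数据中删除索引描述并移除磁盘上的索引文件
+ALTER TABLE db.hits DROP INDEX idx_counter;
+```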
diff --git a/docs/zh/sql-reference/statements/alter/ttl.md b/docs/zh/sql-reference/statements/alter/ttl.md
deleted file mode 120000
index 94a112e7a17a..000000000000
--- a/docs/zh/sql-reference/statements/alter/ttl.md
+++ /dev/null
@@ -1 +0,0 @@
-../../../../en/sql-reference/statements/alter/ttl.md
\ No newline at end of file
diff --git a/docs/zh/sql-reference/statements/alter/ttl.md b/docs/zh/sql-reference/statements/alter/ttl.md
new file mode 100644
index 000000000000..ca011a2a12fc
--- /dev/null
+++ b/docs/zh/sql-reference/statements/alter/ttl.md
@@ -0,0 +1,85 @@
+---
+toc_priority: 44
+toc_title: TTL
+---
+
+# 表的 TTL 操作 {#manipulations-with-table-ttl}
+
+## 修改 MODIFY TTL {#modify-ttl}
+
+您可以用如下命令修改[表 TTL](../../../engines/table-engines/mergetree-family/mergetree.md#mergetree-table-ttl),语法如下所示:
+
+``` sql
+ALTER TABLE table_name MODIFY TTL ttl_expression;
+```
+
+## 移除 REMOVE TTL {#remove-ttl}
+
+TTL 属性可以用下列命令从表中移除:
+
+```sql
+ALTER TABLE table_name REMOVE TTL
+```
+
+**示例**
+
+创建一个带有 `TTL` 属性的表,如下所示:
+
+```sql
+CREATE TABLE table_with_ttl
+(
+    event_time DateTime,
+    UserID UInt64,
+    Comment String
+)
+ENGINE MergeTree()
+ORDER BY tuple()
+TTL event_time + INTERVAL 3 MONTH
+SETTINGS min_bytes_for_wide_part = 0;
+
+INSERT INTO table_with_ttl VALUES (now(), 1, 'username1');
+
+INSERT INTO table_with_ttl VALUES (now() - INTERVAL 4 MONTH, 2, 'username2');
+```
+
+运行 `OPTIMIZE` 命令强制执行 `TTL` 清理:
+
+```sql
+OPTIMIZE TABLE table_with_ttl FINAL;
+SELECT * FROM table_with_ttl FORMAT PrettyCompact;
+```
+可以看到第二行记录已被从表中删除:
+
+```text
+┌─────────event_time────┬──UserID─┬─────Comment──┐
+│ 2020-12-11 12:44:57 │ 1 │ username1 │
+└───────────────────────┴─────────┴──────────────┘
+```
+
+现在用下面的命令移除表的 `TTL`:
+
+```sql
+ALTER TABLE table_with_ttl REMOVE TTL;
+```
+
+重新插入上面的数据,并再次运行 `OPTIMIZE` 命令尝试清理 `TTL`:
+
+```sql
+INSERT INTO table_with_ttl VALUES (now() - INTERVAL 4 MONTH, 2, 'username2');
+OPTIMIZE TABLE table_with_ttl FINAL;
+SELECT * FROM table_with_ttl FORMAT PrettyCompact;
+```
+
+可以看到 `TTL` 已不存在,因此第二行记录没有被删除:
+
+```text
+┌─────────event_time────┬──UserID─┬─────Comment──┐
+│ 2020-12-11 12:44:57 │ 1 │ username1 │
+│ 2020-08-11 12:44:57 │ 2 │ username2 │
+└───────────────────────┴─────────┴──────────────┘
+```
+
+**更多参考**
+
+- 关于 [TTL 表达式](../../../sql-reference/statements/create/table.md#ttl-expression).
+- 修改带有 TTL 的列 [with TTL](../../../sql-reference/statements/alter/column.md#alter_modify-column).
diff --git a/docs/zh/sql-reference/statements/alter/update.md b/docs/zh/sql-reference/statements/alter/update.md
deleted file mode 120000
index fa9be21c0700..000000000000
--- a/docs/zh/sql-reference/statements/alter/update.md
+++ /dev/null
@@ -1 +0,0 @@
-../../../../en/sql-reference/statements/alter/update.md
\ No newline at end of file
diff --git a/docs/zh/sql-reference/statements/alter/update.md b/docs/zh/sql-reference/statements/alter/update.md
new file mode 100644
index 000000000000..08eccdf1aa2c
--- /dev/null
+++ b/docs/zh/sql-reference/statements/alter/update.md
@@ -0,0 +1,29 @@
+---
+toc_priority: 40
+toc_title: UPDATE
+---
+
+# ALTER TABLE … UPDATE 语句 {#alter-table-update-statements}
+
+``` sql
+ALTER TABLE [db.]table UPDATE column1 = expr1 [, ...] WHERE filter_expr
+```
+
+操作与指定过滤表达式相匹配的数据,并作为一个[变更 mutation](../../../sql-reference/statements/alter/index.md#mutations)来实现。
+
+!!! 
note "Note" + `ALTER TABLE` 的前缀使这个语法与其他大多数支持SQL的系统不同。它的目的是表明,与OLTP数据库中的类似查询不同,这是一个繁重的操作,不是为频繁使用而设计。 + +`filter_expr`必须是`UInt8`类型。这个查询将指定列的值更新为行中相应表达式的值,对于这些行,`filter_expr`取值为非零。使用`CAST`操作符将数值映射到列的类型上。不支持更新用于计算主键或分区键的列。 + +一个查询可以包含几个由逗号分隔的命令。 + +查询处理的同步性由 [mutations_sync](../../../operations/settings/settings.md#mutations_sync) 设置定义。 默认情况下,它是异步操作。 + + +**更多详情请参阅** + +- [变更 Mutations](../../../sql-reference/statements/alter/index.md#mutations) +- [ALTER查询的同步性问题](../../../sql-reference/statements/alter/index.md#synchronicity-of-alter-queries) +- [mutations_sync](../../../operations/settings/settings.md#mutations_sync) setting + diff --git a/docs/zh/sql-reference/statements/exists.md b/docs/zh/sql-reference/statements/exists.md deleted file mode 120000 index d69e8224fe6a..000000000000 --- a/docs/zh/sql-reference/statements/exists.md +++ /dev/null @@ -1 +0,0 @@ -../../../en/sql-reference/statements/exists.md \ No newline at end of file diff --git a/docs/zh/sql-reference/statements/exists.md b/docs/zh/sql-reference/statements/exists.md new file mode 100644 index 000000000000..69b26fea918d --- /dev/null +++ b/docs/zh/sql-reference/statements/exists.md @@ -0,0 +1,12 @@ +--- +toc_priority: 45 +toc_title: EXISTS +--- + +# EXISTS 语句 {#exists-statement} + +``` sql +EXISTS [TEMPORARY] [TABLE|DICTIONARY] [db.]name [INTO OUTFILE filename] [FORMAT format] +``` + +返回一个单独的 `UInt8`类型的列,如果表或数据库不存在,则包含一个值 `0`,如果表在指定的数据库中存在,则包含一个值 `1`。 \ No newline at end of file diff --git a/docs/zh/sql-reference/statements/set.md b/docs/zh/sql-reference/statements/set.md deleted file mode 120000 index 02e106afc9f3..000000000000 --- a/docs/zh/sql-reference/statements/set.md +++ /dev/null @@ -1 +0,0 @@ -../../../en/sql-reference/statements/set.md \ No newline at end of file diff --git a/docs/zh/sql-reference/statements/set.md b/docs/zh/sql-reference/statements/set.md new file mode 100644 index 000000000000..a9888a7080eb --- /dev/null +++ b/docs/zh/sql-reference/statements/set.md @@ -0,0 +1,23 @@ +--- +toc_priority: 50 +toc_title: SET +--- + +# SET 语句 {#query-set} + +``` sql +SET param = value +``` + +给当前会话的 `param` [配置项](../../operations/settings/index.md)赋值。你不能用这样的方式修改[服务器相关设置](../../operations/server-configuration-parameters/index.md)。 + + +您还可以在单个查询中设置指定设置配置文件中的所有值。 + + + +``` sql +SET profile = 'profile-name-from-the-settings-file' +``` + +更多详情, 详见 [配置项](../../operations/settings/settings.md). 
diff --git a/docs/zh/sql-reference/statements/truncate.md b/docs/zh/sql-reference/statements/truncate.md deleted file mode 120000 index 92fbd705e8f0..000000000000 --- a/docs/zh/sql-reference/statements/truncate.md +++ /dev/null @@ -1 +0,0 @@ -../../../en/sql-reference/statements/truncate.md \ No newline at end of file diff --git a/docs/zh/sql-reference/statements/truncate.md b/docs/zh/sql-reference/statements/truncate.md new file mode 100644 index 000000000000..7d7bf7e47e25 --- /dev/null +++ b/docs/zh/sql-reference/statements/truncate.md @@ -0,0 +1,31 @@ +--- +toc_priority: 52 +toc_title: TRUNCATE +--- + +# TRUNCATE 语句 {#truncate-statement} + +``` sql +TRUNCATE TABLE [IF EXISTS] [db.]name [ON CLUSTER cluster] +``` + +删除表中的所有数据。当省略子句 `IF EXISTS` 时,如果表不存在,则查询返回一个错误。 + + + +`TRUNCATE` 查询不支持[View](../../engines/table-engines/special/view.md),[File](../../engines/table-engines/special/file.md), [URL](../../engines/table-engines/special/url.md), [Buffer](../../engines/table-engines/special/buffer.md) 和 [Null](../../engines/table-engines/special/null.md)表引擎。 + + + +可以使用 replication_alter_partitions_sync 设置在复制集上等待执行的操作。 + + + +通过 replication_wait_for_inactive_replica_timeout 设置,可以指定不活动副本执行 `TRUNCATE`查询需要等待多长时间(以秒为单位)。 + + + +!!! info "注意" + 如果`replication_alter_partitions_sync` 被设置为`2`,并且某些复制集超过 `replication_wait_for_inactive_replica_timeout`设置的时间不激活,那么将抛出一个异常`UNFINISHED`。 + + diff --git a/docs/zh/sql-reference/statements/use.md b/docs/zh/sql-reference/statements/use.md deleted file mode 120000 index 7bdbf0493266..000000000000 --- a/docs/zh/sql-reference/statements/use.md +++ /dev/null @@ -1 +0,0 @@ -../../../en/sql-reference/statements/use.md \ No newline at end of file diff --git a/docs/zh/sql-reference/statements/use.md b/docs/zh/sql-reference/statements/use.md new file mode 100644 index 000000000000..41cba58bb9dd --- /dev/null +++ b/docs/zh/sql-reference/statements/use.md @@ -0,0 +1,16 @@ +--- +toc_priority: 53 +toc_title: USE +--- + +# USE 语句 {#use} + +``` sql +USE db +``` + +用于设置会话的当前数据库。 + +如果查询语句中没有在表名前面以加点的方式指明数据库名, 则用当前数据库进行搜索。 + +使用 HTTP 协议时无法进行此查询,因为没有会话的概念。 diff --git a/programs/CMakeLists.txt b/programs/CMakeLists.txt index 4806a7fe46e5..8906d186bfc1 100644 --- a/programs/CMakeLists.txt +++ b/programs/CMakeLists.txt @@ -53,9 +53,9 @@ option (ENABLE_CLICKHOUSE_KEEPER "ClickHouse alternative to ZooKeeper" ${ENABLE_ option (ENABLE_CLICKHOUSE_KEEPER_CONVERTER "Util allows to convert ZooKeeper logs and snapshots into clickhouse-keeper snapshot" ${ENABLE_CLICKHOUSE_ALL}) -if (NOT USE_NURAFT) +if (NOT ENABLE_NURAFT) # RECONFIGURE_MESSAGE_LEVEL should not be used here, - # since USE_NURAFT is set to OFF for FreeBSD and Darwin. + # since ENABLE_NURAFT is set to OFF for FreeBSD and Darwin. 
message (STATUS "clickhouse-keeper and clickhouse-keeper-converter will not be built (lack of NuRaft)") set(ENABLE_CLICKHOUSE_KEEPER OFF) set(ENABLE_CLICKHOUSE_KEEPER_CONVERTER OFF) @@ -160,7 +160,7 @@ else() message(STATUS "ClickHouse keeper-converter mode: OFF") endif() -if(NOT (MAKE_STATIC_LIBRARIES OR SPLIT_SHARED_LIBRARIES)) +if(NOT (USE_STATIC_LIBRARIES OR SPLIT_SHARED_LIBRARIES)) set(CLICKHOUSE_ONE_SHARED ON) endif() @@ -468,7 +468,7 @@ else () endif() endif () -if (ENABLE_TESTS AND USE_GTEST) +if (ENABLE_TESTS) set (CLICKHOUSE_UNIT_TESTS_TARGETS unit_tests_dbms) add_custom_target (clickhouse-tests ALL DEPENDS ${CLICKHOUSE_UNIT_TESTS_TARGETS}) add_dependencies(clickhouse-bundle clickhouse-tests) diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index a5e4517824d9..86bf4a007a8b 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -286,7 +286,7 @@ bool Client::executeMultiQuery(const String & all_queries_text) // , where the inline data is delimited by semicolon and not by a // newline. auto * insert_ast = parsed_query->as(); - if (insert_ast && insert_ast->data) + if (insert_ast && isSyncInsertWithData(*insert_ast, global_context)) { this_query_end = insert_ast->end; adjustQueryEnd(this_query_end, all_queries_end, global_context->getSettingsRef().max_parser_depth); @@ -481,48 +481,76 @@ catch (...) void Client::connect() { - connection_parameters = ConnectionParameters(config()); - - if (is_interactive) - std::cout << "Connecting to " - << (!connection_parameters.default_database.empty() ? "database " + connection_parameters.default_database + " at " - : "") - << connection_parameters.host << ":" << connection_parameters.port - << (!connection_parameters.user.empty() ? " as user " + connection_parameters.user : "") << "." << std::endl; + UInt16 default_port = ConnectionParameters::getPortFromConfig(config()); + connection_parameters = ConnectionParameters(config(), hosts_ports[0].host, + hosts_ports[0].port.value_or(default_port)); String server_name; UInt64 server_version_major = 0; UInt64 server_version_minor = 0; UInt64 server_version_patch = 0; - try + for (size_t attempted_address_index = 0; attempted_address_index < hosts_ports.size(); ++attempted_address_index) { - connection = Connection::createConnection(connection_parameters, global_context); + connection_parameters.host = hosts_ports[attempted_address_index].host; + connection_parameters.port = hosts_ports[attempted_address_index].port.value_or(default_port); - if (max_client_network_bandwidth) - { - ThrottlerPtr throttler = std::make_shared(max_client_network_bandwidth, 0, ""); - connection->setThrottler(throttler); - } + if (is_interactive) + std::cout << "Connecting to " + << (!connection_parameters.default_database.empty() ? "database " + connection_parameters.default_database + " at " + : "") + << connection_parameters.host << ":" << connection_parameters.port + << (!connection_parameters.user.empty() ? " as user " + connection_parameters.user : "") << "." << std::endl; - connection->getServerVersion( - connection_parameters.timeouts, server_name, server_version_major, server_version_minor, server_version_patch, server_revision); - } - catch (const Exception & e) - { - /// It is typical when users install ClickHouse, type some password and instantly forget it. 
- if ((connection_parameters.user.empty() || connection_parameters.user == "default") - && e.code() == DB::ErrorCodes::AUTHENTICATION_FAILED) + try { - std::cerr << std::endl - << "If you have installed ClickHouse and forgot password you can reset it in the configuration file." << std::endl - << "The password for default user is typically located at /etc/clickhouse-server/users.d/default-password.xml" << std::endl - << "and deleting this file will reset the password." << std::endl - << "See also /etc/clickhouse-server/users.xml on the server where ClickHouse is installed." << std::endl - << std::endl; + connection = Connection::createConnection(connection_parameters, global_context); + + if (max_client_network_bandwidth) + { + ThrottlerPtr throttler = std::make_shared(max_client_network_bandwidth, 0, ""); + connection->setThrottler(throttler); + } + + connection->getServerVersion( + connection_parameters.timeouts, server_name, server_version_major, server_version_minor, server_version_patch, server_revision); + config().setString("host", connection_parameters.host); + config().setInt("port", connection_parameters.port); + break; } + catch (const Exception & e) + { + /// It is typical when users install ClickHouse, type some password and instantly forget it. + /// This problem can't be fixed with reconnection so it is not attempted + if ((connection_parameters.user.empty() || connection_parameters.user == "default") + && e.code() == DB::ErrorCodes::AUTHENTICATION_FAILED) + { + std::cerr << std::endl + << "If you have installed ClickHouse and forgot password you can reset it in the configuration file." << std::endl + << "The password for default user is typically located at /etc/clickhouse-server/users.d/default-password.xml" << std::endl + << "and deleting this file will reset the password." << std::endl + << "See also /etc/clickhouse-server/users.xml on the server where ClickHouse is installed." << std::endl + << std::endl; + throw; + } + else + { + if (attempted_address_index == hosts_ports.size() - 1) + throw; - throw; + if (is_interactive) + { + std::cerr << "Connection attempt to database at " + << connection_parameters.host << ":" << connection_parameters.port + << " resulted in failure" + << std::endl + << getExceptionMessage(e, false) + << std::endl + << "Attempting connection to the next provided address" + << std::endl; + } + } + } } server_version = toString(server_version_major) + "." + toString(server_version_minor) + "." + toString(server_version_patch); @@ -966,8 +994,11 @@ void Client::addOptions(OptionsDescription & options_description) /// Main commandline options related to client functionality and all parameters from Settings. options_description.main_description->add_options() ("config,c", po::value(), "config-file path (another shorthand)") - ("host,h", po::value()->default_value("localhost"), "server host") - ("port", po::value()->default_value(9000), "server port") + ("host,h", po::value>()->multitoken()->default_value({{"localhost"}}, "localhost"), + "list of server hosts with optionally assigned port to connect. List elements are separated by a space." + "Every list element looks like '[:]'. 
If port isn't assigned, connection is made by port from '--port' param" + "Example of usage: '-h host1:1 host2 host3:3'") + ("port", po::value()->default_value(9000), "server port, which is default port for every host from '--host' param") ("secure,s", "Use TLS connection") ("user,u", po::value()->default_value("default"), "user") /** If "--password [value]" is used but the value is omitted, the bad argument exception will be thrown. @@ -1074,8 +1105,8 @@ void Client::processOptions(const OptionsDescription & options_description, if (options.count("config")) config().setString("config-file", options["config"].as()); - if (options.count("host") && !options["host"].defaulted()) - config().setString("host", options["host"].as()); + if (options.count("host")) + hosts_ports = options["host"].as>(); if (options.count("interleave-queries-file")) interleave_queries_files = options["interleave-queries-file"].as>(); if (options.count("port") && !options["port"].defaulted()) diff --git a/programs/format/Format.cpp b/programs/format/Format.cpp index 4b0e8ad1ca14..835afcdb2ed8 100644 --- a/programs/format/Format.cpp +++ b/programs/format/Format.cpp @@ -57,8 +57,16 @@ int mainEntryClickHouseFormat(int argc, char ** argv) ("seed", po::value(), "seed (arbitrary string) that determines the result of obfuscation") ; + Settings cmd_settings; + for (const auto & field : cmd_settings.all()) + { + if (field.getName() == "max_parser_depth" || field.getName() == "max_query_size") + cmd_settings.addProgramOption(desc, field); + } + boost::program_options::variables_map options; boost::program_options::store(boost::program_options::parse_command_line(argc, argv, desc), options); + po::notify(options); if (options.count("help")) { @@ -149,7 +157,8 @@ int mainEntryClickHouseFormat(int argc, char ** argv) ParserQuery parser(end); do { - ASTPtr res = parseQueryAndMovePosition(parser, pos, end, "query", multiple, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); + ASTPtr res = parseQueryAndMovePosition( + parser, pos, end, "query", multiple, cmd_settings.max_query_size, cmd_settings.max_parser_depth); /// For insert query with data(INSERT INTO ... VALUES ...), will lead to format fail, /// should throw exception early and make exception message more readable. 
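The Format.cpp hunk above whitelists exactly two Settings fields (max_parser_depth and max_query_size) as command-line options and adds the po::notify() call that commits the parsed values and runs the registered notifiers. A minimal standalone sketch of the same boost::program_options flow; the default values 1000 and 262144 mirror the usual ClickHouse defaults and are stated here as an assumption:

#include <boost/program_options.hpp>
#include <cstddef>
#include <iostream>

namespace po = boost::program_options;

int main(int argc, char ** argv)
{
    po::options_description desc("Allowed options");
    desc.add_options()
        ("max_parser_depth", po::value<unsigned>()->default_value(1000),
         "maximum recursion depth of the query parser")
        ("max_query_size", po::value<std::size_t>()->default_value(262144),
         "maximum size of the query, in bytes");

    po::variables_map options;
    po::store(po::parse_command_line(argc, argv, desc), options);
    po::notify(options); // without this, notifier callbacks and required() checks never run

    std::cout << "max_parser_depth = " << options["max_parser_depth"].as<unsigned>() << '\n';
}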
if (const auto * insert_query = res->as(); insert_query && insert_query->data) @@ -222,6 +231,5 @@ int mainEntryClickHouseFormat(int argc, char ** argv) std::cerr << getCurrentExceptionMessage(true) << '\n'; return getCurrentExceptionCode(); } - return 0; } diff --git a/programs/install/Install.cpp b/programs/install/Install.cpp index dd93e0b49abc..f8df823ecb73 100644 --- a/programs/install/Install.cpp +++ b/programs/install/Install.cpp @@ -364,7 +364,9 @@ int mainEntryClickHouseInstall(int argc, char ** argv) "clickhouse-git-import", "clickhouse-compressor", "clickhouse-format", - "clickhouse-extract-from-config" + "clickhouse-extract-from-config", + "clickhouse-keeper", + "clickhouse-keeper-converter", }; for (const auto & tool : tools) diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp index d144b4d332e2..88df4d5b3e7d 100644 --- a/programs/keeper/Keeper.cpp +++ b/programs/keeper/Keeper.cpp @@ -324,14 +324,12 @@ int Keeper::main(const std::vector & /*args*/) } else { - LOG_WARNING(log, message); + LOG_WARNING(log, fmt::runtime(message)); } } DB::ServerUUID::load(path + "/uuid", log); - const Settings & settings = global_context->getSettingsRef(); - std::string include_from_path = config().getString("include_from", "/etc/metrika.xml"); GlobalThreadPool::initialize( @@ -377,8 +375,8 @@ int Keeper::main(const std::vector & /*args*/) { Poco::Net::ServerSocket socket; auto address = socketBindListen(socket, listen_host, port); - socket.setReceiveTimeout(settings.receive_timeout); - socket.setSendTimeout(settings.send_timeout); + socket.setReceiveTimeout(config().getUInt64("keeper_server.socket_receive_timeout_sec", DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC)); + socket.setSendTimeout(config().getUInt64("keeper_server.socket_send_timeout_sec", DBMS_DEFAULT_SEND_TIMEOUT_SEC)); servers->emplace_back( listen_host, port_name, @@ -393,8 +391,8 @@ int Keeper::main(const std::vector & /*args*/) #if USE_SSL Poco::Net::SecureServerSocket socket; auto address = socketBindListen(socket, listen_host, port, /* secure = */ true); - socket.setReceiveTimeout(settings.receive_timeout); - socket.setSendTimeout(settings.send_timeout); + socket.setReceiveTimeout(config().getUInt64("keeper_server.socket_receive_timeout_sec", DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC)); + socket.setSendTimeout(config().getUInt64("keeper_server.socket_send_timeout_sec", DBMS_DEFAULT_SEND_TIMEOUT_SEC)); servers->emplace_back( listen_host, secure_port_name, diff --git a/programs/library-bridge/Handlers.cpp b/programs/library-bridge/Handlers.cpp index bf9ace679ba8..58f9bd0a9361 100644 --- a/programs/library-bridge/Handlers.cpp +++ b/programs/library-bridge/Handlers.cpp @@ -37,7 +37,7 @@ namespace if (!response.sent()) *response.send() << message << std::endl; - LOG_WARNING(&Poco::Logger::get("LibraryBridge"), message); + LOG_WARNING(&Poco::Logger::get("LibraryBridge"), fmt::runtime(message)); } std::shared_ptr parseColumns(std::string && column_string) @@ -123,7 +123,7 @@ void LibraryRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServe } else { - LOG_TRACE(log, "Cannot clone from dictionary with id: {}, will call libNew instead"); + LOG_TRACE(log, "Cannot clone from dictionary with id: {}, will call libNew instead", from_dictionary_id); lib_new = true; } } @@ -178,7 +178,7 @@ void LibraryRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServe catch (const Exception & ex) { processError(response, "Invalid 'sample_block' parameter in request body '" + ex.message() + "'"); - LOG_WARNING(log, 
ex.getStackTraceString()); + LOG_WARNING(log, fmt::runtime(ex.getStackTraceString())); return; } @@ -278,7 +278,7 @@ void LibraryRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServe catch (const Exception & ex) { processError(response, "Invalid 'requested_block' parameter in request body '" + ex.message() + "'"); - LOG_WARNING(log, ex.getStackTraceString()); + LOG_WARNING(log, fmt::runtime(ex.getStackTraceString())); return; } diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index a294857ace8a..70363c62caca 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include @@ -314,25 +315,31 @@ void LocalServer::cleanup() std::string LocalServer::getInitialCreateTableQuery() { - if (!config().has("table-structure") && !config().has("table-file")) + if (!config().has("table-structure") && !config().has("table-file") && !config().has("table-data-format")) return {}; auto table_name = backQuoteIfNeed(config().getString("table-name", "table")); auto table_structure = config().getString("table-structure", "auto"); - auto data_format = backQuoteIfNeed(config().getString("table-data-format", "TSV")); String table_file; + String format_from_file_name; if (!config().has("table-file") || config().getString("table-file") == "-") { /// Use Unix tools stdin naming convention table_file = "stdin"; + format_from_file_name = FormatFactory::instance().getFormatFromFileDescriptor(STDIN_FILENO); } else { /// Use regular file - table_file = quoteString(config().getString("table-file")); + auto file_name = config().getString("table-file"); + table_file = quoteString(file_name); + format_from_file_name = FormatFactory::instance().getFormatFromFileName(file_name, false); } + auto data_format + = backQuoteIfNeed(config().getString("table-data-format", format_from_file_name.empty() ? "TSV" : format_from_file_name)); + if (table_structure == "auto") table_structure = ""; else diff --git a/programs/odbc-bridge/CMakeLists.txt b/programs/odbc-bridge/CMakeLists.txt index 7b232f2b5dc0..54f47204259a 100644 --- a/programs/odbc-bridge/CMakeLists.txt +++ b/programs/odbc-bridge/CMakeLists.txt @@ -26,8 +26,8 @@ target_link_libraries(clickhouse-odbc-bridge PRIVATE dbms bridge clickhouse_parsers - nanodbc - unixodbc + ch_contrib::nanodbc + ch_contrib::unixodbc ) set_target_properties(clickhouse-odbc-bridge PROPERTIES RUNTIME_OUTPUT_DIRECTORY ..) 
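The LocalServer change above defers the choice of input format: when --table-data-format is absent, the format is first inferred from the file name (or the stdin descriptor) and only then falls back to TSV. A simplified standalone sketch of extension-based inference; the mapping table below is illustrative, not FormatFactory's real registry:

#include <filesystem>
#include <iostream>
#include <map>
#include <string>

std::string formatFromFileName(const std::string & file_name)
{
    static const std::map<std::string, std::string> by_extension
        = {{".csv", "CSV"}, {".tsv", "TSV"}, {".json", "JSONEachRow"}, {".parquet", "Parquet"}};

    std::string ext = std::filesystem::path(file_name).extension().string();
    auto it = by_extension.find(ext);
    return it == by_extension.end() ? "" : it->second; // empty: caller falls back to TSV
}

int main()
{
    std::cout << formatFromFileName("hits.parquet") << '\n'; // Parquet
    std::cout << formatFromFileName("dump.bin") << '\n';     // (empty) -> TSV fallback
}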
diff --git a/programs/odbc-bridge/ColumnInfoHandler.cpp b/programs/odbc-bridge/ColumnInfoHandler.cpp index 8ceeddcd7ab9..4d9a6b7a6928 100644 --- a/programs/odbc-bridge/ColumnInfoHandler.cpp +++ b/programs/odbc-bridge/ColumnInfoHandler.cpp @@ -77,7 +77,7 @@ void ODBCColumnsInfoHandler::handleRequest(HTTPServerRequest & request, HTTPServ response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_INTERNAL_SERVER_ERROR); if (!response.sent()) *response.send() << message << std::endl; - LOG_WARNING(log, message); + LOG_WARNING(log, fmt::runtime(message)); }; if (!params.has("table")) diff --git a/programs/odbc-bridge/ColumnInfoHandler.h b/programs/odbc-bridge/ColumnInfoHandler.h index bc976f54aeef..76c0103d6042 100644 --- a/programs/odbc-bridge/ColumnInfoHandler.h +++ b/programs/odbc-bridge/ColumnInfoHandler.h @@ -1,11 +1,12 @@ #pragma once +#include + #if USE_ODBC #include #include #include -#include #include diff --git a/programs/odbc-bridge/HandlerFactory.cpp b/programs/odbc-bridge/HandlerFactory.cpp index 6a5ef89ab8b5..1a6df287a5cc 100644 --- a/programs/odbc-bridge/HandlerFactory.cpp +++ b/programs/odbc-bridge/HandlerFactory.cpp @@ -1,6 +1,7 @@ #include "HandlerFactory.h" #include "PingHandler.h" #include "ColumnInfoHandler.h" +#include #include #include #include diff --git a/programs/odbc-bridge/IdentifierQuoteHandler.cpp b/programs/odbc-bridge/IdentifierQuoteHandler.cpp index c7cad68f19ef..7f809da4b10a 100644 --- a/programs/odbc-bridge/IdentifierQuoteHandler.cpp +++ b/programs/odbc-bridge/IdentifierQuoteHandler.cpp @@ -29,7 +29,7 @@ void IdentifierQuoteHandler::handleRequest(HTTPServerRequest & request, HTTPServ response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_INTERNAL_SERVER_ERROR); if (!response.sent()) *response.send() << message << std::endl; - LOG_WARNING(log, message); + LOG_WARNING(log, fmt::runtime(message)); }; if (!params.has("connection_string")) diff --git a/programs/odbc-bridge/IdentifierQuoteHandler.h b/programs/odbc-bridge/IdentifierQuoteHandler.h index ef3806fd802b..23ffd84663be 100644 --- a/programs/odbc-bridge/IdentifierQuoteHandler.h +++ b/programs/odbc-bridge/IdentifierQuoteHandler.h @@ -2,7 +2,7 @@ #include #include - +#include #include #if USE_ODBC diff --git a/programs/odbc-bridge/MainHandler.cpp b/programs/odbc-bridge/MainHandler.cpp index 82d1bd61c24f..02bdabe8ffa0 100644 --- a/programs/odbc-bridge/MainHandler.cpp +++ b/programs/odbc-bridge/MainHandler.cpp @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -45,7 +46,7 @@ void ODBCHandler::processError(HTTPServerResponse & response, const std::string response.setStatusAndReason(HTTPResponse::HTTP_INTERNAL_SERVER_ERROR); if (!response.sent()) *response.send() << message << std::endl; - LOG_WARNING(log, message); + LOG_WARNING(log, fmt::runtime(message)); } @@ -101,7 +102,7 @@ void ODBCHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse catch (const Exception & ex) { processError(response, "Invalid 'sample_block' parameter in request body '" + ex.message() + "'"); - LOG_ERROR(log, ex.getStackTraceString()); + LOG_ERROR(log, fmt::runtime(ex.getStackTraceString())); return; } diff --git a/programs/odbc-bridge/SchemaAllowedHandler.cpp b/programs/odbc-bridge/SchemaAllowedHandler.cpp index 7b526bd8041c..0c58af2f7c19 100644 --- a/programs/odbc-bridge/SchemaAllowedHandler.cpp +++ b/programs/odbc-bridge/SchemaAllowedHandler.cpp @@ -37,7 +37,7 @@ void SchemaAllowedHandler::handleRequest(HTTPServerRequest & request, HTTPServer 
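The recurring fmt::runtime(...) wrappers in these handlers all address the same issue: with fmt's compile-time format-string checking enabled, the LOG_* macros accept only literal format strings, so a message assembled at runtime has to be marked explicitly. A minimal standalone illustration, assuming fmt 8 or newer:

#include <fmt/core.h>
#include <string>

int main()
{
    std::string message = "connection refused"; // arbitrary text built at runtime
    fmt::print(fmt::runtime(message)); // explicit opt-out from compile-time checking
    fmt::print("\n");
}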
response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_INTERNAL_SERVER_ERROR); if (!response.sent()) *response.send() << message << std::endl; - LOG_WARNING(log, message); + LOG_WARNING(log, fmt::runtime(message)); }; if (!params.has("connection_string")) diff --git a/programs/odbc-bridge/SchemaAllowedHandler.h b/programs/odbc-bridge/SchemaAllowedHandler.h index d7b922ed05b4..7afa77ca0910 100644 --- a/programs/odbc-bridge/SchemaAllowedHandler.h +++ b/programs/odbc-bridge/SchemaAllowedHandler.h @@ -2,6 +2,7 @@ #include #include +#include #include #if USE_ODBC diff --git a/programs/odbc-bridge/getIdentifierQuote.h b/programs/odbc-bridge/getIdentifierQuote.h index f4227af5c070..a7620da2291c 100644 --- a/programs/odbc-bridge/getIdentifierQuote.h +++ b/programs/odbc-bridge/getIdentifierQuote.h @@ -1,5 +1,7 @@ #pragma once +#include + #if USE_ODBC #include diff --git a/programs/server/CMakeLists.txt b/programs/server/CMakeLists.txt index 281c25d50eb0..643fd2f0ec4f 100644 --- a/programs/server/CMakeLists.txt +++ b/programs/server/CMakeLists.txt @@ -18,13 +18,15 @@ set (CLICKHOUSE_SERVER_LINK clickhouse_storages_system clickhouse_table_functions string_utils - jemalloc ${LINK_RESOURCE_LIB} PUBLIC daemon ) +if (TARGET ch_contrib::jemalloc) + list(APPEND CLICKHOUSE_SERVER_LINK PRIVATE ch_contrib::jemalloc) +endif() clickhouse_program_add(server) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 5fc3f9aa9675..9fa36e3de89d 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -80,6 +80,7 @@ #include #include #include +#include #include #include #include @@ -98,9 +99,7 @@ #endif #if USE_SSL -# if USE_INTERNAL_SSL_LIBRARY -# include -# endif +# include # include # include #endif @@ -114,10 +113,6 @@ # include #endif -#if USE_BASE64 -# include -#endif - #if USE_JEMALLOC # include #endif @@ -200,6 +195,7 @@ namespace { void setupTmpPath(Poco::Logger * log, const std::string & path) +try { LOG_DEBUG(log, "Setting up {} to store temporary data in it", path); @@ -218,6 +214,15 @@ void setupTmpPath(Poco::Logger * log, const std::string & path) LOG_DEBUG(log, "Skipped file in temporary path {}", it->path().string()); } } +catch (...) +{ + DB::tryLogCurrentException( + log, + fmt::format( + "Caught exception while setup temporary path: {}. It is ok to skip this exception as cleaning old temporary files is not " + "necessary", + path)); +} int waitServersToFinish(std::vector & servers, size_t seconds_to_wait) { @@ -968,11 +973,90 @@ if (ThreadFuzzer::instance().isEffective()) global_context->updateInterserverCredentials(*config); CompressionCodecEncrypted::Configuration::instance().tryLoad(*config, "encryption_codecs"); - +#if USE_SSL + CertificateReloader::instance().tryLoad(*config); +#endif ProfileEvents::increment(ProfileEvents::MainConfigLoads); }, /* already_loaded = */ false); /// Reload it right now (initial loading) + const auto listen_hosts = getListenHosts(config()); + const auto listen_try = getListenTry(config()); + + if (config().has("keeper_server")) + { +#if USE_NURAFT + //// If we don't have configured connection probably someone trying to use clickhouse-server instead + //// of clickhouse-keeper, so start synchronously. + bool can_initialize_keeper_async = false; + + if (has_zookeeper) /// We have configured connection to some zookeeper cluster + { + /// If we cannot connect to some other node from our cluster then we have to wait our Keeper start + /// synchronously. 
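setupTmpPath above uses a function-try-block, a less common C++ construct: the try sits between the signature and the body, the handler covers the whole function, and for an ordinary void function falling off the end of the handler returns normally, which is what makes the cleanup best-effort. A minimal standalone illustration:

#include <iostream>
#include <stdexcept>
#include <string>

void cleanupTemporaryFiles(const std::string & path)
try
{
    // ... enumerate and delete stale files under `path` ...
    throw std::runtime_error("simulated I/O failure");
}
catch (...)
{
    // Best-effort: report and continue, like setupTmpPath above.
    std::cerr << "skipping temporary-path cleanup for " << path << '\n';
}

int main()
{
    cleanupTemporaryFiles("/tmp/clickhouse"); // does not terminate the program
}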
+ can_initialize_keeper_async = global_context->tryCheckClientConnectionToMyKeeperCluster(); + } + /// Initialize keeper RAFT. + global_context->initializeKeeperDispatcher(can_initialize_keeper_async); + FourLetterCommandFactory::registerCommands(*global_context->getKeeperDispatcher()); + + for (const auto & listen_host : listen_hosts) + { + /// TCP Keeper + const char * port_name = "keeper_server.tcp_port"; + createServer( + config(), listen_host, port_name, listen_try, /* start_server: */ false, + servers_to_start_before_tables, + [&](UInt16 port) -> ProtocolServerAdapter + { + Poco::Net::ServerSocket socket; + auto address = socketBindListen(socket, listen_host, port); + socket.setReceiveTimeout(config().getUInt64("keeper_server.socket_receive_timeout_sec", DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC)); + socket.setSendTimeout(config().getUInt64("keeper_server.socket_send_timeout_sec", DBMS_DEFAULT_SEND_TIMEOUT_SEC)); + return ProtocolServerAdapter( + listen_host, + port_name, + "Keeper (tcp): " + address.toString(), + std::make_unique( + new KeeperTCPHandlerFactory(*this, false), server_pool, socket)); + }); + + const char * secure_port_name = "keeper_server.tcp_port_secure"; + createServer( + config(), listen_host, secure_port_name, listen_try, /* start_server: */ false, + servers_to_start_before_tables, + [&](UInt16 port) -> ProtocolServerAdapter + { +#if USE_SSL + Poco::Net::SecureServerSocket socket; + auto address = socketBindListen(socket, listen_host, port, /* secure = */ true); + socket.setReceiveTimeout(config().getUInt64("keeper_server.socket_receive_timeout_sec", DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC)); + socket.setSendTimeout(config().getUInt64("keeper_server.socket_send_timeout_sec", DBMS_DEFAULT_SEND_TIMEOUT_SEC)); + return ProtocolServerAdapter( + listen_host, + secure_port_name, + "Keeper with secure protocol (tcp_secure): " + address.toString(), + std::make_unique( + new KeeperTCPHandlerFactory(*this, true), server_pool, socket)); +#else + UNUSED(port); + throw Exception{"SSL support for TCP protocol is disabled because Poco library was built without NetSSL support.", + ErrorCodes::SUPPORT_IS_DISABLED}; +#endif + }); + } +#else + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "ClickHouse server built without NuRaft library. Cannot use internal coordination."); +#endif + + } + + for (auto & server : servers_to_start_before_tables) + { + server.start(); + LOG_INFO(log, "Listening for {}", server.getDescription()); + } + auto & access_control = global_context->getAccessControl(); if (config().has("custom_settings_prefixes")) access_control.setCustomSettingsPrefixes(config().getString("custom_settings_prefixes")); @@ -1081,83 +1165,6 @@ if (ThreadFuzzer::instance().isEffective()) /// try set up encryption. There are some errors in config, error will be printed and server wouldn't start. CompressionCodecEncrypted::Configuration::instance().load(config(), "encryption_codecs"); - const auto listen_hosts = getListenHosts(config()); - const auto listen_try = getListenTry(config()); - - if (config().has("keeper_server")) - { -#if USE_NURAFT - //// If we don't have configured connection probably someone trying to use clickhouse-server instead - //// of clickhouse-keeper, so start synchronously. - bool can_initialize_keeper_async = false; - - if (has_zookeeper) /// We have configured connection to some zookeeper cluster - { - /// If we cannot connect to some other node from our cluster then we have to wait our Keeper start - /// synchronously. 
- can_initialize_keeper_async = global_context->tryCheckClientConnectionToMyKeeperCluster(); - } - /// Initialize keeper RAFT. - global_context->initializeKeeperDispatcher(can_initialize_keeper_async); - FourLetterCommandFactory::registerCommands(*global_context->getKeeperDispatcher()); - - for (const auto & listen_host : listen_hosts) - { - /// TCP Keeper - const char * port_name = "keeper_server.tcp_port"; - createServer( - config(), listen_host, port_name, listen_try, /* start_server: */ false, - servers_to_start_before_tables, - [&](UInt16 port) -> ProtocolServerAdapter - { - Poco::Net::ServerSocket socket; - auto address = socketBindListen(socket, listen_host, port); - socket.setReceiveTimeout(settings.receive_timeout); - socket.setSendTimeout(settings.send_timeout); - return ProtocolServerAdapter( - listen_host, - port_name, - "Keeper (tcp): " + address.toString(), - std::make_unique( - new KeeperTCPHandlerFactory(*this, false), server_pool, socket)); - }); - - const char * secure_port_name = "keeper_server.tcp_port_secure"; - createServer( - config(), listen_host, secure_port_name, listen_try, /* start_server: */ false, - servers_to_start_before_tables, - [&](UInt16 port) -> ProtocolServerAdapter - { -#if USE_SSL - Poco::Net::SecureServerSocket socket; - auto address = socketBindListen(socket, listen_host, port, /* secure = */ true); - socket.setReceiveTimeout(settings.receive_timeout); - socket.setSendTimeout(settings.send_timeout); - return ProtocolServerAdapter( - listen_host, - secure_port_name, - "Keeper with secure protocol (tcp_secure): " + address.toString(), - std::make_unique( - new KeeperTCPHandlerFactory(*this, true), server_pool, socket)); -#else - UNUSED(port); - throw Exception{"SSL support for TCP protocol is disabled because Poco library was built without NetSSL support.", - ErrorCodes::SUPPORT_IS_DISABLED}; -#endif - }); - } -#else - throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "ClickHouse server built without NuRaft library. Cannot use internal coordination."); -#endif - - } - - for (auto & server : servers_to_start_before_tables) - { - server.start(); - LOG_INFO(log, "Listening for {}", server.getDescription()); - } - SCOPE_EXIT({ /// Stop reloading of the main config. This must be done before `global_context->shutdown()` because /// otherwise the reloading may pass a changed config to some destroyed parts of ContextSharedPart. @@ -1358,6 +1365,16 @@ if (ThreadFuzzer::instance().isEffective()) ErrorCodes::NO_ELEMENTS_IN_CONFIG); } + if (servers.empty()) + throw Exception("No servers started (add valid listen_host and 'tcp_port' or 'http_port' to configuration file.)", + ErrorCodes::NO_ELEMENTS_IN_CONFIG); + +#if USE_SSL + CertificateReloader::instance().tryLoad(config()); +#endif + + /// Must be done after initialization of `servers`, because async_metrics will access `servers` variable from its thread. + async_metrics.start(); { diff --git a/programs/server/config.xml b/programs/server/config.xml index d88773a3fc40..ce0c54f67303 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -244,7 +244,7 @@ openssl dhparam -out /etc/clickhouse-server/dhparam.pem 4096 Only file format with BEGIN DH PARAMETERS is supported. 
--> - /etc/clickhouse-server/dhparam.pem + none true true diff --git a/src/Access/AccessRights.cpp b/src/Access/AccessRights.cpp index 19b069546ee2..ca8609f39845 100644 --- a/src/Access/AccessRights.cpp +++ b/src/Access/AccessRights.cpp @@ -1,8 +1,8 @@ #include #include +#include #include #include -#include #include namespace DB @@ -101,7 +101,7 @@ namespace AccessRightsElements getResult() const { ProtoElements sorted = *this; - boost::range::sort(sorted); + ::sort(sorted.begin(), sorted.end()); AccessRightsElements res; res.reserve(sorted.size()); diff --git a/src/Access/CachedAccessChecking.cpp b/src/Access/CachedAccessChecking.cpp new file mode 100644 index 000000000000..aa8ef6073d3c --- /dev/null +++ b/src/Access/CachedAccessChecking.cpp @@ -0,0 +1,44 @@ +#include +#include + + +namespace DB +{ +CachedAccessChecking::CachedAccessChecking(const std::shared_ptr & access_, AccessFlags access_flags_) + : CachedAccessChecking(access_, AccessRightsElement{access_flags_}) +{ +} + +CachedAccessChecking::CachedAccessChecking(const std::shared_ptr & access_, const AccessRightsElement & element_) + : access(access_), element(element_) +{ +} + +CachedAccessChecking::~CachedAccessChecking() = default; + +bool CachedAccessChecking::checkAccess(bool throw_if_denied) +{ + if (checked) + return result; + if (throw_if_denied) + { + try + { + access->checkAccess(element); + result = true; + } + catch (...) + { + result = false; + throw; + } + } + else + { + result = access->isGranted(element); + } + checked = true; + return result; +} + +} diff --git a/src/Access/CachedAccessChecking.h b/src/Access/CachedAccessChecking.h new file mode 100644 index 000000000000..e87c28dd823c --- /dev/null +++ b/src/Access/CachedAccessChecking.h @@ -0,0 +1,29 @@ +#pragma once + +#include +#include + + +namespace DB +{ +class ContextAccess; + +/// Checks if the current user has a specified access type granted, +/// and if it's checked another time later, it will just return the first result. 
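The CachedAccessChecking class declared just below memoizes a single access check: the first checkAccess() call does the real work and every later call replays the stored result. A standalone sketch of the same memoization pattern (generic, not the ClickHouse API):

#include <functional>
#include <utility>

class CachedCheck
{
public:
    explicit CachedCheck(std::function<bool()> check_) : check(std::move(check_)) {}

    bool operator()()
    {
        if (!checked)
        {
            result = check(); // expensive call happens at most once
            checked = true;
        }
        return result;
    }

private:
    std::function<bool()> check;
    bool checked = false;
    bool result = false;
};

// Usage: CachedCheck has_access([&] { return expensiveAccessCheck(); });
// calling has_access() in a loop performs the check only on the first iteration.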
+class CachedAccessChecking +{ +public: + CachedAccessChecking(const std::shared_ptr & access_, AccessFlags access_flags_); + CachedAccessChecking(const std::shared_ptr & access_, const AccessRightsElement & element_); + ~CachedAccessChecking(); + + bool checkAccess(bool throw_if_denied = true); + +private: + const std::shared_ptr access; + const AccessRightsElement element; + bool checked = false; + bool result = false; +}; + +} diff --git a/src/Access/Common/AccessType.h b/src/Access/Common/AccessType.h index 4472e975878c..55b8359d3853 100644 --- a/src/Access/Common/AccessType.h +++ b/src/Access/Common/AccessType.h @@ -145,14 +145,14 @@ enum class AccessType M(SYSTEM_RELOAD_EMBEDDED_DICTIONARIES, "RELOAD EMBEDDED DICTIONARIES", GLOBAL, SYSTEM_RELOAD) /* implicitly enabled by the grant SYSTEM_RELOAD_DICTIONARY ON *.* */\ M(SYSTEM_RELOAD, "", GROUP, SYSTEM) \ M(SYSTEM_RESTART_DISK, "SYSTEM RESTART DISK", GLOBAL, SYSTEM) \ - M(SYSTEM_MERGES, "SYSTEM STOP MERGES, SYSTEM START MERGES, STOP_MERGES, START MERGES", TABLE, SYSTEM) \ + M(SYSTEM_MERGES, "SYSTEM STOP MERGES, SYSTEM START MERGES, STOP MERGES, START MERGES", TABLE, SYSTEM) \ M(SYSTEM_TTL_MERGES, "SYSTEM STOP TTL MERGES, SYSTEM START TTL MERGES, STOP TTL MERGES, START TTL MERGES", TABLE, SYSTEM) \ M(SYSTEM_FETCHES, "SYSTEM STOP FETCHES, SYSTEM START FETCHES, STOP FETCHES, START FETCHES", TABLE, SYSTEM) \ M(SYSTEM_MOVES, "SYSTEM STOP MOVES, SYSTEM START MOVES, STOP MOVES, START MOVES", TABLE, SYSTEM) \ M(SYSTEM_DISTRIBUTED_SENDS, "SYSTEM STOP DISTRIBUTED SENDS, SYSTEM START DISTRIBUTED SENDS, STOP DISTRIBUTED SENDS, START DISTRIBUTED SENDS", TABLE, SYSTEM_SENDS) \ - M(SYSTEM_REPLICATED_SENDS, "SYSTEM STOP REPLICATED SENDS, SYSTEM START REPLICATED SENDS, STOP_REPLICATED_SENDS, START REPLICATED SENDS", TABLE, SYSTEM_SENDS) \ + M(SYSTEM_REPLICATED_SENDS, "SYSTEM STOP REPLICATED SENDS, SYSTEM START REPLICATED SENDS, STOP REPLICATED SENDS, START REPLICATED SENDS", TABLE, SYSTEM_SENDS) \ M(SYSTEM_SENDS, "SYSTEM STOP SENDS, SYSTEM START SENDS, STOP SENDS, START SENDS", GROUP, SYSTEM) \ - M(SYSTEM_REPLICATION_QUEUES, "SYSTEM STOP REPLICATION QUEUES, SYSTEM START REPLICATION QUEUES, STOP_REPLICATION_QUEUES, START REPLICATION QUEUES", TABLE, SYSTEM) \ + M(SYSTEM_REPLICATION_QUEUES, "SYSTEM STOP REPLICATION QUEUES, SYSTEM START REPLICATION QUEUES, STOP REPLICATION QUEUES, START REPLICATION QUEUES", TABLE, SYSTEM) \ M(SYSTEM_DROP_REPLICA, "DROP REPLICA", TABLE, SYSTEM) \ M(SYSTEM_SYNC_REPLICA, "SYNC REPLICA", TABLE, SYSTEM) \ M(SYSTEM_RESTART_REPLICA, "RESTART REPLICA", TABLE, SYSTEM) \ @@ -166,6 +166,7 @@ enum class AccessType M(dictGet, "dictHas, dictGetHierarchy, dictIsIn", DICTIONARY, ALL) /* allows to execute functions dictGet(), dictHas(), dictGetHierarchy(), dictIsIn() */\ \ M(addressToLine, "", GLOBAL, INTROSPECTION) /* allows to execute function addressToLine() */\ + M(addressToLineWithInlines, "", GLOBAL, INTROSPECTION) /* allows to execute function addressToLineWithInlines() */\ M(addressToSymbol, "", GLOBAL, INTROSPECTION) /* allows to execute function addressToSymbol() */\ M(demangle, "", GLOBAL, INTROSPECTION) /* allows to execute function demangle() */\ M(INTROSPECTION, "INTROSPECTION FUNCTIONS", GROUP, ALL) /* allows to execute functions addressToLine(), addressToSymbol(), demangle()*/\ diff --git a/src/Access/ExternalAuthenticators.cpp b/src/Access/ExternalAuthenticators.cpp index d4100c4e520d..3e1c289b2075 100644 --- a/src/Access/ExternalAuthenticators.cpp +++ b/src/Access/ExternalAuthenticators.cpp @@ -270,12 +270,21 @@ void 
ExternalAuthenticators::setConfiguration(const Poco::Util::AbstractConfigur Poco::Util::AbstractConfiguration::Keys ldap_server_names; config.keys("ldap_servers", ldap_server_names); - for (const auto & ldap_server_name : ldap_server_names) + ldap_client_params_blueprint.clear(); + for (auto ldap_server_name : ldap_server_names) { try { - ldap_client_params_blueprint.erase(ldap_server_name); - parseLDAPServer(ldap_client_params_blueprint.emplace(ldap_server_name, LDAPClient::Params{}).first->second, config, ldap_server_name); + const auto bracket_pos = ldap_server_name.find('['); + if (bracket_pos != std::string::npos) + ldap_server_name.resize(bracket_pos); + + if (ldap_client_params_blueprint.count(ldap_server_name) > 0) + throw Exception("Multiple LDAP servers with the same name are not allowed", ErrorCodes::BAD_ARGUMENTS); + + LDAPClient::Params ldap_client_params_tmp; + parseLDAPServer(ldap_client_params_tmp, config, ldap_server_name); + ldap_client_params_blueprint.emplace(std::move(ldap_server_name), std::move(ldap_client_params_tmp)); } catch (...) { @@ -283,10 +292,15 @@ void ExternalAuthenticators::setConfiguration(const Poco::Util::AbstractConfigur } } + kerberos_params.reset(); try { if (kerberos_keys_count > 0) - parseKerberosParams(kerberos_params.emplace(), config); + { + GSSAcceptorContext::Params kerberos_params_tmp; + parseKerberosParams(kerberos_params_tmp, config); + kerberos_params = std::move(kerberos_params_tmp); + } } catch (...) { diff --git a/src/Access/RolesOrUsersSet.cpp b/src/Access/RolesOrUsersSet.cpp index 810198eeb987..2c302fde229c 100644 --- a/src/Access/RolesOrUsersSet.cpp +++ b/src/Access/RolesOrUsersSet.cpp @@ -7,8 +7,8 @@ #include #include #include -#include #include +#include namespace DB @@ -132,7 +132,7 @@ std::shared_ptr RolesOrUsersSet::toAST() const ast->names.reserve(ids.size()); for (const UUID & id : ids) ast->names.emplace_back(::DB::toString(id)); - boost::range::sort(ast->names); + ::sort(ast->names.begin(), ast->names.end()); } if (!except_ids.empty()) @@ -140,7 +140,7 @@ std::shared_ptr RolesOrUsersSet::toAST() const ast->except_names.reserve(except_ids.size()); for (const UUID & except_id : except_ids) ast->except_names.emplace_back(::DB::toString(except_id)); - boost::range::sort(ast->except_names); + ::sort(ast->except_names.begin(), ast->except_names.end()); } return ast; @@ -161,7 +161,7 @@ std::shared_ptr RolesOrUsersSet::toASTWithNames(const Access if (name) ast->names.emplace_back(std::move(*name)); } - boost::range::sort(ast->names); + ::sort(ast->names.begin(), ast->names.end()); } if (!except_ids.empty()) @@ -173,7 +173,7 @@ std::shared_ptr RolesOrUsersSet::toASTWithNames(const Access if (except_name) ast->except_names.emplace_back(std::move(*except_name)); } - boost::range::sort(ast->except_names); + ::sort(ast->except_names.begin(), ast->except_names.end()); } return ast; diff --git a/src/AggregateFunctions/AggregateFunctionFactory.cpp b/src/AggregateFunctions/AggregateFunctionFactory.cpp index eac761c1a827..347f4607dbf6 100644 --- a/src/AggregateFunctions/AggregateFunctionFactory.cpp +++ b/src/AggregateFunctions/AggregateFunctionFactory.cpp @@ -70,11 +70,11 @@ static DataTypes convertLowCardinalityTypesToNested(const DataTypes & types) AggregateFunctionPtr AggregateFunctionFactory::get( const String & name, const DataTypes & argument_types, const Array & parameters, AggregateFunctionProperties & out_properties) const { - auto type_without_low_cardinality = convertLowCardinalityTypesToNested(argument_types); + auto 
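In the ExternalAuthenticators hunk above, the blueprint map is rebuilt from scratch and duplicate server names are rejected. Poco enumerates repeated configuration children with bracketed suffixes such as "name[1]" (stated here as an assumption about AbstractConfiguration::keys), so the logical name is recovered by truncating at the first '['. A standalone sketch of that normalization and duplicate check; the int value is a placeholder for the parsed parameters:

#include <map>
#include <stdexcept>
#include <string>

void registerLdapServer(std::map<std::string, int> & registry, std::string key)
{
    if (auto pos = key.find('['); pos != std::string::npos)
        key.resize(pos); // "myserver[1]" -> "myserver"

    if (registry.count(key) > 0)
        throw std::invalid_argument("Multiple LDAP servers with the same name are not allowed");

    registry.emplace(std::move(key), 0);
}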
types_without_low_cardinality = convertLowCardinalityTypesToNested(argument_types); /// If one of the types is Nullable, we apply aggregate function combinator "Null". - if (std::any_of(type_without_low_cardinality.begin(), type_without_low_cardinality.end(), + if (std::any_of(types_without_low_cardinality.begin(), types_without_low_cardinality.end(), [](const auto & type) { return type->isNullable(); })) { AggregateFunctionCombinatorPtr combinator = AggregateFunctionCombinatorFactory::instance().tryFindSuffix("Null"); @@ -82,10 +82,10 @@ AggregateFunctionPtr AggregateFunctionFactory::get( throw Exception("Logical error: cannot find aggregate function combinator to apply a function to Nullable arguments.", ErrorCodes::LOGICAL_ERROR); - DataTypes nested_types = combinator->transformArguments(type_without_low_cardinality); + DataTypes nested_types = combinator->transformArguments(types_without_low_cardinality); Array nested_parameters = combinator->transformParameters(parameters); - bool has_null_arguments = std::any_of(type_without_low_cardinality.begin(), type_without_low_cardinality.end(), + bool has_null_arguments = std::any_of(types_without_low_cardinality.begin(), types_without_low_cardinality.end(), [](const auto & type) { return type->onlyNull(); }); AggregateFunctionPtr nested_function = getImpl( @@ -97,13 +97,10 @@ AggregateFunctionPtr AggregateFunctionFactory::get( // that are rewritten to AggregateFunctionNothing, in this case // nested_function is nullptr. if (!nested_function || !nested_function->isOnlyWindowFunction()) - { - return combinator->transformAggregateFunction(nested_function, - out_properties, type_without_low_cardinality, parameters); - } + return combinator->transformAggregateFunction(nested_function, out_properties, types_without_low_cardinality, parameters); } - auto with_original_arguments = getImpl(name, type_without_low_cardinality, parameters, out_properties, false); + auto with_original_arguments = getImpl(name, types_without_low_cardinality, parameters, out_properties, false); if (!with_original_arguments) throw Exception("Logical error: AggregateFunctionFactory returned nullptr", ErrorCodes::LOGICAL_ERROR); diff --git a/src/AggregateFunctions/AggregateFunctionGroupBitmapData.h b/src/AggregateFunctions/AggregateFunctionGroupBitmapData.h index eee91904b9bc..532e1ce50b33 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupBitmapData.h +++ b/src/AggregateFunctions/AggregateFunctionGroupBitmapData.h @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -557,7 +558,7 @@ class RoaringBitmapWithSmallSet : private boost::noncopyable } if (limit < answer.size()) { - std::nth_element(answer.begin(), answer.begin() + limit, answer.end()); + ::nth_element(answer.begin(), answer.begin() + limit, answer.end()); answer.resize(limit); } diff --git a/src/AggregateFunctions/AggregateFunctionHistogram.h b/src/AggregateFunctions/AggregateFunctionHistogram.h index b858c6b628c4..047ddf1ddeb4 100644 --- a/src/AggregateFunctions/AggregateFunctionHistogram.h +++ b/src/AggregateFunctions/AggregateFunctionHistogram.h @@ -1,5 +1,7 @@ #pragma once +#include + #include #include @@ -72,7 +74,7 @@ class AggregateFunctionHistogramData private: void sort() { - std::sort(points, points + size, + ::sort(points, points + size, [](const WeightedValue & first, const WeightedValue & second) { return first.mean < second.mean; diff --git a/src/AggregateFunctions/AggregateFunctionIf.cpp b/src/AggregateFunctions/AggregateFunctionIf.cpp index 
d752900c0187..ce71e76de435 100644 --- a/src/AggregateFunctions/AggregateFunctionIf.cpp +++ b/src/AggregateFunctions/AggregateFunctionIf.cpp @@ -40,28 +40,6 @@ class AggregateFunctionCombinatorIf final : public IAggregateFunctionCombinator } }; -/** Given an array of flags, checks if it's all zeros - * When the buffer is all zeros, this is slightly faster than doing a memcmp since doesn't require allocating memory - * When the buffer has values, this is much faster since it avoids visiting all memory (and the allocation and function calls) - */ -static bool ALWAYS_INLINE inline is_all_zeros(const UInt8 * flags, size_t size) -{ - size_t unroll_size = size - size % 8; - size_t i = 0; - while (i < unroll_size) - { - UInt64 v = *reinterpret_cast(&flags[i]); - if (v) - return false; - i += 8; - } - - for (; i < size; ++i) - if (flags[i]) - return false; - - return true; -} /** There are two cases: for single argument and variadic. * Code for single argument is much more efficient. @@ -73,6 +51,7 @@ class AggregateFunctionIfNullUnary final { private: size_t num_arguments; + bool filter_is_nullable = false; /// The name of the nested function, including combinators (i.e. *If) /// @@ -92,8 +71,26 @@ class AggregateFunctionIfNullUnary final using Base = AggregateFunctionNullBase>; -public: + inline bool singleFilter(const IColumn ** columns, size_t row_num) const + { + const IColumn * filter_column = columns[num_arguments - 1]; + + if (filter_is_nullable) + { + const ColumnNullable * nullable_column = assert_cast(filter_column); + filter_column = nullable_column->getNestedColumnPtr().get(); + const UInt8 * filter_null_map = nullable_column->getNullMapData().data(); + + return assert_cast(*filter_column).getData()[row_num] && !filter_null_map[row_num]; + } + else + { + return assert_cast(*filter_column).getData()[row_num]; + } + } + +public: String getName() const override { return name; @@ -105,24 +102,17 @@ class AggregateFunctionIfNullUnary final , name(name_) { if (num_arguments == 0) - throw Exception("Aggregate function " + getName() + " require at least one argument", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - } + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Aggregate function {} require at least one argument", getName()); - static inline bool singleFilter(const IColumn ** columns, size_t row_num, size_t num_arguments) - { - const IColumn * filter_column = columns[num_arguments - 1]; - if (const ColumnNullable * nullable_column = typeid_cast(filter_column)) - filter_column = nullable_column->getNestedColumnPtr().get(); - - return assert_cast(*filter_column).getData()[row_num]; + filter_is_nullable = arguments[num_arguments - 1]->isNullable(); } void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override { const ColumnNullable * column = assert_cast(columns[0]); const IColumn * nested_column = &column->getNestedColumn(); - if (!column->isNullAt(row_num) && singleFilter(columns, row_num, num_arguments)) + if (!column->isNullAt(row_num) && singleFilter(columns, row_num)) { this->setFlag(place); this->nested_function->add(this->nestedPlace(place), &nested_column, row_num, arena); @@ -136,29 +126,41 @@ class AggregateFunctionIfNullUnary final const IColumn * columns_param[] = {&column->getNestedColumn()}; const IColumn * filter_column = columns[num_arguments - 1]; - if (const ColumnNullable * nullable_column = typeid_cast(filter_column)) - filter_column = nullable_column->getNestedColumnPtr().get(); - if constexpr 
(result_is_nullable) + + const UInt8 * filter_values = nullptr; + const UInt8 * filter_null_map = nullptr; + + if (filter_is_nullable) { - /// We need to check if there is work to do as otherwise setting the flag would be a mistake, - /// it would mean that the return value would be the default value of the nested type instead of NULL - if (is_all_zeros(assert_cast(filter_column)->getData().data(), batch_size)) - return; + const ColumnNullable * nullable_column = assert_cast(filter_column); + filter_column = nullable_column->getNestedColumnPtr().get(); + filter_null_map = nullable_column->getNullMapData().data(); } + filter_values = assert_cast(filter_column)->getData().data(); + /// Combine the 2 flag arrays so we can call a simplified version (one check vs 2) /// Note that now the null map will contain 0 if not null and not filtered, or 1 for null or filtered (or both) - const auto * filter_flags = assert_cast(filter_column)->getData().data(); + auto final_nulls = std::make_unique(batch_size); - for (size_t i = 0; i < batch_size; ++i) - final_nulls[i] = (!!null_map[i]) | (!filter_flags[i]); - this->nested_function->addBatchSinglePlaceNotNull( - batch_size, this->nestedPlace(place), columns_param, final_nulls.get(), arena, -1); + if (filter_null_map) + for (size_t i = 0; i < batch_size; ++i) + final_nulls[i] = (!!null_map[i]) | (!filter_values[i]) | (!!filter_null_map[i]); + else + for (size_t i = 0; i < batch_size; ++i) + final_nulls[i] = (!!null_map[i]) | (!filter_values[i]); if constexpr (result_is_nullable) - if (!memoryIsByte(null_map, batch_size, 1)) + { + if (!memoryIsByte(final_nulls.get(), batch_size, 1)) this->setFlag(place); + else + return; /// No work to do. + } + + this->nested_function->addBatchSinglePlaceNotNull( + batch_size, this->nestedPlace(place), columns_param, final_nulls.get(), arena, -1); } #if USE_EMBEDDED_COMPILER @@ -367,10 +369,14 @@ AggregateFunctionPtr AggregateFunctionIf::getOwnNullAdapter( const AggregateFunctionPtr & nested_function, const DataTypes & arguments, const Array & params, const AggregateFunctionProperties & properties) const { - bool return_type_is_nullable = !properties.returns_default_when_only_null && getReturnType()->canBeInsideNullable(); - size_t nullable_size = std::count_if(arguments.begin(), arguments.end(), [](const auto & element) { return element->isNullable(); }); - return_type_is_nullable &= nullable_size != 1 || !arguments.back()->isNullable(); /// If only condition is nullable. we should non-nullable type. - bool serialize_flag = return_type_is_nullable || properties.returns_default_when_only_null; + assert(!arguments.empty()); + + /// Nullability of the last argument (condition) does not affect the nullability of the result (NULL is processed as false). + /// For other arguments it is as usual (at least one is NULL then the result is NULL if possible). 
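The batch path above folds three per-row conditions into one mask before handing it to addBatchSinglePlaceNotNull: the value being NULL, the filter being false, and the filter value itself being NULL. A standalone sketch of that mask merge:

#include <cstddef>
#include <cstdint>
#include <memory>

// A row must be skipped when its value is NULL, when the filter is false, or
// when the filter value is itself NULL; merging the masks up front leaves a
// single test per row in the hot loop.
std::unique_ptr<uint8_t[]> combineSkipFlags(
    const uint8_t * null_map,
    const uint8_t * filter_values,
    const uint8_t * filter_null_map, // may be nullptr for a non-nullable filter
    size_t n)
{
    auto skip = std::make_unique<uint8_t[]>(n);
    for (size_t i = 0; i < n; ++i)
        skip[i] = (null_map[i] != 0) | (filter_values[i] == 0)
            | (filter_null_map != nullptr && filter_null_map[i] != 0);
    return skip;
}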
+ bool return_type_is_nullable = !properties.returns_default_when_only_null && getReturnType()->canBeInsideNullable() + && std::any_of(arguments.begin(), arguments.end() - 1, [](const auto & element) { return element->isNullable(); }); + + bool need_to_serialize_flag = return_type_is_nullable || properties.returns_default_when_only_null; if (arguments.size() <= 2 && arguments.front()->isNullable()) { @@ -380,7 +386,7 @@ AggregateFunctionPtr AggregateFunctionIf::getOwnNullAdapter( } else { - if (serialize_flag) + if (need_to_serialize_flag) return std::make_shared>(nested_function->getName(), nested_func, arguments, params); else return std::make_shared>(nested_function->getName(), nested_func, arguments, params); @@ -394,7 +400,7 @@ AggregateFunctionPtr AggregateFunctionIf::getOwnNullAdapter( } else { - if (serialize_flag) + if (need_to_serialize_flag) return std::make_shared>(nested_function, arguments, params); else return std::make_shared>(nested_function, arguments, params); diff --git a/src/AggregateFunctions/AggregateFunctionIntervalLengthSum.h b/src/AggregateFunctions/AggregateFunctionIntervalLengthSum.h index 443d76f47cb3..92f527f7c43b 100644 --- a/src/AggregateFunctions/AggregateFunctionIntervalLengthSum.h +++ b/src/AggregateFunctions/AggregateFunctionIntervalLengthSum.h @@ -1,5 +1,7 @@ #pragma once +#include + #include #include @@ -7,6 +9,7 @@ #include #include #include +#include #include #include @@ -14,8 +17,6 @@ #include #include -#include - namespace DB { @@ -67,7 +68,7 @@ struct AggregateFunctionIntervalLengthSumData /// either sort whole container or do so partially merging ranges afterwards if (!sorted && !other.sorted) { - std::sort(std::begin(segments), std::end(segments)); + ::sort(std::begin(segments), std::end(segments)); } else { @@ -76,10 +77,10 @@ struct AggregateFunctionIntervalLengthSumData const auto end = std::end(segments); if (!sorted) - std::sort(begin, middle); + ::sort(begin, middle); if (!other.sorted) - std::sort(middle, end); + ::sort(middle, end); std::inplace_merge(begin, middle, end); } @@ -89,11 +90,11 @@ struct AggregateFunctionIntervalLengthSumData void sort() { - if (!sorted) - { - std::sort(std::begin(segments), std::end(segments)); - sorted = true; - } + if (sorted) + return; + + ::sort(std::begin(segments), std::end(segments)); + sorted = true; } void serialize(WriteBuffer & buf) const diff --git a/src/AggregateFunctions/AggregateFunctionMap.h b/src/AggregateFunctions/AggregateFunctionMap.h index 550fd138452b..79c189446205 100644 --- a/src/AggregateFunctions/AggregateFunctionMap.h +++ b/src/AggregateFunctions/AggregateFunctionMap.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include #include @@ -226,7 +227,7 @@ class AggregateFunctionMap final { keys.push_back(it.first); } - std::sort(keys.begin(), keys.end()); + ::sort(keys.begin(), keys.end()); // insert using sorted keys to result column for (auto & key : keys) diff --git a/src/AggregateFunctions/AggregateFunctionMaxIntersections.h b/src/AggregateFunctions/AggregateFunctionMaxIntersections.h index 8755c9db5dbd..18af4f0a2204 100644 --- a/src/AggregateFunctions/AggregateFunctionMaxIntersections.h +++ b/src/AggregateFunctions/AggregateFunctionMaxIntersections.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include @@ -142,7 +143,7 @@ class AggregateFunctionIntersectionsMax final auto & array = this->data(place).value; /// Sort by position; for equal position, sort by weight to get deterministic result. 
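Throughout this patch, std::sort and std::nth_element calls are replaced with global ::sort and ::nth_element pulled in via base/sort.h, which appears to route all sorting through one central, swappable implementation (pdqsort-based in practice; treated as an assumption here). The shape of such a wrapper, with std::sort standing in for the real backend:

#include <algorithm>
#include <utility>
#include <vector>

template <typename RandomIt, typename... Rest>
void sort(RandomIt first, RandomIt last, Rest &&... rest) // global namespace
{
    std::sort(first, last, std::forward<Rest>(rest)...); // swappable backend
}

int main()
{
    std::vector<int> v{3, 1, 2};
    ::sort(v.begin(), v.end()); // callers qualify with :: exactly as in the patch
}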
- std::sort(array.begin(), array.end()); + ::sort(array.begin(), array.end()); for (const auto & point_weight : array) { diff --git a/src/AggregateFunctions/AggregateFunctionMeanZTest.cpp b/src/AggregateFunctions/AggregateFunctionMeanZTest.cpp new file mode 100644 index 000000000000..edc4361bce36 --- /dev/null +++ b/src/AggregateFunctions/AggregateFunctionMeanZTest.cpp @@ -0,0 +1,64 @@ +#include +#include +#include +#include + + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} + + +namespace DB +{ +struct Settings; + +namespace +{ + +struct MeanZTestData : public ZTestMoments +{ + static constexpr auto name = "meanZTest"; + + std::pair getResult(Float64 pop_var_x, Float64 pop_var_y) const + { + Float64 mean_x = getMeanX(); + Float64 mean_y = getMeanY(); + + /// z = \frac{\bar{X_{1}} - \bar{X_{2}}}{\sqrt{\frac{\sigma_{1}^{2}}{n_{1}} + \frac{\sigma_{2}^{2}}{n_{2}}}} + Float64 zstat = (mean_x - mean_y) / getStandardError(pop_var_x, pop_var_y); + if (!std::isfinite(zstat)) + { + return {std::numeric_limits::quiet_NaN(), std::numeric_limits::quiet_NaN()}; + } + + Float64 pvalue = 2.0 * boost::math::cdf(boost::math::normal(0.0, 1.0), -1.0 * std::abs(zstat)); + + return {zstat, pvalue}; + } +}; + +AggregateFunctionPtr createAggregateFunctionMeanZTest( + const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *) +{ + assertBinary(name, argument_types); + + if (parameters.size() != 3) + throw Exception("Aggregate function " + name + " requires three parameter.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + if (!isNumber(argument_types[0]) || !isNumber(argument_types[1])) + throw Exception("Aggregate function " + name + " only supports numerical types", ErrorCodes::BAD_ARGUMENTS); + + return std::make_shared>(argument_types, parameters); +} + +} + +void registerAggregateFunctionMeanZTest(AggregateFunctionFactory & factory) +{ + factory.registerFunction("meanZTest", createAggregateFunctionMeanZTest); +} + +} diff --git a/src/AggregateFunctions/AggregateFunctionMeanZTest.h b/src/AggregateFunctions/AggregateFunctionMeanZTest.h new file mode 100644 index 000000000000..e4be2503d87f --- /dev/null +++ b/src/AggregateFunctions/AggregateFunctionMeanZTest.h @@ -0,0 +1,139 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ +struct Settings; + +class ReadBuffer; +class WriteBuffer; + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} + + +/// Returns tuple of (z-statistic, p-value, confidence-interval-low, confidence-interval-high) +template +class AggregateFunctionMeanZTest : + public IAggregateFunctionDataHelper> +{ +private: + Float64 pop_var_x; + Float64 pop_var_y; + Float64 confidence_level; + +public: + AggregateFunctionMeanZTest(const DataTypes & arguments, const Array & params) + : IAggregateFunctionDataHelper>({arguments}, params) + { + pop_var_x = params.at(0).safeGet(); + pop_var_y = params.at(1).safeGet(); + confidence_level = params.at(2).safeGet(); + + if (!std::isfinite(pop_var_x) || !std::isfinite(pop_var_y) || !std::isfinite(confidence_level)) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Aggregate function {} requires finite parameter values.", Data::name); + } + + if (pop_var_x < 0.0 || pop_var_y < 0.0) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Population variance parameters must be larger than or equal to zero in aggregate function {}.", Data::name); + } + + if 
(confidence_level <= 0.0 || confidence_level >= 1.0) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Confidence level parameter must be between 0 and 1 in aggregate function {}.", Data::name); + } + } + + String getName() const override + { + return Data::name; + } + + DataTypePtr getReturnType() const override + { + DataTypes types + { + std::make_shared>(), + std::make_shared>(), + std::make_shared>(), + std::make_shared>(), + }; + + Strings names + { + "z_statistic", + "p_value", + "confidence_interval_low", + "confidence_interval_high" + }; + + return std::make_shared( + std::move(types), + std::move(names) + ); + } + + bool allocatesMemoryInArena() const override { return false; } + + void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override + { + Float64 value = columns[0]->getFloat64(row_num); + UInt8 is_second = columns[1]->getUInt(row_num); + + if (is_second) + this->data(place).addY(value); + else + this->data(place).addX(value); + } + + void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override + { + this->data(place).merge(this->data(rhs)); + } + + void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional /* version */) const override + { + this->data(place).write(buf); + } + + void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional /* version */, Arena *) const override + { + this->data(place).read(buf); + } + + void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override + { + auto [z_stat, p_value] = this->data(place).getResult(pop_var_x, pop_var_y); + auto [ci_low, ci_high] = this->data(place).getConfidenceIntervals(pop_var_x, pop_var_y, confidence_level); + + /// Because p-value is a probability. 
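The meanZTest code above computes z from the two sample means, the known population variances and the sample sizes, then derives a two-sided p-value from the standard normal CDF. A standalone worked version with made-up numbers, using std::erfc instead of boost::math: since Phi(-|z|) = erfc(|z| / sqrt(2)) / 2, the two-sided p-value 2 * Phi(-|z|) collapses to erfc(|z| / sqrt(2)):

#include <cmath>
#include <cstdio>

int main()
{
    // Made-up inputs: sample means, known population variances, sample sizes.
    double mean_x = 10.2, mean_y = 9.8;
    double pop_var_x = 4.0, pop_var_y = 4.0;
    double nx = 100.0, ny = 100.0;

    // z = (mean_x - mean_y) / sqrt(var_x / nx + var_y / ny)
    double z = (mean_x - mean_y) / std::sqrt(pop_var_x / nx + pop_var_y / ny);

    // Two-sided p-value.
    double p = std::erfc(std::fabs(z) / std::sqrt(2.0));

    std::printf("z = %.4f, p = %.4f\n", z, p); // z ~ 1.4142, p ~ 0.1573
}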
+ p_value = std::min(1.0, std::max(0.0, p_value)); + + auto & column_tuple = assert_cast(to); + auto & column_stat = assert_cast &>(column_tuple.getColumn(0)); + auto & column_value = assert_cast &>(column_tuple.getColumn(1)); + auto & column_ci_low = assert_cast &>(column_tuple.getColumn(2)); + auto & column_ci_high = assert_cast &>(column_tuple.getColumn(3)); + + column_stat.getData().push_back(z_stat); + column_value.getData().push_back(p_value); + column_ci_low.getData().push_back(ci_low); + column_ci_high.getData().push_back(ci_high); + } +}; + +}; diff --git a/src/AggregateFunctions/AggregateFunctionSequenceMatch.h b/src/AggregateFunctions/AggregateFunctionSequenceMatch.h index 046348397518..248454bef02e 100644 --- a/src/AggregateFunctions/AggregateFunctionSequenceMatch.h +++ b/src/AggregateFunctions/AggregateFunctionSequenceMatch.h @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -74,11 +75,11 @@ struct AggregateFunctionSequenceMatchData final void sort() { - if (!sorted) - { - std::sort(std::begin(events_list), std::end(events_list), Comparator{}); - sorted = true; - } + if (sorted) + return; + + ::sort(std::begin(events_list), std::end(events_list), Comparator{}); + sorted = true; } void serialize(WriteBuffer & buf) const diff --git a/src/AggregateFunctions/AggregateFunctionSimpleState.h b/src/AggregateFunctions/AggregateFunctionSimpleState.h index d32d9a4f8060..d63d8b71b8ce 100644 --- a/src/AggregateFunctions/AggregateFunctionSimpleState.h +++ b/src/AggregateFunctions/AggregateFunctionSimpleState.h @@ -17,15 +17,11 @@ class AggregateFunctionSimpleState final : public IAggregateFunctionHelper(arguments_, params_) , nested_func(nested_) - , arguments(arguments_) - , params(params_) { } @@ -35,18 +31,19 @@ class AggregateFunctionSimpleState final : public IAggregateFunctionHelpergetReturnType()->getName()); - + // Need to make a clone to avoid recursive reference. + auto storage_type_out = DataTypeFactory::instance().get(nested_func->getReturnType()->getName()); // Need to make a new function with promoted argument types because SimpleAggregates requires arg_type = return_type. AggregateFunctionProperties properties; auto function - = AggregateFunctionFactory::instance().get(nested_func->getName(), {storage_type}, nested_func->getParameters(), properties); + = AggregateFunctionFactory::instance().get(nested_func->getName(), {storage_type_out}, nested_func->getParameters(), properties); + // Need to make a clone because it'll be customized. 
+ auto storage_type_arg = DataTypeFactory::instance().get(nested_func->getReturnType()->getName()); DataTypeCustomNamePtr custom_name - = std::make_unique(function, DataTypes{nested_func->getReturnType()}, params); - storage_type->setCustomization(std::make_unique(std::move(custom_name), nullptr)); - return storage_type; + = std::make_unique(function, DataTypes{nested_func->getReturnType()}, parameters); + storage_type_arg->setCustomization(std::make_unique(std::move(custom_name), nullptr)); + return storage_type_arg; } bool isVersioned() const override diff --git a/src/AggregateFunctions/AggregateFunctionState.h b/src/AggregateFunctions/AggregateFunctionState.h index 98fcfa83d67c..f4f55835c935 100644 --- a/src/AggregateFunctions/AggregateFunctionState.h +++ b/src/AggregateFunctions/AggregateFunctionState.h @@ -20,13 +20,12 @@ class AggregateFunctionState final : public IAggregateFunctionHelper(arguments_, params_) - , nested_func(nested_), arguments(arguments_), params(params_) {} + , nested_func(nested_) + {} String getName() const override { diff --git a/src/AggregateFunctions/AggregateFunctionStudentTTest.cpp b/src/AggregateFunctions/AggregateFunctionStudentTTest.cpp index c6e7029d2835..83a91ef06fc3 100644 --- a/src/AggregateFunctions/AggregateFunctionStudentTTest.cpp +++ b/src/AggregateFunctions/AggregateFunctionStudentTTest.cpp @@ -7,6 +7,7 @@ namespace ErrorCodes { extern const int BAD_ARGUMENTS; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } @@ -27,14 +28,24 @@ struct StudentTTestData : public TTestMoments { static constexpr auto name = "studentTTest"; - std::pair getResult() const + bool hasEnoughObservations() const { - Float64 mean_x = x1 / nx; - Float64 mean_y = y1 / ny; + return nx > 0 && ny > 0 && nx + ny > 2; + } + + Float64 getDegreesOfFreedom() const + { + return nx + ny - 2; + } + + std::tuple getResult() const + { + Float64 mean_x = getMeanX(); + Float64 mean_y = getMeanY(); /// To estimate the variance we first estimate two means. /// That's why the number of degrees of freedom is the total number of values of both samples minus 2. - Float64 degrees_of_freedom = nx + ny - 2; + Float64 degrees_of_freedom = getDegreesOfFreedom(); /// Calculate s^2 /// The original formulae looks like @@ -59,12 +70,14 @@ AggregateFunctionPtr createAggregateFunctionStudentTTest( const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *) { assertBinary(name, argument_types); - assertNoParameters(name, parameters); + + if (parameters.size() > 1) + throw Exception("Aggregate function " + name + " requires zero or one parameter.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); if (!isNumber(argument_types[0]) || !isNumber(argument_types[1])) throw Exception("Aggregate function " + name + " only supports numerical types", ErrorCodes::BAD_ARGUMENTS); - return std::make_shared>(argument_types); + return std::make_shared>(argument_types, parameters); } } diff --git a/src/AggregateFunctions/AggregateFunctionSumMap.h b/src/AggregateFunctions/AggregateFunctionSumMap.h index 7e661a92c5b5..295258cd8cf8 100644 --- a/src/AggregateFunctions/AggregateFunctionSumMap.h +++ b/src/AggregateFunctions/AggregateFunctionSumMap.h @@ -226,7 +226,7 @@ class AggregateFunctionMapBase : public IAggregateFunctionDataHelper< { // FIXME why is storing NearestFieldType not enough, and we // have to check for decimals again here? 
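The SumMap fix in the hunk that follows reads the scale directly from the decimal column instead of going through getData(). As a reminder of what the scale means, a decimal is stored as a plain integer plus a power-of-ten scale. A tiny self-contained illustration; this DecimalField is a stand-in struct, not ClickHouse's:

#include <cstdint>
#include <cstdio>

struct DecimalField { int64_t value; uint32_t scale; };

double toDouble(const DecimalField & f)
{
    double divisor = 1.0;
    for (uint32_t i = 0; i < f.scale; ++i)
        divisor *= 10.0;
    return static_cast<double>(f.value) / divisor;
}

int main() { std::printf("%.2f\n", toDouble({12345, 2})); } // prints 123.45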
- UInt32 scale = static_cast &>(key_column).getData().getScale(); + UInt32 scale = static_cast &>(key_column).getScale(); it = merged_maps.find(DecimalField(key, scale)); } else @@ -251,7 +251,7 @@ class AggregateFunctionMapBase : public IAggregateFunctionDataHelper< if constexpr (is_decimal) { - UInt32 scale = static_cast &>(key_column).getData().getScale(); + UInt32 scale = static_cast &>(key_column).getScale(); merged_maps.emplace(DecimalField(key, scale), std::move(new_values)); } else diff --git a/src/AggregateFunctions/AggregateFunctionTTest.h b/src/AggregateFunctions/AggregateFunctionTTest.h index a91ce16c3ffc..40cf00238785 100644 --- a/src/AggregateFunctions/AggregateFunctionTTest.h +++ b/src/AggregateFunctions/AggregateFunctionTTest.h @@ -9,6 +9,7 @@ #include #include #include +#include /// This function is used in implementations of different T-Tests. @@ -28,6 +29,11 @@ struct Settings; class ReadBuffer; class WriteBuffer; +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} + /** * If you have a cumulative distribution function F, then calculating the p-value for given statistic T is simply 1−F(T) * In our case p-value is two-sided, so we multiply it by 2. @@ -79,10 +85,29 @@ template class AggregateFunctionTTest : public IAggregateFunctionDataHelper> { +private: + bool need_confidence_interval = false; + Float64 confidence_level; public: - AggregateFunctionTTest(const DataTypes & arguments) - : IAggregateFunctionDataHelper>({arguments}, {}) + AggregateFunctionTTest(const DataTypes & arguments, const Array & params) + : IAggregateFunctionDataHelper>({arguments}, params) { + if (params.size() > 0) + { + need_confidence_interval = true; + confidence_level = params.at(0).safeGet(); + + if (!std::isfinite(confidence_level)) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Aggregate function {} requires finite parameter values.", Data::name); + } + + if (confidence_level <= 0.0 || confidence_level >= 1.0 || fabs(confidence_level - 0.0) < DBL_EPSILON || fabs(confidence_level - 1.0) < DBL_EPSILON) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Confidence level parameter must be between 0 and 1 in aggregate function {}.", Data::name); + } + + } } String getName() const override @@ -92,22 +117,48 @@ class AggregateFunctionTTest : DataTypePtr getReturnType() const override { - DataTypes types + if (need_confidence_interval) { - std::make_shared>(), - std::make_shared>(), - }; - - Strings names + DataTypes types + { + std::make_shared>(), + std::make_shared>(), + std::make_shared>(), + std::make_shared>(), + }; + + Strings names + { + "t_statistic", + "p_value", + "confidence_interval_low", + "confidence_interval_high", + }; + + return std::make_shared( + std::move(types), + std::move(names) + ); + } + else { - "t_statistic", - "p_value" - }; - - return std::make_shared( - std::move(types), - std::move(names) - ); + DataTypes types + { + std::make_shared>(), + std::make_shared>(), + }; + + Strings names + { + "t_statistic", + "p_value", + }; + + return std::make_shared( + std::move(types), + std::move(names) + ); + } } bool allocatesMemoryInArena() const override { return false; } @@ -140,17 +191,46 @@ class AggregateFunctionTTest : void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override { - auto [t_statistic, p_value] = this->data(place).getResult(); + auto & data = this->data(place); + auto & column_tuple = assert_cast(to); + + if (!data.hasEnoughObservations() || data.isEssentiallyConstant()) + { + auto & column_stat = assert_cast 
&>(column_tuple.getColumn(0)); + auto & column_value = assert_cast &>(column_tuple.getColumn(1)); + column_stat.getData().push_back(std::numeric_limits::quiet_NaN()); + column_value.getData().push_back(std::numeric_limits::quiet_NaN()); + + if (need_confidence_interval) + { + auto & column_ci_low = assert_cast &>(column_tuple.getColumn(2)); + auto & column_ci_high = assert_cast &>(column_tuple.getColumn(3)); + column_ci_low.getData().push_back(std::numeric_limits::quiet_NaN()); + column_ci_high.getData().push_back(std::numeric_limits::quiet_NaN()); + } + + return; + } + + auto [t_statistic, p_value] = data.getResult(); /// Because p-value is a probability. p_value = std::min(1.0, std::max(0.0, p_value)); - auto & column_tuple = assert_cast(to); auto & column_stat = assert_cast &>(column_tuple.getColumn(0)); auto & column_value = assert_cast &>(column_tuple.getColumn(1)); column_stat.getData().push_back(t_statistic); column_value.getData().push_back(p_value); + + if (need_confidence_interval) + { + auto [ci_low, ci_high] = data.getConfidenceIntervals(confidence_level, data.getDegreesOfFreedom()); + auto & column_ci_low = assert_cast &>(column_tuple.getColumn(2)); + auto & column_ci_high = assert_cast &>(column_tuple.getColumn(3)); + column_ci_low.getData().push_back(ci_low); + column_ci_high.getData().push_back(ci_high); + } } }; diff --git a/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp b/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp index c7c56da79f65..fe5cf83c5093 100644 --- a/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp +++ b/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp @@ -7,6 +7,7 @@ namespace ErrorCodes { extern const int BAD_ARGUMENTS; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } @@ -21,34 +22,38 @@ struct WelchTTestData : public TTestMoments { static constexpr auto name = "welchTTest"; - std::pair getResult() const + bool hasEnoughObservations() const { - Float64 mean_x = x1 / nx; - Float64 mean_y = y1 / ny; - - /// s_x^2, s_y^2 + return nx > 1 && ny > 1; + } - /// The original formulae looks like \frac{1}{size_x - 1} \sum_{i = 1}^{size_x}{(x_i - \bar{x}) ^ 2} - /// But we made some mathematical transformations not to store original sequences. - /// Also we dropped sqrt, because later it will be squared later. 
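+ /// Welch–Satterthwaite approximation of the degrees of freedom: + /// nu = (s_x^2/n_x + s_y^2/n_y)^2 / ((s_x^2/n_x)^2/(n_x - 1) + (s_y^2/n_y)^2/(n_y - 1)); + /// the numerator/denominator variables below compute exactly these terms.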
+ Float64 getDegreesOfFreedom() const + { + Float64 mean_x = getMeanX(); + Float64 mean_y = getMeanY(); Float64 sx2 = (x2 + nx * mean_x * mean_x - 2 * mean_x * x1) / (nx - 1); Float64 sy2 = (y2 + ny * mean_y * mean_y - 2 * mean_y * y1) / (ny - 1); - /// t-statistic - Float64 t_stat = (mean_x - mean_y) / sqrt(sx2 / nx + sy2 / ny); - - /// degrees of freedom - Float64 numerator_sqrt = sx2 / nx + sy2 / ny; Float64 numerator = numerator_sqrt * numerator_sqrt; Float64 denominator_x = sx2 * sx2 / (nx * nx * (nx - 1)); Float64 denominator_y = sy2 * sy2 / (ny * ny * (ny - 1)); - Float64 degrees_of_freedom = numerator / (denominator_x + denominator_y); + return numerator / (denominator_x + denominator_y); + } + + std::tuple<Float64, Float64> getResult() const + { + Float64 mean_x = getMeanX(); + Float64 mean_y = getMeanY(); - return {t_stat, getPValue(degrees_of_freedom, t_stat * t_stat)}; + /// t-statistic + Float64 se = getStandardError(); + Float64 t_stat = (mean_x - mean_y) / se; + + return {t_stat, getPValue(getDegreesOfFreedom(), t_stat * t_stat)}; } }; @@ -56,12 +61,14 @@ AggregateFunctionPtr createAggregateFunctionWelchTTest( const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *) { assertBinary(name, argument_types); - assertNoParameters(name, parameters); + + if (parameters.size() > 1) + throw Exception("Aggregate function " + name + " requires zero or one parameter.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); if (!isNumber(argument_types[0]) || !isNumber(argument_types[1])) throw Exception("Aggregate function " + name + " only supports numerical types", ErrorCodes::BAD_ARGUMENTS); - return std::make_shared<AggregateFunctionTTest<WelchTTestData>>(argument_types); + return std::make_shared<AggregateFunctionTTest<WelchTTestData>>(argument_types, parameters); } } diff --git a/src/AggregateFunctions/CMakeLists.txt b/src/AggregateFunctions/CMakeLists.txt index 64f6eed9a6c6..0cb38fc729a6 100644 --- a/src/AggregateFunctions/CMakeLists.txt +++ b/src/AggregateFunctions/CMakeLists.txt @@ -23,7 +23,7 @@ list(REMOVE_ITEM clickhouse_aggregate_functions_headers ) add_library(clickhouse_aggregate_functions ${clickhouse_aggregate_functions_sources}) -target_link_libraries(clickhouse_aggregate_functions PRIVATE dbms PUBLIC ${CITYHASH_LIBRARIES}) +target_link_libraries(clickhouse_aggregate_functions PRIVATE dbms PUBLIC ch_contrib::cityhash) if(ENABLE_EXAMPLES) add_subdirectory(examples) diff --git a/src/AggregateFunctions/Moments.h b/src/AggregateFunctions/Moments.h index 6f51e76607f2..45a77e9cfdb2 100644 --- a/src/AggregateFunctions/Moments.h +++ b/src/AggregateFunctions/Moments.h @@ -2,6 +2,9 @@ #include #include +#include <cfloat> +#include <boost/math/distributions/normal.hpp> +#include <boost/math/distributions/students_t.hpp> namespace DB @@ -357,6 +360,120 @@ struct TTestMoments { readPODBinary(*this, buf); } + + Float64 getMeanX() const + { + return x1 / nx; + } + + Float64 getMeanY() const + { + return y1 / ny; + } + + Float64 getStandardError() const + { + /// The original formula looks like \frac{1}{size_x - 1} \sum_{i = 1}^{size_x}{(x_i - \bar{x}) ^ 2} + /// But we made some mathematical transformations so as not to store the original sequences. + /// Also we dropped sqrt, because it will be squared later.
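+ /// The standard error of the mean difference is then sqrt(s_x^2 / n_x + s_y^2 / n_y); + /// getConfidenceIntervals() below turns it into mean_diff ± t * se, where t is the + /// two-sided Student quantile for the requested confidence level.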
+ Float64 mean_x = getMeanX(); + Float64 mean_y = getMeanY(); + + Float64 sx2 = (x2 + nx * mean_x * mean_x - 2 * mean_x * x1) / (nx - 1); + Float64 sy2 = (y2 + ny * mean_y * mean_y - 2 * mean_y * y1) / (ny - 1); + + return sqrt(sx2 / nx + sy2 / ny); + } + + std::pair getConfidenceIntervals(Float64 confidence_level, Float64 degrees_of_freedom) const + { + Float64 mean_x = getMeanX(); + Float64 mean_y = getMeanY(); + Float64 se = getStandardError(); + + boost::math::students_t dist(degrees_of_freedom); + Float64 t = boost::math::quantile(boost::math::complement(dist, (1.0 - confidence_level) / 2.0)); + Float64 mean_diff = mean_x - mean_y; + Float64 ci_low = mean_diff - t * se; + Float64 ci_high = mean_diff + t * se; + + return {ci_low, ci_high}; + } + + bool isEssentiallyConstant() const + { + return getStandardError() < 10 * DBL_EPSILON * std::max(std::abs(getMeanX()), std::abs(getMeanY())); + } +}; + +template +struct ZTestMoments +{ + T nx{}; + T ny{}; + T x1{}; + T y1{}; + + void addX(T value) + { + ++nx; + x1 += value; + } + + void addY(T value) + { + ++ny; + y1 += value; + } + + void merge(const ZTestMoments & rhs) + { + nx += rhs.nx; + ny += rhs.ny; + x1 += rhs.x1; + y1 += rhs.y1; + } + + void write(WriteBuffer & buf) const + { + writePODBinary(*this, buf); + } + + void read(ReadBuffer & buf) + { + readPODBinary(*this, buf); + } + + Float64 getMeanX() const + { + return x1 / nx; + } + + Float64 getMeanY() const + { + return y1 / ny; + } + + Float64 getStandardError(Float64 pop_var_x, Float64 pop_var_y) const + { + /// \sqrt{\frac{\sigma_{1}^{2}}{n_{1}} + \frac{\sigma_{2}^{2}}{n_{2}}} + return std::sqrt(pop_var_x / nx + pop_var_y / ny); + } + + std::pair getConfidenceIntervals(Float64 pop_var_x, Float64 pop_var_y, Float64 confidence_level) const + { + /// (\bar{x_{1}} - \bar{x_{2}}) \pm zscore \times \sqrt{\frac{\sigma_{1}^{2}}{n_{1}} + \frac{\sigma_{2}^{2}}{n_{2}}} + Float64 mean_x = getMeanX(); + Float64 mean_y = getMeanY(); + + Float64 z = boost::math::quantile(boost::math::complement( + boost::math::normal(0.0f, 1.0f), (1.0f - confidence_level) / 2.0f)); + Float64 se = getStandardError(pop_var_x, pop_var_y); + Float64 ci_low = (mean_x - mean_y) - z * se; + Float64 ci_high = (mean_x - mean_y) + z * se; + + return {ci_low, ci_high}; + } }; } diff --git a/src/AggregateFunctions/QuantileBFloat16Histogram.h b/src/AggregateFunctions/QuantileBFloat16Histogram.h index 2a71522c1fc9..e60945f32adc 100644 --- a/src/AggregateFunctions/QuantileBFloat16Histogram.h +++ b/src/AggregateFunctions/QuantileBFloat16Histogram.h @@ -2,6 +2,7 @@ #include #include +#include #include #include @@ -134,7 +135,7 @@ struct QuantileBFloat16Histogram ++arr_it; } - std::sort(array, array + size, [](const Pair & a, const Pair & b) { return a.first < b.first; }); + ::sort(array, array + size, [](const Pair & a, const Pair & b) { return a.first < b.first; }); Float64 threshold = std::ceil(sum_weight * level); Float64 accumulated = 0; @@ -175,7 +176,7 @@ struct QuantileBFloat16Histogram ++arr_it; } - std::sort(array, array + size, [](const Pair & a, const Pair & b) { return a.first < b.first; }); + ::sort(array, array + size, [](const Pair & a, const Pair & b) { return a.first < b.first; }); size_t level_index = 0; Float64 accumulated = 0; diff --git a/src/AggregateFunctions/QuantileExact.h b/src/AggregateFunctions/QuantileExact.h index 4dcac2472b9e..9be24689d125 100644 --- a/src/AggregateFunctions/QuantileExact.h +++ b/src/AggregateFunctions/QuantileExact.h @@ -88,7 +88,7 @@ struct QuantileExact : QuantileExactBase> 
if (!array.empty()) { size_t n = level < 1 ? level * array.size() : (array.size() - 1); - nth_element(array.begin(), array.begin() + n, array.end()); /// NOTE: You can think of the radix-select algorithm. + ::nth_element(array.begin(), array.begin() + n, array.end()); /// NOTE: You can think of the radix-select algorithm. return array[n]; } @@ -107,7 +107,7 @@ struct QuantileExact : QuantileExactBase> auto level = levels[indices[i]]; size_t n = level < 1 ? level * array.size() : (array.size() - 1); - nth_element(array.begin() + prev_n, array.begin() + n, array.end()); + ::nth_element(array.begin() + prev_n, array.begin() + n, array.end()); result[indices[i]] = array[n]; prev_n = n; } @@ -143,7 +143,7 @@ struct QuantileExactExclusive : public QuantileExact else if (n < 1) return static_cast(array[0]); - nth_element(array.begin(), array.begin() + n - 1, array.end()); + ::nth_element(array.begin(), array.begin() + n - 1, array.end()); auto nth_elem = std::min_element(array.begin() + n, array.end()); return static_cast(array[n - 1]) + (h - n) * static_cast(*nth_elem - array[n - 1]); @@ -172,7 +172,7 @@ struct QuantileExactExclusive : public QuantileExact result[indices[i]] = static_cast(array[0]); else { - nth_element(array.begin() + prev_n, array.begin() + n - 1, array.end()); + ::nth_element(array.begin() + prev_n, array.begin() + n - 1, array.end()); auto nth_elem = std::min_element(array.begin() + n, array.end()); result[indices[i]] = static_cast(array[n - 1]) + (h - n) * static_cast(*nth_elem - array[n - 1]); @@ -207,7 +207,7 @@ struct QuantileExactInclusive : public QuantileExact return static_cast(array[array.size() - 1]); else if (n < 1) return static_cast(array[0]); - nth_element(array.begin(), array.begin() + n - 1, array.end()); + ::nth_element(array.begin(), array.begin() + n - 1, array.end()); auto nth_elem = std::min_element(array.begin() + n, array.end()); return static_cast(array[n - 1]) + (h - n) * static_cast(*nth_elem - array[n - 1]); @@ -234,7 +234,7 @@ struct QuantileExactInclusive : public QuantileExact result[indices[i]] = static_cast(array[0]); else { - nth_element(array.begin() + prev_n, array.begin() + n - 1, array.end()); + ::nth_element(array.begin() + prev_n, array.begin() + n - 1, array.end()); auto nth_elem = std::min_element(array.begin() + n, array.end()); result[indices[i]] = static_cast(array[n - 1]) + (h - n) * (static_cast(*nth_elem) - array[n - 1]); @@ -262,9 +262,7 @@ struct QuantileExactLow : public QuantileExactBase(floor(s / 2))]; + n = static_cast(floor(s / 2)); } else { - return array[static_cast((floor(s / 2)) - 1)]; + n = static_cast((floor(s / 2)) - 1); } } else @@ -284,9 +282,10 @@ struct QuantileExactLow : public QuantileExactBase::quiet_NaN(); } @@ -295,12 +294,11 @@ struct QuantileExactLow : public QuantileExactBase(floor(s / 2))]; + n = static_cast(floor(s / 2)); } else { - result[indices[i]] = array[static_cast(floor((s / 2) - 1))]; + n = static_cast(floor((s / 2) - 1)); } } else { // else quantile is the nth index of the sorted array obtained by multiplying // level and size of array. Example if level = 0.1 and size of array is 10. - size_t n = level < 1 ? level * array.size() : (array.size() - 1); - result[indices[i]] = array[n]; + n = level < 1 ? 
level * array.size() : (array.size() - 1); } + ::nth_element(array.begin() + prev_n, array.begin() + n, array.end()); + result[indices[i]] = array[n]; + prev_n = n; } } else @@ -344,23 +344,22 @@ struct QuantileExactHigh : public QuantileExactBase(floor(s / 2))]; + n = static_cast(floor(s / 2)); } else { // else quantile is the nth index of the sorted array obtained by multiplying // level and size of array. Example if level = 0.1 and size of array is 10. - size_t n = level < 1 ? level * array.size() : (array.size() - 1); - return array[n]; + n = level < 1 ? level * array.size() : (array.size() - 1); } + ::nth_element(array.begin(), array.begin() + n, array.end()); + return array[n]; } return std::numeric_limits::quiet_NaN(); } @@ -369,26 +368,27 @@ struct QuantileExactHigh : public QuantileExactBase(floor(s / 2))]; + n = static_cast(floor(s / 2)); } else { // else quantile is the nth index of the sorted array obtained by multiplying // level and size of array. Example if level = 0.1 and size of array is 10. - size_t n = level < 1 ? level * array.size() : (array.size() - 1); - result[indices[i]] = array[n]; + n = level < 1 ? level * array.size() : (array.size() - 1); } + ::nth_element(array.begin() + prev_n, array.begin() + n, array.end()); + result[indices[i]] = array[n]; + prev_n = n; } } else diff --git a/src/AggregateFunctions/QuantileExactWeighted.h b/src/AggregateFunctions/QuantileExactWeighted.h index 97ad15f7e759..69e716f09ae5 100644 --- a/src/AggregateFunctions/QuantileExactWeighted.h +++ b/src/AggregateFunctions/QuantileExactWeighted.h @@ -1,5 +1,7 @@ #pragma once +#include + #include #include @@ -101,7 +103,7 @@ struct QuantileExactWeighted ++i; } - std::sort(array, array + size, [](const Pair & a, const Pair & b) { return a.first < b.first; }); + ::sort(array, array + size, [](const Pair & a, const Pair & b) { return a.first < b.first; }); Float64 threshold = std::ceil(sum_weight * level); Float64 accumulated = 0; @@ -151,7 +153,7 @@ struct QuantileExactWeighted ++i; } - std::sort(array, array + size, [](const Pair & a, const Pair & b) { return a.first < b.first; }); + ::sort(array, array + size, [](const Pair & a, const Pair & b) { return a.first < b.first; }); Float64 accumulated = 0; diff --git a/src/AggregateFunctions/QuantileTiming.h b/src/AggregateFunctions/QuantileTiming.h index 36f1da3ee608..c89d1b66f5b8 100644 --- a/src/AggregateFunctions/QuantileTiming.h +++ b/src/AggregateFunctions/QuantileTiming.h @@ -90,7 +90,7 @@ namespace detail /** This function must be called before get-functions. */ void prepare() const { - std::sort(elems, elems + count); + ::sort(elems, elems + count); } UInt16 get(double level) const @@ -183,7 +183,7 @@ namespace detail /// Sorting an array will not be considered a violation of constancy. auto & array = elems; - nth_element(array.begin(), array.begin() + n, array.end()); + ::nth_element(array.begin(), array.begin() + n, array.end()); quantile = array[n]; } @@ -204,7 +204,7 @@ namespace detail ? 
? level * elems.size() : (elems.size() - 1); - nth_element(array.begin() + prev_n, array.begin() + n, array.end()); + ::nth_element(array.begin() + prev_n, array.begin() + n, array.end()); result[level_index] = array[n]; prev_n = n; diff --git a/src/AggregateFunctions/QuantilesCommon.h b/src/AggregateFunctions/QuantilesCommon.h index 8a1645c37811..161e25d0dc20 100644 --- a/src/AggregateFunctions/QuantilesCommon.h +++ b/src/AggregateFunctions/QuantilesCommon.h @@ -2,6 +2,8 @@ #include +#include + #include #include @@ -64,7 +66,7 @@ struct QuantileLevels permutation[i] = i; } - std::sort(permutation.begin(), permutation.end(), [this] (size_t a, size_t b) { return levels[a] < levels[b]; }); + ::sort(permutation.begin(), permutation.end(), [this] (size_t a, size_t b) { return levels[a] < levels[b]; }); } }; diff --git a/src/AggregateFunctions/ReservoirSampler.h b/src/AggregateFunctions/ReservoirSampler.h index d113d659520d..1d7529ee8e15 100644 --- a/src/AggregateFunctions/ReservoirSampler.h +++ b/src/AggregateFunctions/ReservoirSampler.h @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -15,6 +16,7 @@ #include #include + namespace DB { struct Settings; @@ -249,7 +251,7 @@ class ReservoirSampler if (sorted) return; sorted = true; - std::sort(samples.begin(), samples.end(), Comparer()); + ::sort(samples.begin(), samples.end(), Comparer()); } template diff --git a/src/AggregateFunctions/ReservoirSamplerDeterministic.h b/src/AggregateFunctions/ReservoirSamplerDeterministic.h index ca716b24ce2d..2baeea76996c 100644 --- a/src/AggregateFunctions/ReservoirSamplerDeterministic.h +++ b/src/AggregateFunctions/ReservoirSamplerDeterministic.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -258,7 +259,9 @@ class ReservoirSamplerDeterministic { if (sorted) return; - std::sort(samples.begin(), samples.end(), [](const auto & lhs, const auto & rhs) { return lhs.first < rhs.first; }); + + /// In order to provide a deterministic result we must sort by both value and hash + ::sort(samples.begin(), samples.end(), [](const auto & lhs, const auto & rhs) { return lhs < rhs; }); sorted = true; } diff --git a/src/AggregateFunctions/StatCommon.h b/src/AggregateFunctions/StatCommon.h index a8fc7c530728..d670e646f4bd 100644 --- a/src/AggregateFunctions/StatCommon.h +++ b/src/AggregateFunctions/StatCommon.h @@ -1,13 +1,17 @@ #pragma once -#include -#include -#include - #include #include #include +#include + +#include + +#include +#include + + namespace DB { struct Settings; @@ -41,7 +45,7 @@ std::pair<RanksArray, Float64> computeRanksAndTieCorrection(const Values & value /// Save initial positions, then sort indices according to the values.
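/// Tied values receive the average of the ranks they span; the accumulated tie /// term is returned so that the rank-based tests built on this helper /// (e.g. mannWhitneyUTest, rankCorr) can correct their variance for ties.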
std::vector indexes(size); std::iota(indexes.begin(), indexes.end(), 0); - std::sort(indexes.begin(), indexes.end(), + ::sort(indexes.begin(), indexes.end(), [&] (size_t lhs, size_t rhs) { return values[lhs] < values[rhs]; }); size_t left = 0; diff --git a/src/AggregateFunctions/registerAggregateFunctions.cpp b/src/AggregateFunctions/registerAggregateFunctions.cpp index 33f6a5322242..351adac31bb3 100644 --- a/src/AggregateFunctions/registerAggregateFunctions.cpp +++ b/src/AggregateFunctions/registerAggregateFunctions.cpp @@ -48,6 +48,7 @@ void registerAggregateFunctionRankCorrelation(AggregateFunctionFactory &); void registerAggregateFunctionMannWhitney(AggregateFunctionFactory &); void registerAggregateFunctionWelchTTest(AggregateFunctionFactory &); void registerAggregateFunctionStudentTTest(AggregateFunctionFactory &); +void registerAggregateFunctionMeanZTest(AggregateFunctionFactory &); void registerAggregateFunctionCramersV(AggregateFunctionFactory &); void registerAggregateFunctionTheilsU(AggregateFunctionFactory &); void registerAggregateFunctionContingency(AggregateFunctionFactory &); @@ -123,6 +124,7 @@ void registerAggregateFunctions() registerAggregateFunctionSequenceNextNode(factory); registerAggregateFunctionWelchTTest(factory); registerAggregateFunctionStudentTTest(factory); + registerAggregateFunctionMeanZTest(factory); registerAggregateFunctionNothing(factory); registerAggregateFunctionSingleValueOrNull(factory); registerAggregateFunctionIntervalLengthSum(factory); diff --git a/src/Backups/BackupUtils.cpp b/src/Backups/BackupUtils.cpp index 5da87cfd6f7b..c26eec440e64 100644 --- a/src/Backups/BackupUtils.cpp +++ b/src/Backups/BackupUtils.cpp @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -632,7 +633,7 @@ BackupEntries makeBackupEntries(const Elements & elements, const ContextPtr & co throw Exception("Backup must not be empty", ErrorCodes::BACKUP_IS_EMPTY); /// Check that all backup entries are unique. - std::sort( + ::sort( backup_entries.begin(), backup_entries.end(), [](const std::pair> & lhs, const std::pair> & rhs) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 8d14bd327b17..b99ffd7ee180 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -11,7 +11,7 @@ if(COMPILER_PIPE) else() set(MAX_COMPILER_MEMORY 1500) endif() -if(MAKE_STATIC_LIBRARIES) +if(USE_STATIC_LIBRARIES) set(MAX_LINKER_MEMORY 3500) else() set(MAX_LINKER_MEMORY 2500) @@ -23,12 +23,13 @@ set (CONFIG_COMMON "${CMAKE_CURRENT_BINARY_DIR}/Common/config.h") include (../cmake/version.cmake) message (STATUS "Will build ${VERSION_FULL} revision ${VERSION_REVISION} ${VERSION_OFFICIAL}") +include (configure_config.cmake) configure_file (Common/config.h.in ${CONFIG_COMMON}) configure_file (Common/config_version.h.in ${CONFIG_VERSION}) configure_file (Core/config_core.h.in "${CMAKE_CURRENT_BINARY_DIR}/Core/include/config_core.h") if (USE_DEBUG_HELPERS) - get_target_property(MAGIC_ENUM_INCLUDE_DIR magic_enum INTERFACE_INCLUDE_DIRECTORIES) + get_target_property(MAGIC_ENUM_INCLUDE_DIR ch_contrib::magic_enum INTERFACE_INCLUDE_DIRECTORIES) # CMake generator expression will do insane quoting when it encounters special character like quotes, spaces, etc. # Prefixing "SHELL:" will force it to use the original text. 
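# Without SHELL: a list element containing spaces would be passed as one quoted argument; # the prefix makes CMake split the string like a shell command line instead.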
set (INCLUDE_DEBUG_HELPERS "SHELL:-I\"${ClickHouse_SOURCE_DIR}/base\" -I\"${MAGIC_ENUM_INCLUDE_DIR}\" -include \"${ClickHouse_SOURCE_DIR}/src/Core/iostream_debug_helpers.h\"") @@ -78,19 +79,20 @@ set(dbms_sources) add_headers_and_sources(clickhouse_common_io Common) add_headers_and_sources(clickhouse_common_io Common/HashTable) add_headers_and_sources(clickhouse_common_io IO) +add_headers_and_sources(clickhouse_common_io IO/Archives) add_headers_and_sources(clickhouse_common_io IO/S3) list (REMOVE_ITEM clickhouse_common_io_sources Common/malloc.cpp Common/new_delete.cpp) add_headers_and_sources(dbms Disks/IO) -if (USE_SQLITE) +if (TARGET ch_contrib::sqlite) add_headers_and_sources(dbms Databases/SQLite) endif() -if(USE_RDKAFKA) +if (TARGET ch_contrib::rdkafka) add_headers_and_sources(dbms Storages/Kafka) endif() -if (USE_AMQPCPP) +if (TARGET ch_contrib::amqp_cpp) add_headers_and_sources(dbms Storages/RabbitMQ) endif() @@ -100,32 +102,34 @@ if (USE_LIBPQXX) add_headers_and_sources(dbms Storages/PostgreSQL) endif() -if (USE_ROCKSDB) +if (TARGET ch_contrib::rocksdb) add_headers_and_sources(dbms Storages/RocksDB) endif() -if (USE_AWS_S3) +if (TARGET ch_contrib::aws_s3) add_headers_and_sources(dbms Common/S3) add_headers_and_sources(dbms Disks/S3) endif() -if (USE_AZURE_BLOB_STORAGE) +if (TARGET ch_contrib::azure_sdk) add_headers_and_sources(dbms Disks/AzureBlobStorage) endif() -if (USE_HDFS) +if (TARGET ch_contrib::hdfs) add_headers_and_sources(dbms Storages/HDFS) add_headers_and_sources(dbms Disks/HDFS) endif() add_headers_and_sources(dbms Storages/Cache) -if (USE_HIVE) +if (TARGET ch_contrib::hivemetastore) add_headers_and_sources(dbms Storages/Hive) endif() -if(USE_FILELOG) +if (OS_LINUX) add_headers_and_sources(dbms Storages/FileLog) -endif() +else() + message(STATUS "StorageFileLog is only supported on Linux") +endif () list (APPEND clickhouse_common_io_sources ${CONFIG_BUILD}) list (APPEND clickhouse_common_io_headers ${CONFIG_VERSION} ${CONFIG_COMMON}) @@ -174,10 +178,15 @@ if (((SANITIZE STREQUAL "thread") OR (SANITIZE STREQUAL "address")) AND COMPILER message(WARNING "Memory tracking is disabled, due to gcc sanitizers") else() add_library (clickhouse_new_delete STATIC Common/new_delete.cpp) - target_link_libraries (clickhouse_new_delete PRIVATE clickhouse_common_io jemalloc) + target_link_libraries (clickhouse_new_delete PRIVATE clickhouse_common_io) + if (TARGET ch_contrib::jemalloc) + target_link_libraries (clickhouse_new_delete PRIVATE ch_contrib::jemalloc) + endif() endif() -target_link_libraries (clickhouse_common_io PRIVATE jemalloc) +if (TARGET ch_contrib::jemalloc) + target_link_libraries (clickhouse_common_io PRIVATE ch_contrib::jemalloc) +endif() add_subdirectory(Access/Common) add_subdirectory(Common/ZooKeeper) @@ -185,7 +194,7 @@ add_subdirectory(Common/Config) set (all_modules) macro(add_object_library name common_path) - if (MAKE_STATIC_LIBRARIES OR NOT SPLIT_SHARED_LIBRARIES) + if (USE_STATIC_LIBRARIES OR NOT SPLIT_SHARED_LIBRARIES) add_headers_and_sources(dbms ${common_path}) else () list (APPEND all_modules ${name}) @@ -240,25 +249,26 @@ add_object_library(clickhouse_processors_merges_algorithms Processors/Merges/Alg add_object_library(clickhouse_processors_queryplan Processors/QueryPlan) add_object_library(clickhouse_processors_queryplan_optimizations Processors/QueryPlan/Optimizations) -if (USE_NURAFT) +if (TARGET ch_contrib::nuraft) add_object_library(clickhouse_coordination Coordination) endif() set (DBMS_COMMON_LIBRARIES) -# libgcc_s does not provide an 
implementation of an atomics library. Instead, -# GCC’s libatomic library can be used to supply these when using libgcc_s. -if ((NOT USE_LIBCXX) AND COMPILER_CLANG AND OS_LINUX) - list (APPEND DBMS_COMMON_LIBRARIES atomic) -endif() -if (MAKE_STATIC_LIBRARIES OR NOT SPLIT_SHARED_LIBRARIES) +if (USE_STATIC_LIBRARIES OR NOT SPLIT_SHARED_LIBRARIES) add_library (dbms STATIC ${dbms_headers} ${dbms_sources}) - target_link_libraries (dbms PRIVATE jemalloc libdivide ${DBMS_COMMON_LIBRARIES}) + target_link_libraries (dbms PRIVATE ch_contrib::libdivide ${DBMS_COMMON_LIBRARIES}) + if (TARGET ch_contrib::jemalloc) + target_link_libraries (dbms PRIVATE ch_contrib::jemalloc) + endif() set (all_modules dbms) else() add_library (dbms SHARED ${dbms_headers} ${dbms_sources}) target_link_libraries (dbms PUBLIC ${all_modules} ${DBMS_COMMON_LIBRARIES}) - target_link_libraries (clickhouse_interpreters PRIVATE jemalloc libdivide) + target_link_libraries (clickhouse_interpreters PRIVATE ch_contrib::libdivide) + if (TARGET ch_contrib::jemalloc) + target_link_libraries (clickhouse_interpreters PRIVATE ch_contrib::jemalloc) + endif() list (APPEND all_modules dbms) # force all split libs to be linked if (OS_DARWIN) @@ -283,9 +293,8 @@ endmacro () dbms_target_include_directories (PUBLIC "${ClickHouse_SOURCE_DIR}/src" "${ClickHouse_BINARY_DIR}/src") target_include_directories (clickhouse_common_io PUBLIC "${ClickHouse_SOURCE_DIR}/src" "${ClickHouse_BINARY_DIR}/src") -if (USE_EMBEDDED_COMPILER) - dbms_target_link_libraries (PUBLIC ${REQUIRED_LLVM_LIBRARIES}) - dbms_target_include_directories (SYSTEM BEFORE PUBLIC ${LLVM_INCLUDE_DIRS}) +if (TARGET ch_contrib::llvm) + dbms_target_link_libraries (PUBLIC ch_contrib::llvm) endif () # Otherwise it will slow down stack traces printing too much. 
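A recurring pattern in the rest of this file's diff: USE_* feature flags plus raw ${FOO_LIBRARY}/${FOO_INCLUDE_DIR} variable pairs give way to namespaced imported/alias targets, which carry their include directories and transitive dependencies as usage requirements. A minimal sketch of the idea (the _foo/ch_contrib::foo names are illustrative, not the actual contrib definitions):

# in contrib: define the library once and attach its usage requirements
add_library(_foo "${FOO_SOURCE_DIR}/src/foo.c")
target_include_directories(_foo SYSTEM PUBLIC "${FOO_SOURCE_DIR}/include")
add_library(ch_contrib::foo ALIAS _foo)

# in consumers: the target's existence replaces the USE_FOO flag, and linking
# it brings the include paths along automatically
if (TARGET ch_contrib::foo)
    dbms_target_link_libraries(PRIVATE ch_contrib::foo)
endif()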
@@ -302,8 +311,8 @@ target_link_libraries (clickhouse_common_io ${LINK_LIBRARIES_ONLY_ON_X86_64} PUBLIC common - ${DOUBLE_CONVERSION_LIBRARIES} - dragonbox_to_chars + ch_contrib::double_conversion + ch_contrib::dragonbox_to_chars ) # Use X86 AVX2/AVX512 instructions to accelerate filter operations @@ -312,54 +321,48 @@ set_source_files_properties( Columns/ColumnsCommon.cpp Columns/ColumnVector.cpp Columns/ColumnDecimal.cpp + Columns/ColumnString.cpp PROPERTIES COMPILE_FLAGS "${X86_INTRINSICS_FLAGS}") -if(RE2_LIBRARY) - target_link_libraries(clickhouse_common_io PUBLIC ${RE2_LIBRARY}) -endif() -if(RE2_ST_LIBRARY) - target_link_libraries(clickhouse_common_io PUBLIC ${RE2_ST_LIBRARY}) -endif() +target_link_libraries(clickhouse_common_io PUBLIC ch_contrib::re2_st) +target_link_libraries(clickhouse_common_io PUBLIC ch_contrib::re2) target_link_libraries(clickhouse_common_io PRIVATE ${EXECINFO_LIBRARIES} - cpuid PUBLIC boost::program_options boost::system - ${CITYHASH_LIBRARIES} - ${ZLIB_LIBRARIES} + ch_contrib::cityhash + ch_contrib::zlib pcg_random Poco::Foundation ) +if (TARGET ch_contrib::cpuid) + target_link_libraries(clickhouse_common_io PRIVATE ch_contrib::cpuid) +endif() + +dbms_target_link_libraries(PUBLIC ch_contrib::abseil_swiss_tables) + # Make dbms depend on roaring instead of clickhouse_common_io so that roaring itself can depend on clickhouse_common_io # That way we we can redirect malloc/free functions avoiding circular dependencies -dbms_target_link_libraries(PUBLIC roaring) - -if (USE_RDKAFKA) - dbms_target_link_libraries(PRIVATE ${CPPKAFKA_LIBRARY} ${RDKAFKA_LIBRARY}) - if(NOT USE_INTERNAL_RDKAFKA_LIBRARY) - dbms_target_include_directories(SYSTEM BEFORE PRIVATE ${RDKAFKA_INCLUDE_DIR}) - endif() -endif() +dbms_target_link_libraries(PUBLIC ch_contrib::roaring) -if (USE_CYRUS_SASL) - dbms_target_link_libraries(PRIVATE ${CYRUS_SASL_LIBRARY}) +if (TARGET ch_contrib::rdkafka) + dbms_target_link_libraries(PRIVATE ch_contrib::rdkafka ch_contrib::cppkafka) endif() -if (USE_KRB5) - dbms_target_include_directories(SYSTEM BEFORE PRIVATE ${KRB5_INCLUDE_DIR}) - dbms_target_link_libraries(PRIVATE ${KRB5_LIBRARY}) +if (TARGET ch_contrib::sasl2) + dbms_target_link_libraries(PRIVATE ch_contrib::sasl2) endif() -if (USE_NURAFT) - dbms_target_link_libraries(PUBLIC ${NURAFT_LIBRARY}) +if (TARGET ch_contrib::krb5) + dbms_target_link_libraries(PRIVATE ch_contrib::krb5) endif() -if(RE2_INCLUDE_DIR) - target_include_directories(clickhouse_common_io SYSTEM BEFORE PUBLIC ${RE2_INCLUDE_DIR}) +if (TARGET ch_contrib::nuraft) + dbms_target_link_libraries(PUBLIC ch_contrib::nuraft) endif() dbms_target_link_libraries ( @@ -370,16 +373,19 @@ dbms_target_link_libraries ( clickhouse_common_zookeeper clickhouse_dictionaries_embedded clickhouse_parsers - lz4 + ch_contrib::lz4 Poco::JSON Poco::MongoDB string_utils PUBLIC - ${MYSQLXX_LIBRARY} boost::system clickhouse_common_io ) +if (TARGET ch::mysqlxx) + dbms_target_link_libraries (PUBLIC ch::mysqlxx) +endif() + dbms_target_link_libraries ( PUBLIC boost::circular_buffer @@ -388,176 +394,138 @@ dbms_target_link_libraries ( target_include_directories(clickhouse_common_io PUBLIC "${CMAKE_CURRENT_BINARY_DIR}/Core/include") # uses some includes from core dbms_target_include_directories(PUBLIC "${CMAKE_CURRENT_BINARY_DIR}/Core/include") -target_include_directories(clickhouse_common_io SYSTEM BEFORE PUBLIC ${PDQSORT_INCLUDE_DIR}) -dbms_target_include_directories(SYSTEM BEFORE PUBLIC ${PDQSORT_INCLUDE_DIR}) -target_include_directories(clickhouse_common_io SYSTEM BEFORE 
PUBLIC ${MINISELECT_INCLUDE_DIR}) -dbms_target_include_directories(SYSTEM BEFORE PUBLIC ${MINISELECT_INCLUDE_DIR}) - -if (ZSTD_LIBRARY) - dbms_target_link_libraries(PRIVATE ${ZSTD_LIBRARY}) - target_link_libraries (clickhouse_common_io PUBLIC ${ZSTD_LIBRARY}) - target_include_directories (clickhouse_common_io SYSTEM BEFORE PUBLIC ${ZSTD_INCLUDE_DIR}) - if (NOT USE_INTERNAL_ZSTD_LIBRARY AND ZSTD_INCLUDE_DIR) - dbms_target_include_directories(SYSTEM BEFORE PRIVATE ${ZSTD_INCLUDE_DIR}) - endif () -endif() +target_link_libraries(clickhouse_common_io PUBLIC + ch_contrib::miniselect + ch_contrib::pdqsort) +dbms_target_link_libraries(PUBLIC + ch_contrib::miniselect + ch_contrib::pdqsort) -if (XZ_LIBRARY) - target_link_libraries (clickhouse_common_io PUBLIC ${XZ_LIBRARY}) - target_include_directories (clickhouse_common_io SYSTEM BEFORE PUBLIC ${XZ_INCLUDE_DIR}) -endif() +dbms_target_link_libraries(PRIVATE ch_contrib::zstd) +target_link_libraries (clickhouse_common_io PUBLIC ch_contrib::zstd) +target_link_libraries (clickhouse_common_io PUBLIC ch_contrib::xz) -if (USE_ICU) - dbms_target_link_libraries (PRIVATE ${ICU_LIBRARIES}) - dbms_target_include_directories (SYSTEM PRIVATE ${ICU_INCLUDE_DIRS}) +if (TARGET ch_contrib::icu) + dbms_target_link_libraries (PRIVATE ch_contrib::icu) endif () -if (USE_CAPNP) - dbms_target_link_libraries (PRIVATE ${CAPNP_LIBRARIES}) +if (TARGET ch_contrib::capnp) + dbms_target_link_libraries (PRIVATE ch_contrib::capnp) endif () -if (USE_PARQUET) - dbms_target_link_libraries(PRIVATE ${PARQUET_LIBRARY}) - if (NOT USE_INTERNAL_PARQUET_LIBRARY) - dbms_target_include_directories (SYSTEM BEFORE PRIVATE ${PARQUET_INCLUDE_DIR} ${ARROW_INCLUDE_DIR}) - if (USE_STATIC_LIBRARIES) - dbms_target_link_libraries(PRIVATE ${ARROW_LIBRARY}) - endif() - endif () +if (TARGET ch_contrib::parquet) + dbms_target_link_libraries (PRIVATE ch_contrib::parquet) endif () -if (USE_AVRO) - dbms_target_link_libraries(PRIVATE ${AVROCPP_LIBRARY}) - dbms_target_include_directories (SYSTEM BEFORE PRIVATE ${AVROCPP_INCLUDE_DIR}) +if (TARGET ch_contrib::avrocpp) + dbms_target_link_libraries(PRIVATE ch_contrib::avrocpp) endif () -if (OPENSSL_CRYPTO_LIBRARY) - dbms_target_link_libraries (PRIVATE ${OPENSSL_CRYPTO_LIBRARY}) - target_link_libraries (clickhouse_common_io PRIVATE ${OPENSSL_CRYPTO_LIBRARY}) +if (TARGET OpenSSL::Crypto) + dbms_target_link_libraries (PRIVATE OpenSSL::Crypto) + target_link_libraries (clickhouse_common_io PRIVATE OpenSSL::Crypto) endif () -if (USE_LDAP) - dbms_target_include_directories (SYSTEM BEFORE PRIVATE ${OPENLDAP_INCLUDE_DIRS}) - dbms_target_link_libraries (PRIVATE ${OPENLDAP_LIBRARIES}) +if (TARGET ch_contrib::ldap) + dbms_target_link_libraries (PRIVATE ch_contrib::ldap ch_contrib::lber) endif () -dbms_target_include_directories (SYSTEM BEFORE PRIVATE ${SPARSEHASH_INCLUDE_DIR}) +dbms_target_link_libraries (PRIVATE ch_contrib::sparsehash) -if (USE_PROTOBUF) - dbms_target_link_libraries (PRIVATE ${Protobuf_LIBRARY}) - dbms_target_include_directories (SYSTEM BEFORE PRIVATE ${Protobuf_INCLUDE_DIR}) +if (TARGET ch_contrib::protobuf) + dbms_target_link_libraries (PRIVATE ch_contrib::protobuf) endif () -if (USE_GRPC) +if (TARGET clickhouse_grpc_protos) dbms_target_link_libraries (PUBLIC clickhouse_grpc_protos) endif() -if (USE_HDFS) - dbms_target_link_libraries(PRIVATE ${HDFS3_LIBRARY}) - dbms_target_include_directories (SYSTEM BEFORE PUBLIC ${HDFS3_INCLUDE_DIR}) +if (TARGET ch_contrib::hdfs) + dbms_target_link_libraries(PRIVATE ch_contrib::hdfs) endif() -if (USE_HIVE) - 
dbms_target_link_libraries(PRIVATE hivemetastore) - dbms_target_include_directories(SYSTEM BEFORE PUBLIC ${ClickHouse_SOURCE_DIR}/contrib/hive-metastore) +if (TARGET ch_contrib::hivemetastore) + dbms_target_link_libraries(PRIVATE ch_contrib::hivemetastore) endif() -if (USE_AWS_S3) - target_link_libraries (clickhouse_common_io PUBLIC ${AWS_S3_LIBRARY}) - target_include_directories (clickhouse_common_io SYSTEM BEFORE PUBLIC ${AWS_S3_CORE_INCLUDE_DIR}) - target_include_directories (clickhouse_common_io SYSTEM BEFORE PUBLIC ${AWS_S3_INCLUDE_DIR}) -endif() - -if (USE_AZURE_BLOB_STORAGE) - target_link_libraries (clickhouse_common_io PUBLIC ${AZURE_BLOB_STORAGE_LIBRARY}) - target_include_directories (clickhouse_common_io SYSTEM BEFORE PUBLIC ${AZURE_SDK_INCLUDES}) +if (TARGET ch_contrib::aws_s3) + target_link_libraries (clickhouse_common_io PUBLIC ch_contrib::aws_s3) endif() -if (USE_S2_GEOMETRY) - dbms_target_link_libraries (PUBLIC ${S2_GEOMETRY_LIBRARY}) - dbms_target_include_directories (SYSTEM BEFORE PUBLIC ${S2_GEOMETRY_INCLUDE_DIR}) +if (TARGET ch_contrib::azure_sdk) + target_link_libraries (clickhouse_common_io PUBLIC ch_contrib::azure_sdk) endif() -if (USE_BROTLI) - target_link_libraries (clickhouse_common_io PRIVATE ${BROTLI_LIBRARY}) - target_include_directories (clickhouse_common_io SYSTEM BEFORE PRIVATE ${BROTLI_INCLUDE_DIR}) +if (TARGET ch_contrib::s2) + dbms_target_link_libraries (PUBLIC ch_contrib::s2) endif() -if (USE_SNAPPY) - target_link_libraries (clickhouse_common_io PUBLIC ${SNAPPY_LIBRARY}) - target_include_directories (clickhouse_common_io SYSTEM BEFORE PUBLIC ${SNAPPY_INCLUDE_DIR}) +if (TARGET ch_contrib::brotli) + target_link_libraries (clickhouse_common_io PRIVATE ch_contrib::brotli) endif() -if (USE_AMQPCPP) - dbms_target_link_libraries(PUBLIC ${AMQPCPP_LIBRARY}) - dbms_target_include_directories (SYSTEM BEFORE PUBLIC ${AMQPCPP_INCLUDE_DIR}) +if (TARGET ch_contrib::snappy) + target_link_libraries (clickhouse_common_io PUBLIC ch_contrib::snappy) endif() -if (USE_SQLITE) - dbms_target_link_libraries(PUBLIC sqlite) +if (TARGET ch_contrib::amqp_cpp) + dbms_target_link_libraries(PUBLIC ch_contrib::amqp_cpp) endif() -if (USE_CASSANDRA) - dbms_target_link_libraries(PUBLIC ${CASSANDRA_LIBRARY}) - dbms_target_include_directories (SYSTEM BEFORE PUBLIC ${CASS_INCLUDE_DIR}) +if (TARGET ch_contrib::sqlite) + dbms_target_link_libraries(PUBLIC ch_contrib::sqlite) endif() -target_include_directories (clickhouse_common_io SYSTEM BEFORE PUBLIC ${DOUBLE_CONVERSION_INCLUDE_DIR}) - -if (USE_MSGPACK) - target_include_directories (clickhouse_common_io SYSTEM BEFORE PUBLIC ${MSGPACK_INCLUDE_DIR}) +if (TARGET ch_contrib::msgpack) + target_link_libraries (clickhouse_common_io PUBLIC ch_contrib::msgpack) endif() -target_link_libraries (clickhouse_common_io PUBLIC ${FAST_FLOAT_LIBRARY}) -target_include_directories (clickhouse_common_io SYSTEM BEFORE PUBLIC ${FAST_FLOAT_INCLUDE_DIR}) - -if (USE_ORC) - dbms_target_link_libraries(PUBLIC ${ORC_LIBRARIES}) - dbms_target_include_directories(SYSTEM BEFORE PUBLIC ${ORC_INCLUDE_DIR} "${CMAKE_BINARY_DIR}/contrib/orc/c++/include") -endif () +target_link_libraries (clickhouse_common_io PUBLIC ch_contrib::fast_float) -if (USE_ROCKSDB) - dbms_target_link_libraries(PUBLIC ${ROCKSDB_LIBRARY}) - dbms_target_include_directories(SYSTEM BEFORE PUBLIC ${ROCKSDB_INCLUDE_DIR}) +if (TARGET ch_contrib::rocksdb) + dbms_target_link_libraries(PUBLIC ch_contrib::rocksdb) endif() -if (USE_LIBPQXX) - dbms_target_link_libraries(PUBLIC ${LIBPQXX_LIBRARY}) - 
dbms_target_include_directories(SYSTEM BEFORE PUBLIC ${LIBPQXX_INCLUDE_DIR}) +if (TARGET ch_contrib::libpqxx) + dbms_target_link_libraries(PUBLIC ch_contrib::libpqxx) endif() -if (USE_DATASKETCHES) - target_include_directories (clickhouse_aggregate_functions SYSTEM BEFORE PRIVATE ${DATASKETCHES_INCLUDE_DIR}) +if (TARGET ch_contrib::datasketches) + target_link_libraries (clickhouse_aggregate_functions PRIVATE ch_contrib::datasketches) endif () -target_link_libraries (clickhouse_common_io PRIVATE lz4) +target_link_libraries (clickhouse_common_io PRIVATE ch_contrib::lz4) dbms_target_link_libraries(PRIVATE _boost_context) -if (USE_NLP) - dbms_target_link_libraries (PUBLIC stemmer) - dbms_target_link_libraries (PUBLIC wnb) - dbms_target_link_libraries (PUBLIC lemmagen) +if (ENABLE_NLP) + dbms_target_link_libraries (PUBLIC ch_contrib::stemmer) + dbms_target_link_libraries (PUBLIC ch_contrib::wnb) + dbms_target_link_libraries (PUBLIC ch_contrib::lemmagen) + dbms_target_link_libraries (PUBLIC ch_contrib::nlp_data) endif() -if (USE_BZIP2) - target_link_libraries (clickhouse_common_io PRIVATE ${BZIP2_LIBRARY}) - target_include_directories (clickhouse_common_io SYSTEM BEFORE PRIVATE ${BZIP2_INCLUDE_DIR}) +if (TARGET ch_contrib::bzip2) + target_link_libraries (clickhouse_common_io PRIVATE ch_contrib::bzip2) endif() -if(USE_SIMDJSON) - dbms_target_link_libraries(PRIVATE simdjson) +if (TARGET ch_contrib::minizip) + target_link_libraries (clickhouse_common_io PRIVATE ch_contrib::minizip) +endif () + +if (TARGET ch_contrib::simdjson) + dbms_target_link_libraries(PRIVATE ch_contrib::simdjson) endif() -if(USE_RAPIDJSON) - dbms_target_include_directories(SYSTEM PRIVATE ${RAPIDJSON_INCLUDE_DIR}) +if (TARGET ch_contrib::rapidjson) + dbms_target_link_libraries(PRIVATE ch_contrib::rapidjson) endif() -dbms_target_link_libraries(PUBLIC consistent-hashing) +dbms_target_link_libraries(PUBLIC ch_contrib::consistent_hashing) include ("${ClickHouse_SOURCE_DIR}/cmake/add_check.cmake") -if (ENABLE_TESTS AND USE_GTEST) +if (ENABLE_TESTS) macro (grep_gtest_sources BASE_DIR DST_VAR) # Cold match files that are not in tests/ directories file(GLOB_RECURSE "${DST_VAR}" RELATIVE "${BASE_DIR}" "gtest*.cpp") @@ -579,7 +547,7 @@ if (ENABLE_TESTS AND USE_GTEST) ) target_link_libraries(unit_tests_dbms PRIVATE - ${GTEST_BOTH_LIBRARIES} + ch_contrib::gtest_all clickhouse_functions clickhouse_aggregate_functions clickhouse_parsers @@ -591,3 +559,4 @@ if (ENABLE_TESTS AND USE_GTEST) add_check(unit_tests_dbms) endif () + diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index eb00ee349eea..336935d48cc2 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -13,15 +13,16 @@ #include #include #include -#include "Common/Exception.h" -#include "Common/getNumberOfPhysicalCPUCores.h" -#include "Common/tests/gtest_global_context.h" -#include "Common/typeid_cast.h" -#include "Columns/ColumnString.h" -#include "Columns/ColumnsNumber.h" -#include "Core/Block.h" -#include "Core/Protocol.h" -#include "Formats/FormatFactory.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include #include @@ -47,6 +48,7 @@ #include #include #include +#include #include #include @@ -551,6 +553,37 @@ void ClientBase::initLogsOutputStream() } } +void ClientBase::updateSuggest(const ASTCreateQuery & ast_create) +{ + std::vector new_words; + + if (ast_create.database) + new_words.push_back(ast_create.getDatabase()); + new_words.push_back(ast_create.getTable()); + + if 
(ast_create.columns_list && ast_create.columns_list->columns) + { + for (const auto & elem : ast_create.columns_list->columns->children) + { + if (const auto * column = elem->as<ASTColumnDeclaration>()) + new_words.push_back(column->name); + } + } + + suggest->addWords(std::move(new_words)); +} + +bool ClientBase::isSyncInsertWithData(const ASTInsertQuery & insert_query, const ContextPtr & context) +{ + if (!insert_query.data) + return false; + + auto settings = context->getSettings(); + if (insert_query.settings_ast) + settings.applyChanges(insert_query.settings_ast->as<ASTSetQuery>()->changes); + + return !settings.async_insert; +} void ClientBase::processTextAsSingleQuery(const String & full_query) { @@ -564,10 +597,24 @@ void ClientBase::processTextAsSingleQuery(const String & full_query) String query_to_execute; - // An INSERT query may have the data that follow query text. Remove the - /// Send part of query without data, because data will be sent separately. - auto * insert = parsed_query->as<ASTInsertQuery>(); - if (insert && insert->data) + /// The query is parsed before the result is checked, because an error does not + /// always mean a real problem (e.g. the table may already exist), and it is not a + /// big deal if a suggestion is added even on error, since it is + /// just a suggestion. + if (auto * create = parsed_query->as<ASTCreateQuery>()) + { + /// Do not update suggest until the suggestions are ready + /// (this avoids extra complexity) + if (suggest) + updateSuggest(*create); + } + + /// An INSERT query may have data that follows the query text. + /// Send the part of the query without data, because the data will be sent separately. + /// But for asynchronous inserts we don't extract the data, because in that case it has + /// to be done on the server side (to coalesce the data from multiple inserts there). + const auto * insert = parsed_query->as<ASTInsertQuery>(); + if (insert && isSyncInsertWithData(*insert, global_context)) query_to_execute = full_query.substr(0, insert->data - full_query.data()); else query_to_execute = full_query; @@ -1194,7 +1241,7 @@ bool ClientBase::receiveEndOfQuery() case Protocol::Server::Progress: onProgress(packet.progress); - return true; + break; default: throw NetException( @@ -1228,7 +1275,7 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin for (const auto & query_id_format : query_id_formats) { writeString(query_id_format.first, std_out); - writeString(fmt::format(query_id_format.second, fmt::arg("query_id", global_context->getCurrentQueryId())), std_out); + writeString(fmt::format(fmt::runtime(query_id_format.second), fmt::arg("query_id", global_context->getCurrentQueryId())), std_out); writeChar('\n', std_out); std_out.next(); } @@ -1270,8 +1317,10 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin if (insert && insert->select) insert->tryFindInputFunction(input_function); + bool is_async_insert = global_context->getSettings().async_insert && insert && insert->hasInlinedData(); + /// INSERT query for which data transfer is needed (not an INSERT SELECT or input()) is processed separately. - if (insert && (!insert->select || input_function) && !insert->watch) + if (insert && (!insert->select || input_function) && !insert->watch && !is_async_insert) { if (input_function && insert->format.empty()) throw Exception("FORMAT must be specified for function input()", ErrorCodes::INVALID_USAGE_OF_INPUT); @@ -1401,17 +1450,17 @@ MultiQueryProcessingStage ClientBase::analyzeMultiQueryText( // row input formats (e.g.
TSV) can't tell when the input stops, // unlike VALUES. auto * insert_ast = parsed_query->as(); + const char * query_to_execute_end = this_query_end; + if (insert_ast && insert_ast->data) { this_query_end = find_first_symbols<'\n'>(insert_ast->data, all_queries_end); insert_ast->end = this_query_end; - query_to_execute = all_queries_text.substr(this_query_begin - all_queries_text.data(), insert_ast->data - this_query_begin); - } - else - { - query_to_execute = all_queries_text.substr(this_query_begin - all_queries_text.data(), this_query_end - this_query_begin); + query_to_execute_end = isSyncInsertWithData(*insert_ast, global_context) ? insert_ast->data : this_query_end; } + query_to_execute = all_queries_text.substr(this_query_begin - all_queries_text.data(), query_to_execute_end - this_query_begin); + // Try to include the trailing comment with test hints. It is just // a guess for now, because we don't yet know where the query ends // if it is an INSERT query with inline data. We will do it again @@ -1452,6 +1501,25 @@ String ClientBase::prompt() const } +void ClientBase::initQueryIdFormats() +{ + if (!query_id_formats.empty()) + return; + + /// Initialize query_id_formats if any + if (config().has("query_id_formats")) + { + Poco::Util::AbstractConfiguration::Keys keys; + config().keys("query_id_formats", keys); + for (const auto & name : keys) + query_id_formats.emplace_back(name + ":", config().getString("query_id_formats." + name)); + } + + if (query_id_formats.empty()) + query_id_formats.emplace_back("Query id:", " {query_id}\n"); +} + + void ClientBase::runInteractive() { if (config().has("query_id")) @@ -1459,10 +1527,11 @@ void ClientBase::runInteractive() if (print_time_to_stderr) throw Exception("time option could be specified only in non-interactive mode", ErrorCodes::BAD_ARGUMENTS); + initQueryIdFormats(); + /// Initialize DateLUT here to avoid counting time spent here as query execution time. const auto local_tz = DateLUT::instance().getTimeZone(); - std::optional suggest; suggest.emplace(); if (load_suggestions) { @@ -1480,18 +1549,6 @@ void ClientBase::runInteractive() home_path = home_path_cstr; } - /// Initialize query_id_formats if any - if (config().has("query_id_formats")) - { - Poco::Util::AbstractConfiguration::Keys keys; - config().keys("query_id_formats", keys); - for (const auto & name : keys) - query_id_formats.emplace_back(name + ":", config().getString("query_id_formats." + name)); - } - - if (query_id_formats.empty()) - query_id_formats.emplace_back("Query id:", " {query_id}\n"); - /// Load command history if present. if (config().has("history_file")) history_file = config().getString("history_file"); @@ -1600,6 +1657,9 @@ void ClientBase::runInteractive() void ClientBase::runNonInteractive() { + if (delayed_interactive) + initQueryIdFormats(); + if (!queries_files.empty()) { auto process_multi_query_from_file = [&](const String & file) @@ -1869,7 +1929,7 @@ void ClientBase::init(int argc, char ** argv) /// Output of help message. if (options.count("help") - || (options.count("host") && options["host"].as() == "elp")) /// If user writes -help instead of --help. + || (options.count("host") && options["host"].as>()[0].host == "elp")) /// If user writes -help instead of --help. 
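+ /// "host" is declared as a vector of HostPort values (see the HostPort struct in ClientBase.h below), + /// hence the access to the first parsed entry.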
{ printHelpMessage(options_description); exit(0); diff --git a/src/Client/ClientBase.h b/src/Client/ClientBase.h index 1926df5afea5..e74a6a47d76d 100644 --- a/src/Client/ClientBase.h +++ b/src/Client/ClientBase.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -136,7 +137,13 @@ class ClientBase : public Poco::Util::Application, public IHints<2, ClientBase> void readArguments(int argc, char ** argv, Arguments & common_arguments, std::vector & external_tables_arguments); void parseAndCheckOptions(OptionsDescription & options_description, po::variables_map & options, Arguments & arguments); + void updateSuggest(const ASTCreateQuery & ast_create); + + void initQueryIdFormats(); + protected: + static bool isSyncInsertWithData(const ASTInsertQuery & insert_query, const ContextPtr & context); + bool is_interactive = false; /// Use either interactive line editing interface or batch mode. bool is_multiquery = false; bool delayed_interactive = false; @@ -144,6 +151,8 @@ class ClientBase : public Poco::Util::Application, public IHints<2, ClientBase> bool echo_queries = false; /// Print queries before execution in batch mode. bool ignore_error = false; /// In case of errors, don't print error message, continue to next query. Only applicable for non-interactive mode. bool print_time_to_stderr = false; /// Output execution time to stderr in batch mode. + + std::optional suggest; bool load_suggestions = false; std::vector queries_files; /// If not empty, queries will be read from these files @@ -235,6 +244,25 @@ class ClientBase : public Poco::Util::Application, public IHints<2, ClientBase> } profile_events; QueryProcessingStage::Enum query_processing_stage; + + struct HostPort + { + String host; + std::optional port{}; + friend std::istream & operator>>(std::istream & in, HostPort & hostPort) + { + String host_with_port; + in >> host_with_port; + DB::DNSResolver & resolver = DB::DNSResolver::instance(); + std::pair> + host_and_port = resolver.resolveHostOrAddress(host_with_port); + hostPort.host = host_and_port.first.toString(); + hostPort.port = host_and_port.second; + + return in; + } + }; + std::vector hosts_ports{}; }; } diff --git a/src/Client/ClientBaseHelpers.cpp b/src/Client/ClientBaseHelpers.cpp index 3a5d4f4cf33d..5ad34ba8e81a 100644 --- a/src/Client/ClientBaseHelpers.cpp +++ b/src/Client/ClientBaseHelpers.cpp @@ -6,7 +6,6 @@ #include #include - namespace DB { @@ -114,6 +113,7 @@ void highlight(const String & query, std::vector & colors {TokenType::Comma, replxx::color::bold(Replxx::Color::DEFAULT)}, {TokenType::Semicolon, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::VerticalDelimiter, replxx::color::bold(Replxx::Color::DEFAULT)}, {TokenType::Dot, replxx::color::bold(Replxx::Color::DEFAULT)}, {TokenType::Asterisk, replxx::color::bold(Replxx::Color::DEFAULT)}, {TokenType::HereDoc, Replxx::Color::CYAN}, @@ -151,6 +151,11 @@ void highlight(const String & query, std::vector & colors for (Token token = lexer.nextToken(); !token.isEnd(); token = lexer.nextToken()) { + if (token.type == TokenType::Semicolon || token.type == TokenType::VerticalDelimiter) + ReplxxLineReader::setLastIsDelimiter(true); + else if (token.type != TokenType::Whitespace) + ReplxxLineReader::setLastIsDelimiter(false); + size_t utf8_len = UTF8::countCodePoints(reinterpret_cast(token.begin), token.size()); for (size_t code_point_index = 0; code_point_index < utf8_len; ++code_point_index) { diff --git a/src/Client/ClientBaseHelpers.h b/src/Client/ClientBaseHelpers.h index 
048a4c17f10c..3fb2863082a5 100644 --- a/src/Client/ClientBaseHelpers.h +++ b/src/Client/ClientBaseHelpers.h @@ -1,6 +1,7 @@ #pragma once #include +#include #if USE_REPLXX # include diff --git a/src/Client/Connection.cpp b/src/Client/Connection.cpp index 505a6514812b..ad2fc76f090f 100644 --- a/src/Client/Connection.cpp +++ b/src/Client/Connection.cpp @@ -405,7 +405,7 @@ bool Connection::ping() } catch (const Poco::Exception & e) { - LOG_TRACE(log_wrapper.get(), e.displayText()); + LOG_TRACE(log_wrapper.get(), fmt::runtime(e.displayText())); return false; } diff --git a/src/Client/ConnectionEstablisher.cpp b/src/Client/ConnectionEstablisher.cpp index 4d27c9efc695..3385834e3869 100644 --- a/src/Client/ConnectionEstablisher.cpp +++ b/src/Client/ConnectionEstablisher.cpp @@ -58,9 +58,9 @@ void ConnectionEstablisher::run(ConnectionEstablisher::TryResult & result, std:: auto table_status_it = status_response.table_states_by_id.find(*table_to_check); if (table_status_it == status_response.table_states_by_id.end()) { - const char * message_pattern = "There is no table {}.{} on server: {}"; - fail_message = fmt::format(message_pattern, backQuote(table_to_check->database), backQuote(table_to_check->table), result.entry->getDescription()); - LOG_WARNING(log, fail_message); + fail_message = fmt::format("There is no table {}.{} on server: {}", + backQuote(table_to_check->database), backQuote(table_to_check->table), result.entry->getDescription()); + LOG_WARNING(log, fmt::runtime(fail_message)); ProfileEvents::increment(ProfileEvents::DistributedConnectionMissingTable); return; } diff --git a/src/Client/ConnectionParameters.cpp b/src/Client/ConnectionParameters.cpp index dbd463583f58..55569f080f63 100644 --- a/src/Client/ConnectionParameters.cpp +++ b/src/Client/ConnectionParameters.cpp @@ -23,15 +23,13 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } -ConnectionParameters::ConnectionParameters(const Poco::Util::AbstractConfiguration & config) +ConnectionParameters::ConnectionParameters(const Poco::Util::AbstractConfiguration & config, + std::string connection_host, + int connection_port) : host(connection_host), port(connection_port) { bool is_secure = config.getBool("secure", false); security = is_secure ? Protocol::Secure::Enable : Protocol::Secure::Disable; - host = config.getString("host", "localhost"); - port = config.getInt( - "port", config.getInt(is_secure ? "tcp_port_secure" : "tcp_port", is_secure ? DBMS_DEFAULT_SECURE_PORT : DBMS_DEFAULT_PORT)); - default_database = config.getString("database", ""); /// changed the default value to "default" to fix the issue when the user in the prompt is blank @@ -61,12 +59,25 @@ ConnectionParameters::ConnectionParameters(const Poco::Util::AbstractConfigurati /// By default compression is disabled if address looks like localhost. compression = config.getBool("compression", !isLocalAddress(DNSResolver::instance().resolveHost(host))) - ? Protocol::Compression::Enable : Protocol::Compression::Disable; + ? 
Protocol::Compression::Enable : Protocol::Compression::Disable; timeouts = ConnectionTimeouts( - Poco::Timespan(config.getInt("connect_timeout", DBMS_DEFAULT_CONNECT_TIMEOUT_SEC), 0), - Poco::Timespan(config.getInt("send_timeout", DBMS_DEFAULT_SEND_TIMEOUT_SEC), 0), - Poco::Timespan(config.getInt("receive_timeout", DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC), 0), - Poco::Timespan(config.getInt("tcp_keep_alive_timeout", 0), 0)); + Poco::Timespan(config.getInt("connect_timeout", DBMS_DEFAULT_CONNECT_TIMEOUT_SEC), 0), + Poco::Timespan(config.getInt("send_timeout", DBMS_DEFAULT_SEND_TIMEOUT_SEC), 0), + Poco::Timespan(config.getInt("receive_timeout", DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC), 0), + Poco::Timespan(config.getInt("tcp_keep_alive_timeout", 0), 0)); +} + +ConnectionParameters::ConnectionParameters(const Poco::Util::AbstractConfiguration & config) + : ConnectionParameters(config, config.getString("host", "localhost"), getPortFromConfig(config)) +{ +} + +int ConnectionParameters::getPortFromConfig(const Poco::Util::AbstractConfiguration & config) +{ + bool is_secure = config.getBool("secure", false); + return config.getInt("port", + config.getInt(is_secure ? "tcp_port_secure" : "tcp_port", + is_secure ? DBMS_DEFAULT_SECURE_PORT : DBMS_DEFAULT_PORT)); } } diff --git a/src/Client/ConnectionParameters.h b/src/Client/ConnectionParameters.h index a169df8390a1..dc509049c832 100644 --- a/src/Client/ConnectionParameters.h +++ b/src/Client/ConnectionParameters.h @@ -24,6 +24,9 @@ struct ConnectionParameters ConnectionParameters() {} ConnectionParameters(const Poco::Util::AbstractConfiguration & config); + ConnectionParameters(const Poco::Util::AbstractConfiguration & config, std::string host, int port); + + static int getPortFromConfig(const Poco::Util::AbstractConfiguration & config); }; } diff --git a/src/Client/LocalConnection.cpp b/src/Client/LocalConnection.cpp index 528c38f9b769..8ee4b9e1c1f5 100644 --- a/src/Client/LocalConnection.cpp +++ b/src/Client/LocalConnection.cpp @@ -74,6 +74,8 @@ void LocalConnection::sendQuery( query_context->setProgressCallback([this] (const Progress & value) { return this->updateProgress(value); }); query_context->setFileProgressCallback([this](const FileProgress & value) { this->updateProgress(Progress(value)); }); } + if (!current_database.empty()) + query_context->setCurrentDatabase(current_database); CurrentThread::QueryScope query_scope_holder(query_context); @@ -427,9 +429,9 @@ void LocalConnection::getServerVersion( throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Not implemented"); } -void LocalConnection::setDefaultDatabase(const String &) +void LocalConnection::setDefaultDatabase(const String & database) { - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Not implemented"); + current_database = database; } UInt64 LocalConnection::getServerRevision(const ConnectionTimeouts &) diff --git a/src/Client/LocalConnection.h b/src/Client/LocalConnection.h index 92c2af30c809..b85022cf1837 100644 --- a/src/Client/LocalConnection.h +++ b/src/Client/LocalConnection.h @@ -142,5 +142,7 @@ class LocalConnection : public IServerConnection, WithContext /// Last "server" packet. std::optional next_packet_type; + + String current_database; }; } diff --git a/src/Client/Suggest.cpp b/src/Client/Suggest.cpp index b14af7ba8e90..738c98d2119b 100644 --- a/src/Client/Suggest.cpp +++ b/src/Client/Suggest.cpp @@ -29,19 +29,21 @@ namespace ErrorCodes Suggest::Suggest() { /// Keywords may be not up to date with ClickHouse parser. 
- words = {"CREATE", "DATABASE", "IF", "NOT", "EXISTS", "TEMPORARY", "TABLE", "ON", "CLUSTER", "DEFAULT", - "MATERIALIZED", "ALIAS", "ENGINE", "AS", "VIEW", "POPULATE", "SETTINGS", "ATTACH", "DETACH", "DROP", - "RENAME", "TO", "ALTER", "ADD", "MODIFY", "CLEAR", "COLUMN", "AFTER", "COPY", "PROJECT", - "PRIMARY", "KEY", "CHECK", "PARTITION", "PART", "FREEZE", "FETCH", "FROM", "SHOW", "INTO", - "OUTFILE", "FORMAT", "TABLES", "DATABASES", "LIKE", "PROCESSLIST", "CASE", "WHEN", "THEN", "ELSE", - "END", "DESCRIBE", "DESC", "USE", "SET", "OPTIMIZE", "FINAL", "DEDUPLICATE", "INSERT", "VALUES", - "SELECT", "DISTINCT", "SAMPLE", "ARRAY", "JOIN", "GLOBAL", "LOCAL", "ANY", "ALL", "INNER", - "LEFT", "RIGHT", "FULL", "OUTER", "CROSS", "USING", "PREWHERE", "WHERE", "GROUP", "BY", - "WITH", "TOTALS", "HAVING", "ORDER", "COLLATE", "LIMIT", "UNION", "AND", "OR", "ASC", - "IN", "KILL", "QUERY", "SYNC", "ASYNC", "TEST", "BETWEEN", "TRUNCATE", "USER", "ROLE", - "PROFILE", "QUOTA", "POLICY", "ROW", "GRANT", "REVOKE", "OPTION", "ADMIN", "EXCEPT", "REPLACE", - "IDENTIFIED", "HOST", "NAME", "READONLY", "WRITABLE", "PERMISSIVE", "FOR", "RESTRICTIVE", "RANDOMIZED", - "INTERVAL", "LIMITS", "ONLY", "TRACKING", "IP", "REGEXP", "ILIKE"}; + addWords({ + "CREATE", "DATABASE", "IF", "NOT", "EXISTS", "TEMPORARY", "TABLE", "ON", "CLUSTER", "DEFAULT", + "MATERIALIZED", "ALIAS", "ENGINE", "AS", "VIEW", "POPULATE", "SETTINGS", "ATTACH", "DETACH", "DROP", + "RENAME", "TO", "ALTER", "ADD", "MODIFY", "CLEAR", "COLUMN", "AFTER", "COPY", "PROJECT", + "PRIMARY", "KEY", "CHECK", "PARTITION", "PART", "FREEZE", "FETCH", "FROM", "SHOW", "INTO", + "OUTFILE", "FORMAT", "TABLES", "DATABASES", "LIKE", "PROCESSLIST", "CASE", "WHEN", "THEN", "ELSE", + "END", "DESCRIBE", "DESC", "USE", "SET", "OPTIMIZE", "FINAL", "DEDUPLICATE", "INSERT", "VALUES", + "SELECT", "DISTINCT", "SAMPLE", "ARRAY", "JOIN", "GLOBAL", "LOCAL", "ANY", "ALL", "INNER", + "LEFT", "RIGHT", "FULL", "OUTER", "CROSS", "USING", "PREWHERE", "WHERE", "GROUP", "BY", + "WITH", "TOTALS", "HAVING", "ORDER", "COLLATE", "LIMIT", "UNION", "AND", "OR", "ASC", + "IN", "KILL", "QUERY", "SYNC", "ASYNC", "TEST", "BETWEEN", "TRUNCATE", "USER", "ROLE", + "PROFILE", "QUOTA", "POLICY", "ROW", "GRANT", "REVOKE", "OPTION", "ADMIN", "EXCEPT", "REPLACE", + "IDENTIFIED", "HOST", "NAME", "READONLY", "WRITABLE", "PERMISSIVE", "FOR", "RESTRICTIVE", "RANDOMIZED", + "INTERVAL", "LIMITS", "ONLY", "TRACKING", "IP", "REGEXP", "ILIKE", + }); } static String getLoadSuggestionQuery(Int32 suggestion_limit, bool basic_suggestion) @@ -124,18 +126,6 @@ void Suggest::load(ContextPtr context, const ConnectionParameters & connection_p } /// Note that keyword suggestions are available even if we cannot load data from server. 
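The constructor above now routes the keyword list through addWords(), and the hunk just below removes the sorting that load() used to perform in place, so that logic has presumably moved into the new helper. A minimal sketch of what addWords() could look like, reconstructed from the removed code; the mutex and the merge strategy are assumptions, not the actual implementation:

```cpp
#include <algorithm>
#include <cctype>
#include <iterator>
#include <mutex>
#include <string>
#include <vector>

// Sketch only: the real Suggest keeps words/words_no_case as members and is
// fed both by the constructor and by fillWordsFromBlock() further below.
struct SuggestSketch
{
    using Words = std::vector<std::string>;
    Words words;          // kept sorted case-sensitively
    Words words_no_case;  // kept sorted case-insensitively
    std::mutex mutex;     // assumed: makes concurrent loads safe

    void addWords(Words && new_words)
    {
        std::sort(new_words.begin(), new_words.end());

        std::lock_guard<std::mutex> lock(mutex);
        Words merged;
        merged.reserve(words.size() + new_words.size());
        std::merge(words.begin(), words.end(), new_words.begin(), new_words.end(), std::back_inserter(merged));
        words = std::move(merged);

        // Same comparator the removed load() code used.
        words_no_case = words;
        std::sort(words_no_case.begin(), words_no_case.end(), [](const std::string & lhs, const std::string & rhs)
        {
            return std::lexicographical_compare(lhs.begin(), lhs.end(), rhs.begin(), rhs.end(),
                [](char c1, char c2) { return std::tolower(c1) < std::tolower(c2); });
        });
    }
};
```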
- - std::sort(words.begin(), words.end()); - words_no_case = words; - std::sort(words_no_case.begin(), words_no_case.end(), [](const std::string & str1, const std::string & str2) - { - return std::lexicographical_compare(begin(str1), end(str1), begin(str2), end(str2), [](const char char1, const char char2) - { - return std::tolower(char1) < std::tolower(char2); - }); - }); - - ready = true; }); } @@ -190,8 +180,14 @@ void Suggest::fillWordsFromBlock(const Block & block) const ColumnString & column = typeid_cast(*block.getByPosition(0).column); size_t rows = block.rows(); + + Words new_words; + new_words.reserve(rows); for (size_t i = 0; i < rows; ++i) - words.emplace_back(column.getDataAt(i).toString()); + { + new_words.emplace_back(column.getDataAt(i).toString()); + } + addWords(std::move(new_words)); } template diff --git a/src/Columns/Collator.cpp b/src/Columns/Collator.cpp index 312216054f59..0c2cdcf1baf2 100644 --- a/src/Columns/Collator.cpp +++ b/src/Columns/Collator.cpp @@ -17,6 +17,7 @@ #include #include #include +#include namespace DB @@ -74,10 +75,10 @@ AvailableCollationLocales::LocalesVector AvailableCollationLocales::getAvailable result.push_back(name_and_locale.second); auto comparator = [] (const LocaleAndLanguage & f, const LocaleAndLanguage & s) - { - return f.locale_name < s.locale_name; - }; - std::sort(result.begin(), result.end(), comparator); + { + return f.locale_name < s.locale_name; + }; + ::sort(result.begin(), result.end(), comparator); return result; } diff --git a/src/Columns/ColumnArray.cpp b/src/Columns/ColumnArray.cpp index 929c0153a0a7..c18887b7a135 100644 --- a/src/Columns/ColumnArray.cpp +++ b/src/Columns/ColumnArray.cpp @@ -1,5 +1,3 @@ -#include // memcpy - #include #include #include @@ -9,12 +7,7 @@ #include #include #include - -#include -#include - #include - #include #include #include @@ -22,6 +15,9 @@ #include #include #include +#include +#include +#include // memcpy namespace DB @@ -127,18 +123,8 @@ size_t ColumnArray::size() const Field ColumnArray::operator[](size_t n) const { - size_t offset = offsetAt(n); - size_t size = sizeAt(n); - - if (size > max_array_size_as_field) - throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Array of size {} is too large to be manipulated as single field, maximum size {}", - size, max_array_size_as_field); - - Array res(size); - - for (size_t i = 0; i < size; ++i) - res[i] = getData()[offset + i]; - + Field res; + get(n, res); return res; } @@ -152,11 +138,12 @@ void ColumnArray::get(size_t n, Field & res) const throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Array of size {} is too large to be manipulated as single field, maximum size {}", size, max_array_size_as_field); - res = Array(size); + res = Array(); Array & res_arr = DB::get(res); + res_arr.reserve(size); for (size_t i = 0; i < size; ++i) - getData().get(offset + i, res_arr[i]); + res_arr.push_back(getData()[offset + i]); } @@ -824,9 +811,9 @@ void ColumnArray::getPermutationImpl(size_t limit, Permutation & res, Comparator auto less = [&cmp](size_t lhs, size_t rhs){ return cmp(lhs, rhs) < 0; }; if (limit) - partial_sort(res.begin(), res.begin() + limit, res.end(), less); + ::partial_sort(res.begin(), res.begin() + limit, res.end(), less); else - std::sort(res.begin(), res.end(), less); + ::sort(res.begin(), res.end(), less); } void ColumnArray::getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const diff --git a/src/Columns/ColumnDecimal.cpp b/src/Columns/ColumnDecimal.cpp index 99085f0f976c..9898f0138868 100644 --- 
a/src/Columns/ColumnDecimal.cpp +++ b/src/Columns/ColumnDecimal.cpp @@ -9,7 +9,6 @@ #include #include - #include #include @@ -32,12 +31,6 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -template class DecimalPaddedPODArray; -template class DecimalPaddedPODArray; -template class DecimalPaddedPODArray; -template class DecimalPaddedPODArray; -template class DecimalPaddedPODArray; - template int ColumnDecimal::compareAt(size_t n, size_t m, const IColumn & rhs_, int) const { @@ -131,19 +124,6 @@ void ColumnDecimal::updateHashFast(SipHash & hash) const template void ColumnDecimal::getPermutation(bool reverse, size_t limit, int , IColumn::Permutation & res) const { -#if 1 /// TODO: perf test - if (data.size() <= std::numeric_limits::max()) - { - PaddedPODArray tmp_res; - permutation(reverse, limit, tmp_res); - - res.resize(tmp_res.size()); - for (size_t i = 0; i < tmp_res.size(); ++i) - res[i] = tmp_res[i]; - return; - } -#endif - permutation(reverse, limit, res); } @@ -151,7 +131,7 @@ template void ColumnDecimal::updatePermutation(bool reverse, size_t limit, int, IColumn::Permutation & res, EqualRanges & equal_ranges) const { auto equals = [this](size_t lhs, size_t rhs) { return data[lhs] == data[rhs]; }; - auto sort = [](auto begin, auto end, auto pred) { std::sort(begin, end, pred); }; + auto sort = [](auto begin, auto end, auto pred) { ::sort(begin, end, pred); }; auto partial_sort = [](auto begin, auto mid, auto end, auto pred) { ::partial_sort(begin, mid, end, pred); }; if (reverse) diff --git a/src/Columns/ColumnDecimal.h b/src/Columns/ColumnDecimal.h index 18d4526e0f35..1712753bda2e 100644 --- a/src/Columns/ColumnDecimal.h +++ b/src/Columns/ColumnDecimal.h @@ -1,66 +1,21 @@ #pragma once -#include -#include -#include +#include + +#include +#include #include #include -#include -#include #include -#include - -#include +#include +#include +#include +#include namespace DB { -/// PaddedPODArray extended by Decimal scale -template -class DecimalPaddedPODArray : public PaddedPODArray -{ -public: - using Base = PaddedPODArray; - using Base::operator[]; - - DecimalPaddedPODArray(size_t size, UInt32 scale_) - : Base(size), - scale(scale_) - {} - - DecimalPaddedPODArray(const DecimalPaddedPODArray & other) - : Base(other.begin(), other.end()), - scale(other.scale) - {} - - DecimalPaddedPODArray(DecimalPaddedPODArray && other) - { - this->swap(other); - std::swap(scale, other.scale); - } - - DecimalPaddedPODArray & operator=(DecimalPaddedPODArray && other) - { - this->swap(other); - std::swap(scale, other.scale); - return *this; - } - - UInt32 getScale() const { return scale; } - -private: - UInt32 scale; -}; - -/// Prevent implicit template instantiation of DecimalPaddedPODArray for common decimal types - -extern template class DecimalPaddedPODArray; -extern template class DecimalPaddedPODArray; -extern template class DecimalPaddedPODArray; -extern template class DecimalPaddedPODArray; -extern template class DecimalPaddedPODArray; - /// A ColumnVector for Decimals template class ColumnDecimal final : public COWHelper> @@ -72,22 +27,22 @@ class ColumnDecimal final : public COWHelper; + using Container = PaddedPODArray; private: ColumnDecimal(const size_t n, UInt32 scale_) - : data(n, scale_), + : data(n), scale(scale_) {} ColumnDecimal(const ColumnDecimal & src) - : data(src.data), + : data(src.data.begin(), src.data.end()), scale(src.scale) {} public: const char * getFamilyName() const override { return TypeName.data(); } - TypeIndex getDataType() const override { return TypeId; } + 
TypeIndex getDataType() const override { return TypeToTypeIndex; } bool isNumeric() const override { return false; } bool canBeInsideNullable() const override { return true; } @@ -195,7 +150,7 @@ class ColumnDecimal final : public COWHelper(i); auto sort_end = res.end(); if (limit && limit < s) sort_end = res.begin() + limit; if (reverse) - partial_sort(res.begin(), sort_end, res.end(), [this](size_t a, size_t b) { return data[a] > data[b]; }); + ::partial_sort(res.begin(), sort_end, res.end(), [this](size_t a, size_t b) { return data[a] > data[b]; }); else - partial_sort(res.begin(), sort_end, res.end(), [this](size_t a, size_t b) { return data[a] < data[b]; }); + ::partial_sort(res.begin(), sort_end, res.end(), [this](size_t a, size_t b) { return data[a] < data[b]; }); } }; diff --git a/src/Columns/ColumnFixedString.cpp b/src/Columns/ColumnFixedString.cpp index 0828f8ebd897..81eb9615ff98 100644 --- a/src/Columns/ColumnFixedString.cpp +++ b/src/Columns/ColumnFixedString.cpp @@ -192,9 +192,9 @@ void ColumnFixedString::getPermutation(bool reverse, size_t limit, int /*nan_dir else { if (reverse) - std::sort(res.begin(), res.end(), greater(*this)); + ::sort(res.begin(), res.end(), greater(*this)); else - std::sort(res.begin(), res.end(), less(*this)); + ::sort(res.begin(), res.end(), less(*this)); } } diff --git a/src/Columns/ColumnMap.cpp b/src/Columns/ColumnMap.cpp index e595525d9e87..ef5d96da0f70 100644 --- a/src/Columns/ColumnMap.cpp +++ b/src/Columns/ColumnMap.cpp @@ -4,8 +4,6 @@ #include #include #include -#include -#include #include #include #include @@ -64,8 +62,9 @@ MutableColumnPtr ColumnMap::cloneResized(size_t new_size) const Field ColumnMap::operator[](size_t n) const { - auto array = DB::get((*nested)[n]); - return Map(std::make_move_iterator(array.begin()), std::make_move_iterator(array.end())); + Field res; + get(n, res); + return res; } void ColumnMap::get(size_t n, Field & res) const @@ -74,11 +73,12 @@ void ColumnMap::get(size_t n, Field & res) const size_t offset = offsets[n - 1]; size_t size = offsets[n] - offsets[n - 1]; - res = Map(size); + res = Map(); auto & map = DB::get(res); + map.reserve(size); for (size_t i = 0; i < size; ++i) - getNestedData().get(offset + i, map[i]); + map.push_back(getNestedData()[offset + i]); } bool ColumnMap::isDefaultAt(size_t n) const diff --git a/src/Columns/ColumnString.cpp b/src/Columns/ColumnString.cpp index cd8a3e698d8a..ef972059b00e 100644 --- a/src/Columns/ColumnString.cpp +++ b/src/Columns/ColumnString.cpp @@ -335,9 +335,9 @@ void ColumnString::getPermutationImpl(size_t limit, Permutation & res, Comparato auto less = [&cmp](size_t lhs, size_t rhs){ return cmp(lhs, rhs) < 0; }; if (limit) - partial_sort(res.begin(), res.begin() + limit, res.end(), less); + ::partial_sort(res.begin(), res.begin() + limit, res.end(), less); else - std::sort(res.begin(), res.end(), less); + ::sort(res.begin(), res.end(), less); } void ColumnString::getPermutation(bool reverse, size_t limit, int /*nan_direction_hint*/, Permutation & res) const diff --git a/src/Columns/ColumnTuple.cpp b/src/Columns/ColumnTuple.cpp index d667b264d552..1b511f17f732 100644 --- a/src/Columns/ColumnTuple.cpp +++ b/src/Columns/ColumnTuple.cpp @@ -1,5 +1,6 @@ #include +#include #include #include #include @@ -9,9 +10,6 @@ #include #include #include -#include -#include -#include #include @@ -101,17 +99,21 @@ MutableColumnPtr ColumnTuple::cloneResized(size_t new_size) const Field ColumnTuple::operator[](size_t n) const { - return collections::map(columns, [n] (const auto & 
column) { return (*column)[n]; }); + Field res; + get(n, res); + return res; } void ColumnTuple::get(size_t n, Field & res) const { const size_t tuple_size = columns.size(); - Tuple tuple(tuple_size); - for (const auto i : collections::range(0, tuple_size)) - columns[i]->get(n, tuple[i]); - res = tuple; + res = Tuple(); + Tuple & res_tuple = DB::get(res); + res_tuple.reserve(tuple_size); + + for (size_t i = 0; i < tuple_size; ++i) + res_tuple.push_back((*columns[i])[n]); } bool ColumnTuple::isDefaultAt(size_t n) const @@ -383,9 +385,9 @@ void ColumnTuple::getPermutationImpl(size_t limit, Permutation & res, LessOperat limit = 0; if (limit) - partial_sort(res.begin(), res.begin() + limit, res.end(), less); + ::partial_sort(res.begin(), res.begin() + limit, res.end(), less); else - std::sort(res.begin(), res.end(), less); + ::sort(res.begin(), res.end(), less); } void ColumnTuple::updatePermutationImpl(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges, const Collator * collator) const @@ -483,7 +485,7 @@ void ColumnTuple::getExtremes(Field & min, Field & max) const Tuple min_tuple(tuple_size); Tuple max_tuple(tuple_size); - for (const auto i : collections::range(0, tuple_size)) + for (size_t i = 0; i < tuple_size; ++i) columns[i]->getExtremes(min_tuple[i], max_tuple[i]); min = min_tuple; @@ -504,7 +506,7 @@ bool ColumnTuple::structureEquals(const IColumn & rhs) const if (tuple_size != rhs_tuple->columns.size()) return false; - for (const auto i : collections::range(0, tuple_size)) + for (size_t i = 0; i < tuple_size; ++i) if (!columns[i]->structureEquals(*rhs_tuple->columns[i])) return false; diff --git a/src/Columns/ColumnVector.cpp b/src/Columns/ColumnVector.cpp index 9808acf48c82..eca10049a0b2 100644 --- a/src/Columns/ColumnVector.cpp +++ b/src/Columns/ColumnVector.cpp @@ -1,6 +1,5 @@ #include "ColumnVector.h" -#include #include #include #include @@ -118,7 +117,6 @@ struct ColumnVector::equals bool operator()(size_t lhs, size_t rhs) const { return CompareHelper::equals(parent.data[lhs], parent.data[rhs], nan_direction_hint); } }; - namespace { template @@ -158,9 +156,9 @@ void ColumnVector::getPermutation(bool reverse, size_t limit, int nan_directi res[i] = i; if (reverse) - partial_sort(res.begin(), res.begin() + limit, res.end(), greater(*this, nan_direction_hint)); + ::partial_sort(res.begin(), res.begin() + limit, res.end(), greater(*this, nan_direction_hint)); else - partial_sort(res.begin(), res.begin() + limit, res.end(), less(*this, nan_direction_hint)); + ::partial_sort(res.begin(), res.begin() + limit, res.end(), less(*this, nan_direction_hint)); } else { @@ -204,16 +202,16 @@ void ColumnVector::getPermutation(bool reverse, size_t limit, int nan_directi res[i] = i; if (reverse) - pdqsort(res.begin(), res.end(), greater(*this, nan_direction_hint)); + ::sort(res.begin(), res.end(), greater(*this, nan_direction_hint)); else - pdqsort(res.begin(), res.end(), less(*this, nan_direction_hint)); + ::sort(res.begin(), res.end(), less(*this, nan_direction_hint)); } } template void ColumnVector::updatePermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_range) const { - auto sort = [](auto begin, auto end, auto pred) { pdqsort(begin, end, pred); }; + auto sort = [](auto begin, auto end, auto pred) { ::sort(begin, end, pred); }; auto partial_sort = [](auto begin, auto mid, auto end, auto pred) { ::partial_sort(begin, mid, end, pred); }; if (reverse) diff --git 
a/src/Columns/ColumnVector.h b/src/Columns/ColumnVector.h index bee7bfa738c7..62a0e3a11904 100644 --- a/src/Columns/ColumnVector.h +++ b/src/Columns/ColumnVector.h @@ -238,7 +238,7 @@ class ColumnVector final : public COWHelper> } const char * getFamilyName() const override { return TypeName.data(); } - TypeIndex getDataType() const override { return TypeId; } + TypeIndex getDataType() const override { return TypeToTypeIndex; } MutableColumnPtr cloneResized(size_t size) const override; diff --git a/src/Columns/IColumn.h b/src/Columns/IColumn.h index b1a6e83ee980..303c78506c45 100644 --- a/src/Columns/IColumn.h +++ b/src/Columns/IColumn.h @@ -528,7 +528,7 @@ class IColumn : public COW template void getIndicesOfNonDefaultRowsImpl(Offsets & indices, size_t from, size_t limit) const; - /// Uses std::sort and partial_sort as default algorithms. + /// Uses sort and partial_sort as default algorithms. /// Implements 'less' and 'equals' via comparator. /// If 'less' and 'equals' can be implemented more optimal /// (e.g. with less number of comparisons), you can use diff --git a/src/Columns/IColumnImpl.h b/src/Columns/IColumnImpl.h index 1be52087d117..450684aacea8 100644 --- a/src/Columns/IColumnImpl.h +++ b/src/Columns/IColumnImpl.h @@ -11,6 +11,7 @@ #include #include + namespace DB { namespace ErrorCodes @@ -203,7 +204,7 @@ void IColumn::updatePermutationImpl( limit, res, equal_ranges, [&cmp](size_t lhs, size_t rhs) { return cmp(lhs, rhs) < 0; }, [&cmp](size_t lhs, size_t rhs) { return cmp(lhs, rhs) == 0; }, - [](auto begin, auto end, auto pred) { std::sort(begin, end, pred); }, + [](auto begin, auto end, auto pred) { ::sort(begin, end, pred); }, [](auto begin, auto mid, auto end, auto pred) { ::partial_sort(begin, mid, end, pred); }); } diff --git a/src/Common/CMakeLists.txt b/src/Common/CMakeLists.txt index 1e7d3591a482..490628a2180f 100644 --- a/src/Common/CMakeLists.txt +++ b/src/Common/CMakeLists.txt @@ -4,6 +4,6 @@ if (ENABLE_EXAMPLES) add_subdirectory(examples) endif() -if (USE_MYSQL) +if (ENABLE_MYSQL) add_subdirectory (mysqlxx) endif () diff --git a/src/Common/Config/CMakeLists.txt b/src/Common/Config/CMakeLists.txt index cc41a8b2bb2a..ec7bdd101964 100644 --- a/src/Common/Config/CMakeLists.txt +++ b/src/Common/Config/CMakeLists.txt @@ -27,7 +27,7 @@ target_link_libraries(clickhouse_common_config_no_zookeeper_log string_utils ) -if (USE_YAML_CPP) - target_link_libraries(clickhouse_common_config PRIVATE yaml-cpp) - target_link_libraries(clickhouse_common_config_no_zookeeper_log PRIVATE yaml-cpp) +if (TARGET ch_contrib::yaml_cpp) + target_link_libraries(clickhouse_common_config PRIVATE ch_contrib::yaml_cpp) + target_link_libraries(clickhouse_common_config_no_zookeeper_log PRIVATE ch_contrib::yaml_cpp) endif() diff --git a/src/Common/Config/ConfigProcessor.cpp b/src/Common/Config/ConfigProcessor.cpp index 41e9a53e50fe..b86e8ed3e409 100644 --- a/src/Common/Config/ConfigProcessor.cpp +++ b/src/Common/Config/ConfigProcessor.cpp @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -105,7 +106,7 @@ static ElementIdentifier getElementIdentifier(Node * element) std::string value = node->nodeValue(); attrs_kv.push_back(std::make_pair(name, value)); } - std::sort(attrs_kv.begin(), attrs_kv.end()); + ::sort(attrs_kv.begin(), attrs_kv.end()); ElementIdentifier res; res.push_back(element->nodeName()); @@ -443,7 +444,7 @@ ConfigProcessor::Files ConfigProcessor::getConfigMergeFiles(const std::string & } } - std::sort(files.begin(), files.end()); + ::sort(files.begin(), files.end()); 
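Both ConfigProcessor.cpp call sites above, like dozens of others in this patch (ColumnVector.cpp's pdqsort calls and IColumnImpl.h above, the SymbolIndex and IntervalTree hunks below), swap std::sort for an unqualified global ::sort, with ::partial_sort and ::nth_element getting the same treatment. The wrapper is supplied by a newly added include whose target was lost in this rendering, likely the project's <base/sort.h>; a plausible minimal shape for such a header, assuming it forwards to pdqsort:

```cpp
#pragma once

#include <pdqsort.h>

// Assumed shape, not the actual header: funnel all generic sorting through
// one symbol so the algorithm can be swapped or instrumented in one place
// rather than at every call site.
template <typename RandomIt>
void sort(RandomIt first, RandomIt last)
{
    ::pdqsort(first, last);
}

template <typename RandomIt, typename Compare>
void sort(RandomIt first, RandomIt last, Compare compare)
{
    ::pdqsort(first, last, compare);
}
```

This would also explain why the ColumnVector.cpp hunks replace direct pdqsort calls with ::sort: call sites no longer care which algorithm backs the wrapper.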
return files; } diff --git a/src/Common/CurrentMetrics.cpp b/src/Common/CurrentMetrics.cpp index 896168253cf8..a741f1f1bfc0 100644 --- a/src/Common/CurrentMetrics.cpp +++ b/src/Common/CurrentMetrics.cpp @@ -80,6 +80,7 @@ M(SyncDrainedConnections, "Number of connections drained synchronously.") \ M(ActiveSyncDrainedConnections, "Number of active connections drained synchronously.") \ M(AsynchronousReadWait, "Number of threads waiting for asynchronous read.") \ + M(PendingAsyncInsert, "Number of asynchronous inserts that are waiting for flush.") \ namespace CurrentMetrics { diff --git a/src/Common/DNSResolver.cpp b/src/Common/DNSResolver.cpp index 36d0c13b1532..1fbd1416d67a 100644 --- a/src/Common/DNSResolver.cpp +++ b/src/Common/DNSResolver.cpp @@ -202,6 +202,45 @@ Poco::Net::SocketAddress DNSResolver::resolveAddress(const std::string & host, U return Poco::Net::SocketAddress(impl->cache_host(host).front(), port); } +std::pair> DNSResolver::resolveHostOrAddress(const std::string & host_and_port) +{ + Poco::Net::IPAddress ip; + + size_t number_of_colons = std::count(host_and_port.begin(), host_and_port.end(), ':'); + if (number_of_colons > 1) + { + /// IPv6 host + if (host_and_port.starts_with('[')) + { + size_t close_bracket_pos = host_and_port.find(']'); + assert(close_bracket_pos != std::string::npos); + ip = resolveHost(host_and_port.substr(0, close_bracket_pos)); + + if (close_bracket_pos == host_and_port.size() - 1) + return {ip, std::nullopt}; + if (host_and_port[close_bracket_pos + 1] != ':') + throw Exception("Missing delimiter between host and port", ErrorCodes::BAD_ARGUMENTS); + + unsigned int port; + if (!Poco::NumberParser::tryParseUnsigned(host_and_port.substr(close_bracket_pos + 2), port)) + throw Exception("Port must be numeric", ErrorCodes::BAD_ARGUMENTS); + if (port > 0xFFFF) + throw Exception("Port must be less than 0xFFFF", ErrorCodes::BAD_ARGUMENTS); + return {ip, port}; + } + return {resolveHost(host_and_port), std::nullopt}; + } + else if (number_of_colons == 1) + { + /// IPv4 host with port + Poco::Net::SocketAddress socket = resolveAddress(host_and_port); + return {socket.host(), socket.port()}; + } + + /// IPv4 host + return {resolveHost(host_and_port), std::nullopt}; +} + String DNSResolver::reverseResolve(const Poco::Net::IPAddress & address) { if (impl->disable_cache) @@ -272,7 +311,7 @@ bool DNSResolver::updateCacheImpl(UpdateF && update_func, ElemsT && elems, const } if (!lost_elems.empty()) - LOG_INFO(log, log_msg, lost_elems); + LOG_INFO(log, fmt::runtime(log_msg), lost_elems); return updated; } diff --git a/src/Common/DNSResolver.h b/src/Common/DNSResolver.h index 3cefa37fd70b..4ab422ab4ec6 100644 --- a/src/Common/DNSResolver.h +++ b/src/Common/DNSResolver.h @@ -34,6 +34,10 @@ class DNSResolver : private boost::noncopyable Poco::Net::SocketAddress resolveAddress(const std::string & host, UInt16 port); + /// Accepts host names like 'example.com'/'example.com:port' or '127.0.0.1'/'127.0.0.1:port' or '::1'/'[::1]:port' + /// and resolves the IP and the port, if a port is set + std::pair> resolveHostOrAddress(const std::string & host_and_port); + /// Accepts host IP and resolves its host name String reverseResolve(const Poco::Net::IPAddress & address); diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp index a22db0803744..e991daf32091 100644 --- a/src/Common/ErrorCodes.cpp +++ b/src/Common/ErrorCodes.cpp @@ -609,6 +609,9 @@ M(638, SNAPPY_UNCOMPRESS_FAILED) \ M(639, SNAPPY_COMPRESS_FAILED) \ M(640, NO_HIVEMETASTORE) \ + M(641, CANNOT_APPEND_TO_FILE) \ +
M(642, CANNOT_PACK_ARCHIVE) \ + M(643, CANNOT_UNPACK_ARCHIVE) \ \ M(999, KEEPER_EXCEPTION) \ M(1000, POCO_EXCEPTION) \ diff --git a/src/Common/Exception.h b/src/Common/Exception.h index 3aa06f8c9887..b6bc31a5821b 100644 --- a/src/Common/Exception.h +++ b/src/Common/Exception.h @@ -37,7 +37,7 @@ class Exception : public Poco::Exception // Format message with fmt::format, like the logging functions. template Exception(int code, const std::string & fmt, Args&&... args) - : Exception(fmt::format(fmt, std::forward(args)...), code) + : Exception(fmt::format(fmt::runtime(fmt), std::forward(args)...), code) {} struct CreateFromPocoTag {}; @@ -55,7 +55,7 @@ class Exception : public Poco::Exception template void addMessage(const std::string& format, Args&&... args) { - extendedMessage(fmt::format(format, std::forward(args)...)); + extendedMessage(fmt::format(fmt::runtime(format), std::forward(args)...)); } void addMessage(const std::string& message) @@ -119,7 +119,7 @@ class ParsingException : public Exception // Format message with fmt::format, like the logging functions. template ParsingException(int code, const std::string & fmt, Args&&... args) - : Exception(fmt::format(fmt, std::forward(args)...), code) + : Exception(fmt::format(fmt::runtime(fmt), std::forward(args)...), code) {} diff --git a/src/Common/FieldVisitorConvertToNumber.h b/src/Common/FieldVisitorConvertToNumber.h index 025fd6676090..7bbb7f0708ac 100644 --- a/src/Common/FieldVisitorConvertToNumber.h +++ b/src/Common/FieldVisitorConvertToNumber.h @@ -123,6 +123,8 @@ class FieldVisitorConvertToNumber : public StaticVisitor else return static_cast(x); } + + T operator() (const bool & x) const { return T(x); } }; } diff --git a/src/Common/FieldVisitorDump.cpp b/src/Common/FieldVisitorDump.cpp index d0203407900b..6c869e05fd4c 100644 --- a/src/Common/FieldVisitorDump.cpp +++ b/src/Common/FieldVisitorDump.cpp @@ -37,6 +37,7 @@ String FieldVisitorDump::operator() (const UInt256 & x) const { return formatQuo String FieldVisitorDump::operator() (const Int128 & x) const { return formatQuotedWithPrefix(x, "Int128_"); } String FieldVisitorDump::operator() (const Int256 & x) const { return formatQuotedWithPrefix(x, "Int256_"); } String FieldVisitorDump::operator() (const UUID & x) const { return formatQuotedWithPrefix(x, "UUID_"); } +String FieldVisitorDump::operator() (const bool & x) const { return formatQuotedWithPrefix(x, "Bool_"); } String FieldVisitorDump::operator() (const String & x) const diff --git a/src/Common/FieldVisitorDump.h b/src/Common/FieldVisitorDump.h index 22e34d66ff7e..0b1b311999e2 100644 --- a/src/Common/FieldVisitorDump.h +++ b/src/Common/FieldVisitorDump.h @@ -27,6 +27,7 @@ class FieldVisitorDump : public StaticVisitor String operator() (const DecimalField & x) const; String operator() (const DecimalField & x) const; String operator() (const AggregateFunctionStateData & x) const; + String operator() (const bool & x) const; }; } diff --git a/src/Common/FieldVisitorHash.cpp b/src/Common/FieldVisitorHash.cpp index 80d5f2daf654..09b8b7908f3d 100644 --- a/src/Common/FieldVisitorHash.cpp +++ b/src/Common/FieldVisitorHash.cpp @@ -146,4 +146,11 @@ void FieldVisitorHash::operator() (const Int256 & x) const hash.update(x); } +void FieldVisitorHash::operator() (const bool & x) const +{ + UInt8 type = Field::Types::Bool; + hash.update(type); + hash.update(x); +} + } diff --git a/src/Common/FieldVisitorHash.h b/src/Common/FieldVisitorHash.h index 6c786fda4ad2..7527e13ca20b 100644 --- a/src/Common/FieldVisitorHash.h +++ 
b/src/Common/FieldVisitorHash.h @@ -33,6 +33,7 @@ class FieldVisitorHash : public StaticVisitor<> void operator() (const DecimalField & x) const; void operator() (const DecimalField & x) const; void operator() (const AggregateFunctionStateData & x) const; + void operator() (const bool & x) const; }; } diff --git a/src/Common/FieldVisitorSum.cpp b/src/Common/FieldVisitorSum.cpp index 0064830c08aa..c3d7f4f84626 100644 --- a/src/Common/FieldVisitorSum.cpp +++ b/src/Common/FieldVisitorSum.cpp @@ -33,5 +33,7 @@ bool FieldVisitorSum::operator() (AggregateFunctionStateData &) const throw Exception("Cannot sum AggregateFunctionStates", ErrorCodes::LOGICAL_ERROR); } +bool FieldVisitorSum::operator() (bool &) const { throw Exception("Cannot sum Bools", ErrorCodes::LOGICAL_ERROR); } + } diff --git a/src/Common/FieldVisitorSum.h b/src/Common/FieldVisitorSum.h index e208933043b0..3e868e46f714 100644 --- a/src/Common/FieldVisitorSum.h +++ b/src/Common/FieldVisitorSum.h @@ -27,6 +27,7 @@ class FieldVisitorSum : public StaticVisitor bool operator() (Map &) const; bool operator() (UUID &) const; bool operator() (AggregateFunctionStateData &) const; + bool operator() (bool &) const; template bool operator() (DecimalField & x) const diff --git a/src/Common/FieldVisitorToString.cpp b/src/Common/FieldVisitorToString.cpp index 01b2db547353..6cc83f32a529 100644 --- a/src/Common/FieldVisitorToString.cpp +++ b/src/Common/FieldVisitorToString.cpp @@ -51,7 +51,6 @@ static String formatFloat(const Float64 x) return { buffer, buffer + builder.position() }; } - String FieldVisitorToString::operator() (const Null & x) const { return x.isNegativeInfinity() ? "-Inf" : (x.isPositiveInfinity() ? "+Inf" : "NULL"); } String FieldVisitorToString::operator() (const UInt64 & x) const { return formatQuoted(x); } String FieldVisitorToString::operator() (const Int64 & x) const { return formatQuoted(x); } @@ -67,6 +66,7 @@ String FieldVisitorToString::operator() (const UInt256 & x) const { return forma String FieldVisitorToString::operator() (const Int256 & x) const { return formatQuoted(x); } String FieldVisitorToString::operator() (const UUID & x) const { return formatQuoted(x); } String FieldVisitorToString::operator() (const AggregateFunctionStateData & x) const { return formatQuoted(x.data); } +String FieldVisitorToString::operator() (const bool & x) const { return x ? 
"true" : "false"; } String FieldVisitorToString::operator() (const Array & x) const { diff --git a/src/Common/FieldVisitorToString.h b/src/Common/FieldVisitorToString.h index 39709f1c272e..991f7b4b2d7d 100644 --- a/src/Common/FieldVisitorToString.h +++ b/src/Common/FieldVisitorToString.h @@ -27,6 +27,7 @@ class FieldVisitorToString : public StaticVisitor String operator() (const DecimalField & x) const; String operator() (const DecimalField & x) const; String operator() (const AggregateFunctionStateData & x) const; + String operator() (const bool & x) const; }; } diff --git a/src/Common/FieldVisitorWriteBinary.cpp b/src/Common/FieldVisitorWriteBinary.cpp index d01188bef405..fc17b58b3349 100644 --- a/src/Common/FieldVisitorWriteBinary.cpp +++ b/src/Common/FieldVisitorWriteBinary.cpp @@ -66,5 +66,10 @@ void FieldVisitorWriteBinary::operator() (const Map & x, WriteBuffer & buf) cons } } +void FieldVisitorWriteBinary::operator()(const bool & x, WriteBuffer & buf) const +{ + writeBinary(UInt8(x), buf); +} + } diff --git a/src/Common/FieldVisitorWriteBinary.h b/src/Common/FieldVisitorWriteBinary.h index ae864ca74f36..155cf0e10507 100644 --- a/src/Common/FieldVisitorWriteBinary.h +++ b/src/Common/FieldVisitorWriteBinary.h @@ -26,6 +26,7 @@ class FieldVisitorWriteBinary void operator() (const DecimalField & x, WriteBuffer & buf) const; void operator() (const DecimalField & x, WriteBuffer & buf) const; void operator() (const AggregateFunctionStateData & x, WriteBuffer & buf) const; + void operator() (const bool & x, WriteBuffer & buf) const; }; } diff --git a/src/Common/FieldVisitorsAccurateComparison.h b/src/Common/FieldVisitorsAccurateComparison.h index 795620da0cba..487f4f78a00c 100644 --- a/src/Common/FieldVisitorsAccurateComparison.h +++ b/src/Common/FieldVisitorsAccurateComparison.h @@ -32,6 +32,14 @@ class FieldVisitorAccurateEquals : public StaticVisitor return l == r; return false; } + else if constexpr (std::is_same_v) + { + return operator()(UInt8(l), r); + } + else if constexpr (std::is_same_v) + { + return operator()(l, UInt8(r)); + } else { if constexpr (std::is_same_v) @@ -91,6 +99,14 @@ class FieldVisitorAccurateLess : public StaticVisitor { return r.isPositiveInfinity(); } + else if constexpr (std::is_same_v) + { + return operator()(UInt8(l), r); + } + else if constexpr (std::is_same_v) + { + return operator()(l, UInt8(r)); + } else { if constexpr (std::is_same_v) diff --git a/src/Common/FrequencyHolder.h b/src/Common/FrequencyHolder.h new file mode 100644 index 000000000000..a98ae0452d32 --- /dev/null +++ b/src/Common/FrequencyHolder.h @@ -0,0 +1,252 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int FILE_DOESNT_EXIST; +} + +/// FrequencyHolder class is responsible for storing and loading dictionaries +/// needed for text classification functions: +/// +/// 1. detectLanguageUnknown +/// 2. detectCharset +/// 3. detectTonality +/// 4. 
detectProgrammingLanguage + +class FrequencyHolder +{ + +public: + struct Language + { + String name; + HashMap map; + }; + + struct Encoding + { + String name; + String lang; + HashMap map; + }; + +public: + using Map = HashMap; + using Container = std::vector; + using EncodingMap = HashMap; + using EncodingContainer = std::vector; + + static FrequencyHolder & getInstance() + { + static FrequencyHolder instance; + return instance; + } + + void loadEncodingsFrequency() + { + Poco::Logger * log = &Poco::Logger::get("EncodingsFrequency"); + + LOG_TRACE(log, "Loading embedded charset frequencies"); + + auto resource = getResource("charset.zst"); + if (resource.empty()) + throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "There are no embedded charset frequencies"); + + String line; + UInt16 bigram; + Float64 frequency; + String charset_name; + + auto buf = std::make_unique(resource.data(), resource.size()); + ZstdInflatingReadBuffer in(std::move(buf)); + + while (!in.eof()) + { + readString(line, in); + in.ignore(); + + if (line.empty()) + continue; + + ReadBufferFromString buf_line(line); + + // Start loading a new charset + if (line.starts_with("// ")) + { + // Skip "// " + buf_line.ignore(3); + readString(charset_name, buf_line); + + /* In our dictionary we have lines with form: _ + * If we need to find language of data, we return + * If we need to find charset of data, we return . + */ + size_t sep = charset_name.find('_'); + + Encoding enc; + enc.lang = charset_name.substr(0, sep); + enc.name = charset_name.substr(sep + 1); + encodings_freq.push_back(std::move(enc)); + } + else + { + readIntText(bigram, buf_line); + buf_line.ignore(); + readFloatText(frequency, buf_line); + + encodings_freq.back().map[bigram] = frequency; + } + } + LOG_TRACE(log, "Charset frequencies were added, charset count: {}", encodings_freq.size()); + } + + + void loadEmotionalDict() + { + Poco::Logger * log = &Poco::Logger::get("EmotionalDict"); + LOG_TRACE(log, "Loading embedded emotional dictionary"); + + auto resource = getResource("tonality_ru.zst"); + if (resource.empty()) + throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "There is no embedded emotional dictionary"); + + String line; + String word; + Float64 tonality; + size_t count = 0; + + auto buf = std::make_unique(resource.data(), resource.size()); + ZstdInflatingReadBuffer in(std::move(buf)); + + while (!in.eof()) + { + readString(line, in); + in.ignore(); + + if (line.empty()) + continue; + + ReadBufferFromString buf_line(line); + + readStringUntilWhitespace(word, buf_line); + buf_line.ignore(); + readFloatText(tonality, buf_line); + + StringRef ref{string_pool.insert(word.data(), word.size()), word.size()}; + emotional_dict[ref] = tonality; + ++count; + } + LOG_TRACE(log, "Emotional dictionary was added. Word count: {}", std::to_string(count)); + } + + + void loadProgrammingFrequency() + { + Poco::Logger * log = &Poco::Logger::get("ProgrammingFrequency"); + + LOG_TRACE(log, "Loading embedded programming language frequencies"); + + auto resource = getResource("programming.zst"); + if (resource.empty()) + throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "There are no embedded programming language frequencies"); + + String line; + String bigram; + Float64 frequency; + String programming_language; + + auto buf = std::make_unique(resource.data(), resource.size()); + ZstdInflatingReadBuffer in(std::move(buf)); + + while (!in.eof()) + { + readString(line, in); + in.ignore(); + + if (line.empty()) + continue; + + ReadBufferFromString buf_line(line); + + // Start loading a new language + if (line.starts_with("// ")) + { + // Skip "// " + buf_line.ignore(3); + readString(programming_language, buf_line); + + Language lang; + lang.name = programming_language; + programming_freq.push_back(std::move(lang)); + } + else + { + readStringUntilWhitespace(bigram, buf_line); + buf_line.ignore(); + readFloatText(frequency, buf_line); + + StringRef ref{string_pool.insert(bigram.data(), bigram.size()), bigram.size()}; + programming_freq.back().map[ref] = frequency; + } + } + LOG_TRACE(log, "Programming language frequencies were added"); + } + + const Map & getEmotionalDict() + { + std::lock_guard lock(mutex); + if (emotional_dict.empty()) + loadEmotionalDict(); + + return emotional_dict; + } + + + const EncodingContainer & getEncodingsFrequency() + { + std::lock_guard lock(mutex); + if (encodings_freq.empty()) + loadEncodingsFrequency(); + + return encodings_freq; + } + + const Container & getProgrammingFrequency() + { + std::lock_guard lock(mutex); + if (programming_freq.empty()) + loadProgrammingFrequency(); + + return programming_freq; + } + + +private: + Arena string_pool; + + Map emotional_dict; + Container programming_freq; + EncodingContainer encodings_freq; + + std::mutex mutex; +}; +} diff --git a/src/Common/IntervalTree.h b/src/Common/IntervalTree.h index fd2fec528a46..b8334653754e 100644 --- a/src/Common/IntervalTree.h +++ b/src/Common/IntervalTree.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include @@ -10,6 +11,7 @@ namespace DB { /** Structure that holds closed interval with left and right. + * Interval left must not be greater than interval right. * Example: [1, 1] is valid interval, that contain point 1. */ template @@ -70,6 +72,9 @@ struct IntervalTreeVoidValue * Search for all intervals intersecting point has complexity O(log(n) + k), k is count of intervals that intersect point. * If we need to only check if there are some interval intersecting point such operation has complexity O(log(n)). * + * There is an invariant that interval left must not be greater than interval right; otherwise such an interval could not contain any point. + * If that invariant is broken, inserting such an interval into IntervalTree will return false. + * * Explanation: * * IntervalTree structure is balanced tree. Each node contains: @@ -125,44 +130,48 @@ class IntervalTree IntervalTree() { nodes.resize(1); } template , bool> = true> - void emplace(Interval interval) + ALWAYS_INLINE bool emplace(Interval interval) { assert(!tree_is_built); + if (unlikely(interval.left > interval.right)) + return false; + sorted_intervals.emplace_back(interval); increaseIntervalsSize(); + + return true; } template , bool> = true, typename... Args> - void emplace(Interval interval, Args &&...
args) + ALWAYS_INLINE bool emplace(Interval interval, Args &&... args) { assert(!tree_is_built); + if (unlikely(interval.left > interval.right)) + return false; + sorted_intervals.emplace_back( std::piecewise_construct, std::forward_as_tuple(interval), std::forward_as_tuple(std::forward(args)...)); increaseIntervalsSize(); + + return true; } template , bool> = true> - void insert(Interval interval) + bool insert(Interval interval) { - assert(!tree_is_built); - sorted_intervals.emplace_back(interval); - increaseIntervalsSize(); + return emplace(interval); } template , bool> = true> - void insert(Interval interval, const Value & value) + bool insert(Interval interval, const Value & value) { - assert(!tree_is_built); - sorted_intervals.emplace_back(interval, value); - increaseIntervalsSize(); + return emplace(interval, value); } template , bool> = true> - void insert(Interval interval, Value && value) + bool insert(Interval interval, Value && value) { - assert(!tree_is_built); - sorted_intervals.emplace_back(interval, std::move(value)); - increaseIntervalsSize(); + return emplace(interval, std::move(value)); } /// Build tree, after that intervals cannot be inserted, and only search or iteration can be performed. @@ -283,6 +292,15 @@ class IntervalTree size_t getIntervalsSize() const { return intervals_size; } + size_t getSizeInBytes() const + { + size_t nodes_size_in_bytes = nodes.size() * sizeof(Node); + size_t intervals_size_in_bytes = sorted_intervals.size() * sizeof(IntervalWithValue); + size_t result = nodes_size_in_bytes + intervals_size_in_bytes; + + return result; + } + private: struct Node { @@ -472,14 +490,14 @@ class IntervalTree } } - std::sort(intervals_sorted_by_left_asc.begin(), intervals_sorted_by_left_asc.end(), [](auto & lhs, auto & rhs) + ::sort(intervals_sorted_by_left_asc.begin(), intervals_sorted_by_left_asc.end(), [](auto & lhs, auto & rhs) { auto & lhs_interval = getInterval(lhs); auto & rhs_interval = getInterval(rhs); return lhs_interval.left < rhs_interval.left; }); - std::sort(intervals_sorted_by_right_desc.begin(), intervals_sorted_by_right_desc.end(), [](auto & lhs, auto & rhs) + ::sort(intervals_sorted_by_right_desc.begin(), intervals_sorted_by_right_desc.end(), [](auto & lhs, auto & rhs) { auto & lhs_interval = getInterval(lhs); auto & rhs_interval = getInterval(rhs); @@ -664,7 +682,7 @@ class IntervalTree size_t size = points.size(); size_t middle_element_index = size / 2; - std::nth_element(points.begin(), points.begin() + middle_element_index, points.end()); + ::nth_element(points.begin(), points.begin() + middle_element_index, points.end()); /** We should not get median as average of middle_element_index and middle_element_index - 1 * because we want point in node to intersect some interval. diff --git a/src/Common/LockMemoryExceptionInThread.h b/src/Common/LockMemoryExceptionInThread.h index dc2bccf257bd..ec8f69806d7d 100644 --- a/src/Common/LockMemoryExceptionInThread.h +++ b/src/Common/LockMemoryExceptionInThread.h @@ -1,5 +1,6 @@ #pragma once +#include #include /// To be able to avoid MEMORY_LIMIT_EXCEEDED Exception in destructors: diff --git a/src/Common/MemoryTrackerBlockerInThread.h b/src/Common/MemoryTrackerBlockerInThread.h index caad28f636ec..381eb80df0c6 100644 --- a/src/Common/MemoryTrackerBlockerInThread.h +++ b/src/Common/MemoryTrackerBlockerInThread.h @@ -1,5 +1,6 @@ #pragma once +#include #include /// To be able to temporarily stop memory tracking from current thread. 
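With emplace() and insert() now returning bool, callers can detect inverted intervals instead of silently storing data that no search could ever match. A minimal usage sketch; the template spellings are assumptions, since the generic parameters were lost in this rendering:

```cpp
#include <Common/IntervalTree.h>
#include <cassert>

using namespace DB;

void intervalTreeExample()
{
    // Assumed instantiation: closed Int64 intervals with no payload value.
    IntervalTree<Interval<Int64>, IntervalTreeVoidValue> tree;

    bool ok = tree.insert(Interval<Int64>(1, 5));   // left <= right: stored
    bool bad = tree.insert(Interval<Int64>(5, 1));  // left > right: rejected, returns false
    assert(ok && !bad);
    assert(tree.getIntervalsSize() == 1);  // only the valid interval was kept

    tree.build();  // per the header comment: after build(), only search/iteration are allowed
    size_t bytes = tree.getSizeInBytes();  // the new accounting hook added above
    (void)bytes;
}
```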
diff --git a/src/Common/OptimizedRegularExpression.h b/src/Common/OptimizedRegularExpression.h index f7223c2efa97..09b0dbe53371 100644 --- a/src/Common/OptimizedRegularExpression.h +++ b/src/Common/OptimizedRegularExpression.h @@ -8,12 +8,7 @@ #include #include - -#if USE_RE2_ST -# include -#else -# define re2_st re2 -#endif +#include /** Uses two ways to optimize a regular expression: diff --git a/src/Common/PoolBase.h b/src/Common/PoolBase.h index 85d4e84abcab..a82a6efc4c1a 100644 --- a/src/Common/PoolBase.h +++ b/src/Common/PoolBase.h @@ -41,6 +41,7 @@ class PoolBase : private boost::noncopyable ObjectPtr object; bool in_use = false; + std::atomic is_expired = false; PoolBase & pool; }; @@ -87,6 +88,14 @@ class PoolBase : private boost::noncopyable Object & operator*() & { return *data->data.object; } const Object & operator*() const & { return *data->data.object; } + /** + * Expire an object to make it reallocated later. + */ + void expire() + { + data->data.is_expired = true; + } + bool isNull() const { return data == nullptr; } PoolBase * getPool() const @@ -112,9 +121,22 @@ class PoolBase : private boost::noncopyable while (true) { for (auto & item : items) + { if (!item->in_use) - return Entry(*item); - + { + if (likely(!item->is_expired)) + { + return Entry(*item); + } + else + { + expireObject(item->object); + item->object = allocObject(); + item->is_expired = false; + return Entry(*item); + } + } + } if (items.size() < max_items) { ObjectPtr object = allocObject(); @@ -139,6 +161,12 @@ class PoolBase : private boost::noncopyable items.emplace_back(std::make_shared(allocObject(), *this)); } + inline size_t size() + { + std::unique_lock lock(mutex); + return items.size(); + } + private: /** The maximum size of the pool. */ unsigned max_items; @@ -162,4 +190,5 @@ class PoolBase : private boost::noncopyable /** Creates a new object to put into the pool. */ virtual ObjectPtr allocObject() = 0; + virtual void expireObject(ObjectPtr) {} }; diff --git a/src/Common/PoolWithFailoverBase.h b/src/Common/PoolWithFailoverBase.h index b8fa00d27030..ae14011834a5 100644 --- a/src/Common/PoolWithFailoverBase.h +++ b/src/Common/PoolWithFailoverBase.h @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -178,7 +179,7 @@ PoolWithFailoverBase::getShuffledPools( shuffled_pools.reserve(nested_pools.size()); for (size_t i = 0; i < nested_pools.size(); ++i) shuffled_pools.push_back(ShuffledPool{nested_pools[i].get(), &pool_states[i], i, 0}); - std::sort( + ::sort( shuffled_pools.begin(), shuffled_pools.end(), [](const ShuffledPool & lhs, const ShuffledPool & rhs) { diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index ea6c782ebb4d..aa507b1ce59d 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -8,6 +8,7 @@ M(Query, "Number of queries to be interpreted and potentially executed. Does not include queries that failed to parse or were rejected due to AST size limits, quota limits or limits on the number of simultaneously running queries. May include internal queries initiated by ClickHouse itself. 
Does not count subqueries.") \ M(SelectQuery, "Same as Query, but only for SELECT queries.") \ M(InsertQuery, "Same as Query, but only for INSERT queries.") \ + M(AsyncInsertQuery, "Same as InsertQuery, but only for asynchronous INSERT queries.") \ M(FailedQuery, "Number of failed queries.") \ M(FailedSelectQuery, "Same as FailedQuery, but only for SELECT queries.") \ M(FailedInsertQuery, "Same as FailedQuery, but only for INSERT queries.") \ diff --git a/src/Common/ProgressIndication.cpp b/src/Common/ProgressIndication.cpp index b9a8bc923f74..00e2326b0b45 100644 --- a/src/Common/ProgressIndication.cpp +++ b/src/Common/ProgressIndication.cpp @@ -243,7 +243,7 @@ void ProgressIndication::writeProgress() if (width_of_progress_bar > 0) { - size_t bar_width = UnicodeBar::getWidth(current_count, 0, max_count, width_of_progress_bar); + double bar_width = UnicodeBar::getWidth(current_count, 0, max_count, width_of_progress_bar); std::string bar = UnicodeBar::render(bar_width); /// Render profiling_msg at left on top of the progress bar. diff --git a/src/Common/SpaceSaving.h b/src/Common/SpaceSaving.h index d1e6d079d17f..03d2c9638a4d 100644 --- a/src/Common/SpaceSaving.h +++ b/src/Common/SpaceSaving.h @@ -5,6 +5,8 @@ #include +#include + #include #include #include @@ -242,7 +244,7 @@ class SpaceSaving } } - std::sort(counter_list.begin(), counter_list.end(), [](Counter * l, Counter * r) { return *l > *r; }); + ::sort(counter_list.begin(), counter_list.end(), [](Counter * l, Counter * r) { return *l > *r; }); if (counter_list.size() > m_capacity) { diff --git a/src/Common/StringSearcher.h b/src/Common/StringSearcher.h index f34bc6f73227..406298382848 100644 --- a/src/Common/StringSearcher.h +++ b/src/Common/StringSearcher.h @@ -24,7 +24,6 @@ namespace DB namespace ErrorCodes { - extern const int UNSUPPORTED_PARAMETER; extern const int BAD_ARGUMENTS; } @@ -34,9 +33,12 @@ namespace ErrorCodes */ -struct StringSearcherBase +class StringSearcherBase { +public: + bool force_fallback = false; #ifdef __SSE2__ +protected: static constexpr auto n = sizeof(__m128i); const int page_size = ::getPageSize(); @@ -53,7 +55,7 @@ template class StringSearcher; /// Case-insensitive UTF-8 searcher template <> -class StringSearcher : private StringSearcherBase +class StringSearcher : public StringSearcherBase { private: using UTF8SequenceBuffer = uint8_t[6]; @@ -119,11 +121,14 @@ class StringSearcher : private StringSearcherBase size_t length_u = UTF8::convertCodePointToUTF8(first_u_u32, u_seq, sizeof(u_seq)); if (length_l != length_u) - throw Exception{"UTF8 sequences with different lowercase and uppercase lengths are not supported", ErrorCodes::UNSUPPORTED_PARAMETER}; + force_fallback = true; } l = l_seq[0]; u = u_seq[0]; + + if (force_fallback) + return; } #ifdef __SSE4_1__ @@ -158,7 +163,10 @@ class StringSearcher : private StringSearcherBase /// @note Unicode standard states it is a rare but possible occasion if (!(dst_l_len == dst_u_len && dst_u_len == src_len)) - throw Exception{"UTF8 sequences with different lowercase and uppercase lengths are not supported", ErrorCodes::UNSUPPORTED_PARAMETER}; + { + force_fallback = true; + return; + } } cache_actual_len += src_len; @@ -199,9 +207,10 @@ class StringSearcher : private StringSearcherBase if (Poco::Unicode::toLower(*haystack_code_point) != Poco::Unicode::toLower(*needle_code_point)) break; - /// @note assuming sequences for lowercase and uppercase have exact same length (that is not always true) - const auto len = UTF8::seqLength(*haystack_pos); + auto len = 
UTF8::seqLength(*haystack_pos); haystack_pos += len; + + len = UTF8::seqLength(*needle_pos); needle_pos += len; } @@ -213,7 +222,7 @@ class StringSearcher : private StringSearcherBase { #ifdef __SSE4_1__ - if (pageSafe(pos)) + if (pageSafe(pos) && !force_fallback) { const auto v_haystack = _mm_loadu_si128(reinterpret_cast(pos)); const auto v_against_l = _mm_cmpeq_epi8(v_haystack, cachel); @@ -262,7 +271,7 @@ class StringSearcher : private StringSearcherBase while (haystack < haystack_end) { #ifdef __SSE4_1__ - if (haystack + n <= haystack_end && pageSafe(haystack)) + if (haystack + n <= haystack_end && pageSafe(haystack) && !force_fallback) { const auto v_haystack = _mm_loadu_si128(reinterpret_cast(haystack)); const auto v_against_l = _mm_cmpeq_epi8(v_haystack, patl); @@ -339,7 +348,7 @@ class StringSearcher : private StringSearcherBase /// Case-insensitive ASCII searcher template <> -class StringSearcher : private StringSearcherBase +class StringSearcher : public StringSearcherBase { private: /// string to be searched for @@ -541,7 +550,7 @@ class StringSearcher : private StringSearcherBase /// Case-sensitive searcher (both ASCII and UTF-8) template -class StringSearcher : private StringSearcherBase +class StringSearcher : public StringSearcherBase { private: /// string to be searched for @@ -725,7 +734,7 @@ class StringSearcher : private StringSearcherBase // Any value outside of basic ASCII (>=128) is considered a non-separator symbol, hence UTF-8 strings // should work just fine. But any Unicode whitespace is not considered a token separtor. template -class TokenSearcher +class TokenSearcher : public StringSearcherBase { StringSearcher searcher; size_t needle_size; @@ -809,7 +818,7 @@ using ASCIICaseInsensitiveTokenSearcher = TokenSearcher #include +#include + /** ELF object can contain three different places with symbol names and addresses: @@ -498,8 +500,8 @@ void SymbolIndex::update() { dl_iterate_phdr(collectSymbols, &data); - std::sort(data.objects.begin(), data.objects.end(), [](const Object & a, const Object & b) { return a.address_begin < b.address_begin; }); - std::sort(data.symbols.begin(), data.symbols.end(), [](const Symbol & a, const Symbol & b) { return a.address_begin < b.address_begin; }); + ::sort(data.objects.begin(), data.objects.end(), [](const Object & a, const Object & b) { return a.address_begin < b.address_begin; }); + ::sort(data.symbols.begin(), data.symbols.end(), [](const Symbol & a, const Symbol & b) { return a.address_begin < b.address_begin; }); /// We found symbols both from loaded program headers and from ELF symbol tables. data.symbols.erase(std::unique(data.symbols.begin(), data.symbols.end(), [](const Symbol & a, const Symbol & b) diff --git a/src/Common/SystemLogBase.cpp b/src/Common/SystemLogBase.cpp new file mode 100644 index 000000000000..0014e0d99274 --- /dev/null +++ b/src/Common/SystemLogBase.cpp @@ -0,0 +1,177 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int TIMEOUT_EXCEEDED; +} + +namespace +{ + constexpr size_t DBMS_SYSTEM_LOG_QUEUE_SIZE = 1048576; +} + +void ISystemLog::stopFlushThread() +{ + { + std::lock_guard lock(mutex); + + if (!saving_thread.joinable()) + { + return; + } + + if (is_shutdown) + { + return; + } + + is_shutdown = true; + + /// Tell thread to shutdown. 
+ flush_event.notify_all(); + } + + saving_thread.join(); +} + +void ISystemLog::startup() +{ + std::lock_guard lock(mutex); + saving_thread = ThreadFromGlobalPool([this] { savingThreadFunction(); }); +} + +static thread_local bool recursive_add_call = false; + +template +void SystemLogBase::add(const LogElement & element) +{ + /// It is possible that the method will be called recursively. + /// Better to drop these events to avoid complications. + if (recursive_add_call) + return; + recursive_add_call = true; + SCOPE_EXIT({ recursive_add_call = false; }); + + /// Memory can be allocated while resizing on queue.push_back. + /// The size of the allocation can be on the order of a few megabytes. + /// But it should not be counted against query memory usage. + /// Otherwise tests like 01017_uniqCombined_memory_usage.sql will be flaky. + MemoryTrackerBlockerInThread temporarily_disable_memory_tracker(VariableContext::Global); + + /// Should not log messages under mutex. + bool queue_is_half_full = false; + + { + std::unique_lock lock(mutex); + + if (is_shutdown) + return; + + if (queue.size() == DBMS_SYSTEM_LOG_QUEUE_SIZE / 2) + { + queue_is_half_full = true; + + // The queue is more than half full; time to flush. + // We only check for strict equality, because messages are added one + // by one, under exclusive lock, so we will see each message count. + // It is enough to only wake the flushing thread once, after the message + // count increases past half the available size. + const uint64_t queue_end = queue_front_index + queue.size(); + if (requested_flush_up_to < queue_end) + requested_flush_up_to = queue_end; + + flush_event.notify_all(); + } + + if (queue.size() >= DBMS_SYSTEM_LOG_QUEUE_SIZE) + { + // Ignore all further entries until the queue is flushed. + // Log a message about that. Don't spam it -- this might be especially + // problematic in case of trace log. Remember what the front index of the + // queue was when we last logged the message. If it changed, it means the + // queue was flushed, and we can log again. + if (queue_front_index != logged_queue_full_at_index) + { + logged_queue_full_at_index = queue_front_index; + + // TextLog sets its logger level to 0, so this log is a noop and + // there is no recursive logging. + lock.unlock(); + LOG_ERROR(log, "Queue is full for system log '{}' at {}", demangle(typeid(*this).name()), queue_front_index); + } + + return; + } + + queue.push_back(element); + } + + if (queue_is_half_full) + LOG_INFO(log, "Queue is half full for system log '{}'.", demangle(typeid(*this).name())); +} + +template +void SystemLogBase::flush(bool force) +{ + uint64_t this_thread_requested_offset; + + { + std::unique_lock lock(mutex); + + if (is_shutdown) + return; + + this_thread_requested_offset = queue_front_index + queue.size(); + + // Publish our flush request, taking care not to overwrite the requests + // made by other threads. + is_force_prepare_tables |= force; + requested_flush_up_to = std::max(requested_flush_up_to, this_thread_requested_offset); + + flush_event.notify_all(); + } + + LOG_DEBUG(log, "Requested flush up to offset {}", this_thread_requested_offset); + + // Use an arbitrary timeout to avoid endless waiting. 60s proved to be + // too fast for our parallel functional tests, probably because they + // heavily load the disk.
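The offset bookkeeping above is easy to misread, so here is a self-contained model of the invariant; the member names are mirrored, the values are made up:

```cpp
#include <cassert>
#include <cstdint>
#include <vector>

// Every queued element gets a global sequential index, so "flush up to X"
// stays well defined even after the queue's front has been consumed.
int main()
{
    uint64_t queue_front_index = 100;    // 100 elements already flushed and dropped
    std::vector<int> queue = {1, 2, 3};  // these hold global indices 100, 101, 102

    // A caller of flush() asks for everything currently queued (exclusive bound):
    uint64_t requested_flush_up_to = queue_front_index + queue.size();  // == 103

    // The saving thread drains the queue and advances the front index:
    queue_front_index += queue.size();
    queue.clear();
    uint64_t flushed_up_to = queue_front_index;  // == 103

    // The predicate the waiting thread wakes on (ignoring is_force_prepare_tables):
    assert(flushed_up_to >= requested_flush_up_to);
    return 0;
}
```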
+ const int timeout_seconds = 180; + std::unique_lock lock(mutex); + bool result = flush_event.wait_for(lock, std::chrono::seconds(timeout_seconds), [&] + { + return flushed_up_to >= this_thread_requested_offset && !is_force_prepare_tables; + }); + + if (!result) + { + throw Exception( + "Timeout exceeded (" + toString(timeout_seconds) + " s) while flushing system log '" + demangle(typeid(*this).name()) + "'.", + ErrorCodes::TIMEOUT_EXCEEDED); + } +} + +#define INSTANTIATE_SYSTEM_LOG_BASE(ELEMENT) template class SystemLogBase; +SYSTEM_LOG_ELEMENTS(INSTANTIATE_SYSTEM_LOG_BASE) + +} diff --git a/src/Common/SystemLogBase.h b/src/Common/SystemLogBase.h new file mode 100644 index 000000000000..4b3ec5fe3790 --- /dev/null +++ b/src/Common/SystemLogBase.h @@ -0,0 +1,109 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#define SYSTEM_LOG_ELEMENTS(M) \ + M(AsynchronousMetricLogElement) \ + M(CrashLogElement) \ + M(MetricLogElement) \ + M(OpenTelemetrySpanLogElement) \ + M(PartLogElement) \ + M(QueryLogElement) \ + M(QueryThreadLogElement) \ + M(QueryViewsLogElement) \ + M(SessionLogElement) \ + M(TraceLogElement) \ + M(ZooKeeperLogElement) \ + M(TextLogElement) + +namespace Poco +{ +class Logger; +namespace Util +{ + class AbstractConfiguration; +} +} + +namespace DB +{ + +struct StorageID; + +class ISystemLog +{ +public: + virtual String getName() = 0; + //// force -- force table creation (used for SYSTEM FLUSH LOGS) + virtual void flush(bool force = false) = 0; + virtual void prepareTable() = 0; + + /// Start the background thread. + virtual void startup(); + + /// Stop the background flush thread before destructor. No more data will be written. + virtual void shutdown() = 0; + + virtual ~ISystemLog() = default; + + virtual void savingThreadFunction() = 0; + +protected: + ThreadFromGlobalPool saving_thread; + + /// Data shared between callers of add()/flush()/shutdown(), and the saving thread + std::mutex mutex; + + bool is_shutdown = false; + std::condition_variable flush_event; + + void stopFlushThread(); +}; + +template +class SystemLogBase : public ISystemLog +{ +public: + using Self = SystemLogBase; + + /** Append a record into log. + * Writing to table will be done asynchronously and in case of failure, record could be lost. + */ + void add(const LogElement & element); + + /// Flush data in the buffer to disk + void flush(bool force) override; + + String getName() override { return LogElement::name(); } + +protected: + Poco::Logger * log; + + // Queue is bounded. But its size is quite large to not block in all normal cases. + std::vector queue; + // An always-incrementing index of the first message currently in the queue. + // We use it to give a global sequential index to every message, so that we + // can wait until a particular message is flushed. This is used to implement + // synchronous log flushing for SYSTEM FLUSH LOGS. + uint64_t queue_front_index = 0; + // A flag that says we must create the tables even if the queue is empty. 
+ bool is_force_prepare_tables = false; + // Requested to flush logs up to this index, exclusive + uint64_t requested_flush_up_to = 0; + // Flushed log up to this index, exclusive + uint64_t flushed_up_to = 0; + // Logged overflow message at this queue front index + uint64_t logged_queue_full_at_index = -1; +}; + +} diff --git a/src/Common/ThreadPool.cpp b/src/Common/ThreadPool.cpp index 9b01987c7cf2..8bfb93c9e947 100644 --- a/src/Common/ThreadPool.cpp +++ b/src/Common/ThreadPool.cpp @@ -54,6 +54,9 @@ void ThreadPoolImpl::setMaxThreads(size_t value) { std::lock_guard lock(mutex); max_threads = value; + /// We have to also adjust queue size, because it limits the number of scheduled and already running jobs in total. + queue_size = std::max(queue_size, max_threads); + jobs.reserve(queue_size); } template diff --git a/src/Common/Volnitsky.h b/src/Common/Volnitsky.h index f08172c8a772..881817f33bed 100644 --- a/src/Common/Volnitsky.h +++ b/src/Common/Volnitsky.h @@ -372,7 +372,7 @@ class VolnitskyBase , fallback{VolnitskyTraits::isFallbackNeedle(needle_size, haystack_size_hint)} , fallback_searcher{needle_, needle_size} { - if (fallback) + if (fallback || fallback_searcher.force_fallback) return; hash = std::unique_ptr(new VolnitskyTraits::Offset[VolnitskyTraits::hash_size]{}); @@ -393,7 +393,7 @@ class VolnitskyBase const auto haystack_end = haystack + haystack_size; - if (fallback || haystack_size <= needle_size) + if (fallback || haystack_size <= needle_size || fallback_searcher.force_fallback) return fallback_searcher.search(haystack, haystack_end); /// Let's "apply" the needle to the haystack and compare the n-gram from the end of the needle. diff --git a/src/Common/ZooKeeper/CMakeLists.txt b/src/Common/ZooKeeper/CMakeLists.txt index 7e0558dd5753..34ebad9bb50c 100644 --- a/src/Common/ZooKeeper/CMakeLists.txt +++ b/src/Common/ZooKeeper/CMakeLists.txt @@ -12,12 +12,6 @@ target_link_libraries (clickhouse_common_zookeeper PRIVATE string_utils ) -# To avoid circular dependency from interpreters. -if (OS_DARWIN) - target_link_libraries (clickhouse_common_zookeeper PRIVATE -Wl,-undefined,dynamic_lookup) -else() - target_link_libraries (clickhouse_common_zookeeper PRIVATE -Wl,--unresolved-symbols=ignore-all) -endif() # for examples -- no logging (to avoid extra dependencies) add_library(clickhouse_common_zookeeper_no_log ${clickhouse_common_zookeeper_headers} ${clickhouse_common_zookeeper_sources}) diff --git a/src/Common/ZooKeeper/ZooKeeper.cpp b/src/Common/ZooKeeper/ZooKeeper.cpp index c8753c8edaf3..b1574341c408 100644 --- a/src/Common/ZooKeeper/ZooKeeper.cpp +++ b/src/Common/ZooKeeper/ZooKeeper.cpp @@ -1145,7 +1145,7 @@ std::string normalizeZooKeeperPath(std::string zookeeper_path, bool check_starts if (check_starts_with_slash) throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "ZooKeeper path must starts with '/', got '{}'", zookeeper_path); if (log) - LOG_WARNING(log, "ZooKeeper path ('{}') does not start with '/'. It will not be supported in future releases"); + LOG_WARNING(log, "ZooKeeper path ('{}') does not start with '/'. It will not be supported in future releases", zookeeper_path); zookeeper_path = "/" + zookeeper_path; } diff --git a/src/Common/config.h.in b/src/Common/config.h.in index 28506a945814..edade4ce2be6 100644 --- a/src/Common/config.h.in +++ b/src/Common/config.h.in @@ -2,17 +2,14 @@ // .h autogenerated by cmake! 
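// The ThreadPool::setMaxThreads() change above preserves the invariant
// queue_size >= max_threads, since queue_size bounds scheduled plus already running jobs;
// without it, raising max_threads would silently cap concurrency at the old queue size.
// A sketch of just that invariant (hypothetical minimal type, not the real ThreadPoolImpl):

#include <algorithm>
#include <cstddef>
#include <mutex>

struct PoolLimits
{
    std::mutex mutex;
    size_t max_threads = 8;
    size_t queue_size = 8; // limits scheduled + running jobs in total

    void setMaxThreads(size_t value)
    {
        std::lock_guard<std::mutex> lock(mutex);
        max_threads = value;
        queue_size = std::max(queue_size, max_threads); // keep the invariant
    }
};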
+#cmakedefine01 USE_CPUID #cmakedefine01 USE_BASE64 -#cmakedefine01 USE_RE2_ST #cmakedefine01 USE_SSL -#cmakedefine01 USE_INTERNAL_SSL_LIBRARY #cmakedefine01 USE_HDFS -#cmakedefine01 USE_INTERNAL_HDFS3_LIBRARY #cmakedefine01 USE_AWS_S3 #cmakedefine01 USE_AZURE_BLOB_STORAGE #cmakedefine01 USE_BROTLI #cmakedefine01 USE_UNWIND -#cmakedefine01 USE_OPENCL #cmakedefine01 USE_CASSANDRA #cmakedefine01 USE_SENTRY #cmakedefine01 USE_GRPC @@ -20,5 +17,9 @@ #cmakedefine01 USE_YAML_CPP #cmakedefine01 CLICKHOUSE_SPLIT_BINARY #cmakedefine01 USE_BZIP2 +#cmakedefine01 USE_MINIZIP #cmakedefine01 USE_SNAPPY #cmakedefine01 USE_HIVE +#cmakedefine01 USE_ODBC +#cmakedefine01 USE_REPLXX +#cmakedefine01 USE_JEMALLOC diff --git a/src/Common/examples/CMakeLists.txt b/src/Common/examples/CMakeLists.txt index 7b21591f83e9..9e551f3aa54d 100644 --- a/src/Common/examples/CMakeLists.txt +++ b/src/Common/examples/CMakeLists.txt @@ -1,7 +1,7 @@ add_executable (hashes_test hashes_test.cpp) -target_link_libraries (hashes_test PRIVATE clickhouse_common_io ${CITYHASH_LIBRARIES}) -if(OPENSSL_CRYPTO_LIBRARY) - target_link_libraries (hashes_test PRIVATE ${OPENSSL_CRYPTO_LIBRARY}) +target_link_libraries (hashes_test PRIVATE clickhouse_common_io ch_contrib::cityhash) +if (TARGET OpenSSL::Crypto) + target_link_libraries (hashes_test PRIVATE OpenSSL::Crypto) endif() add_executable (sip_hash_perf sip_hash_perf.cpp) @@ -23,8 +23,7 @@ add_executable (compact_array compact_array.cpp) target_link_libraries (compact_array PRIVATE clickhouse_common_io) add_executable (radix_sort radix_sort.cpp) -target_link_libraries (radix_sort PRIVATE clickhouse_common_io) -target_include_directories(radix_sort SYSTEM PRIVATE ${PDQSORT_INCLUDE_DIR}) +target_link_libraries (radix_sort PRIVATE clickhouse_common_io ch_contrib::pdqsort) add_executable (arena_with_free_lists arena_with_free_lists.cpp) target_link_libraries (arena_with_free_lists PRIVATE dbms) @@ -42,12 +41,10 @@ add_executable (space_saving space_saving.cpp) target_link_libraries (space_saving PRIVATE clickhouse_common_io) add_executable (integer_hash_tables_and_hashes integer_hash_tables_and_hashes.cpp) -target_include_directories (integer_hash_tables_and_hashes SYSTEM BEFORE PRIVATE ${SPARSEHASH_INCLUDE_DIR}) -target_link_libraries (integer_hash_tables_and_hashes PRIVATE dbms abseil_swiss_tables) +target_link_libraries (integer_hash_tables_and_hashes PRIVATE dbms ch_contrib::abseil_swiss_tables ch_contrib::sparsehash) add_executable (integer_hash_tables_benchmark integer_hash_tables_benchmark.cpp) -target_include_directories (integer_hash_tables_benchmark SYSTEM BEFORE PRIVATE ${SPARSEHASH_INCLUDE_DIR}) -target_link_libraries (integer_hash_tables_benchmark PRIVATE dbms abseil_swiss_tables) +target_link_libraries (integer_hash_tables_benchmark PRIVATE dbms ch_contrib::abseil_swiss_tables ch_contrib::sparsehash) add_executable (cow_columns cow_columns.cpp) target_link_libraries (cow_columns PRIVATE clickhouse_common_io) @@ -81,8 +78,10 @@ target_link_libraries (shell_command_inout PRIVATE clickhouse_common_io) add_executable (executable_udf executable_udf.cpp) target_link_libraries (executable_udf PRIVATE dbms) -add_executable(hive_metastore_client hive_metastore_client.cpp) -target_link_libraries (hive_metastore_client PUBLIC hivemetastore ${THRIFT_LIBRARY}) +if (ENABLE_HIVE) + add_executable (hive_metastore_client hive_metastore_client.cpp) + target_link_libraries (hive_metastore_client PUBLIC ch_contrib::hivemetastore ch_contrib::thrift) +endif() add_executable (interval_tree 
interval_tree.cpp) target_link_libraries (interval_tree PRIVATE dbms) diff --git a/src/Common/getNumberOfPhysicalCPUCores.cpp b/src/Common/getNumberOfPhysicalCPUCores.cpp index 13485c634e87..2fc1dbf96695 100644 --- a/src/Common/getNumberOfPhysicalCPUCores.cpp +++ b/src/Common/getNumberOfPhysicalCPUCores.cpp @@ -1,26 +1,80 @@ #include "getNumberOfPhysicalCPUCores.h" +#include +#if defined(OS_LINUX) +# include +# include +#endif #if USE_CPUID # include #endif #include +#if defined(OS_LINUX) +unsigned getCGroupLimitedCPUCores(unsigned default_cpu_count) +{ + // Try to look at the cgroups limit if it is available. + auto read_from = [](const char * filename, int default_value) -> int { + std::ifstream infile(filename); + if (!infile.is_open()) + { + return default_value; + } + int idata; + if (infile >> idata) + return idata; + else + return default_value; + }; + + unsigned quota_count = default_cpu_count; + // Returns the number of microseconds per period that the process is guaranteed to run. + // -1 for no quota + int cgroup_quota = read_from("/sys/fs/cgroup/cpu/cpu.cfs_quota_us", -1); + int cgroup_period = read_from("/sys/fs/cgroup/cpu/cpu.cfs_period_us", -1); + if (cgroup_quota > -1 && cgroup_period > 0) + { + quota_count = ceil(static_cast(cgroup_quota) / static_cast(cgroup_period)); + } + + // Share number (typically relative to 1024; 2048 typically expresses 2 CPUs' worth of processing) + // -1 for no share setup + int cgroup_share = read_from("/sys/fs/cgroup/cpu/cpu.shares", -1); + // Convert 1024 to no shares setup + if (cgroup_share == 1024) + cgroup_share = -1; + +# define PER_CPU_SHARES 1024 + unsigned share_count = default_cpu_count; + if (cgroup_share > -1) + { + share_count = ceil(static_cast(cgroup_share) / static_cast(PER_CPU_SHARES)); + } + + return std::min(default_cpu_count, std::min(share_count, quota_count)); +} +#endif // OS_LINUX unsigned getNumberOfPhysicalCPUCores() { - static const unsigned number = [] - { -# if USE_CPUID + static const unsigned number = [] { + unsigned cpu_count = 0; // start with an invalid number +#if USE_CPUID + do + { cpu_raw_data_t raw_data; cpu_id_t data; /// On Xen VMs, libcpuid returns wrong info (zero number of cores). Fallback to alternative method. /// Also, libcpuid does not support some CPUs like AMD Hygon C86 7151. if (0 != cpuid_get_raw_data(&raw_data) || 0 != cpu_identify(&raw_data, &data) || data.num_logical_cpus == 0) - return std::thread::hardware_concurrency(); + { + // Just fallback + break; + } - unsigned res = data.num_cores * data.total_logical_cpus / data.num_logical_cpus; + cpu_count = data.num_cores * data.total_logical_cpus / data.num_logical_cpus; /// Also, libcpuid gives strange result on Google Compute Engine VMs. /// Example: @@ -28,14 +82,18 @@ unsigned getNumberOfPhysicalCPUCores() /// total_logical_cpus = 1, /// total number of logical cores on all sockets /// num_logical_cpus = 24. /// number of logical cores on current CPU socket /// It means two-way hyper-threading (24 / 12), but contradictory, 'total_logical_cpus' == 1. - - if (res != 0) - return res; -# endif + } while (false); +#endif /// As a fallback (also for non-x86 architectures) assume there is no hyper-threading on the system. /// (Actually, only Aarch64 is supported).
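// The cgroup logic above yields min(hardware, ceil(quota / period), ceil(shares / 1024));
// e.g. cfs_quota_us = 150000 with cfs_period_us = 100000 gives ceil(1.5) = 2 cores.
// A standalone sketch of the computation with the limits passed in instead of read from
// /sys/fs/cgroup (hypothetical helper; the same -1 sentinel means "not set"):

#include <algorithm>
#include <cmath>

unsigned effectiveCPUs(unsigned hw_count, int quota_us, int period_us, int shares)
{
    unsigned quota_count = hw_count;
    if (quota_us > -1 && period_us > 0)
        quota_count = static_cast<unsigned>(std::ceil(double(quota_us) / double(period_us)));

    unsigned share_count = hw_count;
    if (shares > -1 && shares != 1024) // 1024 is the default, i.e. no limit
        share_count = static_cast<unsigned>(std::ceil(double(shares) / 1024.0));

    return std::min(hw_count, std::min(quota_count, share_count));
}

// effectiveCPUs(16, 150000, 100000, -1) == 2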
- return std::thread::hardware_concurrency(); + if (cpu_count == 0) + cpu_count = std::thread::hardware_concurrency(); + +#if defined(OS_LINUX) + cpu_count = getCGroupLimitedCPUCores(cpu_count); +#endif // OS_LINUX + return cpu_count; }(); return number; } diff --git a/src/Common/memcmpSmall.h b/src/Common/memcmpSmall.h index db8641cb44dd..57b9c731897c 100644 --- a/src/Common/memcmpSmall.h +++ b/src/Common/memcmpSmall.h @@ -25,8 +25,240 @@ inline int cmp(T a, T b) /// We can process uninitialized memory in the functions below. /// Results don't depend on the values inside uninitialized memory but Memory Sanitizer cannot see it. /// Disable optimized functions if compiled with Memory Sanitizer. +#if defined(__AVX512BW__) && defined(__AVX512VL__) && !defined(MEMORY_SANITIZER) +#include -#if defined(__SSE2__) && !defined(MEMORY_SANITIZER) + +/** All functions work under the following assumptions: + * - it's possible to read up to 15 excessive bytes after the end of the 'a' and 'b' regions; + * - memory regions are relatively small and extra loop unrolling is not worth doing. + */ + +/** Variant when memory regions may have different sizes. + */ +template +inline int memcmpSmallAllowOverflow15(const Char * a, size_t a_size, const Char * b, size_t b_size) +{ + size_t min_size = std::min(a_size, b_size); + + for (size_t offset = 0; offset < min_size; offset += 16) + { + uint16_t mask = _mm_cmp_epi8_mask( + _mm_loadu_si128(reinterpret_cast(a + offset)), + _mm_loadu_si128(reinterpret_cast(b + offset)), _MM_CMPINT_NE); + + if (mask) + { + offset += __builtin_ctz(mask); + + if (offset >= min_size) + break; + + return detail::cmp(a[offset], b[offset]); + } + } + + return detail::cmp(a_size, b_size); +} + + +/** Variant when memory regions may have different sizes. + * But compare the regions as if the smaller one were padded with zero bytes up to the size of the larger. + * It's needed to ensure that toFixedString('abc', 5) = 'abc', + * for compatibility with the SQL standard. + */ +template +inline int memcmpSmallLikeZeroPaddedAllowOverflow15(const Char * a, size_t a_size, const Char * b, size_t b_size) +{ + size_t min_size = std::min(a_size, b_size); + + for (size_t offset = 0; offset < min_size; offset += 16) + { + uint16_t mask = _mm_cmp_epi8_mask( + _mm_loadu_si128(reinterpret_cast(a + offset)), + _mm_loadu_si128(reinterpret_cast(b + offset)), _MM_CMPINT_NE); + + if (mask) + { + offset += __builtin_ctz(mask); + + if (offset >= min_size) + break; + + return detail::cmp(a[offset], b[offset]); + } + } + + /// The strings are equal up to min_size. + /// If the rest of the larger string is zero bytes then the strings are considered equal. + + size_t max_size; + const Char * longest; + int cmp; + + if (a_size == b_size) + { + return 0; + } + else if (a_size > b_size) + { + max_size = a_size; + longest = a; + cmp = 1; + } + else + { + max_size = b_size; + longest = b; + cmp = -1; + } + + const __m128i zero16 = _mm_setzero_si128(); + + for (size_t offset = min_size; offset < max_size; offset += 16) + { + uint16_t mask = _mm_cmpneq_epi8_mask( + _mm_loadu_si128(reinterpret_cast(longest + offset)), + zero16); + + if (mask) + { + offset += __builtin_ctz(mask); + + if (offset >= max_size) + return 0; + return cmp; + } + } + + return 0; +} + + +/** Variant when memory regions have the same size. + * TODO Check if the compiler can optimize the previous function when the caller passes identical sizes.
+ */ +template +inline int memcmpSmallAllowOverflow15(const Char * a, const Char * b, size_t size) +{ + for (size_t offset = 0; offset < size; offset += 16) + { + uint16_t mask = _mm_cmp_epi8_mask( + _mm_loadu_si128(reinterpret_cast(a + offset)), + _mm_loadu_si128(reinterpret_cast(b + offset)), _MM_CMPINT_NE); + + if (mask) + { + offset += __builtin_ctz(mask); + + if (offset >= size) + return 0; + + return detail::cmp(a[offset], b[offset]); + } + } + + return 0; +} + + +/** Compare memory regions for equality. + */ +template +inline bool memequalSmallAllowOverflow15(const Char * a, size_t a_size, const Char * b, size_t b_size) +{ + if (a_size != b_size) + return false; + + for (size_t offset = 0; offset < a_size; offset += 16) + { + uint16_t mask = _mm_cmp_epi8_mask( + _mm_loadu_si128(reinterpret_cast(a + offset)), + _mm_loadu_si128(reinterpret_cast(b + offset)), _MM_CMPINT_NE); + + if (mask) + { + offset += __builtin_ctz(mask); + return offset >= a_size; + } + } + + return true; +} + + +/** Variant when the caller knows in advance that the size is a multiple of 16. + */ +template +inline int memcmpSmallMultipleOf16(const Char * a, const Char * b, size_t size) +{ + for (size_t offset = 0; offset < size; offset += 16) + { + uint16_t mask = _mm_cmp_epi8_mask( + _mm_loadu_si128(reinterpret_cast(a + offset)), + _mm_loadu_si128(reinterpret_cast(b + offset)), _MM_CMPINT_NE); + + if (mask) + { + offset += __builtin_ctz(mask); + return detail::cmp(a[offset], b[offset]); + } + } + + return 0; +} + + +/** Variant when the size is 16 exactly. + */ +template +inline int memcmp16(const Char * a, const Char * b) +{ + uint16_t mask = _mm_cmp_epi8_mask( + _mm_loadu_si128(reinterpret_cast(a)), + _mm_loadu_si128(reinterpret_cast(b)), _MM_CMPINT_NE); + + if (mask) + { + auto offset = __builtin_ctz(mask); + return detail::cmp(a[offset], b[offset]); + } + + return 0; +} + + +/** Variant when the size is 16 exactly. + */ +inline bool memequal16(const void * a, const void * b) +{ + return 0xFFFF == _mm_cmp_epi8_mask( + _mm_loadu_si128(reinterpret_cast(a)), + _mm_loadu_si128(reinterpret_cast(b)), _MM_CMPINT_EQ); +} + + +/** Compare memory region to zero */ +inline bool memoryIsZeroSmallAllowOverflow15(const void * data, size_t size) +{ + const __m128i zero16 = _mm_setzero_si128(); + + for (size_t offset = 0; offset < size; offset += 16) + { + uint16_t mask = _mm_cmp_epi8_mask(zero16, + _mm_loadu_si128(reinterpret_cast(reinterpret_cast(data) + offset)), _MM_CMPINT_NE); + + if (mask) + { + offset += __builtin_ctz(mask); + return offset >= size; + } + } + + return true; +} + +#elif defined(__SSE2__) && !defined(MEMORY_SANITIZER) #include diff --git a/src/Common/memory.h b/src/Common/memory.h index 0dc163a54a1b..41b10a57db42 100644 --- a/src/Common/memory.h +++ b/src/Common/memory.h @@ -4,12 +4,13 @@ #include #include +#include #if USE_JEMALLOC # include #endif -#if !USE_JEMALLOC || JEMALLOC_VERSION_MAJOR < 4 +#if !USE_JEMALLOC # include #endif @@ -37,7 +38,7 @@ inline ALWAYS_INLINE void deleteImpl(void * ptr) noexcept free(ptr); } -#if USE_JEMALLOC && JEMALLOC_VERSION_MAJOR >= 4 +#if USE_JEMALLOC inline ALWAYS_INLINE void deleteSized(void * ptr, std::size_t size) noexcept { @@ -67,7 +68,7 @@ inline ALWAYS_INLINE size_t getActualAllocationSize(size_t size) { size_t actual_size = size; -#if USE_JEMALLOC && JEMALLOC_VERSION_MAJOR >= 5 +#if USE_JEMALLOC /// The nallocx() function allocates no memory, but it performs the same size computation as the mallocx() function /// @note je_mallocx() != je_malloc().
It's expected they don't differ much in allocation logic. if (likely(size != 0)) @@ -87,7 +88,7 @@ inline ALWAYS_INLINE void untrackMemory(void * ptr [[maybe_unused]], std::size_t { try { -#if USE_JEMALLOC && JEMALLOC_VERSION_MAJOR >= 5 +#if USE_JEMALLOC /// @note It's also possible to use je_malloc_usable_size() here. if (likely(ptr != nullptr)) CurrentMemoryTracker::free(sallocx(ptr, 0)); diff --git a/src/Common/mysqlxx/CMakeLists.txt b/src/Common/mysqlxx/CMakeLists.txt index 76005651e614..d7292075aae1 100644 --- a/src/Common/mysqlxx/CMakeLists.txt +++ b/src/Common/mysqlxx/CMakeLists.txt @@ -15,10 +15,12 @@ target_include_directories (mysqlxx PUBLIC .) target_link_libraries (mysqlxx clickhouse_common_io - ${MYSQLCLIENT_LIBRARIES} - ${ZLIB_LIBRARIES} + ch_contrib::zlib + ch_contrib::mariadbclient ) +add_library(ch::mysqlxx ALIAS mysqlxx) + if (ENABLE_TESTS) add_subdirectory (tests) endif () diff --git a/src/Common/mysqlxx/mysqlxx/Types.h b/src/Common/mysqlxx/mysqlxx/Types.h index 5fd9aa8bbc88..6ad4eb7c3550 100644 --- a/src/Common/mysqlxx/mysqlxx/Types.h +++ b/src/Common/mysqlxx/mysqlxx/Types.h @@ -16,7 +16,15 @@ using MYSQL_ROW = char**; struct st_mysql_field; using MYSQL_FIELD = st_mysql_field; -enum struct enum_field_types; +enum struct enum_field_types { MYSQL_TYPE_DECIMAL, MYSQL_TYPE_TINY, + MYSQL_TYPE_SHORT, MYSQL_TYPE_LONG, + MYSQL_TYPE_FLOAT, MYSQL_TYPE_DOUBLE, + MYSQL_TYPE_NULL, MYSQL_TYPE_TIMESTAMP, + MYSQL_TYPE_LONGLONG, MYSQL_TYPE_INT24, + MYSQL_TYPE_DATE, MYSQL_TYPE_TIME, + MYSQL_TYPE_DATETIME, MYSQL_TYPE_YEAR, + MYSQL_TYPE_NEWDATE, MYSQL_TYPE_VARCHAR, + MYSQL_TYPE_BIT }; #endif diff --git a/src/Common/new_delete.cpp b/src/Common/new_delete.cpp index 27db87809d36..8908d140b904 100644 --- a/src/Common/new_delete.cpp +++ b/src/Common/new_delete.cpp @@ -1,4 +1,5 @@ #include +#include #include #if defined(OS_DARWIN) && (USE_JEMALLOC) diff --git a/src/Common/tests/gtest_interval_tree.cpp b/src/Common/tests/gtest_interval_tree.cpp index d9f19841b665..e99bfe83a982 100644 --- a/src/Common/tests/gtest_interval_tree.cpp +++ b/src/Common/tests/gtest_interval_tree.cpp @@ -309,6 +309,29 @@ TEST(IntervalTree, IntervalSetIterators) } } +TEST(IntervalTree, IntervalSetInvalidInterval) +{ + IntervalSet interval_set; + ASSERT_TRUE(!interval_set.insert(Int64Interval(10, 0))); + ASSERT_TRUE(!interval_set.insert(Int64Interval(15, 10))); + ASSERT_TRUE(interval_set.insert(Int64Interval(20, 25))); + + std::set expected; + expected.insert({20, 25}); + + auto actual = intervalSetFindIntervals(interval_set, 20); + + ASSERT_TRUE(actual == expected); + ASSERT_TRUE(interval_set.has(20)); + + interval_set.build(); + + actual = intervalSetFindIntervals(interval_set, 20); + + ASSERT_TRUE(actual == expected); + ASSERT_TRUE(interval_set.has(20)); +} + TEST(IntervalTree, IntervalMapBasic) { for (size_t intervals_size = 0; intervals_size < 120; ++intervals_size) @@ -538,3 +561,26 @@ TEST(IntervalTree, IntervalMapIterators) } } } + +TEST(IntervalTree, IntervalMapInvalidInterval) +{ + IntervalMap interval_map; + ASSERT_TRUE(!interval_map.insert(Int64Interval(10, 0), "Value")); + ASSERT_TRUE(!interval_map.insert(Int64Interval(15, 10), "Value")); + ASSERT_TRUE(interval_map.insert(Int64Interval(20, 25), "Value")); + + std::map expected; + expected.emplace(Int64Interval{20, 25}, "Value"); + + auto actual = intervalMapFindIntervals(interval_map, 20); + + ASSERT_TRUE(actual == expected); + ASSERT_TRUE(interval_map.has(20)); + + interval_map.build(); + + actual = intervalMapFindIntervals(interval_map, 20); + + 
ASSERT_TRUE(actual == expected); + ASSERT_TRUE(interval_map.has(20)); +} diff --git a/src/Common/tests/gtest_log.cpp b/src/Common/tests/gtest_log.cpp index 5addb5acf5df..b25f1cf117ad 100644 --- a/src/Common/tests/gtest_log.cpp +++ b/src/Common/tests/gtest_log.cpp @@ -17,7 +17,7 @@ TEST(Logger, Log) Poco::Logger * log = &Poco::Logger::get("Log"); /// This test checks that we don't pass this string to fmtlib, because it is the only argument. - EXPECT_NO_THROW(LOG_INFO(log, "Hello {} World")); + EXPECT_NO_THROW(LOG_INFO(log, fmt::runtime("Hello {} World"))); } TEST(Logger, TestLog) diff --git a/src/Common/tests/gtest_poolbase.cpp b/src/Common/tests/gtest_poolbase.cpp new file mode 100644 index 000000000000..20c3281c9646 --- /dev/null +++ b/src/Common/tests/gtest_poolbase.cpp @@ -0,0 +1,52 @@ +#include +#include +#include +#include +using namespace DB; + +class PoolObject +{ +public: + int x = 0; +}; + +class MyPoolBase : public PoolBase +{ +public: + using Object = PoolBase::Object; + using ObjectPtr = std::shared_ptr; + using Ptr = PoolBase::Ptr; + + int last_destroy_value = 0; + MyPoolBase() : PoolBase(100, &Poco::Logger::get("MyPoolBase")) { } + +protected: + ObjectPtr allocObject() override { return std::make_shared(); } + + void expireObject(ObjectPtr obj) override + { + LOG_TRACE(log, "expire object"); + ASSERT_TRUE(obj->x == 100); + last_destroy_value = obj->x; + } +}; + +TEST(PoolBase, testDestroy1) +{ + MyPoolBase pool; + { + auto obj_entry = pool.get(-1); + ASSERT_TRUE(!obj_entry.isNull()); + obj_entry->x = 100; + obj_entry.expire(); + } + ASSERT_EQ(1, pool.size()); + + { + auto obj_entry = pool.get(-1); + ASSERT_TRUE(!obj_entry.isNull()); + ASSERT_EQ(obj_entry->x, 0); + ASSERT_EQ(1, pool.size()); + } + ASSERT_EQ(100, pool.last_destroy_value); +} diff --git a/src/Compression/CMakeLists.txt b/src/Compression/CMakeLists.txt index 34369d8dbc8e..efa3f2b1c09b 100644 --- a/src/Compression/CMakeLists.txt +++ b/src/Compression/CMakeLists.txt @@ -6,7 +6,7 @@ if (ENABLE_FUZZING) list(REMOVE_ITEM ${fuzz_compression_sources} CompressionFactoryAdditions.cpp) add_library(fuzz_compression ${fuzz_compression_headers} ${fuzz_compression_sources}) - target_link_libraries(fuzz_compression PUBLIC clickhouse_parsers clickhouse_common_io common lz4) + target_link_libraries(fuzz_compression PUBLIC clickhouse_parsers clickhouse_common_io common ch_contrib::lz4) endif() if (ENABLE_EXAMPLES) diff --git a/src/Compression/CachedCompressedReadBuffer.cpp b/src/Compression/CachedCompressedReadBuffer.cpp index f942f81f5e92..bda86f8c6166 100644 --- a/src/Compression/CachedCompressedReadBuffer.cpp +++ b/src/Compression/CachedCompressedReadBuffer.cpp @@ -105,7 +105,7 @@ void CachedCompressedReadBuffer::seek(size_t offset_in_compressed_file, size_t o /// We will discard our working_buffer, but have to account rest bytes bytes += offset(); /// No data, everything discarded - pos = working_buffer.end(); + resetWorkingBuffer(); owned_cell.reset(); /// Remember required offset in decompressed block which will be set in diff --git a/src/Compression/CompressedReadBufferFromFile.cpp b/src/Compression/CompressedReadBufferFromFile.cpp index 1a70b27e9f4e..cf08d68a7aae 100644 --- a/src/Compression/CompressedReadBufferFromFile.cpp +++ b/src/Compression/CompressedReadBufferFromFile.cpp @@ -80,7 +80,7 @@ void CompressedReadBufferFromFile::seek(size_t offset_in_compressed_file, size_t /// We will discard our working_buffer, but have to account rest bytes bytes += offset(); /// No data, everything discarded - pos = 
working_buffer.end(); + resetWorkingBuffer(); size_compressed = 0; /// Remember required offset in decompressed block which will be set in /// the next ReadBuffer::next() call @@ -113,7 +113,6 @@ size_t CompressedReadBufferFromFile::readBig(char * to, size_t n) /// need to skip some bytes in decompressed data (seek happened before readBig call). if (nextimpl_working_buffer_offset == 0 && size_decompressed + additional_size_at_the_end_of_buffer <= n - bytes_read) { - decompressTo(to + bytes_read, size_decompressed, size_compressed_without_checksum); bytes_read += size_decompressed; bytes += size_decompressed; diff --git a/src/Compression/CompressionCodecEncrypted.cpp b/src/Compression/CompressionCodecEncrypted.cpp index 6cb3874f808d..ddf2fb26712f 100644 --- a/src/Compression/CompressionCodecEncrypted.cpp +++ b/src/Compression/CompressionCodecEncrypted.cpp @@ -9,7 +9,7 @@ #include // This depends on BoringSSL-specific API, notably . -#if USE_SSL && USE_INTERNAL_SSL_LIBRARY +#if USE_SSL #include #include #include @@ -66,7 +66,7 @@ uint8_t getMethodCode(EncryptionMethod Method) } // end of namespace DB -#if USE_SSL && USE_INTERNAL_SSL_LIBRARY +#if USE_SSL namespace DB { @@ -513,7 +513,7 @@ void CompressionCodecEncrypted::doDecompressData(const char * source, UInt32 sou } -#else /* USE_SSL && USE_INTERNAL_SSL_LIBRARY */ +#else /* USE_SSL */ namespace DB { @@ -551,7 +551,7 @@ void CompressionCodecEncrypted::Configuration::load(const Poco::Util::AbstractCo } -#endif /* USE_SSL && USE_INTERNAL_SSL_LIBRARY */ +#endif /* USE_SSL */ namespace DB { diff --git a/src/Compression/fuzzers/CMakeLists.txt b/src/Compression/fuzzers/CMakeLists.txt index 189aea66a920..db1573f1354f 100644 --- a/src/Compression/fuzzers/CMakeLists.txt +++ b/src/Compression/fuzzers/CMakeLists.txt @@ -8,7 +8,7 @@ add_executable (compressed_buffer_fuzzer compressed_buffer_fuzzer.cpp) target_link_libraries (compressed_buffer_fuzzer PRIVATE dbms ${LIB_FUZZING_ENGINE}) add_executable (lz4_decompress_fuzzer lz4_decompress_fuzzer.cpp) -target_link_libraries (lz4_decompress_fuzzer PUBLIC dbms lz4 ${LIB_FUZZING_ENGINE}) +target_link_libraries (lz4_decompress_fuzzer PUBLIC dbms ch_contrib::lz4 ${LIB_FUZZING_ENGINE}) add_executable (delta_decompress_fuzzer delta_decompress_fuzzer.cpp) target_link_libraries (delta_decompress_fuzzer PRIVATE dbms ${LIB_FUZZING_ENGINE}) diff --git a/src/Coordination/FourLetterCommand.cpp b/src/Coordination/FourLetterCommand.cpp index 3d0ebe86bf3c..4c76d052f9b6 100644 --- a/src/Coordination/FourLetterCommand.cpp +++ b/src/Coordination/FourLetterCommand.cpp @@ -228,6 +228,8 @@ String MonitorCommand::run() print(ret, "watch_count", state_machine.getTotalWatchesCount()); print(ret, "ephemerals_count", state_machine.getTotalEphemeralNodesCount()); print(ret, "approximate_data_size", state_machine.getApproximateDataSize()); + print(ret, "key_arena_size", state_machine.getKeyArenaSize()); + print(ret, "latest_snapshot_size", state_machine.getLatestSnapshotBufSize()); #if defined(__linux__) || defined(__APPLE__) print(ret, "open_file_descriptor_count", getCurrentProcessFDCount()); diff --git a/src/Coordination/KeeperSnapshotManager.cpp b/src/Coordination/KeeperSnapshotManager.cpp index 518d569ca679..8d5df7c35e92 100644 --- a/src/Coordination/KeeperSnapshotManager.cpp +++ b/src/Coordination/KeeperSnapshotManager.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -19,7 +20,6 @@ namespace ErrorCodes { extern const int UNKNOWN_FORMAT_VERSION; extern const int UNKNOWN_SNAPSHOT; - extern const int 
LOGICAL_ERROR; } namespace @@ -41,20 +41,6 @@ namespace return base; } - std::string getBaseName(const String & path) - { - size_t basename_start = path.rfind('/'); - return std::string{&path[basename_start + 1], path.length() - basename_start - 1}; - } - - String parentPath(const String & path) - { - auto rslash_pos = path.rfind('/'); - if (rslash_pos > 0) - return path.substr(0, rslash_pos); - return "/"; - } - void writeNode(const KeeperStorage::Node & node, SnapshotVersion version, WriteBuffer & out) { writeBinary(node.data, out); @@ -182,8 +168,11 @@ void KeeperStorageSnapshot::serialize(const KeeperStorageSnapshot & snapshot, Wr { const auto & path = it->key; const auto & node = it->value; + /// A benign race condition is possible while taking a snapshot: NuRaft decides to create a snapshot at some log id, + /// and only after some time do we lock the storage and enable snapshot mode. So snapshot_container_size can be + /// slightly bigger than required. if (static_cast(node.stat.mzxid) > snapshot.snapshot_meta->get_last_log_idx()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to serialize node with mzxid {}, but last snapshot index {}", node.stat.mzxid, snapshot.snapshot_meta->get_last_log_idx()); + break; writeBinary(path, out); writeNode(node, snapshot.version, out); @@ -292,7 +281,7 @@ void KeeperStorageSnapshot::deserialize(SnapshotDeserializationResult & deserial if (itr.key != "/") { auto parent_path = parentPath(itr.key); - storage.container.updateValue(parent_path, [&path = itr.key] (KeeperStorage::Node & value) { value.children.insert(getBaseName(path)); }); + storage.container.updateValue(parent_path, [path = itr.key] (KeeperStorage::Node & value) { value.children.insert(getBaseName(path)); }); } } @@ -348,8 +337,8 @@ KeeperStorageSnapshot::KeeperStorageSnapshot(KeeperStorage * storage_, uint64_t , session_id(storage->session_id_counter) , cluster_config(cluster_config_) { - storage->enableSnapshotMode(); snapshot_container_size = storage->container.snapshotSize(); + storage->enableSnapshotMode(snapshot_container_size); begin = storage->getSnapshotIteratorBegin(); session_and_timeout = storage->getActiveSessions(); acl_map = storage->acl_map.getMapping(); @@ -362,8 +351,8 @@ KeeperStorageSnapshot::KeeperStorageSnapshot(KeeperStorage * storage_, const Sna , session_id(storage->session_id_counter) , cluster_config(cluster_config_) { - storage->enableSnapshotMode(); snapshot_container_size = storage->container.snapshotSize(); + storage->enableSnapshotMode(snapshot_container_size); begin = storage->getSnapshotIteratorBegin(); session_and_timeout = storage->getActiveSessions(); acl_map = storage->acl_map.getMapping(); diff --git a/src/Coordination/KeeperStateMachine.cpp b/src/Coordination/KeeperStateMachine.cpp index 1ac1a5844514..20d3bcbfd303 100644 --- a/src/Coordination/KeeperStateMachine.cpp +++ b/src/Coordination/KeeperStateMachine.cpp @@ -155,7 +155,7 @@ bool KeeperStateMachine::apply_snapshot(nuraft::snapshot & s) { /// deserialize and apply snapshot to storage std::lock_guard lock(storage_and_responses_lock); - auto snapshot_deserialization_result = snapshot_manager.deserializeSnapshotFromBuffer(latest_snapshot_buf); + auto snapshot_deserialization_result = snapshot_manager.deserializeSnapshotFromBuffer(latest_snapshot_ptr); storage = std::move(snapshot_deserialization_result.storage); latest_snapshot_meta = snapshot_deserialization_result.snapshot_meta; cluster_config = snapshot_deserialization_result.cluster_config; @@ -212,14 +212,13 @@ void KeeperStateMachine::create_snapshot(
} { - /// Must do it with lock (clearing elements from list) + /// Destroy snapshot with lock std::lock_guard lock(storage_and_responses_lock); + LOG_TRACE(log, "Clearing garbage after snapshot"); /// Turn off "snapshot mode" and clear the outdated part of the storage state storage->clearGarbageAfterSnapshot(); - /// Destroy snapshot with lock - snapshot.reset(); LOG_TRACE(log, "Cleared garbage after snapshot"); - + snapshot.reset(); } } catch (...) @@ -404,6 +403,20 @@ uint64_t KeeperStateMachine::getApproximateDataSize() const return storage->getApproximateDataSize(); } +uint64_t KeeperStateMachine::getKeyArenaSize() const +{ + std::lock_guard lock(storage_and_responses_lock); + return storage->getArenaDataSize(); +} + +uint64_t KeeperStateMachine::getLatestSnapshotBufSize() const +{ + std::lock_guard lock(snapshots_lock); + if (latest_snapshot_buf) + return latest_snapshot_buf->size(); + return 0; +} + ClusterConfigPtr KeeperStateMachine::getClusterConfig() const { std::lock_guard lock(cluster_config_lock); diff --git a/src/Coordination/KeeperStateMachine.h b/src/Coordination/KeeperStateMachine.h index 2803f4b90279..291b58e24985 100644 --- a/src/Coordination/KeeperStateMachine.h +++ b/src/Coordination/KeeperStateMachine.h @@ -97,6 +97,8 @@ class KeeperStateMachine : public nuraft::state_machine uint64_t getSessionWithEphemeralNodesCount() const; uint64_t getTotalEphemeralNodesCount() const; uint64_t getApproximateDataSize() const; + uint64_t getKeyArenaSize() const; + uint64_t getLatestSnapshotBufSize() const; private: @@ -120,7 +122,7 @@ class KeeperStateMachine : public nuraft::state_machine SnapshotsQueue & snapshots_queue; /// Mutex for snapshots - std::mutex snapshots_lock; + mutable std::mutex snapshots_lock; /// Lock for storage and responses_queue. It's important to process requests /// and push them to the responses queue while holding this lock.
Otherwise diff --git a/src/Coordination/KeeperStorage.cpp b/src/Coordination/KeeperStorage.cpp index 4f174e4e803c..f6992815a6c9 100644 --- a/src/Coordination/KeeperStorage.cpp +++ b/src/Coordination/KeeperStorage.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -23,20 +24,6 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } -static String parentPath(const String & path) -{ - auto rslash_pos = path.rfind('/'); - if (rslash_pos > 0) - return path.substr(0, rslash_pos); - return "/"; -} - -static std::string getBaseName(const String & path) -{ - size_t basename_start = path.rfind('/'); - return std::string{&path[basename_start + 1], path.length() - basename_start - 1}; -} - static String base64Encode(const String & decoded) { std::ostringstream ostr; // STYLE_CHECK_ALLOW_STD_STRING_STREAM @@ -155,12 +142,12 @@ static KeeperStorage::ResponsesForSessions processWatchesImpl(const String & pat Strings paths_to_check_for_list_watches; if (event_type == Coordination::Event::CREATED) { - paths_to_check_for_list_watches.push_back(parent_path); /// Trigger list watches for parent + paths_to_check_for_list_watches.push_back(parent_path.toString()); /// Trigger list watches for parent } else if (event_type == Coordination::Event::DELETED) { paths_to_check_for_list_watches.push_back(path); /// Trigger both list watches for this path - paths_to_check_for_list_watches.push_back(parent_path); /// And for parent path + paths_to_check_for_list_watches.push_back(parent_path.toString()); /// And for parent path } /// CHANGED event never triggers list watches @@ -244,7 +231,8 @@ struct KeeperStorageCreateRequestProcessor final : public KeeperStorageRequestPr bool checkAuth(KeeperStorage & storage, int64_t session_id) const override { auto & container = storage.container; - auto parent_path = parentPath(zk_request->getPath()); + auto path = zk_request->getPath(); + auto parent_path = parentPath(path); auto it = container.find(parent_path); if (it == container.end()) @@ -297,8 +285,7 @@ struct KeeperStorageCreateRequestProcessor final : public KeeperStorageRequestPr response.error = Coordination::Error::ZNODEEXISTS; return { response_ptr, undo }; } - auto child_path = getBaseName(path_created); - if (child_path.empty()) + if (getBaseName(path_created).size == 0) { response.error = Coordination::Error::ZBADARGUMENTS; return { response_ptr, undo }; @@ -330,15 +317,18 @@ struct KeeperStorageCreateRequestProcessor final : public KeeperStorageRequestPr created_node.data = request.data; created_node.is_sequental = request.is_sequential; + auto [map_key, _] = container.insert(path_created, std::move(created_node)); + /// Take the child path from the key owned by the map.
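// The comment above (and the mirrored undo paths below) is about lifetimes: children is now
// a set of StringRef, so the referenced bytes must outlive the set. The name therefore has
// to point into the key owned by the container itself, never into a temporary request
// string. A reduced illustration of the rule with standard types (hypothetical sketch,
// std::string_view standing in for StringRef):

#include <string>
#include <string_view>
#include <unordered_map>
#include <unordered_set>

std::unordered_map<std::string, int> container;  // owns the path bytes (stable node-based keys)
std::unordered_set<std::string_view> children;   // views into container-owned keys

void addChild(const std::string & request_path)
{
    auto [it, inserted] = container.emplace(request_path, 0);
    children.insert(std::string_view(it->first)); // correct: aliases the map-owned key
    // children.insert(std::string_view(request_path)); // wrong: dangles once the caller's string dies
}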
+ auto child_path = getBaseName(map_key->getKey()); + int32_t parent_cversion = request.parent_cversion; int64_t prev_parent_zxid; int32_t prev_parent_cversion; container.updateValue(parent_path, [child_path, zxid, &prev_parent_zxid, parent_cversion, &prev_parent_cversion] (KeeperStorage::Node & parent) { - parent.children.insert(child_path); - parent.size_bytes += child_path.size(); + parent.size_bytes += child_path.size; prev_parent_cversion = parent.stat.cversion; prev_parent_zxid = parent.stat.pzxid; @@ -356,14 +346,12 @@ struct KeeperStorageCreateRequestProcessor final : public KeeperStorageRequestPr }); response.path_created = path_created; - container.insert(path_created, std::move(created_node)); if (request.is_ephemeral) ephemerals[session_id].emplace(path_created); undo = [&storage, prev_parent_zxid, prev_parent_cversion, session_id, path_created, is_ephemeral = request.is_ephemeral, parent_path, child_path, acl_id] { - storage.container.erase(path_created); storage.acl_map.removeUsage(acl_id); if (is_ephemeral) @@ -376,8 +364,10 @@ struct KeeperStorageCreateRequestProcessor final : public KeeperStorageRequestPr undo_parent.stat.cversion = prev_parent_cversion; undo_parent.stat.pzxid = prev_parent_zxid; undo_parent.children.erase(child_path); - undo_parent.size_bytes -= child_path.size(); + undo_parent.size_bytes -= child_path.size; }); + + storage.container.erase(path_created); }; response.error = Coordination::Error::ZOK; @@ -504,33 +494,34 @@ struct KeeperStorageRemoveRequestProcessor final : public KeeperStorageRequestPr storage.acl_map.removeUsage(prev_node.acl_id); - auto child_basename = getBaseName(it->key); - container.updateValue(parentPath(request.path), [&child_basename] (KeeperStorage::Node & parent) + container.updateValue(parentPath(request.path), [child_basename = getBaseName(it->key)] (KeeperStorage::Node & parent) { --parent.stat.numChildren; ++parent.stat.cversion; parent.children.erase(child_basename); - parent.size_bytes -= child_basename.size(); + parent.size_bytes -= child_basename.size; }); response.error = Coordination::Error::ZOK; - + /// Erase the full path from the container only after the child is removed from the parent container.erase(request.path); - undo = [prev_node, &storage, path = request.path, child_basename] + undo = [prev_node, &storage, path = request.path] { if (prev_node.stat.ephemeralOwner != 0) storage.ephemerals[prev_node.stat.ephemeralOwner].emplace(path); storage.acl_map.addUsage(prev_node.acl_id); - storage.container.insert(path, prev_node); - storage.container.updateValue(parentPath(path), [&child_basename] (KeeperStorage::Node & parent) + /// Dangerous place: we are inserting a StringRef to the child name into the children hash set. + /// That's why we take getBaseName from the key inserted into the map, not from the path in the request object.
+ auto [map_key, _] = storage.container.insert(path, prev_node); + storage.container.updateValue(parentPath(path), [child_name = getBaseName(map_key->getKey())] (KeeperStorage::Node & parent) { ++parent.stat.numChildren; --parent.stat.cversion; - parent.children.insert(child_basename); - parent.size_bytes += child_basename.size(); + parent.children.insert(child_name); + parent.size_bytes += child_name.size; }); }; } @@ -672,6 +663,7 @@ struct KeeperStorageListRequestProcessor final : public KeeperStorageRequestProc Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse(); Coordination::ZooKeeperListResponse & response = dynamic_cast(*response_ptr); Coordination::ZooKeeperListRequest & request = dynamic_cast(*zk_request); + auto it = container.find(request.path); if (it == container.end()) { @@ -683,7 +675,10 @@ struct KeeperStorageListRequestProcessor final : public KeeperStorageRequestProc if (path_prefix.empty()) throw DB::Exception("Logical error: path cannot be empty", ErrorCodes::LOGICAL_ERROR); - response.names.insert(response.names.end(), it->value.children.begin(), it->value.children.end()); + response.names.reserve(it->value.children.size()); + + for (const auto child : it->value.children) + response.names.push_back(child.toString()); response.stat = it->value.stat; response.error = Coordination::Error::ZOK; @@ -1092,15 +1087,17 @@ KeeperStorage::ResponsesForSessions KeeperStorage::processRequest(const Coordina { for (const auto & ephemeral_path : it->second) { - container.erase(ephemeral_path); container.updateValue(parentPath(ephemeral_path), [&ephemeral_path] (KeeperStorage::Node & parent) { --parent.stat.numChildren; ++parent.stat.cversion; - parent.children.erase(getBaseName(ephemeral_path)); - parent.size_bytes -= getBaseName(ephemeral_path).size(); + auto base_name = getBaseName(ephemeral_path); + parent.children.erase(base_name); + parent.size_bytes -= base_name.size; }); + container.erase(ephemeral_path); + auto responses = processWatchesImpl(ephemeral_path, watches, list_watches, Coordination::Event::DELETED); results.insert(results.end(), responses.begin(), responses.end()); } diff --git a/src/Coordination/KeeperStorage.h b/src/Coordination/KeeperStorage.h index 11d191b7f50e..cbf33be61a0b 100644 --- a/src/Coordination/KeeperStorage.h +++ b/src/Coordination/KeeperStorage.h @@ -8,16 +8,17 @@ #include #include #include -#include #include +#include + namespace DB { struct KeeperStorageRequestProcessor; using KeeperStorageRequestProcessorPtr = std::shared_ptr; using ResponseCallback = std::function; -using ChildrenSet = std::unordered_set; +using ChildrenSet = absl::flat_hash_set; using SessionAndTimeout = std::unordered_map; struct KeeperStorageSnapshot; @@ -28,6 +29,7 @@ struct KeeperStorageSnapshot; class KeeperStorage { public: + struct Node { String data; @@ -158,9 +160,9 @@ class KeeperStorage /// Set of methods for creating snapshots /// Turn on snapshot mode, so data inside Container is not deleted, but replaced with new version. - void enableSnapshotMode() + void enableSnapshotMode(size_t up_to_size) { - container.enableSnapshotMode(); + container.enableSnapshotMode(up_to_size); } /// Turn off snapshot mode. 
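// enableSnapshotMode() now receives the snapshot size up front, so the table (see the
// updateValue() diff further below) only copy-on-writes entries the snapshot iterator can
// still reach; entries appended after that point are mutated in place. A toy model of the
// rule (hypothetical structure, not the real SnapshotableHashTable):

#include <iterator>
#include <list>
#include <string>

struct Entry { std::string key; int value; bool active_in_map = true; };

std::list<Entry> history;       // the snapshot iterates the first N entries
size_t snapshot_up_to_size = 0; // fixed when the snapshot starts

void update(std::list<Entry>::iterator it, int new_value)
{
    if (static_cast<size_t>(std::distance(history.begin(), it)) < snapshot_up_to_size)
    {
        Entry copy = *it;          // visible to the running snapshot:
        it->active_in_map = false; // keep the old version, append the new one
        copy.value = new_value;
        history.push_back(copy);
    }
    else
        it->value = new_value;     // newer than the snapshot: mutate in place
}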
@@ -203,6 +205,12 @@ class KeeperStorage return container.getApproximateDataSize(); } + uint64_t getArenaDataSize() const + { + return container.keyArenaSize(); + } + + uint64_t getTotalWatchesCount() const; uint64_t getWatchedPathsCount() const diff --git a/src/Coordination/LoggerWrapper.h b/src/Coordination/LoggerWrapper.h index 002fa8702417..a24937636332 100644 --- a/src/Coordination/LoggerWrapper.h +++ b/src/Coordination/LoggerWrapper.h @@ -39,7 +39,7 @@ class LoggerWrapper : public nuraft::logger const std::string & msg) override { LogsLevel db_level = static_cast(level_); - LOG_IMPL(log, db_level, LEVELS.at(db_level), msg); + LOG_IMPL(log, db_level, LEVELS.at(db_level), fmt::runtime(msg)); } void set_level(int level_) override diff --git a/src/Coordination/SnapshotableHashTable.h b/src/Coordination/SnapshotableHashTable.h index 7704825f8300..b1d725785306 100644 --- a/src/Coordination/SnapshotableHashTable.h +++ b/src/Coordination/SnapshotableHashTable.h @@ -1,8 +1,11 @@ #pragma once #include +#include +#include #include #include #include +#include namespace DB { @@ -10,11 +13,12 @@ namespace DB template struct ListNode { - std::string key; + StringRef key; V value; - bool active_in_map; -}; + bool active_in_map{true}; + bool free_key{false}; +}; template class SnapshotableHashTable @@ -23,11 +27,15 @@ class SnapshotableHashTable using ListElem = ListNode; using List = std::list; - using IndexMap = std::unordered_map; + using Mapped = typename List::iterator; + using IndexMap = HashMap; List list; IndexMap map; bool snapshot_mode{false}; + /// Allows to avoid additional copies in updateValue function + size_t snapshot_up_to_size = 0; + ArenaWithFreeLists arena; uint64_t approximate_data_size{0}; @@ -105,51 +113,68 @@ class SnapshotableHashTable } } + StringRef copyStringInArena(const std::string & value_to_copy) + { + size_t value_to_copy_size = value_to_copy.size(); + char * place_for_key = arena.alloc(value_to_copy_size); + memcpy(reinterpret_cast(place_for_key), reinterpret_cast(value_to_copy.data()), value_to_copy_size); + StringRef updated_value{place_for_key, value_to_copy_size}; + + return updated_value; + } + + public: using iterator = typename List::iterator; using const_iterator = typename List::const_iterator; - using reverse_iterator = typename List::reverse_iterator; - using const_reverse_iterator = typename List::const_reverse_iterator; using ValueUpdater = std::function; - bool insert(const std::string & key, const V & value) + std::pair insert(const std::string & key, const V & value) { - auto it = map.find(key); - if (it == map.end()) + size_t hash_value = map.hash(key); + auto it = map.find(key, hash_value); + + if (!it) { - ListElem elem{key, value, true}; + ListElem elem{copyStringInArena(key), value, true}; auto itr = list.insert(list.end(), elem); - map.emplace(itr->key, itr); + bool inserted; + map.emplace(itr->key, it, inserted, hash_value); + assert(inserted); + + it->getMapped() = itr; updateDataSize(INSERT, key.size(), value.sizeInBytes(), 0); - return true; + return std::make_pair(it, true); } - return false; + return std::make_pair(it, false); } - void insertOrReplace(const std::string & key, const V & value) { - auto it = map.find(key); - uint64_t old_value_size = it == map.end() ? 0 : it->second->value.sizeInBytes(); + size_t hash_value = map.hash(key); + auto it = map.find(key, hash_value); + uint64_t old_value_size = it == map.end() ? 
0 : it->getMapped()->value.sizeInBytes(); if (it == map.end()) { - ListElem elem{key, value, true}; + ListElem elem{copyStringInArena(key), value, true}; auto itr = list.insert(list.end(), elem); - map.emplace(itr->key, itr); + bool inserted; + map.emplace(itr->key, it, inserted, hash_value); + assert(inserted); + it->getMapped() = itr; } else { - auto list_itr = it->second; + auto list_itr = it->getMapped(); if (snapshot_mode) { - ListElem elem{key, value, true}; + ListElem elem{list_itr->key, value, true}; list_itr->active_in_map = false; auto new_list_itr = list.insert(list.end(), elem); - map.erase(it); - map.emplace(new_list_itr->key, new_list_itr); + it->getMapped() = new_list_itr; } else { @@ -165,16 +190,18 @@ class SnapshotableHashTable if (it == map.end()) return false; - auto list_itr = it->second; + auto list_itr = it->getMapped(); uint64_t old_data_size = list_itr->value.sizeInBytes(); if (snapshot_mode) { list_itr->active_in_map = false; - map.erase(it); + list_itr->free_key = true; + map.erase(it->getKey()); } else { - map.erase(it); + map.erase(it->getKey()); + arena.free(const_cast(list_itr->key.data), list_itr->key.size); list.erase(list_itr); } @@ -187,48 +214,62 @@ class SnapshotableHashTable return map.find(key) != map.end(); } - const_iterator updateValue(const std::string & key, ValueUpdater updater) + const_iterator updateValue(StringRef key, ValueUpdater updater) { - auto it = map.find(key); + size_t hash_value = map.hash(key); + auto it = map.find(key, hash_value); assert(it != map.end()); - auto list_itr = it->second; + auto list_itr = it->getMapped(); uint64_t old_value_size = list_itr->value.sizeInBytes(); const_iterator ret; if (snapshot_mode) { - auto elem_copy = *(list_itr); - list_itr->active_in_map = false; - map.erase(it); - updater(elem_copy.value); - auto itr = list.insert(list.end(), elem_copy); - map.emplace(itr->key, itr); - ret = itr; + /// We are in snapshot mode, but we are updating a node that is already + /// fresher than the snapshot distance. So it will not participate in the + /// snapshot and we don't need to copy it.
+ size_t distance = std::distance(list.begin(), list_itr); + if (distance < snapshot_up_to_size) + { + auto elem_copy = *(list_itr); + list_itr->active_in_map = false; + updater(elem_copy.value); + auto itr = list.insert(list.end(), elem_copy); + it->getMapped() = itr; + ret = itr; + } + else + { + updater(list_itr->value); + ret = list_itr; + } } else { updater(list_itr->value); ret = list_itr; } - updateDataSize(UPDATE_VALUE, key.size(), ret->value.sizeInBytes(), old_value_size); + + updateDataSize(UPDATE_VALUE, key.size, ret->value.sizeInBytes(), old_value_size); return ret; } - const_iterator find(const std::string & key) const + const_iterator find(StringRef key) const { auto map_it = map.find(key); if (map_it != map.end()) - return map_it->second; + return map_it->getMapped(); return list.end(); } - const V & getValue(const std::string & key) const + + const V & getValue(StringRef key) const { auto it = map.find(key); - assert(it != map.end()); - return it->second->value; + assert(it); + return it->getMapped()->value; } void clearOutdatedNodes() @@ -239,29 +280,39 @@ class SnapshotableHashTable { if (!itr->active_in_map) { - updateDataSize(CLEAR_OUTDATED_NODES, itr->key.size(), itr->value.sizeInBytes(), 0); + updateDataSize(CLEAR_OUTDATED_NODES, itr->key.size, itr->value.sizeInBytes(), 0); + if (itr->free_key) + arena.free(const_cast(itr->key.data), itr->key.size); itr = list.erase(itr); } else + { + assert(!itr->free_key); itr++; + } } } void clear() { - list.clear(); map.clear(); + for (auto itr = list.begin(); itr != list.end(); ++itr) + arena.free(const_cast(itr->key.data), itr->key.size); + list.clear(); updateDataSize(CLEAR, 0, 0, 0); } - void enableSnapshotMode() + void enableSnapshotMode(size_t up_to_size) { snapshot_mode = true; + snapshot_up_to_size = up_to_size; } void disableSnapshotMode() { + snapshot_mode = false; + snapshot_up_to_size = 0; } size_t size() const @@ -279,15 +330,15 @@ class SnapshotableHashTable return approximate_data_size; } + uint64_t keyArenaSize() const + { + return arena.size(); + } + iterator begin() { return list.begin(); } const_iterator begin() const { return list.cbegin(); } iterator end() { return list.end(); } const_iterator end() const { return list.cend(); } - - reverse_iterator rbegin() { return list.rbegin(); } - const_reverse_iterator rbegin() const { return list.crbegin(); } - reverse_iterator rend() { return list.rend(); } - const_reverse_iterator rend() const { return list.crend(); } }; diff --git a/src/Coordination/ZooKeeperDataReader.cpp b/src/Coordination/ZooKeeperDataReader.cpp index bd13a70252e2..5d7b78d6a28e 100644 --- a/src/Coordination/ZooKeeperDataReader.cpp +++ b/src/Coordination/ZooKeeperDataReader.cpp @@ -1,10 +1,13 @@ #include + #include #include +#include + #include #include #include -#include +#include namespace DB @@ -16,20 +19,6 @@ namespace ErrorCodes extern const int CORRUPTED_DATA; } -static String parentPath(const String & path) -{ - auto rslash_pos = path.rfind('/'); - if (rslash_pos > 0) - return path.substr(0, rslash_pos); - return "/"; -} - -static std::string getBaseName(const String & path) -{ - size_t basename_start = path.rfind('/'); - return std::string{&path[basename_start + 1], path.length() - basename_start - 1}; -} - int64_t getZxidFromName(const std::string & filename) { std::filesystem::path path(filename); @@ -148,7 +137,7 @@ int64_t deserializeStorageData(KeeperStorage & storage, ReadBuffer & in, Poco::L if (itr.key != "/") { auto parent_path = parentPath(itr.key); - 
storage.container.updateValue(parent_path, [&path = itr.key] (KeeperStorage::Node & value) { value.children.insert(getBaseName(path)); value.stat.numChildren++; }); + storage.container.updateValue(parent_path, [path = itr.key] (KeeperStorage::Node & value) { value.children.insert(getBaseName(path)); value.stat.numChildren++; }); } } diff --git a/src/Coordination/pathUtils.cpp b/src/Coordination/pathUtils.cpp new file mode 100644 index 000000000000..1e1da339d2eb --- /dev/null +++ b/src/Coordination/pathUtils.cpp @@ -0,0 +1,38 @@ +#include +#include + +namespace DB +{ + +static size_t findLastSlash(StringRef path) +{ + if (path.size == 0) + return std::string::npos; + + for (size_t i = path.size - 1; i > 0; --i) + { + if (path.data[i] == '/') + return i; + } + + if (path.data[0] == '/') + return 0; + + return std::string::npos; +} + +StringRef parentPath(StringRef path) +{ + auto rslash_pos = findLastSlash(path); + if (rslash_pos > 0) + return StringRef{path.data, rslash_pos}; + return "/"; +} + +StringRef getBaseName(StringRef path) +{ + size_t basename_start = findLastSlash(path); + return StringRef{path.data + basename_start + 1, path.size - basename_start - 1}; +} + +} diff --git a/src/Coordination/pathUtils.h b/src/Coordination/pathUtils.h new file mode 100644 index 000000000000..69ed2d8b177f --- /dev/null +++ b/src/Coordination/pathUtils.h @@ -0,0 +1,13 @@ +#pragma once + +#include +#include + +namespace DB +{ + +StringRef parentPath(StringRef path); + +StringRef getBaseName(StringRef path); + +} diff --git a/src/Coordination/tests/gtest_coordination.cpp b/src/Coordination/tests/gtest_coordination.cpp index d274ee34a889..9c434ebb6537 100644 --- a/src/Coordination/tests/gtest_coordination.cpp +++ b/src/Coordination/tests/gtest_coordination.cpp @@ -844,10 +844,10 @@ struct IntNode TEST_P(CoordinationTest, SnapshotableHashMapSimple) { DB::SnapshotableHashTable hello; - EXPECT_TRUE(hello.insert("hello", 5)); + EXPECT_TRUE(hello.insert("hello", 5).second); EXPECT_TRUE(hello.contains("hello")); EXPECT_EQ(hello.getValue("hello"), 5); - EXPECT_FALSE(hello.insert("hello", 145)); + EXPECT_FALSE(hello.insert("hello", 145).second); EXPECT_EQ(hello.getValue("hello"), 5); hello.updateValue("hello", [](IntNode & value) { value = 7; }); EXPECT_EQ(hello.getValue("hello"), 7); @@ -859,10 +859,10 @@ TEST_P(CoordinationTest, SnapshotableHashMapSimple) TEST_P(CoordinationTest, SnapshotableHashMapTrySnapshot) { DB::SnapshotableHashTable map_snp; - EXPECT_TRUE(map_snp.insert("/hello", 7)); - EXPECT_FALSE(map_snp.insert("/hello", 145)); - map_snp.enableSnapshotMode(); - EXPECT_FALSE(map_snp.insert("/hello", 145)); + EXPECT_TRUE(map_snp.insert("/hello", 7).second); + EXPECT_FALSE(map_snp.insert("/hello", 145).second); + map_snp.enableSnapshotMode(100000); + EXPECT_FALSE(map_snp.insert("/hello", 145).second); map_snp.updateValue("/hello", [](IntNode & value) { value = 554; }); EXPECT_EQ(map_snp.getValue("/hello"), 554); EXPECT_EQ(map_snp.snapshotSize(), 2); @@ -880,7 +880,7 @@ TEST_P(CoordinationTest, SnapshotableHashMapTrySnapshot) EXPECT_EQ(itr, map_snp.end()); for (size_t i = 0; i < 5; ++i) { - EXPECT_TRUE(map_snp.insert("/hello" + std::to_string(i), i)); + EXPECT_TRUE(map_snp.insert("/hello" + std::to_string(i), i).second); } EXPECT_EQ(map_snp.getValue("/hello3"), 3); @@ -951,7 +951,7 @@ TEST_P(CoordinationTest, SnapshotableHashMapDataSize) hello.clear(); EXPECT_EQ(hello.getApproximateDataSize(), 0); - hello.enableSnapshotMode(); + hello.enableSnapshotMode(10000); hello.insert("hello", 1); 
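// The new pathUtils helpers work on StringRef without allocating; their contract mirrors
// the removed string-based helpers: parentPath("/a/b") == "/a", parentPath("/a") == "/",
// getBaseName("/a/b") == "b". An equivalent std::string_view sketch for reference
// (illustration only, assuming paths start with '/'):

#include <string_view>

std::string_view parentPathView(std::string_view path)
{
    auto pos = path.rfind('/');
    if (pos == std::string_view::npos || pos == 0)
        return "/";
    return path.substr(0, pos);
}

std::string_view getBaseNameView(std::string_view path)
{
    auto pos = path.rfind('/');
    return path.substr(pos + 1); // npos + 1 == 0, i.e. the whole string
}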
EXPECT_EQ(hello.getApproximateDataSize(), 9); hello.updateValue("hello", [](IntNode & value) { value = 2; }); @@ -984,7 +984,7 @@ TEST_P(CoordinationTest, SnapshotableHashMapDataSize) world.erase("world"); EXPECT_EQ(world.getApproximateDataSize(), 0); - world.enableSnapshotMode(); + world.enableSnapshotMode(100000); world.insert("world", n1); EXPECT_EQ(world.getApproximateDataSize(), 98); world.updateValue("world", [&](Node & value) { value = n2; }); diff --git a/src/Core/BackgroundSchedulePool.cpp b/src/Core/BackgroundSchedulePool.cpp index 9a42f752db26..18c43d8c45fc 100644 --- a/src/Core/BackgroundSchedulePool.cpp +++ b/src/Core/BackgroundSchedulePool.cpp @@ -5,7 +5,6 @@ #include #include #include -#include namespace DB @@ -246,7 +245,6 @@ void BackgroundSchedulePool::threadFunction() setThreadName(thread_name.c_str()); attachToThreadGroup(); - SCOPE_EXIT({ CurrentThread::detachQueryIfNotDetached(); }); while (!shutdown) { @@ -273,7 +271,6 @@ void BackgroundSchedulePool::delayExecutionThreadFunction() setThreadName((thread_name + "/D").c_str()); attachToThreadGroup(); - SCOPE_EXIT({ CurrentThread::detachQueryIfNotDetached(); }); while (!shutdown) { diff --git a/src/Core/Block.cpp b/src/Core/Block.cpp index 07db1a1fafab..26c883b308de 100644 --- a/src/Core/Block.cpp +++ b/src/Core/Block.cpp @@ -12,6 +12,7 @@ #include #include +#include namespace DB @@ -538,7 +539,7 @@ Block Block::sortColumns() const for (auto it = index_by_name.begin(); it != index_by_name.end(); ++it) sorted_index_by_name[i++] = it; } - std::sort(sorted_index_by_name.begin(), sorted_index_by_name.end(), [](const auto & lhs, const auto & rhs) + ::sort(sorted_index_by_name.begin(), sorted_index_by_name.end(), [](const auto & lhs, const auto & rhs) { return lhs->first < rhs->first; }); @@ -754,4 +755,30 @@ void materializeBlockInplace(Block & block) block.getByPosition(i).column = recursiveRemoveSparse(block.getByPosition(i).column->convertToFullColumnIfConst()); } +Block concatenateBlocks(const std::vector & blocks) +{ + if (blocks.empty()) + return {}; + + size_t num_rows = 0; + for (const auto & block : blocks) + num_rows += block.rows(); + + Block out = blocks[0].cloneEmpty(); + MutableColumns columns = out.mutateColumns(); + + for (size_t i = 0; i < columns.size(); ++i) + { + columns[i]->reserve(num_rows); + for (const auto & block : blocks) + { + const auto & tmp_column = *block.getByPosition(i).column; + columns[i]->insertRangeFrom(tmp_column, 0, block.rows()); + } + } + + out.setColumns(std::move(columns)); + return out; +} + } diff --git a/src/Core/Block.h b/src/Core/Block.h index efa5ce7c3264..2624b57880c8 100644 --- a/src/Core/Block.h +++ b/src/Core/Block.h @@ -203,4 +203,6 @@ ColumnPtr getColumnFromBlock(const Block & block, const NameAndTypePair & column Block materializeBlock(const Block & block); void materializeBlockInplace(Block & block); +Block concatenateBlocks(const std::vector & blocks); + } diff --git a/src/Core/Field.cpp b/src/Core/Field.cpp index a85b7cff46e3..70a1458c9f03 100644 --- a/src/Core/Field.cpp +++ b/src/Core/Field.cpp @@ -106,6 +106,12 @@ inline Field getBinaryValue(UInt8 type, ReadBuffer & buf) readStringBinary(value.data, buf); return value; } + case Field::Types::Bool: + { + UInt8 value; + readBinary(value, buf); + return bool(value); + } } return Field(); } @@ -346,6 +352,13 @@ Field Field::restoreFromDump(const std::string_view & dump_) return str; } + prefix = std::string_view{"Bool_"}; + if (dump.starts_with(prefix)) + { + bool value = parseFromString(dump.substr(prefix.length())); 
+ return value; + } + prefix = std::string_view{"Array_["}; if (dump.starts_with(prefix)) { diff --git a/src/Core/Field.h b/src/Core/Field.h index 19573ed9831e..b525e3a83aba 100644 --- a/src/Core/Field.h +++ b/src/Core/Field.h @@ -282,6 +282,7 @@ class Field Int256 = 25, Map = 26, UUID = 27, + Bool = 28, }; }; @@ -323,7 +324,10 @@ class Field template Field(T && rhs, enable_if_not_field_or_bool_or_stringlike_t = nullptr); - Field(bool rhs) : Field(castToNearestFieldType(rhs)) {} + Field(bool rhs) : Field(castToNearestFieldType(rhs)) + { + which = Types::Bool; + } /// Create a string inplace. Field(const std::string_view & str) { create(str.data(), str.size()); } @@ -376,7 +380,12 @@ class Field enable_if_not_field_or_bool_or_stringlike_t & operator=(T && rhs); - Field & operator= (bool rhs) { return *this = castToNearestFieldType(rhs); } + Field & operator= (bool rhs) + { + *this = castToNearestFieldType(rhs); + which = Types::Bool; + return *this; + } Field & operator= (const std::string_view & str); Field & operator= (const String & str) { return *this = std::string_view{str}; } @@ -450,6 +459,7 @@ class Field switch (which) { case Types::Null: return false; + case Types::Bool: [[fallthrough]]; case Types::UInt64: return get() < rhs.get(); case Types::UInt128: return get() < rhs.get(); case Types::UInt256: return get() < rhs.get(); @@ -487,6 +497,7 @@ class Field switch (which) { case Types::Null: return true; + case Types::Bool: [[fallthrough]]; case Types::UInt64: return get() <= rhs.get(); case Types::UInt128: return get() <= rhs.get(); case Types::UInt256: return get() <= rhs.get(); @@ -524,6 +535,7 @@ class Field switch (which) { case Types::Null: return true; + case Types::Bool: [[fallthrough]]; case Types::UInt64: return get() == rhs.get(); case Types::Int64: return get() == rhs.get(); case Types::Float64: @@ -580,6 +592,11 @@ class Field case Types::Array: return f(field.template get()); case Types::Tuple: return f(field.template get()); case Types::Map: return f(field.template get()); + case Types::Bool: + { + bool value = bool(field.template get()); + return f(value); + } case Types::Decimal32: return f(field.template get>()); case Types::Decimal64: return f(field.template get>()); case Types::Decimal128: return f(field.template get>()); @@ -739,6 +756,7 @@ template <> struct Field::TypeToEnum>{ static const Typ template <> struct Field::TypeToEnum>{ static const Types::Which value = Types::Decimal256; }; template <> struct Field::TypeToEnum>{ static const Types::Which value = Types::Decimal64; }; template <> struct Field::TypeToEnum{ static const Types::Which value = Types::AggregateFunctionState; }; +template <> struct Field::TypeToEnum{ static const Types::Which value = Types::Bool; }; template <> struct Field::EnumToType { using Type = Null; }; template <> struct Field::EnumToType { using Type = UInt64; }; @@ -758,6 +776,7 @@ template <> struct Field::EnumToType { using Type = Dec template <> struct Field::EnumToType { using Type = DecimalField; }; template <> struct Field::EnumToType { using Type = DecimalField; }; template <> struct Field::EnumToType { using Type = DecimalField; }; +template <> struct Field::EnumToType { using Type = UInt64; }; inline constexpr bool isInt64OrUInt64FieldType(Field::Types::Which t) { @@ -765,6 +784,13 @@ inline constexpr bool isInt64OrUInt64FieldType(Field::Types::Which t) || t == Field::Types::UInt64; } +inline constexpr bool isInt64OrUInt64orBoolFieldType(Field::Types::Which t) +{ + return t == Field::Types::Int64 + || t == 
Field::Types::UInt64 + || t == Field::Types::Bool; +} + // Field value getter with type checking in debug builds. template NearestFieldType> & Field::get() @@ -781,7 +807,7 @@ NearestFieldType> & Field::get() // Disregard signedness when converting between int64 types. constexpr Field::Types::Which target = TypeToEnum::value; if (target != which - && (!isInt64OrUInt64FieldType(target) || !isInt64OrUInt64FieldType(which))) + && (!isInt64OrUInt64orBoolFieldType(target) || !isInt64OrUInt64orBoolFieldType(which))) throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid Field get from type {} to type {}", which, target); #endif diff --git a/src/Core/MySQL/MySQLReplication.cpp b/src/Core/MySQL/MySQLReplication.cpp index fb230f412f01..50f6be23f83b 100644 --- a/src/Core/MySQL/MySQLReplication.cpp +++ b/src/Core/MySQL/MySQLReplication.cpp @@ -204,6 +204,7 @@ namespace MySQLReplication case MYSQL_TYPE_DATE: case MYSQL_TYPE_DATETIME: case MYSQL_TYPE_NEWDATE: + case MYSQL_TYPE_YEAR: { /// No data here. column_meta.emplace_back(0); @@ -214,7 +215,9 @@ namespace MySQLReplication case MYSQL_TYPE_DOUBLE: case MYSQL_TYPE_TIMESTAMP2: case MYSQL_TYPE_DATETIME2: + case MYSQL_TYPE_TIME2: case MYSQL_TYPE_BLOB: + case MYSQL_TYPE_GEOMETRY: { column_meta.emplace_back(UInt16(meta[pos])); pos += 1; @@ -432,6 +435,98 @@ namespace MySQLReplication row.push_back(Field(date_day_number.toUnderType())); break; } + case MYSQL_TYPE_YEAR: { + Int16 val = 0; + payload.readStrict(reinterpret_cast(&val), 1); + row.push_back(Field{UInt16{static_cast(val + 1900)}}); + break; + } + case MYSQL_TYPE_TIME2: + { + UInt64 uintpart = 0UL; + Int32 frac = 0U; + Int64 ltime; + Int64 intpart; + switch (meta) + { + case 0: + { + readBigEndianStrict(payload, reinterpret_cast(&uintpart), 3); + intpart = uintpart - 0x800000L; + ltime = intpart << 24; + break; + } + case 1: + case 2: + { + readBigEndianStrict(payload, reinterpret_cast(&uintpart), 3); + intpart = uintpart - 0x800000L; + readBigEndianStrict(payload, reinterpret_cast(&frac), 1); + if (intpart < 0 && frac > 0) + { + intpart ++; + frac -= 0x100; + } + frac = frac * 10000; + ltime = intpart << 24; + break; + } + case 3: + case 4: + { + readBigEndianStrict(payload, reinterpret_cast(&uintpart), 3); + intpart = uintpart - 0x800000L; + readBigEndianStrict(payload, reinterpret_cast(&frac), 2); + if (intpart < 0 && frac > 0) + { + intpart ++; + frac -= 0x10000; + } + frac = frac * 100; + ltime = intpart << 24; + break; + } + case 5: + case 6: + { + readBigEndianStrict(payload, reinterpret_cast(&uintpart), 6); + intpart = uintpart - 0x800000000000L; + ltime = intpart; + frac = std::abs(intpart % (1L << 24)); + break; + } + default: + { + readBigEndianStrict(payload, reinterpret_cast(&uintpart), 3); + intpart = uintpart - 0x800000L; + ltime = intpart << 24; + break; + } + } + Int64 hh, mm, ss; + bool negative = false; + if (intpart == 0) + { + hh = 0; + mm = 0; + ss = 0; + } + else + { + if (ltime < 0) negative= true; + UInt64 ultime = std::abs(ltime); + intpart = ultime >> 24; + hh = (intpart >> 12) % (1 << 10); + mm = (intpart >> 6) % (1 << 6); + ss = intpart % (1 << 6); + } + + Int64 time_micro = 0; + time_micro = (hh * 3600 + mm * 60 + ss) * 1000000 + std::abs(frac); + if (negative) time_micro = - time_micro; + row.push_back(Field{Int64{time_micro}}); + break; + } case MYSQL_TYPE_DATETIME2: { Int64 val = 0; @@ -585,6 +680,14 @@ namespace MySQLReplication } break; } + case MYSQL_TYPE_SET: + { + UInt32 size = (meta & 0xff); + Bitmap bitmap1; + readBitmap(payload, bitmap1, size); + 
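The MYSQL_TYPE_TIME2 branch above stores hh/mm/ss as packed bit fields behind a 0x800000 bias. A standalone sketch of just the meta == 0 path, reproducing the same shifts and masks and ending in signed microseconds as the patch does; the function name and test value are illustrative:

```cpp
#include <cassert>
#include <cstdint>
#include <cstdlib>

// meta == 0: three big-endian bytes, biased by 0x800000, with hh(10 bits),
// mm(6 bits), ss(6 bits) packed in the integer part; no fractional digits.
int64_t mysqlTime2ToMicroseconds(uint64_t three_bytes_big_endian)
{
    int64_t intpart = static_cast<int64_t>(three_bytes_big_endian) - 0x800000L;
    int64_t ltime = intpart << 24;
    bool negative = ltime < 0;
    uint64_t ultime = std::abs(ltime);
    intpart = ultime >> 24;
    int64_t hh = (intpart >> 12) % (1 << 10);
    int64_t mm = (intpart >> 6) % (1 << 6);
    int64_t ss = intpart % (1 << 6);
    int64_t micro = (hh * 3600 + mm * 60 + ss) * 1000000;
    return negative ? -micro : micro;
}

int main()
{
    // 01:02:03 encodes as 0x800000 + (1 << 12 | 2 << 6 | 3).
    assert(mysqlTime2ToMicroseconds(0x800000 + ((1 << 12) | (2 << 6) | 3)) == 3723000000LL);
}
```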
row.push_back(Field{UInt64{bitmap1.to_ulong()}}); + break; + } case MYSQL_TYPE_BIT: { UInt32 bits = ((meta >> 8) * 8) + (meta & 0xff); @@ -631,6 +734,7 @@ namespace MySQLReplication row.push_back(Field{String{val}}); break; } + case MYSQL_TYPE_GEOMETRY: case MYSQL_TYPE_BLOB: { UInt32 size = 0; diff --git a/src/Core/NamesAndTypes.cpp b/src/Core/NamesAndTypes.cpp index b9098d3308d1..be947623a966 100644 --- a/src/Core/NamesAndTypes.cpp +++ b/src/Core/NamesAndTypes.cpp @@ -1,4 +1,6 @@ #include + +#include #include #include #include @@ -113,7 +115,7 @@ bool NamesAndTypesList::isSubsetOf(const NamesAndTypesList & rhs) const { NamesAndTypes vector(rhs.begin(), rhs.end()); vector.insert(vector.end(), begin(), end()); - std::sort(vector.begin(), vector.end()); + ::sort(vector.begin(), vector.end()); return std::unique(vector.begin(), vector.end()) == vector.begin() + rhs.size(); } @@ -121,16 +123,16 @@ size_t NamesAndTypesList::sizeOfDifference(const NamesAndTypesList & rhs) const { NamesAndTypes vector(rhs.begin(), rhs.end()); vector.insert(vector.end(), begin(), end()); - std::sort(vector.begin(), vector.end()); + ::sort(vector.begin(), vector.end()); return (std::unique(vector.begin(), vector.end()) - vector.begin()) * 2 - size() - rhs.size(); } void NamesAndTypesList::getDifference(const NamesAndTypesList & rhs, NamesAndTypesList & deleted, NamesAndTypesList & added) const { NamesAndTypes lhs_vector(begin(), end()); - std::sort(lhs_vector.begin(), lhs_vector.end()); + ::sort(lhs_vector.begin(), lhs_vector.end()); NamesAndTypes rhs_vector(rhs.begin(), rhs.end()); - std::sort(rhs_vector.begin(), rhs_vector.end()); + ::sort(rhs_vector.begin(), rhs_vector.end()); std::set_difference(lhs_vector.begin(), lhs_vector.end(), rhs_vector.begin(), rhs_vector.end(), std::back_inserter(deleted)); diff --git a/src/Core/Settings.cpp b/src/Core/Settings.cpp index 8daf39d9928f..87d7eee0daa2 100644 --- a/src/Core/Settings.cpp +++ b/src/Core/Settings.cpp @@ -85,16 +85,18 @@ void Settings::addProgramOptions(boost::program_options::options_description & o { for (const auto & field : all()) { - const std::string_view name = field.getName(); - auto on_program_option - = boost::function1([this, name](const std::string & value) { set(name, value); }); - options.add(boost::shared_ptr(new boost::program_options::option_description( - name.data(), - boost::program_options::value()->composing()->notifier(on_program_option), - field.getDescription()))); + addProgramOption(options, field); } } +void Settings::addProgramOption(boost::program_options::options_description & options, const SettingFieldRef & field) +{ + const std::string_view name = field.getName(); + auto on_program_option = boost::function1([this, name](const std::string & value) { set(name, value); }); + options.add(boost::shared_ptr(new boost::program_options::option_description( + name.data(), boost::program_options::value()->composing()->notifier(on_program_option), field.getDescription()))); +} + void Settings::checkNoSettingNamesAtTopLevel(const Poco::Util::AbstractConfiguration & config, const String & config_path) { if (config.getBool("skip_check_for_incorrect_settings", false)) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index d55be808aa85..c4b4ab778670 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -75,7 +75,11 @@ class IColumn; M(UInt64, s3_max_single_read_retries, 4, "The maximum number of retries during single S3 read.", 0) \ M(UInt64, s3_max_redirects, 10, "Max number of S3 redirects hops allowed.", 0) \ M(UInt64, 
s3_max_connections, 1024, "The maximum number of connections per server.", 0) \ + M(Bool, s3_truncate_on_insert, false, "Enables or disables truncate before insert in s3 engine tables.", 0) \ + M(Bool, s3_create_new_file_on_insert, false, "Enables or disables creating a new file on each insert in s3 engine tables", 0) \ M(UInt64, hdfs_replication, 0, "The actual number of replications can be specified when the hdfs file is created.", 0) \ + M(Bool, hdfs_truncate_on_insert, false, "Enables or disables truncate before insert in s3 engine tables", 0) \ + M(Bool, hdfs_create_new_file_on_insert, false, "Enables or disables creating a new file on each insert in hdfs engine tables", 0) \ M(UInt64, hsts_max_age, 0, "Expired time for hsts. 0 means disable HSTS.", 0) \ M(Bool, extremes, false, "Calculate minimums and maximums of the result columns. They can be output in JSON-formats.", IMPORTANT) \ M(Bool, use_uncompressed_cache, false, "Whether to use the cache of uncompressed blocks.", 0) \ @@ -166,6 +170,7 @@ class IColumn; M(Bool, force_index_by_date, false, "Throw an exception if there is a partition key in a table, and it is not used.", 0) \ M(Bool, force_primary_key, false, "Throw an exception if there is primary key in a table, and it is not used.", 0) \ M(Bool, use_skip_indexes, true, "Use data skipping indexes during query execution.", 0) \ + M(Bool, use_skip_indexes_if_final, false, "If query has FINAL, then skipping data based on indexes may produce incorrect result, hence disabled by default.", 0) \ M(String, force_data_skipping_indices, "", "Comma separated list of strings or literals with the name of the data skipping indices that should be used during query execution, otherwise an exception will be thrown.", 0) \ \ M(Float, max_streams_to_max_threads_ratio, 1, "Allows you to use more sources than the number of threads - to more evenly distribute work across threads. It is assumed that this is a temporary solution, since it will be possible in the future to make the number of sources equal to the number of threads, but for each source to dynamically select available work for itself.", 0) \ @@ -490,6 +495,7 @@ class IColumn; \ M(Bool, engine_file_empty_if_not_exists, false, "Allows to select data from a file engine table without file", 0) \ M(Bool, engine_file_truncate_on_insert, false, "Enables or disables truncate before insert in file engine tables", 0) \ + M(Bool, engine_file_allow_create_multiple_files, false, "Enables or disables creating a new file on each insert in file engine tables if format has suffix.", 0) \ M(Bool, allow_experimental_database_replicated, false, "Allow to create databases with Replicated engine", 0) \ M(UInt64, database_replicated_initial_query_timeout_sec, 300, "How long initial DDL query should wait for Replicated database to precess previous DDL queue entries", 0) \ M(UInt64, max_distributed_depth, 5, "Maximum distributed query depth", 0) \ @@ -549,7 +555,7 @@ class IColumn; /** Experimental functions */ \ M(Bool, allow_experimental_funnel_functions, false, "Enable experimental functions for funnel analysis.", 0) \ M(Bool, allow_experimental_nlp_functions, false, "Enable experimental functions for natural language processing.", 0) \ - + M(String, insert_deduplication_token, "", "If not empty, used for duplicate detection instead of data digest", 0) \ // End of COMMON_SETTINGS // Please add settings related to formats into the FORMAT_FACTORY_SETTINGS and move obsolete settings to OBSOLETE_SETTINGS. 
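A hedged sketch of the decision implied by the new truncate_on_insert / create_new_file_on_insert setting pairs above. The helper below is hypothetical, not from the patch; it only mirrors the documented semantics, and the ".1" suffix scheme is illustrative:

```cpp
#include <stdexcept>
#include <string>

/// Hypothetical helper: pick where an INSERT should write when the target
/// object may already exist.
std::string resolveInsertTarget(const std::string & key, bool key_exists,
                                bool truncate_on_insert, bool create_new_file_on_insert)
{
    if (!key_exists || truncate_on_insert)
        return key;                       /// reuse the key, truncating any old data
    if (create_new_file_on_insert)
        return key + ".1";                /// illustrative suffix scheme only
    throw std::runtime_error(
        "Object already exists; set s3_truncate_on_insert or s3_create_new_file_on_insert");
}
```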
@@ -604,6 +610,7 @@ class IColumn; M(Char, input_format_hive_text_collection_items_delimiter, '\x02', "Delimiter between collection(array or map) items in Hive Text File", 0) \ M(Char, input_format_hive_text_map_keys_delimiter, '\x03', "Delimiter between a pair of map key/values in Hive Text File", 0) \ M(UInt64, input_format_msgpack_number_of_columns, 0, "The number of columns in inserted MsgPack data. Used for automatic schema inference from data.", 0) \ + M(MsgPackUUIDRepresentation, output_format_msgpack_uuid_representation, FormatSettings::MsgPackUUIDRepresentation::EXT, "The way how to output UUID in MsgPack format.", 0) \ M(UInt64, input_format_max_rows_to_read_for_schema_inference, 100, "The maximum rows of data to read for automatic schema inference", 0) \ \ M(DateTimeInputFormat, date_time_input_format, FormatSettings::DateTimeInputFormat::Basic, "Method to read DateTime from text input formats. Possible values: 'basic' and 'best_effort'.", 0) \ @@ -711,6 +718,8 @@ struct Settings : public BaseSettings, public IHints<2, Settings static void checkNoSettingNamesAtTopLevel(const Poco::Util::AbstractConfiguration & config, const String & config_path); std::vector getAllRegisteredNames() const override; + + void addProgramOption(boost::program_options::options_description & options, const SettingFieldRef & field); }; /* diff --git a/src/Core/SettingsEnums.cpp b/src/Core/SettingsEnums.cpp index b62575c97303..5d16f0a5c85e 100644 --- a/src/Core/SettingsEnums.cpp +++ b/src/Core/SettingsEnums.cpp @@ -130,4 +130,10 @@ IMPLEMENT_SETTING_ENUM(EscapingRule, ErrorCodes::BAD_ARGUMENTS, {"JSON", FormatSettings::EscapingRule::JSON}, {"XML", FormatSettings::EscapingRule::XML}, {"Raw", FormatSettings::EscapingRule::Raw}}) + +IMPLEMENT_SETTING_ENUM(MsgPackUUIDRepresentation , ErrorCodes::BAD_ARGUMENTS, + {{"bin", FormatSettings::MsgPackUUIDRepresentation::BIN}, + {"str", FormatSettings::MsgPackUUIDRepresentation::STR}, + {"ext", FormatSettings::MsgPackUUIDRepresentation::EXT}}) + } diff --git a/src/Core/SettingsEnums.h b/src/Core/SettingsEnums.h index 106589f5d24f..d29e4f15c274 100644 --- a/src/Core/SettingsEnums.h +++ b/src/Core/SettingsEnums.h @@ -172,4 +172,6 @@ DECLARE_SETTING_ENUM_WITH_RENAME(EnumComparingMode, FormatSettings::EnumComparin DECLARE_SETTING_ENUM_WITH_RENAME(EscapingRule, FormatSettings::EscapingRule) +DECLARE_SETTING_ENUM_WITH_RENAME(MsgPackUUIDRepresentation, FormatSettings::MsgPackUUIDRepresentation) + } diff --git a/src/Core/TypeId.h b/src/Core/TypeId.h index 3218db10778a..390587731842 100644 --- a/src/Core/TypeId.h +++ b/src/Core/TypeId.h @@ -10,12 +10,12 @@ namespace DB * Returns TypeIndex::Nothing if type was not present in TypeIndex; * Returns TypeIndex element otherwise. * - * @example TypeId == TypeIndex::UInt8 - * @example TypeId == TypeIndex::Nothing + * @example TypeToTypeIndex == TypeIndex::UInt8 + * @example TypeToTypeIndex == TypeIndex::Nothing */ -template inline constexpr TypeIndex TypeId = TypeIndex::Nothing; +template inline constexpr TypeIndex TypeToTypeIndex = TypeIndex::Nothing; -template struct ReverseTypeIdT : std::false_type {}; +template struct TypeIndexToTypeHelper : std::false_type {}; /** * Obtain real type from TypeIndex if possible. @@ -23,14 +23,14 @@ template struct ReverseTypeIdT : std::false_type {}; * Returns a type alias if is corresponds to TypeIndex value. * Yields a compiler error otherwise. 
* - * @example ReverseTypeId == UInt8 + * @example TypeIndexToType == UInt8 */ -template using ReverseTypeId = typename ReverseTypeIdT::T; -template constexpr bool HasReverseTypeId = ReverseTypeIdT::value; +template using TypeIndexToType = typename TypeIndexToTypeHelper::T; +template constexpr bool TypeIndexHasType = TypeIndexToTypeHelper::value; #define TYPEID_MAP(_A) \ - template <> inline constexpr TypeIndex TypeId<_A> = TypeIndex::_A; \ - template <> struct ReverseTypeIdT : std::true_type { using T = _A; }; + template <> inline constexpr TypeIndex TypeToTypeIndex<_A> = TypeIndex::_A; \ + template <> struct TypeIndexToTypeHelper : std::true_type { using T = _A; }; TYPEID_MAP(UInt8) TYPEID_MAP(UInt16) @@ -58,4 +58,7 @@ TYPEID_MAP(String) struct Array; TYPEID_MAP(Array) + +#undef TYPEID_MAP + } diff --git a/src/Core/config_core.h.in b/src/Core/config_core.h.in index 11dd9bf96f13..5d37f8cf361e 100644 --- a/src/Core/config_core.h.in +++ b/src/Core/config_core.h.in @@ -7,9 +7,7 @@ #cmakedefine01 USE_RDKAFKA #cmakedefine01 USE_AMQPCPP #cmakedefine01 USE_EMBEDDED_COMPILER -#cmakedefine01 USE_INTERNAL_LLVM_LIBRARY #cmakedefine01 USE_SSL -#cmakedefine01 USE_OPENCL #cmakedefine01 USE_LDAP #cmakedefine01 USE_ROCKSDB #cmakedefine01 USE_LIBPQXX @@ -18,3 +16,6 @@ #cmakedefine01 USE_NLP #cmakedefine01 USE_KRB5 #cmakedefine01 USE_FILELOG +#cmakedefine01 USE_ODBC +#cmakedefine01 USE_REPLXX +#cmakedefine01 USE_JEMALLOC diff --git a/src/Core/examples/CMakeLists.txt b/src/Core/examples/CMakeLists.txt index c8846eb1743e..cd74ce68136c 100644 --- a/src/Core/examples/CMakeLists.txt +++ b/src/Core/examples/CMakeLists.txt @@ -1,6 +1,5 @@ add_executable (string_pool string_pool.cpp) -target_link_libraries (string_pool PRIVATE clickhouse_common_io) -target_include_directories (string_pool SYSTEM BEFORE PRIVATE ${SPARSEHASH_INCLUDE_DIR}) +target_link_libraries (string_pool PRIVATE clickhouse_common_io ch_contrib::sparsehash) add_executable (field field.cpp) target_link_libraries (field PRIVATE dbms) @@ -10,9 +9,6 @@ target_link_libraries (string_ref_hash PRIVATE clickhouse_common_io) add_executable (mysql_protocol mysql_protocol.cpp) target_link_libraries (mysql_protocol PRIVATE dbms) -if(USE_SSL) - target_include_directories (mysql_protocol SYSTEM PRIVATE ${OPENSSL_INCLUDE_DIR}) -endif() add_executable (coro coro.cpp) target_link_libraries (coro PRIVATE clickhouse_common_io) diff --git a/src/Core/tests/gtest_field.cpp b/src/Core/tests/gtest_field.cpp index 5230f13bf8a3..5585442d835c 100644 --- a/src/Core/tests/gtest_field.cpp +++ b/src/Core/tests/gtest_field.cpp @@ -7,14 +7,14 @@ GTEST_TEST(Field, FromBool) { { Field f{false}; - ASSERT_EQ(f.getType(), Field::Types::UInt64); + ASSERT_EQ(f.getType(), Field::Types::Bool); ASSERT_EQ(f.get(), 0); ASSERT_EQ(f.get(), false); } { Field f{true}; - ASSERT_EQ(f.getType(), Field::Types::UInt64); + ASSERT_EQ(f.getType(), Field::Types::Bool); ASSERT_EQ(f.get(), 1); ASSERT_EQ(f.get(), true); } @@ -22,7 +22,7 @@ GTEST_TEST(Field, FromBool) { Field f; f = false; - ASSERT_EQ(f.getType(), Field::Types::UInt64); + ASSERT_EQ(f.getType(), Field::Types::Bool); ASSERT_EQ(f.get(), 0); ASSERT_EQ(f.get(), false); } @@ -30,7 +30,7 @@ GTEST_TEST(Field, FromBool) { Field f; f = true; - ASSERT_EQ(f.getType(), Field::Types::UInt64); + ASSERT_EQ(f.getType(), Field::Types::Bool); ASSERT_EQ(f.get(), 1); ASSERT_EQ(f.get(), true); } diff --git a/src/DataTypes/DataTypeDecimalBase.h b/src/DataTypes/DataTypeDecimalBase.h index dc8c99b06bcb..bdb399788250 100644 --- a/src/DataTypes/DataTypeDecimalBase.h 
+++ b/src/DataTypes/DataTypeDecimalBase.h @@ -1,14 +1,15 @@ #pragma once -#include +#include +#include + +#include #include +#include #include #include #include -#include -#include - namespace DB { @@ -59,7 +60,7 @@ class DataTypeDecimalBase : public IDataType public: using FieldType = T; using ColumnType = ColumnDecimal; - static constexpr auto type_id = TypeId; + static constexpr auto type_id = TypeToTypeIndex; static constexpr bool is_parametric = true; @@ -75,7 +76,7 @@ class DataTypeDecimalBase : public IDataType throw Exception("Scale " + std::to_string(scale) + " is out of bounds", ErrorCodes::ARGUMENT_OUT_OF_BOUND); } - TypeIndex getTypeId() const override { return TypeId; } + TypeIndex getTypeId() const override { return TypeToTypeIndex; } Field getDefault() const override; MutableColumnPtr createColumn() const override; diff --git a/src/DataTypes/DataTypeLowCardinality.h b/src/DataTypes/DataTypeLowCardinality.h index 38b2109eec64..57f67ddad7af 100644 --- a/src/DataTypes/DataTypeLowCardinality.h +++ b/src/DataTypes/DataTypeLowCardinality.h @@ -13,7 +13,7 @@ class DataTypeLowCardinality : public IDataType DataTypePtr dictionary_type; public: - DataTypeLowCardinality(DataTypePtr dictionary_type_); + explicit DataTypeLowCardinality(DataTypePtr dictionary_type_); const DataTypePtr & getDictionaryType() const { return dictionary_type; } diff --git a/src/DataTypes/DataTypeNumberBase.h b/src/DataTypes/DataTypeNumberBase.h index 59dc26ed13a8..01c298a4a33d 100644 --- a/src/DataTypes/DataTypeNumberBase.h +++ b/src/DataTypes/DataTypeNumberBase.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include @@ -20,13 +21,13 @@ class DataTypeNumberBase : public IDataType public: static constexpr bool is_parametric = false; static constexpr auto family_name = TypeName; - static constexpr auto type_id = TypeId; + static constexpr auto type_id = TypeToTypeIndex; using FieldType = T; using ColumnType = ColumnVector; const char * getFamilyName() const override { return TypeName.data(); } - TypeIndex getTypeId() const override { return TypeId; } + TypeIndex getTypeId() const override { return TypeToTypeIndex; } Field getDefault() const override; diff --git a/src/DataTypes/DataTypeString.cpp b/src/DataTypes/DataTypeString.cpp index 7fa3a394be8a..b52d20242042 100644 --- a/src/DataTypes/DataTypeString.cpp +++ b/src/DataTypes/DataTypeString.cpp @@ -92,5 +92,7 @@ void registerDataTypeString(DataTypeFactory & factory) factory.registerAlias("BINARY LARGE OBJECT", "String", DataTypeFactory::CaseInsensitive); factory.registerAlias("BINARY VARYING", "String", DataTypeFactory::CaseInsensitive); factory.registerAlias("VARBINARY", "String", DataTypeFactory::CaseInsensitive); + factory.registerAlias("GEOMETRY", "String", DataTypeFactory::CaseInsensitive); //mysql + } } diff --git a/src/DataTypes/DataTypeTuple.cpp b/src/DataTypes/DataTypeTuple.cpp index ad6d4e2943b4..a5e9868cf890 100644 --- a/src/DataTypes/DataTypeTuple.cpp +++ b/src/DataTypes/DataTypeTuple.cpp @@ -32,6 +32,7 @@ namespace ErrorCodes extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int SIZES_OF_COLUMNS_IN_TUPLE_DOESNT_MATCH; extern const int ILLEGAL_INDEX; + extern const int LOGICAL_ERROR; } @@ -156,8 +157,19 @@ MutableColumnPtr DataTypeTuple::createColumn() const MutableColumnPtr DataTypeTuple::createColumn(const ISerialization & serialization) const { - const auto & element_serializations = - assert_cast(serialization).getElementsSerializations(); + /// If we read subcolumn of nested Tuple, it may be wrapped to SerializationNamed + /// 
several times to allow to reconstruct the substream path name. + /// Here we don't need substream path name, so we drop first several wrapper serializations. + + const auto * current_serialization = &serialization; + while (const auto * serialization_named = typeid_cast(current_serialization)) + current_serialization = serialization_named->getNested().get(); + + const auto * serialization_tuple = typeid_cast(current_serialization); + if (!serialization_tuple) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected serialization to create column of type Tuple"); + + const auto & element_serializations = serialization_tuple->getElementsSerializations(); size_t size = elems.size(); assert(element_serializations.size() == size); diff --git a/src/DataTypes/DataTypesDecimal.h b/src/DataTypes/DataTypesDecimal.h index 199f7796f025..fb590dd1d4b2 100644 --- a/src/DataTypes/DataTypesDecimal.h +++ b/src/DataTypes/DataTypesDecimal.h @@ -38,7 +38,7 @@ class DataTypeDecimal final : public DataTypeDecimalBase const char * getFamilyName() const override { return family_name; } std::string doGetName() const override; - TypeIndex getTypeId() const override { return TypeId; } + TypeIndex getTypeId() const override { return TypeToTypeIndex; } bool canBePromoted() const override { return true; } DataTypePtr promoteNumericType() const override; diff --git a/src/DataTypes/DataTypesNumber.cpp b/src/DataTypes/DataTypesNumber.cpp index 0c9a410077fc..d85f52a7e6b4 100644 --- a/src/DataTypes/DataTypesNumber.cpp +++ b/src/DataTypes/DataTypesNumber.cpp @@ -86,7 +86,10 @@ void registerDataTypeNumbers(DataTypeFactory & factory) factory.registerAlias("INT UNSIGNED", "UInt32", DataTypeFactory::CaseInsensitive); factory.registerAlias("INTEGER UNSIGNED", "UInt32", DataTypeFactory::CaseInsensitive); factory.registerAlias("BIGINT UNSIGNED", "UInt64", DataTypeFactory::CaseInsensitive); - factory.registerAlias("BIT", "UInt64", DataTypeFactory::CaseInsensitive); + factory.registerAlias("BIT", "UInt64", DataTypeFactory::CaseInsensitive); /// MySQL + factory.registerAlias("SET", "UInt64", DataTypeFactory::CaseInsensitive); /// MySQL + factory.registerAlias("YEAR", "UInt16", DataTypeFactory::CaseInsensitive); + factory.registerAlias("TIME", "Int64", DataTypeFactory::CaseInsensitive); } } diff --git a/src/DataTypes/EnumValues.cpp b/src/DataTypes/EnumValues.cpp index ab5ea0ca2492..ffa8f55a9745 100644 --- a/src/DataTypes/EnumValues.cpp +++ b/src/DataTypes/EnumValues.cpp @@ -1,5 +1,7 @@ #include #include +#include + namespace DB { @@ -18,7 +20,7 @@ EnumValues::EnumValues(const Values & values_) if (values.empty()) throw Exception{"DataTypeEnum enumeration cannot be empty", ErrorCodes::EMPTY_DATA_PASSED}; - std::sort(std::begin(values), std::end(values), [] (auto & left, auto & right) + ::sort(std::begin(values), std::end(values), [] (auto & left, auto & right) { return left.second < right.second; }); diff --git a/src/DataTypes/FieldToDataType.cpp b/src/DataTypes/FieldToDataType.cpp index c1a8cacd5c25..8ca5ffac7c5b 100644 --- a/src/DataTypes/FieldToDataType.cpp +++ b/src/DataTypes/FieldToDataType.cpp @@ -152,4 +152,9 @@ DataTypePtr FieldToDataType::operator() (const AggregateFunctionStateData & x) c return DataTypeFactory::instance().get(name); } +DataTypePtr FieldToDataType::operator()(const bool &) const +{ + return DataTypeFactory::instance().get("Bool"); +} + } diff --git a/src/DataTypes/FieldToDataType.h b/src/DataTypes/FieldToDataType.h index ca83ce868fc6..72575c070f57 100644 --- a/src/DataTypes/FieldToDataType.h +++ 
b/src/DataTypes/FieldToDataType.h @@ -38,6 +38,7 @@ class FieldToDataType : public StaticVisitor DataTypePtr operator() (const DecimalField & x) const; DataTypePtr operator() (const DecimalField & x) const; DataTypePtr operator() (const AggregateFunctionStateData & x) const; + DataTypePtr operator() (const bool & x) const; }; } diff --git a/src/DataTypes/IDataType.h b/src/DataTypes/IDataType.h index 9f3458b1ecef..5bc089e085fd 100644 --- a/src/DataTypes/IDataType.h +++ b/src/DataTypes/IDataType.h @@ -1,14 +1,15 @@ #pragma once #include -#include #include #include #include +#include #include #include #include + namespace DB { @@ -475,7 +476,7 @@ template inline bool isColumnedAsDecimalT(const DataType & data_type) { const WhichDataType which(data_type); - return (which.isDecimal() || which.isDateTime64()) && which.idx == TypeId; + return (which.isDecimal() || which.isDateTime64()) && which.idx == TypeToTypeIndex; } template @@ -522,6 +523,7 @@ inline bool isBool(const DataTypePtr & data_type) template constexpr bool IsDataTypeDecimal = false; template constexpr bool IsDataTypeNumber = false; template constexpr bool IsDataTypeDateOrDateTime = false; +template constexpr bool IsDataTypeEnum = false; template constexpr bool IsDataTypeDecimalOrNumber = IsDataTypeDecimal || IsDataTypeNumber; @@ -546,4 +548,9 @@ template <> inline constexpr bool IsDataTypeDateOrDateTime = tru template <> inline constexpr bool IsDataTypeDateOrDateTime = true; template <> inline constexpr bool IsDataTypeDateOrDateTime = true; +template +class DataTypeEnum; + +template inline constexpr bool IsDataTypeEnum> = true; + } diff --git a/src/DataTypes/Serializations/ISerialization.cpp b/src/DataTypes/Serializations/ISerialization.cpp index 5cdc037d5cbd..7df4a956c1a2 100644 --- a/src/DataTypes/Serializations/ISerialization.cpp +++ b/src/DataTypes/Serializations/ISerialization.cpp @@ -167,8 +167,10 @@ String getNameForSubstreamPath( /// Because nested data may be represented not by Array of Tuple, /// but by separate Array columns with names in a form of a.b, /// and name is encoded as a whole. - stream_name += (escape_tuple_delimiter && it->escape_tuple_delimiter ? - escapeForFileName(".") : ".") + escapeForFileName(it->tuple_element_name); + if (escape_tuple_delimiter && it->escape_tuple_delimiter) + stream_name += escapeForFileName("." + it->tuple_element_name); + else + stream_name += "." 
+ it->tuple_element_name; } } diff --git a/src/DataTypes/Serializations/SerializationArray.cpp b/src/DataTypes/Serializations/SerializationArray.cpp index e3b535a2a11b..30ee5e98b74e 100644 --- a/src/DataTypes/Serializations/SerializationArray.cpp +++ b/src/DataTypes/Serializations/SerializationArray.cpp @@ -37,10 +37,11 @@ void SerializationArray::deserializeBinary(Field & field, ReadBuffer & istr) con { size_t size; readVarUInt(size, istr); - field = Array(size); + field = Array(); Array & arr = get(field); + arr.reserve(size); for (size_t i = 0; i < size; ++i) - nested->deserializeBinary(arr[i], istr); + nested->deserializeBinary(arr.emplace_back(), istr); } diff --git a/src/DataTypes/Serializations/SerializationInfo.cpp b/src/DataTypes/Serializations/SerializationInfo.cpp index 42d3d14b672e..22df95fc8f74 100644 --- a/src/DataTypes/Serializations/SerializationInfo.cpp +++ b/src/DataTypes/Serializations/SerializationInfo.cpp @@ -158,6 +158,19 @@ void SerializationInfoByName::add(const SerializationInfoByName & other) } } +void SerializationInfoByName::replaceData(const SerializationInfoByName & other) +{ + for (const auto & [name, new_info] : other) + { + auto & old_info = (*this)[name]; + + if (old_info) + old_info->replaceData(*new_info); + else + old_info = new_info->clone(); + } +} + void SerializationInfoByName::writeJSON(WriteBuffer & out) const { Poco::JSON::Object object; diff --git a/src/DataTypes/Serializations/SerializationInfo.h b/src/DataTypes/Serializations/SerializationInfo.h index f7af5d77217d..d83fc16f2f6d 100644 --- a/src/DataTypes/Serializations/SerializationInfo.h +++ b/src/DataTypes/Serializations/SerializationInfo.h @@ -89,6 +89,11 @@ class SerializationInfoByName : public std::unordered_map()) + field = Map(); + Map & map = field.get(); + map.reserve(size); + for (size_t i = 0; i < size; ++i) { Tuple tuple(2); key->deserializeBinary(tuple[0], istr); value->deserializeBinary(tuple[1], istr); - elem = std::move(tuple); + map.push_back(std::move(tuple)); } } diff --git a/src/DataTypes/Serializations/SerializationNamed.h b/src/DataTypes/Serializations/SerializationNamed.h index 91db0cf67f4b..343b96c16e30 100644 --- a/src/DataTypes/Serializations/SerializationNamed.h +++ b/src/DataTypes/Serializations/SerializationNamed.h @@ -5,6 +5,11 @@ namespace DB { +/// Serialization wrapper that acts like nested serialization, +/// but adds a passed name to the substream path like the +/// read column was the tuple element with this name. +/// It's used while reading subcolumns of complex types. +/// In particular while reading components of named tuples. 
class SerializationNamed final : public SerializationWrapper { private: diff --git a/src/DataTypes/Serializations/SerializationNumber.cpp b/src/DataTypes/Serializations/SerializationNumber.cpp index c5e2b31e0437..4b6b79151bcb 100644 --- a/src/DataTypes/Serializations/SerializationNumber.cpp +++ b/src/DataTypes/Serializations/SerializationNumber.cpp @@ -8,7 +8,6 @@ #include #include #include -#include #include namespace DB diff --git a/src/DataTypes/Serializations/SerializationTuple.cpp b/src/DataTypes/Serializations/SerializationTuple.cpp index cd5a6b65a3ce..8dc15fc98410 100644 --- a/src/DataTypes/Serializations/SerializationTuple.cpp +++ b/src/DataTypes/Serializations/SerializationTuple.cpp @@ -1,4 +1,3 @@ -#include #include #include #include @@ -44,11 +43,11 @@ void SerializationTuple::deserializeBinary(Field & field, ReadBuffer & istr) con { const size_t size = elems.size(); - Tuple tuple(size); - for (const auto i : collections::range(0, size)) - elems[i]->deserializeBinary(tuple[i], istr); - - field = tuple; + field = Tuple(); + Tuple & tuple = get(field); + tuple.reserve(size); + for (size_t i = 0; i < size; ++i) + elems[i]->deserializeBinary(tuple.emplace_back(), istr); } void SerializationTuple::serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr) const @@ -73,7 +72,7 @@ static void addElementSafe(size_t num_elems, IColumn & column, F && impl) // Check that all columns now have the same size. size_t new_size = column.size(); - for (auto i : collections::range(1, num_elems)) + for (size_t i = 1; i < num_elems; ++i) { const auto & element_column = extractElementColumn(column, i); if (element_column.size() != new_size) @@ -87,7 +86,7 @@ static void addElementSafe(size_t num_elems, IColumn & column, F && impl) } catch (...) 
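The Array, Map and Tuple deserializeBinary hunks above all switch to the same allocation pattern: reserve() once, then deserialize straight into emplace_back(), instead of value-initialising `size` elements up front and assigning over them. A standalone miniature of the pattern, with stand-in Field and reader types:

```cpp
#include <string>
#include <vector>

struct Field { std::string payload; };    // stand-in for DB::Field

template <typename ReadElem>
std::vector<Field> deserializeSeq(size_t size, ReadElem && read_elem)
{
    std::vector<Field> out;
    out.reserve(size);                    // one allocation up front
    for (size_t i = 0; i < size; ++i)
        read_elem(out.emplace_back());    // construct in place, fill from the stream
    return out;
}
```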
{ - for (const auto & i : collections::range(0, num_elems)) + for (size_t i = 0; i < num_elems; ++i) { auto & element_column = extractElementColumn(column, i); if (element_column.size() > old_size) @@ -102,7 +101,7 @@ void SerializationTuple::deserializeBinary(IColumn & column, ReadBuffer & istr) { addElementSafe(elems.size(), column, [&] { - for (const auto & i : collections::range(0, elems.size())) + for (size_t i = 0; i < elems.size(); ++i) elems[i]->deserializeBinary(extractElementColumn(column, i), istr); }); } @@ -110,7 +109,7 @@ void SerializationTuple::deserializeBinary(IColumn & column, ReadBuffer & istr) void SerializationTuple::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { writeChar('(', ostr); - for (const auto i : collections::range(0, elems.size())) + for (size_t i = 0; i < elems.size(); ++i) { if (i != 0) writeChar(',', ostr); @@ -126,7 +125,7 @@ void SerializationTuple::deserializeText(IColumn & column, ReadBuffer & istr, co addElementSafe(elems.size(), column, [&] { - for (const auto i : collections::range(0, size)) + for (size_t i = 0; i < size; ++i) { skipWhitespaceIfAny(istr); if (i != 0) @@ -158,7 +157,7 @@ void SerializationTuple::serializeTextJSON(const IColumn & column, size_t row_nu && have_explicit_names) { writeChar('{', ostr); - for (const auto i : collections::range(0, elems.size())) + for (size_t i = 0; i < elems.size(); ++i) { if (i != 0) { @@ -173,7 +172,7 @@ void SerializationTuple::serializeTextJSON(const IColumn & column, size_t row_nu else { writeChar('[', ostr); - for (const auto i : collections::range(0, elems.size())) + for (size_t i = 0; i < elems.size(); ++i) { if (i != 0) writeChar(',', ostr); @@ -195,7 +194,7 @@ void SerializationTuple::deserializeTextJSON(IColumn & column, ReadBuffer & istr addElementSafe(elems.size(), column, [&] { // Require all elements but in arbitrary order. 
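addElementSafe() above relies on a rollback contract: if deserializing one tuple element throws, every element column is cut back to its previous size so no half-inserted row survives. A standalone miniature using std::vector as a stand-in for IColumn:

```cpp
#include <functional>
#include <stdexcept>
#include <vector>

using Column = std::vector<int>;          // stand-in for IColumn

void addElementSafe(std::vector<Column> & columns, const std::function<void()> & impl)
{
    const size_t old_size = columns.at(0).size();
    try
    {
        impl();                           // may insert into some columns, then throw
        for (const auto & col : columns)  // every column must have grown by one row
            if (col.size() != old_size + 1)
                throw std::logic_error("Cannot read a tuple: not all elements are present");
    }
    catch (...)
    {
        for (auto & col : columns)        // roll back partially inserted values
            col.resize(old_size);
        throw;
    }
}
```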
- for (auto i : collections::range(0, elems.size())) + for (size_t i = 0; i < elems.size(); ++i) { if (i > 0) { @@ -226,7 +225,7 @@ void SerializationTuple::deserializeTextJSON(IColumn & column, ReadBuffer & istr addElementSafe(elems.size(), column, [&] { - for (const auto i : collections::range(0, size)) + for (size_t i = 0; i < size; ++i) { skipWhitespaceIfAny(istr); if (i != 0) @@ -246,7 +245,7 @@ void SerializationTuple::deserializeTextJSON(IColumn & column, ReadBuffer & istr void SerializationTuple::serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { writeCString("<tuple>", ostr); - for (const auto i : collections::range(0, elems.size())) + for (size_t i = 0; i < elems.size(); ++i) { writeCString("<elem>", ostr); elems[i]->serializeTextXML(extractElementColumn(column, i), row_num, ostr, settings); @@ -257,7 +256,7 @@ void SerializationTuple::serializeTextXML(const IColumn & column, size_t row_num void SerializationTuple::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { - for (const auto i : collections::range(0, elems.size())) + for (size_t i = 0; i < elems.size(); ++i) { if (i != 0) writeChar(settings.csv.tuple_delimiter, ostr); @@ -270,7 +269,7 @@ void SerializationTuple::deserializeTextCSV(IColumn & column, ReadBuffer & istr, addElementSafe(elems.size(), column, [&] { const size_t size = elems.size(); - for (const auto i : collections::range(0, size)) + for (size_t i = 0; i < size; ++i) { if (i != 0) { @@ -362,7 +361,7 @@ void SerializationTuple::serializeBinaryBulkWithMultipleStreams( { auto * tuple_state = checkAndGetState<SerializeBinaryBulkStateTuple>(state); - for (const auto i : collections::range(0, elems.size())) + for (size_t i = 0; i < elems.size(); ++i) { const auto & element_col = extractElementColumn(column, i); elems[i]->serializeBinaryBulkWithMultipleStreams(element_col, offset, limit, settings, tuple_state->states[i]); @@ -382,7 +381,7 @@ void SerializationTuple::deserializeBinaryBulkWithMultipleStreams( auto & column_tuple = assert_cast<ColumnTuple &>(*mutable_column); settings.avg_value_size_hint = 0; - for (const auto i : collections::range(0, elems.size())) + for (size_t i = 0; i < elems.size(); ++i) elems[i]->deserializeBinaryBulkWithMultipleStreams(column_tuple.getColumnPtr(i), limit, settings, tuple_state->states[i], cache); } diff --git a/src/DataTypes/getLeastSupertype.cpp b/src/DataTypes/getLeastSupertype.cpp index f8d10535be24..22f6a0775040 100644 --- a/src/DataTypes/getLeastSupertype.cpp +++ b/src/DataTypes/getLeastSupertype.cpp @@ -530,4 +530,16 @@ DataTypePtr getLeastSupertype(const DataTypes & types) throw Exception(getExceptionMessagePrefix(types), ErrorCodes::NO_COMMON_TYPE); } +DataTypePtr tryGetLeastSupertype(const DataTypes & types) +{ + try + { + return getLeastSupertype(types); + } + catch (...) + { + return nullptr; + } +} + } diff --git a/src/DataTypes/getLeastSupertype.h b/src/DataTypes/getLeastSupertype.h index 57e011a0529c..c35ec7d722c9 100644 --- a/src/DataTypes/getLeastSupertype.h +++ b/src/DataTypes/getLeastSupertype.h @@ -14,4 +14,7 @@ namespace DB */ DataTypePtr getLeastSupertype(const DataTypes & types); +/// Same as above but return nullptr instead of throwing exception.
+DataTypePtr tryGetLeastSupertype(const DataTypes & types); + } diff --git a/src/Databases/DatabaseAtomic.cpp b/src/Databases/DatabaseAtomic.cpp index cb0c1cdae953..721bf79199b1 100644 --- a/src/Databases/DatabaseAtomic.cpp +++ b/src/Databases/DatabaseAtomic.cpp @@ -80,7 +80,7 @@ void DatabaseAtomic::drop(ContextPtr) } catch (...) { - LOG_WARNING(log, getCurrentExceptionMessage(true)); + LOG_WARNING(log, fmt::runtime(getCurrentExceptionMessage(true))); } fs::remove_all(getMetadataPath()); } @@ -469,7 +469,7 @@ void DatabaseAtomic::tryCreateSymlink(const String & table_name, const String & } catch (...) { - LOG_WARNING(log, getCurrentExceptionMessage(true)); + LOG_WARNING(log, fmt::runtime(getCurrentExceptionMessage(true))); } } @@ -482,7 +482,7 @@ void DatabaseAtomic::tryRemoveSymlink(const String & table_name) } catch (...) { - LOG_WARNING(log, getCurrentExceptionMessage(true)); + LOG_WARNING(log, fmt::runtime(getCurrentExceptionMessage(true))); } } @@ -527,7 +527,7 @@ void DatabaseAtomic::renameDatabase(ContextPtr query_context, const String & new } catch (...) { - LOG_WARNING(log, getCurrentExceptionMessage(true)); + LOG_WARNING(log, fmt::runtime(getCurrentExceptionMessage(true))); } auto new_name_escaped = escapeForFileName(new_name); diff --git a/src/Databases/DatabaseLazy.cpp b/src/Databases/DatabaseLazy.cpp index 1ff84b53eee9..7f8f1b917d7a 100644 --- a/src/Databases/DatabaseLazy.cpp +++ b/src/Databases/DatabaseLazy.cpp @@ -12,6 +12,7 @@ #include #include +#include #include #include @@ -151,7 +152,7 @@ DatabaseTablesIteratorPtr DatabaseLazy::getTablesIterator(ContextPtr, const Filt if (!filter_by_table_name || filter_by_table_name(table_name)) filtered_tables.push_back(table_name); } - std::sort(filtered_tables.begin(), filtered_tables.end()); + ::sort(filtered_tables.begin(), filtered_tables.end()); return std::make_unique(*this, std::move(filtered_tables)); } diff --git a/src/Databases/DatabaseMemory.cpp b/src/Databases/DatabaseMemory.cpp index 3309d25b1c28..a92c19f67c00 100644 --- a/src/Databases/DatabaseMemory.cpp +++ b/src/Databases/DatabaseMemory.cpp @@ -78,7 +78,9 @@ ASTPtr DatabaseMemory::getCreateDatabaseQuery() const auto create_query = std::make_shared(); create_query->setDatabase(getDatabaseName()); create_query->set(create_query->storage, std::make_shared()); - create_query->storage->set(create_query->storage->engine, makeASTFunction(getEngineName())); + auto engine = makeASTFunction(getEngineName()); + engine->no_empty_args = true; + create_query->storage->set(create_query->storage->engine, engine); if (const auto comment_value = getDatabaseComment(); !comment_value.empty()) create_query->set(create_query->comment, std::make_shared(comment_value)); diff --git a/src/Databases/DatabaseOnDisk.cpp b/src/Databases/DatabaseOnDisk.cpp index 165bad950f54..29591a5f88f3 100644 --- a/src/Databases/DatabaseOnDisk.cpp +++ b/src/Databases/DatabaseOnDisk.cpp @@ -316,7 +316,7 @@ void DatabaseOnDisk::dropTable(ContextPtr local_context, const String & table_na } catch (...) 
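The fmt::runtime() wrappers appearing in these LOG_* calls exist because fmt 8 validates format strings at compile time: a message that is only known at runtime must either be passed as an argument to a constant format string, or explicitly opted out of the static check. A standalone illustration against libfmt:

```cpp
#include <fmt/format.h>
#include <string>

int main()
{
    std::string msg = "Cannot rename table";         // value known only at runtime

    // fmt::print(msg);                  // rejected: not a compile-time format string
    fmt::print("{}\n", msg);             // option 1: pass the message as an argument

    std::string pattern = "error: {}\n";             // format string built at runtime
    fmt::print(fmt::runtime(pattern), "disk full");  // option 2: opt out of the check
}
```

The LOG_WARNING(log, fmt::runtime(...)) changes in this patch follow option 2, since the exception message is used directly as the format string.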
{ - LOG_WARNING(log, getCurrentExceptionMessage(__PRETTY_FUNCTION__)); + LOG_WARNING(log, fmt::runtime(getCurrentExceptionMessage(__PRETTY_FUNCTION__))); attachTable(local_context, table_name, table, table_data_path_relative); if (renamed) fs::rename(table_metadata_path_drop, table_metadata_path); diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp index 84d2edd1bb18..d9d9f5b45f60 100644 --- a/src/Databases/DatabaseReplicated.cpp +++ b/src/Databases/DatabaseReplicated.cpp @@ -142,7 +142,7 @@ ClusterPtr DatabaseReplicated::getClusterImpl() const "It's possible if the first replica is not fully created yet " "or if the last replica was just dropped or due to logical error", database_name); Int32 cversion = stat.cversion; - std::sort(hosts.begin(), hosts.end()); + ::sort(hosts.begin(), hosts.end()); std::vector futures; futures.reserve(hosts.size()); diff --git a/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp b/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp index ce5a3e9a947f..9dbe611537b8 100644 --- a/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp +++ b/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -315,6 +316,47 @@ getTableOutput(const String & database_name, const String & table_name, ContextM return std::move(res.pipeline); } +static inline String reWriteMysqlQueryColumn(mysqlxx::Pool::Entry & connection, const String & database_name, const String & table_name, const Settings & global_settings) +{ + Block tables_columns_sample_block + { + { std::make_shared(), "column_name" }, + { std::make_shared(), "column_type" } + }; + + const String & query = "SELECT COLUMN_NAME AS column_name, COLUMN_TYPE AS column_type FROM INFORMATION_SCHEMA.COLUMNS" + " WHERE TABLE_SCHEMA = '" + backQuoteIfNeed(database_name) + + "' AND TABLE_NAME = '" + backQuoteIfNeed(table_name) + "' ORDER BY ORDINAL_POSITION"; + + StreamSettings mysql_input_stream_settings(global_settings, false, true); + auto mysql_source = std::make_unique(connection, query, tables_columns_sample_block, mysql_input_stream_settings); + + Block block; + WriteBufferFromOwnString query_columns; + QueryPipeline pipeline(std::move(mysql_source)); + PullingPipelineExecutor executor(pipeline); + while (executor.pull(block)) + { + const auto & column_name_col = *block.getByPosition(0).column; + const auto & column_type_col = *block.getByPosition(1).column; + size_t rows = block.rows(); + for (size_t i = 0; i < rows; ++i) + { + String column_name = column_name_col[i].safeGet(); + String column_type = column_type_col[i].safeGet(); + //we can do something special conversion to guarantee select results is the same as the binlog parse results + if (column_type.starts_with("set")) + { + query_columns << (backQuote(column_name) + " + 0"); + } else + query_columns << backQuote(column_name); + query_columns << ","; + } + } + String query_columns_str = query_columns.str(); + return query_columns_str.substr(0, query_columns_str.length() - 1); +} + static inline void dumpDataForTables( mysqlxx::Pool::Entry & connection, const std::unordered_map & need_dumping_tables, const String & query_prefix, const String & database_name, const String & mysql_database_name, @@ -334,9 +376,10 @@ static inline void dumpDataForTables( auto pipeline = getTableOutput(database_name, table_name, query_context); StreamSettings mysql_input_stream_settings(context->getSettingsRef()); - auto input = std::make_unique( - connection, "SELECT * FROM 
" + backQuoteIfNeed(mysql_database_name) + "." + backQuoteIfNeed(table_name), - pipeline.getHeader(), mysql_input_stream_settings); + String mysql_select_all_query = "SELECT " + reWriteMysqlQueryColumn(connection, mysql_database_name, table_name, context->getSettings()) + " FROM " + + backQuoteIfNeed(mysql_database_name) + "." + backQuoteIfNeed(table_name); + LOG_INFO(&Poco::Logger::get("MaterializedMySQLSyncThread(" + database_name + ")"), "mysql_select_all_query is {}", mysql_select_all_query); + auto input = std::make_unique(connection, mysql_select_all_query, pipeline.getHeader(), mysql_input_stream_settings); auto counting = std::make_shared(pipeline.getHeader()); Pipe pipe(std::move(input)); pipe.addTransform(counting); diff --git a/src/Databases/SQLite/DatabaseSQLite.cpp b/src/Databases/SQLite/DatabaseSQLite.cpp index f4dab8a91a86..fb7425872856 100644 --- a/src/Databases/SQLite/DatabaseSQLite.cpp +++ b/src/Databases/SQLite/DatabaseSQLite.cpp @@ -94,7 +94,7 @@ bool DatabaseSQLite::checkSQLiteTable(const String & table_name) const if (!sqlite_db) sqlite_db = openSQLiteDB(database_path, getContext(), /* throw_on_error */true); - const String query = fmt::format("SELECT name FROM sqlite_master WHERE type='table' AND name='{table_name}';", table_name); + const String query = fmt::format("SELECT name FROM sqlite_master WHERE type='table' AND name='{}';", table_name); auto callback_get_data = [](void * res, int, char **, char **) -> int { diff --git a/src/Databases/SQLite/SQLiteUtils.cpp b/src/Databases/SQLite/SQLiteUtils.cpp index 954576d9c05c..5b38caeabeeb 100644 --- a/src/Databases/SQLite/SQLiteUtils.cpp +++ b/src/Databases/SQLite/SQLiteUtils.cpp @@ -20,7 +20,7 @@ void processSQLiteError(const String & message, bool throw_on_error) if (throw_on_error) throw Exception(ErrorCodes::PATH_ACCESS_DENIED, message); else - LOG_ERROR(&Poco::Logger::get("SQLiteEngine"), message); + LOG_ERROR(&Poco::Logger::get("SQLiteEngine"), fmt::runtime(message)); } diff --git a/src/Dictionaries/CMakeLists.txt b/src/Dictionaries/CMakeLists.txt index b1b3d6d55e0a..19e82c45cc2e 100644 --- a/src/Dictionaries/CMakeLists.txt +++ b/src/Dictionaries/CMakeLists.txt @@ -33,14 +33,11 @@ target_link_libraries(clickhouse_dictionaries string_utils ) -target_link_libraries(clickhouse_dictionaries - PUBLIC - abseil_swiss_tables) +target_link_libraries(clickhouse_dictionaries PUBLIC ch_contrib::abseil_swiss_tables) -if(USE_CASSANDRA) - target_include_directories(clickhouse_dictionaries SYSTEM PRIVATE ${CASSANDRA_INCLUDE_DIR}) +if (TARGET ch_contrib::cassandra) + target_link_libraries(clickhouse_dictionaries PRIVATE ch_contrib::cassandra) endif() add_subdirectory(Embedded) - -target_include_directories(clickhouse_dictionaries SYSTEM PRIVATE ${SPARSEHASH_INCLUDE_DIR}) +target_link_libraries(clickhouse_dictionaries PRIVATE ch_contrib::sparsehash) diff --git a/src/Dictionaries/CacheDictionary.cpp b/src/Dictionaries/CacheDictionary.cpp index c21ea763ac39..cad3e3b8799c 100644 --- a/src/Dictionaries/CacheDictionary.cpp +++ b/src/Dictionaries/CacheDictionary.cpp @@ -271,7 +271,6 @@ ColumnUInt8::Ptr CacheDictionary::hasKeys(const Columns & k if (dictionary_key_type == DictionaryKeyType::Complex) dict_struct.validateKeyTypes(key_types); - DictionaryKeysArenaHolder arena_holder; DictionaryKeysExtractor extractor(key_columns, arena_holder.getComplexKeyArena()); const auto keys = extractor.extractAllKeys(); @@ -494,7 +493,8 @@ Pipe CacheDictionary::read(const Names & column_names, size if constexpr (dictionary_key_type == 
DictionaryKeyType::Simple) { auto keys = cache_storage_ptr->getCachedSimpleKeys(); - key_columns = {ColumnWithTypeAndName(getColumnFromPODArray(keys), std::make_shared(), dict_struct.id->name)}; + auto keys_column = getColumnFromPODArray(std::move(keys)); + key_columns = {ColumnWithTypeAndName(std::move(keys_column), std::make_shared(), dict_struct.id->name)}; } else { diff --git a/src/Dictionaries/CacheDictionaryStorage.h b/src/Dictionaries/CacheDictionaryStorage.h index 5fd1bd420c6b..d6d04075a3d0 100644 --- a/src/Dictionaries/CacheDictionaryStorage.h +++ b/src/Dictionaries/CacheDictionaryStorage.h @@ -553,6 +553,7 @@ class CacheDictionaryStorage final : public ICacheDictionaryStorage ContainerType, ContainerType, ContainerType, + ContainerType, ContainerType, ContainerType, ContainerType, diff --git a/src/Dictionaries/CassandraHelpers.cpp b/src/Dictionaries/CassandraHelpers.cpp index a33ab288a345..235e29b5bd89 100644 --- a/src/Dictionaries/CassandraHelpers.cpp +++ b/src/Dictionaries/CassandraHelpers.cpp @@ -58,15 +58,15 @@ void cassandraLogCallback(const CassLogMessage * message, void * data) { Poco::Logger * logger = static_cast(data); if (message->severity == CASS_LOG_CRITICAL || message->severity == CASS_LOG_ERROR) - LOG_ERROR(logger, message->message); + LOG_ERROR(logger, fmt::runtime(message->message)); else if (message->severity == CASS_LOG_WARN) - LOG_WARNING(logger, message->message); + LOG_WARNING(logger, fmt::runtime(message->message)); else if (message->severity == CASS_LOG_INFO) - LOG_INFO(logger, message->message); + LOG_INFO(logger, fmt::runtime(message->message)); else if (message->severity == CASS_LOG_DEBUG) - LOG_DEBUG(logger, message->message); + LOG_DEBUG(logger, fmt::runtime(message->message)); else if (message->severity == CASS_LOG_TRACE) - LOG_TRACE(logger, message->message); + LOG_TRACE(logger, fmt::runtime(message->message)); } } diff --git a/src/Dictionaries/DictionaryHelpers.h b/src/Dictionaries/DictionaryHelpers.h index 1e6a4a5fb44b..5c2b6b27afd3 100644 --- a/src/Dictionaries/DictionaryHelpers.h +++ b/src/Dictionaries/DictionaryHelpers.h @@ -682,6 +682,15 @@ static ColumnPtr getColumnFromPODArray(const PaddedPODArray & array) return column_vector; } +template +static ColumnPtr getColumnFromPODArray(PaddedPODArray && array) +{ + auto column_vector = ColumnVector::create(); + column_vector->getData() = std::move(array); + + return column_vector; +} + template static ColumnPtr getColumnFromPODArray(const PaddedPODArray & array, size_t start, size_t length) { diff --git a/src/Dictionaries/DictionarySource.cpp b/src/Dictionaries/DictionarySource.cpp index d3058db87f48..526d9fc85cdc 100644 --- a/src/Dictionaries/DictionarySource.cpp +++ b/src/Dictionaries/DictionarySource.cpp @@ -60,8 +60,8 @@ class DictionarySource : public SourceWithProgress const auto & attributes_types_to_read = coordinator->getAttributesTypesToRead(); const auto & attributes_default_values_columns = coordinator->getAttributesDefaultValuesColumns(); - const auto & dictionary = coordinator->getDictionary(); - auto attributes_columns = dictionary->getColumns( + const auto & read_columns_func = coordinator->getReadColumnsFunc(); + auto attributes_columns = read_columns_func( attributes_names_to_read, attributes_types_to_read, key_columns, diff --git a/src/Dictionaries/DictionarySource.h b/src/Dictionaries/DictionarySource.h index 0237e1338dfb..7809c9584198 100644 --- a/src/Dictionaries/DictionarySource.h +++ b/src/Dictionaries/DictionarySource.h @@ -19,6 +19,8 @@ class DictionarySourceCoordinator 
final : public shared_ptr_helper; + Pipe read(size_t num_streams); private: @@ -31,6 +33,15 @@ class DictionarySourceCoordinator final : public shared_ptr_helpergetColumns(attribute_names, result_types, key_columns, key_types, default_values_columns); + }) { initialize(column_names); } @@ -45,6 +56,31 @@ class DictionarySourceCoordinator final : public shared_ptr_helpergetColumns(attribute_names, result_types, key_columns, key_types, default_values_columns); + }) + { + initialize(column_names); + } + + explicit DictionarySourceCoordinator( + std::shared_ptr dictionary_, + const Names & column_names, + ColumnsWithTypeAndName && key_columns_with_type_, + ColumnsWithTypeAndName && data_columns_with_type_, + size_t max_block_size_, + ReadColumnsFunc read_columns_func_) + : dictionary(std::move(dictionary_)) + , key_columns_with_type(std::move(key_columns_with_type_)) + , data_columns_with_type(std::move(data_columns_with_type_)) + , max_block_size(max_block_size_) + , read_columns_func(std::move(read_columns_func_)) { initialize(column_names); } @@ -61,6 +97,8 @@ class DictionarySourceCoordinator final : public shared_ptr_helper & getAttributesDefaultValuesColumns() const { return attributes_default_values_columns; } + const ReadColumnsFunc & getReadColumnsFunc() const { return read_columns_func; } + const std::shared_ptr & getDictionary() const { return dictionary; } void initialize(const Names & column_names); @@ -79,6 +117,8 @@ class DictionarySourceCoordinator final : public shared_ptr_helper attributes_default_values_columns; const size_t max_block_size; + ReadColumnsFunc read_columns_func; + std::atomic parallel_read_block_index = 0; }; diff --git a/src/Dictionaries/DictionaryStructure.cpp b/src/Dictionaries/DictionaryStructure.cpp index aca566c9258e..3e29f3efe766 100644 --- a/src/Dictionaries/DictionaryStructure.cpp +++ b/src/Dictionaries/DictionaryStructure.cpp @@ -1,18 +1,21 @@ -#include "DictionaryStructure.h" +#include + +#include +#include +#include + +#include +#include + +#include + +#include #include #include #include #include #include #include -#include -#include -#include -#include - -#include -#include -#include namespace DB @@ -45,8 +48,8 @@ std::optional tryGetAttributeUnderlyingType(TypeIndex i switch (index) /// Special cases which do not map TypeIndex::T -> AttributeUnderlyingType::T { case TypeIndex::Date: return AttributeUnderlyingType::UInt16; + case TypeIndex::Date32: return AttributeUnderlyingType::Int32; case TypeIndex::DateTime: return AttributeUnderlyingType::UInt32; - case TypeIndex::DateTime64: return AttributeUnderlyingType::UInt64; default: break; } @@ -379,7 +382,8 @@ std::vector DictionaryStructure::getAttributes( void DictionaryStructure::parseRangeConfiguration(const Poco::Util::AbstractConfiguration & config, const std::string & structure_prefix) { - const char * range_default_type = "Date"; + static constexpr auto range_default_type = "Date"; + if (config.has(structure_prefix + ".range_min")) range_min.emplace(makeDictionaryTypedSpecialAttribute(config, structure_prefix + ".range_min", range_default_type)); @@ -392,7 +396,10 @@ void DictionaryStructure::parseRangeConfiguration(const Poco::Util::AbstractConf "Dictionary structure should have both 'range_min' and 'range_max' either specified or not."); } - if (range_min && range_max && !range_min->type->equals(*range_max->type)) + if (!range_min) + return; + + if (!range_min->type->equals(*range_max->type)) { throw Exception(ErrorCodes::BAD_ARGUMENTS, "Dictionary structure 'range_min' and 'range_max' 
should have the same type, " @@ -402,15 +409,20 @@ void DictionaryStructure::parseRangeConfiguration(const Poco::Util::AbstractConf range_max->type->getName()); } - if (range_min && !range_min->type->isValueRepresentedByInteger()) + WhichDataType range_type(range_min->type); + + bool valid_range = range_type.isInt() || range_type.isUInt() || range_type.isDecimal() || range_type.isFloat() || range_type.isEnum() + || range_type.isDate() || range_type.isDate32() || range_type.isDateTime() || range_type.isDateTime64(); + + if (!valid_range) { throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Dictionary structure type of 'range_min' and 'range_max' should be an integer, Date, DateTime, or Enum." + "Dictionary structure type of 'range_min' and 'range_max' should be an Integer, Float, Decimal, Date, Date32, DateTime, DateTime64, or Enum." " Actual 'range_min' and 'range_max' type is {}", range_min->type->getName()); } - if ((range_min && !range_min->expression.empty()) || (range_max && !range_max->expression.empty())) + if (!range_min->expression.empty() || !range_max->expression.empty()) has_expressions = true; } diff --git a/src/Dictionaries/DictionaryStructure.h b/src/Dictionaries/DictionaryStructure.h index 3b5164f7f48a..9014b09b072e 100644 --- a/src/Dictionaries/DictionaryStructure.h +++ b/src/Dictionaries/DictionaryStructure.h @@ -7,12 +7,14 @@ #include +#include + #include +#include #include #include #include -#include -#include + #if defined(__GNUC__) /// GCC mistakenly warns about the names in enum class. @@ -26,7 +28,7 @@ using TypeIndexUnderlying = magic_enum::underlying_type_t; // We need to be able to map TypeIndex -> AttributeUnderlyingType and AttributeUnderlyingType -> real type // The first can be done by defining AttributeUnderlyingType enum values to TypeIndex values and then performing // an enum_cast. -// The second can be achieved by using ReverseTypeId +// The second can be achieved by using TypeIndexToType #define map_item(__T) __T = static_cast(TypeIndex::__T) enum class AttributeUnderlyingType : TypeIndexUnderlying @@ -35,6 +37,7 @@ enum class AttributeUnderlyingType : TypeIndexUnderlying map_item(UInt8), map_item(UInt16), map_item(UInt32), map_item(UInt64), map_item(UInt128), map_item(UInt256), map_item(Float32), map_item(Float64), map_item(Decimal32), map_item(Decimal64), map_item(Decimal128), map_item(Decimal256), + map_item(DateTime64), map_item(UUID), map_item(String), map_item(Array) }; @@ -73,7 +76,7 @@ template struct DictionaryAttributeType { /// Converts @c type to its underlying type e.g. AttributeUnderlyingType::UInt8 -> UInt8 - using AttributeType = ReverseTypeId< + using AttributeType = TypeIndexToType< static_cast( static_cast(type))>; }; diff --git a/src/Dictionaries/Embedded/CMakeLists.txt b/src/Dictionaries/Embedded/CMakeLists.txt index 20c7b3c832ac..236111bc8012 100644 --- a/src/Dictionaries/Embedded/CMakeLists.txt +++ b/src/Dictionaries/Embedded/CMakeLists.txt @@ -2,4 +2,7 @@ include("${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake") add_headers_and_sources(clickhouse_dictionaries_embedded .)
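The DictionarySource.h changes earlier in this section replace a hard-wired dictionary->getColumns(...) call with an injectable ReadColumnsFunc. A simplified sketch of the pattern, with toy types standing in for the ClickHouse ones (Dictionary, Coordinator and Columns here are illustrative only):

```cpp
#include <functional>
#include <string>
#include <utility>
#include <vector>

using Columns = std::vector<std::string>;  /// toy stand-in for DB::Columns

struct Dictionary
{
    Columns getColumns(const std::vector<std::string> & names) const
    {
        return names;  /// pretend every requested attribute was materialized
    }
};

struct Coordinator
{
    using ReadColumnsFunc = std::function<Columns(const std::vector<std::string> &)>;

    /// Default: forward to the dictionary, as the previously hard-coded call did.
    /// (The referenced dictionary must outlive the coordinator, as in the patch.)
    explicit Coordinator(const Dictionary & dict)
        : read_columns_func([&dict](const auto & names) { return dict.getColumns(names); })
    {
    }

    /// Callers such as RangeHashedDictionary::read can swap in their own reader.
    explicit Coordinator(ReadColumnsFunc func) : read_columns_func(std::move(func)) {}

    Columns read(const std::vector<std::string> & names) const { return read_columns_func(names); }

    ReadColumnsFunc read_columns_func;
};
```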
add_headers_and_sources(clickhouse_dictionaries_embedded GeodataProviders) add_library(clickhouse_dictionaries_embedded ${clickhouse_dictionaries_embedded_sources}) -target_link_libraries(clickhouse_dictionaries_embedded PRIVATE clickhouse_common_io ${MYSQLXX_LIBRARY}) +target_link_libraries(clickhouse_dictionaries_embedded PRIVATE clickhouse_common_io) +if (TARGET ch::mysqlxx) + target_link_libraries(clickhouse_dictionaries_embedded PRIVATE ch::mysqlxx) +endif() diff --git a/src/Dictionaries/FlatDictionary.cpp b/src/Dictionaries/FlatDictionary.cpp index 5d26ad3ebc24..40cc735557c4 100644 --- a/src/Dictionaries/FlatDictionary.cpp +++ b/src/Dictionaries/FlatDictionary.cpp @@ -291,30 +291,52 @@ void FlatDictionary::blockToAttributes(const Block & block) DictionaryKeysArenaHolder arena_holder; DictionaryKeysExtractor keys_extractor({ keys_column }, arena_holder.getComplexKeyArena()); - auto keys = keys_extractor.extractAllKeys(); + size_t keys_size = keys_extractor.getKeysSize(); - HashSet already_processed_keys; + static constexpr size_t key_offset = 1; - size_t key_offset = 1; - for (size_t attribute_index = 0; attribute_index < attributes.size(); ++attribute_index) + size_t attributes_size = attributes.size(); + + if (unlikely(attributes_size == 0)) + { + for (size_t i = 0; i < keys_size; ++i) + { + auto key = keys_extractor.extractCurrentKey(); + + if (unlikely(key >= configuration.max_array_size)) + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, + "{}: identifier should be less than {}", + getFullName(), + toString(configuration.max_array_size)); + + if (key >= loaded_keys.size()) + { + const size_t elements_count = key + 1; + loaded_keys.resize(elements_count, false); + } + + loaded_keys[key] = true; + + keys_extractor.rollbackCurrentKey(); + } + + return; + } + + for (size_t attribute_index = 0; attribute_index < attributes_size; ++attribute_index) { const IColumn & attribute_column = *block.safeGetByPosition(attribute_index + key_offset).column; Attribute & attribute = attributes[attribute_index]; - for (size_t i = 0; i < keys.size(); ++i) + for (size_t i = 0; i < keys_size; ++i) { - auto key = keys[i]; - - if (already_processed_keys.find(key) != nullptr) - continue; - - already_processed_keys.insert(key); + auto key = keys_extractor.extractCurrentKey(); setAttributeValue(attribute, key, attribute_column[i]); - ++element_count; + keys_extractor.rollbackCurrentKey(); } - already_processed_keys.clear(); + keys_extractor.reset(); } } @@ -369,6 +391,12 @@ void FlatDictionary::loadData() else updateData(); + element_count = 0; + + size_t loaded_keys_size = loaded_keys.size(); + for (size_t i = 0; i < loaded_keys_size; ++i) + element_count += loaded_keys[i]; + if (configuration.require_nonempty && 0 == element_count) throw Exception(ErrorCodes::DICTIONARY_IS_EMPTY, "{}: dictionary source is empty and 'require_nonempty' property is set.", getFullName()); } @@ -495,21 +523,6 @@ void FlatDictionary::resize(Attribute & attribute, UInt64 key) } } -template -void FlatDictionary::setAttributeValueImpl(Attribute & attribute, UInt64 key, const T & value) -{ - auto & array = std::get>(attribute.container); - array[key] = value; - loaded_keys[key] = true; -} - -template <> -void FlatDictionary::setAttributeValueImpl(Attribute & attribute, UInt64 key, const String & value) -{ - auto arena_value = copyStringInArena(string_arena, value); - setAttributeValueImpl(attribute, key, arena_value); -} - void FlatDictionary::setAttributeValue(Attribute & attribute, const UInt64 key, const Field & value) { auto 
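In the FlatDictionary::blockToAttributes rewrite above, a dictionary with zero attributes still has to record which keys exist, so the keys go straight into the loaded_keys bitmap. A toy version of that bookkeeping (FlatKeyIndex is a hypothetical name; max_array_size mirrors configuration.max_array_size):

```cpp
#include <cstdint>
#include <stdexcept>
#include <vector>

struct FlatKeyIndex
{
    std::vector<bool> loaded_keys;
    uint64_t max_array_size = 500000;  /// guard against absurdly large identifiers

    void markLoaded(uint64_t key)
    {
        if (key >= max_array_size)
            throw std::out_of_range("identifier should be less than max_array_size");

        /// Grow the bitmap lazily, exactly up to the largest key seen so far.
        if (key >= loaded_keys.size())
            loaded_keys.resize(key + 1, false);

        loaded_keys[key] = true;
    }

    /// element_count in loadData() is recomputed by summing this bitmap,
    /// as the hunk below does after loading finishes.
    size_t elementCount() const
    {
        size_t count = 0;
        for (bool loaded : loaded_keys)
            count += loaded;
        return count;
    }
};
```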
type_call = [&](const auto & dictionary_attribute_type) @@ -520,17 +533,27 @@ void FlatDictionary::setAttributeValue(Attribute & attribute, const UInt64 key, resize(attribute, key); - if (attribute.is_nullable_set) + if (attribute.is_nullable_set && value.isNull()) { - if (value.isNull()) - { - attribute.is_nullable_set->insert(key); - loaded_keys[key] = true; - return; - } + attribute.is_nullable_set->insert(key); + loaded_keys[key] = true; + return; } - setAttributeValueImpl(attribute, key, value.get()); + auto & attribute_value = value.get(); + + auto & container = std::get>(attribute.container); + loaded_keys[key] = true; + + if constexpr (std::is_same_v) + { + auto arena_value = copyStringInArena(string_arena, attribute_value); + container[key] = arena_value; + } + else + { + container[key] = attribute_value; + } }; callOnDictionaryAttributeType(attribute.type, type_call); @@ -547,7 +570,8 @@ Pipe FlatDictionary::read(const Names & column_names, size_t max_block_size, siz if (loaded_keys[key_index]) keys.push_back(key_index); - ColumnsWithTypeAndName key_columns = {ColumnWithTypeAndName(getColumnFromPODArray(keys), std::make_shared(), dict_struct.id->name)}; + auto keys_column = getColumnFromPODArray(std::move(keys)); + ColumnsWithTypeAndName key_columns = {ColumnWithTypeAndName(std::move(keys_column), std::make_shared(), dict_struct.id->name)}; std::shared_ptr dictionary = shared_from_this(); auto coordinator = DictionarySourceCoordinator::create(dictionary, column_names, std::move(key_columns), max_block_size); diff --git a/src/Dictionaries/FlatDictionary.h b/src/Dictionaries/FlatDictionary.h index e8f40ea1d662..2578fef3ecbc 100644 --- a/src/Dictionaries/FlatDictionary.h +++ b/src/Dictionaries/FlatDictionary.h @@ -127,6 +127,7 @@ class FlatDictionary final : public IDictionary ContainerType, ContainerType, ContainerType, + ContainerType, ContainerType, ContainerType, ContainerType, @@ -154,9 +155,6 @@ class FlatDictionary final : public IDictionary template void resize(Attribute & attribute, UInt64 key); - template - void setAttributeValueImpl(Attribute & attribute, UInt64 key, const T & value); - void setAttributeValue(Attribute & attribute, UInt64 key, const Field & value); const DictionaryStructure dict_struct; diff --git a/src/Dictionaries/HashedArrayDictionary.cpp b/src/Dictionaries/HashedArrayDictionary.cpp index 148aaafb1600..e35340c76189 100644 --- a/src/Dictionaries/HashedArrayDictionary.cpp +++ b/src/Dictionaries/HashedArrayDictionary.cpp @@ -158,12 +158,6 @@ ColumnUInt8::Ptr HashedArrayDictionary::hasKeys(const Colum auto result = ColumnUInt8::create(keys_size, false); auto & out = result->getData(); - if (attributes.empty()) - { - query_count.fetch_add(keys_size, std::memory_order_relaxed); - return result; - } - size_t keys_found = 0; for (size_t requested_key_index = 0; requested_key_index < keys_size; ++requested_key_index) @@ -753,9 +747,14 @@ Pipe HashedArrayDictionary::read(const Names & column_names ColumnsWithTypeAndName key_columns; if constexpr (dictionary_key_type == DictionaryKeyType::Simple) - key_columns = {ColumnWithTypeAndName(getColumnFromPODArray(keys), std::make_shared(), dict_struct.id->name)}; + { + auto keys_column = getColumnFromPODArray(std::move(keys)); + key_columns = {ColumnWithTypeAndName(std::move(keys_column), std::make_shared(), dict_struct.id->name)}; + } else + { key_columns = deserializeColumnsWithTypeAndNameFromKeys(dict_struct, keys, 0, keys.size()); + } std::shared_ptr dictionary = shared_from_this(); auto coordinator = 
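The inlined setAttributeValue above dispatches on the attribute type with `if constexpr`: trivially copyable values are assigned directly, while strings are first copied into an arena so the container can hold a cheap reference. A self-contained sketch of that split, with std::string_view standing in for StringRef and a deque-backed toy arena (both assumptions, not the real types):

```cpp
#include <cstddef>
#include <deque>
#include <string>
#include <string_view>
#include <type_traits>
#include <vector>

/// Toy arena: owns string payloads at stable addresses, so containers can
/// store non-owning views instead of full std::string objects.
struct Arena
{
    std::deque<std::string> storage;

    std::string_view copy(std::string_view value) { return storage.emplace_back(value); }
};

template <typename T>
void setValue(std::vector<T> & container, size_t key, const T & value, Arena & arena)
{
    if (key >= container.size())
        container.resize(key + 1);

    if constexpr (std::is_same_v<T, std::string_view>)
        container[key] = arena.copy(value);  /// strings: move the bytes into the arena
    else
        container[key] = value;              /// numeric types: plain assignment
}
```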
DictionarySourceCoordinator::create(dictionary, column_names, std::move(key_columns), max_block_size); diff --git a/src/Dictionaries/HashedArrayDictionary.h b/src/Dictionaries/HashedArrayDictionary.h index 80436a3d0443..a649fddcc39f 100644 --- a/src/Dictionaries/HashedArrayDictionary.h +++ b/src/Dictionaries/HashedArrayDictionary.h @@ -147,6 +147,7 @@ class HashedArrayDictionary final : public IDictionary AttributeContainerType, AttributeContainerType, AttributeContainerType, + AttributeContainerType, AttributeContainerType, AttributeContainerType, AttributeContainerType, diff --git a/src/Dictionaries/HashedDictionary.cpp b/src/Dictionaries/HashedDictionary.cpp index 7025c771e8fd..c83735a63303 100644 --- a/src/Dictionaries/HashedDictionary.cpp +++ b/src/Dictionaries/HashedDictionary.cpp @@ -177,15 +177,25 @@ ColumnUInt8::Ptr HashedDictionary::hasKeys(const Co auto result = ColumnUInt8::create(keys_size, false); auto & out = result->getData(); - if (attributes.empty()) + size_t keys_found = 0; + + if (unlikely(attributes.empty())) { + for (size_t requested_key_index = 0; requested_key_index < keys_size; ++requested_key_index) + { + auto requested_key = extractor.extractCurrentKey(); + out[requested_key_index] = no_attributes_container.find(requested_key) != no_attributes_container.end(); + keys_found += out[requested_key_index]; + extractor.rollbackCurrentKey(); + } + query_count.fetch_add(keys_size, std::memory_order_relaxed); + found_count.fetch_add(keys_found, std::memory_order_relaxed); return result; } const auto & attribute = attributes.front(); bool is_attribute_nullable = attribute.is_nullable_set.has_value(); - size_t keys_found = 0; getAttributeContainer(0, [&](const auto & container) { @@ -423,7 +433,25 @@ void HashedDictionary::blockToAttributes(const Bloc Field column_value_to_insert; - for (size_t attribute_index = 0; attribute_index < attributes.size(); ++attribute_index) + size_t attributes_size = attributes.size(); + + if (unlikely(attributes_size == 0)) + { + for (size_t key_index = 0; key_index < keys_size; ++key_index) + { + auto key = keys_extractor.extractCurrentKey(); + + if constexpr (std::is_same_v) + key = copyStringInArena(string_arena, key); + + no_attributes_container.insert(key); + keys_extractor.rollbackCurrentKey(); + } + + return; + } + + for (size_t attribute_index = 0; attribute_index < attributes_size; ++attribute_index) { const IColumn & attribute_column = *block.safeGetByPosition(skip_keys_size_offset + attribute_index).column; auto & attribute = attributes[attribute_index]; @@ -487,7 +515,21 @@ void HashedDictionary::resize(size_t added_rows) if (unlikely(!added_rows)) return; - for (size_t attribute_index = 0; attribute_index < attributes.size(); ++attribute_index) + size_t attributes_size = attributes.size(); + + if (unlikely(attributes_size == 0)) + { + size_t reserve_size = added_rows + no_attributes_container.size(); + + if constexpr (sparse) + no_attributes_container.resize(reserve_size); + else + no_attributes_container.reserve(reserve_size); + + return; + } + + for (size_t attribute_index = 0; attribute_index < attributes_size; ++attribute_index) { getAttributeContainer(attribute_index, [added_rows](auto & attribute_map) { @@ -570,7 +612,9 @@ void HashedDictionary::loadData() } } else + { resize(block.rows()); + } blockToAttributes(block); } @@ -589,9 +633,10 @@ void HashedDictionary::loadData() template void HashedDictionary::calculateBytesAllocated() { - bytes_allocated += attributes.size() * sizeof(attributes.front()); + size_t 
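The HashedDictionary hunks above add a dedicated no_attributes_container so that hasKeys and blockToAttributes keep working when the dictionary declares no attributes at all. A toy equivalent using std::unordered_set (the real code picks HashSet or google::sparse_hash_set depending on the `sparse` template flag):

```cpp
#include <cstdint>
#include <unordered_set>

struct KeyOnlyDictionary
{
    std::unordered_set<uint64_t> no_attributes_container;

    /// Mirrors resize(): reserve room for the incoming rows up front. The
    /// sparse variant spells the same operation resize() instead of reserve().
    void prepareFor(size_t added_rows)
    {
        no_attributes_container.reserve(added_rows + no_attributes_container.size());
    }

    /// blockToAttributes path when attributes are empty: only record the key.
    void insertKey(uint64_t key) { no_attributes_container.insert(key); }

    /// hasKeys path when attributes are empty: a plain membership test.
    bool has(uint64_t key) const
    {
        return no_attributes_container.find(key) != no_attributes_container.end();
    }
};
```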
attributes_size = attributes.size(); + bytes_allocated += attributes_size * sizeof(attributes.front()); - for (size_t i = 0; i < attributes.size(); ++i) + for (size_t i = 0; i < attributes_size; ++i) { getAttributeContainer(i, [&](const auto & container) { @@ -622,6 +667,22 @@ void HashedDictionary::calculateBytesAllocated() bytes_allocated = attributes[i].is_nullable_set->getBufferSizeInBytes(); } + if (unlikely(attributes_size == 0)) + { + bytes_allocated += sizeof(no_attributes_container); + + if constexpr (sparse) + { + bytes_allocated += no_attributes_container.size() * (sizeof(KeyType)); + bucket_count = no_attributes_container.bucket_count(); + } + else + { + bytes_allocated += no_attributes_container.getBufferSizeInBytes(); + bucket_count = no_attributes_container.getBufferSizeInCells(); + } + } + bytes_allocated += string_arena.size(); if (update_field_loaded_block) @@ -657,13 +718,30 @@ Pipe HashedDictionary::read(const Names & column_na } }); } + else + { + keys.reserve(no_attributes_container.size()); + + for (const auto & key : no_attributes_container) + { + if constexpr (sparse) + keys.emplace_back(key); + else + keys.emplace_back(key.getKey()); + } + } ColumnsWithTypeAndName key_columns; if constexpr (dictionary_key_type == DictionaryKeyType::Simple) - key_columns = {ColumnWithTypeAndName(getColumnFromPODArray(keys), std::make_shared(), dict_struct.id->name)}; + { + auto keys_column = getColumnFromPODArray(std::move(keys)); + key_columns = {ColumnWithTypeAndName(std::move(keys_column), std::make_shared(), dict_struct.id->name)}; + } else + { key_columns = deserializeColumnsWithTypeAndNameFromKeys(dict_struct, keys, 0, keys.size()); + } std::shared_ptr dictionary = shared_from_this(); auto coordinator = DictionarySourceCoordinator::create(dictionary, column_names, std::move(key_columns), max_block_size); diff --git a/src/Dictionaries/HashedDictionary.h b/src/Dictionaries/HashedDictionary.h index c1761944b14b..1ef1c58b67c3 100644 --- a/src/Dictionaries/HashedDictionary.h +++ b/src/Dictionaries/HashedDictionary.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include @@ -120,9 +121,14 @@ class HashedDictionary final : public IDictionary template using CollectionTypeNonSparse = std::conditional_t< dictionary_key_type == DictionaryKeyType::Simple, - HashMap, + HashMap>, HashMapWithSavedHash>>; + using NoAttributesCollectionTypeNonSparse = std::conditional_t< + dictionary_key_type == DictionaryKeyType::Simple, + HashSet>, + HashSetWithSavedHash>>; + /// Here we use sparse_hash_map with DefaultHash<> for the following reasons: /// /// - DefaultHash<> is used for HashMap @@ -140,9 +146,13 @@ class HashedDictionary final : public IDictionary google::sparse_hash_map>, google::sparse_hash_map>>; + using NoAttributesCollectionTypeSparse = google::sparse_hash_set>; + template using CollectionType = std::conditional_t, CollectionTypeNonSparse>; + using NoAttributesCollectionType = std::conditional_t; + using NullableSet = HashSet>; struct Attribute final @@ -167,6 +177,7 @@ class HashedDictionary final : public IDictionary CollectionType, CollectionType, CollectionType, + CollectionType, CollectionType, CollectionType, CollectionType, @@ -214,6 +225,7 @@ class HashedDictionary final : public IDictionary BlockPtr update_field_loaded_block; Arena string_arena; + NoAttributesCollectionType no_attributes_container; }; extern template class HashedDictionary; diff --git a/src/Dictionaries/IPAddressDictionary.cpp b/src/Dictionaries/IPAddressDictionary.cpp index 
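HashedDictionary.h above selects its container types at compile time from the template parameters. The alias machinery reduces to nested std::conditional_t; a minimal sketch with standard containers standing in for HashSet and google::sparse_hash_set:

```cpp
#include <cstdint>
#include <set>
#include <type_traits>
#include <unordered_set>

/// Stand-ins: unordered_set for the dense HashSet, set for sparse_hash_set.
template <bool sparse>
using NoAttributesCollectionType =
    std::conditional_t<sparse, std::set<uint64_t>, std::unordered_set<uint64_t>>;

/// Each HashedDictionary<key_type, sparse> instantiation gets exactly one
/// concrete container type; nothing is decided at run time.
static_assert(std::is_same_v<NoAttributesCollectionType<true>, std::set<uint64_t>>);
static_assert(std::is_same_v<NoAttributesCollectionType<false>, std::unordered_set<uint64_t>>);
```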
9945ee1d4b36..929b04d14fa0 100644 --- a/src/Dictionaries/IPAddressDictionary.cpp +++ b/src/Dictionaries/IPAddressDictionary.cpp @@ -13,10 +13,12 @@ #include #include #include +#include #include #include #include + namespace DB { namespace ErrorCodes @@ -145,7 +147,7 @@ static void validateKeyTypes(const DataTypes & key_types) template size_t sortAndUnique(std::vector & vec, Comp comp) { - std::sort(vec.begin(), vec.end(), + ::sort(vec.begin(), vec.end(), [&](const auto & a, const auto & b) { return comp(a, b) < 0; }); auto new_end = std::unique(vec.begin(), vec.end(), diff --git a/src/Dictionaries/IPAddressDictionary.h b/src/Dictionaries/IPAddressDictionary.h index 33a9989a9e51..8dddc988caa9 100644 --- a/src/Dictionaries/IPAddressDictionary.h +++ b/src/Dictionaries/IPAddressDictionary.h @@ -114,6 +114,7 @@ class IPAddressDictionary final : public IDictionary Decimal64, Decimal128, Decimal256, + DateTime64, Float32, Float64, UUID, @@ -137,6 +138,7 @@ class IPAddressDictionary final : public IDictionary ContainerType, ContainerType, ContainerType, + ContainerType, ContainerType, ContainerType, ContainerType, diff --git a/src/Dictionaries/MySQLDictionarySource.cpp b/src/Dictionaries/MySQLDictionarySource.cpp index a291fcea47f8..29d70f3a7c4f 100644 --- a/src/Dictionaries/MySQLDictionarySource.cpp +++ b/src/Dictionaries/MySQLDictionarySource.cpp @@ -193,7 +193,7 @@ Pipe MySQLDictionarySource::loadAll() auto connection = pool->get(); last_modification = getLastModification(connection, false); - LOG_TRACE(log, load_all_query); + LOG_TRACE(log, fmt::runtime(load_all_query)); return loadFromQuery(load_all_query); } @@ -203,7 +203,7 @@ Pipe MySQLDictionarySource::loadUpdatedAll() last_modification = getLastModification(connection, false); std::string load_update_query = getUpdateFieldAndDate(); - LOG_TRACE(log, load_update_query); + LOG_TRACE(log, fmt::runtime(load_update_query)); return loadFromQuery(load_update_query); } @@ -289,7 +289,7 @@ LocalDateTime MySQLDictionarySource::getLastModification(mysqlxx::Pool::Entry & { auto query = connection->query("SHOW TABLE STATUS LIKE " + quoteForLike(configuration.table)); - LOG_TRACE(log, query.str()); + LOG_TRACE(log, fmt::runtime(query.str())); auto result = query.use(); diff --git a/src/Dictionaries/PolygonDictionary.cpp b/src/Dictionaries/PolygonDictionary.cpp index 8aa9527e4674..deec1e6a588a 100644 --- a/src/Dictionaries/PolygonDictionary.cpp +++ b/src/Dictionaries/PolygonDictionary.cpp @@ -3,6 +3,8 @@ #include #include +#include + #include #include #include @@ -250,7 +252,7 @@ void IPolygonDictionary::loadData() polygon_ids.emplace_back(polygon, i); } - std::sort(polygon_ids.begin(), polygon_ids.end(), [& areas](const auto & lhs, const auto & rhs) + ::sort(polygon_ids.begin(), polygon_ids.end(), [& areas](const auto & lhs, const auto & rhs) { return areas[lhs.second] < areas[rhs.second]; }); diff --git a/src/Dictionaries/PolygonDictionaryUtils.cpp b/src/Dictionaries/PolygonDictionaryUtils.cpp index 15267481c0b7..804920f85e62 100644 --- a/src/Dictionaries/PolygonDictionaryUtils.cpp +++ b/src/Dictionaries/PolygonDictionaryUtils.cpp @@ -3,11 +3,13 @@ #include #include +#include #include #include #include + namespace DB { @@ -87,7 +89,7 @@ std::vector SlabsPolygonIndex::uniqueX(const std::vector & polyg } /** Making all_x sorted and distinct */ - std::sort(all_x.begin(), all_x.end()); + ::sort(all_x.begin(), all_x.end()); all_x.erase(std::unique(all_x.begin(), all_x.end()), all_x.end()); return all_x; @@ -104,7 +106,7 @@ void 
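The `std::sort` → `::sort` swaps above route sorting through the project-local wrapper (a pdqsort-based helper; the exact include name is elided in this diff). The sortAndUnique pattern itself is runnable with std::sort as a stand-in; the equality lambda is one plausible completion of the truncated body:

```cpp
#include <algorithm>
#include <cassert>
#include <iterator>
#include <vector>

template <typename T, typename Comp>
size_t sortAndUnique(std::vector<T> & vec, Comp comp)
{
    /// Sort by the three-way comparator, then collapse equal neighbours.
    std::sort(vec.begin(), vec.end(),
              [&](const auto & a, const auto & b) { return comp(a, b) < 0; });

    auto new_end = std::unique(vec.begin(), vec.end(),
                               [&](const auto & a, const auto & b) { return comp(a, b) == 0; });

    size_t deleted_count = std::distance(new_end, vec.end());
    vec.erase(new_end, vec.end());
    return deleted_count;
}

int main()
{
    std::vector<int> v{3, 1, 3, 2, 1};
    size_t removed = sortAndUnique(v, [](int a, int b) { return a - b; });
    assert(removed == 2 && v == (std::vector<int>{1, 2, 3}));
}
```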
SlabsPolygonIndex::indexBuild(const std::vector & polygons) } /** Sorting edges of (left_point, right_point, polygon_id) in that order */ - std::sort(all_edges.begin(), all_edges.end(), Edge::compareByLeftPoint); + ::sort(all_edges.begin(), all_edges.end(), Edge::compareByLeftPoint); for (size_t i = 0; i != all_edges.size(); ++i) all_edges[i].edge_id = i; @@ -298,7 +300,7 @@ bool SlabsPolygonIndex::find(const Point & point, size_t & id) const } while (pos != 0); /** Sort all ids and find smallest with odd occurrences */ - std::sort(intersections.begin(), intersections.end()); + ::sort(intersections.begin(), intersections.end()); for (size_t i = 0; i < intersections.size(); i += 2) { if (i + 1 == intersections.size() || intersections[i] != intersections[i + 1]) diff --git a/src/Dictionaries/PostgreSQLDictionarySource.cpp b/src/Dictionaries/PostgreSQLDictionarySource.cpp index 9af3ea068384..6fdf486fdbf9 100644 --- a/src/Dictionaries/PostgreSQLDictionarySource.cpp +++ b/src/Dictionaries/PostgreSQLDictionarySource.cpp @@ -80,7 +80,7 @@ PostgreSQLDictionarySource::PostgreSQLDictionarySource(const PostgreSQLDictionar Pipe PostgreSQLDictionarySource::loadAll() { - LOG_TRACE(log, load_all_query); + LOG_TRACE(log, fmt::runtime(load_all_query)); return loadBase(load_all_query); } @@ -88,7 +88,7 @@ Pipe PostgreSQLDictionarySource::loadAll() Pipe PostgreSQLDictionarySource::loadUpdatedAll() { auto load_update_query = getUpdateFieldAndDate(); - LOG_TRACE(log, load_update_query); + LOG_TRACE(log, fmt::runtime(load_update_query)); return loadBase(load_update_query); } diff --git a/src/Dictionaries/RangeHashedDictionary.cpp b/src/Dictionaries/RangeHashedDictionary.cpp index 2d98583d4a36..14c8fc7c7493 100644 --- a/src/Dictionaries/RangeHashedDictionary.cpp +++ b/src/Dictionaries/RangeHashedDictionary.cpp @@ -1,56 +1,21 @@ #include -#include -#include -#include +#include #include +#include #include #include #include -#include -#include - - -namespace -{ - -using RangeStorageType = DB::RangeStorageType; - -// Null values mean that specified boundary, either min or max is not set on range. -// To simplify comparison, null value of min bound should be bigger than any other value, -// and null value of maxbound - less than any value. -const RangeStorageType RANGE_MIN_NULL_VALUE = std::numeric_limits::max(); -const RangeStorageType RANGE_MAX_NULL_VALUE = std::numeric_limits::lowest(); - -bool isCorrectDate(const RangeStorageType & date) -{ - return 0 < date && date <= DATE_LUT_MAX_DAY_NUM; -} - -// Handle both kinds of null values: explicit nulls of NullableColumn and 'implicit' nulls of Date type. 
-RangeStorageType getColumnIntValueOrDefault(const DB::IColumn & column, size_t index, bool isDate, const RangeStorageType & default_value) -{ - if (column.isNullAt(index)) - return default_value; - - const RangeStorageType result = static_cast(column.getInt(index)); - if (isDate && !isCorrectDate(result)) - return default_value; +#include - return result; -} +#include -const DB::IColumn & unwrapNullableColumn(const DB::IColumn & column) -{ - if (const auto * m = DB::checkAndGetColumn(&column)) - { - return m->getNestedColumn(); - } +#include +#include - return column; -} +#include +#include -} namespace DB { @@ -60,22 +25,53 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; extern const int DICTIONARY_IS_EMPTY; extern const int UNSUPPORTED_METHOD; + extern const int TYPE_MISMATCH; } +namespace +{ + template + void callOnRangeType(const DataTypePtr & range_type, F && func) + { + auto call = [&](const auto & types) + { + using Types = std::decay_t; + using DataType = typename Types::LeftType; + + if constexpr (IsDataTypeDecimalOrNumber || IsDataTypeDateOrDateTime || IsDataTypeEnum) + { + using ColumnType = typename DataType::ColumnType; + func(TypePair()); + return true; + } + + return false; + }; + + auto type_index = range_type->getTypeId(); + if (!callOnIndexAndDataType(type_index, call)) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Dictionary structure type of 'range_min' and 'range_max' should be an Integer, Float, Decimal, Date, Date32, DateTime, DateTime64, or Enum." + " Actual 'range_min' and 'range_max' type is {}", + range_type->getName()); + } + } +} template RangeHashedDictionary::RangeHashedDictionary( const StorageID & dict_id_, const DictionaryStructure & dict_struct_, DictionarySourcePtr source_ptr_, - const DictionaryLifetime dict_lifetime_, - bool require_nonempty_, + DictionaryLifetime dict_lifetime_, + RangeHashedDictionaryConfiguration configuration_, BlockPtr update_field_loaded_block_) : IDictionary(dict_id_) , dict_struct(dict_struct_) - , source_ptr{std::move(source_ptr_)} + , source_ptr(std::move(source_ptr_)) , dict_lifetime(dict_lifetime_) - , require_nonempty(require_nonempty_) + , configuration(configuration_) , update_field_loaded_block(std::move(update_field_loaded_block_)) { createAttributes(); @@ -104,15 +100,14 @@ ColumnPtr RangeHashedDictionary::getColumn( const size_t attribute_index = dict_struct.attribute_name_to_index.find(attribute_name)->second; const auto & attribute = attributes[attribute_index]; - /// Cast second column to storage type + /// Cast range column to storage type Columns modified_key_columns = key_columns; auto range_storage_column = key_columns.back(); ColumnWithTypeAndName column_to_cast = {range_storage_column->convertToFullColumnIfConst(), key_types.back(), ""}; - auto range_column_storage_type = std::make_shared(); - modified_key_columns.back() = castColumnAccurate(column_to_cast, range_column_storage_type); + modified_key_columns.back() = castColumnAccurate(column_to_cast, dict_struct.range_min->type); size_t keys_size = key_columns.front()->size(); - bool is_attribute_nullable = attribute.is_nullable; + bool is_attribute_nullable = attribute.is_value_nullable.has_value(); ColumnUInt8::MutablePtr col_null_map_to; ColumnUInt8::Container * vec_null_map_to = nullptr; @@ -122,7 +117,7 @@ ColumnPtr RangeHashedDictionary::getColumn( vec_null_map_to = &col_null_map_to->getData(); } - auto type_call = [&](const auto &dictionary_attribute_type) + auto type_call = [&](const auto & dictionary_attribute_type) { using Type =
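callOnRangeType above maps the runtime TypeIndex of range_min onto a compile-time column type and rejects anything that cannot serve as a range bound. Stripped of the ClickHouse type machinery, the dispatch looks roughly like this (toy TypeIndex, TypeTag and type list; the real code goes through callOnIndexAndDataType and TypePair):

```cpp
#include <cstdint>
#include <stdexcept>
#include <type_traits>

enum class TypeIndex { Int64, Float64, String };

template <typename T>
struct TypeTag { using type = T; };

template <typename F>
void callOnRangeType(TypeIndex type_index, F && func)
{
    switch (type_index)
    {
        /// Each supported index instantiates the generic lambda with its type.
        case TypeIndex::Int64:   func(TypeTag<int64_t>{}); return;
        case TypeIndex::Float64: func(TypeTag<double>{});  return;
        /// Everything else is a configuration error, reported once, up front.
        default:
            throw std::invalid_argument("range_min/range_max must be a numeric, date/time or enum type");
    }
}

int main()
{
    callOnRangeType(TypeIndex::Int64, [](auto tag)
    {
        using RangeStorageType = typename decltype(tag)::type;
        static_assert(std::is_arithmetic_v<RangeStorageType>);
    });
}
```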
std::decay_t; using AttributeType = typename Type::AttributeType; @@ -206,6 +201,106 @@ ColumnPtr RangeHashedDictionary::getColumn( return result; } +template +ColumnPtr RangeHashedDictionary::getColumnInternal( + const std::string & attribute_name, + const DataTypePtr & result_type, + const PaddedPODArray & key_to_index) const +{ + ColumnPtr result; + + const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, result_type); + const size_t attribute_index = dict_struct.attribute_name_to_index.find(attribute_name)->second; + const auto & attribute = attributes[attribute_index]; + + size_t keys_size = key_to_index.size(); + bool is_attribute_nullable = attribute.is_value_nullable.has_value(); + + ColumnUInt8::MutablePtr col_null_map_to; + ColumnUInt8::Container * vec_null_map_to = nullptr; + if (is_attribute_nullable) + { + col_null_map_to = ColumnUInt8::create(keys_size, false); + vec_null_map_to = &col_null_map_to->getData(); + } + + auto type_call = [&](const auto & dictionary_attribute_type) + { + using Type = std::decay_t; + using AttributeType = typename Type::AttributeType; + using ValueType = DictionaryValueType; + using ColumnProvider = DictionaryAttributeColumnProvider; + + auto column = ColumnProvider::getColumn(dictionary_attribute, keys_size); + + if constexpr (std::is_same_v) + { + auto * out = column.get(); + + getItemsInternalImpl( + attribute, + key_to_index, + [&](size_t, const Array & value, bool) + { + out->insert(value); + }); + } + else if constexpr (std::is_same_v) + { + auto * out = column.get(); + + if (is_attribute_nullable) + getItemsInternalImpl( + attribute, + key_to_index, + [&](size_t row, const StringRef value, bool is_null) + { + (*vec_null_map_to)[row] = is_null; + out->insertData(value.data, value.size); + }); + else + getItemsInternalImpl( + attribute, + key_to_index, + [&](size_t, const StringRef value, bool) + { + out->insertData(value.data, value.size); + }); + } + else + { + auto & out = column->getData(); + + if (is_attribute_nullable) + getItemsInternalImpl( + attribute, + key_to_index, + [&](size_t row, const auto value, bool is_null) + { + (*vec_null_map_to)[row] = is_null; + out[row] = value; + }); + else + getItemsInternalImpl( + attribute, + key_to_index, + [&](size_t row, const auto value, bool) + { + out[row] = value; + }); + } + + result = std::move(column); + }; + + callOnDictionaryAttributeType(attribute.type, type_call); + + if (is_attribute_nullable) + result = ColumnNullable::create(std::move(result), std::move(col_null_map_to)); + + return result; +} + template ColumnUInt8::Ptr RangeHashedDictionary::hasKeys(const Columns & key_columns, const DataTypes & key_types) const { @@ -216,41 +311,45 @@ ColumnUInt8::Ptr RangeHashedDictionary::hasKeys(const Colum dict_struct.validateKeyTypes(key_types_copy); } - auto range_column_storage_type = std::make_shared(); + /// Cast range column to storage type auto range_storage_column = key_columns.back(); ColumnWithTypeAndName column_to_cast = {range_storage_column->convertToFullColumnIfConst(), key_types.back(), ""}; - auto range_column_updated = castColumnAccurate(column_to_cast, range_column_storage_type); - PaddedPODArray range_backup_storage; - const PaddedPODArray & dates = getColumnVectorData(this, range_column_updated, range_backup_storage); - + auto range_column_updated = castColumnAccurate(column_to_cast, dict_struct.range_min->type); auto key_columns_copy = key_columns; key_columns_copy.pop_back(); + DictionaryKeysArenaHolder arena_holder; DictionaryKeysExtractor 
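In the rewritten hasKeys above, `interval_tree.has(date)` asks whether any interval stored for the key covers the probe value. Functionally it is the loop below (assuming inclusive bounds); the real IntervalTree answers the same question without scanning every interval:

```cpp
#include <cstdint>
#include <vector>

struct Interval
{
    int64_t left;
    int64_t right;  /// bounds are inclusive in this sketch
};

bool has(const std::vector<Interval> & intervals, int64_t point)
{
    for (const auto & interval : intervals)
        if (interval.left <= point && point <= interval.right)
            return true;
    return false;
}
```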
keys_extractor(key_columns_copy, arena_holder.getComplexKeyArena()); const size_t keys_size = keys_extractor.getKeysSize(); - const auto & attribute = attributes.front(); - auto result = ColumnUInt8::create(keys_size); auto & out = result->getData(); size_t keys_found = 0; - auto type_call = [&](const auto & dictionary_attribute_type) + callOnRangeType(dict_struct.range_min->type, [&](const auto & types) { - using Type = std::decay_t; - using AttributeType = typename Type::AttributeType; - using ValueType = DictionaryValueType; + using Types = std::decay_t; + using RangeColumnType = typename Types::LeftType; + using RangeStorageType = typename RangeColumnType::ValueType; - const auto & collection = std::get>(attribute.maps); + const auto * range_column_typed = typeid_cast(range_column_updated.get()); + if (!range_column_typed) + throw Exception(ErrorCodes::TYPE_MISMATCH, + "Dictionary {} range column type should be equal to {}", + getFullName(), + dict_struct.range_min->type->getName()); + const auto & range_column_data = range_column_typed->getData(); + + const auto & key_attribute_container = std::get>(key_attribute.container); for (size_t key_index = 0; key_index < keys_size; ++key_index) { const auto key = keys_extractor.extractCurrentKey(); - const auto it = collection.find(key); + const auto it = key_attribute_container.find(key); if (it) { - const auto date = dates[key_index]; + const auto date = range_column_data[key_index]; const auto & interval_tree = it->getMapped(); out[key_index] = interval_tree.has(date); keys_found += out[key_index]; @@ -262,9 +361,7 @@ ColumnUInt8::Ptr RangeHashedDictionary::hasKeys(const Colum keys_extractor.rollbackCurrentKey(); } - }; - - callOnDictionaryAttributeType(attribute.type, type_call); + }); query_count.fetch_add(keys_size, std::memory_order_relaxed); found_count.fetch_add(keys_found, std::memory_order_relaxed); @@ -286,6 +383,16 @@ void RangeHashedDictionary::createAttributes() throw Exception(ErrorCodes::BAD_ARGUMENTS, "Hierarchical attributes not supported by {} dictionary.", getDictionaryID().getNameForLogs()); } + + callOnRangeType(dict_struct.range_min->type, [&](const auto & types) + { + using Types = std::decay_t; + using RangeColumnType = typename Types::LeftType; + using RangeStorageType = typename RangeColumnType::ValueType; + + key_attribute.container = KeyAttributeContainerType(); + key_attribute.invalid_intervals_container = InvalidIntervalsContainerType(); + }); } template @@ -294,9 +401,9 @@ void RangeHashedDictionary::loadData() { if (!source_ptr->hasUpdateField()) { QueryPipeline pipeline(source_ptr->loadAll()); - PullingPipelineExecutor executor(pipeline); Block block; + while (executor.pull(block)) { blockToAttributes(block); @@ -307,9 +414,19 @@ void RangeHashedDictionary::loadData() updateData(); } - buildAttributeIntervalTrees(); + callOnRangeType(dict_struct.range_min->type, [&](const auto & types) + { + using Types = std::decay_t; + using RangeColumnType = typename Types::LeftType; + using RangeStorageType = typename RangeColumnType::ValueType; + + auto & key_attribute_container = std::get>(key_attribute.container); + + for (auto & [_, intervals] : key_attribute_container) + intervals.build(); + }); - if (require_nonempty && 0 == element_count) + if (configuration.require_nonempty && 0 == element_count) throw Exception(ErrorCodes::DICTIONARY_IS_EMPTY, "{}: dictionary source is empty and 'require_nonempty' property is set.", getFullName()); } @@ -317,8 +434,22 @@ template void
RangeHashedDictionary::calculateBytesAllocated() { - bytes_allocated += attributes.size() * sizeof(attributes.front()); + callOnRangeType(dict_struct.range_min->type, [&](const auto & types) + { + using Types = std::decay_t; + using RangeColumnType = typename Types::LeftType; + using RangeStorageType = typename RangeColumnType::ValueType; + auto & key_attribute_container = std::get>(key_attribute.container); + + bucket_count = key_attribute_container.getBufferSizeInCells(); + bytes_allocated += key_attribute_container.getBufferSizeInBytes(); + + for (auto & [_, intervals] : key_attribute_container) + bytes_allocated += intervals.getSizeInBytes(); + }); + + bytes_allocated += attributes.size() * sizeof(attributes.front()); for (const auto & attribute : attributes) { auto type_call = [&](const auto & dictionary_attribute_type) @@ -327,17 +458,17 @@ void RangeHashedDictionary::calculateBytesAllocated() using AttributeType = typename Type::AttributeType; using ValueType = DictionaryValueType; - const auto & collection = std::get>(attribute.maps); - bytes_allocated += sizeof(CollectionType) + collection.getBufferSizeInBytes(); - bucket_count = collection.getBufferSizeInCells(); + const auto & container = std::get>(attribute.container); + + bytes_allocated += container.size() * sizeof(ValueType); + + if (attribute.is_value_nullable) + bytes_allocated += (*attribute.is_value_nullable).size() * sizeof(bool); }; callOnDictionaryAttributeType(attribute.type, type_call); } - if constexpr (dictionary_key_type == DictionaryKeyType::Complex) - bytes_allocated += complex_key_arena.size(); - if (update_field_loaded_block) bytes_allocated += update_field_loaded_block->allocatedBytes(); @@ -347,15 +478,20 @@ void RangeHashedDictionary::calculateBytesAllocated() template typename RangeHashedDictionary::Attribute RangeHashedDictionary::createAttribute(const DictionaryAttribute & dictionary_attribute) { - Attribute attribute{dictionary_attribute.underlying_type, dictionary_attribute.is_nullable, {}}; + std::optional> is_value_nullable; + + if (dictionary_attribute.is_nullable) + is_value_nullable.emplace(std::vector()); + + Attribute attribute{dictionary_attribute.underlying_type, {}, std::move(is_value_nullable)}; - auto type_call = [&](const auto &dictionary_attribute_type) + auto type_call = [&](const auto & dictionary_attribute_type) { using Type = std::decay_t; using AttributeType = typename Type::AttributeType; using ValueType = DictionaryValueType; - attribute.maps = CollectionType(); + attribute.container = AttributeContainerType(); }; callOnDictionaryAttributeType(dictionary_attribute.underlying_type, type_call); @@ -371,78 +507,156 @@ void RangeHashedDictionary::getItemsImpl( ValueSetter && set_value, DefaultValueExtractor & default_value_extractor) const { - const auto & collection = std::get>(attribute.maps); + const auto & attribute_container = std::get>(attribute.container); size_t keys_found = 0; - PaddedPODArray range_backup_storage; - const auto & dates = getColumnVectorData(this, key_columns.back(), range_backup_storage); - + auto range_column = key_columns.back(); auto key_columns_copy = key_columns; key_columns_copy.pop_back(); + DictionaryKeysArenaHolder arena_holder; DictionaryKeysExtractor keys_extractor(key_columns_copy, arena_holder.getComplexKeyArena()); const size_t keys_size = keys_extractor.getKeysSize(); - for (size_t key_index = 0; key_index < keys_size; ++key_index) + callOnRangeType(dict_struct.range_min->type, [&](const auto & types) { - auto key = 
keys_extractor.extractCurrentKey(); - const auto it = collection.find(key); - - if (it) - { - const auto date = dates[key_index]; - const auto & interval_tree = it->getMapped(); - - std::optional min_value; - std::optional min_range; - bool has_interval = false; + using Types = std::decay_t; + using RangeColumnType = typename Types::LeftType; + using RangeStorageType = typename RangeColumnType::ValueType; + using RangeInterval = Interval; - interval_tree.find(date, [&](auto & interval, auto & value) - { - has_interval = true; + const auto * range_column_typed = typeid_cast(range_column.get()); + if (!range_column_typed) + throw Exception(ErrorCodes::TYPE_MISMATCH, + "Dictionary {} range column type should be equal to {}", + getFullName(), + dict_struct.range_min->type->getName()); - if (min_range && interval < *min_range) - min_range = interval; - else - min_range = interval; + const auto & range_column_data = range_column_typed->getData(); - min_value = value; + const auto & key_attribute_container = std::get>(key_attribute.container); - return true; - }); + for (size_t key_index = 0; key_index < keys_size; ++key_index) + { + auto key = keys_extractor.extractCurrentKey(); + const auto it = key_attribute_container.find(key); - if (has_interval) + if (it) { - ++keys_found; + const auto date = range_column_data[key_index]; + const auto & interval_tree = it->getMapped(); + + size_t value_index = 0; + std::optional range; - if constexpr (is_nullable) + interval_tree.find(date, [&](auto & interval, auto & interval_value_index) { - if (min_value.has_value()) - set_value(key_index, *min_value, false); + if (range) + { + if (likely(configuration.lookup_strategy == RangeHashedDictionaryLookupStrategy::min) && interval < *range) + { + range = interval; + value_index = interval_value_index; + } + else if (configuration.lookup_strategy == RangeHashedDictionaryLookupStrategy::max && interval > * range) + { + range = interval; + value_index = interval_value_index; + } + } else - set_value(key_index, default_value_extractor[key_index], true); - } - else + { + range = interval; + value_index = interval_value_index; + } + + return true; + }); + + if (range.has_value()) { - set_value(key_index, *min_value, false); - } + ++keys_found; - keys_extractor.rollbackCurrentKey(); - continue; + AttributeType value = attribute_container[value_index]; + + if constexpr (is_nullable) + { + bool is_null = (*attribute.is_value_nullable)[value_index]; + + if (!is_null) + set_value(key_index, value, false); + else + set_value(key_index, default_value_extractor[key_index], true); + } + else + { + set_value(key_index, value, false); + } + + keys_extractor.rollbackCurrentKey(); + continue; + } } + + if constexpr (is_nullable) + set_value(key_index, default_value_extractor[key_index], default_value_extractor.isNullAt(key_index)); + else + set_value(key_index, default_value_extractor[key_index], false); + + keys_extractor.rollbackCurrentKey(); + } + }); + + query_count.fetch_add(keys_size, std::memory_order_relaxed); + found_count.fetch_add(keys_found, std::memory_order_relaxed); +} + +template +template +void RangeHashedDictionary::getItemsInternalImpl( + const Attribute & attribute, + const PaddedPODArray & key_to_index, + ValueSetter && set_value) const +{ + size_t keys_size = key_to_index.size(); + + const auto & container = std::get>(attribute.container); + size_t container_size = container.size(); + + for (size_t key_index = 0; key_index < keys_size; ++key_index) + { + UInt64 container_index = key_to_index[key_index]; + + 
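The find callback above implements the new range_lookup_strategy: among all intervals that match the probe value, keep either the smallest or the largest one (intervals compare lexicographically on their bounds, as assumed below). Isolated from the tree traversal, the selection is:

```cpp
#include <cstdint>
#include <optional>
#include <tuple>
#include <utility>
#include <vector>

enum class LookupStrategy { min, max };

struct Interval
{
    int64_t left;
    int64_t right;

    bool operator<(const Interval & other) const
    {
        return std::tie(left, right) < std::tie(other.left, other.right);
    }
    bool operator>(const Interval & other) const { return other < *this; }
};

/// `matches` are the (interval, value_index) pairs the tree reported for one key.
std::optional<size_t> select(const std::vector<std::pair<Interval, size_t>> & matches,
                             LookupStrategy strategy)
{
    std::optional<Interval> range;
    size_t value_index = 0;

    for (const auto & [interval, index] : matches)
    {
        bool better = !range
            || (strategy == LookupStrategy::min && interval < *range)
            || (strategy == LookupStrategy::max && interval > *range);

        if (better)
        {
            range = interval;
            value_index = index;
        }
    }

    if (!range)
        return std::nullopt;
    return value_index;
}
```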
if (unlikely(container_index >= container_size)) + { + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Dictionary {}: attribute container index {} must be less than attribute container size {}", + getFullName(), + container_index, + container_size + ); } + AttributeType value = container[container_index]; + if constexpr (is_nullable) - set_value(key_index, default_value_extractor[key_index], default_value_extractor.isNullAt(key_index)); - else - set_value(key_index, default_value_extractor[key_index], false); + { + bool is_null = (*attribute.is_value_nullable)[container_index]; - keys_extractor.rollbackCurrentKey(); + if (!is_null) + set_value(key_index, value, false); + else + set_value(key_index, value, true); + } + else + { + set_value(key_index, value, false); + } } query_count.fetch_add(keys_size, std::memory_order_relaxed); - found_count.fetch_add(keys_found, std::memory_order_relaxed); + found_count.fetch_add(keys_size, std::memory_order_relaxed); } template @@ -486,271 +700,379 @@ void RangeHashedDictionary::updateData() } template -void RangeHashedDictionary::blockToAttributes(const Block & block [[maybe_unused]]) +void RangeHashedDictionary::blockToAttributes(const Block & block) { - size_t skip_keys_size_offset = dict_struct.getKeysSize(); + size_t attributes_size = attributes.size(); + size_t dictionary_keys_size = dict_struct.getKeysSize(); + + static constexpr size_t ranges_size = 2; + + size_t block_columns = block.columns(); + size_t range_dictionary_attributes_size = attributes_size + dictionary_keys_size + ranges_size; + + if (range_dictionary_attributes_size != block.columns()) + { + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, + "Block size mismatch. Actual {}. Expected {}", + block_columns, + range_dictionary_attributes_size); + } Columns key_columns; - key_columns.reserve(skip_keys_size_offset); + key_columns.reserve(dictionary_keys_size); /// Split into keys columns and attribute columns - for (size_t i = 0; i < skip_keys_size_offset; ++i) - key_columns.emplace_back(block.safeGetByPosition(i).column); + for (size_t i = 0; i < dictionary_keys_size; ++i) + key_columns.emplace_back(block.getByPosition(i).column); DictionaryKeysArenaHolder arena_holder; DictionaryKeysExtractor keys_extractor(key_columns, arena_holder.getComplexKeyArena()); const size_t keys_size = keys_extractor.getKeysSize(); - element_count += keys_size; + size_t block_attributes_skip_offset = dictionary_keys_size; - // Support old behaviour, where invalid date means 'open range'. 
- const bool is_date = isDate(dict_struct.range_min->type); + const auto * min_range_column = block.getByPosition(block_attributes_skip_offset).column.get(); + const auto * max_range_column = block.getByPosition(block_attributes_skip_offset + 1).column.get(); - const auto & min_range_column = unwrapNullableColumn(*block.safeGetByPosition(skip_keys_size_offset).column); - const auto & max_range_column = unwrapNullableColumn(*block.safeGetByPosition(skip_keys_size_offset + 1).column); + const NullMap * min_range_null_map = nullptr; + const NullMap * max_range_null_map = nullptr; - skip_keys_size_offset += 2; + if (const auto * min_range_column_nullable = checkAndGetColumn(min_range_column)) + { + min_range_column = &min_range_column_nullable->getNestedColumn(); + min_range_null_map = &min_range_column_nullable->getNullMapColumn().getData(); + } - for (size_t attribute_index = 0; attribute_index < attributes.size(); ++attribute_index) + if (const auto * max_range_column_nullable = checkAndGetColumn(max_range_column)) { - const auto & attribute_column = *block.safeGetByPosition(attribute_index + skip_keys_size_offset).column; - auto & attribute = attributes[attribute_index]; + max_range_column = &max_range_column_nullable->getNestedColumn(); + max_range_null_map = &max_range_column_nullable->getNullMapColumn().getData(); + } + + callOnRangeType(dict_struct.range_min->type, [&](const auto & types) + { + using Types = std::decay_t; + using RangeColumnType = typename Types::LeftType; + using RangeStorageType = typename RangeColumnType::ValueType; + + const auto * min_range_column_typed = typeid_cast(min_range_column); + if (!min_range_column_typed) + throw Exception(ErrorCodes::TYPE_MISMATCH, + "Dictionary {} range min column type should be equal to {}", + getFullName(), + dict_struct.range_min->type->getName()); + + const auto * max_range_column_typed = typeid_cast(max_range_column); + if (!max_range_column_typed) + throw Exception(ErrorCodes::TYPE_MISMATCH, + "Dictionary {} range max column type should be equal to {}", + getFullName(), + dict_struct.range_max->type->getName()); + + const auto & min_range_column_data = min_range_column_typed->getData(); + const auto & max_range_column_data = max_range_column_typed->getData(); + + auto & key_attribute_container = std::get>(key_attribute.container); + auto & invalid_intervals_container = std::get>(key_attribute.invalid_intervals_container); + + block_attributes_skip_offset += 2; + + Field column_value; for (size_t key_index = 0; key_index < keys_size; ++key_index) { auto key = keys_extractor.extractCurrentKey(); - RangeStorageType lower_bound; - RangeStorageType upper_bound; + RangeStorageType lower_bound = min_range_column_data[key_index]; + RangeStorageType upper_bound = max_range_column_data[key_index]; - if (is_date) + bool invalid_range = false; + + if (unlikely(min_range_null_map && (*min_range_null_map)[key_index])) { - lower_bound = getColumnIntValueOrDefault(min_range_column, key_index, is_date, 0); - upper_bound = getColumnIntValueOrDefault(max_range_column, key_index, is_date, DATE_LUT_MAX_DAY_NUM + 1); + lower_bound = std::numeric_limits::min(); + invalid_range = true; } - else + + if (unlikely(max_range_null_map && (*max_range_null_map)[key_index])) { - lower_bound = getColumnIntValueOrDefault(min_range_column, key_index, is_date, RANGE_MIN_NULL_VALUE); - upper_bound = getColumnIntValueOrDefault(max_range_column, key_index, is_date, RANGE_MAX_NULL_VALUE); + upper_bound = std::numeric_limits::max(); + invalid_range = true; + } + + if 
(unlikely(!configuration.convert_null_range_bound_to_open && invalid_range)) + { + keys_extractor.rollbackCurrentKey(); + continue; } if constexpr (std::is_same_v) key = copyStringInArena(string_arena, key); - setAttributeValue(attribute, key, RangeInterval{lower_bound, upper_bound}, attribute_column[key_index]); + for (size_t attribute_index = 0; attribute_index < attributes.size(); ++attribute_index) + { + const auto & attribute_column = *block.getByPosition(attribute_index + block_attributes_skip_offset).column; + auto & attribute = attributes[attribute_index]; + attribute_column.get(key_index, column_value); + + setAttributeValue(attribute, column_value); + } + + auto interval = Interval(lower_bound, upper_bound); + auto it = key_attribute_container.find(key); + + bool emplaced_in_interval_tree = false; + + if (it) + { + auto & intervals = it->getMapped(); + emplaced_in_interval_tree = intervals.emplace(interval, element_count); + } + else + { + IntervalMap intervals; + emplaced_in_interval_tree = intervals.emplace(interval, element_count); + key_attribute_container.insert({key, std::move(intervals)}); + } + + if (unlikely(!emplaced_in_interval_tree)) + { + InvalidIntervalWithKey invalid_interval{key, interval, element_count}; + invalid_intervals_container.emplace_back(invalid_interval); + } + + ++element_count; keys_extractor.rollbackCurrentKey(); } - - keys_extractor.reset(); - } + }); } template -void RangeHashedDictionary::buildAttributeIntervalTrees() +void RangeHashedDictionary::setAttributeValue(Attribute & attribute, const Field & value) { - for (auto & attribute : attributes) + auto type_call = [&](const auto & dictionary_attribute_type) { - auto type_call = [&](const auto & dictionary_attribute_type) + using Type = std::decay_t; + using AttributeType = typename Type::AttributeType; + using ValueType = DictionaryValueType; + + auto & container = std::get>(attribute.container); + container.emplace_back(); + + if (unlikely(attribute.is_value_nullable.has_value())) { - using Type = std::decay_t; - using AttributeType = typename Type::AttributeType; - using ValueType = DictionaryValueType; + bool value_is_null = value.isNull(); + attribute.is_value_nullable->emplace_back(value_is_null); - auto & collection = std::get>(attribute.maps); - for (auto & [_, ranges] : collection) - ranges.build(); - }; + if (unlikely(value_is_null)) + return; + } - callOnDictionaryAttributeType(attribute.type, type_call); - } + ValueType value_to_insert; + + if constexpr (std::is_same_v) + { + const auto & string = value.get(); + StringRef string_ref = copyStringInArena(string_arena, string); + value_to_insert = string_ref; + } + else + { + value_to_insert = value.get(); + } + + container.back() = value_to_insert; + }; + + callOnDictionaryAttributeType(attribute.type, type_call); } template -template -void RangeHashedDictionary::setAttributeValueImpl(Attribute & attribute, KeyType key, const RangeInterval & interval, const Field & value) +Pipe RangeHashedDictionary::read(const Names & column_names, size_t max_block_size, size_t num_streams) const { - using ValueType = std::conditional_t, StringRef, T>; - auto & collection = std::get>(attribute.maps); + auto key_to_index_column = ColumnUInt64::create(); + auto range_min_column = dict_struct.range_min->type->createColumn(); + auto range_max_column = dict_struct.range_max->type->createColumn(); - std::optional value_to_insert; + PaddedPODArray keys; - if (attribute.is_nullable && value.isNull()) - { - value_to_insert = std::nullopt; - } - else + 
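blockToAttributes above now reads NULL range bounds from the null maps: with convert_null_range_bound_to_open set, a NULL lower/upper bound widens to the storage type's min/max, otherwise the row is skipped. The same decision in isolation, with std::optional modelling a nullable range cell:

```cpp
#include <cstdint>
#include <limits>
#include <optional>
#include <utility>

std::optional<std::pair<int64_t, int64_t>> normalizeRange(
    std::optional<int64_t> lower_bound,
    std::optional<int64_t> upper_bound,
    bool convert_null_range_bound_to_open)
{
    bool invalid_range = !lower_bound.has_value() || !upper_bound.has_value();

    /// Without the setting, a NULL bound invalidates the whole row
    /// (the `continue` in the loop above).
    if (invalid_range && !convert_null_range_bound_to_open)
        return std::nullopt;

    /// With it, NULL means "open on that side".
    return std::make_pair(
        lower_bound.value_or(std::numeric_limits<int64_t>::min()),
        upper_bound.value_or(std::numeric_limits<int64_t>::max()));
}
```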
callOnRangeType(dict_struct.range_min->type, [&](const auto & types) { - if constexpr (std::is_same_v) + using Types = std::decay_t; + using RangeColumnType = typename Types::LeftType; + using RangeStorageType = typename RangeColumnType::ValueType; + + auto * range_min_column_typed = typeid_cast(range_min_column.get()); + if (!range_min_column_typed) + throw Exception(ErrorCodes::TYPE_MISMATCH, + "Dictionary {} range min column type should be equal to {}", + getFullName(), + dict_struct.range_min->type->getName()); + + auto * range_max_column_typed = typeid_cast(range_max_column.get()); + if (!range_max_column_typed) + throw Exception(ErrorCodes::TYPE_MISMATCH, + "Dictionary {} range max column type should be equal to {}", + getFullName(), + dict_struct.range_max->type->getName()); + + auto & key_to_index_column_data = key_to_index_column->getData(); + auto & range_min_column_data = range_min_column_typed->getData(); + auto & range_max_column_data = range_max_column_typed->getData(); + + const auto & container = std::get>(key_attribute.container); + const auto & invalid_intervals_container = std::get>(key_attribute.invalid_intervals_container); + + keys.reserve(element_count); + key_to_index_column_data.reserve(element_count); + range_min_column_data.reserve(element_count); + range_max_column_data.reserve(element_count); + + for (const auto & key : container) { - const auto & string = value.get(); - StringRef string_ref = copyStringInArena(string_arena, string); - value_to_insert = { string_ref }; + for (const auto & [interval, index] : key.getMapped()) + { + keys.emplace_back(key.getKey()); + key_to_index_column_data.emplace_back(index); + range_min_column_data.push_back(interval.left); + range_max_column_data.push_back(interval.right); + } } - else + + for (const auto & invalid_interval_with_key : invalid_intervals_container) { - value_to_insert = { value.get() }; + keys.emplace_back(invalid_interval_with_key.key); + key_to_index_column_data.emplace_back(invalid_interval_with_key.attribute_value_index); + range_min_column_data.push_back(invalid_interval_with_key.interval.left); + range_max_column_data.push_back(invalid_interval_with_key.interval.right); } - } + }); - const auto it = collection.find(key); + auto range_min_column_with_type = ColumnWithTypeAndName{std::move(range_min_column), dict_struct.range_min->type, dict_struct.range_min->name}; + auto range_max_column_with_type = ColumnWithTypeAndName{std::move(range_max_column), dict_struct.range_max->type, dict_struct.range_max->name}; - if (it) + ColumnsWithTypeAndName key_columns; + if constexpr (dictionary_key_type == DictionaryKeyType::Simple) { - auto & values = it->getMapped(); - values.emplace(interval, std::move(value_to_insert)); + auto keys_column = getColumnFromPODArray(std::move(keys)); + key_columns = {ColumnWithTypeAndName(std::move(keys_column), std::make_shared(), dict_struct.id->name)}; } else { - Values values; - values.emplace(interval, value_to_insert); - collection.insert({key, std::move(values)}); + key_columns = deserializeColumnsWithTypeAndNameFromKeys(dict_struct, keys, 0, keys.size()); } -} -template -void RangeHashedDictionary::setAttributeValue(Attribute & attribute, KeyType key, const RangeInterval & interval, const Field & value) -{ - auto type_call = [&](const auto &dictionary_attribute_type) - { - using Type = std::decay_t; - using AttributeType = typename Type::AttributeType; + key_columns.emplace_back(ColumnWithTypeAndName{std::move(key_to_index_column), std::make_shared(), ""}); - 
setAttributeValueImpl(attribute, key, interval, value); - }; - - callOnDictionaryAttributeType(attribute.type, type_call); -} + ColumnsWithTypeAndName data_columns = {std::move(range_min_column_with_type), std::move(range_max_column_with_type)}; -template -template -void RangeHashedDictionary::getKeysAndDates( - PaddedPODArray & keys, - PaddedPODArray & start_dates, - PaddedPODArray & end_dates) const -{ - const auto & attribute = attributes.front(); + std::shared_ptr dictionary = shared_from_this(); - auto type_call = [&](const auto & dictionary_attribute_type) + DictionarySourceCoordinator::ReadColumnsFunc read_keys_func = [dictionary_copy = dictionary]( + const Strings & attribute_names, + const DataTypes & result_types, + const Columns & key_columns, + const DataTypes, + const Columns &) { - using Type = std::decay_t; - using AttributeType = typename Type::AttributeType; - using ValueType = DictionaryValueType; + auto range_dictionary_ptr = std::static_pointer_cast>(dictionary_copy); - getKeysAndDates(attribute, keys, start_dates, end_dates); - }; + size_t attribute_names_size = attribute_names.size(); - callOnDictionaryAttributeType(attribute.type, type_call); -} + Columns result; + result.reserve(attribute_names_size); -template -template -void RangeHashedDictionary::getKeysAndDates( - const Attribute & attribute, - PaddedPODArray & keys, - PaddedPODArray & start_dates, - PaddedPODArray & end_dates) const -{ - const auto & collection = std::get>(attribute.maps); + auto key_column = key_columns.back(); - keys.reserve(collection.size()); - start_dates.reserve(collection.size()); - end_dates.reserve(collection.size()); + const auto * key_to_index_column = typeid_cast(key_column.get()); + if (!key_to_index_column) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Dictionary {} read expect indexes column with type UInt64", + range_dictionary_ptr->getFullName()); - const bool is_date = isDate(dict_struct.range_min->type); - (void)(is_date); + const auto & data = key_to_index_column->getData(); - for (const auto & key : collection) - { - for (const auto & [interval, _] : key.getMapped()) + for (size_t i = 0; i < attribute_names_size; ++i) { - keys.push_back(key.getKey()); - start_dates.push_back(interval.left); - end_dates.push_back(interval.right); + const auto & attribute_name = attribute_names[i]; + const auto & result_type = result_types[i]; - if constexpr (std::numeric_limits::max() > DATE_LUT_MAX_DAY_NUM) /// Avoid warning about tautological comparison in next line. 
- if (is_date && static_cast(end_dates.back()) > DATE_LUT_MAX_DAY_NUM) - end_dates.back() = 0; + result.emplace_back(range_dictionary_ptr->getColumnInternal(attribute_name, result_type, data)); } - } -} -template -template -PaddedPODArray RangeHashedDictionary::makeDateKeys( - const PaddedPODArray & block_start_dates, - const PaddedPODArray & block_end_dates) const -{ - PaddedPODArray keys(block_start_dates.size()); + return result; + }; - for (size_t i = 0; i < keys.size(); ++i) - { - if (isCorrectDate(block_start_dates[i])) - keys[i] = block_start_dates[i]; // NOLINT - else - keys[i] = block_end_dates[i]; // NOLINT - } + auto coordinator = DictionarySourceCoordinator::create( + dictionary, + column_names, + std::move(key_columns), + std::move(data_columns), + max_block_size, + std::move(read_keys_func)); + auto result = coordinator->read(num_streams); - return keys; + return result; } template -Pipe RangeHashedDictionary::read(const Names & column_names, size_t max_block_size, size_t num_streams) const +static DictionaryPtr createRangeHashedDictionary(const std::string & full_name, + const DictionaryStructure & dict_struct, + const Poco::Util::AbstractConfiguration & config, + const std::string & config_prefix, + DictionarySourcePtr source_ptr) { - auto type = dict_struct.range_min->type; + static constexpr auto layout_name = dictionary_key_type == DictionaryKeyType::Simple ? "range_hashed" : "complex_key_range_hashed"; - ColumnsWithTypeAndName key_columns; - ColumnWithTypeAndName range_min_column; - ColumnWithTypeAndName range_max_column; - - auto type_call = [&](const auto & types) mutable -> bool + if constexpr (dictionary_key_type == DictionaryKeyType::Simple) { - using Types = std::decay_t; - using LeftDataType = typename Types::LeftType; - - if constexpr (IsDataTypeNumber || - std::is_same_v || - std::is_same_v || - std::is_same_v) - { - using RangeType = typename LeftDataType::FieldType; + if (dict_struct.key) + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "'key' is not supported for dictionary of layout 'range_hashed'"); + } + else + { + if (dict_struct.id) + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "'id' is not supported for dictionary of layout 'complex_key_range_hashed'"); + } - PaddedPODArray keys; - PaddedPODArray start_dates; - PaddedPODArray end_dates; - getKeysAndDates(keys, start_dates, end_dates); + if (!dict_struct.range_min || !dict_struct.range_max) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "{}: dictionary of layout '{}' requires .structure.range_min and .structure.range_max", + full_name, + layout_name); - range_min_column = ColumnWithTypeAndName{getColumnFromPODArray(start_dates), dict_struct.range_min->type, dict_struct.range_min->name}; - range_max_column = ColumnWithTypeAndName{getColumnFromPODArray(end_dates), dict_struct.range_max->type, dict_struct.range_max->name}; + const auto dict_id = StorageID::fromDictionaryConfig(config, config_prefix); + const DictionaryLifetime dict_lifetime{config, config_prefix + ".lifetime"}; + const bool require_nonempty = config.getBool(config_prefix + ".require_nonempty", false); - if constexpr (dictionary_key_type == DictionaryKeyType::Simple) - key_columns = {ColumnWithTypeAndName(getColumnFromPODArray(keys), std::make_shared(), dict_struct.id->name)}; - else - key_columns = deserializeColumnsWithTypeAndNameFromKeys(dict_struct, keys, 0, keys.size()); + String dictionary_layout_prefix = config_prefix + ".layout." 
+ layout_name; + const bool convert_null_range_bound_to_open = config.getBool(dictionary_layout_prefix + ".convert_null_range_bound_to_open", true); + String range_lookup_strategy = config.getString(dictionary_layout_prefix + ".range_lookup_strategy", "min"); + RangeHashedDictionaryLookupStrategy lookup_strategy = RangeHashedDictionaryLookupStrategy::min; - auto date_column = getColumnFromPODArray(makeDateKeys(start_dates, end_dates)); - key_columns.emplace_back(ColumnWithTypeAndName{std::move(date_column), std::make_shared(), ""}); + if (range_lookup_strategy == "min") + lookup_strategy = RangeHashedDictionaryLookupStrategy::min; + else if (range_lookup_strategy == "max") + lookup_strategy = RangeHashedDictionaryLookupStrategy::max; - return true; - } - else - { - return false; - } + RangeHashedDictionaryConfiguration configuration + { + .convert_null_range_bound_to_open = convert_null_range_bound_to_open, + .lookup_strategy = lookup_strategy, + .require_nonempty = require_nonempty }; - if (!callOnIndexAndDataType(type->getTypeId(), type_call)) - throw Exception(ErrorCodes::LOGICAL_ERROR, "RangeHashedDictionary min max range type should be numeric"); - - ColumnsWithTypeAndName data_columns = {std::move(range_min_column), std::move(range_max_column)}; - - std::shared_ptr dictionary = shared_from_this(); - auto coordinator = DictionarySourceCoordinator::create(dictionary, column_names, std::move(key_columns), std::move(data_columns), max_block_size); - auto result = coordinator->read(num_streams); + DictionaryPtr result = std::make_unique>( + dict_id, + dict_struct, + std::move(source_ptr), + dict_lifetime, + configuration); return result; } - void registerDictionaryRangeHashed(DictionaryFactory & factory) { auto create_layout_simple = [=](const std::string & full_name, @@ -761,19 +1083,9 @@ void registerDictionaryRangeHashed(DictionaryFactory & factory) ContextPtr /* global_context */, bool /*created_from_ddl*/) -> DictionaryPtr { - if (dict_struct.key) - throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "'key' is not supported for dictionary of layout 'range_hashed'"); - - if (!dict_struct.range_min || !dict_struct.range_max) - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "{}: dictionary of layout 'range_hashed' requires .structure.range_min and .structure.range_max", - full_name); - - const auto dict_id = StorageID::fromDictionaryConfig(config, config_prefix); - const DictionaryLifetime dict_lifetime{config, config_prefix + ".lifetime"}; - const bool require_nonempty = config.getBool(config_prefix + ".require_nonempty", false); - return std::make_unique>(dict_id, dict_struct, std::move(source_ptr), dict_lifetime, require_nonempty); + return createRangeHashedDictionary(full_name, dict_struct, config, config_prefix, std::move(source_ptr)); }; + factory.registerLayout("range_hashed", create_layout_simple, false); auto create_layout_complex = [=](const std::string & full_name, @@ -784,19 +1096,9 @@ void registerDictionaryRangeHashed(DictionaryFactory & factory) ContextPtr /* context */, bool /*created_from_ddl*/) -> DictionaryPtr { - if (dict_struct.id) - throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "'id' is not supported for dictionary of layout 'complex_key_range_hashed'"); - - if (!dict_struct.range_min || !dict_struct.range_max) - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "{}: dictionary of layout 'complex_key_range_hashed' requires .structure.range_min and .structure.range_max", - full_name); - - const auto dict_id = StorageID::fromDictionaryConfig(config, config_prefix); - const 
DictionaryLifetime dict_lifetime{config, config_prefix + ".lifetime"}; - const bool require_nonempty = config.getBool(config_prefix + ".require_nonempty", false); - return std::make_unique>(dict_id, dict_struct, std::move(source_ptr), dict_lifetime, require_nonempty); + return createRangeHashedDictionary(full_name, dict_struct, config, config_prefix, std::move(source_ptr)); }; + factory.registerLayout("complex_key_range_hashed", create_layout_complex, true); } diff --git a/src/Dictionaries/RangeHashedDictionary.h b/src/Dictionaries/RangeHashedDictionary.h index f31d6415dc80..78d62e9d7de3 100644 --- a/src/Dictionaries/RangeHashedDictionary.h +++ b/src/Dictionaries/RangeHashedDictionary.h @@ -19,7 +19,18 @@ namespace DB { -using RangeStorageType = Int64; +enum class RangeHashedDictionaryLookupStrategy : uint8_t +{ + min, + max +}; + +struct RangeHashedDictionaryConfiguration +{ + bool convert_null_range_bound_to_open; + RangeHashedDictionaryLookupStrategy lookup_strategy; + bool require_nonempty; +}; template class RangeHashedDictionary final : public IDictionary @@ -31,11 +42,17 @@ class RangeHashedDictionary final : public IDictionary const StorageID & dict_id_, const DictionaryStructure & dict_struct_, DictionarySourcePtr source_ptr_, - const DictionaryLifetime dict_lifetime_, - bool require_nonempty_, + DictionaryLifetime dict_lifetime_, + RangeHashedDictionaryConfiguration configuration_, BlockPtr update_field_loaded_block_ = nullptr); - std::string getTypeName() const override { return "RangeHashed"; } + std::string getTypeName() const override + { + if constexpr (dictionary_key_type == DictionaryKeyType::Simple) + return "RangeHashed"; + else + return "ComplexKeyRangeHashed"; + } size_t getBytesAllocated() const override { return bytes_allocated; } @@ -57,7 +74,15 @@ class RangeHashedDictionary final : public IDictionary std::shared_ptr clone() const override { - return std::make_shared(getDictionaryID(), dict_struct, source_ptr->clone(), dict_lifetime, require_nonempty, update_field_loaded_block); + auto result = std::make_shared( + getDictionaryID(), + dict_struct, + source_ptr->clone(), + dict_lifetime, + configuration, + update_field_loaded_block); + + return result; } DictionarySourcePtr getSource() const override { return source_ptr; } @@ -76,7 +101,7 @@ class RangeHashedDictionary final : public IDictionary DictionarySpecialKeyType getSpecialKeyType() const override { return DictionarySpecialKeyType::Range;} ColumnPtr getColumn( - const std::string& attribute_name, + const std::string & attribute_name, const DataTypePtr & result_type, const Columns & key_columns, const DataTypes & key_types, @@ -88,46 +113,90 @@ class RangeHashedDictionary final : public IDictionary private: - using RangeInterval = Interval; + template + using IntervalMap = IntervalMap, size_t>; - template - using Values = IntervalMap>; + template + using KeyAttributeContainerType = std::conditional_t< + dictionary_key_type == DictionaryKeyType::Simple, + HashMap, DefaultHash>, + HashMapWithSavedHash, DefaultHash>>; template - using CollectionType = std::conditional_t< - dictionary_key_type == DictionaryKeyType::Simple, - HashMap>, - HashMapWithSavedHash, DefaultHash>>; + using AttributeContainerType = std::conditional_t, std::vector, PaddedPODArray>; struct Attribute final { - public: AttributeUnderlyingType type; - bool is_nullable; std::variant< - CollectionType, - CollectionType, - CollectionType, - CollectionType, - CollectionType, - CollectionType, - CollectionType, - CollectionType, - CollectionType, - 
CollectionType, - CollectionType, - CollectionType, - CollectionType, - CollectionType, - CollectionType, - CollectionType, - CollectionType, - CollectionType, - CollectionType, - CollectionType, - CollectionType> - maps; + AttributeContainerType, + AttributeContainerType, + AttributeContainerType, + AttributeContainerType, + AttributeContainerType, + AttributeContainerType, + AttributeContainerType, + AttributeContainerType, + AttributeContainerType, + AttributeContainerType, + AttributeContainerType, + AttributeContainerType, + AttributeContainerType, + AttributeContainerType, + AttributeContainerType, + AttributeContainerType, + AttributeContainerType, + AttributeContainerType, + AttributeContainerType, + AttributeContainerType, + AttributeContainerType, + AttributeContainerType> + container; + + std::optional> is_value_nullable; + }; + + template + struct InvalidIntervalWithKey + { + KeyType key; + Interval interval; + size_t attribute_value_index; + }; + + template + using InvalidIntervalsContainerType = PaddedPODArray>; + + template typename ContainerType> + using RangeStorageTypeContainer = std::variant< + ContainerType, + ContainerType, + ContainerType, + ContainerType, + ContainerType, + ContainerType, + ContainerType, + ContainerType, + ContainerType, + ContainerType, + ContainerType, + ContainerType, + ContainerType, + ContainerType, + ContainerType, + ContainerType, + ContainerType, + ContainerType, + ContainerType, + ContainerType>; + + struct KeyAttribute final + { + RangeStorageTypeContainer container; + + RangeStorageTypeContainer invalid_intervals_container; + }; void createAttributes(); @@ -145,43 +214,31 @@ class RangeHashedDictionary final : public IDictionary ValueSetter && set_value, DefaultValueExtractor & default_value_extractor) const; - void updateData(); - - void blockToAttributes(const Block & block); - - void buildAttributeIntervalTrees(); - - template - void setAttributeValueImpl(Attribute & attribute, KeyType key, const RangeInterval & interval, const Field & value); + ColumnPtr getColumnInternal( + const std::string & attribute_name, + const DataTypePtr & result_type, + const PaddedPODArray & key_to_index) const; - void setAttributeValue(Attribute & attribute, KeyType key, const RangeInterval & interval, const Field & value); + template + void getItemsInternalImpl( + const Attribute & attribute, + const PaddedPODArray & key_to_index, + ValueSetter && set_value) const; - template - void getKeysAndDates( - PaddedPODArray & keys, - PaddedPODArray & start_dates, - PaddedPODArray & end_dates) const; + void updateData(); - template - void getKeysAndDates( - const Attribute & attribute, - PaddedPODArray & keys, - PaddedPODArray & start_dates, - PaddedPODArray & end_dates) const; + void blockToAttributes(const Block & block); - template - PaddedPODArray makeDateKeys( - const PaddedPODArray & block_start_dates, - const PaddedPODArray & block_end_dates) const; + void setAttributeValue(Attribute & attribute, const Field & value); const DictionaryStructure dict_struct; const DictionarySourcePtr source_ptr; const DictionaryLifetime dict_lifetime; - const bool require_nonempty; + const RangeHashedDictionaryConfiguration configuration; BlockPtr update_field_loaded_block; std::vector attributes; - Arena complex_key_arena; + KeyAttribute key_attribute; size_t bytes_allocated = 0; size_t element_count = 0; diff --git a/src/Dictionaries/RedisDictionarySource.cpp b/src/Dictionaries/RedisDictionarySource.cpp index 24a14d8cc800..a1b406b3424c 100644 --- 
a/src/Dictionaries/RedisDictionarySource.cpp +++ b/src/Dictionaries/RedisDictionarySource.cpp @@ -3,26 +3,6 @@ #include "DictionaryStructure.h" #include "registerDictionaries.h" -namespace DB -{ - -void registerDictionarySourceRedis(DictionarySourceFactory & factory) -{ - auto create_table_source = [=](const DictionaryStructure & dict_struct, - const Poco::Util::AbstractConfiguration & config, - const String & config_prefix, - Block & sample_block, - ContextPtr /* global_context */, - const std::string & /* default_database */, - bool /* created_from_ddl */) -> DictionarySourcePtr { - return std::make_unique(dict_struct, config, config_prefix + ".redis", sample_block); - }; - factory.registerSource("redis", create_table_source); -} - -} - - #include #include #include @@ -33,7 +13,6 @@ void registerDictionarySourceRedis(DictionarySourceFactory & factory) #include "RedisSource.h" - namespace DB { namespace ErrorCodes @@ -42,34 +21,64 @@ namespace DB extern const int INVALID_CONFIG_PARAMETER; extern const int INTERNAL_REDIS_ERROR; extern const int LOGICAL_ERROR; + extern const int TIMEOUT_EXCEEDED; } + static RedisStorageType parseStorageType(const String & storage_type_str) + { + if (storage_type_str == "hash_map") + return RedisStorageType::HASH_MAP; + else if (!storage_type_str.empty() && storage_type_str != "simple") + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Unknown storage type {} for Redis dictionary", storage_type_str); - static const size_t max_block_size = 8192; + return RedisStorageType::SIMPLE; + } + + void registerDictionarySourceRedis(DictionarySourceFactory & factory) + { + auto create_table_source = [=](const DictionaryStructure & dict_struct, + const Poco::Util::AbstractConfiguration & config, + const String & config_prefix, + Block & sample_block, + ContextPtr /* global_context */, + const std::string & /* default_database */, + bool /* created_from_ddl */) -> DictionarySourcePtr { + + auto redis_config_prefix = config_prefix + ".redis"; + RedisDictionarySource::Configuration configuration = + { + .host = config.getString(redis_config_prefix + ".host"), + .port = static_cast(config.getUInt(redis_config_prefix + ".port")), + .db_index = config.getUInt(redis_config_prefix + ".db_index", 0), + .password = config.getString(redis_config_prefix + ".password", ""), + .storage_type = parseStorageType(config.getString(redis_config_prefix + ".storage_type", "")), + .pool_size = config.getUInt(redis_config_prefix + ".pool_size", 16), + }; + + return std::make_unique(dict_struct, configuration, sample_block); + }; + + factory.registerSource("redis", create_table_source); + } + + static constexpr size_t REDIS_MAX_BLOCK_SIZE = DEFAULT_BLOCK_SIZE; + static constexpr size_t REDIS_LOCK_ACQUIRE_TIMEOUT_MS = 5000; RedisDictionarySource::RedisDictionarySource( - const DictionaryStructure & dict_struct_, - const String & host_, - UInt16 port_, - UInt8 db_index_, - const String & password_, - RedisStorageType storage_type_, - const Block & sample_block_) - : dict_struct{dict_struct_} - , host{host_} - , port{port_} - , db_index{db_index_} - , password{password_} - , storage_type{storage_type_} - , sample_block{sample_block_} - , client{std::make_shared(host, port)} + const DictionaryStructure & dict_struct_, + const Configuration & configuration_, + const Block & sample_block_) + : dict_struct{dict_struct_} + , configuration(configuration_) + , pool(std::make_shared(configuration.pool_size)) + , sample_block{sample_block_} { if (dict_struct.attributes.size() != 1) throw 
Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Invalid number of non key columns for Redis source: {}, expected 1", DB::toString(dict_struct.attributes.size())); - if (storage_type == RedisStorageType::HASH_MAP) + if (configuration.storage_type == RedisStorageType::HASH_MAP) { if (!dict_struct.key) throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, @@ -87,61 +96,13 @@ namespace DB key.name, key.type->getName()); } - - if (!password.empty()) - { - RedisCommand command("AUTH"); - command << password; - String reply = client->execute(command); - if (reply != "OK") - throw Exception(ErrorCodes::INTERNAL_REDIS_ERROR, - "Authentication failed with reason {}", - reply); - } - - if (db_index != 0) - { - RedisCommand command("SELECT"); - command << std::to_string(db_index); - String reply = client->execute(command); - if (reply != "OK") - throw Exception(ErrorCodes::INTERNAL_REDIS_ERROR, - "Selecting database with index {} failed with reason {}", - DB::toString(db_index), - reply); - } } - - RedisDictionarySource::RedisDictionarySource( - const DictionaryStructure & dict_struct_, - const Poco::Util::AbstractConfiguration & config_, - const String & config_prefix_, - Block & sample_block_) - : RedisDictionarySource( - dict_struct_, - config_.getString(config_prefix_ + ".host"), - config_.getUInt(config_prefix_ + ".port"), - config_.getUInt(config_prefix_ + ".db_index", 0), - config_.getString(config_prefix_ + ".password",""), - parseStorageType(config_.getString(config_prefix_ + ".storage_type", "")), - sample_block_) - { - } - - RedisDictionarySource::RedisDictionarySource(const RedisDictionarySource & other) - : RedisDictionarySource{other.dict_struct, - other.host, - other.port, - other.db_index, - other.password, - other.storage_type, - other.sample_block} + : RedisDictionarySource(other.dict_struct, other.configuration, other.sample_block) { } - RedisDictionarySource::~RedisDictionarySource() = default; static String storageTypeToKeyType(RedisStorageType type) @@ -161,24 +122,25 @@ namespace DB Pipe RedisDictionarySource::loadAll() { - if (!client->isConnected()) - client->connect(host, port); + auto connection = getConnection(); RedisCommand command_for_keys("KEYS"); command_for_keys << "*"; /// Get only keys for specified storage type. 
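The hunks above replace RedisDictionarySource's single long-lived client with a connection borrowed from a pool for each load operation, so concurrent dictionary reads no longer contend on one socket. Below is a minimal sketch of the borrow-with-timeout idea; SimplePool and its method names are illustrative stand-ins, not the actual BorrowedObjectPool API used by the patch:

```cpp
#include <chrono>
#include <condition_variable>
#include <cstddef>
#include <functional>
#include <memory>
#include <mutex>
#include <vector>

// Minimal bounded object pool: create objects up to a cap, then make callers
// wait (with a timeout) for one to be returned. Illustrative only.
template <typename T>
class SimplePool
{
public:
    explicit SimplePool(size_t max_size) : max_size_(max_size) {}

    // Returns nullptr when the timeout expires; callers map that to an error.
    std::unique_ptr<T> tryBorrow(const std::function<std::unique_ptr<T>()> & factory,
                                 std::chrono::milliseconds timeout)
    {
        std::unique_lock<std::mutex> lock(mutex_);
        if (idle_.empty() && created_ < max_size_)
        {
            ++created_;
            return factory(); // still under capacity: create a fresh object
        }
        if (!cv_.wait_for(lock, timeout, [this] { return !idle_.empty(); }))
            return nullptr;   // nothing was returned in time
        auto object = std::move(idle_.back());
        idle_.pop_back();
        return object;
    }

    void giveBack(std::unique_ptr<T> object)
    {
        {
            std::lock_guard<std::mutex> lock(mutex_);
            idle_.push_back(std::move(object));
        }
        cv_.notify_one();
    }

private:
    std::mutex mutex_;
    std::condition_variable cv_;
    std::vector<std::unique_ptr<T>> idle_;
    size_t created_ = 0;
    const size_t max_size_;
};
```

A caller that gets nullptr back converts it into a timeout error, which is what getConnection() does with TIMEOUT_EXCEEDED further down in this patch.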
- auto all_keys = client->execute(command_for_keys); + auto all_keys = connection->client->execute(command_for_keys); if (all_keys.isNull()) - return Pipe(std::make_shared(client, RedisArray{}, storage_type, sample_block, max_block_size)); + return Pipe(std::make_shared( + std::move(connection), RedisArray{}, + configuration.storage_type, sample_block, REDIS_MAX_BLOCK_SIZE)); RedisArray keys; - auto key_type = storageTypeToKeyType(storage_type); + auto key_type = storageTypeToKeyType(configuration.storage_type); for (const auto & key : all_keys) - if (key_type == client->execute(RedisCommand("TYPE").addRedisType(key))) + if (key_type == connection->client->execute(RedisCommand("TYPE").addRedisType(key))) keys.addRedisType(std::move(key)); - if (storage_type == RedisStorageType::HASH_MAP) + if (configuration.storage_type == RedisStorageType::HASH_MAP) { RedisArray hkeys; for (const auto & key : keys) @@ -186,7 +148,7 @@ namespace DB RedisCommand command_for_secondary_keys("HKEYS"); command_for_secondary_keys.addRedisType(key); - auto secondary_keys = client->execute(command_for_secondary_keys); + auto secondary_keys = connection->client->execute(command_for_secondary_keys); RedisArray primary_with_secondary; primary_with_secondary.addRedisType(key); @@ -194,7 +156,7 @@ namespace DB { primary_with_secondary.addRedisType(secondary_key); /// Do not store more than max_block_size values for one request. - if (primary_with_secondary.size() == max_block_size + 1) + if (primary_with_secondary.size() == REDIS_MAX_BLOCK_SIZE + 1) { hkeys.add(primary_with_secondary); primary_with_secondary.clear(); @@ -209,16 +171,16 @@ namespace DB keys = std::move(hkeys); } - return Pipe(std::make_shared(client, std::move(keys), storage_type, sample_block, max_block_size)); + return Pipe(std::make_shared( + std::move(connection), std::move(keys), + configuration.storage_type, sample_block, REDIS_MAX_BLOCK_SIZE)); } - Pipe RedisDictionarySource::loadIds(const std::vector & ids) { - if (!client->isConnected()) - client->connect(host, port); + auto connection = getConnection(); - if (storage_type == RedisStorageType::HASH_MAP) + if (configuration.storage_type == RedisStorageType::HASH_MAP) throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Cannot use loadIds with 'hash_map' storage type"); if (!dict_struct.id) @@ -229,13 +191,14 @@ namespace DB for (UInt64 id : ids) keys << DB::toString(id); - return Pipe(std::make_shared(client, std::move(keys), storage_type, sample_block, max_block_size)); + return Pipe(std::make_shared( + std::move(connection), std::move(keys), + configuration.storage_type, sample_block, REDIS_MAX_BLOCK_SIZE)); } Pipe RedisDictionarySource::loadKeys(const Columns & key_columns, const std::vector & requested_rows) { - if (!client->isConnected()) - client->connect(host, port); + auto connection = getConnection(); if (key_columns.size() != dict_struct.key->size()) throw Exception(ErrorCodes::LOGICAL_ERROR, "The size of key_columns does not equal to the size of dictionary key"); @@ -250,7 +213,7 @@ namespace DB if (isInteger(type)) key << DB::toString(key_columns[i]->get64(row)); else if (isString(type)) - key << get((*key_columns[i])[row]); + key << get((*key_columns[i])[row]); else throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected type of key in Redis dictionary"); } @@ -258,22 +221,65 @@ namespace DB keys.add(key); } - return Pipe(std::make_shared(client, std::move(keys), storage_type, sample_block, max_block_size)); + return Pipe(std::make_shared( + std::move(connection), std::move(keys), + 
configuration.storage_type, sample_block, REDIS_MAX_BLOCK_SIZE)); } - String RedisDictionarySource::toString() const { - return "Redis: " + host + ':' + DB::toString(port); + return "Redis: " + configuration.host + ':' + DB::toString(configuration.port); } - RedisStorageType RedisDictionarySource::parseStorageType(const String & storage_type_str) + RedisDictionarySource::ConnectionPtr RedisDictionarySource::getConnection() const { - if (storage_type_str == "hash_map") - return RedisStorageType::HASH_MAP; - else if (!storage_type_str.empty() && storage_type_str != "simple") - throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Unknown storage type {} for Redis dictionary", storage_type_str); + ClientPtr client; + bool ok = pool->tryBorrowObject(client, + [] { return std::make_unique(); }, + REDIS_LOCK_ACQUIRE_TIMEOUT_MS); - return RedisStorageType::SIMPLE; + if (!ok) + throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, + "Could not get connection from pool, timeout of {} milliseconds exceeded", + REDIS_LOCK_ACQUIRE_TIMEOUT_MS); + + if (!client->isConnected()) + { + try + { + client->connect(configuration.host, configuration.port); + + if (!configuration.password.empty()) + { + RedisCommand command("AUTH"); + command << configuration.password; + String reply = client->execute(command); + if (reply != "OK") + throw Exception(ErrorCodes::INTERNAL_REDIS_ERROR, + "Authentication failed with reason {}", reply); + } + + if (configuration.db_index != 0) + { + RedisCommand command("SELECT"); + command << std::to_string(configuration.db_index); + String reply = client->execute(command); + if (reply != "OK") + throw Exception(ErrorCodes::INTERNAL_REDIS_ERROR, + "Selecting database with index {} failed with reason {}", + configuration.db_index, reply); + } + } + catch (...)
+ { + if (client->isConnected()) + client->disconnect(); + + pool->returnObject(std::move(client)); + throw; + } + } + + return std::make_unique(pool, std::move(client)); } } diff --git a/src/Dictionaries/RedisDictionarySource.h b/src/Dictionaries/RedisDictionarySource.h index eff97dede0c7..af12981f348d 100644 --- a/src/Dictionaries/RedisDictionarySource.h +++ b/src/Dictionaries/RedisDictionarySource.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include "DictionaryStructure.h" #include "IDictionarySource.h" @@ -20,13 +21,13 @@ namespace Poco } } - namespace DB { -namespace ErrorCodes -{ - extern const int NOT_IMPLEMENTED; -} + namespace ErrorCodes + { + extern const int NOT_IMPLEMENTED; + } + enum class RedisStorageType { SIMPLE, @@ -36,24 +37,46 @@ namespace ErrorCodes class RedisDictionarySource final : public IDictionarySource { - RedisDictionarySource( - const DictionaryStructure & dict_struct, - const std::string & host, - UInt16 port, - UInt8 db_index, - const std::string & password, - RedisStorageType storage_type, - const Block & sample_block); - public: using RedisArray = Poco::Redis::Array; using RedisCommand = Poco::Redis::Command; + using ClientPtr = std::unique_ptr; + using Pool = BorrowedObjectPool; + using PoolPtr = std::shared_ptr; + + struct Configuration + { + const std::string host; + const UInt16 port; + const UInt32 db_index; + const std::string password; + const RedisStorageType storage_type; + const size_t pool_size; + }; + + struct Connection + { + Connection(PoolPtr pool_, ClientPtr client_) + : pool(std::move(pool_)), client(std::move(client_)) + { + } + + ~Connection() + { + pool->returnObject(std::move(client)); + } + + PoolPtr pool; + ClientPtr client; + }; + + using ConnectionPtr = std::unique_ptr; + RedisDictionarySource( - const DictionaryStructure & dict_struct, - const Poco::Util::AbstractConfiguration & config, - const std::string & config_prefix, - Block & sample_block); + const DictionaryStructure & dict_struct_, + const Configuration & configuration_, + const Block & sample_block_); RedisDictionarySource(const RedisDictionarySource & other); @@ -81,17 +104,12 @@ namespace ErrorCodes std::string toString() const override; private: - static RedisStorageType parseStorageType(const std::string& storage_type); + ConnectionPtr getConnection() const; const DictionaryStructure dict_struct; - const std::string host; - const UInt16 port; - const UInt8 db_index; - const std::string password; - const RedisStorageType storage_type; - Block sample_block; + const Configuration configuration; - std::shared_ptr client; + PoolPtr pool; + Block sample_block; }; - } diff --git a/src/Dictionaries/RedisSource.cpp b/src/Dictionaries/RedisSource.cpp index ad5cf8a0977b..6089b836d98b 100644 --- a/src/Dictionaries/RedisSource.cpp +++ b/src/Dictionaries/RedisSource.cpp @@ -30,20 +30,22 @@ namespace DB RedisSource::RedisSource( - const std::shared_ptr & client_, - const RedisArray & keys_, - const RedisStorageType & storage_type_, - const DB::Block & sample_block, - const size_t max_block_size_) - : SourceWithProgress(sample_block) - , client(client_), keys(keys_), storage_type(storage_type_), max_block_size{max_block_size_} + ConnectionPtr connection_, + const RedisArray & keys_, + const RedisStorageType & storage_type_, + const DB::Block & sample_block, + size_t max_block_size_) + : SourceWithProgress(sample_block) + , connection(std::move(connection_)) + , keys(keys_) + , storage_type(storage_type_) + , max_block_size{max_block_size_} { description.init(sample_block); } 
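The catch block above shows the invariant this pooled design has to keep: a client borrowed from the pool must be handed back even when the lazy connect or the AUTH/SELECT handshake throws, otherwise the pool permanently loses a slot. A compact sketch of that shape, templated over hypothetical Client and Pool interfaces that mirror the patch rather than real ClickHouse types:

```cpp
#include <memory>
#include <string>
#include <utility>

// "Connect lazily on first borrow": if any handshake step throws, return the
// raw client to the pool before the exception propagates.
template <typename Client, typename Pool>
std::unique_ptr<Client> connectChecked(
    Pool & pool, std::unique_ptr<Client> client, const std::string & host, int port)
{
    try
    {
        if (!client->isConnected())
            client->connect(host, port); // may throw; AUTH/SELECT would go here too
        return client;
    }
    catch (...)
    {
        if (client->isConnected())
            client->disconnect();
        pool.giveBack(std::move(client)); // hand the slot back first...
        throw;                            // ...then propagate the error
    }
}
```

The Connection struct added to the header below enforces the same guarantee on the success path by returning the client to the pool in its destructor, so a connection lives exactly as long as the source that reads from it.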
RedisSource::~RedisSource() = default; - namespace { using ValueType = ExternalResultDescription::ValueType; @@ -121,7 +123,6 @@ namespace DB } } - Chunk RedisSource::generate() { if (keys.isNull() || description.sample_block.rows() == 0 || cursor >= keys.size()) @@ -168,7 +169,7 @@ namespace DB for (const auto & elem : keys_array) command_for_values.addRedisType(elem); - auto values = client->execute(command_for_values); + auto values = connection->client->execute(command_for_values); if (keys_array.size() != values.size() + 1) // 'HMGET' primary_key secondary_keys throw Exception(ErrorCodes::NUMBER_OF_COLUMNS_DOESNT_MATCH, @@ -199,7 +200,7 @@ namespace DB for (size_t i = 0; i < need_values; ++i) command_for_values.add(keys.get(cursor + i)); - auto values = client->execute(command_for_values); + auto values = connection->client->execute(command_for_values); if (values.size() != need_values) throw Exception(ErrorCodes::INTERNAL_REDIS_ERROR, "Inconsistent sizes of keys and values in Redis request"); diff --git a/src/Dictionaries/RedisSource.h b/src/Dictionaries/RedisSource.h index db2e643eb4e2..24507998f581 100644 --- a/src/Dictionaries/RedisSource.h +++ b/src/Dictionaries/RedisSource.h @@ -24,13 +24,14 @@ namespace DB public: using RedisArray = Poco::Redis::Array; using RedisBulkString = Poco::Redis::BulkString; + using ConnectionPtr = RedisDictionarySource::ConnectionPtr; RedisSource( - const std::shared_ptr & client_, - const Poco::Redis::Array & keys_, - const RedisStorageType & storage_type_, - const Block & sample_block, - const size_t max_block_size); + ConnectionPtr connection_, + const Poco::Redis::Array & keys_, + const RedisStorageType & storage_type_, + const Block & sample_block, + size_t max_block_size); ~RedisSource() override; @@ -39,7 +40,7 @@ namespace DB private: Chunk generate() override; - std::shared_ptr client; + ConnectionPtr connection; Poco::Redis::Array keys; RedisStorageType storage_type; const size_t max_block_size; diff --git a/src/Dictionaries/SSDCacheDictionaryStorage.h b/src/Dictionaries/SSDCacheDictionaryStorage.h index 7c7dc838436a..292e60f17f9e 100644 --- a/src/Dictionaries/SSDCacheDictionaryStorage.h +++ b/src/Dictionaries/SSDCacheDictionaryStorage.h @@ -12,6 +12,7 @@ #include #include +#include #include #include #include @@ -24,6 +25,7 @@ #include #include + namespace CurrentMetrics { extern const Metric Write; @@ -1092,7 +1094,7 @@ class SSDCacheDictionaryStorage final : public ICacheDictionaryStorage } /// Sort blocks by offset before start async io requests - std::sort(blocks_to_request.begin(), blocks_to_request.end()); + ::sort(blocks_to_request.begin(), blocks_to_request.end()); file_buffer.fetchBlocks(configuration.read_buffer_blocks_size, blocks_to_request, [&](size_t block_index, char * block_data) { diff --git a/src/Dictionaries/XDBCDictionarySource.cpp b/src/Dictionaries/XDBCDictionarySource.cpp index ab7cf65eb8bc..f08abcdc5167 100644 --- a/src/Dictionaries/XDBCDictionarySource.cpp +++ b/src/Dictionaries/XDBCDictionarySource.cpp @@ -18,6 +18,7 @@ #include #include #include +#include namespace DB @@ -120,7 +121,7 @@ std::string XDBCDictionarySource::getUpdateFieldAndDate() Pipe XDBCDictionarySource::loadAll() { - LOG_TRACE(log, load_all_query); + LOG_TRACE(log, fmt::runtime(load_all_query)); return loadFromQuery(bridge_url, sample_block, load_all_query); } @@ -129,7 +130,7 @@ Pipe XDBCDictionarySource::loadUpdatedAll() { std::string load_query_update = getUpdateFieldAndDate(); - LOG_TRACE(log, load_query_update); + LOG_TRACE(log, 
fmt::runtime(load_query_update)); return loadFromQuery(bridge_url, sample_block, load_query_update); } diff --git a/src/Disks/DiskCacheWrapper.cpp b/src/Disks/DiskCacheWrapper.cpp index f741b8242f59..da27eff0b545 100644 --- a/src/Disks/DiskCacheWrapper.cpp +++ b/src/Disks/DiskCacheWrapper.cpp @@ -8,15 +8,22 @@ namespace DB { /** - * Write buffer with possibility to set and invoke callback after 'finalize' call. + * This buffer writes to cache, but after finalize() copy written file from cache to disk. */ -class CompletionAwareWriteBuffer : public WriteBufferFromFileDecorator +class WritingToCacheWriteBuffer final : public WriteBufferFromFileDecorator { public: - CompletionAwareWriteBuffer(std::unique_ptr impl_, std::function completion_callback_) - : WriteBufferFromFileDecorator(std::move(impl_)), completion_callback(completion_callback_) { } + WritingToCacheWriteBuffer( + std::unique_ptr impl_, + std::function()> create_read_buffer_, + std::function()> create_write_buffer_) + : WriteBufferFromFileDecorator(std::move(impl_)) + , create_read_buffer(std::move(create_read_buffer_)) + , create_write_buffer(std::move(create_write_buffer_)) + { + } - virtual ~CompletionAwareWriteBuffer() override + virtual ~WritingToCacheWriteBuffer() override { try { @@ -28,15 +35,36 @@ class CompletionAwareWriteBuffer : public WriteBufferFromFileDecorator } } + void preFinalize() override + { + impl->next(); + impl->preFinalize(); + impl->finalize(); + + read_buffer = create_read_buffer(); + write_buffer = create_write_buffer(); + copyData(*read_buffer, *write_buffer); + write_buffer->next(); + write_buffer->preFinalize(); + + is_prefinalized = true; + } + void finalizeImpl() override { - WriteBufferFromFileDecorator::finalizeImpl(); + if (!is_prefinalized) + preFinalize(); - completion_callback(); + write_buffer->finalize(); } private: - const std::function completion_callback; + std::function()> create_read_buffer; + std::function()> create_write_buffer; + std::unique_ptr read_buffer; + std::unique_ptr write_buffer; + + bool is_prefinalized = false; }; enum FileDownloadStatus @@ -65,8 +93,9 @@ std::shared_ptr DiskCacheWrapper::acquireDownloadMetadata( std::unique_lock lock{mutex}; auto it = file_downloads.find(path); - if (it != file_downloads.end() && !it->second.expired()) - return it->second.lock(); + if (it != file_downloads.end()) + if (auto x = it->second.lock()) + return x; std::shared_ptr metadata( new FileDownloadMetadata, @@ -164,21 +193,22 @@ DiskCacheWrapper::writeFile(const String & path, size_t buf_size, WriteMode mode if (!cache_file_predicate(path)) return DiskDecorator::writeFile(path, buf_size, mode); - LOG_TRACE(log, "Write file {} to cache", backQuote(path)); + LOG_TEST(log, "Write file {} to cache", backQuote(path)); auto dir_path = directoryPath(path); if (!cache_disk->exists(dir_path)) cache_disk->createDirectories(dir_path); - return std::make_unique( + return std::make_unique( cache_disk->writeFile(path, buf_size, mode), - [this, path, buf_size, mode]() + [this, path]() { /// Copy file from cache to actual disk when cached buffer is finalized. 
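WritingToCacheWriteBuffer above inverts the old completion-callback approach: every write lands on the cache disk first, and preFinalize()/finalize() copy the finished file to the main disk. A reduced sketch of that write-through shape using plain streams; the class name and paths are illustrative, not the DiskCacheWrapper API:

```cpp
#include <filesystem>
#include <fstream>
#include <string>
#include <utility>

// All writes go to a cache file; "finalize" flushes it and copies the
// completed file to its final destination in one pass.
class CachedFileWriter
{
public:
    CachedFileWriter(std::string cache_path, std::string final_path)
        : cache_path_(std::move(cache_path))
        , final_path_(std::move(final_path))
        , out_(cache_path_, std::ios::binary)
    {
    }

    void write(const char * data, std::streamsize size) { out_.write(data, size); }

    // Plays the role of preFinalize()+finalize(): close the cache file,
    // then copy it to the destination.
    void finalize()
    {
        if (finalized_)
            return;
        out_.close();
        std::filesystem::copy_file(cache_path_, final_path_,
                                   std::filesystem::copy_options::overwrite_existing);
        finalized_ = true;
    }

    ~CachedFileWriter()
    {
        try { finalize(); } catch (...) {} // never throw from a destructor
    }

private:
    std::string cache_path_, final_path_;
    std::ofstream out_;
    bool finalized_ = false;
};
```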
- auto src_buffer = cache_disk->readFile(path, ReadSettings(), /* read_hint= */ {}, /* file_size= */ {}); - auto dst_buffer = DiskDecorator::writeFile(path, buf_size, mode); - copyData(*src_buffer, *dst_buffer); - dst_buffer->finalize(); + return cache_disk->readFile(path, ReadSettings(), /* read_hint= */ {}, /* file_size= */ {}); + }, + [this, path, buf_size, mode]() + { + return DiskDecorator::writeFile(path, buf_size, mode); }); } diff --git a/src/Disks/DiskDecorator.cpp b/src/Disks/DiskDecorator.cpp index d4acb6fab0df..37911f169130 100644 --- a/src/Disks/DiskDecorator.cpp +++ b/src/Disks/DiskDecorator.cpp @@ -151,6 +151,11 @@ void DiskDecorator::removeSharedFile(const String & path, bool keep_s3) delegate->removeSharedFile(path, keep_s3); } +void DiskDecorator::removeSharedFiles(const RemoveBatchRequest & files, bool keep_in_remote_fs) +{ + delegate->removeSharedFiles(files, keep_in_remote_fs); +} + void DiskDecorator::removeSharedRecursive(const String & path, bool keep_s3) { delegate->removeSharedRecursive(path, keep_s3); diff --git a/src/Disks/DiskDecorator.h b/src/Disks/DiskDecorator.h index ff4f16fdf3db..0bdfffa8f01a 100644 --- a/src/Disks/DiskDecorator.h +++ b/src/Disks/DiskDecorator.h @@ -52,6 +52,7 @@ class DiskDecorator : public IDisk void removeRecursive(const String & path) override; void removeSharedFile(const String & path, bool keep_s3) override; void removeSharedRecursive(const String & path, bool keep_s3) override; + void removeSharedFiles(const RemoveBatchRequest & files, bool keep_in_remote_fs) override; void setLastModified(const String & path, const Poco::Timestamp & timestamp) override; Poco::Timestamp getLastModified(const String & path) override; void setReadOnly(const String & path) override; diff --git a/src/Disks/DiskLocal.cpp b/src/Disks/DiskLocal.cpp index 3428a9aef543..cbdf6d6440b8 100644 --- a/src/Disks/DiskLocal.cpp +++ b/src/Disks/DiskLocal.cpp @@ -11,6 +11,14 @@ #include #include +#include +#include +#include +#include +#include +#include +#include +#include namespace CurrentMetrics { @@ -25,10 +33,11 @@ namespace ErrorCodes extern const int UNKNOWN_ELEMENT_IN_CONFIG; extern const int EXCESSIVE_ELEMENT_IN_CONFIG; extern const int PATH_ACCESS_DENIED; - extern const int INCORRECT_DISK_INDEX; + extern const int LOGICAL_ERROR; extern const int CANNOT_TRUNCATE_FILE; extern const int CANNOT_UNLINK; extern const int CANNOT_RMDIR; + extern const int BAD_ARGUMENTS; } std::mutex DiskLocal::reservation_mutex; @@ -60,9 +69,6 @@ static void loadDiskLocalConfig(const String & name, throw Exception("Disk path must end with /. Disk " + name, ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG); } - if (!FS::canRead(path) || !FS::canWrite(path)) - throw Exception("There is no RW access to the disk " + name + " (" + path + ")", ErrorCodes::PATH_ACCESS_DENIED); - bool has_space_ratio = config.has(config_prefix + ".keep_free_space_ratio"); if (config.has(config_prefix + ".keep_free_space_bytes") && has_space_ratio) @@ -112,13 +118,48 @@ class DiskLocalReservation : public IReservation UInt64 getSize() const override { return size; } - DiskPtr getDisk(size_t i) const override; + DiskPtr getDisk(size_t i) const override + { + if (i != 0) + throw Exception("Can't use i != 0 with single disk reservation. 
It's a bug", ErrorCodes::LOGICAL_ERROR); + return disk; + } Disks getDisks() const override { return {disk}; } - void update(UInt64 new_size) override; + void update(UInt64 new_size) override + { + std::lock_guard lock(DiskLocal::reservation_mutex); + disk->reserved_bytes -= size; + size = new_size; + disk->reserved_bytes += size; + } - ~DiskLocalReservation() override; + ~DiskLocalReservation() override + { + try + { + std::lock_guard lock(DiskLocal::reservation_mutex); + if (disk->reserved_bytes < size) + { + disk->reserved_bytes = 0; + LOG_ERROR(&Poco::Logger::get("DiskLocal"), "Unbalanced reservations size for disk '{}'.", disk->getName()); + } + else + { + disk->reserved_bytes -= size; + } + + if (disk->reservation_count == 0) + LOG_ERROR(&Poco::Logger::get("DiskLocal"), "Unbalanced reservation count for disk '{}'.", disk->getName()); + else + --disk->reservation_count; + } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__); + } + } private: DiskLocalPtr disk; @@ -187,7 +228,7 @@ bool DiskLocal::tryReserve(UInt64 bytes) return false; } -UInt64 DiskLocal::getTotalSpace() const +static UInt64 getTotalSpaceByName(const String & name, const String & disk_path, UInt64 keep_free_space_bytes) { struct statvfs fs; if (name == "default") /// for default disk we get space from path/data/ @@ -200,8 +241,17 @@ UInt64 DiskLocal::getTotalSpace() const return total_size - keep_free_space_bytes; } +UInt64 DiskLocal::getTotalSpace() const +{ + if (broken || readonly) + return 0; + return getTotalSpaceByName(name, disk_path, keep_free_space_bytes); +} + UInt64 DiskLocal::getAvailableSpace() const { + if (broken || readonly) + return 0; /// we use f_bavail, because part of b_free space is /// available for superuser only and for system purposes struct statvfs fs; @@ -267,7 +317,7 @@ void DiskLocal::moveDirectory(const String & from_path, const String & to_path) DiskDirectoryIteratorPtr DiskLocal::iterateDirectory(const String & path) { fs::path meta_path = fs::path(disk_path) / path; - if (fs::exists(meta_path) && fs::is_directory(meta_path)) + if (!broken && fs::exists(meta_path) && fs::is_directory(meta_path)) return std::make_unique(disk_path, path); else return std::make_unique(); @@ -408,49 +458,191 @@ void DiskLocal::applyNewSettings(const Poco::Util::AbstractConfiguration & confi keep_free_space_bytes = new_keep_free_space_bytes; } -DiskPtr DiskLocalReservation::getDisk(size_t i) const +DiskLocal::DiskLocal(const String & name_, const String & path_, UInt64 keep_free_space_bytes_) + : name(name_) + , disk_path(path_) + , keep_free_space_bytes(keep_free_space_bytes_) + , logger(&Poco::Logger::get("DiskLocal")) { - if (i != 0) +} + +DiskLocal::DiskLocal( + const String & name_, const String & path_, UInt64 keep_free_space_bytes_, ContextPtr context, UInt64 local_disk_check_period_ms) + : DiskLocal(name_, path_, keep_free_space_bytes_) +{ + if (local_disk_check_period_ms > 0) + disk_checker = std::make_unique(this, context, local_disk_check_period_ms); +} + +void DiskLocal::startup() +{ + try + { + broken = false; + disk_checker_magic_number = -1; + disk_checker_can_check_read = true; + readonly = !setup(); + } + catch (...) { - throw Exception("Can't use i != 0 with single disk reservation", ErrorCodes::INCORRECT_DISK_INDEX); + tryLogCurrentException(logger, fmt::format("Disk {} is marked as broken during startup", name)); + broken = true; + /// Disk checker is disabled when failing to start up. 
+ disk_checker_can_check_read = false; } - return disk; + if (disk_checker && disk_checker_can_check_read) + disk_checker->startup(); } -void DiskLocalReservation::update(UInt64 new_size) +void DiskLocal::shutdown() { - std::lock_guard lock(DiskLocal::reservation_mutex); - disk->reserved_bytes -= size; - size = new_size; - disk->reserved_bytes += size; + if (disk_checker) + disk_checker->shutdown(); +} + +std::optional DiskLocal::readDiskCheckerMagicNumber() const noexcept +try +{ + ReadSettings read_settings; + /// Proper disk read checking requires direct I/O + read_settings.direct_io_threshold = 1; + auto buf = readFile(disk_checker_path, read_settings, {}, {}); + UInt32 magic_number; + readIntBinary(magic_number, *buf); + if (buf->eof()) + return magic_number; + LOG_WARNING(logger, "The size of disk check magic number is more than 4 bytes. Marking it as a read failure"); + return {}; +} +catch (...) +{ + tryLogCurrentException(logger, fmt::format("Cannot read correct disk check magic number from {}{}", disk_path, disk_checker_path)); + return {}; +} + +bool DiskLocal::canRead() const noexcept +try +{ + if (FS::canRead(fs::path(disk_path) / disk_checker_path)) + { + auto magic_number = readDiskCheckerMagicNumber(); + if (magic_number && *magic_number == disk_checker_magic_number) + return true; + } + return false; +} +catch (...) +{ + LOG_WARNING(logger, "Cannot read from the disk directory: {}", disk_path); + return false; +} + +struct DiskWriteCheckData +{ + constexpr static size_t PAGE_SIZE = 4096; + char data[PAGE_SIZE]{}; + DiskWriteCheckData() + { + static const char * magic_string = "ClickHouse disk local write check"; + static size_t magic_string_len = strlen(magic_string); + memcpy(data, magic_string, magic_string_len); + memcpy(data + PAGE_SIZE - magic_string_len, magic_string, magic_string_len); + } +}; + +bool DiskLocal::canWrite() const noexcept +try +{ + static DiskWriteCheckData data; + String tmp_template = fs::path(disk_path) / ""; + { + auto buf = WriteBufferFromTemporaryFile::create(tmp_template); + buf->write(data.data, data.PAGE_SIZE); + buf->sync(); + } + return true; +} +catch (...) +{ + LOG_WARNING(logger, "Cannot write to the disk directory: {}", disk_path); + return false; } -DiskLocalReservation::~DiskLocalReservation() +bool DiskLocal::setup() { try { - std::lock_guard lock(DiskLocal::reservation_mutex); - if (disk->reserved_bytes < size) + fs::create_directories(disk_path); + } + catch (...) + { + LOG_ERROR(logger, "Cannot create the directory of disk {} ({}).", name, disk_path); + throw; + } + + try + { + if (!FS::canRead(disk_path)) + throw Exception(ErrorCodes::PATH_ACCESS_DENIED, "There is no read access to disk {} ({}).", name, disk_path); + } + catch (...) + { + LOG_ERROR(logger, "Cannot gain read access to the disk directory: {}", disk_path); + throw; + } + + /// If disk checker is disabled, just assume RW by default. + if (!disk_checker) + return true; + + try + { + if (exists(disk_checker_path)) { - disk->reserved_bytes = 0; - LOG_ERROR(disk->log, "Unbalanced reservations size for disk '{}'.", disk->getName()); + auto magic_number = readDiskCheckerMagicNumber(); + if (magic_number) + disk_checker_magic_number = *magic_number; + else + { + /// The checker file is incorrect. Mark the magic number as uninitialized and try to generate a new checker file. + disk_checker_magic_number = -1; + } } - else + } + catch (...) + { + LOG_ERROR(logger, "We cannot tell if {} exists anymore, or read from it.
Most likely disk {} is broken", disk_checker_path, name); + throw; + } + + /// Try to create a new checker file. The disk status can be either broken or readonly. + if (disk_checker_magic_number == -1) + try + { + pcg32_fast rng(randomSeed()); + UInt32 magic_number = rng(); { - disk->reserved_bytes -= size; + auto buf = writeFile(disk_checker_path, DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite); + writeIntBinary(magic_number, *buf); } - - if (disk->reservation_count == 0) - LOG_ERROR(disk->log, "Unbalanced reservation count for disk '{}'.", disk->getName()); - else - --disk->reservation_count; + disk_checker_magic_number = magic_number; } catch (...) { - tryLogCurrentException(__PRETTY_FUNCTION__); + LOG_WARNING( + logger, + "Cannot create/write to {0}. Disk {1} is either readonly or broken. Without setting up disk checker file, DiskLocalCheckThread " + "will not be started. Disk is assumed to be RW. Try manually fix the disk and do `SYSTEM RESTART DISK {1}`", + disk_checker_path, + name); + disk_checker_can_check_read = false; + return true; } -} + if (disk_checker_magic_number == -1) + throw Exception("disk_checker_magic_number is not initialized. It's a bug", ErrorCodes::LOGICAL_ERROR); + return true; +} void registerDiskLocal(DiskFactory & factory) { @@ -458,11 +650,20 @@ void registerDiskLocal(DiskFactory & factory) const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context, - const DisksMap & /*map*/) -> DiskPtr { + const DisksMap & map) -> DiskPtr + { String path; UInt64 keep_free_space_bytes; loadDiskLocalConfig(name, config, config_prefix, context, path, keep_free_space_bytes); - return std::make_shared(name, path, keep_free_space_bytes); + + for (const auto & [disk_name, disk_ptr] : map) + if (path == disk_ptr->getPath()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Disk {} and disk {} cannot have the same path ({})", name, disk_name, path); + + std::shared_ptr disk + = std::make_shared(name, path, keep_free_space_bytes, context, config.getUInt("local_disk_check_period_ms", 0)); + disk->startup(); + return std::make_shared(disk); }; factory.registerDiskType("local", creator); } diff --git a/src/Disks/DiskLocal.h b/src/Disks/DiskLocal.h index f16497ae4322..76d5a88a626c 100644 --- a/src/Disks/DiskLocal.h +++ b/src/Disks/DiskLocal.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include #include @@ -10,24 +11,22 @@ namespace DB { -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} class DiskLocalReservation; class DiskLocal : public IDisk { public: + friend class DiskLocalCheckThread; friend class DiskLocalReservation; - DiskLocal(const String & name_, const String & path_, UInt64 keep_free_space_bytes_) - : name(name_), disk_path(path_), keep_free_space_bytes(keep_free_space_bytes_) - { - if (disk_path.back() != '/') - throw Exception("Disk path must end with '/', but '" + disk_path + "' doesn't.", ErrorCodes::LOGICAL_ERROR); - } + DiskLocal(const String & name_, const String & path_, UInt64 keep_free_space_bytes_); + DiskLocal( + const String & name_, + const String & path_, + UInt64 keep_free_space_bytes_, + ContextPtr context, + UInt64 local_disk_check_period_ms); const String & getName() const override { return name; } @@ -106,13 +105,33 @@ class DiskLocal : public IDisk void applyNewSettings(const Poco::Util::AbstractConfiguration & config, ContextPtr context, const String & config_prefix, const DisksMap &) override; + bool isBroken() const override { return broken; } + + void startup() override; + + void shutdown() 
override; + + /// Check if the disk is OK to proceed read/write operations. Currently the check is + /// rudimentary. The more advanced choice would be using + /// https://github.com/smartmontools/smartmontools. However, it's good enough for now. + bool canRead() const noexcept; + bool canWrite() const noexcept; + private: bool tryReserve(UInt64 bytes); -private: + /// Setup disk for healthy check. Returns true if it's read-write, false if read-only. + /// Throw exception if it's not possible to setup necessary files and directories. + bool setup(); + + /// Read magic number from disk checker file. Return std::nullopt if exception happens. + std::optional readDiskCheckerMagicNumber() const noexcept; + const String name; const String disk_path; + const String disk_checker_path = ".disk_checker_file"; std::atomic keep_free_space_bytes; + Poco::Logger * logger; UInt64 reserved_bytes = 0; UInt64 reservation_count = 0; @@ -120,6 +139,14 @@ class DiskLocal : public IDisk static std::mutex reservation_mutex; Poco::Logger * log = &Poco::Logger::get("DiskLocal"); + + std::atomic broken{false}; + std::atomic readonly{false}; + std::unique_ptr disk_checker; + /// A magic number to vaguely check if reading operation generates correct result. + /// -1 means there is no available disk_checker_file yet. + Int64 disk_checker_magic_number = -1; + bool disk_checker_can_check_read = true; }; diff --git a/src/Disks/DiskLocalCheckThread.cpp b/src/Disks/DiskLocalCheckThread.cpp new file mode 100644 index 000000000000..9ebedebccc2d --- /dev/null +++ b/src/Disks/DiskLocalCheckThread.cpp @@ -0,0 +1,70 @@ +#include + +#include +#include +#include + +namespace DB +{ +static const auto DISK_CHECK_ERROR_SLEEP_MS = 1000; +static const auto DISK_CHECK_ERROR_RETRY_TIME = 3; + +DiskLocalCheckThread::DiskLocalCheckThread(DiskLocal * disk_, ContextPtr context_, UInt64 local_disk_check_period_ms) + : WithContext(context_) + , disk(std::move(disk_)) + , check_period_ms(local_disk_check_period_ms) + , log(&Poco::Logger::get(fmt::format("DiskLocalCheckThread({})", disk->getName()))) +{ + task = getContext()->getSchedulePool().createTask(log->name(), [this] { run(); }); +} + +void DiskLocalCheckThread::startup() +{ + need_stop = false; + retry = 0; + task->activateAndSchedule(); +} + +void DiskLocalCheckThread::run() +{ + if (need_stop) + return; + + bool can_read = disk->canRead(); + bool can_write = disk->canWrite(); + if (can_read) + { + if (disk->broken) + LOG_INFO(log, "Disk {0} seems to be fine. 
It can be recovered using `SYSTEM RESTART DISK {0}`", disk->getName()); + retry = 0; + if (can_write) + disk->readonly = false; + else + { + disk->readonly = true; + LOG_INFO(log, "Disk {} is readonly", disk->getName()); + } + task->scheduleAfter(check_period_ms); + } + else if (!disk->broken && retry < DISK_CHECK_ERROR_RETRY_TIME) + { + ++retry; + task->scheduleAfter(DISK_CHECK_ERROR_SLEEP_MS); + } + else + { + retry = 0; + disk->broken = true; + LOG_INFO(log, "Disk {} is broken", disk->getName()); + task->scheduleAfter(check_period_ms); + } +} + +void DiskLocalCheckThread::shutdown() +{ + need_stop = true; + task->deactivate(); + LOG_TRACE(log, "DiskLocalCheck thread finished"); +} + +} diff --git a/src/Disks/DiskLocalCheckThread.h b/src/Disks/DiskLocalCheckThread.h new file mode 100644 index 000000000000..eb688d599ca0 --- /dev/null +++ b/src/Disks/DiskLocalCheckThread.h @@ -0,0 +1,39 @@ +#pragma once + +#include +#include + +namespace Poco +{ +class Logger; +} + +namespace DB +{ +class DiskLocal; + +class DiskLocalCheckThread : WithContext +{ +public: + friend class DiskLocal; + + DiskLocalCheckThread(DiskLocal * disk_, ContextPtr context_, UInt64 local_disk_check_period_ms); + + void startup(); + + void shutdown(); + +private: + bool check(); + void run(); + + DiskLocal * disk; + size_t check_period_ms; + Poco::Logger * log; + std::atomic need_stop{false}; + + BackgroundSchedulePool::TaskHolder task; + size_t retry{}; +}; + +} diff --git a/src/Disks/DiskRestartProxy.cpp b/src/Disks/DiskRestartProxy.cpp index 9bd595130406..fe9dd8421b15 100644 --- a/src/Disks/DiskRestartProxy.cpp +++ b/src/Disks/DiskRestartProxy.cpp @@ -234,6 +234,12 @@ void DiskRestartProxy::removeSharedFile(const String & path, bool keep_s3) DiskDecorator::removeSharedFile(path, keep_s3); } +void DiskRestartProxy::removeSharedFiles(const RemoveBatchRequest & files, bool keep_in_remote_fs) +{ + ReadLock lock (mutex); + DiskDecorator::removeSharedFiles(files, keep_in_remote_fs); +} + void DiskRestartProxy::removeSharedRecursive(const String & path, bool keep_s3) { ReadLock lock (mutex); diff --git a/src/Disks/DiskRestartProxy.h b/src/Disks/DiskRestartProxy.h index 3644539e9413..30f553f4fe0b 100644 --- a/src/Disks/DiskRestartProxy.h +++ b/src/Disks/DiskRestartProxy.h @@ -54,6 +54,7 @@ class DiskRestartProxy : public DiskDecorator void removeDirectory(const String & path) override; void removeRecursive(const String & path) override; void removeSharedFile(const String & path, bool keep_s3) override; + void removeSharedFiles(const RemoveBatchRequest & files, bool keep_in_remote_fs) override; void removeSharedRecursive(const String & path, bool keep_s3) override; void setLastModified(const String & path, const Poco::Timestamp & timestamp) override; Poco::Timestamp getLastModified(const String & path) override; diff --git a/src/Disks/DiskSelector.cpp b/src/Disks/DiskSelector.cpp index 5c117c1b69db..4c80b128b4bb 100644 --- a/src/Disks/DiskSelector.cpp +++ b/src/Disks/DiskSelector.cpp @@ -40,7 +40,12 @@ DiskSelector::DiskSelector(const Poco::Util::AbstractConfiguration & config, con disks.emplace(disk_name, factory.create(disk_name, config, disk_config_prefix, context, disks)); } if (!has_default_disk) - disks.emplace(default_disk_name, std::make_shared(default_disk_name, context->getPath(), 0)); + { + disks.emplace( + default_disk_name, + std::make_shared( + default_disk_name, context->getPath(), 0, context, config.getUInt("local_disk_check_period_ms", 0))); + } } @@ -96,7 +101,7 @@ DiskSelectorPtr 
DiskSelector::updateFromConfig( } writeString(" disappeared from configuration, this change will be applied after restart of ClickHouse", warning); - LOG_WARNING(&Poco::Logger::get("DiskSelector"), warning.str()); + LOG_WARNING(&Poco::Logger::get("DiskSelector"), fmt::runtime(warning.str())); } return result; diff --git a/src/Disks/DiskSelector.h b/src/Disks/DiskSelector.h index 547522154412..0cd1267c6ef5 100644 --- a/src/Disks/DiskSelector.h +++ b/src/Disks/DiskSelector.h @@ -37,6 +37,12 @@ class DiskSelector disks.emplace(name, disk); } + void shutdown() + { + for (auto & e : disks) + e.second->shutdown(); + } + private: DisksMap disks; }; diff --git a/src/Disks/IDisk.h b/src/Disks/IDisk.h index 665a35459c7c..223d2d48e305 100644 --- a/src/Disks/IDisk.h +++ b/src/Disks/IDisk.h @@ -197,6 +197,32 @@ class IDisk : public Space /// Second bool param is a flag to remove (true) or keep (false) shared data on S3 virtual void removeSharedFileIfExists(const String & path, bool) { removeFileIfExists(path); } + struct RemoveRequest + { + String path; + bool if_exists = false; + + explicit RemoveRequest(String path_, bool if_exists_ = false) + : path(std::move(path_)), if_exists(std::move(if_exists_)) + { + } + }; + + using RemoveBatchRequest = std::vector; + + /// Batch request to remove multiple files. + /// May be much faster for blob storage. + virtual void removeSharedFiles(const RemoveBatchRequest & files, bool keep_in_remote_fs) + { + for (const auto & file : files) + { + if (file.if_exists) + removeSharedFileIfExists(file.path, keep_in_remote_fs); + else + removeSharedFile(file.path, keep_in_remote_fs); + } + } + /// Set last modified time to file or directory at `path`. virtual void setLastModified(const String & path, const Poco::Timestamp & timestamp) = 0; @@ -224,6 +250,9 @@ class IDisk : public Space virtual bool isReadOnly() const { return false; } + /// Check if disk is broken. Broken disks will have 0 space and not be used. + virtual bool isBroken() const { return false; } + /// Invoked when Global Context is shutdown. 
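The RemoveRequest/removeSharedFiles addition above gives IDisk a batch-removal entry point whose default implementation just loops over the files one by one, while remote disks (see IDiskRemote below) override it to collect paths and issue a single bulk delete against the blob store. Stripped to its essentials, the interface looks like this sketch:

```cpp
#include <string>
#include <vector>

// A batch of (path, if_exists) removal requests, mirroring the patch.
struct RemoveRequest
{
    std::string path;
    bool if_exists = false;
};

using RemoveBatchRequest = std::vector<RemoveRequest>;

struct Disk
{
    virtual ~Disk() = default;
    virtual void removeFile(const std::string & path) = 0;
    virtual void removeFileIfExists(const std::string & path) = 0;

    // Default: per-file fallback. Blob-storage disks override this with one
    // bulk call, which is the point of introducing the batch interface.
    virtual void removeBatch(const RemoveBatchRequest & files)
    {
        for (const auto & file : files)
        {
            if (file.if_exists)
                removeFileIfExists(file.path);
            else
                removeFile(file.path);
        }
    }
};
```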
virtual void shutdown() {} diff --git a/src/Disks/IDiskRemote.cpp b/src/Disks/IDiskRemote.cpp index 706f0f84f325..05aa4d3350b5 100644 --- a/src/Disks/IDiskRemote.cpp +++ b/src/Disks/IDiskRemote.cpp @@ -361,6 +361,19 @@ void IDiskRemote::removeSharedFileIfExists(const String & path, bool keep_in_rem } } +void IDiskRemote::removeSharedFiles(const RemoveBatchRequest & files, bool keep_in_remote_fs) +{ + RemoteFSPathKeeperPtr fs_paths_keeper = createFSPathKeeper(); + for (const auto & file : files) + { + bool skip = file.if_exists && !metadata_disk->exists(file.path); + if (!skip) + removeMeta(file.path, fs_paths_keeper); + } + + if (!keep_in_remote_fs) + removeFromRemoteFS(fs_paths_keeper); +} void IDiskRemote::removeSharedRecursive(const String & path, bool keep_in_remote_fs) { @@ -531,4 +544,12 @@ UInt32 IDiskRemote::getRefCount(const String & path) const return meta.ref_count; } +ThreadPool & IDiskRemote::getThreadPoolWriter() +{ + constexpr size_t pool_size = 100; + constexpr size_t queue_size = 1000000; + static ThreadPool writer(pool_size, pool_size, queue_size); + return writer; +} + } diff --git a/src/Disks/IDiskRemote.h b/src/Disks/IDiskRemote.h index c4f475f5b3ed..8bb93cc345d6 100644 --- a/src/Disks/IDiskRemote.h +++ b/src/Disks/IDiskRemote.h @@ -98,6 +98,8 @@ friend class DiskRemoteReservation; void removeSharedFileIfExists(const String & path, bool keep_in_remote_fs) override; + void removeSharedFiles(const RemoveBatchRequest & files, bool keep_in_remote_fs) override; + void removeSharedRecursive(const String & path, bool keep_in_remote_fs) override; void listFiles(const String & path, std::vector & file_names) override; @@ -135,6 +137,7 @@ friend class DiskRemoteReservation; virtual RemoteFSPathKeeperPtr createFSPathKeeper() const = 0; static AsynchronousReaderPtr getThreadPoolReader(); + static ThreadPool & getThreadPoolWriter(); virtual std::unique_ptr readMetaFile( const String & path, diff --git a/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.cpp b/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.cpp index c8484e6088df..184fcfe6f8cc 100644 --- a/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.cpp +++ b/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.cpp @@ -243,7 +243,7 @@ off_t AsynchronousReadIndirectBufferFromRemoteFS::seek(off_t offset_, int whence prefetch_future = {}; } - pos = working_buffer.end(); + resetWorkingBuffer(); /** * Lazy ignore. Save number of bytes to ignore and ignore it either for prefetch buffer or current buffer. 
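The seek() fix above (and the matching one in ReadIndirectBufferFromRemoteFS just below) replaces `pos = working_buffer.end()` with resetWorkingBuffer(): after a seek, any bytes buffered for the old offset are stale, so the buffer itself must be emptied before the next read refills it from the new position. A toy reader showing the idea; it is illustrative, not the ClickHouse buffer classes:

```cpp
#include <algorithm>
#include <cstddef>
#include <string>
#include <vector>

class TinyBufferedReader
{
public:
    explicit TinyBufferedReader(std::string data) : data_(std::move(data)) {}

    void seek(size_t offset)
    {
        offset_ = offset;
        buf_.clear(); // drop stale buffered bytes: the resetWorkingBuffer() analogue
        pos_ = 0;
    }

    int readByte()
    {
        if (pos_ == buf_.size() && !refill())
            return -1; // EOF
        return static_cast<unsigned char>(buf_[pos_++]);
    }

private:
    bool refill()
    {
        if (offset_ >= data_.size())
            return false;
        size_t n = std::min<size_t>(4, data_.size() - offset_); // tiny buffer
        buf_.assign(data_.begin() + offset_, data_.begin() + offset_ + n);
        offset_ += n;
        pos_ = 0;
        return true;
    }

    std::string data_;
    std::vector<char> buf_;
    size_t pos_ = 0;
    size_t offset_ = 0;
};
```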
diff --git a/src/Disks/IO/ReadIndirectBufferFromRemoteFS.cpp b/src/Disks/IO/ReadIndirectBufferFromRemoteFS.cpp index c21a55d68acf..cbf265ce7412 100644 --- a/src/Disks/IO/ReadIndirectBufferFromRemoteFS.cpp +++ b/src/Disks/IO/ReadIndirectBufferFromRemoteFS.cpp @@ -64,7 +64,7 @@ off_t ReadIndirectBufferFromRemoteFS::seek(off_t offset_, int whence) throw Exception("Only SEEK_SET or SEEK_CUR modes are allowed.", ErrorCodes::CANNOT_SEEK_THROUGH_FILE); impl->reset(); - pos = working_buffer.end(); + resetWorkingBuffer(); return impl->file_offset_of_buffer_end; } diff --git a/src/Disks/IVolume.h b/src/Disks/IVolume.h index d26ddea787d2..843ecc8f5e04 100644 --- a/src/Disks/IVolume.h +++ b/src/Disks/IVolume.h @@ -60,6 +60,7 @@ class IVolume : public Space DiskPtr getDisk() const { return getDisk(0); } virtual DiskPtr getDisk(size_t i) const { return disks[i]; } + Disks & getDisks() { return disks; } const Disks & getDisks() const { return disks; } /// Returns effective value of whether merges are allowed on this volume (true) or not (false). diff --git a/src/Disks/S3/DiskS3.cpp b/src/Disks/S3/DiskS3.cpp index ed960528abef..2638365c7ad4 100644 --- a/src/Disks/S3/DiskS3.cpp +++ b/src/Disks/S3/DiskS3.cpp @@ -9,6 +9,7 @@ #include +#include #include #include @@ -262,6 +263,21 @@ std::unique_ptr DiskS3::writeFile(const String & path, LOG_TRACE(log, "{} to file by path: {}. S3 path: {}", mode == WriteMode::Rewrite ? "Write" : "Append", backQuote(metadata_disk->getPath() + path), remote_fs_root_path + s3_path); + ScheduleFunc schedule = [pool = &getThreadPoolWriter()](auto callback) + { + pool->scheduleOrThrow([callback = std::move(callback), thread_group = CurrentThread::getGroup()]() + { + if (thread_group) + CurrentThread::attachTo(thread_group); + + SCOPE_EXIT_SAFE( + if (thread_group) + CurrentThread::detachQueryIfNotDetached(); + ); + callback(); + }); + }; + auto s3_buffer = std::make_unique( settings->client, bucket, @@ -269,7 +285,8 @@ std::unique_ptr DiskS3::writeFile(const String & path, settings->s3_min_upload_part_size, settings->s3_max_single_part_upload_size, std::move(object_metadata), - buf_size); + buf_size, + std::move(schedule)); return std::make_unique>(std::move(s3_buffer), std::move(metadata), s3_path); } diff --git a/src/Disks/S3/ProxyResolverConfiguration.cpp b/src/Disks/S3/ProxyResolverConfiguration.cpp index 0fc7a9a1fa8e..eeac54163b1d 100644 --- a/src/Disks/S3/ProxyResolverConfiguration.cpp +++ b/src/Disks/S3/ProxyResolverConfiguration.cpp @@ -8,6 +8,7 @@ #include #include #include +#include namespace DB::ErrorCodes { @@ -44,13 +45,36 @@ Aws::Client::ClientConfigurationPerRequest ProxyResolverConfiguration::getConfig Poco::Timespan(1000000), /// Send timeout. Poco::Timespan(1000000) /// Receive timeout. ); - auto session = makeHTTPSession(endpoint, timeouts); try { /// It should be just empty GET request. 
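DiskS3::writeFile above wires a ScheduleFunc into the write buffer so multipart-upload work runs on the shared writer pool, and the wrapping lambda attaches the caller's thread group inside the worker and detaches it on exit, so per-query accounting survives the thread hop. A minimal sketch of that capture/attach/detach pattern; Context and the thread-local variable are illustrative stand-ins for ClickHouse's CurrentThread helpers:

```cpp
#include <functional>
#include <string>

// Stand-in for a per-query thread context.
struct Context { std::string query_id; };
thread_local const Context * current_context = nullptr;

using Job = std::function<void()>;

// Capture the caller's context when the job is created, attach it inside the
// worker thread, and always detach on the way out (the SCOPE_EXIT_SAFE role).
Job wrapWithContext(Job job)
{
    const Context * captured = current_context; // read on the caller's thread
    return [job = std::move(job), captured]
    {
        current_context = captured; // attach
        try
        {
            job();
        }
        catch (...)
        {
            current_context = nullptr; // detach even on failure
            throw;
        }
        current_context = nullptr;     // detach
    };
}

// Usage: hand the wrapped job to any pool or thread, e.g.
//   std::thread(wrapWithContext([]{ /* upload one part */ })).join();
```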
Poco::Net::HTTPRequest request(Poco::Net::HTTPRequest::HTTP_GET, endpoint.getPath(), Poco::Net::HTTPRequest::HTTP_1_1); - session->sendRequest(request); + + const auto & host = endpoint.getHost(); + auto resolved_hosts = DNSResolver::instance().resolveHostAll(host); + + if (resolved_hosts.empty()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Proxy resolver cannot resolve host {}", host); + + HTTPSessionPtr session; + + for (size_t i = 0; i < resolved_hosts.size(); ++i) + { + auto resolved_endpoint = endpoint; + resolved_endpoint.setHost(resolved_hosts[i].toString()); + session = makeHTTPSession(resolved_endpoint, timeouts, false); + + try + { + session->sendRequest(request); + break; + } + catch (...) + { + if (i + 1 == resolved_hosts.size()) + throw; + } + } Poco::Net::HTTPResponse response; auto & response_body_stream = session->receiveResponse(response);
diff --git a/src/Disks/StoragePolicy.cpp b/src/Disks/StoragePolicy.cpp index c47f517bf88d..4c77528f1fc3 100644 --- a/src/Disks/StoragePolicy.cpp +++ b/src/Disks/StoragePolicy.cpp @@ -164,10 +164,18 @@ DiskPtr StoragePolicy::getAnyDisk() const if (volumes.empty()) throw Exception("Storage policy " + backQuote(name) + " has no volumes. It's a bug.", ErrorCodes::LOGICAL_ERROR); - if (volumes[0]->getDisks().empty()) - throw Exception("Volume " + backQuote(name) + "." + backQuote(volumes[0]->getName()) + " has no disks. It's a bug.", ErrorCodes::LOGICAL_ERROR); + for (const auto & volume : volumes) + { + if (volume->getDisks().empty()) + throw Exception("Volume '" + volume->getName() + "' has no disks. It's a bug.", ErrorCodes::LOGICAL_ERROR); + for (const auto & disk : volume->getDisks()) + { + if (!disk->isBroken()) + return disk; + } + } - return volumes[0]->getDisks()[0]; + throw Exception(ErrorCodes::NOT_ENOUGH_SPACE, "All disks in storage policy {} are broken", name); } @@ -233,6 +241,10 @@ ReservationPtr StoragePolicy::makeEmptyReservationOnLargestDisk() const } } } + if (!max_disk) + throw Exception( + "There is no space on any disk in storage policy: " + name + ". It's likely all disks are broken", + ErrorCodes::NOT_ENOUGH_SPACE); auto reservation = max_disk->reserve(0); if (!reservation) {
diff --git a/src/Formats/CMakeLists.txt b/src/Formats/CMakeLists.txt index 12def0fb1d01..6e6aa6d45531 100644 --- a/src/Formats/CMakeLists.txt +++ b/src/Formats/CMakeLists.txt @@ -1 +1,21 @@ +if (TARGET ch_contrib::avrocpp) + set(USE_AVRO 1) +endif() +if (TARGET ch_contrib::parquet) + set(USE_PARQUET 1) + set(USE_ARROW 1) + set(USE_ORC 1) +endif() +if (TARGET ch_contrib::snappy) + set(USE_SNAPPY 1) +endif() +if (TARGET ch_contrib::protobuf) + set(USE_PROTOBUF 1) +endif() +if (TARGET ch_contrib::msgpack) + set(USE_MSGPACK 1) +endif() +if (TARGET ch_contrib::capnp) + set(USE_CAPNP 1) +endif() configure_file(config_formats.h.in ${ConfigIncludePath}/config_formats.h)
diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index e00a473f584a..be565a532bb0 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -13,6 +13,8 @@ #include #include #include +#include +#include #include @@ -127,6 +129,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.capn_proto.enum_comparing_mode = settings.format_capn_proto_enum_comparising_mode; format_settings.seekable_read = settings.input_format_allow_seeks; format_settings.msgpack.number_of_columns = settings.input_format_msgpack_number_of_columns; + format_settings.msgpack.output_uuid_representation = settings.output_format_msgpack_uuid_representation; format_settings.max_rows_to_read_for_schema_inference = settings.input_format_max_rows_to_read_for_schema_inference; /// Validate avro_schema_registry_url with RemoteHostFilter when non-empty and in Server context @@ -394,6 +397,27 @@ void FormatFactory::registerNonTrivialPrefixAndSuffixChecker(const String & name target = std::move(non_trivial_prefix_and_suffix_checker); } +void FormatFactory::registerAppendSupportChecker(const String & name, AppendSupportChecker append_support_checker) +{ + auto & target = dict[name].append_support_checker; + if (target) + throw Exception("FormatFactory: Append support checker " + name + " is already registered", ErrorCodes::LOGICAL_ERROR); + target = std::move(append_support_checker); +} + +void FormatFactory::markFormatHasNoAppendSupport(const String & name) +{ + registerAppendSupportChecker(name, [](const FormatSettings &){ return false; }); +} + +bool FormatFactory::checkIfFormatSupportAppend(const String & name, ContextPtr context, const std::optional<FormatSettings> & format_settings_) +{ + auto format_settings = format_settings_ ? *format_settings_ : getFormatSettings(context); + auto & append_support_checker = dict[name].append_support_checker; + /// By default we consider that a format supports append. + return !append_support_checker || append_support_checker(format_settings); +} + void FormatFactory::registerOutputFormat(const String & name, OutputCreator output_creator) { auto & target = dict[name].output_creator; @@ -410,6 +434,9 @@ void FormatFactory::registerFileExtension(const String & extension, const String String FormatFactory::getFormatFromFileName(String file_name, bool throw_if_not_found) { + if (file_name == "stdin") + return getFormatFromFileDescriptor(STDIN_FILENO); + CompressionMethod compression_method = chooseCompressionMethod(file_name, ""); if (CompressionMethod::None != compression_method) { @@ -438,6 +465,25 @@ String FormatFactory::getFormatFromFileName(String file_name, bool throw_if_not_ return it->second; } +String FormatFactory::getFormatFromFileDescriptor(int fd) +{ +#ifdef OS_LINUX + char buf[32] = {'\0'}; + snprintf(buf, sizeof(buf), "/proc/self/fd/%d", fd); + char file_path[PATH_MAX] = {'\0'}; + if (readlink(buf, file_path, sizeof(file_path) - 1) != -1) + return getFormatFromFileName(file_path, false); + return ""; +#elif defined(__APPLE__) + char file_path[PATH_MAX] = {'\0'}; + if (fcntl(fd, F_GETPATH, file_path) != -1) + return getFormatFromFileName(file_path, false); + return ""; +#else + return ""; +#endif +} + void FormatFactory::registerFileSegmentationEngine(const String & name, FileSegmentationEngine file_segmentation_engine) { auto & target = dict[name].file_segmentation_engine;
diff --git a/src/Formats/FormatFactory.h b/src/Formats/FormatFactory.h index a5eaa43a29fa..344dabd3f4dc 100644 --- a/src/Formats/FormatFactory.h +++ b/src/Formats/FormatFactory.h @@ -93,6 +93,10 @@ class FormatFactory final : private boost::noncopyable /// The checker should return true if parallel parsing should be disabled. using NonTrivialPrefixAndSuffixChecker = std::function; + /// Some formats can support append depending on settings. + /// The checker should return true if the format supports append. + using AppendSupportChecker = std::function<bool(const FormatSettings & settings)>; + using SchemaReaderCreator = std::function; using ExternalSchemaReaderCreator = std::function; @@ -106,6 +110,7 @@ class FormatFactory final : private boost::noncopyable bool supports_parallel_formatting{false}; bool is_column_oriented{false}; NonTrivialPrefixAndSuffixChecker non_trivial_prefix_and_suffix_checker; + AppendSupportChecker append_support_checker; }; using FormatsDictionary = std::unordered_map; @@ -167,6 +172,14 @@ class FormatFactory final : private boost::noncopyable void registerNonTrivialPrefixAndSuffixChecker(const String & name, NonTrivialPrefixAndSuffixChecker non_trivial_prefix_and_suffix_checker); + void registerAppendSupportChecker(const String & name, AppendSupportChecker append_support_checker); + + /// If a format never supports append, you can use this method instead of + /// registerAppendSupportChecker with an append_support_checker that always returns false. + void markFormatHasNoAppendSupport(const String & name); + + bool checkIfFormatSupportAppend(const String & name, ContextPtr context, const std::optional<FormatSettings> & format_settings_ = std::nullopt); + /// Register format by its name.
void registerInputFormat(const String & name, InputCreator input_creator); void registerOutputFormat(const String & name, OutputCreator output_creator); @@ -174,6 +187,7 @@ class FormatFactory final : private boost::noncopyable /// Register file extension for format void registerFileExtension(const String & extension, const String & format_name); String getFormatFromFileName(String file_name, bool throw_if_not_found = false); + String getFormatFromFileDescriptor(int fd); /// Register schema readers for format its name. void registerSchemaReader(const String & name, SchemaReaderCreator schema_reader_creator); diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index b484d6239442..265c879e768a 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -231,9 +231,17 @@ struct FormatSettings EnumComparingMode enum_comparing_mode = EnumComparingMode::BY_VALUES; } capn_proto; + enum class MsgPackUUIDRepresentation + { + STR, // Output UUID as a string of 36 characters. + BIN, // Output UUID as 16-bytes binary. + EXT, // Output UUID as ExtType = 2 + }; + struct { UInt64 number_of_columns = 0; + MsgPackUUIDRepresentation output_uuid_representation = MsgPackUUIDRepresentation::EXT; } msgpack; }; diff --git a/src/Formats/MsgPackExtensionTypes.h b/src/Formats/MsgPackExtensionTypes.h new file mode 100644 index 000000000000..139d2f9047bd --- /dev/null +++ b/src/Formats/MsgPackExtensionTypes.h @@ -0,0 +1,11 @@ +#pragma once + +namespace DB +{ + +enum class MsgPackExtensionTypes +{ + UUID = 0x02, +}; + +} diff --git a/src/Formats/ProtobufSerializer.cpp b/src/Formats/ProtobufSerializer.cpp index b59db12a16c6..389d25a1f467 100644 --- a/src/Formats/ProtobufSerializer.cpp +++ b/src/Formats/ProtobufSerializer.cpp @@ -36,6 +36,7 @@ # include # include # include +# include # include # include # include @@ -2163,7 +2164,7 @@ namespace for (auto & desc : field_descs_) field_infos.emplace_back(std::move(desc.column_indices), *desc.field_descriptor, std::move(desc.field_serializer)); - std::sort(field_infos.begin(), field_infos.end(), + ::sort(field_infos.begin(), field_infos.end(), [](const FieldInfo & lhs, const FieldInfo & rhs) { return lhs.field_tag < rhs.field_tag; }); for (size_t i : collections::range(field_infos.size())) @@ -2643,7 +2644,7 @@ namespace missing_column_indices.clear(); missing_column_indices.reserve(column_names.size() - used_column_indices.size()); auto used_column_indices_sorted = std::move(used_column_indices); - std::sort(used_column_indices_sorted.begin(), used_column_indices_sorted.end()); + ::sort(used_column_indices_sorted.begin(), used_column_indices_sorted.end()); boost::range::set_difference(collections::range(column_names.size()), used_column_indices_sorted, std::back_inserter(missing_column_indices)); @@ -2755,7 +2756,7 @@ namespace } /// Shorter suffixes first. 
- std::sort(out_field_descriptors_with_suffixes.begin(), out_field_descriptors_with_suffixes.end(), + ::sort(out_field_descriptors_with_suffixes.begin(), out_field_descriptors_with_suffixes.end(), [](const std::pair & f1, const std::pair & f2) { diff --git a/src/Formats/ReadSchemaUtils.cpp b/src/Formats/ReadSchemaUtils.cpp index 37067eae64f7..559fac4cfaa3 100644 --- a/src/Formats/ReadSchemaUtils.cpp +++ b/src/Formats/ReadSchemaUtils.cpp @@ -17,7 +17,12 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } -ColumnsDescription readSchemaFromFormat(const String & format_name, const std::optional & format_settings, ReadBufferCreator read_buffer_creator, ContextPtr context) +ColumnsDescription readSchemaFromFormat( + const String & format_name, + const std::optional & format_settings, + ReadBufferCreator read_buffer_creator, + ContextPtr context, + std::unique_ptr & buf_out) { NamesAndTypesList names_and_types; if (FormatFactory::instance().checkIfFormatHasExternalSchemaReader(format_name)) @@ -34,11 +39,11 @@ ColumnsDescription readSchemaFromFormat(const String & format_name, const std::o } else if (FormatFactory::instance().checkIfFormatHasSchemaReader(format_name)) { - auto read_buf = read_buffer_creator(); - if (read_buf->eof()) + buf_out = read_buffer_creator(); + if (buf_out->eof()) throw Exception(ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, "Cannot extract table structure from {} format file, file is empty", format_name); - auto schema_reader = FormatFactory::instance().getSchemaReader(format_name, *read_buf, context, format_settings); + auto schema_reader = FormatFactory::instance().getSchemaReader(format_name, *buf_out, context, format_settings); try { names_and_types = schema_reader->readSchema(); @@ -54,6 +59,12 @@ ColumnsDescription readSchemaFromFormat(const String & format_name, const std::o return ColumnsDescription(names_and_types); } +ColumnsDescription readSchemaFromFormat(const String & format_name, const std::optional & format_settings, ReadBufferCreator read_buffer_creator, ContextPtr context) +{ + std::unique_ptr buf_out; + return readSchemaFromFormat(format_name, format_settings, read_buffer_creator, context, buf_out); +} + DataTypePtr generalizeDataType(DataTypePtr type) { WhichDataType which(type); diff --git a/src/Formats/ReadSchemaUtils.h b/src/Formats/ReadSchemaUtils.h index fb43acc3cd62..4446393a5816 100644 --- a/src/Formats/ReadSchemaUtils.h +++ b/src/Formats/ReadSchemaUtils.h @@ -15,7 +15,19 @@ namespace DB /// If format doesn't have any schema reader or a schema reader /// couldn't determine the schema, an exception will be thrown. using ReadBufferCreator = std::function()>; -ColumnsDescription readSchemaFromFormat(const String & format_name, const std::optional & format_settings, ReadBufferCreator read_buffer_creator, ContextPtr context); +ColumnsDescription readSchemaFromFormat( + const String & format_name, + const std::optional & format_settings, + ReadBufferCreator read_buffer_creator, + ContextPtr context); + +/// If ReadBuffer is created, it will be written to buf_out. 
+ColumnsDescription readSchemaFromFormat( + const String & format_name, + const std::optional & format_settings, + ReadBufferCreator read_buffer_creator, + ContextPtr context, + std::unique_ptr & buf_out); /// Convert type to the most general type: /// - IntN, UIntN, FloatN, Decimal -> Float64 diff --git a/src/Functions/CMakeLists.txt b/src/Functions/CMakeLists.txt index 120eac7bb5fd..b7020ea128ed 100644 --- a/src/Functions/CMakeLists.txt +++ b/src/Functions/CMakeLists.txt @@ -1,3 +1,4 @@ +include(configure_config.cmake) configure_file(config_functions.h.in ${ConfigIncludePath}/config_functions.h) add_subdirectory(divide) @@ -12,30 +13,26 @@ add_library(clickhouse_functions ${clickhouse_functions_sources}) target_link_libraries(clickhouse_functions PUBLIC - ${BASE64_LIBRARY} - ${CITYHASH_LIBRARIES} - ${FARMHASH_LIBRARIES} - ${FASTOPS_LIBRARY} + ch_contrib::cityhash + ch_contrib::farmhash clickhouse_dictionaries clickhouse_dictionaries_embedded clickhouse_parsers - consistent-hashing + ch_contrib::consistent_hashing dbms - metrohash - murmurhash + ch_contrib::metrohash + ch_contrib::murmurhash PRIVATE - ${ZLIB_LIBRARIES} + ch_contrib::zlib boost::filesystem divide_impl ) -if (OPENSSL_CRYPTO_LIBRARY) - target_link_libraries(clickhouse_functions PUBLIC ${OPENSSL_CRYPTO_LIBRARY}) +if (TARGET OpenSSL::Crypto) + target_link_libraries(clickhouse_functions PUBLIC OpenSSL::Crypto) endif() -target_include_directories(clickhouse_functions SYSTEM PRIVATE ${SPARSEHASH_INCLUDE_DIR}) - if (CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE" OR CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO" OR CMAKE_BUILD_TYPE_UC STREQUAL "MINSIZEREL") @@ -57,43 +54,46 @@ else() message(STATUS "Generating debugger info for ClickHouse functions") endif() -if (USE_ICU) - target_link_libraries (clickhouse_functions PRIVATE ${ICU_LIBRARIES}) - target_include_directories(clickhouse_functions SYSTEM PRIVATE ${ICU_INCLUDE_DIRS}) +if (TARGET ch_contrib::icu) + target_link_libraries (clickhouse_functions PRIVATE ch_contrib::icu) endif () -if (USE_FASTOPS) - target_include_directories (clickhouse_functions SYSTEM PRIVATE ${FASTOPS_INCLUDE_DIR}) +if (TARGET ch_contrib::fastops) + target_link_libraries (clickhouse_functions PRIVATE ch_contrib::fastops) endif () if (ENABLE_EXAMPLES) add_subdirectory(examples) endif () -if (USE_EMBEDDED_COMPILER) - target_link_libraries(clickhouse_functions PRIVATE ${REQUIRED_LLVM_LIBRARIES}) - target_include_directories(clickhouse_functions SYSTEM BEFORE PUBLIC ${LLVM_INCLUDE_DIRS}) +if (TARGET ch_contrib::llvm) + target_link_libraries(clickhouse_functions PRIVATE ch_contrib::llvm) endif () -if(USE_BASE64) - target_include_directories(clickhouse_functions SYSTEM PRIVATE ${BASE64_INCLUDE_DIR}) +if (TARGET ch_contrib::base64) + target_link_libraries(clickhouse_functions PRIVATE ch_contrib::base64) endif() -target_link_libraries(clickhouse_functions PRIVATE lz4) +target_link_libraries(clickhouse_functions PRIVATE ch_contrib::lz4) -if (USE_H3) - target_link_libraries(clickhouse_functions PRIVATE ${H3_LIBRARY}) - target_include_directories(clickhouse_functions SYSTEM PRIVATE ${H3_INCLUDE_DIR}) +if (ENABLE_NLP) + target_link_libraries(clickhouse_functions PRIVATE ch_contrib::cld2) endif() -target_link_libraries(clickhouse_functions PRIVATE hyperscan) +if (TARGET ch_contrib::h3) + target_link_libraries (clickhouse_functions PRIVATE ch_contrib::h3) +endif() -if(USE_SIMDJSON) - target_link_libraries(clickhouse_functions PRIVATE simdjson) +if (TARGET ch_contrib::hyperscan) + target_link_libraries(clickhouse_functions PRIVATE 
ch_contrib::hyperscan) endif() -if(USE_RAPIDJSON) - target_include_directories(clickhouse_functions SYSTEM PRIVATE ${RAPIDJSON_INCLUDE_DIR}) +if (TARGET ch_contrib::simdjson) + target_link_libraries(clickhouse_functions PRIVATE ch_contrib::simdjson) +endif() + +if (TARGET ch_contrib::rapidjson) + target_link_libraries(clickhouse_functions PRIVATE ch_contrib::rapidjson) endif() # ClickHouse developers may use platform-dependent code under some macro (e.g. `#ifdef ENABLE_MULTITARGET`). @@ -121,9 +121,6 @@ add_subdirectory(JSONPath) # Signed integer overflow on user-provided data inside boost::geometry - ignore. set_source_files_properties("pointInPolygon.cpp" PROPERTIES COMPILE_FLAGS -fno-sanitize=signed-integer-overflow) -# target_link_libraries(clickhouse_functions PRIVATE ${S2_LIBRARY}) -target_include_directories(clickhouse_functions SYSTEM PUBLIC ${S2_GEOMETRY_INCLUDE_DIR}) - if (ENABLE_FUZZING) add_compile_definitions(FUZZING_MODE=1) endif () diff --git a/src/Functions/FunctionBase64Conversion.h b/src/Functions/FunctionBase64Conversion.h index 6b1234f62287..a1d6b9666603 100644 --- a/src/Functions/FunctionBase64Conversion.h +++ b/src/Functions/FunctionBase64Conversion.h @@ -124,13 +124,26 @@ class FunctionBase64Conversion : public IFunction if constexpr (std::is_same_v) { - outlen = _tb64e(reinterpret_cast(source), srclen, reinterpret_cast(dst_pos)); + /* + * Some bug in sse arm64 implementation? + * `base64Encode(repeat('a', 46))` returns wrong padding character + */ +#if defined(__aarch64__) + outlen = tb64senc(reinterpret_cast(source), srclen, reinterpret_cast(dst_pos)); +#else + outlen = _tb64e(reinterpret_cast(source), srclen, reinterpret_cast(dst_pos)); +#endif } else if constexpr (std::is_same_v) { if (srclen > 0) { - outlen = _tb64d(reinterpret_cast(source), srclen, reinterpret_cast(dst_pos)); +#if defined(__aarch64__) + outlen = tb64sdec(reinterpret_cast(source), srclen, reinterpret_cast(dst_pos)); +#else + outlen = _tb64d(reinterpret_cast(source), srclen, reinterpret_cast(dst_pos)); +#endif + if (!outlen) throw Exception("Failed to " + getName() + " input '" + String(reinterpret_cast(source), srclen) + "'", ErrorCodes::INCORRECT_DATA); } diff --git a/src/Functions/FunctionMathUnary.h b/src/Functions/FunctionMathUnary.h index d9ca162ba16a..fa10c004e874 100644 --- a/src/Functions/FunctionMathUnary.h +++ b/src/Functions/FunctionMathUnary.h @@ -43,16 +43,19 @@ class FunctionMathUnary : public IFunction DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { - const auto & arg = arguments.front(); - if (!isNumber(arg)) - throw Exception{"Illegal type " + arg->getName() + " of argument of function " + getName(), - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; + const auto & argument = arguments.front(); + + if (!isNumber(argument)) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument of function {}", + argument->getName(), + getName()); /// Integers are converted to Float64. 
- if (Impl::always_returns_float64 || !isFloat(arg)) + if (Impl::always_returns_float64 || !isFloat(argument)) return std::make_shared(); else - return arg; + return argument; } template @@ -122,7 +125,7 @@ class FunctionMathUnary : public IFunction { const auto & src_data = col->getData(); const size_t size = src_data.size(); - UInt32 scale = src_data.getScale(); + UInt32 scale = col->getScale(); auto dst = ColumnVector::create(); auto & dst_data = dst->getData(); @@ -155,8 +158,10 @@ class FunctionMathUnary : public IFunction }; if (!callOnBasicType(col.type->getTypeId(), call)) - throw Exception{"Illegal column " + col.column->getName() + " of argument of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN}; + throw Exception(ErrorCodes::ILLEGAL_COLUMN, + "Illegal column {} of argument of function {}", + col.column->getName(), + getName()); return res; } @@ -164,19 +169,17 @@ class FunctionMathUnary : public IFunction template -struct UnaryFunctionPlain +struct UnaryFunctionVectorized { static constexpr auto name = Name::name; static constexpr auto rows_per_iteration = 1; static constexpr bool always_returns_float64 = true; template - static void execute(const T * src, Float64 * dst) + static void execute(const T * __restrict src, Float64 * __restrict dst) { - dst[0] = static_cast(Function(static_cast(src[0]))); + *dst = Function(static_cast(*src)); } }; -#define UnaryFunctionVectorized UnaryFunctionPlain - } diff --git a/src/Functions/FunctionUnixTimestamp64.h b/src/Functions/FunctionUnixTimestamp64.h index 5248f524a2b7..8c248d79c4b9 100644 --- a/src/Functions/FunctionUnixTimestamp64.h +++ b/src/Functions/FunctionUnixTimestamp64.h @@ -18,6 +18,7 @@ namespace ErrorCodes extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int DECIMAL_OVERFLOW; + extern const int ILLEGAL_COLUMN; } /// Cast DateTime64 to Int64 representation narrowed down (or scaled up) to any scale value defined in Impl. 
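For context, the toUnixTimestamp64* / fromUnixTimestamp64* family this comment introduces treats a DateTime64 as an integer tick count at some decimal scale, and moving between scales is a power-of-ten rescale; the widened executeType dispatch in the next hunk then lets the reverse function accept any integer type rather than only Int64. A tiny sketch of that arithmetic (overflow handling omitted here; note the DECIMAL_OVERFLOW error code declared above):

```cpp
#include <cassert>
#include <cstdint>

/// Rescale a fixed-point tick count from one decimal scale to another,
/// e.g. DateTime64(3) -> DateTime64(6) multiplies by 10^3.
int64_t rescale(int64_t value, uint32_t from_scale, uint32_t to_scale)
{
    while (from_scale < to_scale) { value *= 10; ++from_scale; }
    while (from_scale > to_scale) { value /= 10; --from_scale; }
    return value;
}

int main()
{
    assert(rescale(1500, 3, 6) == 1500000); /// 1.5 s in milliseconds -> microseconds
    assert(rescale(1500000, 6, 3) == 1500); /// and back
}
```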
@@ -108,8 +109,8 @@ class FunctionFromUnixTimestamp64 : public IFunction if (arguments.size() < 1 || arguments.size() > 2) throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} takes one or two arguments", name); - if (!typeid_cast(arguments[0].type.get())) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "The first argument for function {} must be Int64", name); + if (!isInteger(arguments[0].type)) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "The first argument for function {} must be integer", name); std::string timezone; if (arguments.size() == 2) @@ -118,21 +119,48 @@ class FunctionFromUnixTimestamp64 : public IFunction return std::make_shared(target_scale, timezone); } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + template + bool executeType(auto & result_column, const ColumnsWithTypeAndName & arguments, size_t input_rows_count) const { const auto & src = arguments[0]; const auto & col = *src.column; - auto res_column = ColumnDecimal::create(input_rows_count, target_scale); - auto & result_data = res_column->getData(); + if (!checkAndGetColumn>(col)) + return 0; + + auto & result_data = result_column->getData(); - const auto & source_data = typeid_cast(col).getData(); + const auto & source_data = typeid_cast &>(col).getData(); for (size_t i = 0; i < input_rows_count; ++i) result_data[i] = source_data[i]; - return res_column; + return 1; + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + auto result_column = ColumnDecimal::create(input_rows_count, target_scale); + + if (!((executeType(result_column, arguments, input_rows_count)) + || (executeType(result_column, arguments, input_rows_count)) + || (executeType(result_column, arguments, input_rows_count)) + || (executeType(result_column, arguments, input_rows_count)) + || (executeType(result_column, arguments, input_rows_count)) + || (executeType(result_column, arguments, input_rows_count)) + || (executeType(result_column, arguments, input_rows_count)) + || (executeType(result_column, arguments, input_rows_count)) + || (executeType(result_column, arguments, input_rows_count)))) + { + throw Exception(ErrorCodes::ILLEGAL_COLUMN, + "Illegal column {} of first argument of function {}", + arguments[0].column->getName(), + getName()); + } + + return result_column; } + }; } diff --git a/src/Functions/FunctionsBinaryRepr.cpp b/src/Functions/FunctionsBinaryRepr.cpp index 20b2acac88ac..b8733cfc6442 100644 --- a/src/Functions/FunctionsBinaryRepr.cpp +++ b/src/Functions/FunctionsBinaryRepr.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -42,7 +43,7 @@ struct HexImpl static constexpr size_t word_size = 2; template - static void executeOneUInt(T x, char *& out) + static void executeOneUInt(T x, char *& out, bool skip_leading_zero = true, bool auto_close = true) { bool was_nonzero = false; for (int offset = (sizeof(T) - 1) * 8; offset >= 0; offset -= 8) @@ -50,15 +51,18 @@ struct HexImpl UInt8 byte = x >> offset; /// Skip leading zeros - if (byte == 0 && !was_nonzero && offset) //-V560 + if (byte == 0 && !was_nonzero && offset && skip_leading_zero) //-V560 continue; was_nonzero = true; writeHexByteUppercase(byte, out); out += word_size; } - *out = '\0'; - ++out; + if (auto_close) + { + *out = '\0'; + ++out; + } } static void executeOneString(const UInt8 * pos, const UInt8 * end, char *& out) @@ -130,7 +134,7 
+134,7 @@ struct BinImpl static constexpr size_t word_size = 8; template <typename T> - static void executeOneUInt(T x, char *& out) + static void executeOneUInt(T x, char *& out, bool skip_leading_zero = true, bool auto_close = true) { bool was_nonzero = false; for (int offset = (sizeof(T) - 1) * 8; offset >= 0; offset -= 8) @@ -138,15 +142,18 @@ struct BinImpl UInt8 byte = x >> offset; /// Skip leading zeros - if (byte == 0 && !was_nonzero && offset) //-V560 + if (byte == 0 && !was_nonzero && offset && skip_leading_zero) //-V560 continue; was_nonzero = true; writeBinByte(byte, out); out += word_size; } - *out = '\0'; - ++out; + if (auto_close) + { + *out = '\0'; + ++out; + } } template @@ -275,6 +282,7 @@ class EncodeToBinaryRepr : public IFunction !which.isUInt() && !which.isFloat() && !which.isDecimal() && + !which.isUUID() && !which.isAggregateFunction()) throw Exception("Illegal type " + arguments[0]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); @@ -306,7 +314,8 @@ class EncodeToBinaryRepr : public IFunction tryExecuteFloat(column, res_column) || tryExecuteDecimal(column, res_column) || tryExecuteDecimal(column, res_column) || - tryExecuteDecimal(column, res_column)) + tryExecuteDecimal(column, res_column) || + tryExecuteUUID(column, res_column)) return res_column; throw Exception("Illegal column " + arguments[0].column->getName() @@ -480,6 +489,54 @@ class EncodeToBinaryRepr : public IFunction return false; } } + + bool tryExecuteUUID(const IColumn * col, ColumnPtr & col_res) const + { + const ColumnUUID * col_vec = checkAndGetColumn<ColumnUUID>(col); + + static constexpr size_t MAX_LENGTH = sizeof(UUID) * word_size + 1; /// Including trailing zero byte. + + if (col_vec) + { + auto col_str = ColumnString::create(); + ColumnString::Chars & out_vec = col_str->getChars(); + ColumnString::Offsets & out_offsets = col_str->getOffsets(); + + const typename ColumnUUID::Container & in_vec = col_vec->getData(); + const UUID * uuid = in_vec.data(); + + size_t size = in_vec.size(); + out_offsets.resize(size); + out_vec.resize(size * (word_size + 1) + MAX_LENGTH); /// word_size + 1 is the length of one byte in hex/bin plus the zero byte. + + size_t pos = 0; + for (size_t i = 0; i < size; ++i) + { + /// Manual exponential growth, so as not to rely on the linear amortized work time of `resize` (no one guarantees it). + if (pos + MAX_LENGTH > out_vec.size()) + out_vec.resize(out_vec.size() * word_size + MAX_LENGTH); + + char * begin = reinterpret_cast<char *>(&out_vec[pos]); + char * end = begin; + + // Use executeOneUInt instead of executeOneString, + // because the latter outputs the string in memory order. + Impl::executeOneUInt(uuid[i].toUnderType().items[0], end, false, false); + Impl::executeOneUInt(uuid[i].toUnderType().items[1], end, false, true); + + pos += end - begin; + out_offsets[i] = pos; + } + out_vec.resize(pos); + + col_res = std::move(col_str); + return true; + } + else + { + return false; + } + } }; /// Decode number or string from string with binary or hexadecimal representation
diff --git a/src/Functions/FunctionsCharsetClassification.cpp b/src/Functions/FunctionsCharsetClassification.cpp new file mode 100644 index 000000000000..d29dc14fa9fd --- /dev/null +++ b/src/Functions/FunctionsCharsetClassification.cpp @@ -0,0 +1,142 @@ +#include +#include +#include + +#include +#include + +namespace DB +{ + +/* Determine the language and charset of text data. For each text, we build the distribution of byte bigrams. + * Then we use marked-up dictionaries with bigram byte distributions for various languages and charsets. + * Using a naive Bayes classifier, we find the most likely charset and language and return it. + */ + +template <bool detect_language> +struct CharsetClassificationImpl +{ + /* We need to solve the zero-frequency problem for the naive Bayes classifier: + * if a bigram is not found in the text, we assume that the probability of encountering it is 1e-06. + * 1e-06 is the minimal value in our marked-up dictionary. + */ + static constexpr Float64 zero_frequency = 1e-06; + + /// If the data size is bigger than this, behaviour is unspecified for this function. + static constexpr size_t max_string_size = 1u << 15; + + static ALWAYS_INLINE inline Float64 naiveBayes( + const FrequencyHolder::EncodingMap & standard, + const HashMap<UInt16, Float64> & model, + Float64 max_result) + { + Float64 res = 0; + for (const auto & el : model) + { + /// Try to find the bigram in the dictionary. + const auto * it = standard.find(el.getKey()); + if (it != standard.end()) + { + res += el.getMapped() * log(it->getMapped()); + } else + { + res += el.getMapped() * log(zero_frequency); + } + /// If at some step the result has become less than the current maximum, then it makes no sense to count it fully. + if (res < max_result) + { + return res; + } + } + return res; + } + + /// Count how many times each bigram occurs in the text. + static ALWAYS_INLINE inline void calculateStats( + const UInt8 * data, + const size_t size, + HashMap<UInt16, Float64> & model) + { + UInt16 hash = 0; + for (size_t i = 0; i < size; ++i) + { + hash <<= 8; + hash += *(data + i); + ++model[hash]; + } + } + + static void vector( + const ColumnString::Chars & data, + const ColumnString::Offsets & offsets, + ColumnString::Chars & res_data, + ColumnString::Offsets & res_offsets) + { + const auto & encodings_freq = FrequencyHolder::getInstance().getEncodingsFrequency(); + + if (detect_language) + /// 2 chars for ISO code + 1 zero byte + res_data.reserve(offsets.size() * 3); + else + /// Mean charset length is 8 + res_data.reserve(offsets.size() * 8); + + res_offsets.resize(offsets.size()); + + size_t res_offset = 0; + + for (size_t i = 0; i < offsets.size(); ++i) + { + const UInt8 * str = data.data() + offsets[i - 1]; + const size_t str_len = offsets[i] - offsets[i - 1] - 1; + + std::string_view res; + + HashMap<UInt16, Float64> model; + calculateStats(str, str_len, model); + + /// Go through the dictionary and find the charset with the highest weight + Float64 max_result = log(zero_frequency) * (max_string_size); + for (const auto & item : encodings_freq) + { + Float64 score = naiveBayes(item.map, model, max_result); + if (max_result < score) + { + max_result = score; + res = detect_language ?
item.lang : item.name; + } + } + + res_data.resize(res_offset + res.size() + 1); + memcpy(&res_data[res_offset], res.data(), res.size()); + + res_data[res_offset + res.size()] = 0; + res_offset += res.size() + 1; + + res_offsets[i] = res_offset; + } + } +}; + + +struct NameDetectCharset +{ + static constexpr auto name = "detectCharset"; +}; + +struct NameDetectLanguageUnknown +{ + static constexpr auto name = "detectLanguageUnknown"; +}; + + +using FunctionDetectCharset = FunctionTextClassificationString, NameDetectCharset>; +using FunctionDetectLanguageUnknown = FunctionTextClassificationString, NameDetectLanguageUnknown>; + +void registerFunctionDetectCharset(FunctionFactory & factory) +{ + factory.registerFunction(); + factory.registerFunction(); +} + +} diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index f32d5df8a213..4f5f6ae483f4 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -32,7 +32,7 @@ void registerFunctionsConversion(FunctionFactory & factory) factory.registerFunction(); factory.registerFunction(); - /// MysQL compatibility alias. + /// MySQL compatibility alias. factory.registerFunction("DATE", FunctionFactory::CaseInsensitive); factory.registerFunction(); factory.registerFunction(); diff --git a/src/Functions/FunctionsConversion.h b/src/Functions/FunctionsConversion.h index 62e62b5f5dcb..909803d7cd76 100644 --- a/src/Functions/FunctionsConversion.h +++ b/src/Functions/FunctionsConversion.h @@ -152,9 +152,11 @@ struct ConvertImpl if (const ColVecFrom * col_from = checkAndGetColumn(named_from.column.get())) { typename ColVecTo::MutablePtr col_to = nullptr; + if constexpr (IsDataTypeDecimal) { UInt32 scale; + if constexpr (std::is_same_v || std::is_same_v) { @@ -185,6 +187,15 @@ struct ConvertImpl bool result_is_bool = isBool(result_type); for (size_t i = 0; i < input_rows_count; ++i) { + if constexpr (std::is_same_v) + { + if (result_is_bool) + { + vec_to[i] = vec_from[i] != FromFieldType(0); + continue; + } + } + if constexpr (std::is_same_v != std::is_same_v) { throw Exception("Conversion between numeric types and UUID is not supported", ErrorCodes::NOT_IMPLEMENTED); @@ -199,11 +210,11 @@ struct ConvertImpl bool convert_result = false; if constexpr (IsDataTypeDecimal && IsDataTypeDecimal) - convert_result = tryConvertDecimals(vec_from[i], vec_from.getScale(), vec_to.getScale(), result); + convert_result = tryConvertDecimals(vec_from[i], col_from->getScale(), col_to->getScale(), result); else if constexpr (IsDataTypeDecimal && IsDataTypeNumber) - convert_result = tryConvertFromDecimal(vec_from[i], vec_from.getScale(), result); + convert_result = tryConvertFromDecimal(vec_from[i], col_from->getScale(), result); else if constexpr (IsDataTypeNumber && IsDataTypeDecimal) - convert_result = tryConvertToDecimal(vec_from[i], vec_to.getScale(), result); + convert_result = tryConvertToDecimal(vec_from[i], col_to->getScale(), result); if (convert_result) vec_to[i] = result; @@ -216,11 +227,11 @@ struct ConvertImpl else { if constexpr (IsDataTypeDecimal && IsDataTypeDecimal) - vec_to[i] = convertDecimals(vec_from[i], vec_from.getScale(), vec_to.getScale()); + vec_to[i] = convertDecimals(vec_from[i], col_from->getScale(), col_to->getScale()); else if constexpr (IsDataTypeDecimal && IsDataTypeNumber) - vec_to[i] = convertFromDecimal(vec_from[i], vec_from.getScale()); + vec_to[i] = convertFromDecimal(vec_from[i], col_from->getScale()); else if constexpr (IsDataTypeNumber && IsDataTypeDecimal) - 
vec_to[i] = convertToDecimal(vec_from[i], vec_to.getScale()); + vec_to[i] = convertToDecimal(vec_from[i], col_to->getScale()); else throw Exception("Unsupported data type in conversion function", ErrorCodes::CANNOT_CONVERT_TYPE); } @@ -269,12 +280,6 @@ struct ConvertImpl vec_to[i] = static_cast(vec_from[i]); } } - - if constexpr (std::is_same_v) - { - if (result_is_bool) - vec_to[i] = static_cast(vec_to[i]); - } } } @@ -817,7 +822,7 @@ struct ConvertImpl) data_to.resize(size * (strlen("YYYY-MM-DD hh:mm:ss") + 1)); else if constexpr (std::is_same_v) - data_to.resize(size * (strlen("YYYY-MM-DD hh:mm:ss.") + vec_from.getScale() + 1)); + data_to.resize(size * (strlen("YYYY-MM-DD hh:mm:ss.") + col_from->getScale() + 1)); else data_to.resize(size * 3); /// Arbitrary @@ -1166,7 +1171,7 @@ struct ConvertThroughParsing if constexpr (to_datetime64) { DateTime64 res = 0; - parseDateTime64BestEffort(res, vec_to.getScale(), read_buffer, *local_time_zone, *utc_time_zone); + parseDateTime64BestEffort(res, col_to->getScale(), read_buffer, *local_time_zone, *utc_time_zone); vec_to[i] = res; } else @@ -1181,7 +1186,7 @@ struct ConvertThroughParsing if constexpr (to_datetime64) { DateTime64 res = 0; - parseDateTime64BestEffortUS(res, vec_to.getScale(), read_buffer, *local_time_zone, *utc_time_zone); + parseDateTime64BestEffortUS(res, col_to->getScale(), read_buffer, *local_time_zone, *utc_time_zone); vec_to[i] = res; } else @@ -1196,12 +1201,12 @@ struct ConvertThroughParsing if constexpr (to_datetime64) { DateTime64 value = 0; - readDateTime64Text(value, vec_to.getScale(), read_buffer, *local_time_zone); + readDateTime64Text(value, col_to->getScale(), read_buffer, *local_time_zone); vec_to[i] = value; } else if constexpr (IsDataTypeDecimal) SerializationDecimal::readText( - vec_to[i], read_buffer, ToDataType::maxPrecision(), vec_to.getScale()); + vec_to[i], read_buffer, ToDataType::maxPrecision(), col_to->getScale()); else { parseImpl(vec_to[i], read_buffer, local_time_zone); @@ -1220,7 +1225,7 @@ struct ConvertThroughParsing if constexpr (to_datetime64) { DateTime64 res = 0; - parsed = tryParseDateTime64BestEffort(res, vec_to.getScale(), read_buffer, *local_time_zone, *utc_time_zone); + parsed = tryParseDateTime64BestEffort(res, col_to->getScale(), read_buffer, *local_time_zone, *utc_time_zone); vec_to[i] = res; } else @@ -1241,12 +1246,12 @@ struct ConvertThroughParsing if constexpr (to_datetime64) { DateTime64 value = 0; - parsed = tryReadDateTime64Text(value, vec_to.getScale(), read_buffer, *local_time_zone); + parsed = tryReadDateTime64Text(value, col_to->getScale(), read_buffer, *local_time_zone); vec_to[i] = value; } else if constexpr (IsDataTypeDecimal) parsed = SerializationDecimal::tryReadText( - vec_to[i], read_buffer, ToDataType::maxPrecision(), vec_to.getScale()); + vec_to[i], read_buffer, ToDataType::maxPrecision(), col_to->getScale()); else parsed = tryParseImpl(vec_to[i], read_buffer, local_time_zone); } @@ -1769,6 +1774,12 @@ class FunctionConvert : public IFunction } } + if constexpr (std::is_same_v) + { + if (from_type->getCustomSerialization()) + return ConvertImplGenericToString::execute(arguments, result_type, input_rows_count); + } + bool done; if constexpr (to_string_or_fixed_string) { @@ -2802,10 +2813,16 @@ class FunctionCast final : public FunctionCastBase } const auto * from_type = checkAndGetDataType(from_type_untyped.get()); + const auto * from_type_map = checkAndGetDataType(from_type_untyped.get()); + + /// Convert from Map + if (from_type_map) + from_type = 
checkAndGetDataType(from_type_map->getNestedType().get()); + if (!from_type) { throw Exception(ErrorCodes::TYPE_MISMATCH, - "CAST AS Array can only be performed between same-dimensional Array or String types"); + "CAST AS Array can only be performed between same-dimensional Array, Map or String types"); } DataTypePtr from_nested_type = from_type->getNestedType(); @@ -2825,9 +2842,16 @@ class FunctionCast final : public FunctionCastBase return [nested_function, from_nested_type, to_nested_type]( ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable * nullable_source, size_t /*input_rows_count*/) -> ColumnPtr { - const auto & array_arg = arguments.front(); + const auto & argument_column = arguments.front(); - if (const ColumnArray * col_array = checkAndGetColumn(array_arg.column.get())) + const ColumnArray * col_array = nullptr; + + if (const ColumnMap * col_map = checkAndGetColumn(argument_column.column.get())) + col_array = &col_map->getNestedColumn(); + else + col_array = checkAndGetColumn(argument_column.column.get()); + + if (col_array) { /// create columns for converting nested column containing original and result columns ColumnsWithTypeAndName nested_columns{{ col_array->getDataPtr(), from_nested_type, "" }}; @@ -2839,7 +2863,11 @@ class FunctionCast final : public FunctionCastBase return ColumnArray::create(result_column, col_array->getOffsetsPtr()); } else - throw Exception{"Illegal column " + array_arg.column->getName() + " for function CAST AS Array", ErrorCodes::LOGICAL_ERROR}; + { + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Illegal column {} for function CAST AS Array", + argument_column.column->getName()); + } }; } @@ -3389,7 +3417,7 @@ class FunctionCast final : public FunctionCastBase return false; }; - auto make_custom_serialization_wrapper = [&](const auto & types) -> bool + auto make_custom_serialization_wrapper = [&](const auto & types) -> bool { using Types = std::decay_t; using ToDataType = typename Types::RightType; diff --git a/src/Functions/FunctionsHashing.cpp b/src/Functions/FunctionsHashing.cpp index 3f334e9c302d..cbafd4bcec24 100644 --- a/src/Functions/FunctionsHashing.cpp +++ b/src/Functions/FunctionsHashing.cpp @@ -37,9 +37,7 @@ void registerFunctionsHashing(FunctionFactory & factory) factory.registerFunction(); factory.registerFunction(); -#if USE_XXHASH factory.registerFunction(); factory.registerFunction(); -#endif } } diff --git a/src/Functions/FunctionsHashing.h b/src/Functions/FunctionsHashing.h index a42e6b0bf659..88a0e9524b3b 100644 --- a/src/Functions/FunctionsHashing.h +++ b/src/Functions/FunctionsHashing.h @@ -12,10 +12,7 @@ #include #include #include - -#if USE_XXHASH -# include -#endif +#include #if USE_SSL # include @@ -551,9 +548,6 @@ struct ImplMetroHash64 static constexpr bool use_int_hash_for_pods = true; }; - -#if USE_XXHASH - struct ImplXxHash32 { static constexpr auto name = "xxHash32"; @@ -574,7 +568,6 @@ struct ImplXxHash32 static constexpr bool use_int_hash_for_pods = false; }; - struct ImplXxHash64 { static constexpr auto name = "xxHash64"; @@ -592,9 +585,6 @@ struct ImplXxHash64 static constexpr bool use_int_hash_for_pods = false; }; -#endif - - template class FunctionStringHashFixedString : public IFunction { @@ -1413,9 +1403,7 @@ using FunctionJavaHash = FunctionAnyHash; using FunctionJavaHashUTF16LE = FunctionAnyHash; using FunctionHiveHash = FunctionAnyHash; -#if USE_XXHASH - using FunctionXxHash32 = FunctionAnyHash; - using FunctionXxHash64 = FunctionAnyHash; -#endif +using FunctionXxHash32 = 
FunctionAnyHash<ImplXxHash32>; +using FunctionXxHash64 = FunctionAnyHash<ImplXxHash64>; }
diff --git a/src/Functions/FunctionsLanguageClassification.cpp b/src/Functions/FunctionsLanguageClassification.cpp new file mode 100644 index 000000000000..521a4b0301e4 --- /dev/null +++ b/src/Functions/FunctionsLanguageClassification.cpp @@ -0,0 +1,231 @@ +#include "config_functions.h" + +#if USE_NLP + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace DB +{ +/* Determine the language of Unicode UTF-8 text. + * Uses the cld2 library https://github.com/CLD2Owners/cld2 + */ + +namespace ErrorCodes +{ +extern const int ILLEGAL_TYPE_OF_ARGUMENT; +extern const int ILLEGAL_COLUMN; +extern const int SUPPORT_IS_DISABLED; +} + +struct FunctionDetectLanguageImpl +{ + static ALWAYS_INLINE inline std::string_view codeISO(std::string_view code_string) + { + if (code_string.ends_with("-Latn")) + code_string.remove_suffix(5); + + if (code_string.ends_with("-Hant")) + code_string.remove_suffix(5); + + // Old deprecated codes + if (code_string == "iw") + return "he"; + + if (code_string == "jw") + return "jv"; + + if (code_string == "in") + return "id"; + + if (code_string == "mo") + return "ro"; + + // Some languages do not have 2-letter codes; for example, the code for Cebuano is ceb + if (code_string.size() != 2) + return "other"; + + return code_string; + } + + static void vector( + const ColumnString::Chars & data, + const ColumnString::Offsets & offsets, + ColumnString::Chars & res_data, + ColumnString::Offsets & res_offsets) + { + /// Constant 3 is based on the fact that in general we need 2 characters for the ISO code + 1 zero byte + res_data.reserve(offsets.size() * 3); + res_offsets.resize(offsets.size()); + + bool is_reliable; + size_t res_offset = 0; + + for (size_t i = 0; i < offsets.size(); ++i) + { + const UInt8 * str = data.data() + offsets[i - 1]; + const size_t str_len = offsets[i] - offsets[i - 1] - 1; + + std::string_view res; + + if (UTF8::isValidUTF8(str, str_len)) + { + auto lang = CLD2::DetectLanguage(reinterpret_cast<const char *>(str), str_len, true, &is_reliable); + res = codeISO(LanguageCode(lang)); + } + else + { + res = "un"; + } + + res_data.resize(res_offset + res.size() + 1); + memcpy(&res_data[res_offset], res.data(), res.size()); + + res_data[res_offset + res.size()] = 0; + res_offset += res.size() + 1; + + res_offsets[i] = res_offset; + } + } +}; + +class FunctionDetectLanguageMixed : public IFunction +{ +public: + static constexpr auto name = "detectLanguageMixed"; + + /// Number of top results + static constexpr auto top_N = 3; + + static FunctionPtr create(ContextPtr context) + { + if (!context->getSettingsRef().allow_experimental_nlp_functions) + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, + "Natural language processing function '{}' is experimental.
Set `allow_experimental_nlp_functions` setting to enable it", name); + + return std::make_shared(); + } + + String getName() const override { return name; } + + size_t getNumberOfArguments() const override { return 1; } + + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + + bool useDefaultImplementationForConstants() const override { return true; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + if (!isString(arguments[0])) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument of function {}. Must be String.", + arguments[0]->getName(), getName()); + + return std::make_shared(std::make_shared(), std::make_shared()); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override + { + const auto & column = arguments[0].column; + const ColumnString * col = checkAndGetColumn(column.get()); + + if (!col) + throw Exception( + "Illegal columns " + arguments[0].column->getName() + " of arguments of function " + getName(), + ErrorCodes::ILLEGAL_COLUMN); + + const auto & input_data = col->getChars(); + const auto & input_offsets = col->getOffsets(); + + /// Create and fill the result map. + + const auto & result_type_map = static_cast(*result_type); + const DataTypePtr & key_type = result_type_map.getKeyType(); + const DataTypePtr & value_type = result_type_map.getValueType(); + + MutableColumnPtr keys_data = key_type->createColumn(); + MutableColumnPtr values_data = value_type->createColumn(); + MutableColumnPtr offsets = DataTypeNumber().createColumn(); + + size_t total_elements = input_rows_count * top_N; + keys_data->reserve(total_elements); + values_data->reserve(total_elements); + offsets->reserve(input_rows_count); + + bool is_reliable; + CLD2::Language result_lang_top3[top_N]; + int32_t pc[top_N]; + int bytes[top_N]; + + IColumn::Offset current_offset = 0; + for (size_t i = 0; i < input_rows_count; ++i) + { + const UInt8 * str = input_data.data() + input_offsets[i - 1]; + const size_t str_len = input_offsets[i] - input_offsets[i - 1] - 1; + + if (UTF8::isValidUTF8(str, str_len)) + { + CLD2::DetectLanguageSummary(reinterpret_cast(str), str_len, true, result_lang_top3, pc, bytes, &is_reliable); + + for (size_t j = 0; j < top_N; ++j) + { + if (pc[j] == 0) + break; + + auto res_str = FunctionDetectLanguageImpl::codeISO(LanguageCode(result_lang_top3[j])); + Float32 res_float = static_cast(pc[j]) / 100; + + keys_data->insertData(res_str.data(), res_str.size()); + values_data->insertData(reinterpret_cast(&res_float), sizeof(res_float)); + ++current_offset; + } + } + else + { + std::string_view res_str = "un"; + Float32 res_float = 0; + + keys_data->insertData(res_str.data(), res_str.size()); + values_data->insertData(reinterpret_cast(&res_float), sizeof(res_float)); + ++current_offset; + } + offsets->insert(current_offset); + } + + auto nested_column = ColumnArray::create( + ColumnTuple::create(Columns{std::move(keys_data), std::move(values_data)}), + std::move(offsets)); + + return ColumnMap::create(nested_column); + } +}; + +struct NameDetectLanguage +{ + static constexpr auto name = "detectLanguage"; +}; + + +using FunctionDetectLanguage = FunctionTextClassificationString; + +void registerFunctionsDetectLanguage(FunctionFactory & factory) +{ + factory.registerFunction(); + factory.registerFunction(); +} + +} +#endif diff --git a/src/Functions/FunctionsLogical.cpp 
b/src/Functions/FunctionsLogical.cpp index 87a2ecd4c57b..0dee048dae39 100644 --- a/src/Functions/FunctionsLogical.cpp +++ b/src/Functions/FunctionsLogical.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -483,10 +484,14 @@ DataTypePtr FunctionAnyArityLogical::getReturnTypeImpl(const DataTyp ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION); bool has_nullable_arguments = false; + bool has_bool_arguments = false; for (size_t i = 0; i < arguments.size(); ++i) { const auto & arg_type = arguments[i]; + if (isBool(arg_type)) + has_bool_arguments = true; + if (!has_nullable_arguments) { has_nullable_arguments = arg_type->isNullable(); @@ -503,7 +508,7 @@ DataTypePtr FunctionAnyArityLogical::getReturnTypeImpl(const DataTyp ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); } - auto result_type = std::make_shared(); + auto result_type = has_bool_arguments ? DataTypeFactory::instance().get("Bool") : std::make_shared(); return has_nullable_arguments ? makeNullable(result_type) : result_type; @@ -711,7 +716,7 @@ DataTypePtr FunctionUnaryLogical::getReturnTypeImpl(const DataTypes + ") of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - return std::make_shared(); + return isBool(arguments[0]) ? DataTypeFactory::instance().get("Bool") : std::make_shared(); } template
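The net effect of the FunctionsLogical hunks above: and/or/xor and not keep returning UInt8 for ordinary numeric arguments, but as soon as any argument is typed as the Bool alias, the result type becomes Bool as well (wrapped in Nullable when needed). A simplified sketch of that return-type rule, with plain strings standing in for ClickHouse's IDataType (an assumption for illustration only):

```cpp
#include <cassert>
#include <string>
#include <vector>

/// Simplified stand-in for FunctionAnyArityLogical::getReturnTypeImpl:
/// the result stays UInt8 unless at least one argument is the Bool alias.
std::string logicalReturnType(const std::vector<std::string> & argument_types)
{
    bool has_bool_arguments = false;
    for (const auto & type : argument_types)
        if (type == "Bool")
            has_bool_arguments = true;
    return has_bool_arguments ? "Bool" : "UInt8";
}

int main()
{
    assert(logicalReturnType({"UInt8", "UInt64"}) == "UInt8");
    assert(logicalReturnType({"Bool", "UInt8"}) == "Bool");
    return 0;
}
```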