diff --git a/.github/actions/common_setup/action.yml b/.github/actions/common_setup/action.yml index b02413adc44..ab446c3ae7a 100644 --- a/.github/actions/common_setup/action.yml +++ b/.github/actions/common_setup/action.yml @@ -26,6 +26,16 @@ runs: echo "The GITHUB_JOB_OVERRIDDEN ENV is unset, and must be set for the nested jobs" exit 1 fi + - name: Setup zram + shell: bash + run: | + sudo modprobe zram + MemTotal=$(grep -Po "(?<=MemTotal:)\s+\d+" /proc/meminfo) # KiB + Percent=200 + ZRAM_SIZE=$(($MemTotal / 1024 / 1024 * $Percent / 100)) # Convert to GiB + .github/retry.sh 30 2 sudo zramctl --size ${ZRAM_SIZE}GiB --algorithm zstd /dev/zram0 + sudo mkswap /dev/zram0 && sudo swapon -p 100 /dev/zram0 + sudo sysctl vm.swappiness=200 - name: Setup $TEMP_PATH shell: bash run: | diff --git a/.github/workflows/regression.yml b/.github/workflows/regression.yml index aa4b2f68904..66682ea320e 100644 --- a/.github/workflows/regression.yml +++ b/.github/workflows/regression.yml @@ -93,6 +93,9 @@ env: AWS_DEFAULT_REGION: ${{ secrets.AWS_REPORT_REGION }} DOCKER_USERNAME: ${{ secrets.DOCKER_USERNAME }} DOCKER_PASSWORD: ${{ secrets.DOCKER_PASSWORD }} + CHECKS_DATABASE_HOST: ${{ secrets.CHECKS_DATABASE_HOST }} + CHECKS_DATABASE_USER: ${{ secrets.CHECKS_DATABASE_USER }} + CHECKS_DATABASE_PASSWORD: ${{ secrets.CHECKS_DATABASE_PASSWORD }} args: --test-to-end --no-colors --local @@ -522,6 +525,7 @@ jobs: uses: actions/checkout@v4 with: repository: Altinity/clickhouse-regression + ref: ${{ inputs.commit }} - name: Set envs run: | cat >> "$GITHUB_ENV" << 'EOF' diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml index 8a529b82627..bcc42b85f01 100644 --- a/.github/workflows/release_branches.yml +++ b/.github/workflows/release_branches.yml @@ -30,7 +30,7 @@ on: # yamllint disable-line rule:truthy jobs: DockerHubPushAarch64: - runs-on: [self-hosted, altinity-on-demand, altinity-type-cax41, altinity-in-hel1, altinity-image-arm-system-ubuntu-22.04] + runs-on: [self-hosted, altinity-on-demand, altinity-type-cax41, altinity-in-hel1, altinity-image-arm-snapshot-22.04-arm, altinity-startup-snapshot, altinity-setup-none] steps: - name: Check out repository code uses: Altinity/checkout@19599efdf36c4f3f30eb55d5bb388896faea69f6 @@ -52,7 +52,7 @@ jobs: path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json DockerHubPushAmd64: - runs-on: [self-hosted, altinity-on-demand, altinity-type-cpx51, altinity-in-ash, altinity-image-x86-system-ubuntu-22.04] + runs-on: [self-hosted, altinity-on-demand, altinity-type-cpx51, altinity-in-ash, altinity-image-x86-snapshot-22.04-amd, altinity-startup-snapshot, altinity-setup-none] steps: - name: Check out repository code uses: Altinity/checkout@19599efdf36c4f3f30eb55d5bb388896faea69f6 @@ -75,7 +75,7 @@ jobs: DockerHubPush: needs: [DockerHubPushAmd64, DockerHubPushAarch64] - runs-on: [self-hosted, altinity-on-demand, altinity-type-cpx51, altinity-in-ash, altinity-image-x86-system-ubuntu-22.04] + runs-on: [self-hosted, altinity-on-demand, altinity-type-cpx51, altinity-in-ash, altinity-image-x86-snapshot-22.04-amd, altinity-startup-snapshot, altinity-setup-none] steps: - name: Check out repository code uses: Altinity/checkout@19599efdf36c4f3f30eb55d5bb388896faea69f6 @@ -119,7 +119,7 @@ jobs: secrets: inherit with: test_name: Compatibility check X86 - runner_type: altinity-on-demand, altinity-type-cpx51, altinity-in-ash, altinity-image-x86-system-ubuntu-22.04 + runner_type: altinity-on-demand, altinity-type-cpx51, altinity-in-ash, 
altinity-image-x86-snapshot-22.04-amd, altinity-startup-snapshot, altinity-setup-none timeout_minutes: 180 run_command: | cd "$REPO_COPY/tests/ci" @@ -131,7 +131,7 @@ jobs: secrets: inherit with: test_name: Compatibility check Aarch64 - runner_type: altinity-on-demand, altinity-type-cax41, altinity-image-arm-system-ubuntu-22.04 + runner_type: altinity-on-demand, altinity-type-cax41, altinity-image-arm-snapshot-22.04-arm, altinity-startup-snapshot, altinity-setup-none timeout_minutes: 180 run_command: | cd "$REPO_COPY/tests/ci" @@ -148,7 +148,7 @@ jobs: build_name: package_release checkout_depth: 0 timeout_minutes: 180 - runner_type: altinity-setup-builder, altinity-type-ccx53, altinity-on-demand, altinity-in-ash, altinity-image-x86-system-ubuntu-22.04 + runner_type: altinity-on-demand, altinity-type-ccx53, altinity-in-hil, altinity-image-x86-snapshot-22.04-amd, altinity-startup-snapshot, altinity-setup-none additional_envs: | CLICKHOUSE_STABLE_VERSION_SUFFIX=altinitystable @@ -160,7 +160,7 @@ jobs: build_name: package_aarch64 checkout_depth: 0 timeout_minutes: 180 - runner_type: altinity-setup-builder, altinity-type-ccx53, altinity-on-demand, altinity-in-ash, altinity-image-x86-system-ubuntu-22.04 + runner_type: altinity-on-demand, altinity-type-ccx53, altinity-in-hil, altinity-image-x86-snapshot-22.04-amd, altinity-startup-snapshot, altinity-setup-none additional_envs: | CLICKHOUSE_STABLE_VERSION_SUFFIX=altinitystable @@ -172,7 +172,7 @@ jobs: build_name: package_asan checkout_depth: 0 timeout_minutes: 180 - runner_type: altinity-on-demand, altinity-setup-builder, altinity-type-ccx53, altinity-in-ash, altinity-image-x86-system-ubuntu-22.04 + runner_type: altinity-on-demand, altinity-type-ccx53, altinity-in-hil, altinity-image-x86-snapshot-22.04-amd, altinity-startup-snapshot, altinity-setup-none additional_envs: | CLICKHOUSE_STABLE_VERSION_SUFFIX=altinitystable @@ -184,7 +184,7 @@ jobs: build_name: package_ubsan checkout_depth: 0 timeout_minutes: 180 - runner_type: altinity-on-demand, altinity-setup-builder, altinity-type-ccx53, altinity-in-ash, altinity-image-x86-system-ubuntu-22.04 + runner_type: altinity-on-demand, altinity-type-ccx53, altinity-in-hil, altinity-image-x86-snapshot-22.04-amd, altinity-startup-snapshot, altinity-setup-none additional_envs: | CLICKHOUSE_STABLE_VERSION_SUFFIX=altinitystable @@ -196,7 +196,7 @@ jobs: build_name: package_tsan checkout_depth: 0 timeout_minutes: 180 - runner_type: altinity-on-demand, altinity-setup-builder, altinity-type-ccx53, altinity-in-ash, altinity-image-x86-system-ubuntu-22.04 + runner_type: altinity-on-demand, altinity-type-ccx53, altinity-in-hil, altinity-image-x86-snapshot-22.04-amd, altinity-startup-snapshot, altinity-setup-none additional_envs: | CLICKHOUSE_STABLE_VERSION_SUFFIX=altinitystable @@ -208,7 +208,7 @@ jobs: build_name: package_msan checkout_depth: 0 timeout_minutes: 180 - runner_type: altinity-on-demand, altinity-setup-builder, altinity-type-ccx53, altinity-in-ash, altinity-image-x86-system-ubuntu-22.04 + runner_type: altinity-on-demand, altinity-type-ccx53, altinity-in-hil, altinity-image-x86-snapshot-22.04-amd, altinity-startup-snapshot, altinity-setup-none additional_envs: | CLICKHOUSE_STABLE_VERSION_SUFFIX=altinitystable @@ -220,7 +220,7 @@ jobs: build_name: package_debug checkout_depth: 0 timeout_minutes: 180 - runner_type: altinity-on-demand, altinity-setup-builder, altinity-type-ccx53, altinity-in-ash, altinity-image-x86-system-ubuntu-22.04 + runner_type: altinity-on-demand, altinity-type-ccx53, altinity-in-hil, 
altinity-image-x86-snapshot-22.04-amd, altinity-startup-snapshot, altinity-setup-none additional_envs: | CLICKHOUSE_STABLE_VERSION_SUFFIX=altinitystable @@ -231,7 +231,7 @@ jobs: needs: - BuilderDebRelease - BuilderDebAarch64 - runs-on: [self-hosted, altinity-on-demand, altinity-type-cpx41, altinity-in-ash, altinity-image-x86-system-ubuntu-22.04] + runs-on: [self-hosted, altinity-on-demand, altinity-type-cpx41, altinity-in-ash, altinity-image-x86-snapshot-22.04-amd, altinity-startup-snapshot, altinity-setup-none] timeout-minutes: 180 steps: - name: Check out repository code @@ -267,7 +267,7 @@ jobs: secrets: inherit with: test_name: ClickHouse build check - runner_type: altinity-on-demand, altinity-setup-reporter, altinity-type-cax11, altinity-in-hel1, altinity-image-arm-system-ubuntu-22.04 + runner_type: altinity-on-demand, altinity-type-cax11, altinity-in-hel1, altinity-image-arm-snapshot-22.04-arm, altinity-startup-snapshot, altinity-setup-none timeout_minutes: 180 additional_envs: | NEEDS_DATA< /etc/timezone +ENV MAX_RUN_TIME=9000 +RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezon ARG sqllogic_test_repo="https://github.com/gregrahn/sqllogictest.git" diff --git a/docker/test/util/Dockerfile b/docker/test/util/Dockerfile index f2041fe445c..02e5171a34d 100644 --- a/docker/test/util/Dockerfile +++ b/docker/test/util/Dockerfile @@ -12,7 +12,7 @@ RUN apt-get update \ && apt-get install \ apt-transport-https='2.4.*' \ apt-utils='2.4.*' \ - ca-certificates='20230311ubuntu0.22.04.*' \ + ca-certificates \ curl='7.81.*' \ dnsutils='1:9.18.*' \ gnupg='2.2.*' \ diff --git a/packages/clickhouse-client.yaml b/packages/clickhouse-client.yaml index 059562835d8..cc87aaf338f 100644 --- a/packages/clickhouse-client.yaml +++ b/packages/clickhouse-client.yaml @@ -16,7 +16,7 @@ homepage: "https://altinity.com/" license: "Apache" section: "database" priority: "optional" -maintainer: "ClickHouse Dev Team " +maintainer: "Altinity Dev Team https://github.com/Altinity/ClickHouse/" deb: fields: Source: clickhouse diff --git a/packages/clickhouse-common-static-dbg.yaml b/packages/clickhouse-common-static-dbg.yaml index 63b95b03494..0821b029180 100644 --- a/packages/clickhouse-common-static-dbg.yaml +++ b/packages/clickhouse-common-static-dbg.yaml @@ -16,7 +16,7 @@ homepage: "https://altinity.com/" license: "Apache" section: "database" priority: "optional" -maintainer: "ClickHouse Dev Team " +maintainer: "Altinity Dev Team https://github.com/Altinity/ClickHouse" deb: fields: Source: clickhouse diff --git a/packages/clickhouse-common-static.yaml b/packages/clickhouse-common-static.yaml index 96dd2d890a1..0140db3994b 100644 --- a/packages/clickhouse-common-static.yaml +++ b/packages/clickhouse-common-static.yaml @@ -16,7 +16,7 @@ homepage: "https://altinity.com/" license: "Apache" section: "database" priority: "optional" -maintainer: "ClickHouse Dev Team " +maintainer: "Altinity Dev Team https://github.com/Altinity/ClickHouse" deb: fields: Source: clickhouse diff --git a/packages/clickhouse-keeper-dbg.yaml b/packages/clickhouse-keeper-dbg.yaml index c1c8a178ba7..07aaafae83e 100644 --- a/packages/clickhouse-keeper-dbg.yaml +++ b/packages/clickhouse-keeper-dbg.yaml @@ -16,7 +16,7 @@ homepage: "https://altinity.com/" license: "Apache" section: "database" priority: "optional" -maintainer: "ClickHouse Dev Team " +maintainer: "Altinity Dev Team https://github.com/Altinity/ClickHouse" deb: fields: Source: clickhouse diff --git a/packages/clickhouse-keeper.yaml b/packages/clickhouse-keeper.yaml 
index f9780cd4ad9..4fea5798ff9 100644 --- a/packages/clickhouse-keeper.yaml +++ b/packages/clickhouse-keeper.yaml @@ -16,7 +16,7 @@ homepage: "https://altinity.com/" license: "Apache" section: "database" priority: "optional" -maintainer: "ClickHouse Dev Team " +maintainer: "Altinity Dev Team https://github.com/Altinity/ClickHouse" deb: fields: Source: clickhouse diff --git a/packages/clickhouse-server.yaml b/packages/clickhouse-server.yaml index 9a004c3eb1c..3f396fe3513 100644 --- a/packages/clickhouse-server.yaml +++ b/packages/clickhouse-server.yaml @@ -16,7 +16,7 @@ homepage: "https://altinity.com/" license: "Apache" section: "database" priority: "optional" -maintainer: "ClickHouse Dev Team " +maintainer: "Altinity Dev Team https://github.com/Altinity/ClickHouse" deb: fields: Source: clickhouse diff --git a/src/Core/Settings.h b/src/Core/Settings.h index b3fe90b8644..eab06e06db0 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -594,6 +594,7 @@ class IColumn; M(Bool, optimize_use_projections, true, "Automatically choose projections to perform SELECT query", 0) ALIAS(allow_experimental_projection_optimization) \ M(Bool, optimize_use_implicit_projections, true, "Automatically choose implicit projections to perform SELECT query", 0) \ M(Bool, force_optimize_projection, false, "If projection optimization is enabled, SELECT queries need to use projection", 0) \ + M(String, preferred_optimize_projection_name, "", "If it is set to a non-empty string, ClickHouse tries to apply specified projection", 0) \ M(Bool, async_socket_for_remote, true, "Asynchronously read from socket executing remote query", 0) \ M(Bool, async_query_sending_for_remote, true, "Asynchronously create connections and send query to shards in remote query", 0) \ M(Bool, insert_null_as_default, true, "Insert DEFAULT values instead of NULL in INSERT SELECT (UNION ALL)", 0) \ diff --git a/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp b/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp index 0599a0fa369..3040f408cb5 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp @@ -411,7 +411,6 @@ struct MinMaxProjectionCandidate { AggregateProjectionCandidate candidate; Block block; - MergeTreeData::DataPartsVector normal_parts; }; struct AggregateProjectionCandidates @@ -476,7 +475,6 @@ AggregateProjectionCandidates getAggregateProjectionCandidates( { // LOG_TRACE(&Poco::Logger::get("optimizeUseProjections"), "Projection analyzed DAG {}", proj_dag->dumpDAG()); AggregateProjectionCandidate candidate{.info = std::move(info), .dag = std::move(proj_dag)}; - MergeTreeData::DataPartsVector minmax_projection_normal_parts; // LOG_TRACE(&Poco::Logger::get("optimizeUseProjections"), "Projection sample block {}", sample_block.dumpStructure()); auto block = reading.getMergeTreeData().getMinMaxCountProjectionBlock( @@ -485,13 +483,13 @@ AggregateProjectionCandidates getAggregateProjectionCandidates( dag.filter_node != nullptr, query_info, parts, - minmax_projection_normal_parts, + nullptr, max_added_blocks.get(), context); // LOG_TRACE(&Poco::Logger::get("optimizeUseProjections"), "Projection sample block 2 {}", block.dumpStructure()); - // minmax_count_projection cannot be used used when there is no data to process, because + // minmax_count_projection cannot be used when there is no data to process, because // it will produce incorrect result during constant aggregation. 
// See https://github.com/ClickHouse/ClickHouse/issues/36728 if (block) @@ -499,7 +497,6 @@ AggregateProjectionCandidates getAggregateProjectionCandidates( MinMaxProjectionCandidate minmax; minmax.candidate = std::move(candidate); minmax.block = std::move(block); - minmax.normal_parts = std::move(minmax_projection_normal_parts); minmax.candidate.projection = projection; candidates.minmax_projection.emplace(std::move(minmax)); } @@ -508,6 +505,18 @@ AggregateProjectionCandidates getAggregateProjectionCandidates( if (!candidates.minmax_projection) { + auto it = std::find_if(agg_projections.begin(), agg_projections.end(), [&](const auto * projection) + { + return projection->name == context->getSettings().preferred_optimize_projection_name.value; + }); + + if (it != agg_projections.end()) + { + const ProjectionDescription * preferred_projection = *it; + agg_projections.clear(); + agg_projections.push_back(preferred_projection); + } + candidates.real.reserve(agg_projections.size()); for (const auto * projection : agg_projections) { @@ -569,49 +578,74 @@ bool optimizeUseAggregateProjections(QueryPlan::Node & node, QueryPlan::Nodes & auto candidates = getAggregateProjectionCandidates(node, *aggregating, *reading, max_added_blocks, allow_implicit_projections); - AggregateProjectionCandidate * best_candidate = nullptr; - if (candidates.minmax_projection) - best_candidate = &candidates.minmax_projection->candidate; - else if (candidates.real.empty()) - return false; - const auto & parts = reading->getParts(); + const auto & alter_conversions = reading->getAlterConvertionsForParts(); const auto & query_info = reading->getQueryInfo(); const auto metadata = reading->getStorageMetadata(); ContextPtr context = reading->getContext(); MergeTreeDataSelectExecutor reader(reading->getMergeTreeData()); + AggregateProjectionCandidate * best_candidate = nullptr; - auto ordinary_reading_select_result = reading->selectRangesToRead(parts, /* alter_conversions = */ {}); - size_t ordinary_reading_marks = ordinary_reading_select_result->marks(); - - /// Selecting best candidate. - for (auto & candidate : candidates.real) + if (candidates.minmax_projection) { - auto required_column_names = candidate.dag->getRequiredColumnsNames(); - ActionDAGNodes added_filter_nodes; - if (candidates.has_filter) - added_filter_nodes.nodes.push_back(candidate.dag->getOutputs().front()); + best_candidate = &candidates.minmax_projection->candidate; + } + else if (!candidates.real.empty()) + { + auto ordinary_reading_select_result = reading->selectRangesToRead(parts, alter_conversions); + size_t ordinary_reading_marks = ordinary_reading_select_result->marks(); - bool analyzed = analyzeProjectionCandidate( - candidate, *reading, reader, required_column_names, parts, - metadata, query_info, context, max_added_blocks, added_filter_nodes); + /// Nothing to read. Ignore projections. + if (ordinary_reading_marks == 0) + { + reading->setAnalyzedResult(std::move(ordinary_reading_select_result)); + return false; + } - if (!analyzed) - continue; + const auto & parts_with_ranges = ordinary_reading_select_result->partsWithRanges(); - if (candidate.sum_marks > ordinary_reading_marks) - continue; + /// Selecting best candidate. 
+ for (auto & candidate : candidates.real) + { + auto required_column_names = candidate.dag->getRequiredColumnsNames(); + ActionDAGNodes added_filter_nodes; + if (candidates.has_filter) + added_filter_nodes.nodes.push_back(candidate.dag->getOutputs().front()); + + bool analyzed = analyzeProjectionCandidate( + candidate, + *reading, + reader, + required_column_names, + parts_with_ranges, + query_info, + context, + max_added_blocks, + added_filter_nodes); - if (best_candidate == nullptr || best_candidate->sum_marks > candidate.sum_marks) - best_candidate = &candidate; - } + if (!analyzed) + continue; + + if (candidate.sum_marks > ordinary_reading_marks) + continue; - if (!best_candidate) + if (best_candidate == nullptr || best_candidate->sum_marks > candidate.sum_marks) + best_candidate = &candidate; + } + + if (!best_candidate) + { + reading->setAnalyzedResult(std::move(ordinary_reading_select_result)); + return false; + } + } + else { - reading->setAnalyzedResult(std::move(ordinary_reading_select_result)); return false; } + chassert(best_candidate != nullptr); + QueryPlanStepPtr projection_reading; bool has_ordinary_parts; @@ -632,9 +666,7 @@ bool optimizeUseAggregateProjections(QueryPlan::Node & node, QueryPlan::Nodes & .storage_id = reading->getMergeTreeData().getStorageID(), .projection_name = candidates.minmax_projection->candidate.projection->name, }); - has_ordinary_parts = !candidates.minmax_projection->normal_parts.empty(); - if (has_ordinary_parts) - reading->resetParts(std::move(candidates.minmax_projection->normal_parts)); + has_ordinary_parts = false; } else { diff --git a/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp b/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp index fbe02265dcc..8cb64ddf9e3 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp @@ -8,7 +8,7 @@ #include #include #include -#include +#include namespace DB::QueryPlanOptimizations { @@ -107,6 +107,19 @@ bool optimizeUseNormalProjections(Stack & stack, QueryPlan::Nodes & nodes) if (normal_projections.empty()) return false; + ContextPtr context = reading->getContext(); + auto it = std::find_if(normal_projections.begin(), normal_projections.end(), [&](const auto * projection) + { + return projection->name == context->getSettings().preferred_optimize_projection_name.value; + }); + + if (it != normal_projections.end()) + { + const ProjectionDescription * preferred_projection = *it; + normal_projections.clear(); + normal_projections.push_back(preferred_projection); + } + QueryDAG query; { auto & child = iter->node->children[iter->next_child - 1]; @@ -122,13 +135,22 @@ bool optimizeUseNormalProjections(Stack & stack, QueryPlan::Nodes & nodes) const Names & required_columns = reading->getRealColumnNames(); const auto & parts = reading->getParts(); + const auto & alter_conversions = reading->getAlterConvertionsForParts(); const auto & query_info = reading->getQueryInfo(); - ContextPtr context = reading->getContext(); MergeTreeDataSelectExecutor reader(reading->getMergeTreeData()); - auto ordinary_reading_select_result = reading->selectRangesToRead(parts, /* alter_conversions = */ {}); + auto ordinary_reading_select_result = reading->selectRangesToRead(parts, alter_conversions); size_t ordinary_reading_marks = ordinary_reading_select_result->marks(); + /// Nothing to read. Ignore projections. 
+ if (ordinary_reading_marks == 0) + { + reading->setAnalyzedResult(std::move(ordinary_reading_select_result)); + return false; + } + + const auto & parts_with_ranges = ordinary_reading_select_result->partsWithRanges(); + std::shared_ptr max_added_blocks = getMaxAddedBlocks(reading); for (const auto * projection : normal_projections) @@ -144,8 +166,15 @@ bool optimizeUseNormalProjections(Stack & stack, QueryPlan::Nodes & nodes) added_filter_nodes.nodes.push_back(query.filter_node); bool analyzed = analyzeProjectionCandidate( - candidate, *reading, reader, required_columns, parts, - metadata, query_info, context, max_added_blocks, added_filter_nodes); + candidate, + *reading, + reader, + required_columns, + parts_with_ranges, + query_info, + context, + max_added_blocks, + added_filter_nodes); if (!analyzed) continue; diff --git a/src/Processors/QueryPlan/Optimizations/projectionsCommon.cpp b/src/Processors/QueryPlan/Optimizations/projectionsCommon.cpp index 7ddda29cad4..9bc18ee38ba 100644 --- a/src/Processors/QueryPlan/Optimizations/projectionsCommon.cpp +++ b/src/Processors/QueryPlan/Optimizations/projectionsCommon.cpp @@ -210,8 +210,7 @@ bool analyzeProjectionCandidate( const ReadFromMergeTree & reading, const MergeTreeDataSelectExecutor & reader, const Names & required_column_names, - const MergeTreeData::DataPartsVector & parts, - const StorageMetadataPtr & metadata, + const RangesInDataParts & parts_with_ranges, const SelectQueryInfo & query_info, const ContextPtr & context, const std::shared_ptr & max_added_blocks, @@ -219,14 +218,20 @@ bool analyzeProjectionCandidate( { MergeTreeData::DataPartsVector projection_parts; MergeTreeData::DataPartsVector normal_parts; - for (const auto & part : parts) + std::vector alter_conversions; + for (const auto & part_with_ranges : parts_with_ranges) { - const auto & created_projections = part->getProjectionParts(); + const auto & created_projections = part_with_ranges.data_part->getProjectionParts(); auto it = created_projections.find(candidate.projection->name); if (it != created_projections.end()) + { projection_parts.push_back(it->second); + } else - normal_parts.push_back(part); + { + normal_parts.push_back(part_with_ranges.data_part); + alter_conversions.push_back(part_with_ranges.alter_conversions); + } } if (projection_parts.empty()) @@ -236,7 +241,6 @@ bool analyzeProjectionCandidate( std::move(projection_parts), nullptr, required_column_names, - metadata, candidate.projection->metadata, query_info, /// How it is actually used? 
I hope that for index we need only added_filter_nodes added_filter_nodes, @@ -252,7 +256,8 @@ bool analyzeProjectionCandidate( if (!normal_parts.empty()) { - auto normal_result_ptr = reading.selectRangesToRead(std::move(normal_parts), /* alter_conversions = */ {}); + /// TODO: We can reuse existing analysis_result by filtering out projection parts + auto normal_result_ptr = reading.selectRangesToRead(std::move(normal_parts), std::move(alter_conversions)); if (normal_result_ptr->error()) return false; diff --git a/src/Processors/QueryPlan/Optimizations/projectionsCommon.h b/src/Processors/QueryPlan/Optimizations/projectionsCommon.h index 35daccad115..22606ef8a29 100644 --- a/src/Processors/QueryPlan/Optimizations/projectionsCommon.h +++ b/src/Processors/QueryPlan/Optimizations/projectionsCommon.h @@ -19,6 +19,7 @@ using MergeTreeDataSelectAnalysisResultPtr = std::shared_ptr; using DataPartsVector = std::vector; +struct RangesInDataParts; struct StorageInMemoryMetadata; using StorageMetadataPtr = std::shared_ptr; @@ -71,8 +72,7 @@ bool analyzeProjectionCandidate( const ReadFromMergeTree & reading, const MergeTreeDataSelectExecutor & reader, const Names & required_column_names, - const DataPartsVector & parts, - const StorageMetadataPtr & metadata, + const RangesInDataParts & parts_with_ranges, const SelectQueryInfo & query_info, const ContextPtr & context, const std::shared_ptr & max_added_blocks, diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 0b5eb94dbac..566c577d687 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -1168,7 +1168,6 @@ MergeTreeDataSelectAnalysisResultPtr ReadFromMergeTree::selectRangesToRead( std::move(alter_conversions), prewhere_info, filter_nodes, - storage_snapshot->metadata, metadata_for_reading, query_info, context, @@ -1354,7 +1353,6 @@ MergeTreeDataSelectAnalysisResultPtr ReadFromMergeTree::selectRangesToRead( std::vector alter_conversions, const PrewhereInfoPtr & prewhere_info, const ActionDAGNodes & added_filter_nodes, - const StorageMetadataPtr & metadata_snapshot_base, const StorageMetadataPtr & metadata_snapshot, const SelectQueryInfo & query_info, ContextPtr context, @@ -1375,7 +1373,6 @@ MergeTreeDataSelectAnalysisResultPtr ReadFromMergeTree::selectRangesToRead( return selectRangesToReadImpl( std::move(parts), std::move(alter_conversions), - metadata_snapshot_base, metadata_snapshot, updated_query_info_with_filter_dag, context, @@ -1391,7 +1388,6 @@ MergeTreeDataSelectAnalysisResultPtr ReadFromMergeTree::selectRangesToRead( return selectRangesToReadImpl( std::move(parts), std::move(alter_conversions), - metadata_snapshot_base, metadata_snapshot, query_info, context, @@ -1407,7 +1403,6 @@ MergeTreeDataSelectAnalysisResultPtr ReadFromMergeTree::selectRangesToRead( MergeTreeDataSelectAnalysisResultPtr ReadFromMergeTree::selectRangesToReadImpl( MergeTreeData::DataPartsVector parts, std::vector alter_conversions, - const StorageMetadataPtr & metadata_snapshot_base, const StorageMetadataPtr & metadata_snapshot, const SelectQueryInfo & query_info, ContextPtr context, @@ -1468,7 +1463,7 @@ MergeTreeDataSelectAnalysisResultPtr ReadFromMergeTree::selectRangesToReadImpl( parts, alter_conversions, part_values, - metadata_snapshot_base, + metadata_snapshot, data, context, max_block_numbers_to_read.get(), @@ -2157,10 +2152,23 @@ size_t MergeTreeDataSelectAnalysisResult::marks() const if (std::holds_alternative(result)) 
std::rethrow_exception(std::get(result)); - const auto & index_stats = std::get(result).index_stats; - if (index_stats.empty()) - return 0; - return index_stats.back().num_granules_after; + return std::get(result).selected_marks; +} + +UInt64 MergeTreeDataSelectAnalysisResult::rows() const +{ + if (std::holds_alternative(result)) + std::rethrow_exception(std::get(result)); + + return std::get(result).selected_rows; +} + +const RangesInDataParts & MergeTreeDataSelectAnalysisResult::partsWithRanges() const +{ + if (std::holds_alternative(result)) + std::rethrow_exception(std::get(result)); + + return std::get(result).parts_with_ranges; } } diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.h b/src/Processors/QueryPlan/ReadFromMergeTree.h index cb2a3a8ddf9..b4ae10ff0c7 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.h +++ b/src/Processors/QueryPlan/ReadFromMergeTree.h @@ -178,7 +178,6 @@ class ReadFromMergeTree final : public SourceStepWithFilter std::vector alter_conversions, const PrewhereInfoPtr & prewhere_info, const ActionDAGNodes & added_filter_nodes, - const StorageMetadataPtr & metadata_snapshot_base, const StorageMetadataPtr & metadata_snapshot, const SelectQueryInfo & query_info, ContextPtr context, @@ -214,13 +213,9 @@ class ReadFromMergeTree final : public SourceStepWithFilter bool hasAnalyzedResult() const { return analyzed_result_ptr != nullptr; } void setAnalyzedResult(MergeTreeDataSelectAnalysisResultPtr analyzed_result_ptr_) { analyzed_result_ptr = std::move(analyzed_result_ptr_); } - void resetParts(MergeTreeData::DataPartsVector parts) - { - prepared_parts = std::move(parts); - alter_conversions_for_parts = {}; - } - const MergeTreeData::DataPartsVector & getParts() const { return prepared_parts; } + const std::vector & getAlterConvertionsForParts() const { return alter_conversions_for_parts; } + const MergeTreeData & getMergeTreeData() const { return data; } size_t getMaxBlockSize() const { return max_block_size; } size_t getNumStreams() const { return requested_num_streams; } @@ -232,7 +227,6 @@ class ReadFromMergeTree final : public SourceStepWithFilter static MergeTreeDataSelectAnalysisResultPtr selectRangesToReadImpl( MergeTreeData::DataPartsVector parts, std::vector alter_conversions, - const StorageMetadataPtr & metadata_snapshot_base, const StorageMetadataPtr & metadata_snapshot, const SelectQueryInfo & query_info, ContextPtr context, @@ -330,6 +324,8 @@ struct MergeTreeDataSelectAnalysisResult bool error() const; size_t marks() const; + UInt64 rows() const; + const RangesInDataParts & partsWithRanges() const; }; } diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 6bb5231f998..99f3aed0b93 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -6536,7 +6536,6 @@ static void selectBestProjection( projection_parts, candidate.prewhere_info, candidate.required_columns, - storage_snapshot->metadata, candidate.desc->metadata, query_info, added_filter_nodes, @@ -6561,7 +6560,6 @@ static void selectBestProjection( query_info.prewhere_info, required_columns, storage_snapshot->metadata, - storage_snapshot->metadata, query_info, // TODO syntax_analysis_result set in index added_filter_nodes, query_context, @@ -6596,7 +6594,7 @@ Block MergeTreeData::getMinMaxCountProjectionBlock( bool has_filter, const SelectQueryInfo & query_info, const DataPartsVector & parts, - DataPartsVector & normal_parts, + DataPartsVector * normal_parts, const PartitionIdToMaxBlock * 
max_block_numbers_to_read, ContextPtr query_context) const { @@ -6721,10 +6719,22 @@ Block MergeTreeData::getMinMaxCountProjectionBlock( continue; } + /// It's extremely rare that some parts have final marks while others don't. To make it + /// straightforward, disable minmax_count projection when `max(pk)' encounters any part with + /// no final mark. if (need_primary_key_max_column && !part->index_granularity.hasFinalMark()) { - normal_parts.push_back(part); - continue; + if (normal_parts) + { + // 23.8 behaviour + normal_parts->push_back(part); + continue; + } + else + { + // 23.12 behaviour + return {}; + } } real_parts.push_back(part); @@ -7161,7 +7171,7 @@ std::optional MergeTreeData::getQueryProcessingStageWithAgg !query_info.filter_asts.empty() || analysis_result.prewhere_info || analysis_result.before_where, query_info, parts, - normal_parts, + &normal_parts, max_added_blocks.get(), query_context); @@ -7201,7 +7211,6 @@ std::optional MergeTreeData::getQueryProcessingStageWithAgg query_info.prewhere_info, analysis_result.required_columns, metadata_snapshot, - metadata_snapshot, query_info, added_filter_nodes, query_context, @@ -7234,7 +7243,6 @@ std::optional MergeTreeData::getQueryProcessingStageWithAgg query_info.prewhere_info, analysis_result.required_columns, metadata_snapshot, - metadata_snapshot, query_info, added_filter_nodes, query_context, @@ -7374,7 +7382,6 @@ bool MergeTreeData::canUseParallelReplicasBasedOnPKAnalysis( query_info.prewhere_info, storage_snapshot->getMetadataForQuery()->getColumns().getAll().getNames(), storage_snapshot->metadata, - storage_snapshot->metadata, query_info, /*added_filter_nodes*/ActionDAGNodes{}, query_context, diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 22d7a070ad9..1aeacf7c873 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -396,17 +396,13 @@ class MergeTreeData : public IStorage, public WithMutableContext /// query_info - used to filter unneeded parts /// /// parts - part set to filter - /// - /// normal_parts - collects parts that don't have all the needed values to form the block. - /// Specifically, this is when a part doesn't contain a final mark and the related max value is - /// required. 
Block getMinMaxCountProjectionBlock( const StorageMetadataPtr & metadata_snapshot, const Names & required_columns, bool has_filter, const SelectQueryInfo & query_info, const DataPartsVector & parts, - DataPartsVector & normal_parts, + DataPartsVector * normal_parts, const PartitionIdToMaxBlock * max_block_numbers_to_read, ContextPtr query_context) const; diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index afb73ff3859..62dc39ebc6d 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -797,8 +797,8 @@ std::optional> MergeTreeDataSelectExecutor::filterPar } void MergeTreeDataSelectExecutor::filterPartsByPartition( - std::optional & partition_pruner, - std::optional & minmax_idx_condition, + const std::optional & partition_pruner, + const std::optional & minmax_idx_condition, MergeTreeData::DataPartsVector & parts, std::vector & alter_conversions, const std::optional> & part_values, @@ -816,6 +816,7 @@ void MergeTreeDataSelectExecutor::filterPartsByPartition( if (metadata_snapshot->hasPartitionKey()) { + chassert(minmax_idx_condition && partition_pruner); const auto & partition_key = metadata_snapshot->getPartitionKey(); minmax_columns_types = data.getMinMaxColumnsTypes(partition_key); @@ -1231,7 +1232,6 @@ MergeTreeDataSelectAnalysisResultPtr MergeTreeDataSelectExecutor::estimateNumMar MergeTreeData::DataPartsVector parts, const PrewhereInfoPtr & prewhere_info, const Names & column_names_to_return, - const StorageMetadataPtr & metadata_snapshot_base, const StorageMetadataPtr & metadata_snapshot, const SelectQueryInfo & query_info, const ActionDAGNodes & added_filter_nodes, @@ -1253,12 +1253,13 @@ MergeTreeDataSelectAnalysisResultPtr MergeTreeDataSelectExecutor::estimateNumMar selectColumnNames(column_names_to_return, data, real_column_names, virt_column_names, sample_factor_column_queried); std::optional indexes; + /// NOTE: We don't need alter_conversions because the returned analysis_result is only used for: + /// 1. estimate the number of rows to read; 2. projection reading, which doesn't have alter_conversions. 
return ReadFromMergeTree::selectRangesToRead( std::move(parts), /*alter_conversions=*/ {}, prewhere_info, added_filter_nodes, - metadata_snapshot_base, metadata_snapshot, query_info, context, @@ -1785,7 +1786,7 @@ void MergeTreeDataSelectExecutor::selectPartsToRead( const std::optional> & part_values, const std::optional & minmax_idx_condition, const DataTypes & minmax_columns_types, - std::optional & partition_pruner, + const std::optional & partition_pruner, const PartitionIdToMaxBlock * max_block_numbers_to_read, PartFilterCounters & counters) { @@ -1847,7 +1848,7 @@ void MergeTreeDataSelectExecutor::selectPartsToReadWithUUIDFilter( MergeTreeData::PinnedPartUUIDsPtr pinned_part_uuids, const std::optional & minmax_idx_condition, const DataTypes & minmax_columns_types, - std::optional & partition_pruner, + const std::optional & partition_pruner, const PartitionIdToMaxBlock * max_block_numbers_to_read, ContextPtr query_context, PartFilterCounters & counters, diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h index 74d8d8e3c8f..53f4ceba06a 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h @@ -59,7 +59,6 @@ class MergeTreeDataSelectExecutor MergeTreeData::DataPartsVector parts, const PrewhereInfoPtr & prewhere_info, const Names & column_names, - const StorageMetadataPtr & metadata_snapshot_base, const StorageMetadataPtr & metadata_snapshot, const SelectQueryInfo & query_info, const ActionDAGNodes & added_filter_nodes, @@ -126,7 +125,7 @@ class MergeTreeDataSelectExecutor const std::optional> & part_values, const std::optional & minmax_idx_condition, const DataTypes & minmax_columns_types, - std::optional & partition_pruner, + const std::optional & partition_pruner, const PartitionIdToMaxBlock * max_block_numbers_to_read, PartFilterCounters & counters); @@ -138,7 +137,7 @@ class MergeTreeDataSelectExecutor MergeTreeData::PinnedPartUUIDsPtr pinned_part_uuids, const std::optional & minmax_idx_condition, const DataTypes & minmax_columns_types, - std::optional & partition_pruner, + const std::optional & partition_pruner, const PartitionIdToMaxBlock * max_block_numbers_to_read, ContextPtr query_context, PartFilterCounters & counters, @@ -172,8 +171,8 @@ class MergeTreeDataSelectExecutor /// Filter parts using minmax index and partition key. 
static void filterPartsByPartition( - std::optional & partition_pruner, - std::optional & minmax_idx_condition, + const std::optional & partition_pruner, + const std::optional & minmax_idx_condition, MergeTreeData::DataPartsVector & parts, std::vector & alter_conversions, const std::optional> & part_values, diff --git a/src/Storages/MergeTree/PartitionPruner.cpp b/src/Storages/MergeTree/PartitionPruner.cpp index 97bb9f3b4d4..a5df08e3df9 100644 --- a/src/Storages/MergeTree/PartitionPruner.cpp +++ b/src/Storages/MergeTree/PartitionPruner.cpp @@ -31,7 +31,7 @@ PartitionPruner::PartitionPruner(const StorageMetadataPtr & metadata, ActionsDAG { } -bool PartitionPruner::canBePruned(const IMergeTreeDataPart & part) +bool PartitionPruner::canBePruned(const IMergeTreeDataPart & part) const { if (part.isEmpty()) return true; diff --git a/src/Storages/MergeTree/PartitionPruner.h b/src/Storages/MergeTree/PartitionPruner.h index 7f1b74795c4..e8a740b1524 100644 --- a/src/Storages/MergeTree/PartitionPruner.h +++ b/src/Storages/MergeTree/PartitionPruner.h @@ -16,14 +16,15 @@ class PartitionPruner PartitionPruner(const StorageMetadataPtr & metadata, const SelectQueryInfo & query_info, ContextPtr context, bool strict); PartitionPruner(const StorageMetadataPtr & metadata, ActionsDAGPtr filter_actions_dag, ContextPtr context, bool strict); - bool canBePruned(const IMergeTreeDataPart & part); + bool canBePruned(const IMergeTreeDataPart & part) const; bool isUseless() const { return useless; } const KeyCondition & getKeyCondition() const { return partition_condition; } private: - std::unordered_map partition_filter_map; + /// Cache already analyzed partitions. + mutable std::unordered_map partition_filter_map; /// partition_key is adjusted here (with substitution from modulo to moduloLegacy). KeyDescription partition_key; diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 850f469b03b..4f49a665113 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -345,6 +345,8 @@ void StorageMergeTree::alter( prev_mutation = it->first; } + /// Always wait previous mutations synchronously, because alters + /// should be executed in sequential order. if (prev_mutation != 0) { LOG_DEBUG(log, "Cannot change metadata with barrier alter query, will wait for mutation {}", prev_mutation); @@ -372,9 +374,7 @@ void StorageMergeTree::alter( resetObjectColumnsFromActiveParts(parts_lock); } - /// Always execute required mutations synchronously, because alters - /// should be executed in sequential order. 
- if (!maybe_mutation_commands.empty()) + if (!maybe_mutation_commands.empty() && local_context->getSettingsRef().alter_sync > 0) waitForMutation(mutation_version, false); } diff --git a/tests/ci/build_check.py b/tests/ci/build_check.py index df72d19693a..a0f7a5fc24f 100644 --- a/tests/ci/build_check.py +++ b/tests/ci/build_check.py @@ -13,6 +13,7 @@ from docker_pull_helper import get_image_with_version from env_helper import ( GITHUB_JOB_API_URL, + GITHUB_RUN_ID, IMAGES_PATH, REPO_COPY, S3_ACCESS_KEY_ID, @@ -184,6 +185,9 @@ def get_release_or_pr(pr_info: PRInfo, version: ClickHouseVersion) -> Tuple[str, # It should be fixed in performance-comparison image eventually # For performance tests we always set PRs prefix performance_pr = "PRs/0" + if "commits" not in pr_info.event and "pull_request" not in pr_info.event: + # for dispatch maintenance run we use sha and run id + return f"maintenance/{pr_info.base_ref}/{GITHUB_RUN_ID}", performance_pr if "release" in pr_info.labels or "release-lts" in pr_info.labels: # for release pull requests we use branch names prefixes, not pr numbers return pr_info.head_ref, performance_pr diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py index e1e7709ca44..adab6be672c 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -110,15 +110,15 @@ def get_changed_docker_images( changed_images.append(DockerImage(dockerfile_dir, name, only_amd64)) break - # Rebuild all images on push or release - if pr_info.number == 0: + # Rebuild all images on push, release, or scheduled run + if pr_info.number in [0,1]: changed_images = all_images else: # Rebuild all on opened PR if pr_info.event['action'] in ['opened', 'reopened']: changed_images = all_images - + # Check that image for the PR exists elif pr_info.event['action'] == 'synchronize': unchanged_images = [ diff --git a/tests/ci/pr_info.py b/tests/ci/pr_info.py index ad57bcecde3..d9fb0e89ab4 100644 --- a/tests/ci/pr_info.py +++ b/tests/ci/pr_info.py @@ -255,8 +255,8 @@ def __init__( self.sha = os.getenv( "GITHUB_SHA", "0000000000000000000000000000000000000000" ) - self.number = f"{self.version.major}.{self.version.minor}.{self.version.patch}" - self.docker_image_tag = str(self.number) + "-" + str(self.sha) + self.number = 1 + self.docker_image_tag = f"{self.version.major}.{self.version.minor}.{self.version.patch}-" + str(self.sha) self.labels = set() repo_prefix = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}" self.task_url = GITHUB_RUN_URL diff --git a/tests/queries/0_stateless/01710_projection_analysis_reuse_partition.reference b/tests/queries/0_stateless/01710_projection_analysis_reuse_partition.reference new file mode 100644 index 00000000000..47b07da250f --- /dev/null +++ b/tests/queries/0_stateless/01710_projection_analysis_reuse_partition.reference @@ -0,0 +1 @@ +Selected 2/2 parts by partition key, 1 parts by primary key, 1/2 marks by primary key, 1 marks to read from 1 ranges diff --git a/tests/queries/0_stateless/01710_projection_analysis_reuse_partition.sh b/tests/queries/0_stateless/01710_projection_analysis_reuse_partition.sh new file mode 100755 index 00000000000..ba8b3818ba3 --- /dev/null +++ b/tests/queries/0_stateless/01710_projection_analysis_reuse_partition.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} -q "drop table if exists t" +${CLICKHOUSE_CLIENT} -q "create table t(s LowCardinality(String), e DateTime64(3), projection p1 (select * order by s, e)) engine MergeTree partition by toYYYYMM(e) order by tuple() settings index_granularity = 8192, index_granularity_bytes = '100M'" +${CLICKHOUSE_CLIENT} -q "insert into t select 'AAP', toDateTime('2023-07-01') + 360 * number from numbers(50000)" +${CLICKHOUSE_CLIENT} -q "insert into t select 'AAPL', toDateTime('2023-07-01') + 360 * number from numbers(50000)" + +CLICKHOUSE_CLIENT_DEBUG_LOG=$(echo ${CLICKHOUSE_CLIENT} | sed 's/'"--send_logs_level=${CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL}"'/--send_logs_level=debug/g') + +${CLICKHOUSE_CLIENT_DEBUG_LOG} -q "select count() from t where e >= '2023-11-08 00:00:00.000' and e < '2023-11-09 00:00:00.000' and s in ('AAPL') format Null" 2>&1 | grep -oh "Selected .* parts by partition key, *. parts by primary key, .* marks by primary key, .* marks to read from .* ranges.*$" + +${CLICKHOUSE_CLIENT} -q "drop table t" diff --git a/tests/queries/0_stateless/01710_projection_fix_crash.reference b/tests/queries/0_stateless/01710_projection_fix_crash.reference new file mode 100644 index 00000000000..18748286e5b --- /dev/null +++ b/tests/queries/0_stateless/01710_projection_fix_crash.reference @@ -0,0 +1 @@ +0 diff --git a/tests/queries/0_stateless/01710_projection_fix_crash.sql b/tests/queries/0_stateless/01710_projection_fix_crash.sql new file mode 100644 index 00000000000..703a773ebca --- /dev/null +++ b/tests/queries/0_stateless/01710_projection_fix_crash.sql @@ -0,0 +1,15 @@ +set force_index_by_date = 1; + +create table xxxxx (col1 String, col2 String, _time DateTime, projection p (select * order by col2)) engine=MergeTree partition by col1 order by tuple(); + +create table yyyyyyy (col1 String, col2 String, _time DateTime, projection p (select * order by col2)) engine=MergeTree partition by col1 order by tuple(); + +insert into xxxxx (col1, col2, _time) values ('xxx', 'zzzz', now()+1); +insert into yyyyyyy (col1, col2, _time) values ('xxx', 'zzzz', now()); + +SELECT count() +FROM xxxxx +WHERE (col1 = 'xxx') AND (_time = ( + SELECT max(_time) + FROM yyyyyyy + WHERE (col1 = 'xxx') AND (col2 = 'zzzz') AND (_time > (now() - toIntervalDay(3))))) diff --git a/tests/queries/0_stateless/01710_projection_with_alter_conversions.reference b/tests/queries/0_stateless/01710_projection_with_alter_conversions.reference new file mode 100644 index 00000000000..9874d6464ab --- /dev/null +++ b/tests/queries/0_stateless/01710_projection_with_alter_conversions.reference @@ -0,0 +1 @@ +1 2 diff --git a/tests/queries/0_stateless/01710_projection_with_alter_conversions.sql b/tests/queries/0_stateless/01710_projection_with_alter_conversions.sql new file mode 100644 index 00000000000..649a07b9b5f --- /dev/null +++ b/tests/queries/0_stateless/01710_projection_with_alter_conversions.sql @@ -0,0 +1,15 @@ +drop table if exists t; + +create table t (i int, j int, projection p (select i order by i)) engine MergeTree order by tuple(); + +insert into t values (1, 2); + +system stop merges t; + +set alter_sync = 0; + +alter table t rename column j to k; + +select * from t; + +drop table t; diff --git a/tests/queries/0_stateless/01710_projections.sql b/tests/queries/0_stateless/01710_projections.sql index a96339e30fa..7c45792847e 100644 --- a/tests/queries/0_stateless/01710_projections.sql +++ b/tests/queries/0_stateless/01710_projections.sql @@ -1,6 +1,6 @@ drop table if exists 
projection_test; -create table projection_test (`sum(block_count)` UInt64, domain_alias UInt64 alias length(domain), datetime DateTime, domain LowCardinality(String), x_id String, y_id String, block_count Int64, retry_count Int64, duration Int64, kbytes Int64, buffer_time Int64, first_time Int64, total_bytes Nullable(UInt64), valid_bytes Nullable(UInt64), completed_bytes Nullable(UInt64), fixed_bytes Nullable(UInt64), force_bytes Nullable(UInt64), projection p (select toStartOfMinute(datetime) dt_m, countIf(first_time = 0) / count(), avg((kbytes * 8) / duration), count(), sum(block_count) / sum(duration), avg(block_count / duration), sum(buffer_time) / sum(duration), avg(buffer_time / duration), sum(valid_bytes) / sum(total_bytes), sum(completed_bytes) / sum(total_bytes), sum(fixed_bytes) / sum(total_bytes), sum(force_bytes) / sum(total_bytes), sum(valid_bytes) / sum(total_bytes), sum(retry_count) / sum(duration), avg(retry_count / duration), countIf(block_count > 0) / count(), countIf(first_time = 0) / count(), uniqHLL12(x_id), uniqHLL12(y_id) group by dt_m, domain)) engine MergeTree partition by toDate(datetime) order by (toStartOfTenMinutes(datetime), domain) settings index_granularity_bytes = 10000000; +create table projection_test (`sum(block_count)` UInt64, domain_alias UInt64 alias length(domain), datetime DateTime, domain LowCardinality(String), x_id String, y_id String, block_count Int64, retry_count Int64, duration Int64, kbytes Int64, buffer_time Int64, first_time Int64, total_bytes Nullable(UInt64), valid_bytes Nullable(UInt64), completed_bytes Nullable(UInt64), fixed_bytes Nullable(UInt64), force_bytes Nullable(UInt64), projection p (select toStartOfMinute(datetime) dt_m, countIf(first_time = 0) / count(), avg((kbytes * 8) / duration), count(), sum(block_count) / sum(duration), avg(block_count / duration), sum(buffer_time) / sum(duration), avg(buffer_time / duration), sum(valid_bytes) / sum(total_bytes), sum(completed_bytes) / sum(total_bytes), sum(fixed_bytes) / sum(total_bytes), sum(force_bytes) / sum(total_bytes), sum(valid_bytes) / sum(total_bytes), sum(retry_count) / sum(duration), avg(retry_count / duration), countIf(block_count > 0) / count(), countIf(first_time = 0) / count(), uniqHLL12(x_id), uniqHLL12(y_id) group by dt_m, domain)) engine MergeTree partition by toDate(datetime) order by toStartOfTenMinutes(datetime) settings index_granularity_bytes = 10000000; insert into projection_test with rowNumberInAllBlocks() as id select 1, toDateTime('2020-10-24 00:00:00') + (id / 20), toString(id % 100), * from generateRandom('x_id String, y_id String, block_count Int64, retry_count Int64, duration Int64, kbytes Int64, buffer_time Int64, first_time Int64, total_bytes Nullable(UInt64), valid_bytes Nullable(UInt64), completed_bytes Nullable(UInt64), fixed_bytes Nullable(UInt64), force_bytes Nullable(UInt64)', 10, 10, 1) limit 1000 settings max_threads = 1; diff --git a/tests/queries/0_stateless/02965_projection_with_partition_pruning.reference b/tests/queries/0_stateless/02965_projection_with_partition_pruning.reference new file mode 100644 index 00000000000..5816b4eb49b --- /dev/null +++ b/tests/queries/0_stateless/02965_projection_with_partition_pruning.reference @@ -0,0 +1 @@ +3 4 diff --git a/tests/queries/0_stateless/02965_projection_with_partition_pruning.sql b/tests/queries/0_stateless/02965_projection_with_partition_pruning.sql new file mode 100644 index 00000000000..92f7cc0671c --- /dev/null +++ b/tests/queries/0_stateless/02965_projection_with_partition_pruning.sql @@ 
-0,0 +1,9 @@ +drop table if exists a; + +create table a (i int, j int, projection p (select * order by j)) engine MergeTree partition by i order by tuple() settings index_granularity = 1; + +insert into a values (1, 2), (0, 5), (3, 4); + +select * from a where i > 0 and j = 4 settings force_index_by_date = 1; + +drop table a;
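
Note on the zram step added to .github/actions/common_setup/action.yml above: MemTotal is read from /proc/meminfo in KiB, so with Percent=200 a runner with, say, 64 GiB of RAM (MemTotal ≈ 67108864 KiB) gets ZRAM_SIZE = 67108864 / 1024 / 1024 * 200 / 100 = 128, i.e. a 128 GiB zstd-compressed zram device attached as high-priority swap, with vm.swappiness raised to 200 so the kernel uses it aggressively. The 64 GiB figure is only an illustration, not taken from the patch.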
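
For reference, a minimal usage sketch of the new preferred_optimize_projection_name setting introduced in src/Core/Settings.h and applied in both optimizeUseAggregateProjection.cpp and optimizeUseNormalProjection.cpp. The table and projection names (events, p_agg, p_order) are illustrative only and do not appear in the patch:

drop table if exists events;

create table events
(
    ts DateTime,
    user_id UInt64,
    value UInt64,
    -- aggregate projection, a candidate for optimizeUseAggregateProjections
    projection p_agg (select user_id, sum(value) group by user_id),
    -- normal projection, a candidate for optimizeUseNormalProjections
    projection p_order (select * order by user_id)
)
engine MergeTree
order by ts;

insert into events select toDateTime('2024-01-01') + number, number % 100, number from numbers(100000);

-- When the setting is non-empty, the patch narrows the candidate list to the
-- named projection (if it exists) before the usual cost comparison.
select user_id, sum(value) from events group by user_id
settings optimize_use_projections = 1, preferred_optimize_projection_name = 'p_agg';

select * from events where user_id = 42
settings preferred_optimize_projection_name = 'p_order'
format Null;

drop table events;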
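
To confirm which projection was actually applied, one can run EXPLAIN on the same query (in recent ClickHouse versions the ReadFromMergeTree step is annotated with the chosen projection) or inspect the projections column of system.query_log; this is only a suggested way to verify the setting's effect and is not part of the patch.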