diff --git a/.asf.yaml b/.asf.yaml index a2a3064783f0e98..3892aca2eddb77c 100644 --- a/.asf.yaml +++ b/.asf.yaml @@ -152,13 +152,13 @@ github: - LemonLiTree - Yukang-Lian - TangSiyang2001 - - Lchangliang - freemandealer - shuke987 - wm1581066 - KassieZ - yujun777 - doris-robot + - LiBinfeng-01 notifications: pullrequests_status: commits@doris.apache.org diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 98febd914c2724b..c7c41345761643c 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -1,6 +1,39 @@ -## Proposed changes +### What problem does this PR solve? Issue Number: close #xxx - +Related PR: #xxx + +Problem Summary: + +### Release note + +None + +### Check List (For Author) + +- Test + - [ ] Regression test + - [ ] Unit Test + - [ ] Manual test (add detailed scripts or steps below) + - [ ] No need to test or manual test. Explain why: + - [ ] This is a refactor/code format and no logic has been changed. + - [ ] Previous test can cover this change. + - [ ] No code files have been changed. + - [ ] Other reason + +- Behavior changed: + - [ ] No. + - [ ] Yes. + +- Does this need documentation? + - [ ] No. + - [ ] Yes. 
+ +### Check List (For Reviewer who merge this PR) + +- [ ] Confirm the release note +- [ ] Confirm test cases +- [ ] Confirm document +- [ ] Add branch pick label diff --git a/.github/workflows/auto-cherry-pick.yml b/.github/workflows/auto-cherry-pick.yml index 4ee2614f0c952fa..f76c88934fdc9ef 100644 --- a/.github/workflows/auto-cherry-pick.yml +++ b/.github/workflows/auto-cherry-pick.yml @@ -30,7 +30,7 @@ permissions: jobs: auto_cherry_pick: runs-on: ubuntu-latest - if: ${{ contains(github.event.pull_request.labels.*.name, 'dev/3.0.x') && github.event.pull_request.merged == true }} + if: ${{ (contains(github.event.pull_request.labels.*.name, 'dev/3.0.x') || contains(github.event.pull_request.labels.*.name, 'dev/2.1.x')) && github.event.pull_request.merged == true }} steps: - name: Checkout repository uses: actions/checkout@v3 @@ -45,7 +45,7 @@ jobs: pip install PyGithub - name: Check SHA run: | - expected_sha="1941de05514e15c216067778e0287b4c3ebcd6f6042ee189a12257bfd0cdd9f764e18c7dae5de868e9b7128ce3be98dc8f78252932cee7d55552fc0cf8b69496" + expected_sha="4e4c0d7689b765c7f0677d75d23222555afa9286af46cf77ced66fa247a298d9f8a8c86830d0ce55f70e5f09532b54fbafee040c0343833077cbc7e214d486d2" calculated_sha=$(sha512sum tools/auto-pick-script.py | awk '{ print $1 }') if [ "$calculated_sha" != "$expected_sha" ]; then echo "SHA mismatch! 
Expected: $expected_sha, but got: $calculated_sha" @@ -53,10 +53,19 @@ jobs: else echo "SHA matches: $calculated_sha" fi - - name: Auto cherry-pick + - name: Auto cherry-pick to branch-3.0 + if: ${{ contains(github.event.pull_request.labels.*.name, 'dev/3.0.x') }} env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} REPO_NAME: ${{ github.repository }} CONFLICT_LABEL: cherry-pick-conflict-in-3.0 run: | python tools/auto-pick-script.py ${{ github.event.pull_request.number }} branch-3.0 + - name: Auto cherry-pick to branch-2.1 + if: ${{ contains(github.event.pull_request.labels.*.name, 'dev/2.1.x') }} + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + REPO_NAME: ${{ github.repository }} + CONFLICT_LABEL: cherry-pick-conflict-in-2.1.x + run: | + python tools/auto-pick-script.py ${{ github.event.pull_request.number }} branch-2.1 diff --git a/.github/workflows/build-extension.yml b/.github/workflows/build-extension.yml index 14998f24144b735..d12fe7d9d713872 100644 --- a/.github/workflows/build-extension.yml +++ b/.github/workflows/build-extension.yml @@ -20,7 +20,9 @@ name: Build Extensions on: pull_request: - + workflow_dispatch: + issue_comment: + types: [ created ] concurrency: group: ${{ github.ref }} (Build Extensions) cancel-in-progress: true @@ -29,6 +31,12 @@ jobs: changes: name: Detect Changes runs-on: ubuntu-latest + if: | + (github.event_name == 'pull_request') || + (github.event_name == 'issue_comment' && + github.event.comment.body == 'run buildall' && + github.actor == 'doris-robot' && + github.event.issue.user.login == 'github-actions[bot]') outputs: broker_changes: ${{ steps.filter.outputs.broker_changes }} docs_changes: ${{ steps.filter.outputs.docs_changes }} diff --git a/.github/workflows/build-thirdparty.yml b/.github/workflows/build-thirdparty.yml index 991b5089035699f..7bc5d8a8182a719 100644 --- a/.github/workflows/build-thirdparty.yml +++ b/.github/workflows/build-thirdparty.yml @@ -19,6 +19,9 @@ name: Build Third Party Libraries on: pull_request: + 
workflow_dispatch: + issue_comment: + types: [ created ] concurrency: group: ${{ github.ref }} (Build Third Party Libraries) @@ -28,6 +31,12 @@ jobs: changes: name: Detect Changes runs-on: ubuntu-latest + if: | + (github.event_name == 'pull_request') || + (github.event_name == 'issue_comment' && + github.event.comment.body == 'run buildall' && + github.actor == 'doris-robot' && + github.event.issue.user.login == 'github-actions[bot]') outputs: thirdparty_changes: ${{ steps.filter.outputs.thirdparty_changes }} steps: diff --git a/.github/workflows/checkstyle.yaml b/.github/workflows/checkstyle.yaml index 13ab46b2cd50b22..a53a19d82649b9b 100644 --- a/.github/workflows/checkstyle.yaml +++ b/.github/workflows/checkstyle.yaml @@ -20,11 +20,20 @@ name: FE Code Style Checker on: pull_request: + workflow_dispatch: + issue_comment: + types: [ created ] jobs: java-checkstyle: name: "CheckStyle" runs-on: ubuntu-latest + if: | + (github.event_name == 'pull_request') || + (github.event_name == 'issue_comment' && + github.event.comment.body == 'run buildall' && + github.actor == 'doris-robot' && + github.event.issue.user.login == 'github-actions[bot]') steps: - name: Checkout uses: actions/checkout@v3 diff --git a/.github/workflows/clang-format.yml b/.github/workflows/clang-format.yml index adc77450d78c013..a81d64e4e2b1f18 100644 --- a/.github/workflows/clang-format.yml +++ b/.github/workflows/clang-format.yml @@ -19,12 +19,22 @@ --- name: Code Formatter -on: [push, pull_request_target] - +on: + pull_request: + pull_request_target: + workflow_dispatch: + issue_comment: + types: [ created ] jobs: clang-format: name: "Clang Formatter" runs-on: ubuntu-latest + if: | + (github.event_name == 'pull_request') || (github.event_name == 'pull_request_target') || + (github.event_name == 'issue_comment' && + github.event.comment.body == 'run buildall' && + github.actor == 'doris-robot' && + github.event.issue.user.login == 'github-actions[bot]') steps: - name: "Checkout ${{ github.ref }} ( 
${{ github.sha }} )" if: ${{ github.event_name != 'pull_request_target' }} diff --git a/.github/workflows/license-eyes.yml b/.github/workflows/license-eyes.yml index 890efb2d9d11962..c17081fc75b9e82 100644 --- a/.github/workflows/license-eyes.yml +++ b/.github/workflows/license-eyes.yml @@ -22,10 +22,21 @@ on: push: branches: - master + workflow_dispatch: + issue_comment: + types: [ created ] + jobs: license-check: name: "License Check" runs-on: ubuntu-latest + if: | + (github.event_name == 'pull_request_target') || + (github.event_name == 'push' && github.ref == 'refs/heads/master') || + (github.event_name == 'issue_comment' && + github.event.comment.body == 'run buildall' && + github.actor == 'doris-robot' && + github.event.issue.user.login == 'github-actions[bot]') steps: - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" if: ${{ github.event_name != 'pull_request_target' }} diff --git a/be/CMakeLists.txt b/be/CMakeLists.txt index d617aa173d93f8a..1d79048f96511c5 100644 --- a/be/CMakeLists.txt +++ b/be/CMakeLists.txt @@ -342,6 +342,10 @@ if (ENABLE_INJECTION_POINT) set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -DENABLE_INJECTION_POINT") endif() +if (ENABLE_CACHE_LOCK_DEBUG) + set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -DENABLE_CACHE_LOCK_DEBUG") +endif() + # Enable memory tracker, which allows BE to limit the memory of tasks such as query, load, # and compaction,and observe the memory of BE through be_ip:http_port/MemTracker. 
# Adding the option `USE_MEM_TRACKER=OFF sh build.sh` when compiling can turn off the memory tracker, @@ -782,6 +786,7 @@ install(DIRECTORY DESTINATION ${OUTPUT_DIR}/conf) install(FILES ${BASE_DIR}/../bin/start_be.sh ${BASE_DIR}/../bin/stop_be.sh + ${BASE_DIR}/../tools/jeprof PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_WRITE GROUP_EXECUTE WORLD_READ WORLD_EXECUTE diff --git a/be/src/agent/agent_server.cpp b/be/src/agent/agent_server.cpp index 9d36148b64f3051..361a8ab93a90a69 100644 --- a/be/src/agent/agent_server.cpp +++ b/be/src/agent/agent_server.cpp @@ -33,6 +33,7 @@ #include "agent/utils.h" #include "agent/workload_group_listener.h" #include "agent/workload_sched_policy_listener.h" +#include "cloud/config.h" #include "common/config.h" #include "common/logging.h" #include "common/status.h" @@ -193,7 +194,7 @@ void AgentServer::start_workers(StorageEngine& engine, ExecEnv* exec_env) { "REPORT_DISK_STATE", _master_info, config::report_disk_state_interval_seconds, [&engine, &master_info = _master_info] { report_disk_callback(engine, master_info); })); _report_workers.push_back(std::make_unique( - "REPORT_OLAP_TABLE", _master_info, config::report_tablet_interval_seconds,[&engine, &master_info = _master_info] { report_tablet_callback(engine, master_info); })); + "REPORT_OLAP_TABLET", _master_info, config::report_tablet_interval_seconds,[&engine, &master_info = _master_info] { report_tablet_callback(engine, master_info); })); // clang-format on } @@ -211,6 +212,10 @@ void AgentServer::cloud_start_workers(CloudStorageEngine& engine, ExecEnv* exec_ "CALC_DBM_TASK", config::calc_delete_bitmap_worker_count, [&engine](auto&& task) { return calc_delete_bitmap_callback(engine, task); }); + // cloud, drop tablet just clean clear_cache, so just one thread do it + _workers[TTaskType::DROP] = std::make_unique( + "DROP_TABLE", 1, [&engine](auto&& task) { return drop_tablet_callback(engine, task); }); + _report_workers.push_back(std::make_unique( 
"REPORT_TASK", _master_info, config::report_task_interval_seconds, [&master_info = _master_info] { report_task_callback(master_info); })); @@ -218,6 +223,14 @@ void AgentServer::cloud_start_workers(CloudStorageEngine& engine, ExecEnv* exec_ _report_workers.push_back(std::make_unique( "REPORT_DISK_STATE", _master_info, config::report_disk_state_interval_seconds, [&engine, &master_info = _master_info] { report_disk_callback(engine, master_info); })); + + if (config::enable_cloud_tablet_report) { + _report_workers.push_back(std::make_unique( + "REPORT_OLAP_TABLET", _master_info, config::report_tablet_interval_seconds, + [&engine, &master_info = _master_info] { + report_tablet_callback(engine, master_info); + })); + } } // TODO(lingbin): each task in the batch may have it own status or FE must check and diff --git a/be/src/agent/heartbeat_server.cpp b/be/src/agent/heartbeat_server.cpp index 146604aaab20f44..78002ed08fe0df5 100644 --- a/be/src/agent/heartbeat_server.cpp +++ b/be/src/agent/heartbeat_server.cpp @@ -26,6 +26,7 @@ #include #include +#include "cloud/cloud_tablet_mgr.h" #include "cloud/config.h" #include "common/config.h" #include "common/status.h" @@ -275,6 +276,11 @@ Status HeartbeatServer::_heartbeat(const TMasterInfo& master_info) { LOG(INFO) << "set config cloud_unique_id " << master_info.cloud_unique_id << " " << st; } + if (master_info.__isset.tablet_report_inactive_duration_ms) { + doris::g_tablet_report_inactive_duration_ms = + master_info.tablet_report_inactive_duration_ms; + } + if (need_report) { LOG(INFO) << "Master FE is changed or restarted. 
report tablet and disk info immediately"; _engine.notify_listeners(); diff --git a/be/src/agent/task_worker_pool.cpp b/be/src/agent/task_worker_pool.cpp index 5906511ce157949..8a034001378f6fb 100644 --- a/be/src/agent/task_worker_pool.cpp +++ b/be/src/agent/task_worker_pool.cpp @@ -48,6 +48,8 @@ #include "cloud/cloud_delete_task.h" #include "cloud/cloud_engine_calc_delete_bitmap_task.h" #include "cloud/cloud_schema_change_job.h" +#include "cloud/cloud_tablet_mgr.h" +#include "cloud/config.h" #include "common/config.h" #include "common/logging.h" #include "common/status.h" @@ -116,6 +118,10 @@ bool register_task_info(const TTaskType::type task_type, int64_t signature) { // no need to report task of these types return true; } + if (task_type == TTaskType::type::DROP && config::is_cloud_mode()) { + // cloud no need to report drop task status + return true; + } if (signature == -1) { // No need to report task with unintialized signature return true; @@ -1134,6 +1140,46 @@ void report_tablet_callback(StorageEngine& engine, const TMasterInfo& master_inf } } +void report_tablet_callback(CloudStorageEngine& engine, const TMasterInfo& master_info) { + // Random sleep 1~5 seconds before doing report. + // In order to avoid the problem that the FE receives many report requests at the same time + // and can not be processed. 
+ if (config::report_random_wait) { + random_sleep(5); + } + + TReportRequest request; + request.__set_backend(BackendOptions::get_local_backend()); + request.__isset.tablets = true; + + increase_report_version(); + uint64_t report_version; + uint64_t total_num_tablets = 0; + for (int i = 0; i < 5; i++) { + request.tablets.clear(); + report_version = s_report_version; + engine.tablet_mgr().build_all_report_tablets_info(&request.tablets, &total_num_tablets); + if (report_version == s_report_version) { + break; + } + } + + if (report_version < s_report_version) { + LOG(WARNING) << "report version " << report_version << " change to " << s_report_version; + DorisMetrics::instance()->report_all_tablets_requests_skip->increment(1); + return; + } + + request.__set_report_version(report_version); + request.__set_num_tablets(total_num_tablets); + + bool succ = handle_report(request, master_info, "tablet"); + report_tablet_total << 1; + if (!succ) [[unlikely]] { + report_tablet_failed << 1; + } +} + void upload_callback(StorageEngine& engine, ExecEnv* env, const TAgentTaskRequest& req) { const auto& upload_request = req.upload_req; @@ -1390,15 +1436,7 @@ void update_s3_resource(const TStorageResource& param, io::RemoteFileSystemSPtr DCHECK_EQ(existed_fs->type(), io::FileSystemType::S3) << param.id << ' ' << param.name; auto client = static_cast(existed_fs.get())->client_holder(); auto new_s3_conf = S3Conf::get_s3_conf(param.s3_storage_param); - S3ClientConf conf { - .endpoint {}, - .region {}, - .ak = std::move(new_s3_conf.client_conf.ak), - .sk = std::move(new_s3_conf.client_conf.sk), - .token = std::move(new_s3_conf.client_conf.token), - .bucket {}, - .provider = new_s3_conf.client_conf.provider, - }; + S3ClientConf conf = std::move(new_s3_conf.client_conf); st = client->reset(conf); fs = std::move(existed_fs); } @@ -1406,7 +1444,7 @@ void update_s3_resource(const TStorageResource& param, io::RemoteFileSystemSPtr if (!st.ok()) { LOG(WARNING) << "update s3 resource failed: 
" << st; } else { - LOG_INFO("successfully update hdfs resource") + LOG_INFO("successfully update s3 resource") .tag("resource_id", param.id) .tag("resource_name", param.name); put_storage_resource(param.id, {std::move(fs)}, param.version); @@ -1610,6 +1648,21 @@ void drop_tablet_callback(StorageEngine& engine, const TAgentTaskRequest& req) { remove_task_info(req.task_type, req.signature); } +void drop_tablet_callback(CloudStorageEngine& engine, const TAgentTaskRequest& req) { + const auto& drop_tablet_req = req.drop_tablet_req; + DBUG_EXECUTE_IF("WorkPoolCloudDropTablet.drop_tablet_callback.failed", { + LOG_WARNING("WorkPoolCloudDropTablet.drop_tablet_callback.failed") + .tag("tablet_id", drop_tablet_req.tablet_id); + return; + }); + // 1. erase lru from tablet mgr + // TODO(dx) clean tablet file cache + // get tablet's info(such as cachekey, tablet id, rsid) + engine.tablet_mgr().erase_tablet(drop_tablet_req.tablet_id); + // 2. gen clean file cache task + return; +} + void push_callback(StorageEngine& engine, const TAgentTaskRequest& req) { const auto& push_req = req.push_req; diff --git a/be/src/agent/task_worker_pool.h b/be/src/agent/task_worker_pool.h index f51d6c2a4c0dc04..c50ac57ffe9b743 100644 --- a/be/src/agent/task_worker_pool.h +++ b/be/src/agent/task_worker_pool.h @@ -155,6 +155,8 @@ void create_tablet_callback(StorageEngine& engine, const TAgentTaskRequest& req) void drop_tablet_callback(StorageEngine& engine, const TAgentTaskRequest& req); +void drop_tablet_callback(CloudStorageEngine& engine, const TAgentTaskRequest& req); + void clear_transaction_task_callback(StorageEngine& engine, const TAgentTaskRequest& req); void push_callback(StorageEngine& engine, const TAgentTaskRequest& req); @@ -188,6 +190,8 @@ void report_disk_callback(CloudStorageEngine& engine, const TMasterInfo& master_ void report_tablet_callback(StorageEngine& engine, const TMasterInfo& master_info); +void report_tablet_callback(CloudStorageEngine& engine, const TMasterInfo& 
master_info); + void calc_delete_bitmap_callback(CloudStorageEngine& engine, const TAgentTaskRequest& req); } // namespace doris diff --git a/be/src/apache-orc b/be/src/apache-orc index 903ea6ccdc463b8..db01184f765c034 160000 --- a/be/src/apache-orc +++ b/be/src/apache-orc @@ -1 +1 @@ -Subproject commit 903ea6ccdc463b8a17af2604975107ba7d895380 +Subproject commit db01184f765c03496e4107bd3ac37c077ac4bc5f diff --git a/be/src/cloud/cloud_base_compaction.cpp b/be/src/cloud/cloud_base_compaction.cpp index f431eaf850bbd19..88d83000e95dfaa 100644 --- a/be/src/cloud/cloud_base_compaction.cpp +++ b/be/src/cloud/cloud_base_compaction.cpp @@ -124,7 +124,8 @@ Status CloudBaseCompaction::prepare_compact() { for (auto& rs : _input_rowsets) { _input_row_num += rs->num_rows(); _input_segments += rs->num_segments(); - _input_rowsets_size += rs->data_disk_size(); + _input_rowsets_data_size += rs->data_disk_size(); + _input_rowsets_total_size += rs->total_disk_size(); } LOG_INFO("start CloudBaseCompaction, tablet_id={}, range=[{}-{}]", _tablet->tablet_id(), _input_rowsets.front()->start_version(), _input_rowsets.back()->end_version()) @@ -132,7 +133,9 @@ Status CloudBaseCompaction::prepare_compact() { .tag("input_rowsets", _input_rowsets.size()) .tag("input_rows", _input_row_num) .tag("input_segments", _input_segments) - .tag("input_data_size", _input_rowsets_size); + .tag("input_rowsets_data_size", _input_rowsets_data_size) + .tag("input_rowsets_index_size", _input_rowsets_index_size) + .tag("input_rowsets_total_size", _input_rowsets_total_size); return st; } @@ -270,17 +273,21 @@ Status CloudBaseCompaction::execute_compact() { .tag("input_rowsets", _input_rowsets.size()) .tag("input_rows", _input_row_num) .tag("input_segments", _input_segments) - .tag("input_data_size", _input_rowsets_size) + .tag("input_rowsets_data_size", _input_rowsets_data_size) + .tag("input_rowsets_index_size", _input_rowsets_index_size) + .tag("input_rowsets_total", _input_rowsets_total_size) 
.tag("output_rows", _output_rowset->num_rows()) .tag("output_segments", _output_rowset->num_segments()) - .tag("output_data_size", _output_rowset->data_disk_size()); + .tag("output_rowset_data_size", _output_rowset->data_disk_size()) + .tag("output_rowset_index_size", _output_rowset->index_disk_size()) + .tag("output_rowset_total_size", _output_rowset->total_disk_size()); //_compaction_succeed = true; _state = CompactionState::SUCCESS; DorisMetrics::instance()->base_compaction_deltas_total->increment(_input_rowsets.size()); - DorisMetrics::instance()->base_compaction_bytes_total->increment(_input_rowsets_size); - base_output_size << _output_rowset->data_disk_size(); + DorisMetrics::instance()->base_compaction_bytes_total->increment(_input_rowsets_total_size); + base_output_size << _output_rowset->total_disk_size(); return Status::OK(); } @@ -302,8 +309,8 @@ Status CloudBaseCompaction::modify_rowsets() { compaction_job->set_output_cumulative_point(cloud_tablet()->cumulative_layer_point()); compaction_job->set_num_input_rows(_input_row_num); compaction_job->set_num_output_rows(_output_rowset->num_rows()); - compaction_job->set_size_input_rowsets(_input_rowsets_size); - compaction_job->set_size_output_rowsets(_output_rowset->data_disk_size()); + compaction_job->set_size_input_rowsets(_input_rowsets_total_size); + compaction_job->set_size_output_rowsets(_output_rowset->total_disk_size()); compaction_job->set_num_input_segments(_input_segments); compaction_job->set_num_output_segments(_output_rowset->num_segments()); compaction_job->set_num_input_rowsets(_input_rowsets.size()); diff --git a/be/src/cloud/cloud_compaction_action.cpp b/be/src/cloud/cloud_compaction_action.cpp index 13161c32c8e20e2..481f7b589fe523f 100644 --- a/be/src/cloud/cloud_compaction_action.cpp +++ b/be/src/cloud/cloud_compaction_action.cpp @@ -149,8 +149,9 @@ Status CloudCompactionAction::_handle_run_compaction(HttpRequest* req, std::stri compaction_type != PARAM_COMPACTION_FULL) { return 
Status::NotSupported("The compaction type '{}' is not supported", compaction_type); } - - CloudTabletSPtr tablet = DORIS_TRY(_engine.tablet_mgr().get_tablet(tablet_id)); + bool sync_delete_bitmap = compaction_type != PARAM_COMPACTION_FULL; + CloudTabletSPtr tablet = + DORIS_TRY(_engine.tablet_mgr().get_tablet(tablet_id, false, sync_delete_bitmap)); if (tablet == nullptr) { return Status::NotFound("Tablet not found. tablet_id={}", tablet_id); } diff --git a/be/src/cloud/cloud_cumulative_compaction.cpp b/be/src/cloud/cloud_cumulative_compaction.cpp index aad1bd7bfe7d2d7..6b74e70ee1b4b8e 100644 --- a/be/src/cloud/cloud_cumulative_compaction.cpp +++ b/be/src/cloud/cloud_cumulative_compaction.cpp @@ -164,7 +164,9 @@ Status CloudCumulativeCompaction::prepare_compact() { for (auto& rs : _input_rowsets) { _input_row_num += rs->num_rows(); _input_segments += rs->num_segments(); - _input_rowsets_size += rs->data_disk_size(); + _input_rowsets_data_size += rs->data_disk_size(); + _input_rowsets_index_size += rs->index_disk_size(); + _input_rowsets_total_size += rs->total_disk_size(); } LOG_INFO("start CloudCumulativeCompaction, tablet_id={}, range=[{}-{}]", _tablet->tablet_id(), _input_rowsets.front()->start_version(), _input_rowsets.back()->end_version()) @@ -172,7 +174,9 @@ Status CloudCumulativeCompaction::prepare_compact() { .tag("input_rowsets", _input_rowsets.size()) .tag("input_rows", _input_row_num) .tag("input_segments", _input_segments) - .tag("input_data_size", _input_rowsets_size) + .tag("input_rowsets_data_size", _input_rowsets_data_size) + .tag("input_rowsets_index_size", _input_rowsets_index_size) + .tag("input_rowsets_total_size", _input_rowsets_total_size) .tag("tablet_max_version", cloud_tablet()->max_version_unlocked()) .tag("cumulative_point", cloud_tablet()->cumulative_layer_point()) .tag("num_rowsets", cloud_tablet()->fetch_add_approximate_num_rowsets(0)) @@ -201,10 +205,14 @@ Status CloudCumulativeCompaction::execute_compact() { .tag("input_rowsets", 
_input_rowsets.size()) .tag("input_rows", _input_row_num) .tag("input_segments", _input_segments) - .tag("input_data_size", _input_rowsets_size) + .tag("input_rowsets_data_size", _input_rowsets_data_size) + .tag("input_rowsets_index_size", _input_rowsets_index_size) + .tag("input_rowsets_total_size", _input_rowsets_total_size) .tag("output_rows", _output_rowset->num_rows()) .tag("output_segments", _output_rowset->num_segments()) - .tag("output_data_size", _output_rowset->data_disk_size()) + .tag("output_rowset_data_size", _output_rowset->data_disk_size()) + .tag("output_rowset_index_size", _output_rowset->index_disk_size()) + .tag("output_rowset_total_size", _output_rowset->total_disk_size()) .tag("tablet_max_version", _tablet->max_version_unlocked()) .tag("cumulative_point", cloud_tablet()->cumulative_layer_point()) .tag("num_rowsets", cloud_tablet()->fetch_add_approximate_num_rowsets(0)) @@ -213,8 +221,9 @@ Status CloudCumulativeCompaction::execute_compact() { _state = CompactionState::SUCCESS; DorisMetrics::instance()->cumulative_compaction_deltas_total->increment(_input_rowsets.size()); - DorisMetrics::instance()->cumulative_compaction_bytes_total->increment(_input_rowsets_size); - cumu_output_size << _output_rowset->data_disk_size(); + DorisMetrics::instance()->cumulative_compaction_bytes_total->increment( + _input_rowsets_total_size); + cumu_output_size << _output_rowset->total_disk_size(); return Status::OK(); } @@ -243,8 +252,8 @@ Status CloudCumulativeCompaction::modify_rowsets() { compaction_job->set_output_cumulative_point(new_cumulative_point); compaction_job->set_num_input_rows(_input_row_num); compaction_job->set_num_output_rows(_output_rowset->num_rows()); - compaction_job->set_size_input_rowsets(_input_rowsets_size); - compaction_job->set_size_output_rowsets(_output_rowset->data_disk_size()); + compaction_job->set_size_input_rowsets(_input_rowsets_total_size); + compaction_job->set_size_output_rowsets(_output_rowset->total_disk_size()); 
compaction_job->set_num_input_segments(_input_segments); compaction_job->set_num_output_segments(_output_rowset->num_segments()); compaction_job->set_num_input_rowsets(_input_rowsets.size()); @@ -351,14 +360,15 @@ Status CloudCumulativeCompaction::modify_rowsets() { stats.num_rows(), stats.data_size()); } } - if (_tablet->keys_type() == KeysType::UNIQUE_KEYS && + if (config::enable_delete_bitmap_merge_on_compaction && + _tablet->keys_type() == KeysType::UNIQUE_KEYS && _tablet->enable_unique_key_merge_on_write() && _input_rowsets.size() != 1) { - process_old_version_delete_bitmap(); + RETURN_IF_ERROR(process_old_version_delete_bitmap()); } return Status::OK(); } -void CloudCumulativeCompaction::process_old_version_delete_bitmap() { +Status CloudCumulativeCompaction::process_old_version_delete_bitmap() { // agg previously rowset old version delete bitmap std::vector pre_rowsets {}; std::vector pre_rowset_ids {}; @@ -397,40 +407,29 @@ void CloudCumulativeCompaction::process_old_version_delete_bitmap() { } if (!new_delete_bitmap->empty()) { // store agg delete bitmap - Status update_st; DBUG_EXECUTE_IF("CloudCumulativeCompaction.modify_rowsets.update_delete_bitmap_failed", { - update_st = Status::InternalError( + return Status::InternalError( "test fail to update delete bitmap for tablet_id {}", cloud_tablet()->tablet_id()); }); - if (update_st.ok()) { - update_st = _engine.meta_mgr().update_delete_bitmap_without_lock( - *cloud_tablet(), new_delete_bitmap.get()); - } - if (!update_st.ok()) { - std::stringstream ss; - ss << "failed to update delete bitmap for tablet=" << cloud_tablet()->tablet_id() - << " st=" << update_st.to_string(); - std::string msg = ss.str(); - LOG(WARNING) << msg; - } else { - Version version(_input_rowsets.front()->start_version(), - _input_rowsets.back()->end_version()); - for (auto it = new_delete_bitmap->delete_bitmap.begin(); - it != new_delete_bitmap->delete_bitmap.end(); it++) { - _tablet->tablet_meta()->delete_bitmap().set(it->first, 
it->second); - } - _tablet->tablet_meta()->delete_bitmap().add_to_remove_queue(version.to_string(), - to_remove_vec); - DBUG_EXECUTE_IF( - "CloudCumulativeCompaction.modify_rowsets.delete_expired_stale_rowsets", { - static_cast(_tablet.get()) - ->delete_expired_stale_rowsets(); - }); + RETURN_IF_ERROR(_engine.meta_mgr().cloud_update_delete_bitmap_without_lock( + *cloud_tablet(), new_delete_bitmap.get())); + + Version version(_input_rowsets.front()->start_version(), + _input_rowsets.back()->end_version()); + for (auto it = new_delete_bitmap->delete_bitmap.begin(); + it != new_delete_bitmap->delete_bitmap.end(); it++) { + _tablet->tablet_meta()->delete_bitmap().set(it->first, it->second); } + _tablet->tablet_meta()->delete_bitmap().add_to_remove_queue(version.to_string(), + to_remove_vec); + DBUG_EXECUTE_IF( + "CloudCumulativeCompaction.modify_rowsets.delete_expired_stale_rowsets", + { static_cast(_tablet.get())->delete_expired_stale_rowsets(); }); } } + return Status::OK(); } void CloudCumulativeCompaction::garbage_collection() { diff --git a/be/src/cloud/cloud_cumulative_compaction.h b/be/src/cloud/cloud_cumulative_compaction.h index 62c7cb44ea5bf5f..1159dcb59ceef1b 100644 --- a/be/src/cloud/cloud_cumulative_compaction.h +++ b/be/src/cloud/cloud_cumulative_compaction.h @@ -47,7 +47,7 @@ class CloudCumulativeCompaction : public CloudCompactionMixin { void update_cumulative_point(); - void process_old_version_delete_bitmap(); + Status process_old_version_delete_bitmap(); ReaderType compaction_type() const override { return ReaderType::READER_CUMULATIVE_COMPACTION; } diff --git a/be/src/cloud/cloud_cumulative_compaction_policy.cpp b/be/src/cloud/cloud_cumulative_compaction_policy.cpp index f9af469e56f60a1..5a9879387b23278 100644 --- a/be/src/cloud/cloud_cumulative_compaction_policy.cpp +++ b/be/src/cloud/cloud_cumulative_compaction_policy.cpp @@ -209,7 +209,7 @@ int64_t CloudSizeBasedCumulativeCompactionPolicy::new_cumulative_point( // if rowsets have no delete 
version, check output_rowset total disk size satisfies promotion size. return output_rowset->start_version() == last_cumulative_point && (last_delete_version.first != -1 || - output_rowset->data_disk_size() >= cloud_promotion_size(tablet) || + output_rowset->total_disk_size() >= cloud_promotion_size(tablet) || satisfy_promotion_version) ? output_rowset->end_version() + 1 : last_cumulative_point; diff --git a/be/src/cloud/cloud_full_compaction.cpp b/be/src/cloud/cloud_full_compaction.cpp index 2e11891045c2505..c27b728c93d29b1 100644 --- a/be/src/cloud/cloud_full_compaction.cpp +++ b/be/src/cloud/cloud_full_compaction.cpp @@ -98,7 +98,9 @@ Status CloudFullCompaction::prepare_compact() { for (auto& rs : _input_rowsets) { _input_row_num += rs->num_rows(); _input_segments += rs->num_segments(); - _input_rowsets_size += rs->data_disk_size(); + _input_rowsets_data_size += rs->data_disk_size(); + _input_rowsets_index_size += rs->index_disk_size(); + _input_rowsets_total_size += rs->total_disk_size(); } LOG_INFO("start CloudFullCompaction, tablet_id={}, range=[{}-{}]", _tablet->tablet_id(), _input_rowsets.front()->start_version(), _input_rowsets.back()->end_version()) @@ -106,7 +108,9 @@ Status CloudFullCompaction::prepare_compact() { .tag("input_rowsets", _input_rowsets.size()) .tag("input_rows", _input_row_num) .tag("input_segments", _input_segments) - .tag("input_data_size", _input_rowsets_size); + .tag("input_rowsets_data_size", _input_rowsets_data_size) + .tag("input_rowsets_index_size", _input_rowsets_index_size) + .tag("input_rowsets_total_size", _input_rowsets_total_size); return st; } @@ -162,16 +166,20 @@ Status CloudFullCompaction::execute_compact() { .tag("input_rowsets", _input_rowsets.size()) .tag("input_rows", _input_row_num) .tag("input_segments", _input_segments) - .tag("input_data_size", _input_rowsets_size) + .tag("input_rowsets_data_size", _input_rowsets_data_size) + .tag("input_rowsets_index_size", _input_rowsets_index_size) + 
.tag("input_rowsets_total_size", _input_rowsets_total_size) .tag("output_rows", _output_rowset->num_rows()) .tag("output_segments", _output_rowset->num_segments()) - .tag("output_data_size", _output_rowset->data_disk_size()); + .tag("output_rowset_data_size", _output_rowset->data_disk_size()) + .tag("output_rowset_index_size", _output_rowset->index_disk_size()) + .tag("output_rowset_total_size", _output_rowset->total_disk_size()); _state = CompactionState::SUCCESS; DorisMetrics::instance()->full_compaction_deltas_total->increment(_input_rowsets.size()); - DorisMetrics::instance()->full_compaction_bytes_total->increment(_input_rowsets_size); - full_output_size << _output_rowset->data_disk_size(); + DorisMetrics::instance()->full_compaction_bytes_total->increment(_input_rowsets_total_size); + full_output_size << _output_rowset->total_disk_size(); return Status::OK(); } @@ -193,8 +201,12 @@ Status CloudFullCompaction::modify_rowsets() { compaction_job->set_output_cumulative_point(_output_rowset->end_version() + 1); compaction_job->set_num_input_rows(_input_row_num); compaction_job->set_num_output_rows(_output_rowset->num_rows()); - compaction_job->set_size_input_rowsets(_input_rowsets_size); - compaction_job->set_size_output_rowsets(_output_rowset->data_disk_size()); + compaction_job->set_size_input_rowsets(_input_rowsets_total_size); + compaction_job->set_size_output_rowsets(_output_rowset->total_disk_size()); + DBUG_EXECUTE_IF("CloudFullCompaction::modify_rowsets.wrong_compaction_data_size", { + compaction_job->set_size_input_rowsets(1); + compaction_job->set_size_output_rowsets(10000001); + }) compaction_job->set_num_input_segments(_input_segments); compaction_job->set_num_output_segments(_output_rowset->num_segments()); compaction_job->set_num_input_rowsets(_input_rowsets.size()); @@ -341,7 +353,7 @@ Status CloudFullCompaction::_cloud_full_compaction_update_delete_bitmap(int64_t .tag("input_rowsets", _input_rowsets.size()) .tag("input_rows", _input_row_num) 
.tag("input_segments", _input_segments) - .tag("input_data_size", _input_rowsets_size) + .tag("input_rowsets_total_size", _input_rowsets_total_size) .tag("update_bitmap_size", delete_bitmap->delete_bitmap.size()); _tablet->tablet_meta()->delete_bitmap().merge(*delete_bitmap); return Status::OK(); diff --git a/be/src/cloud/cloud_meta_mgr.cpp b/be/src/cloud/cloud_meta_mgr.cpp index 02497f6a044b912..ae98a7f605875e5 100644 --- a/be/src/cloud/cloud_meta_mgr.cpp +++ b/be/src/cloud/cloud_meta_mgr.cpp @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -39,6 +40,7 @@ #include "cloud/cloud_tablet.h" #include "cloud/config.h" #include "cloud/pb_convert.h" +#include "common/config.h" #include "common/logging.h" #include "common/status.h" #include "cpp/sync_point.h" @@ -51,6 +53,7 @@ #include "olap/olap_common.h" #include "olap/rowset/rowset.h" #include "olap/rowset/rowset_factory.h" +#include "olap/rowset/rowset_fwd.h" #include "olap/storage_engine.h" #include "olap/tablet_meta.h" #include "runtime/client_cache.h" @@ -292,6 +295,9 @@ static std::string debug_info(const Request& req) { return fmt::format(" tablet_id={}", req.rowset_meta().tablet_id()); } else if constexpr (is_any_v) { return fmt::format(" tablet_id={}", req.tablet_id()); + } else if constexpr (is_any_v) { + return fmt::format(" table_id={}, tablet_id={}, lock_id={}", req.table_id(), + req.tablet_id(), req.lock_id()); } else { static_assert(!sizeof(Request)); } @@ -378,7 +384,8 @@ Status CloudMetaMgr::get_tablet_meta(int64_t tablet_id, TabletMetaSharedPtr* tab return Status::OK(); } -Status CloudMetaMgr::sync_tablet_rowsets(CloudTablet* tablet, bool warmup_delta_data) { +Status CloudMetaMgr::sync_tablet_rowsets(CloudTablet* tablet, bool warmup_delta_data, + bool sync_delete_bitmap) { using namespace std::chrono; TEST_SYNC_POINT_RETURN_WITH_VALUE("CloudMetaMgr::sync_tablet_rowsets", Status::OK(), tablet); @@ -410,6 +417,10 @@ Status CloudMetaMgr::sync_tablet_rowsets(CloudTablet* 
tablet, bool warmup_delta_ req.set_cumulative_point(tablet->cumulative_layer_point()); } req.set_end_version(-1); + // backend side use schema dict + if (config::variant_use_cloud_schema_dict) { + req.set_schema_op(GetRowsetRequest::RETURN_DICT); + } VLOG_DEBUG << "send GetRowsetRequest: " << req.ShortDebugString(); stub->get_rowset(&cntl, &req, &resp, nullptr); @@ -455,7 +466,7 @@ Status CloudMetaMgr::sync_tablet_rowsets(CloudTablet* tablet, bool warmup_delta_ // If is mow, the tablet has no delete bitmap in base rowsets. // So dont need to sync it. - if (tablet->enable_unique_key_merge_on_write() && + if (sync_delete_bitmap && tablet->enable_unique_key_merge_on_write() && tablet->tablet_state() == TABLET_RUNNING) { DeleteBitmap delete_bitmap(tablet_id); int64_t old_max_version = req.start_version() - 1; @@ -524,7 +535,8 @@ Status CloudMetaMgr::sync_tablet_rowsets(CloudTablet* tablet, bool warmup_delta_ existed_rowset->rowset_id().to_string() == cloud_rs_meta_pb.rowset_id_v2()) { continue; // Same rowset, skip it } - RowsetMetaPB meta_pb = cloud_rowset_meta_to_doris(cloud_rs_meta_pb); + RowsetMetaPB meta_pb = cloud_rowset_meta_to_doris( + cloud_rs_meta_pb, resp.has_schema_dict() ? 
&resp.schema_dict() : nullptr); auto rs_meta = std::make_shared(); rs_meta->init_from_pb(meta_pb); RowsetSharedPtr rowset; @@ -693,11 +705,19 @@ Status CloudMetaMgr::sync_tablet_delete_bitmap(CloudTablet* tablet, int64_t old_ const auto& segment_ids = res.segment_ids(); const auto& vers = res.versions(); const auto& delete_bitmaps = res.segment_delete_bitmaps(); + if (rowset_ids.size() != segment_ids.size() || rowset_ids.size() != vers.size() || + rowset_ids.size() != delete_bitmaps.size()) { + return Status::Error( + "get delete bitmap data wrong," + "rowset_ids.size={},segment_ids.size={},vers.size={},delete_bitmaps.size={}", + rowset_ids.size(), segment_ids.size(), vers.size(), delete_bitmaps.size()); + } for (size_t i = 0; i < rowset_ids.size(); i++) { RowsetId rst_id; rst_id.init(rowset_ids[i]); - delete_bitmap->merge({rst_id, segment_ids[i], vers[i]}, - roaring::Roaring::read(delete_bitmaps[i].data())); + delete_bitmap->merge( + {rst_id, segment_ids[i], vers[i]}, + roaring::Roaring::readSafe(delete_bitmaps[i].data(), delete_bitmaps[i].length())); } int64_t latency = cntl.latency_us(); if (latency > 100 * 1000) { // 100ms @@ -750,6 +770,7 @@ Status CloudMetaMgr::commit_rowset(const RowsetMeta& rs_meta, Status ret_st; TEST_INJECTION_POINT_RETURN_WITH_VALUE("CloudMetaMgr::commit_rowset", ret_st); } + check_table_size_correctness(rs_meta); CreateRowsetRequest req; CreateRowsetResponse resp; req.set_cloud_unique_id(config::cloud_unique_id); @@ -1049,9 +1070,10 @@ Status CloudMetaMgr::update_delete_bitmap(const CloudTablet& tablet, int64_t loc return st; } -Status CloudMetaMgr::update_delete_bitmap_without_lock(const CloudTablet& tablet, - DeleteBitmap* delete_bitmap) { - VLOG_DEBUG << "update_delete_bitmap_without_lock , tablet_id: " << tablet.tablet_id(); +Status CloudMetaMgr::cloud_update_delete_bitmap_without_lock(const CloudTablet& tablet, + DeleteBitmap* delete_bitmap) { + LOG(INFO) << "cloud_update_delete_bitmap_without_lock , tablet_id: " << 
tablet.tablet_id() + << ",delete_bitmap size:" << delete_bitmap->delete_bitmap.size(); UpdateDeleteBitmapRequest req; UpdateDeleteBitmapResponse res; req.set_cloud_unique_id(config::cloud_unique_id); @@ -1106,6 +1128,25 @@ Status CloudMetaMgr::get_delete_bitmap_update_lock(const CloudTablet& tablet, in return st; } +Status CloudMetaMgr::remove_delete_bitmap_update_lock(const CloudTablet& tablet, int64_t lock_id, + int64_t initiator) { + VLOG_DEBUG << "remove_delete_bitmap_update_lock , tablet_id: " << tablet.tablet_id() + << ",lock_id:" << lock_id; + RemoveDeleteBitmapUpdateLockRequest req; + RemoveDeleteBitmapUpdateLockResponse res; + req.set_cloud_unique_id(config::cloud_unique_id); + req.set_tablet_id(tablet.tablet_id()); + req.set_lock_id(lock_id); + req.set_initiator(initiator); + auto st = retry_rpc("remove delete bitmap update lock", req, &res, + &MetaService_Stub::remove_delete_bitmap_update_lock); + if (!st.ok()) { + LOG(WARNING) << "remove delete bitmap update lock fail,tablet_id=" << tablet.tablet_id() + << " lock_id=" << lock_id << " st=" << st.to_string(); + } + return st; +} + Status CloudMetaMgr::remove_old_version_delete_bitmap( int64_t tablet_id, const std::vector>& to_delete) { @@ -1124,4 +1165,120 @@ Status CloudMetaMgr::remove_old_version_delete_bitmap( return st; } +void CloudMetaMgr::check_table_size_correctness(const RowsetMeta& rs_meta) { + if (!config::enable_table_size_correctness_check) { + return; + } + int64_t total_segment_size = get_segment_file_size(rs_meta); + int64_t total_inverted_index_size = get_inverted_index_file_szie(rs_meta); + if (rs_meta.data_disk_size() != total_segment_size || + rs_meta.index_disk_size() != total_inverted_index_size || + rs_meta.data_disk_size() + rs_meta.index_disk_size() != rs_meta.total_disk_size()) { + LOG(WARNING) << "[Cloud table size check failed]:" + << " tablet id: " << rs_meta.tablet_id() + << ", rowset id:" << rs_meta.rowset_id() + << ", rowset data disk size:" << 
rs_meta.data_disk_size() + << ", rowset real data disk size:" << total_segment_size + << ", rowset index disk size:" << rs_meta.index_disk_size() + << ", rowset real index disk size:" << total_inverted_index_size + << ", rowset total disk size:" << rs_meta.total_disk_size() + << ", rowset segment path:" + << StorageResource().remote_segment_path(rs_meta.tablet_id(), + rs_meta.rowset_id().to_string(), 0); + DCHECK(false); + } +} + +int64_t CloudMetaMgr::get_segment_file_size(const RowsetMeta& rs_meta) { + int64_t total_segment_size = 0; + const auto fs = const_cast(rs_meta).fs(); + if (!fs) { + LOG(WARNING) << "get fs failed, resource_id=" << rs_meta.resource_id(); + } + for (int64_t seg_id = 0; seg_id < rs_meta.num_segments(); seg_id++) { + std::string segment_path = StorageResource().remote_segment_path( + rs_meta.tablet_id(), rs_meta.rowset_id().to_string(), seg_id); + int64_t segment_file_size = 0; + auto st = fs->file_size(segment_path, &segment_file_size); + if (!st.ok()) { + segment_file_size = 0; + if (st.is()) { + LOG(INFO) << "cloud table size correctness check get segment size 0 because " + "file not exist! msg:" + << st.msg() << ", segment path:" << segment_path; + } else { + LOG(WARNING) << "cloud table size correctness check get segment size failed! 
msg:" + << st.msg() << ", segment path:" << segment_path; + } + } + total_segment_size += segment_file_size; + } + return total_segment_size; +} + +int64_t CloudMetaMgr::get_inverted_index_file_szie(const RowsetMeta& rs_meta) { + int64_t total_inverted_index_size = 0; + const auto fs = const_cast(rs_meta).fs(); + if (!fs) { + LOG(WARNING) << "get fs failed, resource_id=" << rs_meta.resource_id(); + } + if (rs_meta.tablet_schema()->get_inverted_index_storage_format() == + InvertedIndexStorageFormatPB::V1) { + const auto& indices = rs_meta.tablet_schema()->inverted_indexes(); + for (auto& index : indices) { + for (int seg_id = 0; seg_id < rs_meta.num_segments(); ++seg_id) { + std::string segment_path = StorageResource().remote_segment_path( + rs_meta.tablet_id(), rs_meta.rowset_id().to_string(), seg_id); + int64_t file_size = 0; + + std::string inverted_index_file_path = + InvertedIndexDescriptor::get_index_file_path_v1( + InvertedIndexDescriptor::get_index_file_path_prefix(segment_path), + index->index_id(), index->get_index_suffix()); + auto st = fs->file_size(inverted_index_file_path, &file_size); + if (!st.ok()) { + file_size = 0; + if (st.is()) { + LOG(INFO) << "cloud table size correctness check get inverted index v1 " + "0 because file not exist! msg:" + << st.msg() + << ", inverted index path:" << inverted_index_file_path; + } else { + LOG(WARNING) + << "cloud table size correctness check get inverted index v1 " + "size failed! 
msg:" + << st.msg() << ", inverted index path:" << inverted_index_file_path; + } + } + total_inverted_index_size += file_size; + } + } + } else { + for (int seg_id = 0; seg_id < rs_meta.num_segments(); ++seg_id) { + int64_t file_size = 0; + std::string segment_path = StorageResource().remote_segment_path( + rs_meta.tablet_id(), rs_meta.rowset_id().to_string(), seg_id); + + std::string inverted_index_file_path = InvertedIndexDescriptor::get_index_file_path_v2( + InvertedIndexDescriptor::get_index_file_path_prefix(segment_path)); + auto st = fs->file_size(inverted_index_file_path, &file_size); + if (!st.ok()) { + file_size = 0; + if (st.is()) { + LOG(INFO) << "cloud table size correctness check get inverted index v2 " + "0 because file not exist! msg:" + << st.msg() << ", inverted index path:" << inverted_index_file_path; + } else { + LOG(WARNING) << "cloud table size correctness check get inverted index v2 " + "size failed! msg:" + << st.msg() + << ", inverted index path:" << inverted_index_file_path; + } + } + total_inverted_index_size += file_size; + } + } + return total_inverted_index_size; +} + } // namespace doris::cloud diff --git a/be/src/cloud/cloud_meta_mgr.h b/be/src/cloud/cloud_meta_mgr.h index 79cdb3fd3d1f8c0..a657c0fdd8e3500 100644 --- a/be/src/cloud/cloud_meta_mgr.h +++ b/be/src/cloud/cloud_meta_mgr.h @@ -57,7 +57,8 @@ class CloudMetaMgr { Status get_tablet_meta(int64_t tablet_id, std::shared_ptr* tablet_meta); - Status sync_tablet_rowsets(CloudTablet* tablet, bool warmup_delta_data = false); + Status sync_tablet_rowsets(CloudTablet* tablet, bool warmup_delta_data = false, + bool sync_delete_bitmap = true); Status prepare_rowset(const RowsetMeta& rs_meta, std::shared_ptr* existed_rs_meta = nullptr); @@ -95,12 +96,15 @@ class CloudMetaMgr { Status update_delete_bitmap(const CloudTablet& tablet, int64_t lock_id, int64_t initiator, DeleteBitmap* delete_bitmap); - Status update_delete_bitmap_without_lock(const CloudTablet& tablet, - DeleteBitmap* 
delete_bitmap); + Status cloud_update_delete_bitmap_without_lock(const CloudTablet& tablet, + DeleteBitmap* delete_bitmap); Status get_delete_bitmap_update_lock(const CloudTablet& tablet, int64_t lock_id, int64_t initiator); + Status remove_delete_bitmap_update_lock(const CloudTablet& tablet, int64_t lock_id, + int64_t initiator); + Status remove_old_version_delete_bitmap( int64_t tablet_id, const std::vector>& to_delete); @@ -113,6 +117,9 @@ class CloudMetaMgr { Status sync_tablet_delete_bitmap(CloudTablet* tablet, int64_t old_max_version, std::ranges::range auto&& rs_metas, const TabletStatsPB& stats, const TabletIndexPB& idx, DeleteBitmap* delete_bitmap); + void check_table_size_correctness(const RowsetMeta& rs_meta); + int64_t get_segment_file_size(const RowsetMeta& rs_meta); + int64_t get_inverted_index_file_szie(const RowsetMeta& rs_meta); }; } // namespace cloud diff --git a/be/src/cloud/cloud_rowset_builder.cpp b/be/src/cloud/cloud_rowset_builder.cpp index 192da0f17efa825..2e6764b33aa79cb 100644 --- a/be/src/cloud/cloud_rowset_builder.cpp +++ b/be/src/cloud/cloud_rowset_builder.cpp @@ -106,7 +106,7 @@ void CloudRowsetBuilder::update_tablet_stats() { tablet->fetch_add_approximate_num_rowsets(1); tablet->fetch_add_approximate_num_segments(_rowset->num_segments()); tablet->fetch_add_approximate_num_rows(_rowset->num_rows()); - tablet->fetch_add_approximate_data_size(_rowset->data_disk_size()); + tablet->fetch_add_approximate_data_size(_rowset->total_disk_size()); tablet->fetch_add_approximate_cumu_num_rowsets(1); tablet->fetch_add_approximate_cumu_num_deltas(_rowset->num_segments()); tablet->write_count.fetch_add(1, std::memory_order_relaxed); diff --git a/be/src/cloud/cloud_rowset_writer.cpp b/be/src/cloud/cloud_rowset_writer.cpp index 642077b7e983ec1..ebc411697ee4b15 100644 --- a/be/src/cloud/cloud_rowset_writer.cpp +++ b/be/src/cloud/cloud_rowset_writer.cpp @@ -115,13 +115,14 @@ Status CloudRowsetWriter::build(RowsetSharedPtr& rowset) { } else { 
_rowset_meta->add_segments_file_size(seg_file_size.value()); } - - if (auto idx_files_info = _idx_files_info.get_inverted_files_info(_segment_start_id); - !idx_files_info.has_value()) [[unlikely]] { - LOG(ERROR) << "expected inverted index files info, but none presents: " - << idx_files_info.error(); - } else { - _rowset_meta->add_inverted_index_files_info(idx_files_info.value()); + if (rowset_schema->has_inverted_index()) { + if (auto idx_files_info = _idx_files.inverted_index_file_info(_segment_start_id); + !idx_files_info.has_value()) [[unlikely]] { + LOG(ERROR) << "expected inverted index files info, but none presents: " + << idx_files_info.error(); + } else { + _rowset_meta->add_inverted_index_files_info(idx_files_info.value()); + } } RETURN_NOT_OK_STATUS_WITH_WARN(RowsetFactory::create_rowset(rowset_schema, _context.tablet_path, diff --git a/be/src/cloud/cloud_schema_change_job.cpp b/be/src/cloud/cloud_schema_change_job.cpp index b7e3be93e853bb9..896804578d7db9c 100644 --- a/be/src/cloud/cloud_schema_change_job.cpp +++ b/be/src/cloud/cloud_schema_change_job.cpp @@ -344,7 +344,7 @@ Status CloudSchemaChangeJob::_convert_historical_rowsets(const SchemaChangeParam sc_job->add_txn_ids(rs->txn_id()); sc_job->add_output_versions(rs->end_version()); num_output_rows += rs->num_rows(); - size_output_rowsets += rs->data_disk_size(); + size_output_rowsets += rs->total_disk_size(); num_output_segments += rs->num_segments(); } sc_job->set_num_output_rows(num_output_rows); diff --git a/be/src/cloud/cloud_tablet.cpp b/be/src/cloud/cloud_tablet.cpp index c046259b0da71cb..601e9486edf0310 100644 --- a/be/src/cloud/cloud_tablet.cpp +++ b/be/src/cloud/cloud_tablet.cpp @@ -108,6 +108,36 @@ Status CloudTablet::capture_rs_readers(const Version& spec_version, return capture_rs_readers_unlocked(version_path, rs_splits); } +Status CloudTablet::merge_rowsets_schema() { + // Find the rowset with the max version + auto max_version_rowset = + std::max_element( + _rs_version_map.begin(), 
_rs_version_map.end(), + [](const auto& a, const auto& b) { + return !a.second->tablet_schema() + ? true + : (!b.second->tablet_schema() + ? false + : a.second->tablet_schema()->schema_version() < + b.second->tablet_schema() + ->schema_version()); + }) + ->second; + TabletSchemaSPtr max_version_schema = max_version_rowset->tablet_schema(); + // If the schema has variant columns, perform a merge to create a wide tablet schema + if (max_version_schema->num_variant_columns() > 0) { + std::vector schemas; + std::transform(_rs_version_map.begin(), _rs_version_map.end(), std::back_inserter(schemas), + [](const auto& rs_meta) { return rs_meta.second->tablet_schema(); }); + // Merge the collected schemas to obtain the least common schema + RETURN_IF_ERROR(vectorized::schema_util::get_least_common_schema(schemas, nullptr, + max_version_schema)); + VLOG_DEBUG << "dump schema: " << max_version_schema->dump_full_schema(); + _merged_tablet_schema = max_version_schema; + } + return Status::OK(); +} + // There are only two tablet_states RUNNING and NOT_READY in cloud mode // This function will erase the tablet from `CloudTabletMgr` when it can't find this tablet in MS. 
Status CloudTablet::sync_rowsets(int64_t query_version, bool warmup_delta_data) { @@ -133,6 +163,10 @@ Status CloudTablet::sync_rowsets(int64_t query_version, bool warmup_delta_data) if (st.is()) { clear_cache(); } + + // Merge all rowset schemas within a CloudTablet + RETURN_IF_ERROR(merge_rowsets_schema()); + return st; } @@ -188,16 +222,7 @@ Status CloudTablet::sync_if_not_running() { } TabletSchemaSPtr CloudTablet::merged_tablet_schema() const { - std::shared_lock rdlock(_meta_lock); - TabletSchemaSPtr target_schema; - std::vector schemas; - for (const auto& [_, rowset] : _rs_version_map) { - schemas.push_back(rowset->tablet_schema()); - } - // get the max version schema and merge all schema - static_cast( - vectorized::schema_util::get_least_common_schema(schemas, nullptr, target_schema)); - return target_schema; + return _merged_tablet_schema; } void CloudTablet::add_rowsets(std::vector to_add, bool version_overlap, @@ -263,15 +288,13 @@ void CloudTablet::add_rowsets(std::vector to_add, bool version_ auto schema_ptr = rowset_meta->tablet_schema(); auto idx_version = schema_ptr->get_inverted_index_storage_format(); if (idx_version == InvertedIndexStorageFormatPB::V1) { - for (const auto& index : schema_ptr->indexes()) { - if (index.index_type() == IndexType::INVERTED) { - auto idx_path = storage_resource.value()->remote_idx_v1_path( - *rowset_meta, seg_id, index.index_id(), - index.get_index_suffix()); - download_idx_file(idx_path); - } + for (const auto& index : schema_ptr->inverted_indexes()) { + auto idx_path = storage_resource.value()->remote_idx_v1_path( + *rowset_meta, seg_id, index->index_id(), + index->get_index_suffix()); + download_idx_file(idx_path); } - } else if (idx_version == InvertedIndexStorageFormatPB::V2) { + } else { if (schema_ptr->has_inverted_index()) { auto idx_path = storage_resource.value()->remote_idx_v2_path( *rowset_meta, seg_id); @@ -412,7 +435,7 @@ int CloudTablet::delete_expired_stale_rowsets() { void 
CloudTablet::update_base_size(const Rowset& rs) { // Define base rowset as the rowset of version [2-x] if (rs.start_version() == 2) { - _base_size = rs.data_disk_size(); + _base_size = rs.total_disk_size(); } } @@ -433,7 +456,7 @@ void CloudTablet::recycle_cached_data(const std::vector& rowset // TODO: Segment::file_cache_key auto file_key = Segment::file_cache_key(rs->rowset_id().to_string(), seg_id); auto* file_cache = io::FileCacheFactory::instance()->get_by_path(file_key); - file_cache->remove_if_cached(file_key); + file_cache->remove_if_cached_async(file_key); } } } @@ -872,4 +895,12 @@ Status CloudTablet::sync_meta() { return Status::OK(); } +void CloudTablet::build_tablet_report_info(TTabletInfo* tablet_info) { + std::shared_lock rdlock(_meta_lock); + tablet_info->__set_total_version_count(_tablet_meta->version_count()); + tablet_info->__set_tablet_id(_tablet_meta->tablet_id()); + // Currently, this information will not be used by the cloud report, + // but it may be used in the future. 
+} + } // namespace doris diff --git a/be/src/cloud/cloud_tablet.h b/be/src/cloud/cloud_tablet.h index 53747dc19e27dea..5f4785b62d23746 100644 --- a/be/src/cloud/cloud_tablet.h +++ b/be/src/cloud/cloud_tablet.h @@ -196,10 +196,13 @@ class CloudTablet final : public BaseTablet { int64_t last_base_compaction_success_time_ms = 0; int64_t last_cumu_compaction_success_time_ms = 0; int64_t last_cumu_no_suitable_version_ms = 0; + int64_t last_access_time_ms = 0; // Return merged extended schema TabletSchemaSPtr merged_tablet_schema() const override; + void build_tablet_report_info(TTabletInfo* tablet_info); + private: // FIXME(plat1ko): No need to record base size if rowsets are ordered by version void update_base_size(const Rowset& rs); @@ -208,6 +211,9 @@ class CloudTablet final : public BaseTablet { Status sync_if_not_running(); + // Merge all rowset schemas within a CloudTablet + Status merge_rowsets_schema(); + CloudStorageEngine& _engine; // this mutex MUST ONLY be used when sync meta @@ -246,6 +252,9 @@ class CloudTablet final : public BaseTablet { std::mutex _base_compaction_lock; std::mutex _cumulative_compaction_lock; mutable std::mutex _rowset_update_lock; + + // Schema will be merged from all rowsets when sync_rowsets + TabletSchemaSPtr _merged_tablet_schema; }; using CloudTabletSPtr = std::shared_ptr; diff --git a/be/src/cloud/cloud_tablet_mgr.cpp b/be/src/cloud/cloud_tablet_mgr.cpp index e5c31785c1eb1c0..e7a7d254f3fa89f 100644 --- a/be/src/cloud/cloud_tablet_mgr.cpp +++ b/be/src/cloud/cloud_tablet_mgr.cpp @@ -28,6 +28,7 @@ #include "runtime/memory/cache_policy.h" namespace doris { +uint64_t g_tablet_report_inactive_duration_ms = 0; namespace { // port from @@ -142,8 +143,14 @@ CloudTabletMgr::CloudTabletMgr(CloudStorageEngine& engine) CloudTabletMgr::~CloudTabletMgr() = default; -Result> CloudTabletMgr::get_tablet(int64_t tablet_id, - bool warmup_data) { +void set_tablet_access_time_ms(CloudTablet* tablet) { + using namespace std::chrono; + int64_t now = 
duration_cast(system_clock::now().time_since_epoch()).count(); + tablet->last_access_time_ms = now; +} + +Result> CloudTabletMgr::get_tablet(int64_t tablet_id, bool warmup_data, + bool sync_delete_bitmap) { // LRU value type. `Value`'s lifetime MUST NOT be longer than `CloudTabletMgr` class Value : public LRUCacheValueBase { public: @@ -161,8 +168,8 @@ Result> CloudTabletMgr::get_tablet(int64_t tablet_i CacheKey key(tablet_id_str); auto* handle = _cache->lookup(key); if (handle == nullptr) { - auto load_tablet = [this, &key, - warmup_data](int64_t tablet_id) -> std::shared_ptr { + auto load_tablet = [this, &key, warmup_data, + sync_delete_bitmap](int64_t tablet_id) -> std::shared_ptr { TabletMetaSharedPtr tablet_meta; auto st = _engine.meta_mgr().get_tablet_meta(tablet_id, &tablet_meta); if (!st.ok()) { @@ -173,7 +180,8 @@ Result> CloudTabletMgr::get_tablet(int64_t tablet_i auto tablet = std::make_shared(_engine, std::move(tablet_meta)); auto value = std::make_unique(tablet, *_tablet_map); // MUST sync stats to let compaction scheduler work correctly - st = _engine.meta_mgr().sync_tablet_rowsets(tablet.get(), warmup_data); + st = _engine.meta_mgr().sync_tablet_rowsets(tablet.get(), warmup_data, + sync_delete_bitmap); if (!st.ok()) { LOG(WARNING) << "failed to sync tablet " << tablet_id << ": " << st; return nullptr; @@ -181,8 +189,11 @@ Result> CloudTabletMgr::get_tablet(int64_t tablet_i auto* handle = _cache->insert(key, value.release(), 1, sizeof(CloudTablet), CachePriority::NORMAL); - auto ret = std::shared_ptr( - tablet.get(), [this, handle](...) 
{ _cache->release(handle); }); + auto ret = + std::shared_ptr(tablet.get(), [this, handle](CloudTablet* tablet) { + set_tablet_access_time_ms(tablet); + _cache->release(handle); + }); _tablet_map->put(std::move(tablet)); return ret; }; @@ -191,12 +202,16 @@ Result> CloudTabletMgr::get_tablet(int64_t tablet_i if (tablet == nullptr) { return ResultError(Status::InternalError("failed to get tablet {}", tablet_id)); } + set_tablet_access_time_ms(tablet.get()); return tablet; } CloudTablet* tablet_raw_ptr = reinterpret_cast(_cache->value(handle))->tablet.get(); - auto tablet = std::shared_ptr(tablet_raw_ptr, - [this, handle](...) { _cache->release(handle); }); + set_tablet_access_time_ms(tablet_raw_ptr); + auto tablet = std::shared_ptr(tablet_raw_ptr, [this, handle](CloudTablet* tablet) { + set_tablet_access_time_ms(tablet); + _cache->release(handle); + }); return tablet; } @@ -357,4 +372,54 @@ Status CloudTabletMgr::get_topn_tablets_to_compact( return Status::OK(); } +void CloudTabletMgr::build_all_report_tablets_info(std::map* tablets_info, + uint64_t* tablet_num) { + DCHECK(tablets_info != nullptr); + VLOG_NOTICE << "begin to build all report cloud tablets info"; + + HistogramStat tablet_version_num_hist; + + auto handler = [&](const std::weak_ptr& tablet_wk) { + auto tablet = tablet_wk.lock(); + if (!tablet) return; + (*tablet_num)++; + TTabletInfo tablet_info; + tablet->build_tablet_report_info(&tablet_info); + using namespace std::chrono; + int64_t now = duration_cast(system_clock::now().time_since_epoch()).count(); + if (now - g_tablet_report_inactive_duration_ms * 1000 < tablet->last_access_time_ms) { + // the tablet is still being accessed and used in recently, so not report it + return; + } + auto& t_tablet = (*tablets_info)[tablet->tablet_id()]; + // On the cloud, a specific BE has only one tablet replica; + // there are no multiple replicas for a specific BE. + // This is only to reuse the non-cloud report protocol. 
+ tablet_version_num_hist.add(tablet_info.total_version_count); + t_tablet.tablet_infos.emplace_back(std::move(tablet_info)); + }; + + auto weak_tablets = get_weak_tablets(); + std::for_each(weak_tablets.begin(), weak_tablets.end(), handler); + + DorisMetrics::instance()->tablet_version_num_distribution->set_histogram( + tablet_version_num_hist); + LOG(INFO) << "success to build all cloud report tablets info. all_tablet_count=" << *tablet_num + << " exceed drop time limit count=" << tablets_info->size(); +} + +void CloudTabletMgr::get_tablet_info(int64_t num_tablets, std::vector* tablets_info) { + auto weak_tablets = get_weak_tablets(); + for (auto& weak_tablet : weak_tablets) { + auto tablet = weak_tablet.lock(); + if (tablet == nullptr) { + continue; + } + if (tablets_info->size() >= num_tablets) { + return; + } + tablets_info->push_back(tablet->get_tablet_info()); + } +} + } // namespace doris diff --git a/be/src/cloud/cloud_tablet_mgr.h b/be/src/cloud/cloud_tablet_mgr.h index 976d483b36c143c..cbbd119a36b532f 100644 --- a/be/src/cloud/cloud_tablet_mgr.h +++ b/be/src/cloud/cloud_tablet_mgr.h @@ -17,6 +17,9 @@ #pragma once +#include +#include + #include #include #include @@ -31,6 +34,8 @@ class CloudStorageEngine; class LRUCachePolicy; class CountDownLatch; +extern uint64_t g_tablet_report_inactive_duration_ms; + class CloudTabletMgr { public: CloudTabletMgr(CloudStorageEngine& engine); @@ -38,7 +43,8 @@ class CloudTabletMgr { // If the tablet is in cache, return this tablet directly; otherwise will get tablet meta first, // sync rowsets after, and download segment data in background if `warmup_data` is true. 
- Result> get_tablet(int64_t tablet_id, bool warmup_data = false); + Result> get_tablet(int64_t tablet_id, bool warmup_data = false, + bool sync_delete_bitmap = true); void erase_tablet(int64_t tablet_id); @@ -65,6 +71,17 @@ class CloudTabletMgr { std::vector>* tablets, int64_t* max_score); + /** + * Gets tablets info and total tablet num that are reported + * + * @param tablets_info used by report + * @param tablet_num tablets in be tabletMgr, total num + */ + void build_all_report_tablets_info(std::map* tablets_info, + uint64_t* tablet_num); + + void get_tablet_info(int64_t num_tablets, std::vector* tablets_info); + private: CloudStorageEngine& _engine; diff --git a/be/src/cloud/cloud_warm_up_manager.cpp b/be/src/cloud/cloud_warm_up_manager.cpp index 07beeaeb078a464..06d6df11dc4cc37 100644 --- a/be/src/cloud/cloud_warm_up_manager.cpp +++ b/be/src/cloud/cloud_warm_up_manager.cpp @@ -63,14 +63,14 @@ void CloudWarmUpManager::handle_jobs() { #ifndef BE_TEST constexpr int WAIT_TIME_SECONDS = 600; while (true) { - JobMeta cur_job; + std::shared_ptr cur_job = nullptr; { std::unique_lock lock(_mtx); _cond.wait(lock, [this]() { return _closed || !_pending_job_metas.empty(); }); if (_closed) break; - cur_job = std::move(_pending_job_metas.front()); + cur_job = _pending_job_metas.front(); } - for (int64_t tablet_id : cur_job.tablet_ids) { + for (int64_t tablet_id : cur_job->tablet_ids) { if (_cur_job_id == 0) { // The job is canceled break; } @@ -147,15 +147,13 @@ void CloudWarmUpManager::handle_jobs() { auto schema_ptr = rs->tablet_schema(); auto idx_version = schema_ptr->get_inverted_index_storage_format(); if (idx_version == InvertedIndexStorageFormatPB::V1) { - for (const auto& index : schema_ptr->indexes()) { - if (index.index_type() == IndexType::INVERTED) { - wait->add_count(); - auto idx_path = storage_resource.value()->remote_idx_v1_path( - *rs, seg_id, index.index_id(), index.get_index_suffix()); - download_idx_file(idx_path); - } + for (const auto& index : 
schema_ptr->inverted_indexes()) { + wait->add_count(); + auto idx_path = storage_resource.value()->remote_idx_v1_path( + *rs, seg_id, index->index_id(), index->get_index_suffix()); + download_idx_file(idx_path); } - } else if (idx_version == InvertedIndexStorageFormatPB::V2) { + } else { if (schema_ptr->has_inverted_index()) { wait->add_count(); auto idx_path = @@ -173,7 +171,7 @@ void CloudWarmUpManager::handle_jobs() { } { std::unique_lock lock(_mtx); - _finish_job.push_back(std::move(cur_job)); + _finish_job.push_back(cur_job); _pending_job_metas.pop_front(); } } @@ -230,8 +228,9 @@ Status CloudWarmUpManager::check_and_set_batch_id(int64_t job_id, int64_t batch_ void CloudWarmUpManager::add_job(const std::vector& job_metas) { { std::lock_guard lock(_mtx); - std::for_each(job_metas.begin(), job_metas.end(), - [this](const TJobMeta& meta) { _pending_job_metas.emplace_back(meta); }); + std::for_each(job_metas.begin(), job_metas.end(), [this](const TJobMeta& meta) { + _pending_job_metas.emplace_back(std::make_shared(meta)); + }); } _cond.notify_all(); } diff --git a/be/src/cloud/cloud_warm_up_manager.h b/be/src/cloud/cloud_warm_up_manager.h index fd034b2c5bc38cd..219dedc58065a6c 100644 --- a/be/src/cloud/cloud_warm_up_manager.h +++ b/be/src/cloud/cloud_warm_up_manager.h @@ -74,8 +74,8 @@ class CloudWarmUpManager { std::condition_variable _cond; int64_t _cur_job_id {0}; int64_t _cur_batch_id {-1}; - std::deque _pending_job_metas; - std::vector _finish_job; + std::deque> _pending_job_metas; + std::vector> _finish_job; std::thread _download_thread; bool _closed {false}; // the attribute for compile in ut diff --git a/be/src/cloud/config.cpp b/be/src/cloud/config.cpp index e724dbea84e10ce..32e3250f87c2586 100644 --- a/be/src/cloud/config.cpp +++ b/be/src/cloud/config.cpp @@ -75,4 +75,5 @@ DEFINE_mInt32(tablet_txn_info_min_expired_seconds, "120"); DEFINE_mBool(enable_use_cloud_unique_id_from_fe, "true"); +DEFINE_mBool(enable_cloud_tablet_report, "true"); } // namespace 
doris::config diff --git a/be/src/cloud/config.h b/be/src/cloud/config.h index 86197f924d0cad0..8af967afb8c67b0 100644 --- a/be/src/cloud/config.h +++ b/be/src/cloud/config.h @@ -108,4 +108,6 @@ DECLARE_mInt32(tablet_txn_info_min_expired_seconds); DECLARE_mBool(enable_use_cloud_unique_id_from_fe); +DECLARE_Bool(enable_cloud_tablet_report); + } // namespace doris::config diff --git a/be/src/cloud/pb_convert.cpp b/be/src/cloud/pb_convert.cpp index 466e932fb2fd9a7..b6b8d3934eecc6f 100644 --- a/be/src/cloud/pb_convert.cpp +++ b/be/src/cloud/pb_convert.cpp @@ -17,6 +17,7 @@ #include "cloud/pb_convert.h" +#include #include #include @@ -138,19 +139,54 @@ void doris_rowset_meta_to_cloud(RowsetMetaCloudPB* out, RowsetMetaPB&& in) { out->mutable_inverted_index_file_info()->Swap(in.mutable_inverted_index_file_info()); } -RowsetMetaPB cloud_rowset_meta_to_doris(const RowsetMetaCloudPB& in) { +static void fill_schema_with_dict(const RowsetMetaCloudPB& in, RowsetMetaPB* out, + const SchemaCloudDictionary& dict) { + std::unordered_map unique_id_map; + //init map + for (ColumnPB& column : *out->mutable_tablet_schema()->mutable_column()) { + unique_id_map[column.unique_id()] = &column; + } + // column info + for (size_t i = 0; i < in.schema_dict_key_list().column_dict_key_list_size(); ++i) { + int dict_key = in.schema_dict_key_list().column_dict_key_list(i); + const ColumnPB& dict_val = dict.column_dict().at(dict_key); + ColumnPB& to_add = *out->mutable_tablet_schema()->add_column(); + to_add = dict_val; + VLOG_DEBUG << "fill dict column " << dict_val.ShortDebugString(); + } + + // index info + for (size_t i = 0; i < in.schema_dict_key_list().index_info_dict_key_list_size(); ++i) { + int dict_key = in.schema_dict_key_list().index_info_dict_key_list(i); + const TabletIndexPB& dict_val = dict.index_dict().at(dict_key); + *out->mutable_tablet_schema()->add_index() = dict_val; + VLOG_DEBUG << "fill dict index " << dict_val.ShortDebugString(); + } + + // sparse column info + for (size_t 
i = 0; i < in.schema_dict_key_list().sparse_column_dict_key_list_size(); ++i) { + int dict_key = in.schema_dict_key_list().sparse_column_dict_key_list(i); + const ColumnPB& dict_val = dict.column_dict().at(dict_key); + *unique_id_map.at(dict_val.parent_unique_id())->add_sparse_columns() = dict_val; + VLOG_DEBUG << "fill dict sparse column" << dict_val.ShortDebugString(); + } +} + +RowsetMetaPB cloud_rowset_meta_to_doris(const RowsetMetaCloudPB& in, + const SchemaCloudDictionary* dict) { RowsetMetaPB out; - cloud_rowset_meta_to_doris(&out, in); + cloud_rowset_meta_to_doris(&out, in, dict); return out; } -RowsetMetaPB cloud_rowset_meta_to_doris(RowsetMetaCloudPB&& in) { +RowsetMetaPB cloud_rowset_meta_to_doris(RowsetMetaCloudPB&& in, const SchemaCloudDictionary* dict) { RowsetMetaPB out; - cloud_rowset_meta_to_doris(&out, std::move(in)); + cloud_rowset_meta_to_doris(&out, std::move(in), dict); return out; } -void cloud_rowset_meta_to_doris(RowsetMetaPB* out, const RowsetMetaCloudPB& in) { +void cloud_rowset_meta_to_doris(RowsetMetaPB* out, const RowsetMetaCloudPB& in, + const SchemaCloudDictionary* dict) { // ATTN: please keep the set order aligned with the definition of proto `TabletSchemaCloudPB`. 
out->set_rowset_id(in.rowset_id()); out->set_partition_id(in.partition_id()); @@ -185,6 +221,9 @@ void cloud_rowset_meta_to_doris(RowsetMetaPB* out, const RowsetMetaCloudPB& in) if (in.has_tablet_schema()) { cloud_tablet_schema_to_doris(out->mutable_tablet_schema(), in.tablet_schema()); } + if (dict != nullptr) { + fill_schema_with_dict(in, out, *dict); + } out->set_txn_expiration(in.txn_expiration()); out->set_segments_overlap_pb(in.segments_overlap_pb()); out->mutable_segments_file_size()->CopyFrom(in.segments_file_size()); @@ -198,7 +237,8 @@ void cloud_rowset_meta_to_doris(RowsetMetaPB* out, const RowsetMetaCloudPB& in) out->mutable_inverted_index_file_info()->CopyFrom(in.inverted_index_file_info()); } -void cloud_rowset_meta_to_doris(RowsetMetaPB* out, RowsetMetaCloudPB&& in) { +void cloud_rowset_meta_to_doris(RowsetMetaPB* out, RowsetMetaCloudPB&& in, + const SchemaCloudDictionary* dict) { // ATTN: please keep the set order aligned with the definition of proto `TabletSchemaCloudPB`. 
out->set_rowset_id(in.rowset_id()); out->set_partition_id(in.partition_id()); @@ -234,6 +274,9 @@ void cloud_rowset_meta_to_doris(RowsetMetaPB* out, RowsetMetaCloudPB&& in) { cloud_tablet_schema_to_doris(out->mutable_tablet_schema(), std::move(*in.mutable_tablet_schema())); } + if (dict != nullptr) { + fill_schema_with_dict(in, out, *dict); + } out->set_txn_expiration(in.txn_expiration()); out->set_segments_overlap_pb(in.segments_overlap_pb()); out->mutable_segments_file_size()->Swap(in.mutable_segments_file_size()); @@ -460,6 +503,7 @@ void doris_tablet_meta_to_cloud(TabletMetaCloudPB* out, const TabletMetaPB& in) if (in.has_schema_version()) { out->set_schema_version(in.schema_version()); } + out->set_storage_page_size(in.storage_page_size()); } void doris_tablet_meta_to_cloud(TabletMetaCloudPB* out, TabletMetaPB&& in) { @@ -526,6 +570,7 @@ void doris_tablet_meta_to_cloud(TabletMetaCloudPB* out, TabletMetaPB&& in) { if (in.has_schema_version()) { out->set_schema_version(in.schema_version()); } + out->set_storage_page_size(in.storage_page_size()); } TabletMetaPB cloud_tablet_meta_to_doris(const TabletMetaCloudPB& in) { @@ -601,6 +646,7 @@ void cloud_tablet_meta_to_doris(TabletMetaPB* out, const TabletMetaCloudPB& in) if (in.has_schema_version()) { out->set_schema_version(in.schema_version()); } + out->set_storage_page_size(in.storage_page_size()); } void cloud_tablet_meta_to_doris(TabletMetaPB* out, TabletMetaCloudPB&& in) { @@ -667,6 +713,7 @@ void cloud_tablet_meta_to_doris(TabletMetaPB* out, TabletMetaCloudPB&& in) { if (in.has_schema_version()) { out->set_schema_version(in.schema_version()); } + out->set_storage_page_size(in.storage_page_size()); } } // namespace doris::cloud diff --git a/be/src/cloud/pb_convert.h b/be/src/cloud/pb_convert.h index 0cfa033f2930a0f..31fe43adb11a6da 100644 --- a/be/src/cloud/pb_convert.h +++ b/be/src/cloud/pb_convert.h @@ -24,10 +24,14 @@ RowsetMetaCloudPB doris_rowset_meta_to_cloud(const RowsetMetaPB&); RowsetMetaCloudPB 
doris_rowset_meta_to_cloud(RowsetMetaPB&&); void doris_rowset_meta_to_cloud(RowsetMetaCloudPB* out, const RowsetMetaPB& in); void doris_rowset_meta_to_cloud(RowsetMetaCloudPB* out, RowsetMetaPB&& in); -RowsetMetaPB cloud_rowset_meta_to_doris(const RowsetMetaCloudPB&); -RowsetMetaPB cloud_rowset_meta_to_doris(RowsetMetaCloudPB&&); -void cloud_rowset_meta_to_doris(RowsetMetaPB* out, const RowsetMetaCloudPB& in); -void cloud_rowset_meta_to_doris(RowsetMetaPB* out, RowsetMetaCloudPB&& in); +RowsetMetaPB cloud_rowset_meta_to_doris(const RowsetMetaCloudPB&, + const SchemaCloudDictionary* dict = nullptr); +RowsetMetaPB cloud_rowset_meta_to_doris(RowsetMetaCloudPB&&, + const SchemaCloudDictionary* dict = nullptr); +void cloud_rowset_meta_to_doris(RowsetMetaPB* out, const RowsetMetaCloudPB& in, + const SchemaCloudDictionary* dict = nullptr); +void cloud_rowset_meta_to_doris(RowsetMetaPB* out, RowsetMetaCloudPB&& in, + const SchemaCloudDictionary* dict = nullptr); // TabletSchemaPB <=> TabletSchemaCloudPB TabletSchemaCloudPB doris_tablet_schema_to_cloud(const TabletSchemaPB&); diff --git a/be/src/clucene b/be/src/clucene index 5a458e6112b7e50..7cf6cf410d41d95 160000 --- a/be/src/clucene +++ b/be/src/clucene @@ -1 +1 @@ -Subproject commit 5a458e6112b7e5010262594147adf22830b096e6 +Subproject commit 7cf6cf410d41d95456edba263cc55b7b6f5ab027 diff --git a/be/src/common/compile_check_begin.h b/be/src/common/compile_check_begin.h index d3b7f60439c74e5..6da403f28948857 100644 --- a/be/src/common/compile_check_begin.h +++ b/be/src/common/compile_check_begin.h @@ -15,10 +15,16 @@ // specific language governing permissions and limitations // under the License. +#ifdef COMPILE_CHECK +#error The handling of compile_check_begin.h and compile_check_end.h is not done correctly. 
+#endif + +#define COMPILE_CHECK #ifdef __clang__ #pragma clang diagnostic push #pragma clang diagnostic error "-Wconversion" #pragma clang diagnostic ignored "-Wsign-conversion" #pragma clang diagnostic ignored "-Wfloat-conversion" #endif + //#include "common/compile_check_begin.h" \ No newline at end of file diff --git a/be/src/common/compile_check_end.h b/be/src/common/compile_check_end.h index 6cba13c7f669c53..0897965dc74a3dc 100644 --- a/be/src/common/compile_check_end.h +++ b/be/src/common/compile_check_end.h @@ -18,4 +18,6 @@ #ifdef __clang__ #pragma clang diagnostic pop #endif +#undef COMPILE_CHECK + // #include "common/compile_check_end.h" \ No newline at end of file diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp index 425cf0f4de051f4..38f167da510118e 100644 --- a/be/src/common/config.cpp +++ b/be/src/common/config.cpp @@ -540,7 +540,6 @@ DEFINE_mInt32(streaming_load_rpc_max_alive_time_sec, "1200"); DEFINE_Int32(tablet_writer_open_rpc_timeout_sec, "60"); // You can ignore brpc error '[E1011]The server is overcrowded' when writing data. DEFINE_mBool(tablet_writer_ignore_eovercrowded, "true"); -DEFINE_mBool(exchange_sink_ignore_eovercrowded, "true"); DEFINE_mInt32(slave_replica_writer_rpc_timeout_sec, "60"); // Whether to enable stream load record function, the default is false. 
// False: disable stream load record @@ -905,7 +904,8 @@ DEFINE_mInt64(small_column_size_buffer, "100"); // Perform the always_true check at intervals determined by runtime_filter_sampling_frequency DEFINE_mInt32(runtime_filter_sampling_frequency, "64"); - +DEFINE_mInt32(execution_max_rpc_timeout_sec, "3600"); +DEFINE_mBool(execution_ignore_eovercrowded, "true"); // cooldown task configs DEFINE_Int32(cooldown_thread_num, "5"); DEFINE_mInt64(generate_cooldown_task_interval_sec, "20"); @@ -927,6 +927,9 @@ DEFINE_mBool(enable_query_like_bloom_filter, "true"); DEFINE_Int32(doris_remote_scanner_thread_pool_thread_num, "48"); // number of s3 scanner thread pool queue size DEFINE_Int32(doris_remote_scanner_thread_pool_queue_size, "102400"); +DEFINE_mInt64(block_cache_wait_timeout_ms, "1000"); +DEFINE_mInt64(cache_lock_long_tail_threshold, "1000"); +DEFINE_Int64(file_cache_recycle_keys_size, "1000000"); // limit the queue of pending batches which will be sent by a single nodechannel DEFINE_mInt64(nodechannel_pending_queue_max_bytes, "67108864"); @@ -981,6 +984,8 @@ DEFINE_Int32(pipeline_executor_size, "0"); DEFINE_Bool(enable_workload_group_for_scan, "false"); DEFINE_mInt64(workload_group_scan_task_wait_timeout_ms, "10000"); +// Whether use schema dict in backend side instead of MetaService side(cloud mode) +DEFINE_mBool(variant_use_cloud_schema_dict, "true"); DEFINE_mDouble(variant_ratio_of_defaults_as_sparse_column, "1"); DEFINE_mInt64(variant_threshold_rows_to_estimate_sparse_column, "2048"); DEFINE_mBool(variant_throw_exeception_on_invalid_json, "false"); @@ -1008,13 +1013,11 @@ DEFINE_Bool(enable_file_cache_query_limit, "false"); DEFINE_mInt32(file_cache_enter_disk_resource_limit_mode_percent, "90"); DEFINE_mInt32(file_cache_exit_disk_resource_limit_mode_percent, "80"); DEFINE_mBool(enable_read_cache_file_directly, "false"); -DEFINE_mBool(file_cache_enable_evict_from_other_queue_by_size, "false"); +DEFINE_mBool(file_cache_enable_evict_from_other_queue_by_size, 
"true"); DEFINE_mInt64(file_cache_ttl_valid_check_interval_second, "0"); // zero for not checking // If true, evict the ttl cache using LRU when full. // Otherwise, only expiration can evict ttl and new data won't add to cache when full. DEFINE_Bool(enable_ttl_cache_evict_using_lru, "true"); -// rename ttl filename to new format during read, with some performance cost -DEFINE_mBool(translate_to_new_ttl_format_during_read, "false"); DEFINE_mBool(enbale_dump_error_file, "true"); // limit the max size of error log on disk DEFINE_mInt64(file_cache_error_log_limit_bytes, "209715200"); // 200MB @@ -1041,7 +1044,7 @@ DEFINE_Int32(inverted_index_read_buffer_size, "4096"); // tree depth for bkd index DEFINE_Int32(max_depth_in_bkd_tree, "32"); // index compaction -DEFINE_mBool(inverted_index_compaction_enable, "false"); +DEFINE_mBool(inverted_index_compaction_enable, "true"); // Only for debug, do not use in production DEFINE_mBool(debug_inverted_index_compaction, "false"); // index by RAM directory @@ -1289,7 +1292,7 @@ DEFINE_Int64(num_s3_file_upload_thread_pool_min_thread, "16"); // The max thread num for S3FileUploadThreadPool DEFINE_Int64(num_s3_file_upload_thread_pool_max_thread, "64"); // The max ratio for ttl cache's size -DEFINE_mInt64(max_ttl_cache_ratio, "90"); +DEFINE_mInt64(max_ttl_cache_ratio, "50"); // The maximum jvm heap usage ratio for hdfs write workload DEFINE_mDouble(max_hdfs_wirter_jni_heap_usage_ratio, "0.5"); // The sleep milliseconds duration when hdfs write exceeds the maximum usage @@ -1354,6 +1357,10 @@ DEFINE_mInt32(check_score_rounds_num, "1000"); DEFINE_Int32(query_cache_size, "512"); +DEFINE_mBool(enable_delete_bitmap_merge_on_compaction, "false"); +// Enable validation to check the correctness of table size. 
+DEFINE_Bool(enable_table_size_correctness_check, "false"); + // clang-format off #ifdef BE_TEST // test s3 diff --git a/be/src/common/config.h b/be/src/common/config.h index 7073b92821ef732..ec0edba0d7d8d4d 100644 --- a/be/src/common/config.h +++ b/be/src/common/config.h @@ -587,7 +587,6 @@ DECLARE_mInt32(streaming_load_rpc_max_alive_time_sec); DECLARE_Int32(tablet_writer_open_rpc_timeout_sec); // You can ignore brpc error '[E1011]The server is overcrowded' when writing data. DECLARE_mBool(tablet_writer_ignore_eovercrowded); -DECLARE_mBool(exchange_sink_ignore_eovercrowded); DECLARE_mInt32(slave_replica_writer_rpc_timeout_sec); // Whether to enable stream load record function, the default is false. // False: disable stream load record @@ -960,6 +959,8 @@ DECLARE_mInt64(big_column_size_buffer); DECLARE_mInt64(small_column_size_buffer); DECLARE_mInt32(runtime_filter_sampling_frequency); +DECLARE_mInt32(execution_max_rpc_timeout_sec); +DECLARE_mBool(execution_ignore_eovercrowded); // cooldown task configs DECLARE_Int32(cooldown_thread_num); @@ -986,6 +987,9 @@ DECLARE_mInt64(nodechannel_pending_queue_max_bytes); // The batch size for sending data by brpc streaming client DECLARE_mInt64(brpc_streaming_client_batch_bytes); +DECLARE_mInt64(block_cache_wait_timeout_ms); +DECLARE_mInt64(cache_lock_long_tail_threshold); +DECLARE_Int64(file_cache_recycle_keys_size); DECLARE_Bool(enable_brpc_builtin_services); @@ -1061,8 +1065,6 @@ DECLARE_mInt64(file_cache_ttl_valid_check_interval_second); // If true, evict the ttl cache using LRU when full. // Otherwise, only expiration can evict ttl and new data won't add to cache when full. 
DECLARE_Bool(enable_ttl_cache_evict_using_lru); -// rename ttl filename to new format during read, with some performance cost -DECLARE_Bool(translate_to_new_ttl_format_during_read); DECLARE_mBool(enbale_dump_error_file); // limit the max size of error log on disk DECLARE_mInt64(file_cache_error_log_limit_bytes); @@ -1180,6 +1182,7 @@ DECLARE_mInt64(LZ4_HC_compression_level); // Threshold of a column as sparse column // Notice: TEST ONLY DECLARE_mDouble(variant_ratio_of_defaults_as_sparse_column); +DECLARE_mBool(variant_use_cloud_schema_dict); // Threshold to estimate a column is sparsed // Notice: TEST ONLY DECLARE_mInt64(variant_threshold_rows_to_estimate_sparse_column); @@ -1440,6 +1443,10 @@ DECLARE_mInt32(check_score_rounds_num); // MB DECLARE_Int32(query_cache_size); +DECLARE_mBool(enable_delete_bitmap_merge_on_compaction); +// Enable validation to check the correctness of table size. +DECLARE_Bool(enable_table_size_correctness_check); + #ifdef BE_TEST // test s3 DECLARE_String(test_s3_resource); diff --git a/be/src/common/status.h b/be/src/common/status.h index e95b93431679a25..e6c4b6871168e7b 100644 --- a/be/src/common/status.h +++ b/be/src/common/status.h @@ -478,7 +478,7 @@ class [[nodiscard]] Status { ERROR_CTOR_NOSTACK(Cancelled, CANCELLED) ERROR_CTOR(MemoryLimitExceeded, MEM_LIMIT_EXCEEDED) ERROR_CTOR(RpcError, THRIFT_RPC_ERROR) - ERROR_CTOR(TimedOut, TIMEOUT) + ERROR_CTOR_NOSTACK(TimedOut, TIMEOUT) ERROR_CTOR_NOSTACK(TooManyTasks, TOO_MANY_TASKS) ERROR_CTOR(Uninitialized, UNINITIALIZED) ERROR_CTOR(Aborted, ABORTED) diff --git a/be/src/exec/schema_scanner/schema_rowsets_scanner.cpp b/be/src/exec/schema_scanner/schema_rowsets_scanner.cpp index 16d5f2daba61e74..3aa0e944a822c52 100644 --- a/be/src/exec/schema_scanner/schema_rowsets_scanner.cpp +++ b/be/src/exec/schema_scanner/schema_rowsets_scanner.cpp @@ -26,6 +26,9 @@ #include #include +#include "cloud/cloud_storage_engine.h" +#include "cloud/cloud_tablet.h" +#include "cloud/cloud_tablet_mgr.h" #include 
"cloud/config.h" #include "common/status.h" #include "olap/olap_common.h" @@ -35,6 +38,7 @@ #include "olap/tablet.h" #include "olap/tablet_manager.h" #include "runtime/define_primitive_type.h" +#include "runtime/exec_env.h" #include "runtime/runtime_state.h" #include "util/runtime_profile.h" #include "vec/common/string_ref.h" @@ -78,7 +82,19 @@ Status SchemaRowsetsScanner::start(RuntimeState* state) { Status SchemaRowsetsScanner::_get_all_rowsets() { if (config::is_cloud_mode()) { - return Status::NotSupported("SchemaRowsetsScanner::_get_all_rowsets is not implemented"); + // only query cloud tablets in lru cache instead of all tablets + std::vector> tablets = + ExecEnv::GetInstance()->storage_engine().to_cloud().tablet_mgr().get_weak_tablets(); + for (const std::weak_ptr& tablet : tablets) { + if (!tablet.expired()) { + auto t = tablet.lock(); + std::shared_lock rowset_ldlock(t->get_header_lock()); + for (const auto& it : t->rowset_map()) { + rowsets_.emplace_back(it.second); + } + } + } + return Status::OK(); } std::vector tablets = ExecEnv::GetInstance()->storage_engine().to_local().tablet_manager()->get_all_tablet(); diff --git a/be/src/exec/tablet_info.cpp b/be/src/exec/tablet_info.cpp index 0816a1ac6986575..f1c0ad60e06455c 100644 --- a/be/src/exec/tablet_info.cpp +++ b/be/src/exec/tablet_info.cpp @@ -788,6 +788,7 @@ Status VOlapTablePartitionParam::replace_partitions( // add new partitions with new id. 
_partitions.emplace_back(part); + VLOG_NOTICE << "params add new partition " << part->id; // replace items in _partition_maps if (_is_in_partition) { diff --git a/be/src/exprs/bitmapfilter_predicate.h b/be/src/exprs/bitmapfilter_predicate.h index 376453c06817b07..5cb2b812220b10e 100644 --- a/be/src/exprs/bitmapfilter_predicate.h +++ b/be/src/exprs/bitmapfilter_predicate.h @@ -37,7 +37,7 @@ class BitmapFilterFuncBase : public RuntimeFilterFuncBase { virtual void light_copy(BitmapFilterFuncBase* other) { _not_in = other->_not_in; } virtual uint16_t find_fixed_len_olap_engine(const char* data, const uint8* nullmap, uint16_t* offsets, int number) = 0; - virtual void find_batch(const char* data, const uint8* nullmap, int number, + virtual void find_batch(const char* data, const uint8* nullmap, size_t number, uint8* results) const = 0; virtual size_t size() const = 0; bool is_not_in() const { return _not_in; } @@ -65,7 +65,7 @@ class BitmapFilterFunc : public BitmapFilterFuncBase { uint16_t find_fixed_len_olap_engine(const char* data, const uint8* nullmap, uint16_t* offsets, int number) override; - void find_batch(const char* data, const uint8* nullmap, int number, + void find_batch(const char* data, const uint8* nullmap, size_t number, uint8* results) const override; bool empty() override { return _bitmap_value->empty(); } @@ -133,9 +133,9 @@ uint16_t BitmapFilterFunc::find_fixed_len_olap_engine(const char* data, co } template -void BitmapFilterFunc::find_batch(const char* data, const uint8* nullmap, int number, +void BitmapFilterFunc::find_batch(const char* data, const uint8* nullmap, size_t number, uint8* results) const { - for (int i = 0; i < number; i++) { + for (size_t i = 0; i < number; i++) { results[i] = false; if (nullmap != nullptr && nullmap[i]) { continue; diff --git a/be/src/exprs/runtime_filter.cpp b/be/src/exprs/runtime_filter.cpp index 84a964f5c3865ce..fb82450ac4d600d 100644 --- a/be/src/exprs/runtime_filter.cpp +++ b/be/src/exprs/runtime_filter.cpp @@ 
-1146,8 +1146,11 @@ Status IRuntimeFilter::send_filter_size(RuntimeState* state, uint64_t local_filt request->set_filter_size(local_filter_size); request->set_filter_id(_filter_id); - callback->cntl_->set_timeout_ms(std::min(3600, state->execution_timeout()) * 1000); - callback->cntl_->ignore_eovercrowded(); + + callback->cntl_->set_timeout_ms(get_execution_rpc_timeout_ms(state->execution_timeout())); + if (config::execution_ignore_eovercrowded) { + callback->cntl_->ignore_eovercrowded(); + } stub->send_filter_size(closure->cntl_.get(), closure->request_.get(), closure->response_.get(), closure.get()); @@ -1181,11 +1184,14 @@ Status IRuntimeFilter::push_to_remote(const TNetworkAddress* addr) { pfragment_instance_id->set_lo((int64_t)this); merge_filter_request->set_filter_id(_filter_id); - merge_filter_request->set_is_pipeline(true); auto column_type = _wrapper->column_type(); RETURN_IF_CATCH_EXCEPTION(merge_filter_request->set_column_type(to_proto(column_type))); - merge_filter_callback->cntl_->set_timeout_ms(wait_time_ms()); - merge_filter_callback->cntl_->ignore_eovercrowded(); + + merge_filter_callback->cntl_->set_timeout_ms( + get_execution_rpc_timeout_ms(_state->execution_timeout)); + if (config::execution_ignore_eovercrowded) { + merge_filter_callback->cntl_->ignore_eovercrowded(); + } if (get_ignored()) { merge_filter_request->set_filter_type(PFilterType::UNKNOW_FILTER); diff --git a/be/src/exprs/runtime_filter_slots.h b/be/src/exprs/runtime_filter_slots.h index cb7944409ac2d74..42c5f598633ad9f 100644 --- a/be/src/exprs/runtime_filter_slots.h +++ b/be/src/exprs/runtime_filter_slots.h @@ -77,6 +77,10 @@ class VRuntimeFilterSlots { if (filter->get_real_type() != RuntimeFilterType::IN_FILTER) { continue; } + if (!filter->need_sync_filter_size() && + filter->type() == RuntimeFilterType::IN_OR_BLOOM_FILTER) { + continue; + } if (has_in_filter.contains(filter->expr_order())) { filter->set_ignored(); continue; @@ -84,7 +88,7 @@ class VRuntimeFilterSlots { 
has_in_filter.insert(filter->expr_order()); } - // process ignore filter when it has IN_FILTER on same expr, and init bloom filter size + // process ignore filter when it has IN_FILTER on same expr for (auto filter : _runtime_filters) { if (filter->get_ignored()) { continue; diff --git a/be/src/http/action/adjust_log_level.cpp b/be/src/http/action/adjust_log_level.cpp index 687639a9b58deaa..a8644a0fb5f52a3 100644 --- a/be/src/http/action/adjust_log_level.cpp +++ b/be/src/http/action/adjust_log_level.cpp @@ -17,8 +17,9 @@ #include +#include + #include "common/logging.h" -#include "common/status.h" #include "http/http_channel.h" #include "http/http_request.h" @@ -26,7 +27,7 @@ namespace doris { // **Note**: If the module_name does not exist in the vlog modules, vlog // would create corresponding module for it. -int handle_request(HttpRequest* req) { +std::tuple handle_request(HttpRequest* req) { auto parse_param = [&req](std::string param) { const auto& value = req->param(param); if (value.empty()) { @@ -38,13 +39,16 @@ int handle_request(HttpRequest* req) { const auto& module = parse_param("module"); const auto& level = parse_param("level"); int new_level = std::stoi(level); - return google::SetVLOGLevel(module.c_str(), new_level); + return std::make_tuple(module, google::SetVLOGLevel(module.c_str(), new_level), new_level); } void AdjustLogLevelAction::handle(HttpRequest* req) { try { - auto old_level = handle_request(req); - auto msg = fmt::format("adjust log level success, origin level is {}", old_level); + auto handle_result = handle_request(req); + auto msg = + fmt::format("adjust vlog of {} from {} to {} succeed", std::get<0>(handle_result), + std::get<1>(handle_result), std::get<2>(handle_result)); + LOG(INFO) << msg; HttpChannel::send_reply(req, msg); } catch (const std::exception& e) { HttpChannel::send_reply(req, HttpStatus::INTERNAL_SERVER_ERROR, e.what()); diff --git a/be/src/http/action/file_cache_action.cpp b/be/src/http/action/file_cache_action.cpp 
index f31c040c5cf6727..740bac46edf2a7f 100644 --- a/be/src/http/action/file_cache_action.cpp +++ b/be/src/http/action/file_cache_action.cpp @@ -17,10 +17,15 @@ #include "file_cache_action.h" +#include + +#include #include #include #include #include +#include +#include #include "common/status.h" #include "http/http_channel.h" @@ -30,6 +35,7 @@ #include "io/cache/block_file_cache.h" #include "io/cache/block_file_cache_factory.h" #include "io/cache/file_cache_common.h" +#include "io/cache/fs_file_cache_storage.h" #include "olap/olap_define.h" #include "olap/tablet_meta.h" #include "util/easy_json.h" @@ -43,6 +49,7 @@ constexpr static std::string_view PATH = "path"; constexpr static std::string_view CLEAR = "clear"; constexpr static std::string_view RESET = "reset"; constexpr static std::string_view HASH = "hash"; +constexpr static std::string_view LIST_CACHE = "list_cache"; constexpr static std::string_view CAPACITY = "capacity"; constexpr static std::string_view RELEASE = "release"; constexpr static std::string_view BASE_PATH = "base_path"; @@ -66,7 +73,14 @@ Status FileCacheAction::_handle_header(HttpRequest* req, std::string* json_metri *json_metrics = json.ToString(); } else if (operation == CLEAR) { const std::string& sync = req->param(SYNC.data()); - auto ret = io::FileCacheFactory::instance()->clear_file_caches(to_lower(sync) == "true"); + const std::string& segment_path = req->param(VALUE.data()); + if (segment_path.empty()) { + io::FileCacheFactory::instance()->clear_file_caches(to_lower(sync) == "true"); + } else { + io::UInt128Wrapper hash = io::BlockFileCache::hash(segment_path); + io::BlockFileCache* cache = io::FileCacheFactory::instance()->get_by_path(hash); + cache->remove_if_cached(hash); + } } else if (operation == RESET) { std::string capacity = req->param(CAPACITY.data()); int64_t new_capacity = 0; @@ -96,6 +110,23 @@ Status FileCacheAction::_handle_header(HttpRequest* req, std::string* json_metri json[HASH.data()] = ret.to_string(); *json_metrics 
= json.ToString(); } + } else if (operation == LIST_CACHE) { + const std::string& segment_path = req->param(VALUE.data()); + if (segment_path.empty()) { + st = Status::InvalidArgument("missing parameter: {} is required", VALUE.data()); + } else { + io::UInt128Wrapper cache_hash = io::BlockFileCache::hash(segment_path); + std::vector cache_files = + io::FileCacheFactory::instance()->get_cache_file_by_path(cache_hash); + if (cache_files.empty()) { + *json_metrics = "[]"; + } else { + EasyJson json; + std::for_each(cache_files.begin(), cache_files.end(), + [&json](auto& x) { json.PushBack(x); }); + *json_metrics = json.ToString(); + } + } } else { st = Status::InternalError("invalid operation: {}", operation); } diff --git a/be/src/http/action/jeprofile_actions.cpp b/be/src/http/action/jeprofile_actions.cpp index f805d61d5b0b876..47399c575a3f6d5 100644 --- a/be/src/http/action/jeprofile_actions.cpp +++ b/be/src/http/action/jeprofile_actions.cpp @@ -18,69 +18,101 @@ #include "http/action/jeprofile_actions.h" #include -#include #include -#include -#include -#include -#include #include -#include "common/config.h" -#include "common/object_pool.h" #include "http/ev_http_server.h" #include "http/http_channel.h" #include "http/http_handler.h" #include "http/http_handler_with_auth.h" -#include "http/http_method.h" -#include "io/fs/local_file_system.h" +#include "http/http_headers.h" +#include "http/http_request.h" +#include "runtime/memory/heap_profiler.h" namespace doris { -class HttpRequest; -static std::mutex kJeprofileActionMutex; -class JeHeapAction : public HttpHandlerWithAuth { -public: - JeHeapAction(ExecEnv* exec_env) : HttpHandlerWithAuth(exec_env) {} - virtual ~JeHeapAction() = default; +const static std::string HEADER_JSON = "application/json"; - virtual void handle(HttpRequest* req) override; -}; - -void JeHeapAction::handle(HttpRequest* req) { - std::lock_guard lock(kJeprofileActionMutex); -#ifndef USE_JEMALLOC - std::string str = "jemalloc heap dump is not 
available without setting USE_JEMALLOC"; - HttpChannel::send_reply(req, str); +static bool compile_check(HttpRequest* req) { +#if defined(ADDRESS_SANITIZER) || defined(LEAK_SANITIZER) || defined(THREAD_SANITIZER) + HttpChannel::send_reply( + req, HttpStatus::INTERNAL_SERVER_ERROR, + "Jemalloc heap dump is not available with ASAN(address sanitizer) builds.\n"); + return false; +#elif !defined(USE_JEMALLOC) + HttpChannel::send_reply(req, HttpStatus::INTERNAL_SERVER_ERROR, + "jemalloc heap dump is not available without setting USE_JEMALLOC.\n"); + return false; #else - std::stringstream tmp_jeprof_file_name; - std::time_t now = std::time(nullptr); - // Build a temporary file name that is hopefully unique. - tmp_jeprof_file_name << config::jeprofile_dir << "/jeheap_dump." << now << "." << getpid() - << "." << rand() << ".heap"; - const std::string& tmp_file_name_str = tmp_jeprof_file_name.str(); - const char* file_name_ptr = tmp_file_name_str.c_str(); - int result = jemallctl("prof.dump", nullptr, nullptr, &file_name_ptr, sizeof(const char*)); - std::stringstream response; - if (result == 0) { - response << "Jemalloc heap dump success, dump file path: " << tmp_jeprof_file_name.str() - << "\n"; - } else { - response << "Jemalloc heap dump failed, je_mallctl return: " << result << "\n"; - } - HttpChannel::send_reply(req, response.str()); + return true; #endif } -Status JeprofileActions::setup(doris::ExecEnv* exec_env, doris::EvHttpServer* http_server, - doris::ObjectPool& pool) { - if (!config::jeprofile_dir.empty()) { - RETURN_IF_ERROR(io::global_local_filesystem()->create_directory(config::jeprofile_dir)); +void SetJeHeapProfileActiveActions::handle(HttpRequest* req) { + req->add_output_header(HttpHeaders::CONTENT_TYPE, HEADER_JSON.c_str()); + if (compile_check(req)) { + if (req->param("prof_value") == "true") { + HeapProfiler::instance()->heap_profiler_start(); + HttpChannel::send_reply( + req, HttpStatus::OK, + "heap profiler started\nJemalloc will only track and 
sample the memory " + "allocated and freed after the heap profiler started, it cannot analyze the " + "memory allocated and freed before. Therefore, dumping the heap profile " + "immediately after start heap profiler may prompt `No nodes to print`. If you " + "want to analyze the memory that has been allocated in the past, you can only " + "restart the BE process and start heap profiler immediately.\n"); + } else { + HeapProfiler::instance()->heap_profiler_stop(); + HttpChannel::send_reply(req, HttpStatus::OK, "heap profiler stoped\n"); + } + } +} + +void DumpJeHeapProfileToDotActions::handle(HttpRequest* req) { + req->add_output_header(HttpHeaders::CONTENT_TYPE, HEADER_JSON.c_str()); + if (compile_check(req)) { + if (!HeapProfiler::instance()->check_heap_profiler()) { + HttpChannel::send_reply( + req, HttpStatus::INTERNAL_SERVER_ERROR, + "`curl http://be_host:be_webport/jeheap/prof/true` to start heap profiler\n"); + } + std::string dot = HeapProfiler::instance()->dump_heap_profile_to_dot(); + if (dot.empty()) { + HttpChannel::send_reply(req, HttpStatus::INTERNAL_SERVER_ERROR, + "dump heap profile to dot failed, see be.INFO\n"); + } else { + dot += "\n-------------------------------------------------------\n"; + dot += "Copy the text after `digraph` in the above output to " + "http://www.webgraphviz.com to generate a dot graph.\n" + "after start heap profiler, if there is no operation, will print `No nodes to " + "print`." 
+ "If there are many errors: `addr2line: Dwarf Error`," + "or other FAQ, reference doc: " + "https://doris.apache.org/community/developer-guide/debug-tool/#4-qa\n"; + HttpChannel::send_reply(req, HttpStatus::OK, dot); + } + } +} + +void DumpJeHeapProfileActions::handle(HttpRequest* req) { + req->add_output_header(HttpHeaders::CONTENT_TYPE, HEADER_JSON.c_str()); + if (compile_check(req)) { + if (!HeapProfiler::instance()->check_heap_profiler()) { + HttpChannel::send_reply( + req, HttpStatus::INTERNAL_SERVER_ERROR, + "`curl http://be_host:be_webport/jeheap/prof/true` to start heap profiler\n"); + } + std::string profile_file_name = HeapProfiler::instance()->dump_heap_profile(); + if (profile_file_name.empty()) { + HttpChannel::send_reply(req, HttpStatus::INTERNAL_SERVER_ERROR, + "jemalloc heap dump failed\n"); + } else { + HttpChannel::send_reply(req, HttpStatus::OK, + fmt::format("jemalloc heap dump success, dump file path: {}\n", + profile_file_name)); + } } - http_server->register_handler(HttpMethod::GET, "/jeheap/dump", - pool.add(new JeHeapAction(exec_env))); - return Status::OK(); } } // namespace doris diff --git a/be/src/http/action/jeprofile_actions.h b/be/src/http/action/jeprofile_actions.h index 2ebeb3c9ffdc926..f1336ac4691d57a 100644 --- a/be/src/http/action/jeprofile_actions.h +++ b/be/src/http/action/jeprofile_actions.h @@ -15,17 +15,35 @@ // specific language governing permissions and limitations // under the License. 
-#ifndef DORIS_JEPROFILE_ACTIONS_H -#define DORIS_JEPROFILE_ACTIONS_H -#include "common/status.h" +#pragma once + +#include "http/http_handler.h" +#include "http/http_handler_with_auth.h" + namespace doris { -class EvHttpServer; + +class HttpRequest; class ExecEnv; -class ObjectPool; -class JeprofileActions { + +class SetJeHeapProfileActiveActions final : public HttpHandlerWithAuth { +public: + SetJeHeapProfileActiveActions(ExecEnv* exec_env) : HttpHandlerWithAuth(exec_env) {} + ~SetJeHeapProfileActiveActions() override = default; + void handle(HttpRequest* req) override; +}; + +class DumpJeHeapProfileToDotActions final : public HttpHandlerWithAuth { +public: + DumpJeHeapProfileToDotActions(ExecEnv* exec_env) : HttpHandlerWithAuth(exec_env) {} + ~DumpJeHeapProfileToDotActions() override = default; + void handle(HttpRequest* req) override; +}; + +class DumpJeHeapProfileActions final : public HttpHandlerWithAuth { public: - static Status setup(ExecEnv* exec_env, EvHttpServer* http_server, ObjectPool& pool); + DumpJeHeapProfileActions(ExecEnv* exec_env) : HttpHandlerWithAuth(exec_env) {} + ~DumpJeHeapProfileActions() override = default; + void handle(HttpRequest* req) override; }; } // namespace doris -#endif //DORIS_JEPROFILE_ACTIONS_H diff --git a/be/src/http/action/tablets_info_action.cpp b/be/src/http/action/tablets_info_action.cpp index 9c27c1de9a02b35..672b03ce6ceaedb 100644 --- a/be/src/http/action/tablets_info_action.cpp +++ b/be/src/http/action/tablets_info_action.cpp @@ -24,6 +24,8 @@ #include #include +#include "cloud/cloud_storage_engine.h" +#include "cloud/cloud_tablet_mgr.h" #include "cloud/config.h" #include "http/http_channel.h" #include "http/http_headers.h" @@ -51,12 +53,6 @@ void TabletsInfoAction::handle(HttpRequest* req) { EasyJson TabletsInfoAction::get_tablets_info(string tablet_num_to_return) { EasyJson tablets_info_ej; - if (config::is_cloud_mode()) { - // TODO(plat1ko): CloudStorageEngine - tablets_info_ej["msg"] = 
"TabletsInfoAction::get_tablets_info is not implemented"; - tablets_info_ej["code"] = 0; - return tablets_info_ej; - } int64_t number; std::string msg; @@ -74,9 +70,15 @@ EasyJson TabletsInfoAction::get_tablets_info(string tablet_num_to_return) { msg = "Parameter Error"; } std::vector tablets_info; - TabletManager* tablet_manager = - ExecEnv::GetInstance()->storage_engine().to_local().tablet_manager(); - tablet_manager->obtain_specific_quantity_tablets(tablets_info, number); + if (!config::is_cloud_mode()) { + TabletManager* tablet_manager = + ExecEnv::GetInstance()->storage_engine().to_local().tablet_manager(); + tablet_manager->obtain_specific_quantity_tablets(tablets_info, number); + } else { + CloudTabletMgr& cloud_tablet_manager = + ExecEnv::GetInstance()->storage_engine().to_cloud().tablet_mgr(); + cloud_tablet_manager.get_tablet_info(number, &tablets_info); + } tablets_info_ej["msg"] = msg; tablets_info_ej["code"] = 0; diff --git a/be/src/io/cache/block_file_cache.cpp b/be/src/io/cache/block_file_cache.cpp index cd502d16547f9b5..4fb3f3e02cb58c5 100644 --- a/be/src/io/cache/block_file_cache.cpp +++ b/be/src/io/cache/block_file_cache.cpp @@ -54,6 +54,8 @@ BlockFileCache::BlockFileCache(const std::string& cache_base_path, _max_query_cache_size(cache_settings.max_query_cache_size) { _cur_cache_size_metrics = std::make_shared>(_cache_base_path.c_str(), "file_cache_cache_size", 0); + _cache_capacity_metrics = std::make_shared>( + _cache_base_path.c_str(), "file_cache_capacity", _capacity); _cur_ttl_cache_size_metrics = std::make_shared>( _cache_base_path.c_str(), "file_cache_ttl_cache_size", 0); _cur_normal_queue_element_count_metrics = std::make_shared>( @@ -84,6 +86,94 @@ BlockFileCache::BlockFileCache(const std::string& cache_base_path, _total_evict_size_metrics = std::make_shared>( _cache_base_path.c_str(), "file_cache_total_evict_size"); + _evict_by_heat_metrics_matrix[FileCacheType::DISPOSABLE][FileCacheType::NORMAL] = + 
std::make_shared>(_cache_base_path.c_str(), + "file_cache_evict_by_heat_disposable_to_normal"); + _evict_by_heat_metrics_matrix[FileCacheType::DISPOSABLE][FileCacheType::INDEX] = + std::make_shared>(_cache_base_path.c_str(), + "file_cache_evict_by_heat_disposable_to_index"); + _evict_by_heat_metrics_matrix[FileCacheType::DISPOSABLE][FileCacheType::TTL] = + std::make_shared>(_cache_base_path.c_str(), + "file_cache_evict_by_heat_disposable_to_ttl"); + _evict_by_heat_metrics_matrix[FileCacheType::NORMAL][FileCacheType::DISPOSABLE] = + std::make_shared>(_cache_base_path.c_str(), + "file_cache_evict_by_heat_normal_to_disposable"); + _evict_by_heat_metrics_matrix[FileCacheType::NORMAL][FileCacheType::INDEX] = + std::make_shared>(_cache_base_path.c_str(), + "file_cache_evict_by_heat_normal_to_index"); + _evict_by_heat_metrics_matrix[FileCacheType::NORMAL][FileCacheType::TTL] = + std::make_shared>(_cache_base_path.c_str(), + "file_cache_evict_by_heat_normal_to_ttl"); + _evict_by_heat_metrics_matrix[FileCacheType::INDEX][FileCacheType::DISPOSABLE] = + std::make_shared>(_cache_base_path.c_str(), + "file_cache_evict_by_heat_index_to_disposable"); + _evict_by_heat_metrics_matrix[FileCacheType::INDEX][FileCacheType::NORMAL] = + std::make_shared>(_cache_base_path.c_str(), + "file_cache_evict_by_heat_index_to_normal"); + _evict_by_heat_metrics_matrix[FileCacheType::INDEX][FileCacheType::TTL] = + std::make_shared>(_cache_base_path.c_str(), + "file_cache_evict_by_heat_index_to_ttl"); + _evict_by_heat_metrics_matrix[FileCacheType::TTL][FileCacheType::DISPOSABLE] = + std::make_shared>(_cache_base_path.c_str(), + "file_cache_evict_by_heat_ttl_to_disposable"); + _evict_by_heat_metrics_matrix[FileCacheType::TTL][FileCacheType::NORMAL] = + std::make_shared>(_cache_base_path.c_str(), + "file_cache_evict_by_heat_ttl_to_normal"); + _evict_by_heat_metrics_matrix[FileCacheType::TTL][FileCacheType::INDEX] = + std::make_shared>(_cache_base_path.c_str(), + 
"file_cache_evict_by_heat_ttl_to_index"); + + _evict_by_self_lru_metrics_matrix[FileCacheType::DISPOSABLE] = + std::make_shared>(_cache_base_path.c_str(), + "file_cache_evict_by_self_lru_disposable"); + _evict_by_self_lru_metrics_matrix[FileCacheType::NORMAL] = + std::make_shared>(_cache_base_path.c_str(), + "file_cache_evict_by_self_lru_normal"); + _evict_by_self_lru_metrics_matrix[FileCacheType::INDEX] = std::make_shared>( + _cache_base_path.c_str(), "file_cache_evict_by_self_lru_index"); + _evict_by_self_lru_metrics_matrix[FileCacheType::TTL] = std::make_shared>( + _cache_base_path.c_str(), "file_cache_evict_by_self_lru_ttl"); + + _evict_by_size_metrics_matrix[FileCacheType::DISPOSABLE][FileCacheType::NORMAL] = + std::make_shared>(_cache_base_path.c_str(), + "file_cache_evict_by_size_disposable_to_normal"); + _evict_by_size_metrics_matrix[FileCacheType::DISPOSABLE][FileCacheType::INDEX] = + std::make_shared>(_cache_base_path.c_str(), + "file_cache_evict_by_size_disposable_to_index"); + _evict_by_size_metrics_matrix[FileCacheType::DISPOSABLE][FileCacheType::TTL] = + std::make_shared>(_cache_base_path.c_str(), + "file_cache_evict_by_size_disposable_to_ttl"); + _evict_by_size_metrics_matrix[FileCacheType::NORMAL][FileCacheType::DISPOSABLE] = + std::make_shared>(_cache_base_path.c_str(), + "file_cache_evict_by_size_normal_to_disposable"); + _evict_by_size_metrics_matrix[FileCacheType::NORMAL][FileCacheType::INDEX] = + std::make_shared>(_cache_base_path.c_str(), + "file_cache_evict_by_size_normal_to_index"); + _evict_by_size_metrics_matrix[FileCacheType::NORMAL][FileCacheType::TTL] = + std::make_shared>(_cache_base_path.c_str(), + "file_cache_evict_by_size_normal_to_ttl"); + _evict_by_size_metrics_matrix[FileCacheType::INDEX][FileCacheType::DISPOSABLE] = + std::make_shared>(_cache_base_path.c_str(), + "file_cache_evict_by_size_index_to_disposable"); + _evict_by_size_metrics_matrix[FileCacheType::INDEX][FileCacheType::NORMAL] = + 
std::make_shared>(_cache_base_path.c_str(), + "file_cache_evict_by_size_index_to_normal"); + _evict_by_size_metrics_matrix[FileCacheType::INDEX][FileCacheType::TTL] = + std::make_shared>(_cache_base_path.c_str(), + "file_cache_evict_by_size_index_to_ttl"); + _evict_by_size_metrics_matrix[FileCacheType::TTL][FileCacheType::DISPOSABLE] = + std::make_shared>(_cache_base_path.c_str(), + "file_cache_evict_by_size_ttl_to_disposable"); + _evict_by_size_metrics_matrix[FileCacheType::TTL][FileCacheType::NORMAL] = + std::make_shared>(_cache_base_path.c_str(), + "file_cache_evict_by_size_ttl_to_normal"); + _evict_by_size_metrics_matrix[FileCacheType::TTL][FileCacheType::INDEX] = + std::make_shared>(_cache_base_path.c_str(), + "file_cache_evict_by_size_ttl_to_index"); + + _evict_by_try_release = std::make_shared>( + _cache_base_path.c_str(), "file_cache_evict_by_try_release"); + _num_read_blocks = std::make_shared>(_cache_base_path.c_str(), "file_cache_num_read_blocks"); _num_hit_blocks = std::make_shared>(_cache_base_path.c_str(), @@ -107,6 +197,8 @@ BlockFileCache::BlockFileCache(const std::string& cache_base_path, "file_cache_hit_ratio_5m", 0.0); _hit_ratio_1h = std::make_shared>(_cache_base_path.c_str(), "file_cache_hit_ratio_1h", 0.0); + _disk_limit_mode_metrics = + std::make_shared>(_cache_base_path.c_str(), "disk_limit_mode", 0); _disposable_queue = LRUQueue(cache_settings.disposable_queue_size, cache_settings.disposable_queue_elements, 60 * 60); @@ -114,9 +206,11 @@ BlockFileCache::BlockFileCache(const std::string& cache_base_path, 7 * 24 * 60 * 60); _normal_queue = LRUQueue(cache_settings.query_queue_size, cache_settings.query_queue_elements, 24 * 60 * 60); - _ttl_queue = LRUQueue(std::numeric_limits::max(), std::numeric_limits::max(), + _ttl_queue = LRUQueue(cache_settings.ttl_queue_size, cache_settings.ttl_queue_elements, std::numeric_limits::max()); + _recycle_keys = std::make_shared>( + config::file_cache_recycle_keys_size); if (cache_settings.storage == "memory") 
{ _storage = std::make_unique(); _cache_base_path = "memory"; @@ -161,8 +255,7 @@ FileCacheType BlockFileCache::string_to_cache_type(const std::string& str) { BlockFileCache::QueryFileCacheContextHolderPtr BlockFileCache::get_query_context_holder( const TUniqueId& query_id) { - std::lock_guard cache_lock(_mutex); - + SCOPED_CACHE_LOCK(_mutex); if (!config::enable_file_cache_query_limit) { return {}; } @@ -180,7 +273,7 @@ BlockFileCache::QueryFileCacheContextPtr BlockFileCache::get_query_context( } void BlockFileCache::remove_query_context(const TUniqueId& query_id) { - std::lock_guard cache_lock(_mutex); + SCOPED_CACHE_LOCK(_mutex); const auto& query_iter = _query_map.find(query_id); if (query_iter != _query_map.end() && query_iter->second.use_count() <= 1) { @@ -225,7 +318,7 @@ void BlockFileCache::QueryFileCacheContext::reserve(const UInt128Wrapper& hash, } Status BlockFileCache::initialize() { - std::lock_guard cache_lock(_mutex); + SCOPED_CACHE_LOCK(_mutex); return initialize_unlocked(cache_lock); } @@ -314,14 +407,10 @@ FileBlocks BlockFileCache::get_impl(const UInt128Wrapper& hash, const CacheConte if (st.ok()) { auto& queue = get_queue(origin_type); queue.remove(cell.queue_iterator.value(), cache_lock); - if (config::enable_ttl_cache_evict_using_lru) { - auto& ttl_queue = get_queue(FileCacheType::TTL); - cell.queue_iterator = ttl_queue.add( - cell.file_block->get_hash_value(), cell.file_block->offset(), - cell.file_block->range().size(), cache_lock); - } else { - cell.queue_iterator.reset(); - } + auto& ttl_queue = get_queue(FileCacheType::TTL); + cell.queue_iterator = + ttl_queue.add(cell.file_block->get_hash_value(), cell.file_block->offset(), + cell.file_block->range().size(), cache_lock); } else { LOG_WARNING("Failed to change key meta").error(st); } @@ -436,7 +525,7 @@ std::string BlockFileCache::clear_file_cache_async() { int64_t num_cells_to_delete = 0; int64_t num_files_all = 0; { - std::lock_guard cache_lock(_mutex); + SCOPED_CACHE_LOCK(_mutex); if 
(!_async_clear_file_cache) { for (auto& [_, offset_to_cell] : _files) { ++num_files_all; @@ -672,7 +761,7 @@ FileBlocksHolder BlockFileCache::get_or_set(const UInt128Wrapper& hash, size_t o CacheContext& context) { FileBlock::Range range(offset, offset + size - 1); - std::lock_guard cache_lock(_mutex); + SCOPED_CACHE_LOCK(_mutex); if (auto iter = _key_to_time.find(hash); context.cache_type == FileCacheType::INDEX && iter != _key_to_time.end()) { context.cache_type = FileCacheType::TTL; @@ -731,11 +820,10 @@ BlockFileCache::FileBlockCell* BlockFileCache::add_cell(const UInt128Wrapper& ha << " cache_type=" << cache_type_to_string(context.cache_type) << " error=" << st.msg(); } - if (cell.file_block->cache_type() != FileCacheType::TTL || - config::enable_ttl_cache_evict_using_lru) { - auto& queue = get_queue(cell.file_block->cache_type()); - cell.queue_iterator = queue.add(hash, offset, size, cache_lock); - } + + auto& queue = get_queue(cell.file_block->cache_type()); + cell.queue_iterator = queue.add(hash, offset, size, cache_lock); + if (cell.file_block->cache_type() == FileCacheType::TTL) { if (_key_to_time.find(hash) == _key_to_time.end()) { _key_to_time[hash] = context.expiration_time; @@ -749,7 +837,7 @@ BlockFileCache::FileBlockCell* BlockFileCache::add_cell(const UInt128Wrapper& ha } size_t BlockFileCache::try_release() { - std::lock_guard l(_mutex); + SCOPED_CACHE_LOCK(_mutex); std::vector trash; for (auto& [hash, blocks] : _files) { for (auto& [offset, cell] : blocks) { @@ -758,11 +846,14 @@ size_t BlockFileCache::try_release() { } } } + size_t remove_size = 0; for (auto& cell : trash) { FileBlockSPtr file_block = cell->file_block; std::lock_guard lc(cell->file_block->_mutex); - remove(file_block, l, lc); + remove_size += file_block->range().size(); + remove(file_block, cache_lock, lc); } + *_evict_by_try_release << remove_size; LOG(INFO) << "Released " << trash.size() << " blocks in file cache " << _cache_base_path; return trash.size(); } @@ -811,6 +902,18 
@@ void BlockFileCache::remove_file_blocks(std::vector& to_evict, std::for_each(to_evict.begin(), to_evict.end(), remove_file_block_if); } +void BlockFileCache::remove_file_blocks_async(std::vector& to_evict, + std::lock_guard& cache_lock) { + auto remove_file_block_if = [&](FileBlockCell* cell) { + FileBlockSPtr file_block = cell->file_block; + if (file_block) { + std::lock_guard block_lock(file_block->_mutex); + remove(file_block, cache_lock, block_lock, /*sync*/ false); + } + }; + std::for_each(to_evict.begin(), to_evict.end(), remove_file_block_if); +} + void BlockFileCache::remove_file_blocks_and_clean_time_maps( std::vector& to_evict, std::lock_guard& cache_lock) { auto remove_file_block_and_clean_time_maps_if = [&](FileBlockCell* cell) { @@ -841,9 +944,10 @@ void BlockFileCache::remove_file_blocks_and_clean_time_maps( void BlockFileCache::find_evict_candidates(LRUQueue& queue, size_t size, size_t cur_cache_size, size_t& removed_size, std::vector& to_evict, - std::lock_guard& cache_lock, bool is_ttl) { + std::lock_guard& cache_lock, + size_t& cur_removed_size) { for (const auto& [entry_key, entry_offset, entry_size] : queue) { - if (!is_overflow(removed_size, size, cur_cache_size, is_ttl)) { + if (!is_overflow(removed_size, size, cur_cache_size)) { break; } auto* cell = get_cell(entry_key, entry_offset, cache_lock); @@ -861,6 +965,7 @@ void BlockFileCache::find_evict_candidates(LRUQueue& queue, size_t size, size_t DCHECK(file_block->_download_state == FileBlock::State::DOWNLOADED); to_evict.push_back(cell); removed_size += cell_size; + cur_removed_size += cell_size; } } } @@ -886,8 +991,9 @@ bool BlockFileCache::try_reserve_for_ttl_without_lru(size_t size, } std::vector to_evict; auto collect_eliminate_fragments = [&](LRUQueue& queue) { + size_t cur_removed_size = 0; find_evict_candidates(queue, size, cur_cache_size, removed_size, to_evict, cache_lock, - false); + cur_removed_size); }; if (disposable_queue_size != 0) { 
collect_eliminate_fragments(get_queue(FileCacheType::DISPOSABLE)); @@ -914,8 +1020,9 @@ bool BlockFileCache::try_reserve_for_ttl(size_t size, std::lock_guard to_evict; + size_t cur_removed_size = 0; find_evict_candidates(queue, size, cur_cache_size, removed_size, to_evict, cache_lock, - true); + cur_removed_size); remove_file_blocks_and_clean_time_maps(to_evict, cache_lock); return !is_overflow(removed_size, size, cur_cache_size); @@ -948,10 +1055,6 @@ bool BlockFileCache::try_reserve(const UInt128Wrapper& hash, const CacheContext& size = 5 * size; } - if (context.cache_type == FileCacheType::TTL) { - return try_reserve_for_ttl(size, cache_lock); - } - auto query_context = config::enable_file_cache_query_limit && (context.query_id.hi != 0 || context.query_id.lo != 0) ? get_query_context(context.query_id, cache_lock) @@ -1096,7 +1199,7 @@ bool BlockFileCache::remove_if_ttl_file_unlock(const UInt128Wrapper& file_key, b } void BlockFileCache::remove_if_cached(const UInt128Wrapper& file_key) { - std::lock_guard cache_lock(_mutex); + SCOPED_CACHE_LOCK(_mutex); bool is_ttl_file = remove_if_ttl_file_unlock(file_key, true, cache_lock); if (!is_ttl_file) { auto iter = _files.find(file_key); @@ -1112,12 +1215,50 @@ void BlockFileCache::remove_if_cached(const UInt128Wrapper& file_key) { } } -std::vector BlockFileCache::get_other_cache_type(FileCacheType cur_cache_type) { +void BlockFileCache::remove_if_cached_async(const UInt128Wrapper& file_key) { + SCOPED_CACHE_LOCK(_mutex); + bool is_ttl_file = remove_if_ttl_file_unlock(file_key, true, cache_lock); + if (!is_ttl_file) { + auto iter = _files.find(file_key); + std::vector to_remove; + if (iter != _files.end()) { + for (auto& [_, cell] : iter->second) { + if (cell.releasable()) { + to_remove.push_back(&cell); + } + } + } + remove_file_blocks_async(to_remove, cache_lock); + } +} + +std::vector BlockFileCache::get_other_cache_type_without_ttl( + FileCacheType cur_cache_type) { switch (cur_cache_type) { + case 
FileCacheType::TTL: + return {FileCacheType::DISPOSABLE, FileCacheType::NORMAL, FileCacheType::INDEX}; case FileCacheType::INDEX: return {FileCacheType::DISPOSABLE, FileCacheType::NORMAL}; case FileCacheType::NORMAL: return {FileCacheType::DISPOSABLE, FileCacheType::INDEX}; + case FileCacheType::DISPOSABLE: + return {FileCacheType::NORMAL, FileCacheType::INDEX}; + default: + return {}; + } + return {}; +} + +std::vector BlockFileCache::get_other_cache_type(FileCacheType cur_cache_type) { + switch (cur_cache_type) { + case FileCacheType::TTL: + return {FileCacheType::DISPOSABLE, FileCacheType::NORMAL, FileCacheType::INDEX}; + case FileCacheType::INDEX: + return {FileCacheType::DISPOSABLE, FileCacheType::NORMAL, FileCacheType::TTL}; + case FileCacheType::NORMAL: + return {FileCacheType::DISPOSABLE, FileCacheType::INDEX, FileCacheType::TTL}; + case FileCacheType::DISPOSABLE: + return {FileCacheType::NORMAL, FileCacheType::INDEX, FileCacheType::TTL}; default: return {}; } @@ -1143,13 +1284,14 @@ void BlockFileCache::reset_range(const UInt128Wrapper& hash, size_t offset, size } bool BlockFileCache::try_reserve_from_other_queue_by_hot_interval( - std::vector other_cache_types, size_t size, int64_t cur_time, - std::lock_guard& cache_lock) { + FileCacheType cur_type, std::vector other_cache_types, size_t size, + int64_t cur_time, std::lock_guard& cache_lock) { size_t removed_size = 0; size_t cur_cache_size = _cur_cache_size; std::vector to_evict; for (FileCacheType cache_type : other_cache_types) { auto& queue = get_queue(cache_type); + size_t remove_size_per_type = 0; for (const auto& [entry_key, entry_offset, entry_size] : queue) { if (!is_overflow(removed_size, size, cur_cache_size)) { break; @@ -1171,39 +1313,48 @@ bool BlockFileCache::try_reserve_from_other_queue_by_hot_interval( DCHECK(file_block->_download_state == FileBlock::State::DOWNLOADED); to_evict.push_back(cell); removed_size += cell_size; + remove_size_per_type += cell_size; } } + 
*(_evict_by_heat_metrics_matrix[cache_type][cur_type]) << remove_size_per_type; } remove_file_blocks(to_evict, cache_lock); return !is_overflow(removed_size, size, cur_cache_size); } -bool BlockFileCache::is_overflow(size_t removed_size, size_t need_size, size_t cur_cache_size, - bool is_ttl) const { +bool BlockFileCache::is_overflow(size_t removed_size, size_t need_size, + size_t cur_cache_size) const { bool ret = false; if (_disk_resource_limit_mode) { ret = (removed_size < need_size); } else { ret = (cur_cache_size + need_size - removed_size > _capacity); } - if (is_ttl) { - size_t ttl_threshold = config::max_ttl_cache_ratio * _capacity / 100; - return (ret || ((cur_cache_size + need_size - removed_size) > ttl_threshold)); - } return ret; } bool BlockFileCache::try_reserve_from_other_queue_by_size( - std::vector other_cache_types, size_t size, + FileCacheType cur_type, std::vector other_cache_types, size_t size, std::lock_guard& cache_lock) { size_t removed_size = 0; size_t cur_cache_size = _cur_cache_size; std::vector to_evict; + // we follow the privilege defined in get_other_cache_types to evict for (FileCacheType cache_type : other_cache_types) { auto& queue = get_queue(cache_type); + + // we will not drain each of them to the bottom -- i.e., we only + // evict what they have stolen. 
+ size_t cur_queue_size = queue.get_capacity(cache_lock); + size_t cur_queue_max_size = queue.get_max_size(); + if (cur_queue_size <= cur_queue_max_size) { + continue; + } + size_t cur_removed_size = 0; find_evict_candidates(queue, size, cur_cache_size, removed_size, to_evict, cache_lock, - false); + cur_removed_size); + *(_evict_by_size_metrics_matrix[cache_type][cur_type]) << cur_removed_size; } remove_file_blocks(to_evict, cache_lock); return !is_overflow(removed_size, size, cur_cache_size); @@ -1212,16 +1363,15 @@ bool BlockFileCache::try_reserve_from_other_queue_by_size( bool BlockFileCache::try_reserve_from_other_queue(FileCacheType cur_cache_type, size_t size, int64_t cur_time, std::lock_guard& cache_lock) { - // disposable queue cannot reserve other queues - if (cur_cache_type == FileCacheType::DISPOSABLE) { - return false; - } - auto other_cache_types = get_other_cache_type(cur_cache_type); - bool reserve_success = try_reserve_from_other_queue_by_hot_interval(other_cache_types, size, - cur_time, cache_lock); + // currently, TTL cache is not considered as a candidate + auto other_cache_types = get_other_cache_type_without_ttl(cur_cache_type); + bool reserve_success = try_reserve_from_other_queue_by_hot_interval( + cur_cache_type, other_cache_types, size, cur_time, cache_lock); if (reserve_success || !config::file_cache_enable_evict_from_other_queue_by_size) { return reserve_success; } + + other_cache_types = get_other_cache_type(cur_cache_type); auto& cur_queue = get_queue(cur_cache_type); size_t cur_queue_size = cur_queue.get_capacity(cache_lock); size_t cur_queue_max_size = cur_queue.get_max_size(); @@ -1229,7 +1379,8 @@ bool BlockFileCache::try_reserve_from_other_queue(FileCacheType cur_cache_type, if (_cur_cache_size + size > _capacity && cur_queue_size + size > cur_queue_max_size) { return false; } - return try_reserve_from_other_queue_by_size(other_cache_types, size, cache_lock); + return try_reserve_from_other_queue_by_size(cur_cache_type, 
other_cache_types, size, + cache_lock); } bool BlockFileCache::try_reserve_for_lru(const UInt128Wrapper& hash, @@ -1245,9 +1396,11 @@ bool BlockFileCache::try_reserve_for_lru(const UInt128Wrapper& hash, size_t cur_cache_size = _cur_cache_size; std::vector to_evict; + size_t cur_removed_size = 0; find_evict_candidates(queue, size, cur_cache_size, removed_size, to_evict, cache_lock, - false); + cur_removed_size); remove_file_blocks(to_evict, cache_lock); + *(_evict_by_self_lru_metrics_matrix[context.cache_type]) << cur_removed_size; if (is_overflow(removed_size, size, cur_cache_size)) { return false; @@ -1262,7 +1415,7 @@ bool BlockFileCache::try_reserve_for_lru(const UInt128Wrapper& hash, template requires IsXLock && IsXLock -void BlockFileCache::remove(FileBlockSPtr file_block, T& cache_lock, U& block_lock) { +void BlockFileCache::remove(FileBlockSPtr file_block, T& cache_lock, U& block_lock, bool sync) { auto hash = file_block->get_hash_value(); auto offset = file_block->offset(); auto type = file_block->cache_type(); @@ -1282,9 +1435,24 @@ void BlockFileCache::remove(FileBlockSPtr file_block, T& cache_lock, U& block_lo key.offset = offset; key.meta.type = type; key.meta.expiration_time = expiration_time; - Status st = _storage->remove(key); - if (!st.ok()) { - LOG_WARNING("").error(st); + if (sync) { + Status st = _storage->remove(key); + if (!st.ok()) { + LOG_WARNING("").error(st); + } + } else { + // the file will be deleted in the bottom half + // so there will be a window that the file is not in the cache but still in the storage + // but it's ok, because the rowset is stale already + // in case something unexpected happen, set the _recycle_keys queue to zero to fallback + bool ret = _recycle_keys->push(key); + if (!ret) { + LOG_WARNING("Failed to push recycle key to queue, do it synchronously"); + Status st = _storage->remove(key); + if (!st.ok()) { + LOG_WARNING("").error(st); + } + } } } _cur_cache_size -= file_block->range().size(); @@ -1299,8 +1467,18 @@ 
void BlockFileCache::remove(FileBlockSPtr file_block, T& cache_lock, U& block_lo *_num_removed_blocks << 1; } +void BlockFileCache::recycle_stale_rowset_async_bottom_half() { + FileCacheKey key; + while (_recycle_keys->pop(key)) { + Status st = _storage->remove(key); + if (!st.ok()) { + LOG_WARNING("").error(st); + } + } +} + size_t BlockFileCache::get_used_cache_size(FileCacheType cache_type) const { - std::lock_guard cache_lock(_mutex); + SCOPED_CACHE_LOCK(_mutex); return get_used_cache_size_unlocked(cache_type, cache_lock); } @@ -1310,7 +1488,7 @@ size_t BlockFileCache::get_used_cache_size_unlocked(FileCacheType cache_type, } size_t BlockFileCache::get_available_cache_size(FileCacheType cache_type) const { - std::lock_guard cache_lock(_mutex); + SCOPED_CACHE_LOCK(_mutex); return get_available_cache_size_unlocked(cache_type, cache_lock); } @@ -1321,7 +1499,7 @@ size_t BlockFileCache::get_available_cache_size_unlocked( } size_t BlockFileCache::get_file_blocks_num(FileCacheType cache_type) const { - std::lock_guard cache_lock(_mutex); + SCOPED_CACHE_LOCK(_mutex); return get_file_blocks_num_unlocked(cache_type, cache_lock); } @@ -1405,7 +1583,7 @@ std::string BlockFileCache::LRUQueue::to_string( } std::string BlockFileCache::dump_structure(const UInt128Wrapper& hash) { - std::lock_guard cache_lock(_mutex); + SCOPED_CACHE_LOCK(_mutex); return dump_structure_unlocked(hash, cache_lock); } @@ -1423,7 +1601,7 @@ std::string BlockFileCache::dump_structure_unlocked(const UInt128Wrapper& hash, } std::string BlockFileCache::dump_single_cache_type(const UInt128Wrapper& hash, size_t offset) { - std::lock_guard cache_lock(_mutex); + SCOPED_CACHE_LOCK(_mutex); return dump_single_cache_type_unlocked(hash, offset, cache_lock); } @@ -1486,7 +1664,7 @@ std::string BlockFileCache::reset_capacity(size_t new_capacity) { ss << "finish reset_capacity, path=" << _cache_base_path; auto start_time = steady_clock::time_point(); { - std::lock_guard cache_lock(_mutex); + 
SCOPED_CACHE_LOCK(_mutex); if (new_capacity < _capacity && new_capacity < _cur_cache_size) { int64_t need_remove_size = _cur_cache_size - new_capacity; auto remove_blocks = [&](LRUQueue& queue) -> int64_t { @@ -1522,11 +1700,13 @@ std::string BlockFileCache::reset_capacity(size_t new_capacity) { ss << " ttl_queue released " << queue_released; } _disk_resource_limit_mode = true; + _disk_limit_mode_metrics->set_value(1); _async_clear_file_cache = true; ss << " total_space_released=" << space_released; } old_capacity = _capacity; _capacity = new_capacity; + _cache_capacity_metrics->set_value(_capacity); } auto use_time = duration_cast(steady_clock::time_point() - start_time); LOG(INFO) << "Finish tag deleted block. path=" << _cache_base_path @@ -1542,6 +1722,7 @@ void BlockFileCache::check_disk_resource_limit() { } if (_capacity > _cur_cache_size) { _disk_resource_limit_mode = false; + _disk_limit_mode_metrics->set_value(0); } std::pair percent; int ret = disk_used_percentage(_cache_base_path, &percent); @@ -1567,10 +1748,12 @@ void BlockFileCache::check_disk_resource_limit() { if (capacity_percentage >= config::file_cache_enter_disk_resource_limit_mode_percent || inode_is_insufficient(inode_percentage)) { _disk_resource_limit_mode = true; + _disk_limit_mode_metrics->set_value(1); } else if (_disk_resource_limit_mode && (capacity_percentage < config::file_cache_exit_disk_resource_limit_mode_percent) && (inode_percentage < config::file_cache_exit_disk_resource_limit_mode_percent)) { _disk_resource_limit_mode = false; + _disk_limit_mode_metrics->set_value(0); } if (_disk_resource_limit_mode) { // log per mins @@ -1594,10 +1777,11 @@ void BlockFileCache::run_background_operation() { break; } } + recycle_stale_rowset_async_bottom_half(); recycle_deleted_blocks(); // gc int64_t cur_time = UnixSeconds(); - std::lock_guard cache_lock(_mutex); + SCOPED_CACHE_LOCK(_mutex); while (!_time_to_key.empty()) { auto begin = _time_to_key.begin(); if (cur_time < begin->first) { @@ 
-1643,7 +1827,7 @@ void BlockFileCache::run_background_operation() { void BlockFileCache::modify_expiration_time(const UInt128Wrapper& hash, uint64_t new_expiration_time) { - std::lock_guard cache_lock(_mutex); + SCOPED_CACHE_LOCK(_mutex); // 1. If new_expiration_time is equal to zero if (new_expiration_time == 0) { remove_if_ttl_file_unlock(hash, false, cache_lock); @@ -1685,14 +1869,9 @@ void BlockFileCache::modify_expiration_time(const UInt128Wrapper& hash, if (st.ok()) { auto& queue = get_queue(origin_type); queue.remove(cell.queue_iterator.value(), cache_lock); - if (config::enable_ttl_cache_evict_using_lru) { - auto& ttl_queue = get_queue(FileCacheType::TTL); - cell.queue_iterator = - ttl_queue.add(hash, cell.file_block->offset(), - cell.file_block->range().size(), cache_lock); - } else { - cell.queue_iterator.reset(); - } + auto& ttl_queue = get_queue(FileCacheType::TTL); + cell.queue_iterator = ttl_queue.add(hash, cell.file_block->offset(), + cell.file_block->range().size(), cache_lock); } if (!st.ok()) { LOG_WARNING("").error(st); @@ -1708,7 +1887,7 @@ BlockFileCache::get_hot_blocks_meta(const UInt128Wrapper& hash) const { int64_t cur_time = std::chrono::duration_cast( std::chrono::steady_clock::now().time_since_epoch()) .count(); - std::lock_guard cache_lock(_mutex); + SCOPED_CACHE_LOCK(_mutex); std::vector> blocks_meta; if (auto iter = _files.find(hash); iter != _files.end()) { for (auto& pair : _files.find(hash)->second) { @@ -1777,7 +1956,7 @@ std::string BlockFileCache::clear_file_cache_directly() { using namespace std::chrono; std::stringstream ss; auto start = steady_clock::now(); - std::lock_guard cache_lock(_mutex); + SCOPED_CACHE_LOCK(_mutex); LOG_INFO("start clear_file_cache_directly").tag("path", _cache_base_path); std::string clear_msg; @@ -1815,7 +1994,7 @@ std::string BlockFileCache::clear_file_cache_directly() { std::map BlockFileCache::get_blocks_by_key(const UInt128Wrapper& hash) { std::map offset_to_block; - std::lock_guard 
cache_lock(_mutex); + SCOPED_CACHE_LOCK(_mutex); if (_files.contains(hash)) { for (auto& [offset, cell] : _files[hash]) { if (cell.file_block->state() == FileBlock::State::DOWNLOADED) { @@ -1830,7 +2009,7 @@ std::map BlockFileCache::get_blocks_by_key(const UInt128W } void BlockFileCache::update_ttl_atime(const UInt128Wrapper& hash) { - std::lock_guard lock(_mutex); + SCOPED_CACHE_LOCK(_mutex); if (auto iter = _files.find(hash); iter != _files.end()) { for (auto& [_, cell] : iter->second) { cell.update_atime(); @@ -1850,6 +2029,12 @@ std::map BlockFileCache::get_stats() { stats["index_queue_curr_elements"] = (double)_cur_index_queue_element_count_metrics->get_value(); + stats["ttl_queue_max_size"] = (double)_ttl_queue.get_max_size(); + stats["ttl_queue_curr_size"] = (double)_cur_ttl_cache_lru_queue_cache_size_metrics->get_value(); + stats["ttl_queue_max_elements"] = (double)_ttl_queue.get_max_element_size(); + stats["ttl_queue_curr_elements"] = + (double)_cur_ttl_cache_lru_queue_element_count_metrics->get_value(); + stats["normal_queue_max_size"] = (double)_normal_queue.get_max_size(); stats["normal_queue_curr_size"] = (double)_cur_normal_queue_element_count_metrics->get_value(); stats["normal_queue_max_elements"] = (double)_normal_queue.get_max_element_size(); @@ -1866,7 +2051,37 @@ std::map BlockFileCache::get_stats() { return stats; } +// for be UTs +std::map BlockFileCache::get_stats_unsafe() { + std::map stats; + stats["hits_ratio"] = (double)_hit_ratio->get_value(); + stats["hits_ratio_5m"] = (double)_hit_ratio_5m->get_value(); + stats["hits_ratio_1h"] = (double)_hit_ratio_1h->get_value(); + + stats["index_queue_max_size"] = (double)_index_queue.get_max_size(); + stats["index_queue_curr_size"] = (double)_index_queue.get_capacity_unsafe(); + stats["index_queue_max_elements"] = (double)_index_queue.get_max_element_size(); + stats["index_queue_curr_elements"] = (double)_index_queue.get_elements_num_unsafe(); + + stats["ttl_queue_max_size"] = 
(double)_ttl_queue.get_max_size(); + stats["ttl_queue_curr_size"] = (double)_ttl_queue.get_capacity_unsafe(); + stats["ttl_queue_max_elements"] = (double)_ttl_queue.get_max_element_size(); + stats["ttl_queue_curr_elements"] = (double)_ttl_queue.get_elements_num_unsafe(); + + stats["normal_queue_max_size"] = (double)_normal_queue.get_max_size(); + stats["normal_queue_curr_size"] = (double)_normal_queue.get_capacity_unsafe(); + stats["normal_queue_max_elements"] = (double)_normal_queue.get_max_element_size(); + stats["normal_queue_curr_elements"] = (double)_normal_queue.get_elements_num_unsafe(); + + stats["disposable_queue_max_size"] = (double)_disposable_queue.get_max_size(); + stats["disposable_queue_curr_size"] = (double)_disposable_queue.get_capacity_unsafe(); + stats["disposable_queue_max_elements"] = (double)_disposable_queue.get_max_element_size(); + stats["disposable_queue_curr_elements"] = (double)_disposable_queue.get_elements_num_unsafe(); + + return stats; +} + template void BlockFileCache::remove(FileBlockSPtr file_block, std::lock_guard& cache_lock, - std::lock_guard& block_lock); + std::lock_guard& block_lock, bool sync); } // namespace doris::io diff --git a/be/src/io/cache/block_file_cache.h b/be/src/io/cache/block_file_cache.h index ac30e2411fa81be..0de33dadc8249d0 100644 --- a/be/src/io/cache/block_file_cache.h +++ b/be/src/io/cache/block_file_cache.h @@ -19,6 +19,7 @@ #include +#include #include #include #include @@ -27,15 +28,51 @@ #include "io/cache/file_block.h" #include "io/cache/file_cache_common.h" #include "io/cache/file_cache_storage.h" +#include "util/threadpool.h" namespace doris::io { +// Note: the cache_lock is scoped, so do not add do...while(0) here. 
+#ifdef ENABLE_CACHE_LOCK_DEBUG +#define SCOPED_CACHE_LOCK(MUTEX) \ + std::chrono::time_point start_time = \ + std::chrono::steady_clock::now(); \ + std::lock_guard cache_lock(MUTEX); \ + std::chrono::time_point acq_time = \ + std::chrono::steady_clock::now(); \ + auto duration = \ + std::chrono::duration_cast(acq_time - start_time).count(); \ + if (duration > config::cache_lock_long_tail_threshold) \ + LOG(WARNING) << "Lock wait time " << std::to_string(duration) << "ms. " \ + << get_stack_trace_by_boost() << std::endl; \ + LockScopedTimer cache_lock_timer; +#else +#define SCOPED_CACHE_LOCK(MUTEX) std::lock_guard cache_lock(MUTEX); +#endif + template concept IsXLock = std::same_as> || std::same_as>; class FSFileCacheStorage; +class LockScopedTimer { +public: + LockScopedTimer() : start_(std::chrono::steady_clock::now()) {} + + ~LockScopedTimer() { + auto end = std::chrono::steady_clock::now(); + auto duration = std::chrono::duration_cast(end - start_).count(); + if (duration > 500) { + LOG(WARNING) << "Lock held time " << std::to_string(duration) << "ms. " + << get_stack_trace_by_boost(); + } + } + +private: + std::chrono::time_point start_; +}; + // The BlockFileCache is responsible for the management of the blocks // The current strategies are lru and ttl. 
class BlockFileCache { @@ -119,6 +156,7 @@ class BlockFileCache { // remove all blocks that belong to the key void remove_if_cached(const UInt128Wrapper& key); + void remove_if_cached_async(const UInt128Wrapper& key); // modify the expiration time about the key void modify_expiration_time(const UInt128Wrapper& key, uint64_t new_expiration_time); @@ -145,6 +183,9 @@ class BlockFileCache { std::map get_stats(); + // for be UTs + std::map get_stats_unsafe(); + class LRUQueue { public: LRUQueue() = default; @@ -179,6 +220,10 @@ class BlockFileCache { return cache_size; } + size_t get_capacity_unsafe() const { return cache_size; } + + size_t get_elements_num_unsafe() const { return queue.size(); } + size_t get_elements_num(std::lock_guard& /* cache_lock */) const { return queue.size(); } @@ -320,7 +365,7 @@ class BlockFileCache { template requires IsXLock && IsXLock - void remove(FileBlockSPtr file_block, T& cache_lock, U& segment_lock); + void remove(FileBlockSPtr file_block, T& cache_lock, U& segment_lock, bool sync = true); FileBlocks get_impl(const UInt128Wrapper& hash, const CacheContext& context, const FileBlock::Range& range, std::lock_guard& cache_lock); @@ -345,6 +390,7 @@ class BlockFileCache { bool try_reserve_during_async_load(size_t size, std::lock_guard& cache_lock); std::vector get_other_cache_type(FileCacheType cur_cache_type); + std::vector get_other_cache_type_without_ttl(FileCacheType cur_cache_type); bool try_reserve_from_other_queue(FileCacheType cur_cache_type, size_t offset, int64_t cur_time, std::lock_guard& cache_lock); @@ -390,24 +436,30 @@ class BlockFileCache { void recycle_deleted_blocks(); - bool try_reserve_from_other_queue_by_hot_interval(std::vector other_cache_types, + bool try_reserve_from_other_queue_by_hot_interval(FileCacheType cur_type, + std::vector other_cache_types, size_t size, int64_t cur_time, std::lock_guard& cache_lock); - bool try_reserve_from_other_queue_by_size(std::vector other_cache_types, + bool 
try_reserve_from_other_queue_by_size(FileCacheType cur_type, + std::vector other_cache_types, size_t size, std::lock_guard& cache_lock); - bool is_overflow(size_t removed_size, size_t need_size, size_t cur_cache_size, - bool is_ttl = false) const; + bool is_overflow(size_t removed_size, size_t need_size, size_t cur_cache_size) const; void remove_file_blocks(std::vector&, std::lock_guard&); + void remove_file_blocks_async(std::vector&, std::lock_guard&); + void remove_file_blocks_and_clean_time_maps(std::vector&, std::lock_guard&); void find_evict_candidates(LRUQueue& queue, size_t size, size_t cur_cache_size, size_t& removed_size, std::vector& to_evict, - std::lock_guard& cache_lock, bool is_ttl); + std::lock_guard& cache_lock, size_t& cur_removed_size); + + void recycle_stale_rowset_async_bottom_half(); + // info std::string _cache_base_path; size_t _capacity = 0; @@ -446,7 +498,11 @@ class BlockFileCache { LRUQueue _disposable_queue; LRUQueue _ttl_queue; + // keys for async remove + std::shared_ptr> _recycle_keys; + // metrics + std::shared_ptr> _cache_capacity_metrics; std::shared_ptr> _cur_cache_size_metrics; std::shared_ptr> _cur_ttl_cache_size_metrics; std::shared_ptr> _cur_ttl_cache_lru_queue_cache_size_metrics; @@ -459,6 +515,10 @@ class BlockFileCache { std::shared_ptr> _cur_disposable_queue_cache_size_metrics; std::array>, 4> _queue_evict_size_metrics; std::shared_ptr> _total_evict_size_metrics; + std::shared_ptr> _evict_by_heat_metrics_matrix[4][4]; + std::shared_ptr> _evict_by_size_metrics_matrix[4][4]; + std::shared_ptr> _evict_by_self_lru_metrics_matrix[4]; + std::shared_ptr> _evict_by_try_release; std::shared_ptr>> _num_hit_blocks_5m; std::shared_ptr>> _num_read_blocks_5m; @@ -472,6 +532,7 @@ class BlockFileCache { std::shared_ptr> _hit_ratio; std::shared_ptr> _hit_ratio_5m; std::shared_ptr> _hit_ratio_1h; + std::shared_ptr> _disk_limit_mode_metrics; }; } // namespace doris::io diff --git a/be/src/io/cache/block_file_cache_factory.cpp 
b/be/src/io/cache/block_file_cache_factory.cpp index 8370962ddd5fe13..2d0d25735fe2fd0 100644 --- a/be/src/io/cache/block_file_cache_factory.cpp +++ b/be/src/io/cache/block_file_cache_factory.cpp @@ -21,6 +21,9 @@ #include "io/cache/block_file_cache_factory.h" #include + +#include +#include #if defined(__APPLE__) #include #else @@ -118,6 +121,20 @@ Status FileCacheFactory::create_file_cache(const std::string& cache_base_path, return Status::OK(); } +std::vector FileCacheFactory::get_cache_file_by_path(const UInt128Wrapper& hash) { + io::BlockFileCache* cache = io::FileCacheFactory::instance()->get_by_path(hash); + auto blocks = cache->get_blocks_by_key(hash); + std::vector ret; + if (blocks.empty()) { + return ret; + } else { + for (auto& [_, fb] : blocks) { + ret.emplace_back(fb->get_cache_file()); + } + } + return ret; +} + BlockFileCache* FileCacheFactory::get_by_path(const UInt128Wrapper& key) { // dont need lock mutex because _caches is immutable after create_file_cache return _caches[KeyHash()(key) % _caches.size()].get(); diff --git a/be/src/io/cache/block_file_cache_factory.h b/be/src/io/cache/block_file_cache_factory.h index 12714fd2087982d..b00bd7bdfcb3152 100644 --- a/be/src/io/cache/block_file_cache_factory.h +++ b/be/src/io/cache/block_file_cache_factory.h @@ -62,6 +62,8 @@ class FileCacheFactory { [[nodiscard]] size_t get_cache_instance_size() const { return _caches.size(); } + std::vector get_cache_file_by_path(const UInt128Wrapper& hash); + BlockFileCache* get_by_path(const UInt128Wrapper& hash); BlockFileCache* get_by_path(const std::string& cache_base_path); std::vector get_query_context_holders( diff --git a/be/src/io/cache/block_file_cache_profile.cpp b/be/src/io/cache/block_file_cache_profile.cpp index 68e6c1433deaf8f..1759d37f9e43148 100644 --- a/be/src/io/cache/block_file_cache_profile.cpp +++ b/be/src/io/cache/block_file_cache_profile.cpp @@ -34,9 +34,9 @@ std::shared_ptr FileCacheProfile::report() { } void 
FileCacheProfile::update(FileCacheStatistics* stats) { - { - std::lock_guard lock(_mtx); - if (!_profile) { + if (_profile == nullptr) { + std::lock_guard lock(_mtx); + if (_profile == nullptr) { _profile = std::make_shared(); _file_cache_metric = std::make_shared(this); _file_cache_metric->register_entity(); diff --git a/be/src/io/cache/cached_remote_file_reader.cpp b/be/src/io/cache/cached_remote_file_reader.cpp index 0a46c98390e70f8..c9a273c5d368a6a 100644 --- a/be/src/io/cache/cached_remote_file_reader.cpp +++ b/be/src/io/cache/cached_remote_file_reader.cpp @@ -292,6 +292,8 @@ Status CachedRemoteFileReader::read_at_impl(size_t offset, Slice result, size_t* file_offset); } if (!st || block_state != FileBlock::State::DOWNLOADED) { + LOG(WARNING) << "Read data failed from file cache downloaded by others. err=" + << st.msg() << ", block state=" << block_state; size_t bytes_read {0}; stats.hit_cache = false; s3_read_counter << 1; diff --git a/be/src/io/cache/file_block.cpp b/be/src/io/cache/file_block.cpp index b015cbd61110d2a..44cad5520ead064 100644 --- a/be/src/io/cache/file_block.cpp +++ b/be/src/io/cache/file_block.cpp @@ -144,7 +144,7 @@ Status FileBlock::append(Slice data) { Status FileBlock::finalize() { if (_downloaded_size != 0 && _downloaded_size != _block_range.size()) { - std::lock_guard cache_lock(_mgr->_mutex); + SCOPED_CACHE_LOCK(_mgr->_mutex); size_t old_size = _block_range.size(); _block_range.right = _block_range.left + _downloaded_size - 1; size_t new_size = _block_range.size(); @@ -179,7 +179,7 @@ Status FileBlock::change_cache_type_between_ttl_and_others(FileCacheType new_typ } Status FileBlock::change_cache_type_between_normal_and_index(FileCacheType new_type) { - std::lock_guard cache_lock(_mgr->_mutex); + SCOPED_CACHE_LOCK(_mgr->_mutex); std::lock_guard block_lock(_mutex); bool expr = (new_type != FileCacheType::TTL && _key.meta.type != FileCacheType::TTL); if (!expr) { @@ -223,7 +223,7 @@ FileBlock::State FileBlock::wait() { if 
(_download_state == State::DOWNLOADING) { DCHECK(_downloader_id != 0 && _downloader_id != get_caller_id()); - _cv.wait_for(block_lock, std::chrono::seconds(1)); + _cv.wait_for(block_lock, std::chrono::milliseconds(config::block_cache_wait_timeout_ms)); } return _download_state; @@ -272,20 +272,34 @@ std::string FileBlock::state_to_string(FileBlock::State state) { } } +std::string FileBlock::get_cache_file() const { + return _mgr->_storage->get_local_file(this->_key); +} + FileBlocksHolder::~FileBlocksHolder() { for (auto file_block_it = file_blocks.begin(); file_block_it != file_blocks.end();) { auto current_file_block_it = file_block_it; auto& file_block = *current_file_block_it; BlockFileCache* _mgr = file_block->_mgr; { - std::lock_guard cache_lock(_mgr->_mutex); - std::lock_guard block_lock(file_block->_mutex); - file_block->complete_unlocked(block_lock); - if (file_block.use_count() == 2) { - DCHECK(file_block->state_unlock(block_lock) != FileBlock::State::DOWNLOADING); - // one in cache, one in here - if (file_block->state_unlock(block_lock) == FileBlock::State::EMPTY) { - _mgr->remove(file_block, cache_lock, block_lock); + bool should_remove = false; + { + std::lock_guard block_lock(file_block->_mutex); + file_block->complete_unlocked(block_lock); + if (file_block.use_count() == 2 && + file_block->state_unlock(block_lock) == FileBlock::State::EMPTY) { + should_remove = true; + } + } + if (should_remove) { + SCOPED_CACHE_LOCK(_mgr->_mutex); + std::lock_guard block_lock(file_block->_mutex); + if (file_block.use_count() == 2) { + DCHECK(file_block->state_unlock(block_lock) != FileBlock::State::DOWNLOADING); + // one in cache, one in here + if (file_block->state_unlock(block_lock) == FileBlock::State::EMPTY) { + _mgr->remove(file_block, cache_lock, block_lock); + } } } } diff --git a/be/src/io/cache/file_block.h b/be/src/io/cache/file_block.h index 6e49a597b7b95cc..3a4490d67a3f9d2 100644 --- a/be/src/io/cache/file_block.h +++ b/be/src/io/cache/file_block.h @@ 
-123,6 +123,8 @@ class FileBlock { uint64_t expiration_time() const { return _key.meta.expiration_time; } + std::string get_cache_file() const; + State state_unlock(std::lock_guard&) const; FileBlock& operator=(const FileBlock&) = delete; diff --git a/be/src/io/cache/file_cache_common.cpp b/be/src/io/cache/file_cache_common.cpp index c569ace0011866f..674879300452dfc 100644 --- a/be/src/io/cache/file_cache_common.cpp +++ b/be/src/io/cache/file_cache_common.cpp @@ -34,6 +34,7 @@ std::string FileCacheSettings::to_string() const { << ", disposable_queue_elements: " << disposable_queue_elements << ", index_queue_size: " << index_queue_size << ", index_queue_elements: " << index_queue_elements + << ", ttl_queue_size: " << ttl_queue_size << ", ttl_queue_elements: " << ttl_queue_elements << ", query_queue_size: " << query_queue_size << ", query_queue_elements: " << query_queue_elements << ", storage: " << storage; return ss.str(); @@ -58,6 +59,10 @@ FileCacheSettings get_file_cache_settings(size_t capacity, size_t max_query_cach std::max(settings.index_queue_size / settings.max_file_block_size, REMOTE_FS_OBJECTS_CACHE_DEFAULT_ELEMENTS); + settings.ttl_queue_size = per_size * config::max_ttl_cache_ratio; + settings.ttl_queue_elements = std::max(settings.ttl_queue_size / settings.max_file_block_size, + REMOTE_FS_OBJECTS_CACHE_DEFAULT_ELEMENTS); + settings.query_queue_size = settings.capacity - settings.disposable_queue_size - settings.index_queue_size; settings.query_queue_elements = diff --git a/be/src/io/cache/file_cache_common.h b/be/src/io/cache/file_cache_common.h index 21309831a8284c9..30579ba7851b28e 100644 --- a/be/src/io/cache/file_cache_common.h +++ b/be/src/io/cache/file_cache_common.h @@ -26,17 +26,17 @@ namespace doris::io { inline static constexpr size_t REMOTE_FS_OBJECTS_CACHE_DEFAULT_ELEMENTS = 100 * 1024; inline static constexpr size_t FILE_CACHE_MAX_FILE_BLOCK_SIZE = 1 * 1024 * 1024; -inline static constexpr size_t DEFAULT_NORMAL_PERCENT = 85; -inline 
static constexpr size_t DEFAULT_DISPOSABLE_PERCENT = 10; +inline static constexpr size_t DEFAULT_NORMAL_PERCENT = 40; +inline static constexpr size_t DEFAULT_DISPOSABLE_PERCENT = 5; inline static constexpr size_t DEFAULT_INDEX_PERCENT = 5; using uint128_t = vectorized::UInt128; -enum class FileCacheType { - INDEX, - NORMAL, - DISPOSABLE, - TTL, +enum FileCacheType { + INDEX = 2, + NORMAL = 1, + DISPOSABLE = 0, + TTL = 3, }; struct UInt128Wrapper { @@ -93,6 +93,8 @@ struct FileCacheSettings { size_t index_queue_elements {0}; size_t query_queue_size {0}; size_t query_queue_elements {0}; + size_t ttl_queue_size {0}; + size_t ttl_queue_elements {0}; size_t max_file_block_size {0}; size_t max_query_cache_size {0}; std::string storage; diff --git a/be/src/io/cache/file_cache_storage.h b/be/src/io/cache/file_cache_storage.h index 642c4711cf6c623..024e701c6fa08be 100644 --- a/be/src/io/cache/file_cache_storage.h +++ b/be/src/io/cache/file_cache_storage.h @@ -65,6 +65,8 @@ class FileCacheStorage { // force clear all current data in the cache virtual Status clear(std::string& msg) = 0; virtual FileCacheStorageType get_type() = 0; + // get local cached file + virtual std::string get_local_file(const FileCacheKey& key) = 0; }; } // namespace doris::io diff --git a/be/src/io/cache/fs_file_cache_storage.cpp b/be/src/io/cache/fs_file_cache_storage.cpp index ecdf04c88304f05..cf1cd41a537abc0 100644 --- a/be/src/io/cache/fs_file_cache_storage.cpp +++ b/be/src/io/cache/fs_file_cache_storage.cpp @@ -160,30 +160,36 @@ Status FSFileCacheStorage::read(const FileCacheKey& key, size_t value_offset, Sl get_path_in_local_cache(get_path_in_local_cache(key.hash, key.meta.expiration_time), key.offset, key.meta.type); Status s = fs->open_file(file, &file_reader); - if (!s.ok()) { - if (!s.is() || key.meta.type != FileCacheType::TTL) { - return s; + + // handle the case that the file is not found but actually exists in other type format + // TODO(zhengyu): nasty! 
better eliminate the type encoding in file name in the future + if (!s.ok() && !s.is()) { + LOG(WARNING) << "open file failed, file=" << file << ", error=" << s.to_string(); + return s; // return other error directly + } else if (!s.ok() && s.is()) { // but handle NOT_FOUND error + auto candidates = get_path_in_local_cache_all_candidates( + get_path_in_local_cache(key.hash, key.meta.expiration_time), key.offset); + for (auto& candidate : candidates) { + s = fs->open_file(candidate, &file_reader); + if (s.ok()) { + break; // success with one of there candidates + } } - std::string file_old_format = get_path_in_local_cache_old_ttl_format( - get_path_in_local_cache(key.hash, key.meta.expiration_time), key.offset, - key.meta.type); - if (config::translate_to_new_ttl_format_during_read) { - // try to rename the file with old ttl format to new and retry - VLOG(7) << "try to rename the file with old ttl format to new and retry" - << " oldformat=" << file_old_format << " original=" << file; - RETURN_IF_ERROR(fs->rename(file_old_format, file)); - RETURN_IF_ERROR(fs->open_file(file, &file_reader)); - } else { - // try to open the file with old ttl format - VLOG(7) << "try to open the file with old ttl format" - << " oldformat=" << file_old_format << " original=" << file; - RETURN_IF_ERROR(fs->open_file(file_old_format, &file_reader)); + if (!s.ok()) { // still not found, return error + LOG(WARNING) << "open file failed, file=" << file << ", error=" << s.to_string(); + return s; } - } + } // else, s.ok() means open file success + FDCache::instance()->insert_file_reader(fd_key, file_reader); } size_t bytes_read = 0; - RETURN_IF_ERROR(file_reader->read_at(value_offset, buffer, &bytes_read)); + auto s = file_reader->read_at(value_offset, buffer, &bytes_read); + if (!s.ok()) { + LOG(WARNING) << "read file failed, file=" << file_reader->path() + << ", error=" << s.to_string(); + return s; + } DCHECK(bytes_read == buffer.get_size()); return Status::OK(); } @@ -270,6 +276,17 @@ 
std::string FSFileCacheStorage::get_path_in_local_cache_old_ttl_format(const std return Path(dir) / (std::to_string(offset) + BlockFileCache::cache_type_to_string(type)); } +std::vector FSFileCacheStorage::get_path_in_local_cache_all_candidates( + const std::string& dir, size_t offset) { + std::vector candidates; + std::string base = get_path_in_local_cache(dir, offset, FileCacheType::NORMAL); + candidates.push_back(base); + candidates.push_back(base + "_idx"); + candidates.push_back(base + "_ttl"); + candidates.push_back(base + "_disposable"); + return candidates; +} + std::string FSFileCacheStorage::get_path_in_local_cache(const UInt128Wrapper& value, uint64_t expiration_time) const { auto str = value.to_string(); @@ -471,7 +488,8 @@ void FSFileCacheStorage::load_cache_info_into_memory(BlockFileCache* _mgr) const std::vector batch_load_buffer; batch_load_buffer.reserve(scan_length); auto add_cell_batch_func = [&]() { - std::lock_guard cache_lock(_mgr->_mutex); + SCOPED_CACHE_LOCK(_mgr->_mutex); + auto f = [&](const BatchLoadArgs& args) { // in async load mode, a cell may be added twice. 
if (_mgr->_files.contains(args.hash) && _mgr->_files[args.hash].contains(args.offset)) { @@ -659,6 +677,11 @@ Status FSFileCacheStorage::clear(std::string& msg) { return Status::OK(); } +std::string FSFileCacheStorage::get_local_file(const FileCacheKey& key) { + return get_path_in_local_cache(get_path_in_local_cache(key.hash, key.meta.expiration_time), + key.offset, key.meta.type, false); +} + FSFileCacheStorage::~FSFileCacheStorage() { if (_cache_background_load_thread.joinable()) { _cache_background_load_thread.join(); diff --git a/be/src/io/cache/fs_file_cache_storage.h b/be/src/io/cache/fs_file_cache_storage.h index 23e98f422ac884c..8a97aa109ad7411 100644 --- a/be/src/io/cache/fs_file_cache_storage.h +++ b/be/src/io/cache/fs_file_cache_storage.h @@ -70,6 +70,7 @@ class FSFileCacheStorage : public FileCacheStorage { void load_blocks_directly_unlocked(BlockFileCache* _mgr, const FileCacheKey& key, std::lock_guard& cache_lock) override; Status clear(std::string& msg) override; + std::string get_local_file(const FileCacheKey& key) override; [[nodiscard]] static std::string get_path_in_local_cache(const std::string& dir, size_t offset, FileCacheType type, @@ -101,6 +102,9 @@ class FSFileCacheStorage : public FileCacheStorage { void load_cache_info_into_memory(BlockFileCache* _mgr) const; + [[nodiscard]] std::vector get_path_in_local_cache_all_candidates( + const std::string& dir, size_t offset); + std::string _cache_base_path; std::thread _cache_background_load_thread; const std::shared_ptr& fs = global_local_filesystem(); diff --git a/be/src/io/cache/mem_file_cache_storage.cpp b/be/src/io/cache/mem_file_cache_storage.cpp index bffa75ae305b595..7e76dd5f88c5653 100644 --- a/be/src/io/cache/mem_file_cache_storage.cpp +++ b/be/src/io/cache/mem_file_cache_storage.cpp @@ -128,4 +128,8 @@ Status MemFileCacheStorage::clear(std::string& msg) { return Status::OK(); } +std::string MemFileCacheStorage::get_local_file(const FileCacheKey& key) { + return ""; +} + } // namespace 
doris::io diff --git a/be/src/io/cache/mem_file_cache_storage.h b/be/src/io/cache/mem_file_cache_storage.h index 20fdd8ce9f65202..82064c6e9edc786 100644 --- a/be/src/io/cache/mem_file_cache_storage.h +++ b/be/src/io/cache/mem_file_cache_storage.h @@ -44,6 +44,7 @@ class MemFileCacheStorage : public FileCacheStorage { void load_blocks_directly_unlocked(BlockFileCache* _mgr, const FileCacheKey& key, std::lock_guard& cache_lock) override; Status clear(std::string& msg) override; + std::string get_local_file(const FileCacheKey& key) override; FileCacheStorageType get_type() override { return MEMORY; } diff --git a/be/src/io/fs/buffered_reader.cpp b/be/src/io/fs/buffered_reader.cpp index 20d5684734e2d4c..7fd85caa43b6c0c 100644 --- a/be/src/io/fs/buffered_reader.cpp +++ b/be/src/io/fs/buffered_reader.cpp @@ -869,5 +869,107 @@ Result DelegateReader::create_file_reader( return reader; }); } + +Status LinearProbeRangeFinder::get_range_for(int64_t desired_offset, + io::PrefetchRange& result_range) { + while (index < _ranges.size()) { + io::PrefetchRange& range = _ranges[index]; + if (range.end_offset > desired_offset) { + if (range.start_offset > desired_offset) [[unlikely]] { + return Status::InvalidArgument("Invalid desiredOffset"); + } + result_range = range; + return Status::OK(); + } + ++index; + } + return Status::InvalidArgument("Invalid desiredOffset"); +} + +RangeCacheFileReader::RangeCacheFileReader(RuntimeProfile* profile, io::FileReaderSPtr inner_reader, + std::shared_ptr range_finder) + : _profile(profile), + _inner_reader(std::move(inner_reader)), + _range_finder(std::move(range_finder)) { + _size = _inner_reader->size(); + uint64_t max_cache_size = + std::max((uint64_t)4096, (uint64_t)_range_finder->get_max_range_size()); + _cache = OwnedSlice(max_cache_size); + + if (_profile != nullptr) { + const char* random_profile = "RangeCacheFileReader"; + ADD_TIMER_WITH_LEVEL(_profile, random_profile, 1); + _request_io = + ADD_CHILD_COUNTER_WITH_LEVEL(_profile, 
"RequestIO", TUnit::UNIT, random_profile, 1); + _request_bytes = ADD_CHILD_COUNTER_WITH_LEVEL(_profile, "RequestBytes", TUnit::BYTES, + random_profile, 1); + _request_time = ADD_CHILD_TIMER_WITH_LEVEL(_profile, "RequestTime", random_profile, 1); + _read_to_cache_time = + ADD_CHILD_TIMER_WITH_LEVEL(_profile, "ReadToCacheTime", random_profile, 1); + _cache_refresh_count = ADD_CHILD_COUNTER_WITH_LEVEL(_profile, "CacheRefreshCount", + TUnit::UNIT, random_profile, 1); + _read_to_cache_bytes = ADD_CHILD_COUNTER_WITH_LEVEL(_profile, "ReadToCacheBytes", + TUnit::BYTES, random_profile, 1); + } +} + +Status RangeCacheFileReader::read_at_impl(size_t offset, Slice result, size_t* bytes_read, + const IOContext* io_ctx) { + auto request_size = result.size; + + _cache_statistics.request_io++; + _cache_statistics.request_bytes += request_size; + SCOPED_RAW_TIMER(&_cache_statistics.request_time); + + PrefetchRange range; + if (_range_finder->get_range_for(offset, range)) [[likely]] { + if (_current_start_offset != range.start_offset) { // need read new range to cache. + auto range_size = range.end_offset - range.start_offset; + + _cache_statistics.cache_refresh_count++; + _cache_statistics.read_to_cache_bytes += range_size; + SCOPED_RAW_TIMER(&_cache_statistics.read_to_cache_time); + + Slice cache_slice = {_cache.data(), range_size}; + RETURN_IF_ERROR( + _inner_reader->read_at(range.start_offset, cache_slice, bytes_read, io_ctx)); + + if (*bytes_read != range_size) [[unlikely]] { + return Status::InternalError( + "RangeCacheFileReader use inner reader read bytes {} not eq expect size {}", + *bytes_read, range_size); + } + + _current_start_offset = range.start_offset; + } + + int64_t buffer_offset = offset - _current_start_offset; + memcpy(result.data, _cache.data() + buffer_offset, request_size); + *bytes_read = request_size; + + return Status::OK(); + } else { + return Status::InternalError("RangeCacheFileReader read not in Ranges. 
Offset = {}", + offset); + // RETURN_IF_ERROR(_inner_reader->read_at(offset, result , bytes_read, io_ctx)); + // return Status::OK(); + // think return error is ok,otherwise it will cover up the error. + } +} + +void RangeCacheFileReader::_collect_profile_before_close() { + if (_profile != nullptr) { + COUNTER_UPDATE(_request_io, _cache_statistics.request_io); + COUNTER_UPDATE(_request_bytes, _cache_statistics.request_bytes); + COUNTER_UPDATE(_request_time, _cache_statistics.request_time); + COUNTER_UPDATE(_read_to_cache_time, _cache_statistics.read_to_cache_time); + COUNTER_UPDATE(_cache_refresh_count, _cache_statistics.cache_refresh_count); + COUNTER_UPDATE(_read_to_cache_bytes, _cache_statistics.read_to_cache_bytes); + if (_inner_reader != nullptr) { + _inner_reader->collect_profile_before_close(); + } + } +} + } // namespace io } // namespace doris diff --git a/be/src/io/fs/buffered_reader.h b/be/src/io/fs/buffered_reader.h index 907ea11b216ac45..67e07665fbfd9f3 100644 --- a/be/src/io/fs/buffered_reader.h +++ b/be/src/io/fs/buffered_reader.h @@ -53,6 +53,147 @@ struct PrefetchRange { : start_offset(start_offset), end_offset(end_offset) {} PrefetchRange() : start_offset(0), end_offset(0) {} + + bool operator==(const PrefetchRange& other) const { + return (start_offset == other.start_offset) && (end_offset == other.end_offset); + } + + bool operator!=(const PrefetchRange& other) const { return !(*this == other); } + + PrefetchRange span(const PrefetchRange& other) const { + return {std::min(start_offset, other.end_offset), std::max(start_offset, other.end_offset)}; + } + PrefetchRange seq_span(const PrefetchRange& other) const { + return {start_offset, other.end_offset}; + } + + //Ranges needs to be sorted. 
+ static std::vector merge_adjacent_seq_ranges( + const std::vector& seq_ranges, int64_t max_merge_distance_bytes, + int64_t once_max_read_bytes) { + if (seq_ranges.empty()) { + return {}; + } + // Merge overlapping ranges + std::vector result; + PrefetchRange last = seq_ranges.front(); + for (size_t i = 1; i < seq_ranges.size(); ++i) { + PrefetchRange current = seq_ranges[i]; + PrefetchRange merged = last.seq_span(current); + if (merged.end_offset <= once_max_read_bytes + merged.start_offset && + last.end_offset + max_merge_distance_bytes >= current.start_offset) { + last = merged; + } else { + result.push_back(last); + last = current; + } + } + result.push_back(last); + return result; + } +}; + +class RangeFinder { +public: + virtual ~RangeFinder() = default; + virtual Status get_range_for(int64_t desired_offset, io::PrefetchRange& result_range) = 0; + virtual size_t get_max_range_size() const = 0; +}; + +class LinearProbeRangeFinder : public RangeFinder { +public: + LinearProbeRangeFinder(std::vector&& ranges) : _ranges(std::move(ranges)) {} + + Status get_range_for(int64_t desired_offset, io::PrefetchRange& result_range) override; + + size_t get_max_range_size() const override { + size_t max_range_size = 0; + for (const auto& range : _ranges) { + max_range_size = std::max(max_range_size, range.end_offset - range.start_offset); + } + return max_range_size; + } + + ~LinearProbeRangeFinder() override = default; + +private: + std::vector _ranges; + size_t index {0}; +}; + +/** + * The reader provides a solution to read one range at a time. You can customize RangeFinder to meet your scenario. 
+ * For me, since there will be tiny stripes when reading orc files, in order to reduce the requests to hdfs, + * I first merge the access to the orc files to be read (of course there is a problem of read amplification, + * but in my scenario, compared with reading hdfs multiple times, it is faster to read more data on hdfs at one time), + * and then because the actual reading of orc files is in order from front to back, I provide LinearProbeRangeFinder. + */ +class RangeCacheFileReader : public io::FileReader { + struct RangeCacheReaderStatistics { + int64_t request_io = 0; + int64_t request_bytes = 0; + int64_t request_time = 0; + int64_t read_to_cache_time = 0; + int64_t cache_refresh_count = 0; + int64_t read_to_cache_bytes = 0; + }; + +public: + RangeCacheFileReader(RuntimeProfile* profile, io::FileReaderSPtr inner_reader, + std::shared_ptr range_finder); + + ~RangeCacheFileReader() override = default; + + Status close() override { + if (!_closed) { + _closed = true; + } + return Status::OK(); + } + + const io::Path& path() const override { return _inner_reader->path(); } + + size_t size() const override { return _size; } + + bool closed() const override { return _closed; } + +protected: + Status read_at_impl(size_t offset, Slice result, size_t* bytes_read, + const IOContext* io_ctx) override; + + void _collect_profile_before_close() override; + +private: + RuntimeProfile* _profile = nullptr; + io::FileReaderSPtr _inner_reader; + std::shared_ptr _range_finder; + + OwnedSlice _cache; + int64_t _current_start_offset = -1; + + size_t _size; + bool _closed = false; + + RuntimeProfile::Counter* _request_io = nullptr; + RuntimeProfile::Counter* _request_bytes = nullptr; + RuntimeProfile::Counter* _request_time = nullptr; + RuntimeProfile::Counter* _read_to_cache_time = nullptr; + RuntimeProfile::Counter* _cache_refresh_count = nullptr; + RuntimeProfile::Counter* _read_to_cache_bytes = nullptr; + RangeCacheReaderStatistics _cache_statistics; + /** + * 
`RangeCacheFileReader`: + * 1. `CacheRefreshCount`: how many IOs are merged + * 2. `ReadToCacheBytes`: how much data is actually read after merging + * 3. `ReadToCacheTime`: how long it takes to read data after merging + * 4. `RequestBytes`: how many bytes does the apache-orc library actually need to read the orc file + * 5. `RequestIO`: how many times the apache-orc library calls this read interface + * 6. `RequestTime`: how long it takes the apache-orc library to call this read interface + * + * It should be noted that `RangeCacheFileReader` is a wrapper of the reader that actually reads data,such as + * the hdfs reader, so strictly speaking, `CacheRefreshCount` is not equal to how many IOs are initiated to hdfs, + * because each time the hdfs reader is requested, the hdfs reader may not be able to read all the data at once. + */ }; /** diff --git a/be/src/io/fs/local_file_reader.cpp b/be/src/io/fs/local_file_reader.cpp index b4f144a633048e4..4a41fa479d9808b 100644 --- a/be/src/io/fs/local_file_reader.cpp +++ b/be/src/io/fs/local_file_reader.cpp @@ -34,11 +34,13 @@ #include "common/compiler_util.h" // IWYU pragma: keep #include "cpp/sync_point.h" #include "io/fs/err_utils.h" +#include "olap/data_dir.h" #include "olap/olap_common.h" #include "olap/options.h" #include "runtime/thread_context.h" #include "runtime/workload_management/io_throttle.h" #include "util/async_io.h" +#include "util/debug_points.h" #include "util/doris_metrics.h" namespace doris { @@ -139,6 +141,15 @@ Status LocalFileReader::read_at_impl(size_t offset, Slice result, size_t* bytes_ while (bytes_req != 0) { auto res = SYNC_POINT_HOOK_RETURN_VALUE(::pread(_fd, to, bytes_req, offset), "LocalFileReader::pread", _fd, to); + DBUG_EXECUTE_IF("LocalFileReader::read_at_impl.io_error", { + auto sub_path = dp->param("sub_path", ""); + if ((sub_path.empty() && _path.filename().compare(kTestFilePath)) || + (!sub_path.empty() && _path.native().find(sub_path) != std::string::npos)) { + res = -1; + errno = 
EIO; + LOG(WARNING) << Status::IOError("debug read io error: {}", _path.native()); + } + }); if (UNLIKELY(-1 == res && errno != EINTR)) { return localfs_error(errno, fmt::format("failed to read {}", _path.native())); } diff --git a/be/src/io/fs/local_file_system.cpp b/be/src/io/fs/local_file_system.cpp index 4b44027abbbf2dc..0107ed57dc8fb16 100644 --- a/be/src/io/fs/local_file_system.cpp +++ b/be/src/io/fs/local_file_system.cpp @@ -62,9 +62,13 @@ Status LocalFileSystem::create_file_impl(const Path& file, FileWriterPtr* writer int fd = ::open(file.c_str(), O_TRUNC | O_WRONLY | O_CREAT | O_CLOEXEC, 0666); DBUG_EXECUTE_IF("LocalFileSystem.create_file_impl.open_file_failed", { // spare '.testfile' to make bad disk checker happy - if (file.filename().compare(kTestFilePath)) { + auto sub_path = dp->param("sub_path", ""); + if ((sub_path.empty() && file.filename().compare(kTestFilePath)) || + (!sub_path.empty() && file.native().find(sub_path) != std::string::npos)) { ::close(fd); fd = -1; + errno = EIO; + LOG(WARNING) << Status::IOError("debug open io error: {}", file.native()); } }); if (-1 == fd) { @@ -85,6 +89,17 @@ Status LocalFileSystem::open_file_impl(const Path& file, FileReaderSPtr* reader, } int fd = -1; RETRY_ON_EINTR(fd, open(file.c_str(), O_RDONLY)); + DBUG_EXECUTE_IF("LocalFileSystem.create_file_impl.open_file_failed", { + // spare '.testfile' to make bad disk checker happy + auto sub_path = dp->param("sub_path", ""); + if ((sub_path.empty() && file.filename().compare(kTestFilePath)) || + (!sub_path.empty() && file.native().find(sub_path) != std::string::npos)) { + ::close(fd); + fd = -1; + errno = EIO; + LOG(WARNING) << Status::IOError("debug open io error: {}", file.native()); + } + }); if (fd < 0) { return localfs_error(errno, fmt::format("failed to open {}", file.native())); } diff --git a/be/src/io/fs/local_file_writer.cpp b/be/src/io/fs/local_file_writer.cpp index 7301ceae588a0bf..c65dee2535e79df 100644 --- a/be/src/io/fs/local_file_writer.cpp +++ 
b/be/src/io/fs/local_file_writer.cpp @@ -147,6 +147,15 @@ Status LocalFileWriter::appendv(const Slice* data, size_t data_cnt) { RETRY_ON_EINTR(res, SYNC_POINT_HOOK_RETURN_VALUE( ::writev(_fd, iov.data() + completed_iov, iov_count), "LocalFileWriter::writev", _fd)); + DBUG_EXECUTE_IF("LocalFileWriter::appendv.io_error", { + auto sub_path = dp->param("sub_path", ""); + if ((sub_path.empty() && _path.filename().compare(kTestFilePath)) || + (!sub_path.empty() && _path.native().find(sub_path) != std::string::npos)) { + res = -1; + errno = EIO; + LOG(WARNING) << Status::IOError("debug write io error: {}", _path.native()); + } + }); if (UNLIKELY(res < 0)) { return localfs_error(errno, fmt::format("failed to write {}", _path.native())); } diff --git a/be/src/io/fs/s3_file_system.cpp b/be/src/io/fs/s3_file_system.cpp index 3a5fffb2549938d..d841c79ed660693 100644 --- a/be/src/io/fs/s3_file_system.cpp +++ b/be/src/io/fs/s3_file_system.cpp @@ -86,7 +86,7 @@ Status ObjClientHolder::reset(const S3ClientConf& conf) { S3ClientConf reset_conf; { std::shared_lock lock(_mtx); - if (conf.ak == _conf.ak && conf.sk == _conf.sk && conf.token == _conf.token) { + if (conf.get_hash() == _conf.get_hash()) { return Status::OK(); // Same conf } @@ -95,6 +95,10 @@ Status ObjClientHolder::reset(const S3ClientConf& conf) { reset_conf.sk = conf.sk; reset_conf.token = conf.token; reset_conf.bucket = conf.bucket; + reset_conf.connect_timeout_ms = conf.connect_timeout_ms; + reset_conf.max_connections = conf.max_connections; + reset_conf.request_timeout_ms = conf.request_timeout_ms; + reset_conf.use_virtual_addressing = conf.use_virtual_addressing; // Should check endpoint here? 
} diff --git a/be/src/io/fs/s3_file_writer.cpp b/be/src/io/fs/s3_file_writer.cpp index 24b72a4b6c902c2..e40b9e171eb08f6 100644 --- a/be/src/io/fs/s3_file_writer.cpp +++ b/be/src/io/fs/s3_file_writer.cpp @@ -204,12 +204,12 @@ Status S3FileWriter::_build_upload_buffer() { Status S3FileWriter::_close_impl() { VLOG_DEBUG << "S3FileWriter::close, path: " << _obj_storage_path_opts.path.native(); - if (_cur_part_num == 1 && _pending_buf) { + if (_cur_part_num == 1 && _pending_buf) { // data size is less than config::s3_write_buffer_size RETURN_IF_ERROR(_set_upload_to_remote_less_than_buffer_size()); } if (_bytes_appended == 0) { - DCHECK(_cur_part_num == 1); + DCHECK_EQ(_cur_part_num, 1); // No data written, but need to create an empty file RETURN_IF_ERROR(_build_upload_buffer()); if (!_used_by_s3_committer) { @@ -220,10 +220,15 @@ Status S3FileWriter::_close_impl() { } } - if (_pending_buf != nullptr) { + if (_pending_buf != nullptr) { // there is remaining data in buffer need to be uploaded _countdown_event.add_count(); RETURN_IF_ERROR(FileBuffer::submit(std::move(_pending_buf))); _pending_buf = nullptr; + } else if (_bytes_appended != 0) { // Non-empty file and has nothing to be uploaded + // NOTE: When the data size is a multiple of config::s3_write_buffer_size, + // _cur_part_num may exceed the actual number of parts that need to be uploaded. + // This is because it is incremented by 1 in advance within the S3FileWriter::appendv method. + _cur_part_num--; } RETURN_IF_ERROR(_complete()); @@ -327,26 +332,29 @@ Status S3FileWriter::_complete() { _wait_until_finish("Complete"); TEST_SYNC_POINT_CALLBACK("S3FileWriter::_complete:1", std::make_pair(&_failed, &_completed_parts)); - if (!_used_by_s3_committer) { // S3 committer will complete multipart upload file on FE side. 
- if (_failed || _completed_parts.size() != _cur_part_num) { - _st = Status::InternalError( - "error status {}, have failed {}, complete parts {}, cur part num {}, whole " - "parts {}, file path {}, file size {}, has left buffer {}", - _st, _failed, _completed_parts.size(), _cur_part_num, _dump_completed_part(), - _obj_storage_path_opts.path.native(), _bytes_appended, _pending_buf != nullptr); - LOG(WARNING) << _st; - return _st; - } - // make sure _completed_parts are ascending order - std::sort(_completed_parts.begin(), _completed_parts.end(), - [](auto& p1, auto& p2) { return p1.part_num < p2.part_num; }); - TEST_SYNC_POINT_CALLBACK("S3FileWriter::_complete:2", &_completed_parts); - auto resp = client->complete_multipart_upload(_obj_storage_path_opts, _completed_parts); - if (resp.status.code != ErrorCode::OK) { - LOG_WARNING("Compltet multi part upload failed because {}, file path {}", - resp.status.msg, _obj_storage_path_opts.path.native()); - return {resp.status.code, std::move(resp.status.msg)}; - } + if (_used_by_s3_committer) { // S3 committer will complete multipart upload file on FE side. 
+ s3_file_created_total << 1; // Assume that it will be created successfully + return Status::OK(); + } + + if (_failed || _completed_parts.size() != _cur_part_num) { + _st = Status::InternalError( + "error status={} failed={} #complete_parts={} #expected_parts={} " + "completed_parts_list={} file_path={} file_size={} has left buffer not uploaded={}", + _st, _failed, _completed_parts.size(), _cur_part_num, _dump_completed_part(), + _obj_storage_path_opts.path.native(), _bytes_appended, _pending_buf != nullptr); + LOG(WARNING) << _st; + return _st; + } + // make sure _completed_parts are ascending order + std::sort(_completed_parts.begin(), _completed_parts.end(), + [](auto& p1, auto& p2) { return p1.part_num < p2.part_num; }); + TEST_SYNC_POINT_CALLBACK("S3FileWriter::_complete:2", &_completed_parts); + auto resp = client->complete_multipart_upload(_obj_storage_path_opts, _completed_parts); + if (resp.status.code != ErrorCode::OK) { + LOG_WARNING("Compltet multi part upload failed because {}, file path {}", resp.status.msg, + _obj_storage_path_opts.path.native()); + return {resp.status.code, std::move(resp.status.msg)}; } s3_file_created_total << 1; return Status::OK(); diff --git a/be/src/io/hdfs_builder.cpp b/be/src/io/hdfs_builder.cpp index 99ee89596ed9ac7..59ca46e86944df8 100644 --- a/be/src/io/hdfs_builder.cpp +++ b/be/src/io/hdfs_builder.cpp @@ -20,17 +20,18 @@ #include #include +#include #include -#include #include #include -#include "agent/utils.h" #include "common/config.h" #include "common/logging.h" +#ifdef USE_HADOOP_HDFS +#include "hadoop_hdfs/hdfs.h" +#endif #include "io/fs/hdfs.h" #include "util/string_util.h" -#include "util/uid_util.h" namespace doris { diff --git a/be/src/io/hdfs_util.cpp b/be/src/io/hdfs_util.cpp index 6c1bbf80a1526f1..92d8933d8b5c929 100644 --- a/be/src/io/hdfs_util.cpp +++ b/be/src/io/hdfs_util.cpp @@ -17,10 +17,13 @@ #include "io/hdfs_util.h" +#include +#include #include #include #include +#include #include "common/logging.h" 
#include "io/fs/err_utils.h" @@ -30,7 +33,7 @@ namespace doris::io { namespace { -Status create_hdfs_fs(const THdfsParams& hdfs_params, const std::string& fs_name, hdfsFS* fs) { +Status _create_hdfs_fs(const THdfsParams& hdfs_params, const std::string& fs_name, hdfsFS* fs) { HDFSCommonBuilder builder; RETURN_IF_ERROR(create_hdfs_builder(hdfs_params, fs_name, &builder)); hdfsFS hdfs_fs = hdfsBuilderConnect(builder.get()); @@ -41,6 +44,39 @@ Status create_hdfs_fs(const THdfsParams& hdfs_params, const std::string& fs_name return Status::OK(); } +// https://brpc.apache.org/docs/server/basics/ +// According to the brpc doc, JNI code checks stack layout and cannot be run in +// bthreads so create a pthread for creating hdfs connection if necessary. +Status create_hdfs_fs(const THdfsParams& hdfs_params, const std::string& fs_name, hdfsFS* fs) { + bool is_pthread = bthread_self() == 0; + LOG(INFO) << "create hfdfs fs, is_pthread=" << is_pthread << " fs_name=" << fs_name; + if (is_pthread) { // running in pthread + return _create_hdfs_fs(hdfs_params, fs_name, fs); + } + + // running in bthread, switch to a pthread and wait + Status st; + auto btx = bthread::butex_create(); + *(int*)btx = 0; + std::thread t([&] { + st = _create_hdfs_fs(hdfs_params, fs_name, fs); + *(int*)btx = 1; + bthread::butex_wake_all(btx); + }); + std::unique_ptr> defer((int*)0x01, [&t, &btx](...) { + if (t.joinable()) t.join(); + bthread::butex_destroy(btx); + }); + timespec tmout {.tv_sec = std::chrono::system_clock::now().time_since_epoch().count() + 60, + .tv_nsec = 0}; + if (int ret = bthread::butex_wait(btx, 1, &tmout); ret != 0) { + std::string msg = "failed to wait _create_hdfs_fs fs_name=" + fs_name; + LOG(WARNING) << msg << " error=" << std::strerror(errno); + st = Status::Error(msg); + } + return st; +} + uint64_t hdfs_hash_code(const THdfsParams& hdfs_params, const std::string& fs_name) { uint64_t hash_code = 0; // The specified fsname is used first. 
diff --git a/be/src/olap/base_compaction.cpp b/be/src/olap/base_compaction.cpp index 8be29383c1e9b1e..8b9cbd75ed33b80 100644 --- a/be/src/olap/base_compaction.cpp +++ b/be/src/olap/base_compaction.cpp @@ -80,7 +80,7 @@ Status BaseCompaction::execute_compact() { tablet()->set_last_base_compaction_success_time(UnixMillis()); DorisMetrics::instance()->base_compaction_deltas_total->increment(_input_rowsets.size()); - DorisMetrics::instance()->base_compaction_bytes_total->increment(_input_rowsets_size); + DorisMetrics::instance()->base_compaction_bytes_total->increment(_input_rowsets_total_size); return Status::OK(); } diff --git a/be/src/olap/base_tablet.cpp b/be/src/olap/base_tablet.cpp index 8a830cd25e7d0d9..e5ec38738155e52 100644 --- a/be/src/olap/base_tablet.cpp +++ b/be/src/olap/base_tablet.cpp @@ -80,7 +80,8 @@ Status _get_segment_column_iterator(const BetaRowsetSharedPtr& rowset, uint32_t .use_page_cache = !config::disable_storage_page_cache, .file_reader = segment->file_reader().get(), .stats = stats, - .io_ctx = io::IOContext {.reader_type = ReaderType::READER_QUERY}, + .io_ctx = io::IOContext {.reader_type = ReaderType::READER_QUERY, + .file_cache_stats = &stats->file_cache_stats}, }; RETURN_IF_ERROR((*column_iterator)->init(opt)); return Status::OK(); @@ -443,7 +444,7 @@ Status BaseTablet::lookup_row_key(const Slice& encoded_key, TabletSchema* latest RowLocation* row_location, uint32_t version, std::vector>& segment_caches, RowsetSharedPtr* rowset, bool with_rowid, - std::string* encoded_seq_value) { + std::string* encoded_seq_value, OlapReaderStatistics* stats) { SCOPED_BVAR_LATENCY(g_tablet_lookup_rowkey_latency); size_t seq_col_length = 0; // use the latest tablet schema to decide if the tablet has sequence column currently @@ -467,13 +468,9 @@ Status BaseTablet::lookup_row_key(const Slice& encoded_key, TabletSchema* latest DCHECK_EQ(segments_key_bounds.size(), num_segments); std::vector picked_segments; for (int i = num_segments - 1; i >= 0; i--) { - // 
If mow table has cluster keys, the key bounds is short keys, not primary keys - // use PrimaryKeyIndexMetaPB in primary key index? - if (schema->cluster_key_idxes().empty()) { - if (key_without_seq.compare(segments_key_bounds[i].max_key()) > 0 || - key_without_seq.compare(segments_key_bounds[i].min_key()) < 0) { - continue; - } + if (key_without_seq.compare(segments_key_bounds[i].max_key()) > 0 || + key_without_seq.compare(segments_key_bounds[i].min_key()) < 0) { + continue; } picked_segments.emplace_back(i); } @@ -491,7 +488,7 @@ Status BaseTablet::lookup_row_key(const Slice& encoded_key, TabletSchema* latest for (auto id : picked_segments) { Status s = segments[id]->lookup_row_key(encoded_key, schema, with_seq_col, with_rowid, - &loc, encoded_seq_value); + &loc, encoded_seq_value, stats); if (s.is()) { continue; } diff --git a/be/src/olap/base_tablet.h b/be/src/olap/base_tablet.h index 4aaca77770db0fa..b5da0e3bf06be18 100644 --- a/be/src/olap/base_tablet.h +++ b/be/src/olap/base_tablet.h @@ -155,7 +155,8 @@ class BaseTablet { RowLocation* row_location, uint32_t version, std::vector>& segment_caches, RowsetSharedPtr* rowset = nullptr, bool with_rowid = true, - std::string* encoded_seq_value = nullptr); + std::string* encoded_seq_value = nullptr, + OlapReaderStatistics* stats = nullptr); // calc delete bitmap when flush memtable, use a fake version to calc // For example, cur max version is 5, and we use version 6 to calc but @@ -292,6 +293,9 @@ class BaseTablet { Status show_nested_index_file(std::string* json_meta); + TabletUid tablet_uid() const { return _tablet_meta->tablet_uid(); } + TabletInfo get_tablet_info() const { return TabletInfo(tablet_id(), tablet_uid()); } + protected: // Find the missed versions until the spec_version. 
// diff --git a/be/src/olap/bitmap_filter_predicate.h b/be/src/olap/bitmap_filter_predicate.h index 431182c4ce821ea..48e93642f4c368c 100644 --- a/be/src/olap/bitmap_filter_predicate.h +++ b/be/src/olap/bitmap_filter_predicate.h @@ -105,7 +105,7 @@ class BitmapFilterColumnPredicate : public ColumnPredicate { SpecificFilter* _specific_filter; // owned by _filter int get_filter_id() const override { return _filter->get_filter_id(); } - bool is_filter() const override { return true; } + bool is_runtime_filter() const override { return true; } }; template diff --git a/be/src/olap/bloom_filter_predicate.h b/be/src/olap/bloom_filter_predicate.h index 0e2ae500ac6a583..2c49ff2ea8d1a23 100644 --- a/be/src/olap/bloom_filter_predicate.h +++ b/be/src/olap/bloom_filter_predicate.h @@ -96,7 +96,6 @@ class BloomFilterColumnPredicate : public ColumnPredicate { DCHECK(filter_id != -1); return filter_id; } - bool is_filter() const override { return true; } std::shared_ptr _filter; SpecificFilter* _specific_filter; // owned by _filter diff --git a/be/src/olap/column_predicate.h b/be/src/olap/column_predicate.h index 764521818f5b1f7..2b76c777228b2cf 100644 --- a/be/src/olap/column_predicate.h +++ b/be/src/olap/column_predicate.h @@ -262,8 +262,6 @@ class ColumnPredicate { } virtual int get_filter_id() const { return -1; } - // now InListPredicateBase BloomFilterColumnPredicate BitmapFilterColumnPredicate = true - virtual bool is_filter() const { return false; } PredicateFilterInfo get_filtered_info() const { return PredicateFilterInfo {static_cast(type()), _evaluated_rows - 1, _evaluated_rows - 1 - _passed_rows}; @@ -303,6 +301,13 @@ class ColumnPredicate { } bool always_true() const { return _always_true; } + // Return whether the ColumnPredicate was created by a runtime filter. + // If true, it was definitely created by a runtime filter. 
+ // If false, it may still have been created by a runtime filter, + // as certain filters like "in filter" generate key ranges instead of ColumnPredicate. + // is_runtime_filter uses _can_ignore, except for BitmapFilter, + // as BitmapFilter cannot ignore data. + virtual bool is_runtime_filter() const { return _can_ignore(); } protected: virtual std::string _debug_string() const = 0; @@ -325,7 +330,7 @@ class ColumnPredicate { _judge_filter_rows = 0; } - void do_judge_selectivity(int64_t filter_rows, int64_t input_rows) const { + void do_judge_selectivity(uint64_t filter_rows, uint64_t input_rows) const { if ((_judge_counter--) == 0) { reset_judge_selectivity(); } @@ -352,8 +357,8 @@ class ColumnPredicate { // without recalculating. At the beginning of the next period, // reset_judge_selectivity is used to reset these variables. mutable int _judge_counter = 0; - mutable int _judge_input_rows = 0; - mutable int _judge_filter_rows = 0; + mutable uint64_t _judge_input_rows = 0; + mutable uint64_t _judge_filter_rows = 0; mutable bool _always_true = false; }; diff --git a/be/src/olap/compaction.cpp b/be/src/olap/compaction.cpp index dee06a8a79b20c1..a581bce72c294e3 100644 --- a/be/src/olap/compaction.cpp +++ b/be/src/olap/compaction.cpp @@ -188,6 +188,7 @@ Status Compaction::merge_input_rowsets() { Status res; { SCOPED_TIMER(_merge_rowsets_latency_timer); + // 1. Merge segment files and write bkd inverted index if (_is_vertical) { res = Merger::vertical_merge_rowsets(_tablet, compaction_type(), *_cur_tablet_schema, input_rs_readers, _output_rs_writer.get(), @@ -200,17 +201,19 @@ Status Compaction::merge_input_rowsets() { res = Merger::vmerge_rowsets(_tablet, compaction_type(), *_cur_tablet_schema, input_rs_readers, _output_rs_writer.get(), &_stats); } - } - - _tablet->last_compaction_status = res; - if (!res.ok()) { - return res; + _tablet->last_compaction_status = res; + if (!res.ok()) { + return res; + } + // 2. 
Merge the remaining inverted index files of the string type + RETURN_IF_ERROR(do_inverted_index_compaction()); } COUNTER_UPDATE(_merged_rows_counter, _stats.merged_rows); COUNTER_UPDATE(_filtered_rows_counter, _stats.filtered_rows); + // 3. In the `build`, `_close_file_writers` is called to close the inverted index file writer and write the final compound index file. RETURN_NOT_OK_STATUS_WITH_WARN(_output_rs_writer->build(_output_rowset), fmt::format("rowset writer build failed. output_version: {}", _output_version.to_string())); @@ -254,10 +257,10 @@ int64_t Compaction::get_avg_segment_rows() { if (meta->compaction_policy() == CUMULATIVE_TIME_SERIES_POLICY) { int64_t compaction_goal_size_mbytes = meta->time_series_compaction_goal_size_mbytes(); return (compaction_goal_size_mbytes * 1024 * 1024 * 2) / - (_input_rowsets_size / (_input_row_num + 1) + 1); + (_input_rowsets_data_size / (_input_row_num + 1) + 1); } return config::vertical_compaction_max_segment_size / - (_input_rowsets_size / (_input_row_num + 1) + 1); + (_input_rowsets_data_size / (_input_row_num + 1) + 1); } CompactionMixin::CompactionMixin(StorageEngine& engine, TabletSharedPtr tablet, @@ -302,9 +305,9 @@ Status CompactionMixin::do_compact_ordered_rowsets() { // build output rowset RowsetMetaSharedPtr rowset_meta = std::make_shared(); rowset_meta->set_num_rows(_input_row_num); - rowset_meta->set_total_disk_size(_input_rowsets_size); - rowset_meta->set_data_disk_size(_input_rowsets_size); - rowset_meta->set_index_disk_size(_input_index_size); + rowset_meta->set_total_disk_size(_input_rowsets_data_size + _input_rowsets_index_size); + rowset_meta->set_data_disk_size(_input_rowsets_data_size); + rowset_meta->set_index_disk_size(_input_rowsets_index_size); rowset_meta->set_empty(_input_row_num == 0); rowset_meta->set_num_segments(_input_num_segments); rowset_meta->set_segments_overlap(NONOVERLAPPING); @@ -317,12 +320,13 @@ Status CompactionMixin::do_compact_ordered_rowsets() { void 
CompactionMixin::build_basic_info() { for (auto& rowset : _input_rowsets) { - _input_rowsets_size += rowset->data_disk_size(); - _input_index_size += rowset->index_disk_size(); + _input_rowsets_data_size += rowset->data_disk_size(); + _input_rowsets_index_size += rowset->index_disk_size(); + _input_rowsets_total_size += rowset->total_disk_size(); _input_row_num += rowset->num_rows(); _input_num_segments += rowset->num_segments(); } - COUNTER_UPDATE(_input_rowsets_data_size_counter, _input_rowsets_size); + COUNTER_UPDATE(_input_rowsets_data_size_counter, _input_rowsets_data_size); COUNTER_UPDATE(_input_row_num_counter, _input_row_num); COUNTER_UPDATE(_input_segments_num_counter, _input_num_segments); @@ -441,8 +445,12 @@ Status CompactionMixin::execute_compact_impl(int64_t permits) { << ", disk=" << tablet()->data_dir()->path() << ", segments=" << _input_num_segments << ", input_row_num=" << _input_row_num << ", output_row_num=" << _output_rowset->num_rows() - << ", input_rowset_size=" << _input_rowsets_size - << ", output_rowset_size=" << _output_rowset->data_disk_size() + << ", input_rowsets_data_size=" << _input_rowsets_data_size + << ", input_rowsets_index_size=" << _input_rowsets_index_size + << ", input_rowsets_total_size=" << _input_rowsets_total_size + << ", output_rowset_data_size=" << _output_rowset->data_disk_size() + << ", output_rowset_index_size=" << _output_rowset->index_disk_size() + << ", output_rowset_total_size=" << _output_rowset->total_disk_size() << ". elapsed time=" << watch.get_elapse_second() << "s."; _state = CompactionState::SUCCESS; return Status::OK(); @@ -456,8 +464,6 @@ Status CompactionMixin::execute_compact_impl(int64_t permits) { RETURN_IF_ERROR(merge_input_rowsets()); - RETURN_IF_ERROR(do_inverted_index_compaction()); - RETURN_IF_ERROR(modify_rowsets()); auto* cumu_policy = tablet()->cumulative_compaction_policy(); @@ -466,8 +472,8 @@ Status CompactionMixin::execute_compact_impl(int64_t permits) { << ". 
tablet=" << _tablet->tablet_id() << ", output_version=" << _output_version << ", current_max_version=" << tablet()->max_version().second << ", disk=" << tablet()->data_dir()->path() << ", segments=" << _input_num_segments - << ", input_rowset_size=" << _input_rowsets_size - << ", output_rowset_size=" << _output_rowset->data_disk_size() + << ", input_data_size=" << _input_rowsets_data_size + << ", output_rowset_size=" << _output_rowset->total_disk_size() << ", input_row_num=" << _input_row_num << ", output_row_num=" << _output_rowset->num_rows() << ", filtered_row_num=" << _stats.filtered_rows @@ -510,10 +516,11 @@ Status Compaction::do_inverted_index_compaction() { auto src_segment_num = src_seg_to_id_map.size(); auto dest_segment_num = dest_segment_num_rows.size(); + DBUG_EXECUTE_IF("Compaction::do_inverted_index_compaction_dest_segment_num_is_zero", + { dest_segment_num = 0; }) if (dest_segment_num <= 0) { LOG(INFO) << "skip doing index compaction due to no output segments" << ". tablet=" << _tablet->tablet_id() << ", input row number=" << _input_row_num - << ", output row number=" << _output_rowset->num_rows() << ". elapsed time=" << inverted_watch.get_elapse_second() << "s."; return Status::OK(); } @@ -585,14 +592,17 @@ Status Compaction::do_inverted_index_compaction() { const auto& [rowset_id, seg_id] = m.first; auto find_it = rs_id_to_rowset_map.find(rowset_id); + DBUG_EXECUTE_IF("Compaction::do_inverted_index_compaction_find_rowset_error", + { find_it = rs_id_to_rowset_map.end(); }) if (find_it == rs_id_to_rowset_map.end()) [[unlikely]] { - DCHECK(false) << _tablet->tablet_id() << ' ' << rowset_id; + // DCHECK(false) << _tablet->tablet_id() << ' ' << rowset_id; return Status::InternalError("cannot find rowset. 
tablet_id={} rowset_id={}", _tablet->tablet_id(), rowset_id.to_string()); } auto* rowset = find_it->second; - const auto& fs = rowset->rowset_meta()->fs(); + auto fs = rowset->rowset_meta()->fs(); + DBUG_EXECUTE_IF("Compaction::do_inverted_index_compaction_get_fs_error", { fs = nullptr; }) if (!fs) { return Status::InternalError("get fs failed, resource_id={}", rowset->rowset_meta()->resource_id()); @@ -613,58 +623,9 @@ Status Compaction::do_inverted_index_compaction() { // dest index files // format: rowsetId_segmentId - std::vector> inverted_index_file_writers( - dest_segment_num); - - // Some columns have already been indexed - // key: seg_id, value: inverted index file size - std::unordered_map compacted_idx_file_size; - for (int seg_id = 0; seg_id < dest_segment_num; ++seg_id) { - std::string index_path_prefix { - InvertedIndexDescriptor::get_index_file_path_prefix(ctx.segment_path(seg_id))}; - auto inverted_index_file_reader = std::make_unique( - ctx.fs(), index_path_prefix, - _cur_tablet_schema->get_inverted_index_storage_format()); - bool open_idx_file_cache = false; - auto st = inverted_index_file_reader->init(config::inverted_index_read_buffer_size, - open_idx_file_cache); - if (st.ok()) { - auto index_not_need_to_compact = - DORIS_TRY(inverted_index_file_reader->get_all_directories()); - // V1: each index is a separate file - // V2: all indexes are in a single file - if (_cur_tablet_schema->get_inverted_index_storage_format() != - doris::InvertedIndexStorageFormatPB::V1) { - int64_t fsize = 0; - st = ctx.fs()->file_size( - InvertedIndexDescriptor::get_index_file_path_v2(index_path_prefix), &fsize); - if (!st.ok()) { - LOG(ERROR) << "file size error in index compaction, error:" << st.msg(); - return st; - } - compacted_idx_file_size[seg_id] = fsize; - } - auto inverted_index_file_writer = std::make_unique( - ctx.fs(), index_path_prefix, ctx.rowset_id.to_string(), seg_id, - _cur_tablet_schema->get_inverted_index_storage_format()); - 
RETURN_IF_ERROR(inverted_index_file_writer->initialize(index_not_need_to_compact)); - inverted_index_file_writers[seg_id] = std::move(inverted_index_file_writer); - } else if (st.is()) { - auto inverted_index_file_writer = std::make_unique( - ctx.fs(), index_path_prefix, ctx.rowset_id.to_string(), seg_id, - _cur_tablet_schema->get_inverted_index_storage_format()); - inverted_index_file_writers[seg_id] = std::move(inverted_index_file_writer); - // no index file - compacted_idx_file_size[seg_id] = 0; - } else { - LOG(ERROR) << "inverted_index_file_reader init failed in index compaction, error:" - << st; - return st; - } - } - for (const auto& writer : inverted_index_file_writers) { - writer->set_file_writer_opts(ctx.get_file_writer_options()); - } + auto& inverted_index_file_writers = dynamic_cast(_output_rs_writer.get()) + ->inverted_index_file_writers(); + DCHECK_EQ(inverted_index_file_writers.size(), dest_segment_num); // use tmp file dir to store index files auto tmp_file_dir = ExecEnv::GetInstance()->get_tmp_file_dirs()->get_tmp_file_dir(); @@ -688,29 +649,13 @@ Status Compaction::do_inverted_index_compaction() { Status status = Status::OK(); for (auto&& column_uniq_id : ctx.columns_to_do_index_compaction) { auto col = _cur_tablet_schema->column_by_uid(column_uniq_id); - const auto* index_meta = _cur_tablet_schema->get_inverted_index(col); - - // if index properties are different, index compaction maybe needs to be skipped. 
- bool is_continue = false; - std::optional> first_properties; - for (const auto& rowset : _input_rowsets) { - const auto* tablet_index = rowset->tablet_schema()->get_inverted_index(col); - const auto& properties = tablet_index->properties(); - if (!first_properties.has_value()) { - first_properties = properties; - } else { - if (properties != first_properties.value()) { - error_handler(index_meta->index_id(), column_uniq_id); - status = Status::Error( - "if index properties are different, index compaction needs to be " - "skipped."); - is_continue = true; - break; - } - } - } - if (is_continue) { - continue; + const auto* index_meta = _cur_tablet_schema->inverted_index(col); + DBUG_EXECUTE_IF("Compaction::do_inverted_index_compaction_can_not_find_index_meta", + { index_meta = nullptr; }) + if (index_meta == nullptr) { + status = Status::Error( + fmt::format("Can not find index_meta for col {}", col.name())); + break; } std::vector dest_index_dirs(dest_segment_num); @@ -737,55 +682,23 @@ Status Compaction::do_inverted_index_compaction() { } } - std::vector all_inverted_index_file_info(dest_segment_num); - uint64_t inverted_index_file_size = 0; - for (int seg_id = 0; seg_id < dest_segment_num; ++seg_id) { - auto inverted_index_file_writer = inverted_index_file_writers[seg_id].get(); - if (Status st = inverted_index_file_writer->close(); !st.ok()) { - status = Status::Error(st.msg()); - } else { - inverted_index_file_size += inverted_index_file_writer->get_index_file_total_size(); - inverted_index_file_size -= compacted_idx_file_size[seg_id]; - } - all_inverted_index_file_info[seg_id] = inverted_index_file_writer->get_index_file_info(); - } // check index compaction status. If status is not ok, we should return error and end this compaction round. 
if (!status.ok()) { return status; } - - // index compaction should update total disk size and index disk size - _output_rowset->rowset_meta()->set_data_disk_size(_output_rowset->data_disk_size() + - inverted_index_file_size); - _output_rowset->rowset_meta()->set_total_disk_size(_output_rowset->data_disk_size() + - inverted_index_file_size); - _output_rowset->rowset_meta()->set_index_disk_size(_output_rowset->index_disk_size() + - inverted_index_file_size); - - _output_rowset->rowset_meta()->update_inverted_index_files_info(all_inverted_index_file_info); - COUNTER_UPDATE(_output_rowset_data_size_counter, _output_rowset->data_disk_size()); - LOG(INFO) << "succeed to do index compaction" - << ". tablet=" << _tablet->tablet_id() << ", input row number=" << _input_row_num - << ", output row number=" << _output_rowset->num_rows() - << ", input_rowset_size=" << _input_rowsets_size - << ", output_rowset_size=" << _output_rowset->data_disk_size() - << ", inverted index file size=" << inverted_index_file_size + << ". tablet=" << _tablet->tablet_id() << ". elapsed time=" << inverted_watch.get_elapse_second() << "s."; return Status::OK(); } void Compaction::construct_index_compaction_columns(RowsetWriterContext& ctx) { - for (const auto& index : _cur_tablet_schema->indexes()) { - if (index.index_type() != IndexType::INVERTED) { - continue; - } - - auto col_unique_ids = index.col_unique_ids(); + for (const auto& index : _cur_tablet_schema->inverted_indexes()) { + auto col_unique_ids = index->col_unique_ids(); // check if column unique ids is empty to avoid crash if (col_unique_ids.empty()) { - LOG(WARNING) << "tablet[" << _tablet->tablet_id() << "] index[" << index.index_id() + LOG(WARNING) << "tablet[" << _tablet->tablet_id() << "] index[" << index->index_id() << "] has no column unique id, will skip index compaction." 
<< " tablet_schema=" << _cur_tablet_schema->dump_full_schema(); continue; @@ -795,8 +708,37 @@ void Compaction::construct_index_compaction_columns(RowsetWriterContext& ctx) { if (!field_is_slice_type(_cur_tablet_schema->column_by_uid(col_unique_id).type())) { continue; } + + // if index properties are different, index compaction maybe needs to be skipped. + bool is_continue = false; + std::optional> first_properties; + for (const auto& rowset : _input_rowsets) { + const auto* tablet_index = rowset->tablet_schema()->inverted_index(col_unique_id); + // no inverted index or index id is different from current index id + if (tablet_index == nullptr || tablet_index->index_id() != index->index_id()) { + is_continue = true; + break; + } + auto properties = tablet_index->properties(); + if (!first_properties.has_value()) { + first_properties = properties; + } else { + DBUG_EXECUTE_IF( + "Compaction::do_inverted_index_compaction_index_properties_different", + { properties.emplace("dummy_key", "dummy_value"); }) + if (properties != first_properties.value()) { + is_continue = true; + break; + } + } + } + if (is_continue) { + continue; + } auto has_inverted_index = [&](const RowsetSharedPtr& src_rs) { auto* rowset = static_cast(src_rs.get()); + DBUG_EXECUTE_IF("Compaction::construct_skip_inverted_index_is_skip_index_compaction", + { rowset->set_skip_index_compaction(col_unique_id); }) if (rowset->is_skip_index_compaction(col_unique_id)) { LOG(WARNING) << "tablet[" << _tablet->tablet_id() << "] rowset[" << rowset->rowset_id() << "] column_unique_id[" << col_unique_id @@ -804,14 +746,18 @@ void Compaction::construct_index_compaction_columns(RowsetWriterContext& ctx) { return false; } - const auto& fs = rowset->rowset_meta()->fs(); + auto fs = rowset->rowset_meta()->fs(); + DBUG_EXECUTE_IF("Compaction::construct_skip_inverted_index_get_fs_error", + { fs = nullptr; }) if (!fs) { LOG(WARNING) << "get fs failed, resource_id=" << rowset->rowset_meta()->resource_id(); return false; } - 
const auto* index_meta = rowset->tablet_schema()->get_inverted_index(col_unique_id, ""); + const auto* index_meta = rowset->tablet_schema()->inverted_index(col_unique_id); + DBUG_EXECUTE_IF("Compaction::construct_skip_inverted_index_index_meta_nullptr", + { index_meta = nullptr; }) if (index_meta == nullptr) { LOG(WARNING) << "tablet[" << _tablet->tablet_id() << "] column_unique_id[" << col_unique_id << "] index meta is null, will skip index compaction"; @@ -821,6 +767,9 @@ void Compaction::construct_index_compaction_columns(RowsetWriterContext& ctx) { for (auto i = 0; i < rowset->num_segments(); i++) { // TODO: inverted_index_path auto seg_path = rowset->segment_path(i); + DBUG_EXECUTE_IF("Compaction::construct_skip_inverted_index_seg_path_nullptr", { + seg_path = ResultError(Status::Error("error")); + }) if (!seg_path) { LOG(WARNING) << seg_path.error(); return false; @@ -838,6 +787,16 @@ void Compaction::construct_index_compaction_columns(RowsetWriterContext& ctx) { auto st = inverted_index_file_reader->init( config::inverted_index_read_buffer_size, open_idx_file_cache); index_file_path = inverted_index_file_reader->get_index_file_path(index_meta); + DBUG_EXECUTE_IF( + "Compaction::construct_skip_inverted_index_index_file_reader_init_" + "status_not_ok", + { + st = Status::Error( + "debug point: " + "construct_skip_inverted_index_index_file_reader_init_" + "status_" + "not_ok"); + }) if (!st.ok()) { LOG(WARNING) << "init index " << index_file_path << " error:" << st; return false; @@ -845,6 +804,14 @@ void Compaction::construct_index_compaction_columns(RowsetWriterContext& ctx) { // check index meta auto result = inverted_index_file_reader->open(index_meta); + DBUG_EXECUTE_IF( + "Compaction::construct_skip_inverted_index_index_file_reader_open_" + "error", + { + result = ResultError( + Status::Error( + "CLuceneError occur when open idx file")); + }) if (!result.has_value()) { LOG(WARNING) << "open index " << index_file_path << " error:" << result.error(); @@ 
-854,6 +821,12 @@ void Compaction::construct_index_compaction_columns(RowsetWriterContext& ctx) { std::vector files; reader->list(&files); reader->close(); + DBUG_EXECUTE_IF( + "Compaction::construct_skip_inverted_index_index_reader_close_error", + { _CLTHROWA(CL_ERR_IO, "debug point: reader close error"); }) + + DBUG_EXECUTE_IF("Compaction::construct_skip_inverted_index_index_files_count", + { files.clear(); }) // why is 3? // slice type index file at least has 3 files: null_bitmap, segments_N, segments.gen @@ -887,9 +860,7 @@ Status CompactionMixin::construct_output_rowset_writer(RowsetWriterContext& ctx) if (config::inverted_index_compaction_enable && (((_tablet->keys_type() == KeysType::UNIQUE_KEYS && _tablet->enable_unique_key_merge_on_write()) || - _tablet->keys_type() == KeysType::DUP_KEYS)) && - _cur_tablet_schema->get_inverted_index_storage_format() == - InvertedIndexStorageFormatPB::V1) { + _tablet->keys_type() == KeysType::DUP_KEYS))) { construct_index_compaction_columns(ctx); } ctx.version = _output_version; @@ -898,6 +869,7 @@ Status CompactionMixin::construct_output_rowset_writer(RowsetWriterContext& ctx) ctx.tablet_schema = _cur_tablet_schema; ctx.newest_write_timestamp = _newest_write_timestamp; ctx.write_type = DataWriteType::TYPE_COMPACTION; + ctx.storage_page_size = _tablet->tablet_meta()->storage_page_size(); _output_rs_writer = DORIS_TRY(_tablet->create_rowset_writer(ctx, _is_vertical)); _pending_rs_guard = _engine.add_pending_rowset(ctx); return Status::OK(); @@ -1156,8 +1128,6 @@ Status CloudCompactionMixin::execute_compact_impl(int64_t permits) { RETURN_IF_ERROR(merge_input_rowsets()); - RETURN_IF_ERROR(do_inverted_index_compaction()); - RETURN_IF_ERROR(_engine.meta_mgr().commit_rowset(*_output_rowset->rowset_meta().get())); // 4. 
modify rowsets in memory @@ -1184,9 +1154,7 @@ Status CloudCompactionMixin::construct_output_rowset_writer(RowsetWriterContext& if (config::inverted_index_compaction_enable && (((_tablet->keys_type() == KeysType::UNIQUE_KEYS && _tablet->enable_unique_key_merge_on_write()) || - _tablet->keys_type() == KeysType::DUP_KEYS)) && - _cur_tablet_schema->get_inverted_index_storage_format() == - InvertedIndexStorageFormatPB::V1) { + _tablet->keys_type() == KeysType::DUP_KEYS))) { construct_index_compaction_columns(ctx); } @@ -1204,6 +1172,7 @@ Status CloudCompactionMixin::construct_output_rowset_writer(RowsetWriterContext& ctx.tablet_schema = _cur_tablet_schema; ctx.newest_write_timestamp = _newest_write_timestamp; ctx.write_type = DataWriteType::TYPE_COMPACTION; + ctx.storage_page_size = _tablet->tablet_meta()->storage_page_size(); auto compaction_policy = _tablet->tablet_meta()->compaction_policy(); if (_tablet->tablet_meta()->time_series_compaction_level_threshold() >= 2) { diff --git a/be/src/olap/compaction.h b/be/src/olap/compaction.h index 08afe840280ecfb..06ef4268529247b 100644 --- a/be/src/olap/compaction.h +++ b/be/src/olap/compaction.h @@ -67,6 +67,7 @@ class Compaction { protected: Status merge_input_rowsets(); + // merge inverted index files Status do_inverted_index_compaction(); void construct_index_compaction_columns(RowsetWriterContext& ctx); @@ -89,10 +90,11 @@ class Compaction { BaseTabletSPtr _tablet; std::vector _input_rowsets; - int64_t _input_rowsets_size {0}; + int64_t _input_rowsets_data_size {0}; + int64_t _input_rowsets_index_size {0}; + int64_t _input_rowsets_total_size {0}; int64_t _input_row_num {0}; int64_t _input_num_segments {0}; - int64_t _input_index_size {0}; Merger::Statistics _stats; diff --git a/be/src/olap/cumulative_compaction.cpp b/be/src/olap/cumulative_compaction.cpp index b762468b3455a47..b961c694ede4d0e 100644 --- a/be/src/olap/cumulative_compaction.cpp +++ b/be/src/olap/cumulative_compaction.cpp @@ -125,7 +125,8 @@ Status 
CumulativeCompaction::execute_compact() { tablet()->set_last_cumu_compaction_success_time(UnixMillis()); } DorisMetrics::instance()->cumulative_compaction_deltas_total->increment(_input_rowsets.size()); - DorisMetrics::instance()->cumulative_compaction_bytes_total->increment(_input_rowsets_size); + DorisMetrics::instance()->cumulative_compaction_bytes_total->increment( + _input_rowsets_total_size); return Status::OK(); } diff --git a/be/src/olap/delete_handler.cpp b/be/src/olap/delete_handler.cpp index 4d5b1ce9add3e0e..80fc440ce36a6db 100644 --- a/be/src/olap/delete_handler.cpp +++ b/be/src/olap/delete_handler.cpp @@ -346,6 +346,8 @@ Status DeleteHandler::parse_condition(const std::string& condition_str, TConditi } template + requires(std::is_same_v or + std::is_same_v) Status DeleteHandler::_parse_column_pred(TabletSchemaSPtr complete_schema, TabletSchemaSPtr delete_pred_related_schema, const RepeatedPtrField& sub_pred_list, @@ -353,10 +355,13 @@ Status DeleteHandler::_parse_column_pred(TabletSchemaSPtr complete_schema, for (const auto& sub_predicate : sub_pred_list) { TCondition condition; RETURN_IF_ERROR(parse_condition(sub_predicate, &condition)); - int32_t col_unique_id; - if constexpr (std::is_same_v) { - col_unique_id = sub_predicate.col_unique_id; - } else { + int32_t col_unique_id = -1; + if constexpr (std::is_same_v) { + if (sub_predicate.has_column_unique_id()) [[likely]] { + col_unique_id = sub_predicate.column_unique_id(); + } + } + if (col_unique_id < 0) { const auto& column = *DORIS_TRY(delete_pred_related_schema->column(condition.column_name)); col_unique_id = column.unique_id(); diff --git a/be/src/olap/delete_handler.h b/be/src/olap/delete_handler.h index cc585c0abcf9f6b..77de62d31d988e1 100644 --- a/be/src/olap/delete_handler.h +++ b/be/src/olap/delete_handler.h @@ -21,6 +21,7 @@ #include #include +#include #include "common/factory_creator.h" #include "common/status.h" @@ -115,6 +116,8 @@ class DeleteHandler { private: template + 
requires(std::is_same_v or + std::is_same_v) Status _parse_column_pred( TabletSchemaSPtr complete_schema, TabletSchemaSPtr delete_pred_related_schema, const ::google::protobuf::RepeatedPtrField& sub_pred_list, diff --git a/be/src/olap/delta_writer.cpp b/be/src/olap/delta_writer.cpp index e0e3a5281bcb953..88277775f961016 100644 --- a/be/src/olap/delta_writer.cpp +++ b/be/src/olap/delta_writer.cpp @@ -254,7 +254,7 @@ void DeltaWriter::_request_slave_tablet_pull_rowset(const PNodeInfo& node_info) auto tablet_schema = cur_rowset->rowset_meta()->tablet_schema(); if (!tablet_schema->skip_write_index_on_load()) { for (auto& column : tablet_schema->columns()) { - const TabletIndex* index_meta = tablet_schema->get_inverted_index(*column); + const TabletIndex* index_meta = tablet_schema->inverted_index(*column); if (index_meta) { indices_ids.emplace_back(index_meta->index_id(), index_meta->get_index_suffix()); } diff --git a/be/src/olap/delta_writer_v2.cpp b/be/src/olap/delta_writer_v2.cpp index a6fb01544890429..b770d3c07906b36 100644 --- a/be/src/olap/delta_writer_v2.cpp +++ b/be/src/olap/delta_writer_v2.cpp @@ -125,6 +125,9 @@ Status DeltaWriterV2::init() { context.partial_update_info = _partial_update_info; context.memtable_on_sink_support_index_v2 = true; + auto tablet = DORIS_TRY(ExecEnv::GetInstance()->storage_engine().get_tablet(_req.tablet_id)); + context.storage_page_size = tablet->tablet_meta()->storage_page_size(); + _rowset_writer = std::make_shared(_streams); RETURN_IF_ERROR(_rowset_writer->init(context)); std::shared_ptr wg_sptr = nullptr; diff --git a/be/src/olap/in_list_predicate.h b/be/src/olap/in_list_predicate.h index 3929cd31ba4382d..c88ac0cdd6cfae8 100644 --- a/be/src/olap/in_list_predicate.h +++ b/be/src/olap/in_list_predicate.h @@ -225,7 +225,6 @@ class InListPredicateBase : public ColumnPredicate { } int get_filter_id() const override { return _values->get_filter_id(); } - bool is_filter() const override { return true; } template void 
_evaluate_bit(const vectorized::IColumn& column, const uint16_t* sel, uint16_t size, diff --git a/be/src/olap/inverted_index_parser.cpp b/be/src/olap/inverted_index_parser.cpp index a9ed7ec062e1627..f7e511970d91f2b 100644 --- a/be/src/olap/inverted_index_parser.cpp +++ b/be/src/olap/inverted_index_parser.cpp @@ -128,6 +128,7 @@ std::string get_parser_ignore_above_value_from_properties( std::string get_parser_stopwords_from_properties( const std::map& properties) { + DBUG_EXECUTE_IF("inverted_index_parser.get_parser_stopwords_from_properties", { return ""; }) if (properties.find(INVERTED_INDEX_PARSER_STOPWORDS_KEY) != properties.end()) { return properties.at(INVERTED_INDEX_PARSER_STOPWORDS_KEY); } else { diff --git a/be/src/olap/lru_cache.cpp b/be/src/olap/lru_cache.cpp index b0ad59b6c8d15c9..e539f4a440ab0c9 100644 --- a/be/src/olap/lru_cache.cpp +++ b/be/src/olap/lru_cache.cpp @@ -26,6 +26,8 @@ DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(cache_element_count, MetricUnit::NOUNIT); DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(cache_usage_ratio, MetricUnit::NOUNIT); DEFINE_COUNTER_METRIC_PROTOTYPE_2ARG(cache_lookup_count, MetricUnit::OPERATIONS); DEFINE_COUNTER_METRIC_PROTOTYPE_2ARG(cache_hit_count, MetricUnit::OPERATIONS); +DEFINE_COUNTER_METRIC_PROTOTYPE_2ARG(cache_miss_count, MetricUnit::OPERATIONS); +DEFINE_COUNTER_METRIC_PROTOTYPE_2ARG(cache_stampede_count, MetricUnit::OPERATIONS); DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(cache_hit_ratio, MetricUnit::NOUNIT); uint32_t CacheKey::hash(const char* data, size_t n, uint32_t seed) const { @@ -207,6 +209,16 @@ uint64_t LRUCache::get_hit_count() { return _hit_count; } +uint64_t LRUCache::get_stampede_count() { + std::lock_guard l(_mutex); + return _stampede_count; +} + +uint64_t LRUCache::get_miss_count() { + std::lock_guard l(_mutex); + return _miss_count; +} + size_t LRUCache::get_usage() { std::lock_guard l(_mutex); return _usage; @@ -290,6 +302,8 @@ Cache::Handle* LRUCache::lookup(const CacheKey& key, uint32_t hash) { e->refs++; ++_hit_count; 
e->last_visit_time = UnixMillis(); + } else { + ++_miss_count; } return reinterpret_cast(e); } @@ -430,6 +444,7 @@ Cache::Handle* LRUCache::insert(const CacheKey& key, uint32_t hash, void* value, auto old = _table.insert(e); _usage += e->total_size; if (old != nullptr) { + _stampede_count++; old->in_cache = false; if (_unref(old)) { _usage -= old->total_size; @@ -592,6 +607,8 @@ ShardedLRUCache::ShardedLRUCache(const std::string& name, size_t capacity, LRUCa INT_DOUBLE_METRIC_REGISTER(_entity, cache_usage_ratio); INT_ATOMIC_COUNTER_METRIC_REGISTER(_entity, cache_lookup_count); INT_ATOMIC_COUNTER_METRIC_REGISTER(_entity, cache_hit_count); + INT_ATOMIC_COUNTER_METRIC_REGISTER(_entity, cache_stampede_count); + INT_ATOMIC_COUNTER_METRIC_REGISTER(_entity, cache_miss_count); INT_DOUBLE_METRIC_REGISTER(_entity, cache_hit_ratio); _hit_count_bvar.reset(new bvar::Adder("doris_cache", _name)); @@ -714,12 +731,17 @@ void ShardedLRUCache::update_cache_metrics() const { size_t total_lookup_count = 0; size_t total_hit_count = 0; size_t total_element_count = 0; + size_t total_miss_count = 0; + size_t total_stampede_count = 0; + for (int i = 0; i < _num_shards; i++) { capacity += _shards[i]->get_capacity(); total_usage += _shards[i]->get_usage(); total_lookup_count += _shards[i]->get_lookup_count(); total_hit_count += _shards[i]->get_hit_count(); total_element_count += _shards[i]->get_element_count(); + total_miss_count += _shards[i]->get_miss_count(); + total_stampede_count += _shards[i]->get_stampede_count(); } cache_capacity->set_value(capacity); @@ -727,6 +749,8 @@ void ShardedLRUCache::update_cache_metrics() const { cache_element_count->set_value(total_element_count); cache_lookup_count->set_value(total_lookup_count); cache_hit_count->set_value(total_hit_count); + cache_miss_count->set_value(total_miss_count); + cache_stampede_count->set_value(total_stampede_count); cache_usage_ratio->set_value(capacity == 0 ? 
0 : ((double)total_usage / capacity)); cache_hit_ratio->set_value( total_lookup_count == 0 ? 0 : ((double)total_hit_count / total_lookup_count)); diff --git a/be/src/olap/lru_cache.h b/be/src/olap/lru_cache.h index ba2dd2b5c52c562..303a4cf2065ef96 100644 --- a/be/src/olap/lru_cache.h +++ b/be/src/olap/lru_cache.h @@ -350,6 +350,9 @@ class LRUCache { uint64_t get_lookup_count(); uint64_t get_hit_count(); + uint64_t get_miss_count(); + uint64_t get_stampede_count(); + size_t get_usage(); size_t get_capacity(); size_t get_element_count(); @@ -384,6 +387,8 @@ class LRUCache { uint64_t _lookup_count = 0; // number of cache lookups uint64_t _hit_count = 0; // number of cache hits + uint64_t _miss_count = 0; // number of cache misses + uint64_t _stampede_count = 0; CacheValueTimeExtractor _cache_value_time_extractor; bool _cache_value_check_timestamp = false; @@ -444,6 +449,8 @@ class ShardedLRUCache : public Cache { DoubleGauge* cache_usage_ratio = nullptr; IntAtomicCounter* cache_lookup_count = nullptr; IntAtomicCounter* cache_hit_count = nullptr; + IntAtomicCounter* cache_miss_count = nullptr; + IntAtomicCounter* cache_stampede_count = nullptr; DoubleGauge* cache_hit_ratio = nullptr; // bvars std::unique_ptr> _hit_count_bvar; diff --git a/be/src/olap/olap_common.h b/be/src/olap/olap_common.h index c1d3038050fbd40..d3bd0f0a3a24363 100644 --- a/be/src/olap/olap_common.h +++ b/be/src/olap/olap_common.h @@ -305,24 +305,22 @@ struct OlapReaderStatistics { // block_load_ns // block_init_ns // block_init_seek_ns - // block_conditions_filtered_ns - // first_read_ns - // block_first_read_seek_ns + // generate_row_ranges_ns + // predicate_column_read_ns + // predicate_column_read_seek_ns // lazy_read_ns // block_lazy_read_seek_ns int64_t block_init_ns = 0; int64_t block_init_seek_num = 0; int64_t block_init_seek_ns = 0; - int64_t first_read_ns = 0; - int64_t second_read_ns = 0; - int64_t block_first_read_seek_num = 0; - int64_t block_first_read_seek_ns = 0; + int64_t 
predicate_column_read_ns = 0; + int64_t non_predicate_read_ns = 0; + int64_t predicate_column_read_seek_num = 0; + int64_t predicate_column_read_seek_ns = 0; int64_t lazy_read_ns = 0; int64_t block_lazy_read_seek_num = 0; int64_t block_lazy_read_seek_ns = 0; - int64_t block_convert_ns = 0; - int64_t raw_rows_read = 0; int64_t rows_vec_cond_filtered = 0; @@ -351,11 +349,10 @@ struct OlapReaderStatistics { int64_t rows_del_by_bitmap = 0; // the number of rows filtered by various column indexes. int64_t rows_conditions_filtered = 0; - int64_t block_conditions_filtered_ns = 0; - int64_t block_conditions_filtered_bf_ns = 0; - int64_t block_conditions_filtered_zonemap_ns = 0; - int64_t block_conditions_filtered_zonemap_rp_ns = 0; - int64_t block_conditions_filtered_dict_ns = 0; + int64_t generate_row_ranges_ns = 0; + int64_t generate_row_ranges_by_bf_ns = 0; + int64_t generate_row_ranges_by_zonemap_ns = 0; + int64_t generate_row_ranges_by_dict_ns = 0; int64_t index_load_ns = 0; @@ -372,7 +369,6 @@ struct OlapReaderStatistics { int64_t inverted_index_query_cache_miss = 0; int64_t inverted_index_query_null_bitmap_timer = 0; int64_t inverted_index_query_bitmap_copy_timer = 0; - int64_t inverted_index_query_bitmap_op_timer = 0; int64_t inverted_index_searcher_open_timer = 0; int64_t inverted_index_searcher_search_timer = 0; int64_t inverted_index_searcher_cache_hit = 0; diff --git a/be/src/olap/olap_server.cpp b/be/src/olap/olap_server.cpp index 020d151d16b849a..8fae8887d7a7722 100644 --- a/be/src/olap/olap_server.cpp +++ b/be/src/olap/olap_server.cpp @@ -78,6 +78,7 @@ #include "runtime/memory/cache_manager.h" #include "runtime/memory/global_memory_arbitrator.h" #include "util/countdown_latch.h" +#include "util/debug_points.h" #include "util/doris_metrics.h" #include "util/mem_info.h" #include "util/thread.h" @@ -1134,6 +1135,8 @@ Status StorageEngine::submit_seg_compaction_task(std::shared_ptrget_tablet(tablet_id); + 
DBUG_EXECUTE_IF("StorageEngine::process_index_change_task_tablet_nullptr", + { tablet = nullptr; }) if (tablet == nullptr) { LOG(WARNING) << "tablet: " << tablet_id << " not exist"; return Status::InternalError("tablet not exist, tablet_id={}.", tablet_id); diff --git a/be/src/olap/primary_key_index.cpp b/be/src/olap/primary_key_index.cpp index d3554cae15d66a4..9d40ff5a8fad51b 100644 --- a/be/src/olap/primary_key_index.cpp +++ b/be/src/olap/primary_key_index.cpp @@ -17,6 +17,7 @@ #include "olap/primary_key_index.h" +#include #include #include @@ -95,7 +96,8 @@ Status PrimaryKeyIndexReader::parse_index(io::FileReaderSPtr file_reader, // parse primary key index _index_reader.reset(new segment_v2::IndexedColumnReader(file_reader, meta.primary_key_index())); _index_reader->set_is_pk_index(true); - RETURN_IF_ERROR(_index_reader->load(!config::disable_pk_storage_page_cache, false)); + RETURN_IF_ERROR(_index_reader->load(!config::disable_pk_storage_page_cache, false, + _pk_index_load_stats)); _index_parsed = true; return Status::OK(); @@ -107,7 +109,8 @@ Status PrimaryKeyIndexReader::parse_bf(io::FileReaderSPtr file_reader, segment_v2::ColumnIndexMetaPB column_index_meta = meta.bloom_filter_index(); segment_v2::BloomFilterIndexReader bf_index_reader(std::move(file_reader), column_index_meta.bloom_filter_index()); - RETURN_IF_ERROR(bf_index_reader.load(!config::disable_pk_storage_page_cache, false)); + RETURN_IF_ERROR(bf_index_reader.load(!config::disable_pk_storage_page_cache, false, + _pk_index_load_stats)); std::unique_ptr bf_iter; RETURN_IF_ERROR(bf_index_reader.new_iterator(&bf_iter)); RETURN_IF_ERROR(bf_iter->read_bloom_filter(0, &_bf)); diff --git a/be/src/olap/primary_key_index.h b/be/src/olap/primary_key_index.h index b5eb13131b73a09..dcbbc5f30625f4e 100644 --- a/be/src/olap/primary_key_index.h +++ b/be/src/olap/primary_key_index.h @@ -25,6 +25,7 @@ #include "common/status.h" #include "io/fs/file_reader_writer_fwd.h" +#include "olap/olap_common.h" #include 
"olap/rowset/segment_v2/bloom_filter.h" #include "olap/rowset/segment_v2/bloom_filter_index_writer.h" #include "olap/rowset/segment_v2/indexed_column_reader.h" @@ -97,7 +98,8 @@ class PrimaryKeyIndexBuilder { class PrimaryKeyIndexReader { public: - PrimaryKeyIndexReader() : _index_parsed(false), _bf_parsed(false) {} + PrimaryKeyIndexReader(OlapReaderStatistics* pk_index_load_stats = nullptr) + : _index_parsed(false), _bf_parsed(false), _pk_index_load_stats(pk_index_load_stats) {} ~PrimaryKeyIndexReader() { segment_v2::g_pk_total_bloom_filter_num << -static_cast(_bf_num); @@ -111,9 +113,10 @@ class PrimaryKeyIndexReader { Status parse_bf(io::FileReaderSPtr file_reader, const segment_v2::PrimaryKeyIndexMetaPB& meta); - Status new_iterator(std::unique_ptr* index_iterator) const { + Status new_iterator(std::unique_ptr* index_iterator, + OlapReaderStatistics* stats = nullptr) const { DCHECK(_index_parsed); - index_iterator->reset(new segment_v2::IndexedColumnIterator(_index_reader.get())); + index_iterator->reset(new segment_v2::IndexedColumnIterator(_index_reader.get(), stats)); return Status::OK(); } @@ -152,6 +155,7 @@ class PrimaryKeyIndexReader { std::unique_ptr _bf; size_t _bf_num = 0; uint64 _bf_bytes = 0; + OlapReaderStatistics* _pk_index_load_stats = nullptr; }; } // namespace doris diff --git a/be/src/olap/push_handler.cpp b/be/src/olap/push_handler.cpp index 575b002b2f6086b..56d167459f5be7e 100644 --- a/be/src/olap/push_handler.cpp +++ b/be/src/olap/push_handler.cpp @@ -471,7 +471,7 @@ Status PushBrokerReader::_init_src_block() { } Status PushBrokerReader::_cast_to_input_block() { - size_t idx = 0; + uint32_t idx = 0; for (auto& slot_desc : _src_slot_descs) { if (_name_to_col_type.find(slot_desc->col_name()) == _name_to_col_type.end()) { continue; diff --git a/be/src/olap/rowset/beta_rowset.cpp b/be/src/olap/rowset/beta_rowset.cpp index 209aca7fb03b4c7..bbb2ca72b4ae7fb 100644 --- a/be/src/olap/rowset/beta_rowset.cpp +++ b/be/src/olap/rowset/beta_rowset.cpp @@ 
-81,12 +81,7 @@ Status BetaRowset::get_inverted_index_size(size_t* index_size) { } if (_schema->get_inverted_index_storage_format() == InvertedIndexStorageFormatPB::V1) { - auto indices = _schema->indexes(); - for (auto& index : indices) { - // only get file_size for inverted index - if (index.index_type() != IndexType::INVERTED) { - continue; - } + for (const auto& index : _schema->inverted_indexes()) { for (int seg_id = 0; seg_id < num_segments(); ++seg_id) { auto seg_path = DORIS_TRY(segment_path(seg_id)); int64_t file_size = 0; @@ -94,7 +89,7 @@ Status BetaRowset::get_inverted_index_size(size_t* index_size) { std::string inverted_index_file_path = InvertedIndexDescriptor::get_index_file_path_v1( InvertedIndexDescriptor::get_index_file_path_prefix(seg_path), - index.index_id(), index.get_index_suffix()); + index->index_id(), index->get_index_suffix()); RETURN_IF_ERROR(fs->file_size(inverted_index_file_path, &file_size)); *index_size += file_size; } @@ -122,7 +117,7 @@ void BetaRowset::clear_inverted_index_cache() { auto index_path_prefix = InvertedIndexDescriptor::get_index_file_path_prefix(*seg_path); for (const auto& column : tablet_schema()->columns()) { - const TabletIndex* index_meta = tablet_schema()->get_inverted_index(*column); + const TabletIndex* index_meta = tablet_schema()->inverted_index(*column); if (index_meta) { auto inverted_index_file_cache_key = InvertedIndexDescriptor::get_index_file_cache_key( @@ -183,8 +178,9 @@ Status BetaRowset::load_segment(int64_t seg_id, segment_v2::SegmentSharedPtr* se .file_size = _rowset_meta->segment_file_size(seg_id), }; - auto s = segment_v2::Segment::open(fs, seg_path, seg_id, rowset_id(), _schema, reader_options, - segment, _rowset_meta->inverted_index_file_info(seg_id)); + auto s = segment_v2::Segment::open(fs, seg_path, _rowset_meta->tablet_id(), seg_id, rowset_id(), + _schema, reader_options, segment, + _rowset_meta->inverted_index_file_info(seg_id)); if (!s.ok()) { LOG(WARNING) << "failed to open segment. 
" << seg_path << " under rowset " << rowset_id() << " : " << s.to_string(); @@ -226,7 +222,7 @@ Status BetaRowset::remove() { if (_schema->get_inverted_index_storage_format() == InvertedIndexStorageFormatPB::V1) { for (auto& column : _schema->columns()) { - const TabletIndex* index_meta = _schema->get_inverted_index(*column); + const TabletIndex* index_meta = _schema->inverted_index(*column); if (index_meta) { std::string inverted_index_file = InvertedIndexDescriptor::get_index_file_path_v1( @@ -310,22 +306,19 @@ Status BetaRowset::link_files_to(const std::string& dir, RowsetId new_rowset_id, return status; }); if (_schema->get_inverted_index_storage_format() == InvertedIndexStorageFormatPB::V1) { - for (const auto& index : _schema->indexes()) { - if (index.index_type() != IndexType::INVERTED) { - continue; - } - auto index_id = index.index_id(); + for (const auto& index : _schema->inverted_indexes()) { + auto index_id = index->index_id(); if (without_index_uids != nullptr && without_index_uids->count(index_id)) { continue; } std::string inverted_index_src_file_path = InvertedIndexDescriptor::get_index_file_path_v1( InvertedIndexDescriptor::get_index_file_path_prefix(src_path), - index_id, index.get_index_suffix()); + index_id, index->get_index_suffix()); std::string inverted_index_dst_file_path = InvertedIndexDescriptor::get_index_file_path_v1( InvertedIndexDescriptor::get_index_file_path_prefix(dst_path), - index_id, index.get_index_suffix()); + index_id, index->get_index_suffix()); bool index_file_exists = true; RETURN_IF_ERROR(local_fs->exists(inverted_index_src_file_path, &index_file_exists)); if (index_file_exists) { @@ -404,7 +397,7 @@ Status BetaRowset::copy_files_to(const std::string& dir, const RowsetId& new_row if (_schema->get_inverted_index_storage_format() == InvertedIndexStorageFormatPB::V1) { for (auto& column : _schema->columns()) { // if (column.has_inverted_index()) { - const TabletIndex* index_meta = _schema->get_inverted_index(*column); + const 
TabletIndex* index_meta = _schema->inverted_index(*column); if (index_meta) { std::string inverted_index_src_file_path = InvertedIndexDescriptor::get_index_file_path_v1( @@ -463,7 +456,7 @@ Status BetaRowset::upload_to(const StorageResource& dest_fs, const RowsetId& new if (_schema->get_inverted_index_storage_format() == InvertedIndexStorageFormatPB::V1) { for (auto& column : _schema->columns()) { // if (column.has_inverted_index()) { - const TabletIndex* index_meta = _schema->get_inverted_index(*column); + const TabletIndex* index_meta = _schema->inverted_index(*column); if (index_meta) { std::string remote_inverted_index_file = InvertedIndexDescriptor::get_index_file_path_v1( @@ -497,7 +490,7 @@ Status BetaRowset::upload_to(const StorageResource& dest_fs, const RowsetId& new auto st = dest_fs.fs->batch_upload(local_paths, dest_paths); if (st.ok()) { DorisMetrics::instance()->upload_rowset_count->increment(1); - DorisMetrics::instance()->upload_total_byte->increment(data_disk_size()); + DorisMetrics::instance()->upload_total_byte->increment(total_disk_size()); } else { DorisMetrics::instance()->upload_fail_count->increment(1); } @@ -543,8 +536,8 @@ Status BetaRowset::check_current_rowset_segment() { .file_size = _rowset_meta->segment_file_size(seg_id), }; - auto s = segment_v2::Segment::open(fs, seg_path, seg_id, rowset_id(), _schema, - reader_options, &segment, + auto s = segment_v2::Segment::open(fs, seg_path, _rowset_meta->tablet_id(), seg_id, + rowset_id(), _schema, reader_options, &segment, _rowset_meta->inverted_index_file_info(seg_id)); if (!s.ok()) { LOG(WARNING) << "segment can not be opened. 
file=" << seg_path; @@ -612,14 +605,11 @@ Status BetaRowset::add_to_binlog() { linked_success_files.push_back(binlog_file); if (_schema->get_inverted_index_storage_format() == InvertedIndexStorageFormatPB::V1) { - for (const auto& index : _schema->indexes()) { - if (index.index_type() != IndexType::INVERTED) { - continue; - } - auto index_id = index.index_id(); + for (const auto& index : _schema->inverted_indexes()) { + auto index_id = index->index_id(); auto index_file = InvertedIndexDescriptor::get_index_file_path_v1( InvertedIndexDescriptor::get_index_file_path_prefix(seg_file), index_id, - index.get_index_suffix()); + index->get_index_suffix()); auto binlog_index_file = (std::filesystem::path(binlog_dir) / std::filesystem::path(index_file).filename()) .string(); @@ -660,7 +650,7 @@ Status BetaRowset::calc_file_crc(uint32_t* crc_value, int64_t* file_count) { file_paths.emplace_back(seg_path); if (_schema->get_inverted_index_storage_format() == InvertedIndexStorageFormatPB::V1) { for (auto& column : _schema->columns()) { - const TabletIndex* index_meta = _schema->get_inverted_index(*column); + const TabletIndex* index_meta = _schema->inverted_index(*column); if (index_meta) { std::string inverted_index_file = InvertedIndexDescriptor::get_index_file_path_v1( @@ -804,7 +794,7 @@ Status BetaRowset::show_nested_index_file(rapidjson::Value* rowset_value, } else { rapidjson::Value indices(rapidjson::kArrayType); for (auto column : _rowset_meta->tablet_schema()->columns()) { - const auto* index_meta = _rowset_meta->tablet_schema()->get_inverted_index(*column); + const auto* index_meta = _rowset_meta->tablet_schema()->inverted_index(*column); if (index_meta == nullptr) { continue; } diff --git a/be/src/olap/rowset/beta_rowset_writer.cpp b/be/src/olap/rowset/beta_rowset_writer.cpp index 5d1b80f8cd7b235..198b4e8595ed207 100644 --- a/be/src/olap/rowset/beta_rowset_writer.cpp +++ b/be/src/olap/rowset/beta_rowset_writer.cpp @@ -81,7 +81,7 @@ void 
build_rowset_meta_with_spec_field(RowsetMeta& rowset_meta, const RowsetMeta& spec_rowset_meta) { rowset_meta.set_num_rows(spec_rowset_meta.num_rows()); rowset_meta.set_total_disk_size(spec_rowset_meta.total_disk_size()); - rowset_meta.set_data_disk_size(spec_rowset_meta.total_disk_size()); + rowset_meta.set_data_disk_size(spec_rowset_meta.data_disk_size()); rowset_meta.set_index_disk_size(spec_rowset_meta.index_disk_size()); // TODO write zonemap to meta rowset_meta.set_empty(spec_rowset_meta.num_rows() == 0); @@ -189,13 +189,67 @@ Result> SegmentFileCollection::segments_file_size(int seg_id return ResultError(st); } +InvertedIndexFileCollection::~InvertedIndexFileCollection() = default; + +Status InvertedIndexFileCollection::add(int seg_id, InvertedIndexFileWriterPtr&& index_writer) { + std::lock_guard lock(_lock); + if (_inverted_index_file_writers.find(seg_id) != _inverted_index_file_writers.end()) + [[unlikely]] { + DCHECK(false); + return Status::InternalError("The seg_id already exists, seg_id is {}", seg_id); + } + _inverted_index_file_writers.emplace(seg_id, std::move(index_writer)); + return Status::OK(); +} + +Status InvertedIndexFileCollection::close() { + std::lock_guard lock(_lock); + for (auto&& [id, writer] : _inverted_index_file_writers) { + RETURN_IF_ERROR(writer->close()); + _total_size += writer->get_index_file_total_size(); + } + + return Status::OK(); +} + +Result> +InvertedIndexFileCollection::inverted_index_file_info(int seg_id_offset) { + std::lock_guard lock(_lock); + + Status st; + std::vector idx_file_info(_inverted_index_file_writers.size()); + bool succ = std::all_of( + _inverted_index_file_writers.begin(), _inverted_index_file_writers.end(), + [&](auto&& it) { + auto&& [seg_id, writer] = it; + + int idx = seg_id - seg_id_offset; + if (idx >= idx_file_info.size()) [[unlikely]] { + auto err_msg = + fmt::format("invalid seg_id={} num_file_writers={} seg_id_offset={}", + seg_id, idx_file_info.size(), seg_id_offset); + DCHECK(false) << 
err_msg; + st = Status::InternalError(err_msg); + return false; + } + idx_file_info[idx] = _inverted_index_file_writers[seg_id]->get_index_file_info(); + return true; + }); + + if (succ) { + return idx_file_info; + } + + return ResultError(st); +} + BaseBetaRowsetWriter::BaseBetaRowsetWriter() : _num_segment(0), _segment_start_id(0), _num_rows_written(0), _total_data_size(0), _total_index_size(0), - _segment_creator(_context, _seg_files, _idx_files_info) {} + _segment_creator(_context, _seg_files, _idx_files) {} BetaRowsetWriter::BetaRowsetWriter(StorageEngine& engine) : _engine(engine), _segcompaction_worker(std::make_shared(this)) {} @@ -282,8 +336,7 @@ Status BaseBetaRowsetWriter::_generate_delete_bitmap(int32_t segment_id) { LOG(INFO) << "[Memtable Flush] construct delete bitmap tablet: " << _context.tablet->tablet_id() << ", rowset_ids: " << _context.mow_context->rowset_ids.size() << ", cur max_version: " << _context.mow_context->max_version - << ", transaction_id: " << _context.mow_context->txn_id << ", delete_bitmap_count: " - << _context.tablet->tablet_meta()->delete_bitmap().get_delete_bitmap_count() + << ", transaction_id: " << _context.mow_context->txn_id << ", cost: " << watch.get_elapse_time_us() << "(us), total rows: " << total_rows; return Status::OK(); } @@ -315,7 +368,8 @@ Status BetaRowsetWriter::_load_noncompacted_segment(segment_v2::SegmentSharedPtr .is_doris_table = true, .cache_base_path {}, }; - auto s = segment_v2::Segment::open(io::global_local_filesystem(), path, segment_id, rowset_id(), + auto s = segment_v2::Segment::open(io::global_local_filesystem(), path, + _rowset_meta->tablet_id(), segment_id, rowset_id(), _context.tablet_schema, reader_options, &segment); if (!s.ok()) { LOG(WARNING) << "failed to open segment. 
" << path << ":" << s; @@ -493,8 +547,8 @@ Status BetaRowsetWriter::_rename_compacted_indices(int64_t begin, int64_t end, u } // rename remaining inverted index files for (auto column : _context.tablet_schema->columns()) { - if (_context.tablet_schema->has_inverted_index(*column)) { - const auto* index_info = _context.tablet_schema->get_inverted_index(*column); + if (const auto& index_info = _context.tablet_schema->inverted_index(*column); + index_info != nullptr) { auto index_id = index_info->index_id(); if (_context.tablet_schema->get_inverted_index_storage_format() == InvertedIndexStorageFormatPB::V1) { @@ -728,7 +782,6 @@ Status BetaRowsetWriter::_close_file_writers() { Status BetaRowsetWriter::build(RowsetSharedPtr& rowset) { RETURN_IF_ERROR(_close_file_writers()); - const auto total_segment_num = _num_segment - _segcompacted_point + 1 + _num_segcompacted; RETURN_NOT_OK_STATUS_WITH_WARN(_check_segment_number_limit(total_segment_num), "too many segments when build new rowset"); @@ -748,12 +801,15 @@ Status BetaRowsetWriter::build(RowsetSharedPtr& rowset) { : _context.tablet_schema; _rowset_meta->set_tablet_schema(rowset_schema); - if (auto idx_files_info = _idx_files_info.get_inverted_files_info(_segment_start_id); - !idx_files_info.has_value()) [[unlikely]] { - LOG(ERROR) << "expected inverted index files info, but none presents: " - << idx_files_info.error(); - } else { - _rowset_meta->add_inverted_index_files_info(idx_files_info.value()); + // If segment compaction occurs, the idx file info will become inaccurate. 
+ if (rowset_schema->has_inverted_index() && _num_segcompacted == 0) { + if (auto idx_files_info = _idx_files.inverted_index_file_info(_segment_start_id); + !idx_files_info.has_value()) [[unlikely]] { + LOG(ERROR) << "expected inverted index files info, but none presents: " + << idx_files_info.error(); + } else { + _rowset_meta->add_inverted_index_files_info(idx_files_info.value()); + } } RETURN_NOT_OK_STATUS_WITH_WARN(RowsetFactory::create_rowset(rowset_schema, _context.tablet_path, @@ -830,7 +886,8 @@ Status BaseBetaRowsetWriter::_build_rowset_meta(RowsetMeta* rowset_meta, bool ch rowset_meta->set_num_segments(segment_num); rowset_meta->set_num_rows(num_rows_written + _num_rows_written); - rowset_meta->set_total_disk_size(total_data_size + _total_data_size); + rowset_meta->set_total_disk_size(total_data_size + _total_data_size + total_index_size + + _total_index_size); rowset_meta->set_data_disk_size(total_data_size + _total_data_size); rowset_meta->set_index_disk_size(total_index_size + _total_index_size); rowset_meta->set_segments_key_bounds(segments_encoded_key_bounds); @@ -891,7 +948,15 @@ Status BaseBetaRowsetWriter::create_file_writer(uint32_t segment_id, io::FileWri fmt::format("failed to create file = {}, file type = {}", segment_path, file_type)); } -Status BetaRowsetWriter::_create_segment_writer_for_segcompaction( +Status BaseBetaRowsetWriter::create_inverted_index_file_writer( + uint32_t segment_id, InvertedIndexFileWriterPtr* index_file_writer) { + RETURN_IF_ERROR(RowsetWriter::create_inverted_index_file_writer(segment_id, index_file_writer)); + // used for inverted index format v1 + (*index_file_writer)->set_file_writer_opts(_context.get_file_writer_options()); + return Status::OK(); +} + +Status BetaRowsetWriter::create_segment_writer_for_segcompaction( std::unique_ptr* writer, int64_t begin, int64_t end) { DCHECK(begin >= 0 && end >= 0); std::string path = BetaRowset::local_segment_path_segcompacted(_context.tablet_path, @@ -899,6 +964,22 @@ 
Status BetaRowsetWriter::_create_segment_writer_for_segcompaction( io::FileWriterPtr file_writer; RETURN_IF_ERROR(_create_file_writer(path, file_writer)); + InvertedIndexFileWriterPtr index_file_writer; + if (_context.tablet_schema->has_inverted_index()) { + io::FileWriterPtr idx_file_writer; + if (_context.tablet_schema->get_inverted_index_storage_format() != + InvertedIndexStorageFormatPB::V1) { + std::string prefix = + std::string {InvertedIndexDescriptor::get_index_file_path_prefix(path)}; + std::string index_path = InvertedIndexDescriptor::get_index_file_path_v2(prefix); + RETURN_IF_ERROR(_create_file_writer(index_path, idx_file_writer)); + } + index_file_writer = std::make_unique( + _context.fs(), path, _context.rowset_id.to_string(), _num_segcompacted, + _context.tablet_schema->get_inverted_index_storage_format(), + std::move(idx_file_writer)); + } + segment_v2::SegmentWriterOptions writer_options; writer_options.enable_unique_key_merge_on_write = _context.enable_unique_key_merge_on_write; writer_options.rowset_ctx = &_context; @@ -907,15 +988,19 @@ Status BetaRowsetWriter::_create_segment_writer_for_segcompaction( writer_options.max_rows_per_segment = _context.max_rows_per_segment; writer_options.mow_ctx = _context.mow_context; - *writer = std::make_unique(file_writer.get(), _num_segcompacted, - _context.tablet_schema, _context.tablet, - _context.data_dir, writer_options); + *writer = std::make_unique( + file_writer.get(), _num_segcompacted, _context.tablet_schema, _context.tablet, + _context.data_dir, writer_options, index_file_writer.get()); if (auto& seg_writer = _segcompaction_worker->get_file_writer(); seg_writer != nullptr && seg_writer->state() != io::FileWriter::State::CLOSED) { RETURN_IF_ERROR(_segcompaction_worker->get_file_writer()->close()); } _segcompaction_worker->get_file_writer().reset(file_writer.release()); - + if (auto& idx_file_writer = _segcompaction_worker->get_inverted_index_file_writer(); + idx_file_writer != nullptr) { + 
RETURN_IF_ERROR(idx_file_writer->close()); + } + _segcompaction_worker->get_inverted_index_file_writer().reset(index_file_writer.release()); return Status::OK(); } @@ -1005,11 +1090,13 @@ Status BetaRowsetWriter::flush_segment_writer_for_segcompaction( return Status::Error("failed to finalize segment: {}", s.to_string()); } + int64_t inverted_index_file_size = 0; + RETURN_IF_ERROR((*writer)->close_inverted_index(&inverted_index_file_size)); SegmentStatistics segstat; segstat.row_num = row_num; - segstat.data_size = segment_size + (*writer)->get_inverted_index_total_size(); - segstat.index_size = index_size + (*writer)->get_inverted_index_total_size(); + segstat.data_size = segment_size; + segstat.index_size = inverted_index_file_size; segstat.key_bounds = key_bounds; { std::lock_guard lock(_segid_statistics_map_mutex); diff --git a/be/src/olap/rowset/beta_rowset_writer.h b/be/src/olap/rowset/beta_rowset_writer.h index 82e4c9409b4853d..d96301af22630d1 100644 --- a/be/src/olap/rowset/beta_rowset_writer.h +++ b/be/src/olap/rowset/beta_rowset_writer.h @@ -42,6 +42,7 @@ #include "olap/rowset/rowset_writer.h" #include "olap/rowset/rowset_writer_context.h" #include "olap/rowset/segment_creator.h" +#include "segment_v2/inverted_index_file_writer.h" #include "segment_v2/segment.h" #include "util/spinlock.h" @@ -84,58 +85,33 @@ class SegmentFileCollection { bool _closed {false}; }; -// Collect the size of the inverted index files -class InvertedIndexFilesInfo { +class InvertedIndexFileCollection { public: + ~InvertedIndexFileCollection(); + + // `seg_id` -> inverted index file writer + Status add(int seg_id, InvertedIndexFileWriterPtr&& writer); + + // Close all file writers + // If the inverted index file writer is not closed, an error will be thrown during destruction + Status close(); + // Get inverted index file info in segment id order. - // Return the info of inverted index files from seg_id_offset to the last one. 
- Result> get_inverted_files_info(int seg_id_offset) { - std::lock_guard lock(_lock); - - Status st; - std::vector inverted_files_info(_inverted_index_files_info.size()); - bool succ = std::all_of( - _inverted_index_files_info.begin(), _inverted_index_files_info.end(), - [&](auto&& it) { - auto&& [seg_id, info] = it; - - int idx = seg_id - seg_id_offset; - if (idx >= inverted_files_info.size()) [[unlikely]] { - auto err_msg = fmt::format( - "invalid seg_id={} num_inverted_files_info={} seg_id_offset={}", - seg_id, inverted_files_info.size(), seg_id_offset); - DCHECK(false) << err_msg; - st = Status::InternalError(err_msg); - return false; - } - - auto& finfo = inverted_files_info[idx]; - if (finfo.has_index_size() || finfo.index_info_size() > 0) [[unlikely]] { - // File size should not been set - auto err_msg = fmt::format("duplicate seg_id={}", seg_id); - DCHECK(false) << err_msg; - st = Status::InternalError(err_msg); - return false; - } - finfo = info; - return true; - }); - - if (succ) { - return inverted_files_info; - } - - return ResultError(st); - } + // `seg_id_offset` is the offset of the segment id relative to the subscript of `_inverted_index_file_writers`, + // for more details, see `Tablet::create_transient_rowset_writer`. 
+ Result> inverted_index_file_info(int seg_id_offset); - void add_file_info(int seg_id, InvertedIndexFileInfo file_info) { - std::lock_guard lock(_lock); - _inverted_index_files_info.emplace(seg_id, file_info); + // return all inverted index file writers + std::unordered_map& get_file_writers() { + return _inverted_index_file_writers; } + int64_t get_total_index_size() const { return _total_size; } + private: - std::unordered_map _inverted_index_files_info; mutable SpinLock _lock; + std::unordered_map _inverted_index_file_writers; + int64_t _total_size = 0; }; class BaseBetaRowsetWriter : public RowsetWriter { @@ -156,6 +132,9 @@ class BaseBetaRowsetWriter : public RowsetWriter { Status create_file_writer(uint32_t segment_id, io::FileWriterPtr& writer, FileType file_type = FileType::SEGMENT_FILE) override; + Status create_inverted_index_file_writer(uint32_t segment_id, + InvertedIndexFileWriterPtr* writer) override; + Status add_segment(uint32_t segment_id, const SegmentStatistics& segstat, TabletSchemaSPtr flush_schema) override; @@ -215,7 +194,9 @@ class BaseBetaRowsetWriter : public RowsetWriter { return _seg_files.get_file_writers(); } - InvertedIndexFilesInfo& get_inverted_index_files_info() { return _idx_files_info; } + std::unordered_map& inverted_index_file_writers() { + return this->_idx_files.get_file_writers(); + } private: void update_rowset_schema(TabletSchemaSPtr flush_schema); @@ -235,6 +216,15 @@ class BaseBetaRowsetWriter : public RowsetWriter { std::lock_guard l(_segid_statistics_map_mutex); return std::accumulate(_segment_num_rows.begin(), _segment_num_rows.end(), uint64_t(0)); } + // Only during vertical compaction is this method called + // Some index files are written during normal compaction and some files are written during index compaction. + // After all index writes are completed, call this method to write the final compound index file. 
+ Status _close_inverted_index_file_writers() { + RETURN_NOT_OK_STATUS_WITH_WARN(_idx_files.close(), + "failed to close index file when build new rowset"); + this->_total_index_size += _idx_files.get_total_index_size(); + return Status::OK(); + } std::atomic _num_segment; // number of consecutive flushed segments roaring::Roaring _segment_set; // bitmap set to record flushed segment id @@ -242,6 +232,7 @@ class BaseBetaRowsetWriter : public RowsetWriter { int32_t _segment_start_id; // basic write start from 0, partial update may be different SegmentFileCollection _seg_files; + InvertedIndexFileCollection _idx_files; // record rows number of every segment already written, using for rowid // conversion when compaction in unique key with MoW model @@ -269,9 +260,6 @@ class BaseBetaRowsetWriter : public RowsetWriter { int64_t _delete_bitmap_ns = 0; int64_t _segment_writer_ns = 0; - - // map - InvertedIndexFilesInfo _idx_files_info; }; class SegcompactionWorker; @@ -293,6 +281,8 @@ class BetaRowsetWriter : public BaseBetaRowsetWriter { Status flush_segment_writer_for_segcompaction( std::unique_ptr* writer, uint64_t index_size, KeyBoundsPB& key_bounds); + Status create_segment_writer_for_segcompaction( + std::unique_ptr* writer, int64_t begin, int64_t end); bool is_segcompacted() const { return _num_segcompacted > 0; } @@ -303,8 +293,6 @@ class BetaRowsetWriter : public BaseBetaRowsetWriter { Status _check_segment_number_limit(size_t segnum) override; int64_t _num_seg() const override; Status _wait_flying_segcompaction(); - Status _create_segment_writer_for_segcompaction( - std::unique_ptr* writer, int64_t begin, int64_t end); Status _segcompaction_if_necessary(); Status _segcompaction_rename_last_segments(); Status _load_noncompacted_segment(segment_v2::SegmentSharedPtr& segment, int32_t segment_id); diff --git a/be/src/olap/rowset/beta_rowset_writer_v2.cpp b/be/src/olap/rowset/beta_rowset_writer_v2.cpp index 0d0ad435b9efd1b..cb5dd5a5ee272d1 100644 --- 
a/be/src/olap/rowset/beta_rowset_writer_v2.cpp +++ b/be/src/olap/rowset/beta_rowset_writer_v2.cpp @@ -58,7 +58,7 @@ namespace doris { using namespace ErrorCode; BetaRowsetWriterV2::BetaRowsetWriterV2(const std::vector>& streams) - : _segment_creator(_context, _seg_files, _idx_files_info), _streams(streams) {} + : _segment_creator(_context, _seg_files, _idx_files), _streams(streams) {} BetaRowsetWriterV2::~BetaRowsetWriterV2() = default; diff --git a/be/src/olap/rowset/beta_rowset_writer_v2.h b/be/src/olap/rowset/beta_rowset_writer_v2.h index a9e41e603cef63f..78ec4a7dce703c4 100644 --- a/be/src/olap/rowset/beta_rowset_writer_v2.h +++ b/be/src/olap/rowset/beta_rowset_writer_v2.h @@ -154,11 +154,10 @@ class BetaRowsetWriterV2 : public RowsetWriter { std::vector _segments_encoded_key_bounds; SegmentFileCollection _seg_files; + InvertedIndexFileCollection _idx_files; SegmentCreator _segment_creator; - InvertedIndexFilesInfo _idx_files_info; - fmt::memory_buffer vlog_buffer; std::vector> _streams; diff --git a/be/src/olap/rowset/rowset.h b/be/src/olap/rowset/rowset.h index 24e660cd2f72101..e1a2347f6aeaa8b 100644 --- a/be/src/olap/rowset/rowset.h +++ b/be/src/olap/rowset/rowset.h @@ -149,7 +149,8 @@ class Rowset : public std::enable_shared_from_this { int64_t start_version() const { return rowset_meta()->version().first; } int64_t end_version() const { return rowset_meta()->version().second; } size_t index_disk_size() const { return rowset_meta()->index_disk_size(); } - size_t data_disk_size() const { return rowset_meta()->total_disk_size(); } + size_t data_disk_size() const { return rowset_meta()->data_disk_size(); } + size_t total_disk_size() const { return rowset_meta()->total_disk_size(); } bool empty() const { return rowset_meta()->empty(); } bool zero_num_rows() const { return rowset_meta()->num_rows() == 0; } size_t num_rows() const { return rowset_meta()->num_rows(); } diff --git a/be/src/olap/rowset/rowset_meta.cpp b/be/src/olap/rowset/rowset_meta.cpp index 
1843fb8a41ee083..6bed5e800ede4dd 100644 --- a/be/src/olap/rowset/rowset_meta.cpp +++ b/be/src/olap/rowset/rowset_meta.cpp @@ -226,6 +226,7 @@ void RowsetMeta::merge_rowset_meta(const RowsetMeta& other) { set_data_disk_size(data_disk_size() + other.data_disk_size()); set_total_disk_size(total_disk_size() + other.total_disk_size()); set_index_disk_size(index_disk_size() + other.index_disk_size()); + set_total_disk_size(data_disk_size() + index_disk_size()); for (auto&& key_bound : other.get_segments_key_bounds()) { add_segment_key_bounds(key_bound); } @@ -273,20 +274,14 @@ InvertedIndexFileInfo RowsetMeta::inverted_index_file_info(int seg_id) { } void RowsetMeta::add_inverted_index_files_info( - const std::vector& idx_file_info) { + const std::vector& idx_file_info) { _rowset_meta_pb.set_enable_inverted_index_file_info(true); for (auto finfo : idx_file_info) { auto* new_file_info = _rowset_meta_pb.add_inverted_index_file_info(); - *new_file_info = finfo; + *new_file_info = *finfo; } } -void RowsetMeta::update_inverted_index_files_info( - const std::vector& idx_file_info) { - _rowset_meta_pb.clear_inverted_index_file_info(); - add_inverted_index_files_info(idx_file_info); -} - bool operator==(const RowsetMeta& a, const RowsetMeta& b) { if (a._rowset_id != b._rowset_id) return false; if (a._is_removed_from_rowset_meta != b._is_removed_from_rowset_meta) return false; diff --git a/be/src/olap/rowset/rowset_meta.h b/be/src/olap/rowset/rowset_meta.h index 164d42cbb16230a..46121aeae2be6de 100644 --- a/be/src/olap/rowset/rowset_meta.h +++ b/be/src/olap/rowset/rowset_meta.h @@ -364,9 +364,8 @@ class RowsetMeta : public MetadataAdder { return _rowset_meta_pb.inverted_index_file_info(); } - void add_inverted_index_files_info(const std::vector& idx_file_info); - - void update_inverted_index_files_info(const std::vector& idx_file_info); + void add_inverted_index_files_info( + const std::vector& idx_file_info); int64_t get_metadata_size() const override; diff --git 
a/be/src/olap/rowset/rowset_reader_context.h b/be/src/olap/rowset/rowset_reader_context.h index 43a84acea02f79f..fd4fe7a18234f1a 100644 --- a/be/src/olap/rowset/rowset_reader_context.h +++ b/be/src/olap/rowset/rowset_reader_context.h @@ -77,14 +77,12 @@ struct RowsetReaderContext { const DeleteBitmap* delete_bitmap = nullptr; bool record_rowids = false; RowIdConversion* rowid_conversion; - bool is_vertical_compaction = false; bool is_key_column_group = false; const std::set* output_columns = nullptr; RowsetId rowset_id; // slots that cast may be eliminated in storage layer std::map target_cast_type_for_variants; int64_t ttl_seconds = 0; - size_t topn_limit = 0; }; } // namespace doris diff --git a/be/src/olap/rowset/rowset_writer.h b/be/src/olap/rowset/rowset_writer.h index 6861b8ab7e2ce63..ad42982488b3166 100644 --- a/be/src/olap/rowset/rowset_writer.h +++ b/be/src/olap/rowset/rowset_writer.h @@ -31,6 +31,7 @@ #include "olap/column_mapping.h" #include "olap/rowset/rowset.h" #include "olap/rowset/rowset_writer_context.h" +#include "olap/rowset/segment_v2/inverted_index_file_writer.h" #include "olap/tablet_fwd.h" #include "olap/tablet_schema.h" #include "vec/core/block.h" @@ -95,6 +96,24 @@ class RowsetWriter { return Status::NotSupported("RowsetWriter does not support create_file_writer"); } + virtual Status create_inverted_index_file_writer( + uint32_t segment_id, InvertedIndexFileWriterPtr* index_file_writer) { + // Create file writer for the inverted index format v2. 
+ io::FileWriterPtr idx_file_v2_ptr; + if (_context.tablet_schema->get_inverted_index_storage_format() != + InvertedIndexStorageFormatPB::V1) { + RETURN_IF_ERROR( + create_file_writer(segment_id, idx_file_v2_ptr, FileType::INVERTED_INDEX_FILE)); + } + std::string segment_prefix {InvertedIndexDescriptor::get_index_file_path_prefix( + _context.segment_path(segment_id))}; + *index_file_writer = std::make_unique( + _context.fs(), segment_prefix, _context.rowset_id.to_string(), segment_id, + _context.tablet_schema->get_inverted_index_storage_format(), + std::move(idx_file_v2_ptr)); + return Status::OK(); + } + // explicit flush all buffered rows into segment file. // note that `add_row` could also trigger flush when certain conditions are met virtual Status flush() = 0; diff --git a/be/src/olap/rowset/rowset_writer_context.h b/be/src/olap/rowset/rowset_writer_context.h index cb0fda83e607770..df85c11cd356fe9 100644 --- a/be/src/olap/rowset/rowset_writer_context.h +++ b/be/src/olap/rowset/rowset_writer_context.h @@ -108,6 +108,7 @@ struct RowsetWriterContext { std::shared_ptr schema_lock; int64_t compaction_level = 0; + int64_t storage_page_size = segment_v2::STORAGE_PAGE_SIZE_DEFAULT_VALUE; // For local rowset std::string tablet_path; diff --git a/be/src/olap/rowset/segcompaction.cpp b/be/src/olap/rowset/segcompaction.cpp index e5d043d8a224864..330174952b622c7 100644 --- a/be/src/olap/rowset/segcompaction.cpp +++ b/be/src/olap/rowset/segcompaction.cpp @@ -165,8 +165,7 @@ Status SegcompactionWorker::_delete_original_segments(uint32_t begin, uint32_t e } // Delete inverted index files for (auto&& column : schema->columns()) { - if (schema->has_inverted_index(*column)) { - const auto* index_info = schema->get_inverted_index(*column); + if (const auto* index_info = schema->inverted_index(*column); index_info != nullptr) { auto index_id = index_info->index_id(); if (schema->get_inverted_index_storage_format() == InvertedIndexStorageFormatPB::V1) { @@ -232,7 +231,7 @@ Status 
SegcompactionWorker::_check_correctness(OlapReaderStatistics& reader_stat Status SegcompactionWorker::_create_segment_writer_for_segcompaction( std::unique_ptr* writer, uint32_t begin, uint32_t end) { - return _writer->_create_segment_writer_for_segcompaction(writer, begin, end); + return _writer->create_segment_writer_for_segcompaction(writer, begin, end); } Status SegcompactionWorker::_do_compact_segments(SegCompactionCandidatesSharedPtr segments) { diff --git a/be/src/olap/rowset/segcompaction.h b/be/src/olap/rowset/segcompaction.h index 54c5c3758c20c85..5ec74c0e6609635 100644 --- a/be/src/olap/rowset/segcompaction.h +++ b/be/src/olap/rowset/segcompaction.h @@ -25,6 +25,7 @@ #include "olap/merger.h" #include "olap/simple_rowid_conversion.h" #include "olap/tablet.h" +#include "segment_v2/inverted_index_file_writer.h" #include "segment_v2/segment.h" namespace doris { @@ -69,6 +70,9 @@ class SegcompactionWorker { DeleteBitmapPtr get_converted_delete_bitmap() { return _converted_delete_bitmap; } io::FileWriterPtr& get_file_writer() { return _file_writer; } + InvertedIndexFileWriterPtr& get_inverted_index_file_writer() { + return _inverted_index_file_writer; + } // set the cancel flag, tasks already started will not be cancelled. 
bool cancel(); @@ -96,6 +100,7 @@ class SegcompactionWorker { // Currently cloud storage engine doesn't need segcompaction BetaRowsetWriter* _writer = nullptr; io::FileWriterPtr _file_writer; + InvertedIndexFileWriterPtr _inverted_index_file_writer = nullptr; // for unique key mow table std::unique_ptr _rowid_conversion = nullptr; diff --git a/be/src/olap/rowset/segment_creator.cpp b/be/src/olap/rowset/segment_creator.cpp index 1afd3215db42f62..e0eb7534123a860 100644 --- a/be/src/olap/rowset/segment_creator.cpp +++ b/be/src/olap/rowset/segment_creator.cpp @@ -53,8 +53,8 @@ namespace doris { using namespace ErrorCode; SegmentFlusher::SegmentFlusher(RowsetWriterContext& context, SegmentFileCollection& seg_files, - InvertedIndexFilesInfo& idx_files_info) - : _context(context), _seg_files(seg_files), _idx_files_info(idx_files_info) {} + InvertedIndexFileCollection& idx_files) + : _context(context), _seg_files(seg_files), _idx_files(idx_files) {} SegmentFlusher::~SegmentFlusher() = default; @@ -140,13 +140,10 @@ Status SegmentFlusher::_create_segment_writer(std::unique_ptrcreate(segment_id, segment_file_writer)); - io::FileWriterPtr inverted_file_writer; - if (_context.tablet_schema->has_inverted_index() && - _context.tablet_schema->get_inverted_index_storage_format() >= - InvertedIndexStorageFormatPB::V2 && - _context.memtable_on_sink_support_index_v2) { - RETURN_IF_ERROR(_context.file_writer_creator->create(segment_id, inverted_file_writer, - FileType::INVERTED_INDEX_FILE)); + InvertedIndexFileWriterPtr inverted_index_file_writer; + if (_context.tablet_schema->has_inverted_index()) { + RETURN_IF_ERROR( + _context.file_writer_creator->create(segment_id, &inverted_index_file_writer)); } segment_v2::SegmentWriterOptions writer_options; @@ -161,8 +158,11 @@ Status SegmentFlusher::_create_segment_writer(std::unique_ptr( segment_file_writer.get(), segment_id, _context.tablet_schema, _context.tablet, - _context.data_dir, writer_options, std::move(inverted_file_writer)); + 
_context.data_dir, writer_options, inverted_index_file_writer.get()); RETURN_IF_ERROR(_seg_files.add(segment_id, std::move(segment_file_writer))); + if (_context.tablet_schema->has_inverted_index()) { + RETURN_IF_ERROR(_idx_files.add(segment_id, std::move(inverted_index_file_writer))); + } auto s = writer->init(); if (!s.ok()) { LOG(WARNING) << "failed to init segment writer: " << s.to_string(); @@ -178,13 +178,10 @@ Status SegmentFlusher::_create_segment_writer( io::FileWriterPtr segment_file_writer; RETURN_IF_ERROR(_context.file_writer_creator->create(segment_id, segment_file_writer)); - io::FileWriterPtr inverted_file_writer; - if (_context.tablet_schema->has_inverted_index() && - _context.tablet_schema->get_inverted_index_storage_format() >= - InvertedIndexStorageFormatPB::V2 && - _context.memtable_on_sink_support_index_v2) { - RETURN_IF_ERROR(_context.file_writer_creator->create(segment_id, inverted_file_writer, - FileType::INVERTED_INDEX_FILE)); + InvertedIndexFileWriterPtr inverted_index_file_writer; + if (_context.tablet_schema->has_inverted_index()) { + RETURN_IF_ERROR( + _context.file_writer_creator->create(segment_id, &inverted_index_file_writer)); } segment_v2::VerticalSegmentWriterOptions writer_options; @@ -198,8 +195,11 @@ Status SegmentFlusher::_create_segment_writer( writer = std::make_unique( segment_file_writer.get(), segment_id, _context.tablet_schema, _context.tablet, - _context.data_dir, writer_options, std::move(inverted_file_writer)); + _context.data_dir, writer_options, inverted_index_file_writer.get()); RETURN_IF_ERROR(_seg_files.add(segment_id, std::move(segment_file_writer))); + if (_context.tablet_schema->has_inverted_index()) { + RETURN_IF_ERROR(_idx_files.add(segment_id, std::move(inverted_index_file_writer))); + } auto s = writer->init(); if (!s.ok()) { LOG(WARNING) << "failed to init segment writer: " << s.to_string(); @@ -225,12 +225,16 @@ Status SegmentFlusher::_flush_segment_writer( if (row_num == 0) { return Status::OK(); } - 
uint64_t segment_size; - uint64_t index_size; - Status s = writer->finalize(&segment_size, &index_size); + uint64_t segment_file_size; + uint64_t common_index_size; + Status s = writer->finalize(&segment_file_size, &common_index_size); if (!s.ok()) { return Status::Error(s.code(), "failed to finalize segment: {}", s.to_string()); } + + int64_t inverted_index_file_size = 0; + RETURN_IF_ERROR(writer->close_inverted_index(&inverted_index_file_size)); + VLOG_DEBUG << "tablet_id:" << _context.tablet_id << " flushing filename: " << writer->data_dir_path() << " rowset_id:" << _context.rowset_id; @@ -245,17 +249,20 @@ Status SegmentFlusher::_flush_segment_writer( uint32_t segment_id = writer->segment_id(); SegmentStatistics segstat; segstat.row_num = row_num; - segstat.data_size = segment_size + writer->get_inverted_index_total_size(); - segstat.index_size = index_size + writer->get_inverted_index_total_size(); + segstat.data_size = segment_file_size; + segstat.index_size = inverted_index_file_size; segstat.key_bounds = key_bounds; + LOG(INFO) << "tablet_id:" << _context.tablet_id + << ", flushing rowset_dir: " << _context.tablet_path + << ", rowset_id:" << _context.rowset_id << ", data size:" << segstat.data_size + << ", index size:" << segstat.index_size; - _idx_files_info.add_file_info(segment_id, writer->get_inverted_index_file_info()); writer.reset(); RETURN_IF_ERROR(_context.segment_collector->add(segment_id, segstat, flush_schema)); if (flush_size) { - *flush_size = segment_size + index_size; + *flush_size = segment_file_size; } return Status::OK(); } @@ -271,12 +278,16 @@ Status SegmentFlusher::_flush_segment_writer(std::unique_ptrfinalize(&segment_size, &index_size); + uint64_t segment_file_size; + uint64_t common_index_size; + Status s = writer->finalize(&segment_file_size, &common_index_size); if (!s.ok()) { return Status::Error(s.code(), "failed to finalize segment: {}", s.to_string()); } + + int64_t inverted_index_file_size = 0; + 
RETURN_IF_ERROR(writer->close_inverted_index(&inverted_index_file_size)); + VLOG_DEBUG << "tablet_id:" << _context.tablet_id << " flushing rowset_dir: " << _context.tablet_path << " rowset_id:" << _context.rowset_id; @@ -291,17 +302,20 @@ Status SegmentFlusher::_flush_segment_writer(std::unique_ptrget_segment_id(); SegmentStatistics segstat; segstat.row_num = row_num; - segstat.data_size = segment_size + writer->get_inverted_index_total_size(); - segstat.index_size = index_size + writer->get_inverted_index_total_size(); + segstat.data_size = segment_file_size; + segstat.index_size = inverted_index_file_size; segstat.key_bounds = key_bounds; + LOG(INFO) << "tablet_id:" << _context.tablet_id + << ", flushing rowset_dir: " << _context.tablet_path + << ", rowset_id:" << _context.rowset_id << ", data size:" << segstat.data_size + << ", index size:" << segstat.index_size; - _idx_files_info.add_file_info(segment_id, writer->get_inverted_index_file_info()); writer.reset(); RETURN_IF_ERROR(_context.segment_collector->add(segment_id, segstat, flush_schema)); if (flush_size) { - *flush_size = segment_size + index_size; + *flush_size = segment_file_size; } return Status::OK(); } @@ -330,8 +344,8 @@ int64_t SegmentFlusher::Writer::max_row_to_add(size_t row_avg_size_in_bytes) { } SegmentCreator::SegmentCreator(RowsetWriterContext& context, SegmentFileCollection& seg_files, - InvertedIndexFilesInfo& idx_files_info) - : _segment_flusher(context, seg_files, idx_files_info) {} + InvertedIndexFileCollection& idx_files) + : _segment_flusher(context, seg_files, idx_files) {} Status SegmentCreator::add_block(const vectorized::Block* block) { if (block->rows() == 0) { diff --git a/be/src/olap/rowset/segment_creator.h b/be/src/olap/rowset/segment_creator.h index c862fce87a43bdc..f8afd5798927d41 100644 --- a/be/src/olap/rowset/segment_creator.h +++ b/be/src/olap/rowset/segment_creator.h @@ -29,6 +29,7 @@ #include "io/fs/file_reader_writer_fwd.h" #include "olap/olap_common.h" #include 
"olap/rowset/rowset_writer_context.h" +#include "olap/rowset/segment_v2/inverted_index_file_writer.h" #include "olap/tablet_fwd.h" #include "util/spinlock.h" #include "vec/core/block.h" @@ -46,7 +47,7 @@ class VerticalSegmentWriter; struct SegmentStatistics; class BetaRowsetWriter; class SegmentFileCollection; -class InvertedIndexFilesInfo; +class InvertedIndexFileCollection; class FileWriterCreator { public: @@ -54,9 +55,12 @@ class FileWriterCreator { virtual Status create(uint32_t segment_id, io::FileWriterPtr& file_writer, FileType file_type = FileType::SEGMENT_FILE) = 0; + + virtual Status create(uint32_t segment_id, InvertedIndexFileWriterPtr* file_writer) = 0; }; template + requires std::is_base_of_v class FileWriterCreatorT : public FileWriterCreator { public: explicit FileWriterCreatorT(T* t) : _t(t) {} @@ -66,6 +70,10 @@ class FileWriterCreatorT : public FileWriterCreator { return _t->create_file_writer(segment_id, file_writer, file_type); } + Status create(uint32_t segment_id, InvertedIndexFileWriterPtr* file_writer) override { + return _t->create_inverted_index_file_writer(segment_id, file_writer); + } + private: T* _t = nullptr; }; @@ -79,6 +87,7 @@ class SegmentCollector { }; template + requires std::is_base_of_v class SegmentCollectorT : public SegmentCollector { public: explicit SegmentCollectorT(T* t) : _t(t) {} @@ -95,7 +104,7 @@ class SegmentCollectorT : public SegmentCollector { class SegmentFlusher { public: SegmentFlusher(RowsetWriterContext& context, SegmentFileCollection& seg_files, - InvertedIndexFilesInfo& idx_files_info); + InvertedIndexFileCollection& idx_files); ~SegmentFlusher(); @@ -164,7 +173,7 @@ class SegmentFlusher { private: RowsetWriterContext& _context; SegmentFileCollection& _seg_files; - InvertedIndexFilesInfo& _idx_files_info; + InvertedIndexFileCollection& _idx_files; // written rows by add_block/add_row std::atomic _num_rows_written = 0; @@ -177,7 +186,7 @@ class SegmentFlusher { class SegmentCreator { public: 
SegmentCreator(RowsetWriterContext& context, SegmentFileCollection& seg_files, - InvertedIndexFilesInfo& idx_files_info); + InvertedIndexFileCollection& idx_files); ~SegmentCreator() = default; diff --git a/be/src/olap/rowset/segment_v2/bloom_filter_index_reader.cpp b/be/src/olap/rowset/segment_v2/bloom_filter_index_reader.cpp index 3a1c9f538138f40..609d21ce4f5c224 100644 --- a/be/src/olap/rowset/segment_v2/bloom_filter_index_reader.cpp +++ b/be/src/olap/rowset/segment_v2/bloom_filter_index_reader.cpp @@ -31,8 +31,10 @@ namespace doris { namespace segment_v2 { -Status BloomFilterIndexReader::load(bool use_page_cache, bool kept_in_memory) { +Status BloomFilterIndexReader::load(bool use_page_cache, bool kept_in_memory, + OlapReaderStatistics* index_load_stats) { // TODO yyq: implement a new once flag to avoid status construct. + _index_load_stats = index_load_stats; return _load_once.call([this, use_page_cache, kept_in_memory] { return _load(use_page_cache, kept_in_memory); }); @@ -47,7 +49,7 @@ Status BloomFilterIndexReader::_load(bool use_page_cache, bool kept_in_memory) { const IndexedColumnMetaPB& bf_index_meta = _bloom_filter_index_meta->bloom_filter(); _bloom_filter_reader.reset(new IndexedColumnReader(_file_reader, bf_index_meta)); - RETURN_IF_ERROR(_bloom_filter_reader->load(use_page_cache, kept_in_memory)); + RETURN_IF_ERROR(_bloom_filter_reader->load(use_page_cache, kept_in_memory, _index_load_stats)); update_metadata_size(); return Status::OK(); } diff --git a/be/src/olap/rowset/segment_v2/bloom_filter_index_reader.h b/be/src/olap/rowset/segment_v2/bloom_filter_index_reader.h index a10a910b2e1ac4a..fcb0239a2440fa3 100644 --- a/be/src/olap/rowset/segment_v2/bloom_filter_index_reader.h +++ b/be/src/olap/rowset/segment_v2/bloom_filter_index_reader.h @@ -47,7 +47,8 @@ class BloomFilterIndexReader : public MetadataAdder { _bloom_filter_index_meta.reset(new BloomFilterIndexPB(bloom_filter_index_meta)); } - Status load(bool use_page_cache, bool kept_in_memory); + 
Status load(bool use_page_cache, bool kept_in_memory, + OlapReaderStatistics* _bf_index_load_stats = nullptr); BloomFilterAlgorithmPB algorithm() { return _bloom_filter_index_meta->algorithm(); } @@ -69,6 +70,7 @@ class BloomFilterIndexReader : public MetadataAdder { const TypeInfo* _type_info = nullptr; std::unique_ptr _bloom_filter_index_meta = nullptr; std::unique_ptr _bloom_filter_reader; + OlapReaderStatistics* _index_load_stats = nullptr; }; class BloomFilterIndexIterator { diff --git a/be/src/olap/rowset/segment_v2/column_writer.h b/be/src/olap/rowset/segment_v2/column_writer.h index 62f209db5ad4a57..2d66b940a3893bc 100644 --- a/be/src/olap/rowset/segment_v2/column_writer.h +++ b/be/src/olap/rowset/segment_v2/column_writer.h @@ -63,7 +63,7 @@ struct ColumnWriterOptions { bool need_inverted_index = false; uint8_t gram_size; uint16_t gram_bf_size; - std::vector indexes; + std::vector indexes; // unused const TabletIndex* inverted_index = nullptr; InvertedIndexFileWriter* inverted_index_file_writer; std::string to_string() const { diff --git a/be/src/olap/rowset/segment_v2/indexed_column_reader.cpp b/be/src/olap/rowset/segment_v2/indexed_column_reader.cpp index cce35d0b8d63e60..3028211f2661577 100644 --- a/be/src/olap/rowset/segment_v2/indexed_column_reader.cpp +++ b/be/src/olap/rowset/segment_v2/indexed_column_reader.cpp @@ -62,9 +62,11 @@ int64_t IndexedColumnReader::get_metadata_size() const { return sizeof(IndexedColumnReader) + _meta.ByteSizeLong(); } -Status IndexedColumnReader::load(bool use_page_cache, bool kept_in_memory) { +Status IndexedColumnReader::load(bool use_page_cache, bool kept_in_memory, + OlapReaderStatistics* index_load_stats) { _use_page_cache = use_page_cache; _kept_in_memory = kept_in_memory; + _index_load_stats = index_load_stats; _type_info = get_scalar_type_info((FieldType)_meta.data_type()); if (_type_info == nullptr) { @@ -107,7 +109,7 @@ Status IndexedColumnReader::load_index_page(const PagePointerPB& pp, PageHandle* 
BlockCompressionCodec* local_compress_codec; RETURN_IF_ERROR(get_block_compression_codec(_meta.compression(), &local_compress_codec)); RETURN_IF_ERROR(read_page(PagePointer(pp), handle, &body, &footer, INDEX_PAGE, - local_compress_codec, false)); + local_compress_codec, false, _index_load_stats)); RETURN_IF_ERROR(reader->parse(body, footer.index_page_footer())); _mem_size += body.get_size(); return Status::OK(); @@ -115,8 +117,10 @@ Status IndexedColumnReader::load_index_page(const PagePointerPB& pp, PageHandle* Status IndexedColumnReader::read_page(const PagePointer& pp, PageHandle* handle, Slice* body, PageFooterPB* footer, PageTypePB type, - BlockCompressionCodec* codec, bool pre_decode) const { + BlockCompressionCodec* codec, bool pre_decode, + OlapReaderStatistics* stats) const { OlapReaderStatistics tmp_stats; + OlapReaderStatistics* stats_ptr = stats != nullptr ? stats : &tmp_stats; PageReadOptions opts { .use_page_cache = _use_page_cache, .kept_in_memory = _kept_in_memory, @@ -125,9 +129,10 @@ Status IndexedColumnReader::read_page(const PagePointer& pp, PageHandle* handle, .file_reader = _file_reader.get(), .page_pointer = pp, .codec = codec, - .stats = &tmp_stats, + .stats = stats_ptr, .encoding_info = _encoding_info, - .io_ctx = io::IOContext {.is_index_data = true}, + .io_ctx = io::IOContext {.is_index_data = true, + .file_cache_stats = &stats_ptr->file_cache_stats}, }; if (_is_pk_index) { opts.type = PRIMARY_KEY_INDEX_PAGE; @@ -154,8 +159,8 @@ Status IndexedColumnIterator::_read_data_page(const PagePointer& pp) { PageHandle handle; Slice body; PageFooterPB footer; - RETURN_IF_ERROR( - _reader->read_page(pp, &handle, &body, &footer, DATA_PAGE, _compress_codec, true)); + RETURN_IF_ERROR(_reader->read_page(pp, &handle, &body, &footer, DATA_PAGE, _compress_codec, + true, _stats)); // parse data page // note that page_index is not used in IndexedColumnIterator, so we pass 0 PageDecoderOptions opts; diff --git 
a/be/src/olap/rowset/segment_v2/indexed_column_reader.h b/be/src/olap/rowset/segment_v2/indexed_column_reader.h index 8a57383cd04c36b..c3469f9f6bed0d4 100644 --- a/be/src/olap/rowset/segment_v2/indexed_column_reader.h +++ b/be/src/olap/rowset/segment_v2/indexed_column_reader.h @@ -27,6 +27,7 @@ #include "common/status.h" #include "io/fs/file_reader_writer_fwd.h" +#include "olap/olap_common.h" #include "olap/rowset/segment_v2/common.h" #include "olap/rowset/segment_v2/index_page.h" #include "olap/rowset/segment_v2/page_handle.h" @@ -53,11 +54,13 @@ class IndexedColumnReader : public MetadataAdder { ~IndexedColumnReader(); - Status load(bool use_page_cache, bool kept_in_memory); + Status load(bool use_page_cache, bool kept_in_memory, + OlapReaderStatistics* index_load_stats = nullptr); // read a page specified by `pp' from `file' into `handle' Status read_page(const PagePointer& pp, PageHandle* handle, Slice* body, PageFooterPB* footer, - PageTypePB type, BlockCompressionCodec* codec, bool pre_decode) const; + PageTypePB type, BlockCompressionCodec* codec, bool pre_decode, + OlapReaderStatistics* stats = nullptr) const; int64_t num_values() const { return _num_values; } const EncodingInfo* encoding_info() const { return _encoding_info; } @@ -97,14 +100,17 @@ class IndexedColumnReader : public MetadataAdder { const KeyCoder* _value_key_coder = nullptr; uint64_t _mem_size = 0; bool _is_pk_index = false; + OlapReaderStatistics* _index_load_stats = nullptr; }; class IndexedColumnIterator { public: - explicit IndexedColumnIterator(const IndexedColumnReader* reader) + explicit IndexedColumnIterator(const IndexedColumnReader* reader, + OlapReaderStatistics* stats = nullptr) : _reader(reader), _ordinal_iter(&reader->_ordinal_index_reader), - _value_iter(&reader->_value_index_reader) {} + _value_iter(&reader->_value_index_reader), + _stats(stats) {} // Seek to the given ordinal entry. Entry 0 is the first entry. // Return Status::Error if provided seek point is past the end. 
@@ -153,6 +159,7 @@ class IndexedColumnIterator { ordinal_t _current_ordinal = 0; // iterator owned compress codec, should NOT be shared by threads, initialized before used BlockCompressionCodec* _compress_codec = nullptr; + OlapReaderStatistics* _stats = nullptr; }; } // namespace segment_v2 diff --git a/be/src/olap/rowset/segment_v2/inverted_index/char_filter/char_filter_factory.h b/be/src/olap/rowset/segment_v2/inverted_index/char_filter/char_filter_factory.h index 561054863d74618..bebbea58f72d866 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index/char_filter/char_filter_factory.h +++ b/be/src/olap/rowset/segment_v2/inverted_index/char_filter/char_filter_factory.h @@ -27,6 +27,7 @@ class CharFilterFactory { public: template static lucene::analysis::CharFilter* create(const std::string& name, Args&&... args) { + DBUG_EXECUTE_IF("CharFilterFactory::create_return_nullptr", { return nullptr; }) if (name == INVERTED_INDEX_CHAR_FILTER_CHAR_REPLACE) { return new CharReplaceCharFilter(std::forward(args)...); } diff --git a/be/src/olap/rowset/segment_v2/inverted_index_compaction.cpp b/be/src/olap/rowset/segment_v2/inverted_index_compaction.cpp index 7d1b348b95b3b2c..88a8f2417228bc3 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index_compaction.cpp +++ b/be/src/olap/rowset/segment_v2/inverted_index_compaction.cpp @@ -44,10 +44,16 @@ Status compact_column(int64_t index_id, bool can_use_ram_dir = true; lucene::store::Directory* dir = DorisFSDirectoryFactory::getDirectory( io::global_local_filesystem(), tmp_path.data(), can_use_ram_dir); + DBUG_EXECUTE_IF("compact_column_getDirectory_error", { + _CLTHROWA(CL_ERR_IO, "debug point: compact_column_getDirectory_error in index compaction"); + }) lucene::analysis::SimpleAnalyzer analyzer; auto* index_writer = _CLNEW lucene::index::IndexWriter(dir, &analyzer, true /* create */, true /* closeDirOnShutdown */); - + DBUG_EXECUTE_IF("compact_column_create_index_writer_error", { + _CLTHROWA(CL_ERR_IO, + "debug point: 
compact_column_create_index_writer_error in index compaction"); + }) DCHECK_EQ(src_index_dirs.size(), trans_vec.size()); std::vector tmp_src_index_dirs(src_index_dirs.size()); for (size_t i = 0; i < tmp_src_index_dirs.size(); ++i) { @@ -55,8 +61,16 @@ Status compact_column(int64_t index_id, } index_writer->indexCompaction(tmp_src_index_dirs, dest_index_dirs, trans_vec, dest_segment_num_rows); + DBUG_EXECUTE_IF("compact_column_indexCompaction_error", { + _CLTHROWA(CL_ERR_IO, + "debug point: compact_column_indexCompaction_error in index compaction"); + }) index_writer->close(); + DBUG_EXECUTE_IF("compact_column_index_writer_close_error", { + _CLTHROWA(CL_ERR_IO, + "debug point: compact_column_index_writer_close_error in index compaction"); + }) _CLDELETE(index_writer); // NOTE: need to ref_cnt-- for dir, // when index_writer is destroyed, if closeDir is set, dir will be close diff --git a/be/src/olap/rowset/segment_v2/inverted_index_file_writer.cpp b/be/src/olap/rowset/segment_v2/inverted_index_file_writer.cpp index 7a784a55b862d03..5599faa351dfd6d 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index_file_writer.cpp +++ b/be/src/olap/rowset/segment_v2/inverted_index_file_writer.cpp @@ -17,6 +17,8 @@ #include "olap/rowset/segment_v2/inverted_index_file_writer.h" +#include + #include #include "common/status.h" @@ -44,11 +46,13 @@ Result InvertedIndexFileWriter::open(const TabletIndex* index index_meta->get_index_suffix()); bool exists = false; auto st = local_fs->exists(local_fs_index_path, &exists); + DBUG_EXECUTE_IF("InvertedIndexFileWriter::open_local_fs_exists_error", + { st = Status::Error("debug point: no such file error"); }) if (!st.ok()) { LOG(ERROR) << "index_path:" << local_fs_index_path << " exists error:" << st; return ResultError(st); } - + DBUG_EXECUTE_IF("InvertedIndexFileWriter::open_local_fs_exists_true", { exists = true; }) if (exists) { LOG(ERROR) << "try to init a directory:" << local_fs_index_path << " already exists"; return ResultError( @@ 
-75,6 +79,8 @@ Result InvertedIndexFileWriter::open(const TabletIndex* index } Status InvertedIndexFileWriter::delete_index(const TabletIndex* index_meta) { + DBUG_EXECUTE_IF("InvertedIndexFileWriter::delete_index_index_meta_nullptr", + { index_meta = nullptr; }); if (!index_meta) { return Status::Error("Index metadata is null."); } @@ -84,6 +90,8 @@ Status InvertedIndexFileWriter::delete_index(const TabletIndex* index_meta) { // Check if the specified index exists auto index_it = _indices_dirs.find(std::make_pair(index_id, index_suffix)); + DBUG_EXECUTE_IF("InvertedIndexFileWriter::delete_index_indices_dirs_reach_end", + { index_it = _indices_dirs.end(); }) if (index_it == _indices_dirs.end()) { std::ostringstream errMsg; errMsg << "No inverted index with id " << index_id << " and suffix " << index_suffix @@ -122,6 +130,8 @@ int64_t InvertedIndexFileWriter::headerLength() { } Status InvertedIndexFileWriter::close() { + DCHECK(!_closed) << debug_string(); + _closed = true; if (_indices_dirs.empty()) { return Status::OK(); } @@ -134,7 +144,7 @@ Status InvertedIndexFileWriter::close() { }) if (_storage_format == InvertedIndexStorageFormatPB::V1) { try { - _total_file_size = write_v1(); + RETURN_IF_ERROR(write_v1()); for (const auto& entry : _indices_dirs) { const auto& dir = entry.second; // delete index path, which contains separated inverted index files @@ -149,7 +159,7 @@ Status InvertedIndexFileWriter::close() { } } else { try { - _total_file_size = write_v2(); + RETURN_IF_ERROR(write_v2()); for (const auto& entry : _indices_dirs) { const auto& dir = entry.second; // delete index path, which contains separated inverted index files @@ -196,7 +206,12 @@ void InvertedIndexFileWriter::copyFile(const char* fileName, lucene::store::Dire int64_t bufferLength) { lucene::store::IndexInput* tmp = nullptr; CLuceneError err; - if (!dir->openInput(fileName, tmp, err)) { + auto open = dir->openInput(fileName, tmp, err); + 
DBUG_EXECUTE_IF("InvertedIndexFileWriter::copyFile_openInput_error", { + open = false; + err.set(CL_ERR_IO, "debug point: copyFile_openInput_error"); + }); + if (!open) { throw err; } @@ -212,6 +227,7 @@ void InvertedIndexFileWriter::copyFile(const char* fileName, lucene::store::Dire output->writeBytes(buffer, len); remainder -= len; } + DBUG_EXECUTE_IF("InvertedIndexFileWriter::copyFile_remainder_is_not_zero", { remainder = 10; }); if (remainder != 0) { std::ostringstream errMsg; errMsg << "Non-zero remainder length after copying: " << remainder << " (id: " << fileName @@ -222,6 +238,8 @@ void InvertedIndexFileWriter::copyFile(const char* fileName, lucene::store::Dire int64_t end_ptr = output->getFilePointer(); int64_t diff = end_ptr - start_ptr; + DBUG_EXECUTE_IF("InvertedIndexFileWriter::copyFile_diff_not_equals_length", + { diff = length - 10; }); if (diff != length) { std::ostringstream errMsg; errMsg << "Difference in the output file offsets " << diff @@ -232,7 +250,7 @@ void InvertedIndexFileWriter::copyFile(const char* fileName, lucene::store::Dire input->close(); } -int64_t InvertedIndexFileWriter::write_v1() { +Status InvertedIndexFileWriter::write_v1() { int64_t total_size = 0; for (const auto& entry : _indices_dirs) { const int64_t index_id = entry.first.first; @@ -265,6 +283,8 @@ int64_t InvertedIndexFileWriter::write_v1() { // write file entries to ram directory to get header length lucene::store::RAMDirectory ram_dir; auto* out_idx = ram_dir.createOutput(idx_name.c_str()); + DBUG_EXECUTE_IF("InvertedIndexFileWriter::write_v1_ram_output_is_nullptr", + { out_idx = nullptr; }) if (out_idx == nullptr) { LOG(WARNING) << "Write compound file error: RAMDirectory output is nullptr."; _CLTHROWA(CL_ERR_IO, "Create RAMDirectory output error"); @@ -298,6 +318,8 @@ int64_t InvertedIndexFileWriter::write_v1() { out_dir->set_file_writer_opts(_opts); auto* out = out_dir->createOutput(idx_name.c_str()); + 
DBUG_EXECUTE_IF("InvertedIndexFileWriter::write_v1_out_dir_createOutput_nullptr", + { out = nullptr; }); if (out == nullptr) { LOG(WARNING) << "Write compound file error: CompoundDirectory output is nullptr."; _CLTHROWA(CL_ERR_IO, "Create CompoundDirectory output error"); @@ -349,110 +371,125 @@ int64_t InvertedIndexFileWriter::write_v1() { auto* new_index_info = _file_info.add_index_info(); *new_index_info = index_info; } catch (CLuceneError& err) { - LOG(ERROR) << "CLuceneError occur when close idx file " - << InvertedIndexDescriptor::get_index_file_path_v1(_index_path_prefix, - index_id, index_suffix) + auto index_path = InvertedIndexDescriptor::get_index_file_path_v1( + _index_path_prefix, index_id, index_suffix); + LOG(ERROR) << "CLuceneError occur when write_v1 idx file " << index_path << " error msg: " << err.what(); - throw err; + return Status::Error( + "CLuceneError occur when write_v1 idx file: {}, error msg: {}", index_path, + err.what()); } } - return total_size; + _total_file_size = total_size; + return Status::OK(); } -int64_t InvertedIndexFileWriter::write_v2() { - // Create the output stream to write the compound file - int64_t current_offset = headerLength(); - +Status InvertedIndexFileWriter::write_v2() { io::Path index_path {InvertedIndexDescriptor::get_index_file_path_v2(_index_path_prefix)}; + std::unique_ptr compound_file_output; + try { + // Create the output stream to write the compound file + int64_t current_offset = headerLength(); - auto* out_dir = DorisFSDirectoryFactory::getDirectory(_fs, index_path.parent_path().c_str()); - out_dir->set_file_writer_opts(_opts); + io::Path index_path {InvertedIndexDescriptor::get_index_file_path_v2(_index_path_prefix)}; - std::unique_ptr compound_file_output; - // idx v2 writer != nullptr means memtable on sink node now - if (_idx_v2_writer != nullptr) { + auto* out_dir = + DorisFSDirectoryFactory::getDirectory(_fs, index_path.parent_path().c_str()); + out_dir->set_file_writer_opts(_opts); + + 
std::unique_ptr compound_file_output; + + DCHECK(_idx_v2_writer != nullptr) << "inverted index file writer v2 is nullptr"; compound_file_output = std::unique_ptr( out_dir->createOutputV2(_idx_v2_writer.get())); - } else { - compound_file_output = std::unique_ptr( - out_dir->createOutput(index_path.filename().c_str())); - } - // Write the version number - compound_file_output->writeInt(InvertedIndexStorageFormatPB::V2); + // Write the version number + compound_file_output->writeInt(InvertedIndexStorageFormatPB::V2); - // Write the number of indices - const auto numIndices = static_cast(_indices_dirs.size()); - compound_file_output->writeInt(numIndices); + // Write the number of indices + const auto numIndices = static_cast(_indices_dirs.size()); + compound_file_output->writeInt(numIndices); - std::vector> - file_metadata; // Store file name, offset, file length, and corresponding directory + std::vector> + file_metadata; // Store file name, offset, file length, and corresponding directory - // First, write all index information and file metadata - for (const auto& entry : _indices_dirs) { - const int64_t index_id = entry.first.first; - const auto& index_suffix = entry.first.second; - const auto& dir = entry.second; - std::vector files; - dir->list(&files); + // First, write all index information and file metadata + for (const auto& entry : _indices_dirs) { + const int64_t index_id = entry.first.first; + const auto& index_suffix = entry.first.second; + const auto& dir = entry.second; + std::vector files; + dir->list(&files); - auto it = std::find(files.begin(), files.end(), DorisFSDirectory::WRITE_LOCK_FILE); - if (it != files.end()) { - files.erase(it); - } - // sort file list by file length - std::vector> sorted_files; - for (const auto& file : files) { - sorted_files.emplace_back(file, dir->fileLength(file.c_str())); - } + auto it = std::find(files.begin(), files.end(), DorisFSDirectory::WRITE_LOCK_FILE); + if (it != files.end()) { + files.erase(it); + } + // sort 
file list by file length + std::vector> sorted_files; + for (const auto& file : files) { + sorted_files.emplace_back(file, dir->fileLength(file.c_str())); + } - std::sort(sorted_files.begin(), sorted_files.end(), - [](const std::pair& a, - const std::pair& b) { return (a.second < b.second); }); - - int32_t file_count = sorted_files.size(); - - // Write the index ID and the number of files - compound_file_output->writeLong(index_id); - compound_file_output->writeInt(static_cast(index_suffix.length())); - compound_file_output->writeBytes(reinterpret_cast(index_suffix.data()), - index_suffix.length()); - compound_file_output->writeInt(file_count); - - // Calculate the offset for each file and write the file metadata - for (const auto& file : sorted_files) { - int64_t file_length = dir->fileLength(file.first.c_str()); - compound_file_output->writeInt(static_cast(file.first.length())); - compound_file_output->writeBytes(reinterpret_cast(file.first.data()), - file.first.length()); - compound_file_output->writeLong(current_offset); - compound_file_output->writeLong(file_length); - - file_metadata.emplace_back(file.first, current_offset, file_length, dir.get()); - current_offset += file_length; // Update the data offset + std::sort( + sorted_files.begin(), sorted_files.end(), + [](const std::pair& a, + const std::pair& b) { return (a.second < b.second); }); + + int32_t file_count = sorted_files.size(); + + // Write the index ID and the number of files + compound_file_output->writeLong(index_id); + compound_file_output->writeInt(static_cast(index_suffix.length())); + compound_file_output->writeBytes(reinterpret_cast(index_suffix.data()), + index_suffix.length()); + compound_file_output->writeInt(file_count); + + // Calculate the offset for each file and write the file metadata + for (const auto& file : sorted_files) { + int64_t file_length = dir->fileLength(file.first.c_str()); + compound_file_output->writeInt(static_cast(file.first.length())); + 
compound_file_output->writeBytes( + reinterpret_cast(file.first.data()), file.first.length()); + compound_file_output->writeLong(current_offset); + compound_file_output->writeLong(file_length); + + file_metadata.emplace_back(file.first, current_offset, file_length, dir.get()); + current_offset += file_length; // Update the data offset + } } - } - const int64_t buffer_length = 16384; - uint8_t header_buffer[buffer_length]; + const int64_t buffer_length = 16384; + uint8_t header_buffer[buffer_length]; - // Next, write the file data - for (const auto& info : file_metadata) { - const std::string& file = std::get<0>(info); - auto* dir = std::get<3>(info); + // Next, write the file data + for (const auto& info : file_metadata) { + const std::string& file = std::get<0>(info); + auto* dir = std::get<3>(info); - // Write the actual file data - copyFile(file.c_str(), dir, compound_file_output.get(), header_buffer, buffer_length); - } + // Write the actual file data + copyFile(file.c_str(), dir, compound_file_output.get(), header_buffer, buffer_length); + } - out_dir->close(); - // NOTE: need to decrease ref count, but not to delete here, - // because index cache may get the same directory from DIRECTORIES - _CLDECDELETE(out_dir) - auto compound_file_size = compound_file_output->getFilePointer(); - compound_file_output->close(); - _file_info.set_index_size(compound_file_size); - return compound_file_size; + out_dir->close(); + // NOTE: need to decrease ref count, but not to delete here, + // because index cache may get the same directory from DIRECTORIES + _CLDECDELETE(out_dir) + _total_file_size = compound_file_output->getFilePointer(); + compound_file_output->close(); + _file_info.set_index_size(_total_file_size); + } catch (CLuceneError& err) { + LOG(ERROR) << "CLuceneError occur when close idx file " << index_path + << " error msg: " << err.what(); + if (compound_file_output) { + compound_file_output->close(); + compound_file_output.reset(); + } + return Status::Error( + 
"CLuceneError occur when close idx file: {}, error msg: {}", index_path.c_str(), + err.what()); + } + return Status::OK(); } -} // namespace doris::segment_v2 \ No newline at end of file +} // namespace doris::segment_v2 diff --git a/be/src/olap/rowset/segment_v2/inverted_index_file_writer.h b/be/src/olap/rowset/segment_v2/inverted_index_file_writer.h index 2aceb671d809a77..31e287d6dd3f711 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index_file_writer.h +++ b/be/src/olap/rowset/segment_v2/inverted_index_file_writer.h @@ -38,6 +38,9 @@ class DorisFSDirectory; using InvertedIndexDirectoryMap = std::map, std::unique_ptr>; +class InvertedIndexFileWriter; +using InvertedIndexFileWriterPtr = std::unique_ptr; + class FileInfo { public: std::string filename; @@ -61,12 +64,18 @@ class InvertedIndexFileWriter { Status delete_index(const TabletIndex* index_meta); Status initialize(InvertedIndexDirectoryMap& indices_dirs); ~InvertedIndexFileWriter() = default; - int64_t write_v2(); - int64_t write_v1(); + Status write_v2(); + Status write_v1(); Status close(); int64_t headerLength(); - InvertedIndexFileInfo get_index_file_info() const { return _file_info; } - int64_t get_index_file_total_size() const { return _total_file_size; } + const InvertedIndexFileInfo* get_index_file_info() const { + DCHECK(_closed) << debug_string(); + return &_file_info; + } + int64_t get_index_file_total_size() const { + DCHECK(_closed) << debug_string(); + return _total_file_size; + } const io::FileSystemSPtr& get_fs() const { return _fs; } void sort_files(std::vector& file_infos); void copyFile(const char* fileName, lucene::store::Directory* dir, @@ -75,6 +84,20 @@ class InvertedIndexFileWriter { void set_file_writer_opts(const io::FileWriterOptions& opts) { _opts = opts; } + std::string debug_string() const { + std::stringstream indices_dirs; + for (const auto& [index, dir] : _indices_dirs) { + indices_dirs << "index id is: " << index.first << " , index suffix is: " << index.second + << " , 
index dir is: " << dir->toString(); + } + return fmt::format( + "inverted index file writer debug string: index storage format is: {}, index path " + "prefix is: {}, rowset id is: {}, seg id is: {}, closed is: {}, total file size " + "is: {}, index dirs is: {}", + _storage_format, _index_path_prefix, _rowset_id, _seg_id, _closed, _total_file_size, + indices_dirs.str()); + } + private: InvertedIndexDirectoryMap _indices_dirs; const io::FileSystemSPtr _fs; @@ -82,14 +105,18 @@ class InvertedIndexFileWriter { std::string _rowset_id; int64_t _seg_id; InvertedIndexStorageFormatPB _storage_format; - // v1: all file size - // v2: file size - int64_t _total_file_size = 0; + // write to disk or stream - io::FileWriterPtr _idx_v2_writer; + io::FileWriterPtr _idx_v2_writer = nullptr; io::FileWriterOptions _opts; + // v1: all file size + // v2: file size + int64_t _total_file_size = 0; InvertedIndexFileInfo _file_info; + + // only once + bool _closed = false; }; } // namespace segment_v2 } // namespace doris diff --git a/be/src/olap/rowset/segment_v2/inverted_index_fs_directory.cpp b/be/src/olap/rowset/segment_v2/inverted_index_fs_directory.cpp index f752c5300204de7..ded71c8a6cc73e6 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index_fs_directory.cpp +++ b/be/src/olap/rowset/segment_v2/inverted_index_fs_directory.cpp @@ -183,7 +183,10 @@ DorisFSDirectory::FSIndexInput::SharedHandle::SharedHandle(const char* path) { DorisFSDirectory::FSIndexInput::SharedHandle::~SharedHandle() { if (_reader) { - if (_reader->close().ok()) { + auto st = _reader->close(); + DBUG_EXECUTE_IF("FSIndexInput::~SharedHandle_reader_close_error", + { st = Status::Error("failed to close"); }); + if (st.ok()) { _reader = nullptr; } } @@ -238,10 +241,17 @@ void DorisFSDirectory::FSIndexInput::readInternal(uint8_t* b, const int32_t len) Slice result {b, (size_t)len}; size_t bytes_read = 0; - if (!_handle->_reader->read_at(_pos, result, &bytes_read, &_io_ctx).ok()) { + auto st = 
_handle->_reader->read_at(_pos, result, &bytes_read, &_io_ctx); + DBUG_EXECUTE_IF("DorisFSDirectory::FSIndexInput::readInternal_reader_read_at_error", { + st = Status::InternalError( + "debug point: DorisFSDirectory::FSIndexInput::readInternal_reader_read_at_error"); + }) + if (!st.ok()) { _CLTHROWA(CL_ERR_IO, "read past EOF"); } bufferLength = len; + DBUG_EXECUTE_IF("DorisFSDirectory::FSIndexInput::readInternal_bytes_read_error", + { bytes_read = len + 10; }) if (bytes_read != len) { _CLTHROWA(CL_ERR_IO, "read error"); } @@ -313,6 +323,10 @@ void DorisFSDirectory::FSIndexOutput::flushBuffer(const uint8_t* b, const int32_ _CLTHROWA(CL_ERR_IO, "writer append data when flushBuffer error"); } } else { + DBUG_EXECUTE_IF("DorisFSDirectory::FSIndexOutput::flushBuffer_writer_is_nullptr", + { _writer = nullptr; }) + DBUG_EXECUTE_IF("DorisFSDirectory::FSIndexOutput::flushBuffer_b_is_nullptr", + { b = nullptr; }) if (_writer == nullptr) { LOG(WARNING) << "File writer is nullptr in DorisFSDirectory::FSIndexOutput, " "ignore flush."; @@ -327,8 +341,7 @@ void DorisFSDirectory::FSIndexOutput::close() { try { BufferedIndexOutput::close(); DBUG_EXECUTE_IF( - "DorisFSDirectory::FSIndexOutput._throw_clucene_error_in_bufferedindexoutput_" - "close", + "DorisFSDirectory::FSIndexOutput._throw_clucene_error_in_bufferedindexoutput_close", { _CLTHROWA(CL_ERR_IO, "debug point: test throw error in bufferedindexoutput close"); @@ -342,6 +355,10 @@ void DorisFSDirectory::FSIndexOutput::close() { _writer.reset(nullptr); _CLTHROWA(err.number(), err.what()); } + DBUG_EXECUTE_IF("DorisFSDirectory::FSIndexOutput.set_writer_nullptr", { + LOG(WARNING) << "Dbug execute, set _writer to nullptr"; + _writer = nullptr; + }) if (_writer) { auto ret = _writer->close(); DBUG_EXECUTE_IF("DorisFSDirectory::FSIndexOutput._set_writer_close_status_error", @@ -353,6 +370,7 @@ void DorisFSDirectory::FSIndexOutput::close() { } } else { LOG(WARNING) << "File writer is nullptr, ignore finalize and close."; + 
_CLTHROWA(CL_ERR_IO, "close file writer error, _writer = nullptr"); } _writer.reset(nullptr); } @@ -364,13 +382,9 @@ int64_t DorisFSDirectory::FSIndexOutput::length() const { void DorisFSDirectory::FSIndexOutputV2::init(io::FileWriter* file_writer) { _index_v2_file_writer = file_writer; - DBUG_EXECUTE_IF( - "DorisFSDirectory::FSIndexOutput._throw_clucene_error_in_fsindexoutput_" - "init", - { - _CLTHROWA(CL_ERR_IO, - "debug point: test throw error in fsindexoutput init mock error"); - }) + DBUG_EXECUTE_IF("DorisFSDirectory::FSIndexOutput._throw_clucene_error_in_fsindexoutput_init", { + _CLTHROWA(CL_ERR_IO, "debug point: test throw error in fsindexoutput init mock error"); + }) } DorisFSDirectory::FSIndexOutputV2::~FSIndexOutputV2() {} @@ -393,6 +407,10 @@ void DorisFSDirectory::FSIndexOutputV2::flushBuffer(const uint8_t* b, const int3 _CLTHROWA(CL_ERR_IO, "writer append data when flushBuffer error"); } } else { + DBUG_EXECUTE_IF("DorisFSDirectory::FSIndexOutputV2::flushBuffer_file_writer_is_nullptr", + { _index_v2_file_writer = nullptr; }) + DBUG_EXECUTE_IF("DorisFSDirectory::FSIndexOutputV2::flushBuffer_b_is_nullptr", + { b = nullptr; }) if (_index_v2_file_writer == nullptr) { LOG(WARNING) << "File writer is nullptr in DorisFSDirectory::FSIndexOutputV2, " "ignore flush."; @@ -408,8 +426,7 @@ void DorisFSDirectory::FSIndexOutputV2::close() { try { BufferedIndexOutput::close(); DBUG_EXECUTE_IF( - "DorisFSDirectory::FSIndexOutput._throw_clucene_error_in_bufferedindexoutput_" - "close", + "DorisFSDirectory::FSIndexOutput._throw_clucene_error_in_bufferedindexoutput_close", { _CLTHROWA(CL_ERR_IO, "debug point: test throw error in bufferedindexoutput close"); @@ -422,6 +439,10 @@ void DorisFSDirectory::FSIndexOutputV2::close() { } _CLTHROWA(err.number(), err.what()); } + DBUG_EXECUTE_IF("DorisFSDirectory::FSIndexOutput.set_writer_nullptr", { + LOG(WARNING) << "Dbug execute, set _index_v2_file_writer to nullptr"; + _index_v2_file_writer = nullptr; + }) if 
(_index_v2_file_writer) { auto ret = _index_v2_file_writer->close(); DBUG_EXECUTE_IF("DorisFSDirectory::FSIndexOutput._set_writer_close_status_error", @@ -480,7 +501,16 @@ bool DorisFSDirectory::list(std::vector* names) const { priv_getFN(fl, ""); std::vector files; bool exists; - LOG_AND_THROW_IF_ERROR(_fs->list(fl, true, &files, &exists), "List file IO error"); + auto st = _fs->list(fl, true, &files, &exists); + DBUG_EXECUTE_IF("DorisFSDirectory::list_status_is_not_ok", { + st = Status::Error( + "debug point: DorisFSDirectory::list_status_is_not_ok"); + }) + LOG_AND_THROW_IF_ERROR(st, "List file IO error"); + DBUG_EXECUTE_IF("DorisFSDirectory::list_directory_not_exists", { exists = false; }) + if (!exists) { + LOG_AND_THROW_IF_ERROR(st, fmt::format("Directory {} is not exist", fl)); + } for (auto& file : files) { names->push_back(file.file_name); } @@ -492,7 +522,12 @@ bool DorisFSDirectory::fileExists(const char* name) const { char fl[CL_MAX_DIR]; priv_getFN(fl, name); bool exists = false; - LOG_AND_THROW_IF_ERROR(_fs->exists(fl, &exists), "File exists IO error"); + auto st = _fs->exists(fl, &exists); + DBUG_EXECUTE_IF("DorisFSDirectory::fileExists_status_is_not_ok", { + st = Status::Error( + "debug point: DorisFSDirectory::fileExists_status_is_not_ok"); + }) + LOG_AND_THROW_IF_ERROR(st, "File exists IO error"); return exists; } @@ -518,7 +553,12 @@ void DorisFSDirectory::touchFile(const char* name) { snprintf(buffer, CL_MAX_DIR, "%s%s%s", directory.c_str(), PATH_DELIMITERA, name); io::FileWriterPtr tmp_writer; - LOG_AND_THROW_IF_ERROR(_fs->create_file(buffer, &tmp_writer), "Touch file IO error"); + auto st = _fs->create_file(buffer, &tmp_writer); + DBUG_EXECUTE_IF("DorisFSDirectory::touchFile_status_is_not_ok", { + st = Status::Error( + "debug point: DorisFSDirectory::touchFile_status_is_not_ok"); + }) + LOG_AND_THROW_IF_ERROR(st, "Touch file IO error"); } int64_t DorisFSDirectory::fileLength(const char* name) const { @@ -532,6 +572,10 @@ int64_t 
DorisFSDirectory::fileLength(const char* name) const { if (st.code() == ErrorCode::NOT_FOUND) { _CLTHROWA(CL_ERR_FileNotFound, "File does not exist"); } + DBUG_EXECUTE_IF("DorisFSDirectory::fileLength_status_is_not_ok", { + st = Status::Error( + "debug point: DorisFSDirectory::fileLength_status_is_not_ok"); + }) LOG_AND_THROW_IF_ERROR(st, "Get file size IO error"); return size; } @@ -544,13 +588,21 @@ bool DorisFSDirectory::openInput(const char* name, lucene::store::IndexInput*& r return FSIndexInput::open(_fs, fl, ret, error, bufferSize); } -void DorisFSDirectory::close() {} +void DorisFSDirectory::close() { + DBUG_EXECUTE_IF("DorisFSDirectory::close_close_with_error", + { _CLTHROWA(CL_ERR_IO, "debug_point: close DorisFSDirectory error"); }) +} bool DorisFSDirectory::doDeleteFile(const char* name) { CND_PRECONDITION(directory[0] != 0, "directory is not open"); char fl[CL_MAX_DIR]; priv_getFN(fl, name); - LOG_AND_THROW_IF_ERROR(_fs->delete_file(fl), "Delete file IO error"); + auto st = _fs->delete_file(fl); + DBUG_EXECUTE_IF("DorisFSDirectory::doDeleteFile_status_is_not_ok", { + st = Status::Error( + "debug point: DorisFSDirectory::doDeleteFile_status_is_not_ok"); + }) + LOG_AND_THROW_IF_ERROR(st, "Delete file IO error"); return true; } @@ -558,8 +610,12 @@ bool DorisFSDirectory::deleteDirectory() { CND_PRECONDITION(directory[0] != 0, "directory is not open"); char fl[CL_MAX_DIR]; priv_getFN(fl, ""); - LOG_AND_THROW_IF_ERROR(_fs->delete_directory(fl), - fmt::format("Delete directory {} IO error", fl)); + auto st = _fs->delete_directory(fl); + DBUG_EXECUTE_IF("DorisFSDirectory::deleteDirectory_throw_is_not_directory", { + st = Status::Error( + fmt::format("debug point: {} is not a directory", fl)); + }) + LOG_AND_THROW_IF_ERROR(st, fmt::format("Delete directory {} IO error", fl)); return true; } @@ -573,11 +629,26 @@ void DorisFSDirectory::renameFile(const char* from, const char* to) { priv_getFN(nu, to); bool exists = false; - LOG_AND_THROW_IF_ERROR(_fs->exists(nu, 
&exists), "File exists IO error"); + auto st = _fs->exists(nu, &exists); + DBUG_EXECUTE_IF("DorisFSDirectory::renameFile_exists_status_is_not_ok", { + st = Status::Error( + "debug point: DorisFSDirectory::renameFile_exists_status_is_not_ok"); + }) + LOG_AND_THROW_IF_ERROR(st, "File exists IO error"); if (exists) { - LOG_AND_THROW_IF_ERROR(_fs->delete_directory(nu), fmt::format("Delete {} IO error", nu)); + st = _fs->delete_directory(nu); + DBUG_EXECUTE_IF("DorisFSDirectory::renameFile_delete_status_is_not_ok", { + st = Status::Error( + "debug point: DorisFSDirectory::renameFile_delete_status_is_not_ok"); + }) + LOG_AND_THROW_IF_ERROR(st, fmt::format("Delete {} IO error", nu)); } - LOG_AND_THROW_IF_ERROR(_fs->rename(old, nu), fmt::format("Rename {} to {} IO error", old, nu)); + st = _fs->rename(old, nu); + DBUG_EXECUTE_IF("DorisFSDirectory::renameFile_rename_status_is_not_ok", { + st = Status::Error( + "debug point: DorisFSDirectory::renameFile_rename_status_is_not_ok"); + }) + LOG_AND_THROW_IF_ERROR(st, fmt::format("Rename {} to {} IO error", old, nu)); } lucene::store::IndexOutput* DorisFSDirectory::createOutput(const char* name) { @@ -585,11 +656,31 @@ lucene::store::IndexOutput* DorisFSDirectory::createOutput(const char* name) { char fl[CL_MAX_DIR]; priv_getFN(fl, name); bool exists = false; - LOG_AND_THROW_IF_ERROR(_fs->exists(fl, &exists), "Create output file exists IO error"); + auto st = _fs->exists(fl, &exists); + DBUG_EXECUTE_IF("DorisFSDirectory::createOutput_exists_status_is_not_ok", { + st = Status::Error( + "debug point: DorisFSDirectory::createOutput_exists_status_is_not_ok"); + }) + LOG_AND_THROW_IF_ERROR(st, "Create output file exists IO error"); if (exists) { - LOG_AND_THROW_IF_ERROR(_fs->delete_file(fl), - fmt::format("Create output delete file {} IO error", fl)); - LOG_AND_THROW_IF_ERROR(_fs->exists(fl, &exists), "Create output file exists IO error"); + st = _fs->delete_file(fl); + 
DBUG_EXECUTE_IF("DorisFSDirectory::createOutput_delete_status_is_not_ok", { + st = Status::Error( + "debug point: DorisFSDirectory::createOutput_delete_status_is_not_ok"); + }) + LOG_AND_THROW_IF_ERROR(st, fmt::format("Create output delete file {} IO error", fl)); + st = _fs->exists(fl, &exists); + DBUG_EXECUTE_IF("DorisFSDirectory::createOutput_exists_after_delete_status_is_not_ok", { + st = Status::Error( + "debug point: " + "DorisFSDirectory::createOutput_exists_after_delete_status_is_not_ok"); + }) + LOG_AND_THROW_IF_ERROR(st, "Create output file exists IO error"); + DBUG_EXECUTE_IF("DorisFSDirectory::createOutput_exists_after_delete_error", + { exists = true; }) + if (exists) { + _CLTHROWA(CL_ERR_IO, fmt::format("File {} should not exist", fl).c_str()); + } assert(!exists); } auto* ret = _CLNEW FSIndexOutput(); @@ -653,6 +744,10 @@ bool DorisRAMFSDirectory::fileExists(const char* name) const { int64_t DorisRAMFSDirectory::fileModified(const char* name) const { std::lock_guard wlock(_this_lock); auto* f = filesMap->get((char*)name); + DBUG_EXECUTE_IF("DorisRAMFSDirectory::fileModified_file_not_found", { f = nullptr; }) + if (f == nullptr) { + _CLTHROWA(CL_ERR_IO, fmt::format("NOT FOUND File {}.", name).c_str()); + } return f->getLastModified(); } @@ -661,6 +756,10 @@ void DorisRAMFSDirectory::touchFile(const char* name) { { std::lock_guard wlock(_this_lock); file = filesMap->get((char*)name); + DBUG_EXECUTE_IF("DorisRAMFSDirectory::touchFile_file_not_found", { file = nullptr; }) + if (file == nullptr) { + _CLTHROWA(CL_ERR_IO, fmt::format("NOT FOUND File {}.", name).c_str()); + } } const uint64_t ts1 = file->getLastModified(); uint64_t ts2 = lucene::util::Misc::currentTimeMillis(); @@ -677,6 +776,10 @@ void DorisRAMFSDirectory::touchFile(const char* name) { int64_t DorisRAMFSDirectory::fileLength(const char* name) const { std::lock_guard wlock(_this_lock); auto* f = filesMap->get((char*)name); + DBUG_EXECUTE_IF("DorisRAMFSDirectory::fileLength_file_not_found", { 
f = nullptr; }) + if (f == nullptr) { + _CLTHROWA(CL_ERR_IO, fmt::format("NOT FOUND File {}.", name).c_str()); + } return f->getLength(); } @@ -684,6 +787,7 @@ bool DorisRAMFSDirectory::openInput(const char* name, lucene::store::IndexInput* CLuceneError& error, int32_t bufferSize) { std::lock_guard wlock(_this_lock); auto* file = filesMap->get((char*)name); + DBUG_EXECUTE_IF("DorisRAMFSDirectory::openInput_file_not_found", { file = nullptr; }) if (file == nullptr) { error.set(CL_ERR_IO, "[DorisRAMCompoundDirectory::open] The requested file does not exist."); @@ -695,6 +799,8 @@ bool DorisRAMFSDirectory::openInput(const char* name, lucene::store::IndexInput* void DorisRAMFSDirectory::close() { DorisFSDirectory::close(); + DBUG_EXECUTE_IF("DorisRAMFSDirectory::close_close_with_error", + { _CLTHROWA(CL_ERR_IO, "debug_point: close DorisRAMFSDirectory error"); }) } bool DorisRAMFSDirectory::doDeleteFile(const char* name) { @@ -730,6 +836,7 @@ void DorisRAMFSDirectory::renameFile(const char* from, const char* to) { sizeInBytes -= itr1->second->sizeInBytes; filesMap->removeitr(itr1); } + DBUG_EXECUTE_IF("DorisRAMFSDirectory::renameFile_itr_filesMap_end", { itr = filesMap->end(); }) if (itr == filesMap->end()) { char tmp[1024]; snprintf(tmp, 1024, "cannot rename %s, file does not exist", from); @@ -752,6 +859,8 @@ lucene::store::IndexOutput* DorisRAMFSDirectory::createOutput(const char* name) // get the actual pointer to the output name char* n = nullptr; auto itr = filesMap->find(const_cast(name)); + DBUG_EXECUTE_IF("DorisRAMFSDirectory::createOutput_itr_filesMap_end", + { itr = filesMap->end(); }) if (itr != filesMap->end()) { n = itr->first; lucene::store::RAMFile* rf = itr->second; @@ -784,6 +893,7 @@ DorisFSDirectory* DorisFSDirectoryFactory::getDirectory(const io::FileSystemSPtr const char* _file, bool can_use_ram_dir, lucene::store::LockFactory* lock_factory) { DorisFSDirectory* dir = nullptr; + 
DBUG_EXECUTE_IF("DorisFSDirectoryFactory::getDirectory_file_is_nullptr", { _file = nullptr; }); if (!_file || !*_file) { _CLTHROWA(CL_ERR_IO, "Invalid directory"); } @@ -797,10 +907,22 @@ DorisFSDirectory* DorisFSDirectoryFactory::getDirectory(const io::FileSystemSPtr dir = _CLNEW DorisRAMFSDirectory(); } else { bool exists = false; - LOG_AND_THROW_IF_ERROR(_fs->exists(file, &exists), "Get directory exists IO error"); + auto st = _fs->exists(file, &exists); + DBUG_EXECUTE_IF("DorisFSDirectoryFactory::getDirectory_exists_status_is_not_ok", { + st = Status::Error( + "debug point: DorisFSDirectoryFactory::getDirectory_exists_status_is_not_ok"); + }) + LOG_AND_THROW_IF_ERROR(st, "Get directory exists IO error"); if (!exists) { - LOG_AND_THROW_IF_ERROR(_fs->create_directory(file), - "Get directory create directory IO error"); + st = _fs->create_directory(file); + DBUG_EXECUTE_IF( + "DorisFSDirectoryFactory::getDirectory_create_directory_status_is_not_ok", { + st = Status::Error( + "debug point: " + "DorisFSDirectoryFactory::getDirectory_create_directory_status_is_" + "not_ok"); + }) + LOG_AND_THROW_IF_ERROR(st, "Get directory create directory IO error"); } dir = _CLNEW DorisFSDirectory(); } diff --git a/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp b/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp index 8729bd0c5902763..29fe4609e59e9ca 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp +++ b/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp @@ -75,6 +75,23 @@ const int32_t MAX_LEAF_COUNT = 1024; const float MAXMBSortInHeap = 512.0 * 8; const int DIMS = 1; +bool InvertedIndexColumnWriter::check_support_inverted_index(const TabletColumn& column) { + // bellow types are not supported in inverted index for extracted columns + static std::set invalid_types = { + FieldType::OLAP_FIELD_TYPE_DOUBLE, + FieldType::OLAP_FIELD_TYPE_JSONB, + FieldType::OLAP_FIELD_TYPE_ARRAY, + FieldType::OLAP_FIELD_TYPE_FLOAT, + }; + if 
(column.is_extracted_column() && (invalid_types.contains(column.type()))) { + return false; + } + if (column.is_variant_type()) { + return false; + } + return true; +} + template class InvertedIndexColumnWriterImpl : public InvertedIndexColumnWriter { public: @@ -101,6 +118,12 @@ class InvertedIndexColumnWriterImpl : public InvertedIndexColumnWriter { Status init() override { try { + DBUG_EXECUTE_IF("InvertedIndexColumnWriter::init_field_type_not_supported", { + return Status::Error( + "Field type not supported"); + }) + DBUG_EXECUTE_IF("InvertedIndexColumnWriter::init_inverted_index_writer_init_error", + { _CLTHROWA(CL_ERR_IO, "debug point: init index error"); }) if constexpr (field_is_slice_type(field_type)) { return init_fulltext_index(); } else if constexpr (field_is_numeric_type(field_type)) { @@ -124,6 +147,8 @@ class InvertedIndexColumnWriterImpl : public InvertedIndexColumnWriter { void close_on_error() override { try { + DBUG_EXECUTE_IF("InvertedIndexColumnWriter::close_on_error_throw_exception", + { _CLTHROWA(CL_ERR_IO, "debug point: close on error"); }) if (_index_writer) { _index_writer->close(); } @@ -143,6 +168,9 @@ class InvertedIndexColumnWriterImpl : public InvertedIndexColumnWriter { _bkd_writer = std::make_shared( max_doc, DIMS, DIMS, value_length, MAX_LEAF_COUNT, MAXMBSortInHeap, total_point_count, true, config::max_depth_in_bkd_tree); + DBUG_EXECUTE_IF("InvertedIndexColumnWriter::init_bkd_index_throw_error", { + _CLTHROWA(CL_ERR_IllegalArgument, "debug point: create bkd_writer error"); + }) return open_index_directory(); } @@ -157,6 +185,10 @@ class InvertedIndexColumnWriterImpl : public InvertedIndexColumnWriter { } Status open_index_directory() { + DBUG_EXECUTE_IF("InvertedIndexColumnWriter::open_index_directory_error", { + return Status::Error( + "debug point: open_index_directory_error"); + }) _dir = DORIS_TRY(_index_file_writer->open(_index_meta)); return Status::OK(); } @@ -166,6 +198,12 @@ class InvertedIndexColumnWriterImpl : public 
InvertedIndexColumnWriter { bool close_dir_on_shutdown = true; auto index_writer = std::make_unique( _dir, _analyzer.get(), create_index, close_dir_on_shutdown); + DBUG_EXECUTE_IF("InvertedIndexColumnWriter::create_index_writer_setRAMBufferSizeMB_error", + { index_writer->setRAMBufferSizeMB(-100); }) + DBUG_EXECUTE_IF("InvertedIndexColumnWriter::create_index_writer_setMaxBufferedDocs_error", + { index_writer->setMaxBufferedDocs(1); }) + DBUG_EXECUTE_IF("InvertedIndexColumnWriter::create_index_writer_setMergeFactor_error", + { index_writer->setMergeFactor(1); }) index_writer->setRAMBufferSizeMB(config::inverted_index_ram_buffer_size); index_writer->setMaxBufferedDocs(config::inverted_index_max_buffered_docs); index_writer->setMaxFieldLength(MAX_FIELD_LEN); @@ -230,6 +268,8 @@ class InvertedIndexColumnWriterImpl : public InvertedIndexColumnWriter { try { _index_writer->addDocument(_doc.get()); + DBUG_EXECUTE_IF("InvertedIndexColumnWriterImpl::add_document_throw_error", + { _CLTHROWA(CL_ERR_IO, "debug point: add_document io error"); }) } catch (const CLuceneError& e) { close_on_error(); return Status::Error( @@ -241,6 +281,8 @@ class InvertedIndexColumnWriterImpl : public InvertedIndexColumnWriter { Status add_null_document() { try { _index_writer->addNullDocument(_doc.get()); + DBUG_EXECUTE_IF("InvertedIndexColumnWriterImpl::add_null_document_throw_error", + { _CLTHROWA(CL_ERR_IO, "debug point: add_null_document io error"); }) } catch (const CLuceneError& e) { close_on_error(); return Status::Error( @@ -253,6 +295,10 @@ class InvertedIndexColumnWriterImpl : public InvertedIndexColumnWriter { _null_bitmap.addRange(_rid, _rid + count); _rid += count; if constexpr (field_is_slice_type(field_type)) { + DBUG_EXECUTE_IF("InvertedIndexColumnWriterImpl::add_nulls_field_nullptr", + { _field = nullptr; }) + DBUG_EXECUTE_IF("InvertedIndexColumnWriterImpl::add_nulls_index_writer_nullptr", + { _index_writer = nullptr; }) if (_field == nullptr || _index_writer == nullptr) { 
LOG(ERROR) << "field or index writer is null in inverted index writer."; return Status::InternalError( @@ -271,17 +317,30 @@ class InvertedIndexColumnWriterImpl : public InvertedIndexColumnWriter { return Status::OK(); } - void new_inverted_index_field(const char* field_value_data, size_t field_value_size) { - if (_parser_type != InvertedIndexParserType::PARSER_UNKNOWN && - _parser_type != InvertedIndexParserType::PARSER_NONE) { - new_char_token_stream(field_value_data, field_value_size, _field); - } else { - new_field_char_value(field_value_data, field_value_size, _field); + Status new_inverted_index_field(const char* field_value_data, size_t field_value_size) { + try { + if (_parser_type != InvertedIndexParserType::PARSER_UNKNOWN && + _parser_type != InvertedIndexParserType::PARSER_NONE) { + new_char_token_stream(field_value_data, field_value_size, _field); + } else { + new_field_char_value(field_value_data, field_value_size, _field); + } + } catch (const CLuceneError& e) { + return Status::Error( + "CLuceneError create new index field error: {}", e.what()); } + return Status::OK(); } void new_char_token_stream(const char* s, size_t len, lucene::document::Field* field) { _char_string_reader->init(s, len, false); + DBUG_EXECUTE_IF( + "InvertedIndexColumnWriterImpl::new_char_token_stream__char_string_reader_init_" + "error", + { + _CLTHROWA(CL_ERR_UnsupportedOperation, + "UnsupportedOperationException: CLStream::init"); + }) auto* stream = _analyzer->reusableTokenStream(field->name(), _char_string_reader.get()); field->setValue(stream); } @@ -299,6 +358,10 @@ class InvertedIndexColumnWriterImpl : public InvertedIndexColumnWriter { Status add_values(const std::string fn, const void* values, size_t count) override { if constexpr (field_is_slice_type(field_type)) { + DBUG_EXECUTE_IF("InvertedIndexColumnWriterImpl::add_values_field_is_nullptr", + { _field = nullptr; }) + DBUG_EXECUTE_IF("InvertedIndexColumnWriterImpl::add_values_index_writer_is_nullptr", + { 
_index_writer = nullptr; }) if (_field == nullptr || _index_writer == nullptr) { LOG(ERROR) << "field or index writer is null in inverted index writer."; return Status::InternalError( @@ -312,7 +375,7 @@ class InvertedIndexColumnWriterImpl : public InvertedIndexColumnWriter { (_parser_type != InvertedIndexParserType::PARSER_NONE && v->empty())) { RETURN_IF_ERROR(add_null_document()); } else { - new_inverted_index_field(v->get_data(), v->get_size()); + RETURN_IF_ERROR(new_inverted_index_field(v->get_data(), v->get_size())); RETURN_IF_ERROR(add_document()); } ++v; @@ -326,12 +389,17 @@ class InvertedIndexColumnWriterImpl : public InvertedIndexColumnWriter { Status add_array_values(size_t field_size, const void* value_ptr, const uint8_t* null_map, const uint8_t* offsets_ptr, size_t count) override { + DBUG_EXECUTE_IF("InvertedIndexColumnWriterImpl::add_array_values_count_is_zero", + { count = 0; }) if (count == 0) { // no values to add inverted index return Status::OK(); } const auto* offsets = reinterpret_cast(offsets_ptr); if constexpr (field_is_slice_type(field_type)) { + DBUG_EXECUTE_IF( + "InvertedIndexColumnWriterImpl::add_array_values_index_writer_is_nullptr", + { _index_writer = nullptr; }) if (_index_writer == nullptr) { LOG(ERROR) << "index writer is null in inverted index writer."; return Status::InternalError("index writer is null in inverted index writer"); @@ -357,7 +425,15 @@ class InvertedIndexColumnWriterImpl : public InvertedIndexColumnWriter { continue; } else { // now we temp create field . 
later make a pool - if (Status st = create_field(&new_field); st != Status::OK()) { + Status st = create_field(&new_field); + DBUG_EXECUTE_IF( + "InvertedIndexColumnWriterImpl::add_array_values_create_field_" + "error", + { + st = Status::Error( + "debug point: add_array_values_create_field_error"); + }) + if (st != Status::OK()) { LOG(ERROR) << "create field " << string(_field_name.begin(), _field_name.end()) << " error:" << st; @@ -409,7 +485,14 @@ class InvertedIndexColumnWriterImpl : public InvertedIndexColumnWriter { // avoid to add doc which without any field which may make threadState init skip // init fieldDataArray, then will make error with next doc with fields in // resetCurrentFieldData - if (Status st = create_field(&new_field); st != Status::OK()) { + Status st = create_field(&new_field); + DBUG_EXECUTE_IF( + "InvertedIndexColumnWriterImpl::add_array_values_create_field_error_2", + { + st = Status::Error( + "debug point: add_array_values_create_field_error_2"); + }) + if (st != Status::OK()) { LOG(ERROR) << "create field " << string(_field_name.begin(), _field_name.end()) << " error:" << st; @@ -443,6 +526,11 @@ class InvertedIndexColumnWriterImpl : public InvertedIndexColumnWriter { Status add_array_values(size_t field_size, const CollectionValue* values, size_t count) override { if constexpr (field_is_slice_type(field_type)) { + DBUG_EXECUTE_IF("InvertedIndexColumnWriterImpl::add_array_values_field_is_nullptr", + { _field = nullptr; }) + DBUG_EXECUTE_IF( + "InvertedIndexColumnWriterImpl::add_array_values_index_writer_is_nullptr", + { _index_writer = nullptr; }) if (_field == nullptr || _index_writer == nullptr) { LOG(ERROR) << "field or index writer is null in inverted index writer."; return Status::InternalError( @@ -461,7 +549,7 @@ class InvertedIndexColumnWriterImpl : public InvertedIndexColumnWriter { item_data_ptr = (uint8_t*)item_data_ptr + field_size; } auto value = join(strings, " "); - new_inverted_index_field(value.c_str(), 
value.length()); + RETURN_IF_ERROR(new_inverted_index_field(value.c_str(), value.length())); _rid++; RETURN_IF_ERROR(add_document()); values++; @@ -651,6 +739,8 @@ Status InvertedIndexColumnWriter::create(const Field* field, bool single_field = true; if (type == FieldType::OLAP_FIELD_TYPE_ARRAY) { const auto* array_typeinfo = dynamic_cast(typeinfo); + DBUG_EXECUTE_IF("InvertedIndexColumnWriter::create_array_typeinfo_is_nullptr", + { array_typeinfo = nullptr; }) if (array_typeinfo != nullptr) { typeinfo = array_typeinfo->item_type_info(); type = typeinfo->type(); @@ -661,6 +751,8 @@ Status InvertedIndexColumnWriter::create(const Field* field, } } + DBUG_EXECUTE_IF("InvertedIndexColumnWriter::create_unsupported_type_for_inverted_index", + { type = FieldType::OLAP_FIELD_TYPE_FLOAT; }) switch (type) { #define M(TYPE) \ case TYPE: \ diff --git a/be/src/olap/rowset/segment_v2/inverted_index_writer.h b/be/src/olap/rowset/segment_v2/inverted_index_writer.h index 63c1e219e649e81..da90752db091684 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index_writer.h +++ b/be/src/olap/rowset/segment_v2/inverted_index_writer.h @@ -33,7 +33,6 @@ #include "io/fs/local_file_system.h" #include "olap/olap_common.h" #include "olap/options.h" -#include "olap/tablet_schema.h" namespace doris { class CollectionValue; @@ -41,6 +40,7 @@ class CollectionValue; class Field; class TabletIndex; +class TabletColumn; namespace segment_v2 { class InvertedIndexFileWriter; @@ -74,22 +74,7 @@ class InvertedIndexColumnWriter { // check if the column is valid for inverted index, some columns // are generated from variant, but not all of them are supported - static bool check_support_inverted_index(const TabletColumn& column) { - // bellow types are not supported in inverted index for extracted columns - static std::set invalid_types = { - FieldType::OLAP_FIELD_TYPE_DOUBLE, - FieldType::OLAP_FIELD_TYPE_JSONB, - FieldType::OLAP_FIELD_TYPE_ARRAY, - FieldType::OLAP_FIELD_TYPE_FLOAT, - }; - if 
(column.is_extracted_column() && (invalid_types.contains(column.type()))) { - return false; - } - if (column.is_variant_type()) { - return false; - } - return true; - } + static bool check_support_inverted_index(const TabletColumn& column); private: DISALLOW_COPY_AND_ASSIGN(InvertedIndexColumnWriter); diff --git a/be/src/olap/rowset/segment_v2/options.h b/be/src/olap/rowset/segment_v2/options.h index 93ec03df452b6ce..33d1a24ece3947b 100644 --- a/be/src/olap/rowset/segment_v2/options.h +++ b/be/src/olap/rowset/segment_v2/options.h @@ -25,6 +25,7 @@ namespace segment_v2 { static constexpr size_t DEFAULT_PAGE_SIZE = 1024 * 1024; // default size: 1M constexpr long ROW_STORE_PAGE_SIZE_DEFAULT_VALUE = 16384; // default row store page size: 16KB +static constexpr size_t STORAGE_PAGE_SIZE_DEFAULT_VALUE = 65536; struct PageBuilderOptions { size_t data_page_size = DEFAULT_PAGE_SIZE; diff --git a/be/src/olap/rowset/segment_v2/segment.cpp b/be/src/olap/rowset/segment_v2/segment.cpp index 68fe3190b817a1e..0ad799683fc458e 100644 --- a/be/src/olap/rowset/segment_v2/segment.cpp +++ b/be/src/olap/rowset/segment_v2/segment.cpp @@ -86,10 +86,30 @@ std::string file_cache_key_str(const std::string& seg_path) { return file_cache_key_from_path(seg_path).to_string(); } -Status Segment::open(io::FileSystemSPtr fs, const std::string& path, uint32_t segment_id, - RowsetId rowset_id, TabletSchemaSPtr tablet_schema, +Status Segment::open(io::FileSystemSPtr fs, const std::string& path, int64_t tablet_id, + uint32_t segment_id, RowsetId rowset_id, TabletSchemaSPtr tablet_schema, const io::FileReaderOptions& reader_options, std::shared_ptr* output, InvertedIndexFileInfo idx_file_info) { + auto s = _open(fs, path, segment_id, rowset_id, tablet_schema, reader_options, output, + idx_file_info); + if (!s.ok()) { + if (!config::is_cloud_mode()) { + auto res = ExecEnv::get_tablet(tablet_id); + TabletSharedPtr tablet = + res.has_value() ? 
std::dynamic_pointer_cast(res.value()) : nullptr; + if (tablet) { + tablet->report_error(s); + } + } + } + + return s; +} + +Status Segment::_open(io::FileSystemSPtr fs, const std::string& path, uint32_t segment_id, + RowsetId rowset_id, TabletSchemaSPtr tablet_schema, + const io::FileReaderOptions& reader_options, std::shared_ptr* output, + InvertedIndexFileInfo idx_file_info) { io::FileReaderSPtr file_reader; RETURN_IF_ERROR(fs->open_file(path, &file_reader, &reader_options)); std::shared_ptr segment( @@ -462,7 +482,8 @@ Status Segment::_load_pk_bloom_filter() { }); } -Status Segment::load_pk_index_and_bf() { +Status Segment::load_pk_index_and_bf(OlapReaderStatistics* index_load_stats) { + _pk_index_load_stats = index_load_stats; RETURN_IF_ERROR(load_index()); RETURN_IF_ERROR(_load_pk_bloom_filter()); return Status::OK(); @@ -471,7 +492,7 @@ Status Segment::load_pk_index_and_bf() { Status Segment::load_index() { return _load_index_once.call([this] { if (_tablet_schema->keys_type() == UNIQUE_KEYS && _pk_index_meta != nullptr) { - _pk_index_reader = std::make_unique(); + _pk_index_reader = std::make_unique(_pk_index_load_stats); RETURN_IF_ERROR(_pk_index_reader->parse_index(_file_reader, *_pk_index_meta)); // _meta_mem_usage += _pk_index_reader->get_memory_size(); return Status::OK(); @@ -931,7 +952,7 @@ Status Segment::new_inverted_index_iterator(const TabletColumn& tablet_column, Status Segment::lookup_row_key(const Slice& key, const TabletSchema* latest_schema, bool with_seq_col, bool with_rowid, RowLocation* row_location, - std::string* encoded_seq_value) { + std::string* encoded_seq_value, OlapReaderStatistics* stats) { RETURN_IF_ERROR(load_pk_index_and_bf()); bool has_seq_col = latest_schema->has_sequence_col(); bool has_rowid = !latest_schema->cluster_key_idxes().empty(); @@ -951,7 +972,7 @@ Status Segment::lookup_row_key(const Slice& key, const TabletSchema* latest_sche } bool exact_match = false; std::unique_ptr index_iterator; - 
RETURN_IF_ERROR(_pk_index_reader->new_iterator(&index_iterator)); + RETURN_IF_ERROR(_pk_index_reader->new_iterator(&index_iterator, stats)); auto st = index_iterator->seek_at_or_after(&key_without_seq, &exact_match); if (!st.ok() && !st.is()) { return st; diff --git a/be/src/olap/rowset/segment_v2/segment.h b/be/src/olap/rowset/segment_v2/segment.h index 13c8c86424f173d..bc5ab1e1fdc80ad 100644 --- a/be/src/olap/rowset/segment_v2/segment.h +++ b/be/src/olap/rowset/segment_v2/segment.h @@ -80,8 +80,8 @@ using SegmentSharedPtr = std::shared_ptr; // change finished, client should disable all cached Segment for old TabletSchema. class Segment : public std::enable_shared_from_this, public MetadataAdder { public: - static Status open(io::FileSystemSPtr fs, const std::string& path, uint32_t segment_id, - RowsetId rowset_id, TabletSchemaSPtr tablet_schema, + static Status open(io::FileSystemSPtr fs, const std::string& path, int64_t tablet_id, + uint32_t segment_id, RowsetId rowset_id, TabletSchemaSPtr tablet_schema, const io::FileReaderOptions& reader_options, std::shared_ptr* output, InvertedIndexFileInfo idx_file_info = {}); @@ -133,7 +133,8 @@ class Segment : public std::enable_shared_from_this, public MetadataAdd Status lookup_row_key(const Slice& key, const TabletSchema* latest_schema, bool with_seq_col, bool with_rowid, RowLocation* row_location, - std::string* encoded_seq_value = nullptr); + std::string* encoded_seq_value = nullptr, + OlapReaderStatistics* stats = nullptr); Status read_key_by_rowid(uint32_t row_id, std::string* key); @@ -143,7 +144,7 @@ class Segment : public std::enable_shared_from_this, public MetadataAdd Status load_index(); - Status load_pk_index_and_bf(); + Status load_pk_index_and_bf(OlapReaderStatistics* index_load_stats = nullptr); void update_healthy_status(Status new_status) { _healthy_status.update(new_status); } // The segment is loaded into SegmentCache and then will load indices, if there are something wrong @@ -214,6 +215,10 @@ class 
Segment : public std::enable_shared_from_this, public MetadataAdd DISALLOW_COPY_AND_ASSIGN(Segment); Segment(uint32_t segment_id, RowsetId rowset_id, TabletSchemaSPtr tablet_schema, InvertedIndexFileInfo idx_file_info = InvertedIndexFileInfo()); + static Status _open(io::FileSystemSPtr fs, const std::string& path, uint32_t segment_id, + RowsetId rowset_id, TabletSchemaSPtr tablet_schema, + const io::FileReaderOptions& reader_options, + std::shared_ptr* output, InvertedIndexFileInfo idx_file_info); // open segment file and read the minimum amount of necessary information (footer) Status _open(); Status _parse_footer(SegmentFooterPB* footer); @@ -297,6 +302,7 @@ class Segment : public std::enable_shared_from_this, public MetadataAdd InvertedIndexFileInfo _idx_file_info; int _be_exec_version = BeExecVersionManager::get_newest_version(); + OlapReaderStatistics* _pk_index_load_stats = nullptr; }; } // namespace segment_v2 diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp index 985cdc16e68f310..ae75ca470508437 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp +++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp @@ -497,7 +497,7 @@ Status SegmentIterator::_prepare_seek(const StorageReadOptions::KeyRange& key_ra } Status SegmentIterator::_get_row_ranges_by_column_conditions() { - SCOPED_RAW_TIMER(&_opts.stats->block_conditions_filtered_ns); + SCOPED_RAW_TIMER(&_opts.stats->generate_row_ranges_ns); if (_row_bitmap.isEmpty()) { return Status::OK(); } @@ -565,7 +565,7 @@ Status SegmentIterator::_get_row_ranges_from_conditions(RowRanges* condition_row size_t pre_size = 0; { - SCOPED_RAW_TIMER(&_opts.stats->block_conditions_filtered_bf_ns); + SCOPED_RAW_TIMER(&_opts.stats->generate_row_ranges_by_bf_ns); // first filter data by bloom filter index // bloom filter index only use CondColumn RowRanges bf_row_ranges = RowRanges::create_single(num_rows()); @@ -588,7 +588,7 @@ Status 
SegmentIterator::_get_row_ranges_from_conditions(RowRanges* condition_row } { - SCOPED_RAW_TIMER(&_opts.stats->block_conditions_filtered_zonemap_ns); + SCOPED_RAW_TIMER(&_opts.stats->generate_row_ranges_by_zonemap_ns); RowRanges zone_map_row_ranges = RowRanges::create_single(num_rows()); // second filter data by zone map for (const auto& cid : cids) { @@ -652,7 +652,7 @@ Status SegmentIterator::_get_row_ranges_from_conditions(RowRanges* condition_row } { - SCOPED_RAW_TIMER(&_opts.stats->block_conditions_filtered_dict_ns); + SCOPED_RAW_TIMER(&_opts.stats->generate_row_ranges_by_dict_ns); /// Low cardinality optimization is currently not very stable, so to prevent data corruption, /// we are temporarily disabling its use in data compaction. if (_opts.io_ctx.reader_type == ReaderType::READER_QUERY) { @@ -1057,16 +1057,17 @@ Status SegmentIterator::_init_inverted_index_iterators() { return Status::OK(); } for (auto cid : _schema->column_ids()) { + // Use segment’s own index_meta, for compatibility with future indexing needs to default to lowercase. if (_inverted_index_iterators[cid] == nullptr) { - // Not check type valid, since we need to get inverted index for related variant type when reading the segment. - // If check type valid, we can not get inverted index for variant type, and result nullptr.The result for calling - // get_inverted_index with variant suffix should return corresponding inverted index meta. - bool check_inverted_index_by_type = false; - // Use segment’s own index_meta, for compatibility with future indexing needs to default to lowercase. + // In the _opts.tablet_schema, the sub-column type information for the variant is FieldType::OLAP_FIELD_TYPE_VARIANT. + // This is because the sub-column is created in create_materialized_variant_column. + // We use this column to locate the metadata for the inverted index, which requires a unique_id and path. 
+ const auto& column = _opts.tablet_schema->column(cid); + int32_t col_unique_id = + column.is_extracted_column() ? column.parent_unique_id() : column.unique_id(); RETURN_IF_ERROR(_segment->new_inverted_index_iterator( - _opts.tablet_schema->column(cid), - _segment->_tablet_schema->get_inverted_index(_opts.tablet_schema->column(cid), - check_inverted_index_by_type), + column, + _segment->_tablet_schema->inverted_index(col_unique_id, column.suffix_path()), _opts, &_inverted_index_iterators[cid])); } } @@ -1332,7 +1333,7 @@ Status SegmentIterator::_vec_init_lazy_materialization() { short_cir_pred_col_id_set.insert(cid); _short_cir_eval_predicate.push_back(predicate); } - if (predicate->is_filter()) { + if (predicate->is_runtime_filter()) { _filter_info_id.push_back(predicate); } } @@ -1402,7 +1403,7 @@ Status SegmentIterator::_vec_init_lazy_materialization() { if (!_is_common_expr_column[cid]) { _non_predicate_columns.push_back(cid); } else { - _second_read_column_ids.push_back(cid); + _non_predicate_column_ids.push_back(cid); } } } @@ -1412,13 +1413,13 @@ Status SegmentIterator::_vec_init_lazy_materialization() { if (_lazy_materialization_read) { // insert pred cid to first_read_columns for (auto cid : pred_column_ids) { - _first_read_column_ids.push_back(cid); + _predicate_column_ids.push_back(cid); } } else if (!_is_need_vec_eval && !_is_need_short_eval && !_is_need_expr_eval) { // no pred exists, just read and output column for (int i = 0; i < _schema->num_column_ids(); i++) { auto cid = _schema->column_id(i); - _first_read_column_ids.push_back(cid); + _predicate_column_ids.push_back(cid); } } else { if (_is_need_vec_eval || _is_need_short_eval) { @@ -1430,18 +1431,18 @@ Status SegmentIterator::_vec_init_lazy_materialization() { _short_cir_pred_column_ids.end()); pred_id_set.insert(_vec_pred_column_ids.begin(), _vec_pred_column_ids.end()); - DCHECK(_second_read_column_ids.empty()); - // _second_read_column_ids must be empty. 
Otherwise _lazy_materialization_read must not false. + DCHECK(_non_predicate_column_ids.empty()); + // _non_predicate_column_ids must be empty. Otherwise _lazy_materialization_read must not false. for (int i = 0; i < _schema->num_column_ids(); i++) { auto cid = _schema->column_id(i); if (pred_id_set.find(cid) != pred_id_set.end()) { - _first_read_column_ids.push_back(cid); + _predicate_column_ids.push_back(cid); } // In the past, if schema columns > pred columns, the _lazy_materialization_read maybe == false, but // we make sure using _lazy_materialization_read= true now, so these logic may never happens. I comment // these lines and we could delete them in the future to make the code more clear. // else if (non_pred_set.find(cid) != non_pred_set.end()) { - // _first_read_column_ids.push_back(cid); + // _predicate_column_ids.push_back(cid); // // when _lazy_materialization_read = false, non-predicate column should also be filtered by sel idx, so we regard it as pred columns // _is_pred_column[cid] = true; // } @@ -1449,7 +1450,7 @@ Status SegmentIterator::_vec_init_lazy_materialization() { } else if (_is_need_expr_eval) { DCHECK(!_is_need_vec_eval && !_is_need_short_eval); for (auto cid : _common_expr_columns) { - _first_read_column_ids.push_back(cid); + _predicate_column_ids.push_back(cid); } } } @@ -1635,7 +1636,7 @@ void SegmentIterator::_output_non_pred_columns(vectorized::Block* block) { * 1. Reads a batch of rowids (up to the specified limit), and checks if they are continuous. * Continuous here means that the rowids form an unbroken sequence (e.g., 1, 2, 3, 4...). * - * 2. For each column that needs to be read (identified by _first_read_column_ids): + * 2. For each column that needs to be read (identified by _predicate_column_ids): * - If the rowids are continuous, the function uses seek_to_ordinal and next_batch * for efficient reading. 
* - If the rowids are not continuous, the function processes them in smaller batches @@ -1648,13 +1649,13 @@ void SegmentIterator::_output_non_pred_columns(vectorized::Block* block) { */ Status SegmentIterator::_read_columns_by_index(uint32_t nrows_read_limit, uint32_t& nrows_read, bool set_block_rowid) { - SCOPED_RAW_TIMER(&_opts.stats->first_read_ns); + SCOPED_RAW_TIMER(&_opts.stats->predicate_column_read_ns); nrows_read = _range_iter->read_batch_rowids(_block_rowids.data(), nrows_read_limit); bool is_continuous = (nrows_read > 1) && (_block_rowids[nrows_read - 1] - _block_rowids[0] == nrows_read - 1); - for (auto cid : _first_read_column_ids) { + for (auto cid : _predicate_column_ids) { auto& column = _current_return_columns[cid]; if (_no_need_read_key_data(cid, column, nrows_read)) { continue; @@ -1679,9 +1680,9 @@ Status SegmentIterator::_read_columns_by_index(uint32_t nrows_read_limit, uint32 if (is_continuous) { size_t rows_read = nrows_read; - _opts.stats->block_first_read_seek_num += 1; + _opts.stats->predicate_column_read_seek_num += 1; if (_opts.runtime_state && _opts.runtime_state->enable_profile()) { - SCOPED_RAW_TIMER(&_opts.stats->block_first_read_seek_ns); + SCOPED_RAW_TIMER(&_opts.stats->predicate_column_read_seek_ns); RETURN_IF_ERROR(_column_iterators[cid]->seek_to_ordinal(_block_rowids[0])); } else { RETURN_IF_ERROR(_column_iterators[cid]->seek_to_ordinal(_block_rowids[0])); @@ -1703,9 +1704,9 @@ Status SegmentIterator::_read_columns_by_index(uint32_t nrows_read_limit, uint32 if (batch_continuous) { size_t rows_read = current_batch_size; - _opts.stats->block_first_read_seek_num += 1; + _opts.stats->predicate_column_read_seek_num += 1; if (_opts.runtime_state && _opts.runtime_state->enable_profile()) { - SCOPED_RAW_TIMER(&_opts.stats->block_first_read_seek_ns); + SCOPED_RAW_TIMER(&_opts.stats->predicate_column_read_seek_ns); RETURN_IF_ERROR( _column_iterators[cid]->seek_to_ordinal(_block_rowids[processed])); } else { @@ -1772,15 +1773,17 @@ 
uint16_t SegmentIterator::_evaluate_vectorization_predicate(uint16_t* sel_rowid_ } } + const uint16_t original_size = selected_size; //If all predicates are always_true, then return directly. if (all_pred_always_true || !_is_need_vec_eval) { - for (uint16_t i = 0; i < selected_size; ++i) { + for (uint16_t i = 0; i < original_size; ++i) { sel_rowid_idx[i] = i; } - return selected_size; + // All preds are always_true, so return immediately and update the profile statistics here. + _opts.stats->vec_cond_input_rows += original_size; + return original_size; } - uint16_t original_size = selected_size; _ret_flags.resize(original_size); DCHECK(!_pre_eval_block_predicate.empty()); bool is_first = true; @@ -1846,10 +1849,6 @@ uint16_t SegmentIterator::_evaluate_short_circuit_predicate(uint16_t* vec_sel_ro selected_size = predicate->evaluate(*short_cir_column, vec_sel_rowid_idx, selected_size); } - // collect profile - for (auto* p : _filter_info_id) { - _opts.stats->filter_info[p->get_filter_id()] = p->get_filtered_info(); - } _opts.stats->short_circuit_cond_input_rows += original_size; _opts.stats->rows_short_circuit_cond_filtered += original_size - selected_size; @@ -1861,6 +1860,17 @@ uint16_t SegmentIterator::_evaluate_short_circuit_predicate(uint16_t* vec_sel_ro return selected_size; } +void SegmentIterator::_collect_runtime_filter_predicate() { + // collect profile + for (auto* p : _filter_info_id) { + // There is a situation, such as with in or minmax filters, + // where intermediate conversion to a key range or other types + // prevents obtaining the filter id. 
+ if (p->get_filter_id() >= 0) { + _opts.stats->filter_info[p->get_filter_id()] = p->get_filtered_info(); + } + } +} Status SegmentIterator::_read_columns_by_rowids(std::vector& read_column_ids, std::vector& rowid_vector, uint16_t* sel_rowid_idx, size_t select_size, @@ -2068,8 +2078,8 @@ Status SegmentIterator::_next_batch_internal(vectorized::Block* block) { RETURN_IF_ERROR(_read_columns_by_index( nrows_read_limit, _current_batch_rows_read, _lazy_materialization_read || _opts.record_rowids || _is_need_expr_eval)); - if (std::find(_first_read_column_ids.begin(), _first_read_column_ids.end(), - _schema->version_col_idx()) != _first_read_column_ids.end()) { + if (std::find(_predicate_column_ids.begin(), _predicate_column_ids.end(), + _schema->version_col_idx()) != _predicate_column_ids.end()) { _replace_version_col(_current_batch_rows_read); } @@ -2094,7 +2104,7 @@ Status SegmentIterator::_next_batch_internal(vectorized::Block* block) { if (_non_predicate_columns.empty()) { return Status::InternalError("_non_predicate_columns is empty"); } - RETURN_IF_ERROR(_convert_to_expected_type(_first_read_column_ids)); + RETURN_IF_ERROR(_convert_to_expected_type(_predicate_column_ids)); RETURN_IF_ERROR(_convert_to_expected_type(_non_predicate_columns)); _output_non_pred_columns(block); } else { @@ -2113,29 +2123,31 @@ Status SegmentIterator::_next_batch_internal(vectorized::Block* block) { // In SSB test, it make no difference; So need more scenarios to test selected_size = _evaluate_short_circuit_predicate(_sel_rowid_idx.data(), selected_size); + _collect_runtime_filter_predicate(); if (selected_size > 0) { // step 3.1: output short circuit and predicate column - // when lazy materialization enables, _first_read_column_ids = distinct(_short_cir_pred_column_ids + _vec_pred_column_ids) + // when lazy materialization enables, _predicate_column_ids = distinct(_short_cir_pred_column_ids + _vec_pred_column_ids) // see _vec_init_lazy_materialization // todo(wb) need to tell input 
columnids from output columnids - RETURN_IF_ERROR(_output_column_by_sel_idx(block, _first_read_column_ids, + RETURN_IF_ERROR(_output_column_by_sel_idx(block, _predicate_column_ids, _sel_rowid_idx.data(), selected_size)); // step 3.2: read remaining expr column and evaluate it. if (_is_need_expr_eval) { // The predicate column contains the remaining expr column, no need second read. - if (!_second_read_column_ids.empty()) { - SCOPED_RAW_TIMER(&_opts.stats->second_read_ns); + if (!_non_predicate_column_ids.empty()) { + SCOPED_RAW_TIMER(&_opts.stats->non_predicate_read_ns); RETURN_IF_ERROR(_read_columns_by_rowids( - _second_read_column_ids, _block_rowids, _sel_rowid_idx.data(), + _non_predicate_column_ids, _block_rowids, _sel_rowid_idx.data(), selected_size, &_current_return_columns)); - if (std::find(_second_read_column_ids.begin(), - _second_read_column_ids.end(), _schema->version_col_idx()) != - _second_read_column_ids.end()) { + if (std::find(_non_predicate_column_ids.begin(), + _non_predicate_column_ids.end(), + _schema->version_col_idx()) != + _non_predicate_column_ids.end()) { _replace_version_col(selected_size); } - RETURN_IF_ERROR(_convert_to_expected_type(_second_read_column_ids)); - for (auto cid : _second_read_column_ids) { + RETURN_IF_ERROR(_convert_to_expected_type(_non_predicate_column_ids)); + for (auto cid : _non_predicate_column_ids) { auto loc = _schema_block_id_map[cid]; block->replace_by_position(loc, std::move(_current_return_columns[cid])); @@ -2168,17 +2180,17 @@ Status SegmentIterator::_next_batch_internal(vectorized::Block* block) { } } } else if (_is_need_expr_eval) { - RETURN_IF_ERROR(_convert_to_expected_type(_second_read_column_ids)); - for (auto cid : _second_read_column_ids) { + RETURN_IF_ERROR(_convert_to_expected_type(_non_predicate_column_ids)); + for (auto cid : _non_predicate_column_ids) { auto loc = _schema_block_id_map[cid]; block->replace_by_position(loc, std::move(_current_return_columns[cid])); } } } else if 
(_is_need_expr_eval) { - DCHECK(!_first_read_column_ids.empty()); - RETURN_IF_ERROR(_convert_to_expected_type(_first_read_column_ids)); + DCHECK(!_predicate_column_ids.empty()); + RETURN_IF_ERROR(_convert_to_expected_type(_predicate_column_ids)); // first read all rows are insert block, initialize sel_rowid_idx to all rows. - for (auto cid : _first_read_column_ids) { + for (auto cid : _predicate_column_ids) { auto loc = _schema_block_id_map[cid]; block->replace_by_position(loc, std::move(_current_return_columns[cid])); } diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.h b/be/src/olap/rowset/segment_v2/segment_iterator.h index c2e2139e8ad4114..5588661302dd060 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.h +++ b/be/src/olap/rowset/segment_v2/segment_iterator.h @@ -234,6 +234,7 @@ class SegmentIterator : public RowwiseIterator { uint32_t nrows_read_limit); uint16_t _evaluate_vectorization_predicate(uint16_t* sel_rowid_idx, uint16_t selected_size); uint16_t _evaluate_short_circuit_predicate(uint16_t* sel_rowid_idx, uint16_t selected_size); + void _collect_runtime_filter_predicate(); void _output_non_pred_columns(vectorized::Block* block); [[nodiscard]] Status _read_columns_by_rowids(std::vector& read_column_ids, std::vector& rowid_vector, @@ -431,8 +432,8 @@ class SegmentIterator : public RowwiseIterator { // first, read predicate columns by various index // second, read non-predicate columns // so we need a field to stand for columns first time to read - std::vector _first_read_column_ids; - std::vector _second_read_column_ids; + std::vector _predicate_column_ids; + std::vector _non_predicate_column_ids; std::vector _columns_to_filter; std::vector _converted_column_ids; std::vector _schema_block_id_map; // map from schema column id to column idx in Block diff --git a/be/src/olap/rowset/segment_v2/segment_writer.cpp b/be/src/olap/rowset/segment_v2/segment_writer.cpp index 4301303dac9237e..db95d3b62e73d90 100644 --- 
a/be/src/olap/rowset/segment_v2/segment_writer.cpp +++ b/be/src/olap/rowset/segment_v2/segment_writer.cpp @@ -85,13 +85,14 @@ inline std::string segment_mem_tracker_name(uint32_t segment_id) { SegmentWriter::SegmentWriter(io::FileWriter* file_writer, uint32_t segment_id, TabletSchemaSPtr tablet_schema, BaseTabletSPtr tablet, DataDir* data_dir, const SegmentWriterOptions& opts, - io::FileWriterPtr inverted_file_writer) + InvertedIndexFileWriter* inverted_file_writer) : _segment_id(segment_id), _tablet_schema(std::move(tablet_schema)), _tablet(std::move(tablet)), _data_dir(data_dir), _opts(opts), _file_writer(file_writer), + _inverted_index_file_writer(inverted_file_writer), _mem_tracker(std::make_unique(segment_mem_tracker_name(segment_id))), _mow_context(std::move(opts.mow_ctx)) { CHECK_NOTNULL(file_writer); @@ -132,17 +133,6 @@ SegmentWriter::SegmentWriter(io::FileWriter* file_writer, uint32_t segment_id, } } } - if (_tablet_schema->has_inverted_index()) { - _inverted_index_file_writer = std::make_unique( - _opts.rowset_ctx->fs(), - std::string {InvertedIndexDescriptor::get_index_file_path_prefix( - file_writer->path().c_str())}, - _opts.rowset_ctx->rowset_id.to_string(), segment_id, - _tablet_schema->get_inverted_index_storage_format(), - std::move(inverted_file_writer)); - _inverted_index_file_writer->set_file_writer_opts( - _opts.rowset_ctx->get_file_writer_options()); - } } SegmentWriter::~SegmentWriter() { @@ -217,21 +207,21 @@ Status SegmentWriter::_create_column_writer(uint32_t cid, const TabletColumn& co if (_opts.write_type == DataWriteType::TYPE_DIRECT && schema->skip_write_index_on_load()) { skip_inverted_index = true; } - // indexes for this column - opts.indexes = schema->get_indexes_for_column(column); + if (!InvertedIndexColumnWriter::check_support_inverted_index(column)) { opts.need_zone_map = false; opts.need_bloom_filter = false; opts.need_bitmap_index = false; } - opts.inverted_index_file_writer = _inverted_index_file_writer.get(); - for (const 
auto* index : opts.indexes) { - if (!skip_inverted_index && index->index_type() == IndexType::INVERTED) { - opts.inverted_index = index; - opts.need_inverted_index = true; - // TODO support multiple inverted index - break; - } + + // indexes for this column + if (const auto& index = schema->inverted_index(column); + index != nullptr && !skip_inverted_index) { + opts.inverted_index = index; + opts.need_inverted_index = true; + DCHECK(_inverted_index_file_writer != nullptr); + opts.inverted_index_file_writer = _inverted_index_file_writer; + // TODO support multiple inverted index } #define CHECK_FIELD_TYPE(TYPE, type_name) \ if (column.type() == FieldType::OLAP_FIELD_TYPE_##TYPE) { \ @@ -255,6 +245,35 @@ Status SegmentWriter::_create_column_writer(uint32_t cid, const TabletColumn& co #undef CHECK_FIELD_TYPE + if (_opts.rowset_ctx != nullptr) { + int64_t storage_page_size = _opts.rowset_ctx->storage_page_size; + // storage_page_size must be between 4KB and 10MB. + if (storage_page_size >= 4096 && storage_page_size <= 10485760) { + opts.data_page_size = storage_page_size; + } + } + DBUG_EXECUTE_IF("VerticalSegmentWriter._create_column_writer.storage_page_size", { + auto table_id = DebugPoints::instance()->get_debug_param_or_default( + "VerticalSegmentWriter._create_column_writer.storage_page_size", "table_id", + INT_MIN); + auto target_data_page_size = DebugPoints::instance()->get_debug_param_or_default( + "VerticalSegmentWriter._create_column_writer.storage_page_size", + "storage_page_size", INT_MIN); + if (table_id == INT_MIN || target_data_page_size == INT_MIN) { + return Status::Error( + "Debug point parameters missing: either 'table_id' or 'storage_page_size' not " + "set."); + } + if (table_id == _tablet_schema->table_id() && + opts.data_page_size != target_data_page_size) { + return Status::Error( + "Mismatch in 'storage_page_size': expected size does not match the current " + "data page size. 
" + "Expected: " + + std::to_string(target_data_page_size) + + ", Actual: " + std::to_string(opts.data_page_size) + "."); + } + }) if (column.is_row_store_column()) { // smaller page size for row store column auto page_size = _tablet_schema->row_store_page_size(); @@ -1025,10 +1044,6 @@ Status SegmentWriter::finalize_footer(uint64_t* segment_file_size) { if (*segment_file_size == 0) { return Status::Corruption("Bad segment, file size = 0"); } - if (_inverted_index_file_writer != nullptr) { - RETURN_IF_ERROR(_inverted_index_file_writer->close()); - _inverted_index_file_info = _inverted_index_file_writer->get_index_file_info(); - } return Status::OK(); } @@ -1269,13 +1284,6 @@ Status SegmentWriter::_generate_short_key_index( return Status::OK(); } -int64_t SegmentWriter::get_inverted_index_total_size() { - if (_inverted_index_file_writer != nullptr) { - return _inverted_index_file_writer->get_index_file_total_size(); - } - return 0; -} - inline bool SegmentWriter::_is_mow() { return _tablet_schema->keys_type() == UNIQUE_KEYS && _opts.enable_unique_key_merge_on_write; } diff --git a/be/src/olap/rowset/segment_v2/segment_writer.h b/be/src/olap/rowset/segment_v2/segment_writer.h index bde087e0ed0d9e2..9a8af131087f92f 100644 --- a/be/src/olap/rowset/segment_v2/segment_writer.h +++ b/be/src/olap/rowset/segment_v2/segment_writer.h @@ -34,6 +34,7 @@ #include "gutil/strings/substitute.h" #include "olap/olap_define.h" #include "olap/rowset/segment_v2/column_writer.h" +#include "olap/rowset/segment_v2/inverted_index_file_writer.h" #include "olap/tablet.h" #include "olap/tablet_schema.h" #include "util/faststring.h" @@ -61,7 +62,6 @@ class FileWriter; } // namespace io namespace segment_v2 { -class InvertedIndexFileWriter; extern const char* k_segment_magic; extern const uint32_t k_segment_magic_length; @@ -84,7 +84,7 @@ class SegmentWriter { explicit SegmentWriter(io::FileWriter* file_writer, uint32_t segment_id, TabletSchemaSPtr tablet_schema, BaseTabletSPtr tablet, DataDir* 
data_dir, const SegmentWriterOptions& opts, - io::FileWriterPtr inverted_file_writer = nullptr); + InvertedIndexFileWriter* inverted_file_writer); ~SegmentWriter(); Status init(); @@ -113,9 +113,6 @@ class SegmentWriter { uint64_t estimate_segment_size(); - InvertedIndexFileInfo get_inverted_index_file_info() const { return _inverted_index_file_info; } - int64_t get_inverted_index_total_size(); - uint32_t num_rows_written() const { return _num_rows_written; } // for partial update @@ -147,6 +144,17 @@ class SegmentWriter { void set_mow_context(std::shared_ptr mow_context); + Status close_inverted_index(int64_t* inverted_index_file_size) { + // no inverted index + if (_inverted_index_file_writer == nullptr) { + *inverted_index_file_size = 0; + return Status::OK(); + } + RETURN_IF_ERROR(_inverted_index_file_writer->close()); + *inverted_index_file_size = _inverted_index_file_writer->get_index_file_total_size(); + return Status::OK(); + } + private: DISALLOW_COPY_AND_ASSIGN(SegmentWriter); Status _create_column_writer(uint32_t cid, const TabletColumn& column, @@ -202,13 +210,15 @@ class SegmentWriter { // Not owned. owned by RowsetWriter or SegmentFlusher io::FileWriter* _file_writer = nullptr; - std::unique_ptr _inverted_index_file_writer; + // Not owned. 
owned by RowsetWriter or SegmentFlusher + InvertedIndexFileWriter* _inverted_index_file_writer = nullptr; + SegmentFooterPB _footer; // for mow tables with cluster key, the sort key is the cluster keys not unique keys // for other tables, the sort key is the keys size_t _num_sort_key_columns; size_t _num_short_key_columns; - InvertedIndexFileInfo _inverted_index_file_info; + std::unique_ptr _short_key_index_builder; std::unique_ptr _primary_key_index_builder; std::vector> _column_writers; diff --git a/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp b/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp index ce033cdd0022d01..0ac9f349769cd40 100644 --- a/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp +++ b/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp @@ -90,13 +90,14 @@ VerticalSegmentWriter::VerticalSegmentWriter(io::FileWriter* file_writer, uint32 TabletSchemaSPtr tablet_schema, BaseTabletSPtr tablet, DataDir* data_dir, const VerticalSegmentWriterOptions& opts, - io::FileWriterPtr inverted_file_writer) + InvertedIndexFileWriter* inverted_file_writer) : _segment_id(segment_id), _tablet_schema(std::move(tablet_schema)), _tablet(std::move(tablet)), _data_dir(data_dir), _opts(opts), _file_writer(file_writer), + _inverted_index_file_writer(inverted_file_writer), _mem_tracker(std::make_unique( vertical_segment_writer_mem_tracker_name(segment_id))), _mow_context(std::move(opts.mow_ctx)) { @@ -138,17 +139,6 @@ VerticalSegmentWriter::VerticalSegmentWriter(io::FileWriter* file_writer, uint32 } } } - if (_tablet_schema->has_inverted_index()) { - _inverted_index_file_writer = std::make_unique( - _opts.rowset_ctx->fs(), - std::string {InvertedIndexDescriptor::get_index_file_path_prefix( - _opts.rowset_ctx->segment_path(segment_id))}, - _opts.rowset_ctx->rowset_id.to_string(), segment_id, - _tablet_schema->get_inverted_index_storage_format(), - std::move(inverted_file_writer)); - _inverted_index_file_writer->set_file_writer_opts( - 
_opts.rowset_ctx->get_file_writer_options()); - } } VerticalSegmentWriter::~VerticalSegmentWriter() { @@ -211,22 +201,20 @@ Status VerticalSegmentWriter::_create_column_writer(uint32_t cid, const TabletCo tablet_schema->skip_write_index_on_load()) { skip_inverted_index = true; } - // indexes for this column - opts.indexes = tablet_schema->get_indexes_for_column(column); + if (!InvertedIndexColumnWriter::check_support_inverted_index(column)) { opts.need_zone_map = false; opts.need_bloom_filter = false; opts.need_bitmap_index = false; } - for (const auto* index : opts.indexes) { - if (!skip_inverted_index && index->index_type() == IndexType::INVERTED) { - opts.inverted_index = index; - opts.need_inverted_index = true; - // TODO support multiple inverted index - break; - } + if (const auto& index = tablet_schema->inverted_index(column); + index != nullptr && !skip_inverted_index) { + opts.inverted_index = index; + opts.need_inverted_index = true; + DCHECK(_inverted_index_file_writer != nullptr); + opts.inverted_index_file_writer = _inverted_index_file_writer; + // TODO support multiple inverted index } - opts.inverted_index_file_writer = _inverted_index_file_writer.get(); #define CHECK_FIELD_TYPE(TYPE, type_name) \ if (column.type() == FieldType::OLAP_FIELD_TYPE_##TYPE) { \ @@ -250,6 +238,35 @@ Status VerticalSegmentWriter::_create_column_writer(uint32_t cid, const TabletCo #undef CHECK_FIELD_TYPE + if (_opts.rowset_ctx != nullptr) { + int64_t storage_page_size = _opts.rowset_ctx->storage_page_size; + // storage_page_size must be between 4KB and 10MB. 
+ if (storage_page_size >= 4096 && storage_page_size <= 10485760) { + opts.data_page_size = storage_page_size; + } + } + DBUG_EXECUTE_IF("VerticalSegmentWriter._create_column_writer.storage_page_size", { + auto table_id = DebugPoints::instance()->get_debug_param_or_default( + "VerticalSegmentWriter._create_column_writer.storage_page_size", "table_id", + INT_MIN); + auto target_data_page_size = DebugPoints::instance()->get_debug_param_or_default( + "VerticalSegmentWriter._create_column_writer.storage_page_size", + "storage_page_size", INT_MIN); + if (table_id == INT_MIN || target_data_page_size == INT_MIN) { + return Status::Error( + "Debug point parameters missing: either 'table_id' or 'storage_page_size' not " + "set."); + } + if (table_id == _tablet_schema->table_id() && + opts.data_page_size != target_data_page_size) { + return Status::Error( + "Mismatch in 'storage_page_size': expected size does not match the current " + "data page size. " + "Expected: " + + std::to_string(target_data_page_size) + + ", Actual: " + std::to_string(opts.data_page_size) + "."); + } + }) if (column.is_row_store_column()) { // smaller page size for row store column auto page_size = _tablet_schema->row_store_page_size(); @@ -1052,8 +1069,10 @@ Status VerticalSegmentWriter::_append_block_with_variant_subcolumns(RowsInBlock& _opts.rowset_ctx->merged_tablet_schema = _opts.rowset_ctx->tablet_schema; } TabletSchemaSPtr update_schema; + bool check_schema_size = true; RETURN_IF_ERROR(vectorized::schema_util::get_least_common_schema( - {_opts.rowset_ctx->merged_tablet_schema, _flush_schema}, nullptr, update_schema)); + {_opts.rowset_ctx->merged_tablet_schema, _flush_schema}, nullptr, update_schema, + check_schema_size)); CHECK_GE(update_schema->num_columns(), _flush_schema->num_columns()) << "Rowset merge schema columns count is " << update_schema->num_columns() << ", but flush_schema is larger " << _flush_schema->num_columns() @@ -1386,9 +1405,6 @@ Status 
VerticalSegmentWriter::finalize_columns_index(uint64_t* index_size) { *index_size = _file_writer->bytes_appended() - index_start; } - if (_inverted_index_file_writer != nullptr) { - _inverted_index_file_info = _inverted_index_file_writer->get_index_file_info(); - } // reset all column writers and data_conveter clear(); @@ -1463,9 +1479,6 @@ Status VerticalSegmentWriter::_write_inverted_index() { for (auto& column_writer : _column_writers) { RETURN_IF_ERROR(column_writer->write_inverted_index()); } - if (_inverted_index_file_writer != nullptr) { - RETURN_IF_ERROR(_inverted_index_file_writer->close()); - } return Status::OK(); } @@ -1552,13 +1565,6 @@ void VerticalSegmentWriter::_set_max_key(const Slice& key) { _max_key.append(key.get_data(), key.get_size()); } -int64_t VerticalSegmentWriter::get_inverted_index_total_size() { - if (_inverted_index_file_writer != nullptr) { - return _inverted_index_file_writer->get_index_file_total_size(); - } - return 0; -} - inline bool VerticalSegmentWriter::_is_mow() { return _tablet_schema->keys_type() == UNIQUE_KEYS && _opts.enable_unique_key_merge_on_write; } diff --git a/be/src/olap/rowset/segment_v2/vertical_segment_writer.h b/be/src/olap/rowset/segment_v2/vertical_segment_writer.h index 881a6cee5b41e16..951e9c2e2838c3d 100644 --- a/be/src/olap/rowset/segment_v2/vertical_segment_writer.h +++ b/be/src/olap/rowset/segment_v2/vertical_segment_writer.h @@ -34,6 +34,7 @@ #include "gutil/strings/substitute.h" #include "olap/olap_define.h" #include "olap/rowset/segment_v2/column_writer.h" +#include "olap/rowset/segment_v2/inverted_index_file_writer.h" #include "olap/tablet.h" #include "olap/tablet_schema.h" #include "util/faststring.h" @@ -82,7 +83,7 @@ class VerticalSegmentWriter { explicit VerticalSegmentWriter(io::FileWriter* file_writer, uint32_t segment_id, TabletSchemaSPtr tablet_schema, BaseTabletSPtr tablet, DataDir* data_dir, const VerticalSegmentWriterOptions& opts, - io::FileWriterPtr inverted_file_writer = nullptr); + 
InvertedIndexFileWriter* inverted_file_writer); ~VerticalSegmentWriter(); VerticalSegmentWriter(const VerticalSegmentWriter&) = delete; @@ -99,9 +100,7 @@ class VerticalSegmentWriter { [[nodiscard]] std::string data_dir_path() const { return _data_dir == nullptr ? "" : _data_dir->path(); } - [[nodiscard]] InvertedIndexFileInfo get_inverted_index_file_info() const { - return _inverted_index_file_info; - } + [[nodiscard]] uint32_t num_rows_written() const { return _num_rows_written; } // for partial update @@ -122,10 +121,19 @@ class VerticalSegmentWriter { TabletSchemaSPtr flush_schema() const { return _flush_schema; }; - int64_t get_inverted_index_total_size(); - void clear(); + Status close_inverted_index(int64_t* inverted_index_file_size) { + // no inverted index + if (_inverted_index_file_writer == nullptr) { + *inverted_index_file_size = 0; + return Status::OK(); + } + RETURN_IF_ERROR(_inverted_index_file_writer->close()); + *inverted_index_file_size = _inverted_index_file_writer->get_index_file_total_size(); + return Status::OK(); + } + private: void _init_column_meta(ColumnMetaPB* meta, uint32_t column_id, const TabletColumn& column); Status _create_column_writer(uint32_t cid, const TabletColumn& column, @@ -213,14 +221,15 @@ class VerticalSegmentWriter { // Not owned. owned by RowsetWriter io::FileWriter* _file_writer = nullptr; - std::unique_ptr _inverted_index_file_writer; + // Not owned. 
owned by RowsetWriter or SegmentFlusher + InvertedIndexFileWriter* _inverted_index_file_writer = nullptr; SegmentFooterPB _footer; // for mow tables with cluster key, the sort key is the cluster keys not unique keys // for other tables, the sort key is the keys size_t _num_sort_key_columns; size_t _num_short_key_columns; - InvertedIndexFileInfo _inverted_index_file_info; + std::unique_ptr _short_key_index_builder; std::unique_ptr _primary_key_index_builder; std::vector> _column_writers; diff --git a/be/src/olap/rowset/vertical_beta_rowset_writer.cpp b/be/src/olap/rowset/vertical_beta_rowset_writer.cpp index ced0fb880c41fba..46070f8dccd7ce1 100644 --- a/be/src/olap/rowset/vertical_beta_rowset_writer.cpp +++ b/be/src/olap/rowset/vertical_beta_rowset_writer.cpp @@ -138,7 +138,6 @@ Status VerticalBetaRowsetWriter::_flush_columns(segment_v2::SegmentWriter* se this->_segment_num_rows.resize(_cur_writer_idx + 1); this->_segment_num_rows[_cur_writer_idx] = _segment_writers[_cur_writer_idx]->row_count(); } - this->_total_index_size += static_cast(index_size); return Status::OK(); } @@ -164,26 +163,28 @@ Status VerticalBetaRowsetWriter::_create_segment_writer( int seg_id = this->_num_segment.fetch_add(1, std::memory_order_relaxed); - io::FileWriterPtr file_writer; - io::FileWriterOptions opts = this->_context.get_file_writer_options(); + io::FileWriterPtr segment_file_writer; + RETURN_IF_ERROR(BaseBetaRowsetWriter::create_file_writer(seg_id, segment_file_writer)); + DCHECK(segment_file_writer != nullptr); - auto path = context.segment_path(seg_id); - auto& fs = context.fs_ref(); - Status st = fs.create_file(path, &file_writer, &opts); - if (!st.ok()) { - LOG(WARNING) << "failed to create writable file. 
path=" << path << ", err: " << st; - return st; + InvertedIndexFileWriterPtr inverted_index_file_writer; + if (context.tablet_schema->has_inverted_index()) { + RETURN_IF_ERROR(RowsetWriter::create_inverted_index_file_writer( + seg_id, &inverted_index_file_writer)); } - DCHECK(file_writer != nullptr); segment_v2::SegmentWriterOptions writer_options; writer_options.enable_unique_key_merge_on_write = context.enable_unique_key_merge_on_write; writer_options.rowset_ctx = &context; writer_options.max_rows_per_segment = context.max_rows_per_segment; - *writer = std::make_unique(file_writer.get(), seg_id, - context.tablet_schema, context.tablet, - context.data_dir, writer_options); - RETURN_IF_ERROR(this->_seg_files.add(seg_id, std::move(file_writer))); + *writer = std::make_unique( + segment_file_writer.get(), seg_id, context.tablet_schema, context.tablet, + context.data_dir, writer_options, inverted_index_file_writer.get()); + + RETURN_IF_ERROR(this->_seg_files.add(seg_id, std::move(segment_file_writer))); + if (context.tablet_schema->has_inverted_index()) { + RETURN_IF_ERROR(this->_idx_files.add(seg_id, std::move(inverted_index_file_writer))); + } auto s = (*writer)->init(column_ids, is_key); if (!s.ok()) { @@ -205,10 +206,7 @@ Status VerticalBetaRowsetWriter::final_flush() { LOG(WARNING) << "Fail to finalize segment footer, " << st; return st; } - this->_total_data_size += segment_size + segment_writer->get_inverted_index_total_size(); - this->_total_index_size += segment_writer->get_inverted_index_total_size(); - this->_idx_files_info.add_file_info(segment_writer->get_segment_id(), - segment_writer->get_inverted_index_file_info()); + this->_total_data_size += segment_size; segment_writer.reset(); } return Status::OK(); @@ -217,6 +215,7 @@ Status VerticalBetaRowsetWriter::final_flush() { template requires std::is_base_of_v Status VerticalBetaRowsetWriter::_close_file_writers() { + RETURN_IF_ERROR(BaseBetaRowsetWriter::_close_inverted_index_file_writers()); return 
this->_seg_files.close(); } diff --git a/be/src/olap/rowset_builder.cpp b/be/src/olap/rowset_builder.cpp index 112986d31d9bb8b..8141fad0a8d2809 100644 --- a/be/src/olap/rowset_builder.cpp +++ b/be/src/olap/rowset_builder.cpp @@ -232,6 +232,7 @@ Status RowsetBuilder::init() { context.mow_context = mow_context; context.write_file_cache = _req.write_file_cache; context.partial_update_info = _partial_update_info; + context.storage_page_size = _tablet->tablet_meta()->storage_page_size(); _rowset_writer = DORIS_TRY(_tablet->create_rowset_writer(context, false)); _pending_rs_guard = _engine.pending_local_rowsets().add(context.rowset_id); @@ -258,6 +259,17 @@ Status BaseRowsetBuilder::submit_calc_delete_bitmap_task() { } std::lock_guard l(_lock); SCOPED_TIMER(_submit_delete_bitmap_timer); + if (_partial_update_info && _partial_update_info->is_flexible_partial_update()) { + if (_rowset->num_segments() > 1) { + // in flexible partial update, when there are more one segment in one load, + // we need to do alignment process for same keys between segments, we haven't + // implemented it yet and just report an error when encouter this situation + return Status::NotSupported( + "too large input data in flexible partial update, Please " + "reduce the amount of data imported in a single load."); + } + } + // tablet is under alter process. The delete bitmap will be calculated after conversion. 
if (_tablet->tablet_state() == TABLET_NOTREADY) { LOG(INFO) << "tablet is under alter process, delete bitmap will be calculated later, " diff --git a/be/src/olap/schema_change.cpp b/be/src/olap/schema_change.cpp index 3b77ac19e2e5d28..5ef85dbaf11c19a 100644 --- a/be/src/olap/schema_change.cpp +++ b/be/src/olap/schema_change.cpp @@ -81,6 +81,7 @@ #include "vec/columns/column.h" #include "vec/columns/column_nullable.h" #include "vec/common/assert_cast.h" +#include "vec/common/schema_util.h" #include "vec/core/block.h" #include "vec/core/column_with_type_and_name.h" #include "vec/exprs/vexpr.h" @@ -1367,13 +1368,9 @@ Status SchemaChangeJob::parse_request(const SchemaChangeParams& sc_params, *sc_directly = true; return Status::OK(); } else if (column_mapping->ref_column_idx >= 0) { - const auto& column_new = new_tablet_schema->column(i); - const auto& column_old = base_tablet_schema->column(column_mapping->ref_column_idx); // index changed - if (column_new.is_bf_column() != column_old.is_bf_column() || - column_new.has_bitmap_index() != column_old.has_bitmap_index() || - new_tablet_schema->has_inverted_index(column_new) != - base_tablet_schema->has_inverted_index(column_old)) { + if (vectorized::schema_util::has_schema_index_diff( + new_tablet_schema, base_tablet_schema, i, column_mapping->ref_column_idx)) { *sc_directly = true; return Status::OK(); } diff --git a/be/src/olap/segment_loader.cpp b/be/src/olap/segment_loader.cpp index abc82c6f3ee98d3..26ac54c699b81a4 100644 --- a/be/src/olap/segment_loader.cpp +++ b/be/src/olap/segment_loader.cpp @@ -17,6 +17,8 @@ #include "olap/segment_loader.h" +#include + #include "common/config.h" #include "common/status.h" #include "olap/olap_define.h" @@ -52,7 +54,8 @@ void SegmentCache::erase(const SegmentCache::CacheKey& key) { Status SegmentLoader::load_segments(const BetaRowsetSharedPtr& rowset, SegmentCacheHandle* cache_handle, bool use_cache, - bool need_load_pk_index_and_bf) { + bool need_load_pk_index_and_bf, + 
OlapReaderStatistics* index_load_stats) { if (cache_handle->is_inited()) { return Status::OK(); } @@ -70,7 +73,7 @@ Status SegmentLoader::load_segments(const BetaRowsetSharedPtr& rowset, segment_v2::SegmentSharedPtr segment; RETURN_IF_ERROR(rowset->load_segment(i, &segment)); if (need_load_pk_index_and_bf) { - RETURN_IF_ERROR(segment->load_pk_index_and_bf()); + RETURN_IF_ERROR(segment->load_pk_index_and_bf(index_load_stats)); } if (use_cache && !config::disable_segment_cache) { // memory of SegmentCache::CacheValue will be handled by SegmentCache diff --git a/be/src/olap/segment_loader.h b/be/src/olap/segment_loader.h index b3b88fa7700409e..834906da93bf740 100644 --- a/be/src/olap/segment_loader.h +++ b/be/src/olap/segment_loader.h @@ -117,7 +117,8 @@ class SegmentLoader { // Load segments of "rowset", return the "cache_handle" which contains segments. // If use_cache is true, it will be loaded from _cache. Status load_segments(const BetaRowsetSharedPtr& rowset, SegmentCacheHandle* cache_handle, - bool use_cache = false, bool need_load_pk_index_and_bf = false); + bool use_cache = false, bool need_load_pk_index_and_bf = false, + OlapReaderStatistics* index_load_stats = nullptr); void erase_segment(const SegmentCache::CacheKey& key); diff --git a/be/src/olap/single_replica_compaction.cpp b/be/src/olap/single_replica_compaction.cpp index ef93ab25caeac9f..7470afe0ef62c72 100644 --- a/be/src/olap/single_replica_compaction.cpp +++ b/be/src/olap/single_replica_compaction.cpp @@ -149,11 +149,15 @@ Status SingleReplicaCompaction::_do_single_replica_compaction_impl() { LOG(INFO) << "succeed to do single replica compaction" << ". 
tablet=" << _tablet->tablet_id() << ", output_version=" << _output_version << ", current_max_version=" << current_max_version - << ", input_rowset_size=" << _input_rowsets_size + << ", input_rowsets_data_size=" << _input_rowsets_data_size + << ", input_rowsets_index_size=" << _input_rowsets_index_size + << ", input_rowsets_total_size=" << _input_rowsets_total_size << ", input_row_num=" << _input_row_num << ", input_segments_num=" << _input_num_segments - << ", _input_index_size=" << _input_index_size + << ", _input_index_size=" << _input_rowsets_index_size << ", output_rowset_data_size=" << _output_rowset->data_disk_size() + << ", output_rowset_index_size=" << _output_rowset->index_disk_size() + << ", output_rowset_total_size=" << _output_rowset->total_disk_size() << ", output_row_num=" << _output_rowset->num_rows() << ", output_segments_num=" << _output_rowset->num_segments(); return Status::OK(); @@ -264,10 +268,11 @@ bool SingleReplicaCompaction::_find_rowset_to_fetch(const std::vector& return false; } for (auto& rowset : _input_rowsets) { - _input_rowsets_size += rowset->data_disk_size(); + _input_rowsets_data_size += rowset->data_disk_size(); _input_row_num += rowset->num_rows(); _input_num_segments += rowset->num_segments(); - _input_index_size += rowset->index_disk_size(); + _input_rowsets_index_size += rowset->index_disk_size(); + _input_rowsets_total_size += rowset->data_disk_size() + rowset->index_disk_size(); } _output_version = *proper_version; } diff --git a/be/src/olap/snapshot_manager.cpp b/be/src/olap/snapshot_manager.cpp index 2cfa9a8e8b763d0..67205835b53947b 100644 --- a/be/src/olap/snapshot_manager.cpp +++ b/be/src/olap/snapshot_manager.cpp @@ -698,11 +698,8 @@ Status SnapshotManager::_create_snapshot_files(const TabletSharedPtr& ref_tablet if (tablet_schema.get_inverted_index_storage_format() == InvertedIndexStorageFormatPB::V1) { - for (const auto& index : tablet_schema.indexes()) { - if (index.index_type() != IndexType::INVERTED) { - continue; 
- } - auto index_id = index.index_id(); + for (const auto& index : tablet_schema.inverted_indexes()) { + auto index_id = index->index_id(); auto index_file = ref_tablet->get_segment_index_filepath( rowset_id, segment_index, index_id); auto snapshot_segment_index_file_path = diff --git a/be/src/olap/tablet.cpp b/be/src/olap/tablet.cpp index 9dfb7940dcc9168..b6b81811091d947 100644 --- a/be/src/olap/tablet.cpp +++ b/be/src/olap/tablet.cpp @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -35,6 +36,7 @@ #include #include #include +#include #include #include "common/compiler_util.h" // IWYU pragma: keep @@ -86,6 +88,7 @@ #include "olap/rowset/beta_rowset.h" #include "olap/rowset/rowset.h" #include "olap/rowset/rowset_factory.h" +#include "olap/rowset/rowset_fwd.h" #include "olap/rowset/rowset_meta.h" #include "olap/rowset/rowset_meta_manager.h" #include "olap/rowset/rowset_writer.h" @@ -329,6 +332,7 @@ Status Tablet::init() { // should save tablet meta to remote meta store // if it's a primary replica void Tablet::save_meta() { + check_table_size_correctness(); auto res = _tablet_meta->save_meta(_data_dir); CHECK_EQ(res, Status::OK()) << "fail to save tablet_meta. 
res=" << res << ", root=" << _data_dir->path(); @@ -1201,10 +1205,6 @@ Status Tablet::_contains_version(const Version& version) { return Status::OK(); } -TabletInfo Tablet::get_tablet_info() const { - return TabletInfo(tablet_id(), tablet_uid()); -} - std::vector Tablet::pick_candidate_rowsets_to_cumulative_compaction() { std::vector candidate_rowsets; if (_cumulative_point == K_INVALID_CUMULATIVE_POINT) { @@ -1272,7 +1272,7 @@ std::vector Tablet::pick_candidate_rowsets_to_build_inverted_in std::shared_lock rlock(_meta_lock); auto has_alter_inverted_index = [&](RowsetSharedPtr rowset) -> bool { for (const auto& index_id : alter_index_uids) { - if (rowset->tablet_schema()->has_inverted_index_with_index_id(index_id, "")) { + if (rowset->tablet_schema()->has_inverted_index_with_index_id(index_id)) { return true; } } @@ -1694,6 +1694,19 @@ void Tablet::build_tablet_report_info(TTabletInfo* tablet_info, } } +void Tablet::report_error(const Status& st) { + if (st.is()) { + ++_io_error_times; + } else if (st.is()) { + _io_error_times = config::max_tablet_io_errors + 1; + } else if (st.is()) { + check_tablet_path_exists(); + if (!_is_tablet_path_exists.load(std::memory_order_relaxed)) { + _io_error_times = config::max_tablet_io_errors + 1; + } + } +} + Status Tablet::prepare_compaction_and_calculate_permits( CompactionType compaction_type, const TabletSharedPtr& tablet, std::shared_ptr& compaction, int64_t& permits) { @@ -2034,8 +2047,8 @@ Status Tablet::_cooldown_data(RowsetSharedPtr rowset) { LOG(INFO) << "Upload rowset " << old_rowset->version() << " " << new_rowset_id.to_string() << " to " << storage_resource.fs->root_path().native() << ", tablet_id=" << tablet_id() << ", duration=" << duration.count() - << ", capacity=" << old_rowset->data_disk_size() - << ", tp=" << old_rowset->data_disk_size() / duration.count() + << ", capacity=" << old_rowset->total_disk_size() + << ", tp=" << old_rowset->total_disk_size() / duration.count() << ", old rowset_id=" << 
old_rowset->rowset_id().to_string(); // gen a new rowset @@ -2414,7 +2427,7 @@ RowsetSharedPtr Tablet::need_cooldown(int64_t* cooldown_timestamp, size_t* file_ // current time or it's datatime is less than current time if (newest_cooldown_time != 0 && newest_cooldown_time < UnixSeconds()) { *cooldown_timestamp = newest_cooldown_time; - *file_size = rowset->data_disk_size(); + *file_size = rowset->total_disk_size(); VLOG_DEBUG << "tablet need cooldown, tablet id: " << tablet_id() << " file_size: " << *file_size; return rowset; @@ -2637,12 +2650,9 @@ void Tablet::gc_binlogs(int64_t version) { // add binlog segment files and index files for (int64_t i = 0; i < num_segments; ++i) { wait_for_deleted_binlog_files.emplace_back(get_segment_filepath(rowset_id, i)); - for (const auto& index : this->tablet_schema()->indexes()) { - if (index.index_type() != IndexType::INVERTED) { - continue; - } + for (const auto& index : this->tablet_schema()->inverted_indexes()) { wait_for_deleted_binlog_files.emplace_back( - get_segment_index_filepath(rowset_id, i, index.index_id())); + get_segment_index_filepath(rowset_id, i, index->index_id())); } } }; @@ -2724,4 +2734,124 @@ void Tablet::clear_cache() { } } +void Tablet::check_table_size_correctness() { + if (!config::enable_table_size_correctness_check) { + return; + } + const std::vector& all_rs_metas = _tablet_meta->all_rs_metas(); + for (const auto& rs_meta : all_rs_metas) { + int64_t total_segment_size = get_segment_file_size(rs_meta); + int64_t total_inverted_index_size = get_inverted_index_file_size(rs_meta); + if (rs_meta->data_disk_size() != total_segment_size || + rs_meta->index_disk_size() != total_inverted_index_size || + rs_meta->data_disk_size() + rs_meta->index_disk_size() != rs_meta->total_disk_size()) { + LOG(WARNING) << "[Local table size check failed]:" + << " tablet id: " << rs_meta->tablet_id() + << ", rowset id:" << rs_meta->rowset_id() + << ", rowset data disk size:" << rs_meta->data_disk_size() + << ", 
rowset real data disk size:" << total_segment_size + << ", rowset index disk size:" << rs_meta->index_disk_size() + << ", rowset real index disk size:" << total_inverted_index_size + << ", rowset total disk size:" << rs_meta->total_disk_size() + << ", rowset segment path:" + << StorageResource().remote_segment_path( + rs_meta->tablet_id(), rs_meta->rowset_id().to_string(), 0); + DCHECK(false); + } + } +} + +std::string Tablet::get_segment_path(const RowsetMetaSharedPtr& rs_meta, int64_t seg_id) { + std::string segment_path; + if (rs_meta->is_local()) { + segment_path = local_segment_path(_tablet_path, rs_meta->rowset_id().to_string(), seg_id); + } else { + segment_path = rs_meta->remote_storage_resource().value()->remote_segment_path( + rs_meta->tablet_id(), rs_meta->rowset_id().to_string(), seg_id); + } + return segment_path; +} + +int64_t Tablet::get_segment_file_size(const RowsetMetaSharedPtr& rs_meta) { + const auto& fs = rs_meta->fs(); + if (!fs) { + LOG(WARNING) << "get fs failed, resource_id=" << rs_meta->resource_id(); + } + int64_t total_segment_size = 0; + for (int64_t seg_id = 0; seg_id < rs_meta->num_segments(); seg_id++) { + std::string segment_path = get_segment_path(rs_meta, seg_id); + int64_t segment_file_size = 0; + auto st = fs->file_size(segment_path, &segment_file_size); + if (!st.ok()) { + segment_file_size = 0; + LOG(WARNING) << "table size correctness check get segment size failed! 
msg:" + << st.to_string() << ", segment path:" << segment_path; + } + total_segment_size += segment_file_size; + } + return total_segment_size; +} + +int64_t Tablet::get_inverted_index_file_size(const RowsetMetaSharedPtr& rs_meta) { + const auto& fs = rs_meta->fs(); + if (!fs) { + LOG(WARNING) << "get fs failed, resource_id=" << rs_meta->resource_id(); + } + int64_t total_inverted_index_size = 0; + + if (rs_meta->tablet_schema()->get_inverted_index_storage_format() == + InvertedIndexStorageFormatPB::V1) { + const auto& indices = rs_meta->tablet_schema()->inverted_indexes(); + for (auto& index : indices) { + for (int seg_id = 0; seg_id < rs_meta->num_segments(); ++seg_id) { + std::string segment_path = get_segment_path(rs_meta, seg_id); + int64_t file_size = 0; + + std::string inverted_index_file_path = + InvertedIndexDescriptor::get_index_file_path_v1( + InvertedIndexDescriptor::get_index_file_path_prefix(segment_path), + index->index_id(), index->get_index_suffix()); + auto st = fs->file_size(inverted_index_file_path, &file_size); + if (!st.ok()) { + file_size = 0; + LOG(WARNING) << " tablet id: " << get_tablet_info().tablet_id + << ", rowset id:" << rs_meta->rowset_id() + << ", table size correctness check get inverted index v1 " + "size failed! 
msg:" + << st.to_string() + << ", inverted index path:" << inverted_index_file_path; + } + total_inverted_index_size += file_size; + } + } + } else { + for (int seg_id = 0; seg_id < rs_meta->num_segments(); ++seg_id) { + int64_t file_size = 0; + std::string segment_path = get_segment_path(rs_meta, seg_id); + std::string inverted_index_file_path = InvertedIndexDescriptor::get_index_file_path_v2( + InvertedIndexDescriptor::get_index_file_path_prefix(segment_path)); + auto st = fs->file_size(inverted_index_file_path, &file_size); + if (!st.ok()) { + file_size = 0; + if (st.is()) { + LOG(INFO) << " tablet id: " << get_tablet_info().tablet_id + << ", rowset id:" << rs_meta->rowset_id() + << ", table size correctness check get inverted index v2 failed " + "because file not exist:" + << inverted_index_file_path; + } else { + LOG(WARNING) << " tablet id: " << get_tablet_info().tablet_id + << ", rowset id:" << rs_meta->rowset_id() + << ", table size correctness check get inverted index v2 " + "size failed! 
msg:" + << st.to_string() + << ", inverted index path:" << inverted_index_file_path; + } + } + total_inverted_index_size += file_size; + } + } + return total_inverted_index_size; +} + } // namespace doris diff --git a/be/src/olap/tablet.h b/be/src/olap/tablet.h index 2b4daa5a4c35ac8..f5866c67641581c 100644 --- a/be/src/olap/tablet.h +++ b/be/src/olap/tablet.h @@ -115,7 +115,6 @@ class Tablet final : public BaseTablet { DataDir* data_dir() const { return _data_dir; } int64_t replica_id() const { return _tablet_meta->replica_id(); } - TabletUid tablet_uid() const { return _tablet_meta->tablet_uid(); } const std::string& tablet_path() const { return _tablet_path; } @@ -279,8 +278,6 @@ class Tablet final : public BaseTablet { void check_tablet_path_exists(); - TabletInfo get_tablet_info() const; - std::vector pick_candidate_rowsets_to_cumulative_compaction(); std::vector pick_candidate_rowsets_to_base_compaction(); std::vector pick_candidate_rowsets_to_full_compaction(); @@ -451,13 +448,7 @@ class Tablet final : public BaseTablet { void gc_binlogs(int64_t version); Status ingest_binlog_metas(RowsetBinlogMetasPB* metas_pb); - inline void report_error(const Status& st) { - if (st.is()) { - ++_io_error_times; - } else if (st.is()) { - _io_error_times = config::max_tablet_io_errors + 1; - } - } + void report_error(const Status& st); inline int64_t get_io_error_times() const { return _io_error_times; } @@ -540,6 +531,10 @@ class Tablet final : public BaseTablet { //////////////////////////////////////////////////////////////////////////// void _clear_cache_by_rowset(const BetaRowsetSharedPtr& rowset); + void check_table_size_correctness(); + std::string get_segment_path(const RowsetMetaSharedPtr& rs_meta, int64_t seg_id); + int64_t get_segment_file_size(const RowsetMetaSharedPtr& rs_meta); + int64_t get_inverted_index_file_size(const RowsetMetaSharedPtr& rs_meta); public: static const int64_t K_INVALID_CUMULATIVE_POINT = -1; diff --git 
a/be/src/olap/tablet_column_object_pool.cpp b/be/src/olap/tablet_column_object_pool.cpp new file mode 100644 index 000000000000000..6e07fb4e831e60e --- /dev/null +++ b/be/src/olap/tablet_column_object_pool.cpp @@ -0,0 +1,57 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "olap/tablet_column_object_pool.h" + +#include +#include + +#include "olap/tablet_schema.h" + +namespace doris { + +bvar::Adder g_tablet_column_cache_count("tablet_column_cache_count"); +bvar::Adder g_tablet_column_cache_hit_count("tablet_column_cache_hit_count"); + +std::pair TabletColumnObjectPool::insert(const std::string& key) { + auto* lru_handle = lookup(key); + TabletColumnPtr tablet_column_ptr; + if (lru_handle) { + auto* value = (CacheValue*)LRUCachePolicy::value(lru_handle); + tablet_column_ptr = value->tablet_column; + VLOG_DEBUG << "reuse column "; + g_tablet_column_cache_hit_count << 1; + } else { + auto* value = new CacheValue; + tablet_column_ptr = std::make_shared(); + ColumnPB pb; + pb.ParseFromString(key); + tablet_column_ptr->init_from_pb(pb); + VLOG_DEBUG << "create column "; + value->tablet_column = tablet_column_ptr; + lru_handle = LRUCachePolicy::insert(key, value, 1, 0, CachePriority::NORMAL); + g_tablet_column_cache_count << 1; + } + DCHECK(lru_handle != nullptr); + return {lru_handle, tablet_column_ptr}; +} + +TabletColumnObjectPool::CacheValue::~CacheValue() { + g_tablet_column_cache_count << -1; +} + +} // namespace doris diff --git a/be/src/olap/tablet_column_object_pool.h b/be/src/olap/tablet_column_object_pool.h new file mode 100644 index 000000000000000..1eead6a25c9609a --- /dev/null +++ b/be/src/olap/tablet_column_object_pool.h @@ -0,0 +1,58 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include "olap/tablet_fwd.h" +#include "olap/tablet_schema.h" +#include "runtime/exec_env.h" +#include "runtime/memory/lru_cache_policy.h" + +namespace doris { + +// TabletColumnObjectPool is a cache for TabletColumn objects. It is used to reduce memory consumption +// when there are a large number of identical TabletColumns in the cluster, which usually occurs +// when VARIANT type columns are modified and added, each Rowset has an individual TabletSchema. +// Excessive TabletSchemas can lead to significant memory overhead. Reusing memory for identical +// TabletColumns would greatly reduce this memory consumption. 
+ +class TabletColumnObjectPool : public LRUCachePolicy { +public: + TabletColumnObjectPool(size_t capacity) + : LRUCachePolicy(CachePolicy::CacheType::TABLET_COLUMN_OBJECT_POOL, capacity, + LRUCacheType::NUMBER, config::tablet_schema_cache_recycle_interval) {} + + static TabletColumnObjectPool* create_global_column_cache(size_t capacity) { + auto* res = new TabletColumnObjectPool(capacity); + return res; + } + + static TabletColumnObjectPool* instance() { + return ExecEnv::GetInstance()->get_tablet_column_object_pool(); + } + + std::pair insert(const std::string& key); + +private: + class CacheValue : public LRUCacheValueBase { + public: + ~CacheValue() override; + TabletColumnPtr tablet_column; + }; +}; + +} // namespace doris diff --git a/be/src/olap/tablet_manager.cpp b/be/src/olap/tablet_manager.cpp index 64eb408c9e3dbde..b853401855ce940 100644 --- a/be/src/olap/tablet_manager.cpp +++ b/be/src/olap/tablet_manager.cpp @@ -101,7 +101,9 @@ TabletManager::TabletManager(StorageEngine& engine, int32_t tablet_map_lock_shar } TabletManager::~TabletManager() { +#ifndef BE_TEST DEREGISTER_HOOK_METRIC(tablet_meta_mem_consumption); +#endif } Status TabletManager::_add_tablet_unlocked(TTabletId tablet_id, const TabletSharedPtr& tablet, diff --git a/be/src/olap/tablet_meta.cpp b/be/src/olap/tablet_meta.cpp index 97e74211504d58b..d153069babdfccc 100644 --- a/be/src/olap/tablet_meta.cpp +++ b/be/src/olap/tablet_meta.cpp @@ -97,7 +97,8 @@ TabletMetaSharedPtr TabletMeta::create( request.time_series_compaction_file_count_threshold, request.time_series_compaction_time_threshold_seconds, request.time_series_compaction_empty_rowsets_threshold, - request.time_series_compaction_level_threshold, inverted_index_file_storage_format); + request.time_series_compaction_level_threshold, inverted_index_file_storage_format, + request.storage_page_size); } TabletMeta::TabletMeta() @@ -118,7 +119,8 @@ TabletMeta::TabletMeta(int64_t table_id, int64_t partition_id, int64_t tablet_id int64_t 
time_series_compaction_time_threshold_seconds, int64_t time_series_compaction_empty_rowsets_threshold, int64_t time_series_compaction_level_threshold, - TInvertedIndexFileStorageFormat::type inverted_index_file_storage_format) + TInvertedIndexFileStorageFormat::type inverted_index_file_storage_format, + int64_t storage_page_size) : _tablet_uid(0, 0), _schema(new TabletSchema), _delete_bitmap(new DeleteBitmap(tablet_id)) { @@ -150,6 +152,7 @@ TabletMeta::TabletMeta(int64_t table_id, int64_t partition_id, int64_t tablet_id time_series_compaction_empty_rowsets_threshold); tablet_meta_pb.set_time_series_compaction_level_threshold( time_series_compaction_level_threshold); + tablet_meta_pb.set_storage_page_size(storage_page_size); TabletSchemaPB* schema = tablet_meta_pb.mutable_schema(); schema->set_num_short_key_columns(tablet_schema.short_key_column_count); schema->set_num_rows_per_row_block(config::default_num_rows_per_column_file_block); @@ -377,7 +380,8 @@ TabletMeta::TabletMeta(const TabletMeta& b) b._time_series_compaction_time_threshold_seconds), _time_series_compaction_empty_rowsets_threshold( b._time_series_compaction_empty_rowsets_threshold), - _time_series_compaction_level_threshold(b._time_series_compaction_level_threshold) {}; + _time_series_compaction_level_threshold(b._time_series_compaction_level_threshold), + _storage_page_size(b._storage_page_size) {}; void TabletMeta::init_column_from_tcolumn(uint32_t unique_id, const TColumn& tcolumn, ColumnPB* column) { @@ -685,6 +689,7 @@ void TabletMeta::init_from_pb(const TabletMetaPB& tablet_meta_pb) { tablet_meta_pb.time_series_compaction_empty_rowsets_threshold(); _time_series_compaction_level_threshold = tablet_meta_pb.time_series_compaction_level_threshold(); + _storage_page_size = tablet_meta_pb.storage_page_size(); } void TabletMeta::to_meta_pb(TabletMetaPB* tablet_meta_pb) { @@ -776,6 +781,7 @@ void TabletMeta::to_meta_pb(TabletMetaPB* tablet_meta_pb) { time_series_compaction_empty_rowsets_threshold()); 
tablet_meta_pb->set_time_series_compaction_level_threshold( time_series_compaction_level_threshold()); + tablet_meta_pb->set_storage_page_size(storage_page_size()); } int64_t TabletMeta::mem_size() const { @@ -983,6 +989,7 @@ bool operator==(const TabletMeta& a, const TabletMeta& b) { return false; if (a._time_series_compaction_level_threshold != b._time_series_compaction_level_threshold) return false; + if (a._storage_page_size != b._storage_page_size) return false; return true; } @@ -1189,6 +1196,9 @@ void DeleteBitmap::add_to_remove_queue( } void DeleteBitmap::remove_stale_delete_bitmap_from_queue(const std::vector& vector) { + if (!config::enable_delete_bitmap_merge_on_compaction) { + return; + } std::shared_lock l(stale_delete_bitmap_lock); // std::vector> to_delete; diff --git a/be/src/olap/tablet_meta.h b/be/src/olap/tablet_meta.h index 3c87fecb83cbd74..0d9645e01901e53 100644 --- a/be/src/olap/tablet_meta.h +++ b/be/src/olap/tablet_meta.h @@ -114,11 +114,17 @@ class TabletMeta : public MetadataAdder { int64_t time_series_compaction_empty_rowsets_threshold = 5, int64_t time_series_compaction_level_threshold = 1, TInvertedIndexFileStorageFormat::type inverted_index_file_storage_format = - TInvertedIndexFileStorageFormat::V2); + TInvertedIndexFileStorageFormat::V2, + int64_t storage_page_size = 65536); // If need add a filed in TableMeta, filed init copy in copy construct function TabletMeta(const TabletMeta& tablet_meta); TabletMeta(TabletMeta&& tablet_meta) = delete; +// UT +#ifdef BE_TEST + TabletMeta(TabletSchemaSPtr tablet_schema) : _schema(tablet_schema) {} +#endif + // Function create_from_file is used to be compatible with previous tablet_meta. // Previous tablet_meta is a physical file in tablet dir, which is not stored in rocksdb. 
Status create_from_file(const std::string& file_path); @@ -288,6 +294,11 @@ class TabletMeta : public MetadataAdder { int64_t avg_rs_meta_serialize_size() const { return _avg_rs_meta_serialize_size; } + void set_storage_page_size(int64_t storage_page_size) { + _storage_page_size = storage_page_size; + } + int64_t storage_page_size() const { return _storage_page_size; } + private: Status _save_meta(DataDir* data_dir); @@ -348,6 +359,8 @@ class TabletMeta : public MetadataAdder { // cloud int64_t _ttl_seconds = 0; + int64_t _storage_page_size = segment_v2::STORAGE_PAGE_SIZE_DEFAULT_VALUE; + mutable std::shared_mutex _meta_lock; }; @@ -637,7 +650,7 @@ inline size_t TabletMeta::num_rows() const { inline size_t TabletMeta::tablet_footprint() const { size_t total_size = 0; for (auto& rs : _rs_metas) { - total_size += rs->data_disk_size(); + total_size += rs->total_disk_size(); } return total_size; } @@ -646,7 +659,7 @@ inline size_t TabletMeta::tablet_local_size() const { size_t total_size = 0; for (auto& rs : _rs_metas) { if (rs->is_local()) { - total_size += rs->data_disk_size(); + total_size += rs->total_disk_size(); } } return total_size; @@ -656,7 +669,7 @@ inline size_t TabletMeta::tablet_remote_size() const { size_t total_size = 0; for (auto& rs : _rs_metas) { if (!rs->is_local()) { - total_size += rs->data_disk_size(); + total_size += rs->total_disk_size(); } } return total_size; diff --git a/be/src/olap/tablet_reader.h b/be/src/olap/tablet_reader.h index 87af3bb08eb36e1..dd9d39d9decee02 100644 --- a/be/src/olap/tablet_reader.h +++ b/be/src/olap/tablet_reader.h @@ -167,7 +167,7 @@ class TabletReader { // used for compaction to record row ids bool record_rowids = false; - RowIdConversion* rowid_conversion; + RowIdConversion* rowid_conversion = nullptr; std::vector topn_filter_source_node_ids; int topn_filter_target_node_id = -1; // used for special optimization for query : ORDER BY key LIMIT n diff --git a/be/src/olap/tablet_schema.cpp 
b/be/src/olap/tablet_schema.cpp index c88a23a0c360cf9..36610f909748f22 100644 --- a/be/src/olap/tablet_schema.cpp +++ b/be/src/olap/tablet_schema.cpp @@ -38,8 +38,10 @@ #include "exec/tablet_info.h" #include "olap/inverted_index_parser.h" #include "olap/olap_define.h" +#include "olap/tablet_column_object_pool.h" #include "olap/types.h" #include "olap/utils.h" +#include "runtime/memory/lru_cache_policy.h" #include "runtime/thread_context.h" #include "tablet_meta.h" #include "vec/aggregate_functions/aggregate_function_simple_factory.h" @@ -747,7 +749,15 @@ void TabletIndex::init_from_thrift(const TOlapTableIndex& index, if (column_idx >= 0) { col_unique_ids[i] = tablet_schema.column(column_idx).unique_id(); } else { - col_unique_ids[i] = -1; + // if column unique id not found by column name, find by column unique id + // column unique id can not bigger than tablet schema column size, if bigger than column size means + // this column is a new column added by light schema change + if (index.__isset.column_unique_ids && + index.column_unique_ids[i] < tablet_schema.num_columns()) { + col_unique_ids[i] = index.column_unique_ids[i]; + } else { + col_unique_ids[i] = -1; + } } } _col_unique_ids = std::move(col_unique_ids); @@ -845,7 +855,9 @@ void TabletIndex::to_schema_pb(TabletIndexPB* index) const { TabletSchema::TabletSchema() = default; -TabletSchema::~TabletSchema() = default; +TabletSchema::~TabletSchema() { + clear_column_cache_handlers(); +} int64_t TabletSchema::get_metadata_size() const { return sizeof(TabletSchema) + _vl_field_mem_size; @@ -894,14 +906,13 @@ void TabletColumn::append_sparse_column(TabletColumn column) { _num_sparse_columns++; } -void TabletSchema::append_index(TabletIndex index) { +void TabletSchema::append_index(TabletIndex&& index) { _indexes.push_back(std::move(index)); } void TabletSchema::update_index(const TabletColumn& col, TabletIndex index) { int32_t col_unique_id = col.unique_id(); - const std::string& suffix_path = - 
col.has_path_info() ? escape_for_path_name(col.path_info_ptr()->get_path()) : ""; + const std::string& suffix_path = escape_for_path_name(col.suffix_path()); for (size_t i = 0; i < _indexes.size(); i++) { for (int32_t id : _indexes[i].col_unique_ids()) { if (id == col_unique_id && _indexes[i].get_index_suffix() == suffix_path) { @@ -940,9 +951,18 @@ void TabletSchema::clear_columns() { _num_null_columns = 0; _num_key_columns = 0; _cols.clear(); + clear_column_cache_handlers(); } -void TabletSchema::init_from_pb(const TabletSchemaPB& schema, bool ignore_extracted_columns) { +void TabletSchema::clear_column_cache_handlers() { + for (auto* cache_handle : _column_cache_handlers) { + TabletColumnObjectPool::instance()->release(cache_handle); + } + _column_cache_handlers.clear(); +} + +void TabletSchema::init_from_pb(const TabletSchemaPB& schema, bool ignore_extracted_columns, + bool reuse_cache_column) { _keys_type = schema.keys_type(); _num_columns = 0; _num_variant_columns = 0; @@ -953,25 +973,34 @@ void TabletSchema::init_from_pb(const TabletSchemaPB& schema, bool ignore_extrac _field_name_to_index.clear(); _field_id_to_index.clear(); _cluster_key_idxes.clear(); + clear_column_cache_handlers(); for (const auto& i : schema.cluster_key_idxes()) { _cluster_key_idxes.push_back(i); } for (auto& column_pb : schema.column()) { - TabletColumn column; - column.init_from_pb(column_pb); - if (ignore_extracted_columns && column.is_extracted_column()) { + TabletColumnPtr column; + if (reuse_cache_column) { + auto pair = TabletColumnObjectPool::instance()->insert( + deterministic_string_serialize(column_pb)); + column = pair.second; + _column_cache_handlers.push_back(pair.first); + } else { + column = std::make_shared(); + column->init_from_pb(column_pb); + } + if (ignore_extracted_columns && column->is_extracted_column()) { continue; } - if (column.is_key()) { + if (column->is_key()) { _num_key_columns++; } - if (column.is_nullable()) { + if (column->is_nullable()) { 
_num_null_columns++; } - if (column.is_variant_type()) { + if (column->is_variant_type()) { ++_num_variant_columns; } - _cols.emplace_back(std::make_shared(std::move(column))); + _cols.emplace_back(std::move(column)); _vl_field_mem_size += sizeof(StringRef) + sizeof(char) * _cols.back()->name().size() + sizeof(size_t); _field_name_to_index.emplace(StringRef(_cols.back()->name()), _num_columns); @@ -1090,6 +1119,7 @@ void TabletSchema::build_current_tablet_schema(int64_t index_id, int32_t version _version_col_idx = -1; _skip_bitmap_col_idx = -1; _cluster_key_idxes.clear(); + clear_column_cache_handlers(); for (const auto& i : ori_tablet_schema._cluster_key_idxes) { _cluster_key_idxes.push_back(i); } @@ -1329,28 +1359,6 @@ Result TabletSchema::column(const std::string& field_name) return _cols[it->second].get(); } -std::vector TabletSchema::get_indexes_for_column( - const TabletColumn& col) const { - std::vector indexes_for_column; - // Some columns (Float, Double, JSONB ...) from the variant do not support index, but they are listed in TabltetIndex. - if (!segment_v2::InvertedIndexColumnWriter::check_support_inverted_index(col)) { - return indexes_for_column; - } - int32_t col_unique_id = col.is_extracted_column() ? col.parent_unique_id() : col.unique_id(); - const std::string& suffix_path = - col.has_path_info() ? 
escape_for_path_name(col.path_info_ptr()->get_path()) : ""; - // TODO use more efficient impl - for (size_t i = 0; i < _indexes.size(); i++) { - for (int32_t id : _indexes[i].col_unique_ids()) { - if (id == col_unique_id && _indexes[i].get_index_suffix() == suffix_path) { - indexes_for_column.push_back(&(_indexes[i])); - } - } - } - - return indexes_for_column; -} - void TabletSchema::update_tablet_columns(const TabletSchema& tablet_schema, const std::vector& t_columns) { copy_from(tablet_schema); @@ -1362,49 +1370,17 @@ void TabletSchema::update_tablet_columns(const TabletSchema& tablet_schema, } } -bool TabletSchema::has_inverted_index(const TabletColumn& col) const { - // TODO use more efficient impl - int32_t col_unique_id = col.is_extracted_column() ? col.parent_unique_id() : col.unique_id(); - const std::string& suffix_path = - col.has_path_info() ? escape_for_path_name(col.path_info_ptr()->get_path()) : ""; - for (size_t i = 0; i < _indexes.size(); i++) { - if (_indexes[i].index_type() == IndexType::INVERTED) { - for (int32_t id : _indexes[i].col_unique_ids()) { - if (id == col_unique_id && _indexes[i].get_index_suffix() == suffix_path) { - return true; - } - } - } - } - - return false; -} - -bool TabletSchema::has_inverted_index_with_index_id(int64_t index_id, - const std::string& suffix_name) const { +bool TabletSchema::has_inverted_index_with_index_id(int64_t index_id) const { for (size_t i = 0; i < _indexes.size(); i++) { - if (_indexes[i].index_type() == IndexType::INVERTED && - _indexes[i].get_index_suffix() == suffix_name && _indexes[i].index_id() == index_id) { + if (_indexes[i].index_type() == IndexType::INVERTED && _indexes[i].index_id() == index_id) { return true; } } return false; } -const TabletIndex* TabletSchema::get_inverted_index_with_index_id( - int64_t index_id, const std::string& suffix_name) const { - for (size_t i = 0; i < _indexes.size(); i++) { - if (_indexes[i].index_type() == IndexType::INVERTED && - _indexes[i].get_index_suffix() 
== suffix_name && _indexes[i].index_id() == index_id) { - return &(_indexes[i]); - } - } - - return nullptr; -} - -const TabletIndex* TabletSchema::get_inverted_index(int32_t col_unique_id, - const std::string& suffix_path) const { +const TabletIndex* TabletSchema::inverted_index(int32_t col_unique_id, + const std::string& suffix_path) const { for (size_t i = 0; i < _indexes.size(); i++) { if (_indexes[i].index_type() == IndexType::INVERTED) { for (int32_t id : _indexes[i].col_unique_ids()) { @@ -1418,19 +1394,15 @@ const TabletIndex* TabletSchema::get_inverted_index(int32_t col_unique_id, return nullptr; } -const TabletIndex* TabletSchema::get_inverted_index(const TabletColumn& col, - bool check_valid) const { - // With check_valid set to true by default +const TabletIndex* TabletSchema::inverted_index(const TabletColumn& col) const { // Some columns(Float, Double, JSONB ...) from the variant do not support inverted index - if (check_valid && !segment_v2::InvertedIndexColumnWriter::check_support_inverted_index(col)) { + if (!segment_v2::InvertedIndexColumnWriter::check_support_inverted_index(col)) { return nullptr; } // TODO use more efficient impl // Use parent id if unique not assigned, this could happend when accessing subcolumns of variants int32_t col_unique_id = col.is_extracted_column() ? col.parent_unique_id() : col.unique_id(); - const std::string& suffix_path = - col.has_path_info() ? 
escape_for_path_name(col.path_info_ptr()->get_path()) : ""; - return get_inverted_index(col_unique_id, suffix_path); + return inverted_index(col_unique_id, escape_for_path_name(col.suffix_path())); } bool TabletSchema::has_ngram_bf_index(int32_t col_unique_id) const { @@ -1565,13 +1537,4 @@ bool operator!=(const TabletSchema& a, const TabletSchema& b) { return !(a == b); } -std::string TabletSchema::deterministic_string_serialize(const TabletSchemaPB& schema_pb) { - std::string output; - google::protobuf::io::StringOutputStream string_output_stream(&output); - google::protobuf::io::CodedOutputStream output_stream(&string_output_stream); - output_stream.SetSerializationDeterministic(true); - schema_pb.SerializeToCodedStream(&output_stream); - return output; -} - } // namespace doris diff --git a/be/src/olap/tablet_schema.h b/be/src/olap/tablet_schema.h index ebe2c63c7f30d20..2ca75fad356ee19 100644 --- a/be/src/olap/tablet_schema.h +++ b/be/src/olap/tablet_schema.h @@ -40,6 +40,7 @@ #include "olap/rowset/segment_v2/options.h" #include "runtime/define_primitive_type.h" #include "runtime/descriptors.h" +#include "runtime/memory/lru_cache_policy.h" #include "util/string_util.h" #include "vec/aggregate_functions/aggregate_function.h" #include "vec/common/string_ref.h" @@ -164,6 +165,9 @@ class TabletColumn : public MetadataAdder { bool is_extracted_column() const { return _column_path != nullptr && !_column_path->empty() && _parent_col_unique_id > 0; }; + std::string suffix_path() const { + return is_extracted_column() ? 
_column_path->get_path() : ""; + } bool is_nested_subcolumn() const { return _column_path != nullptr && _column_path->has_nested_part(); } @@ -224,13 +228,16 @@ class TabletColumn : public MetadataAdder { bool _has_bitmap_index = false; bool _visible = true; - int32_t _parent_col_unique_id = -1; + std::vector _sub_columns; uint32_t _sub_column_count = 0; bool _result_is_nullable = false; int _be_exec_version = -1; - vectorized::PathInDataPtr _column_path; + + // The extracted sub-columns from "variant" contain the following information: + int32_t _parent_col_unique_id = -1; // "variant" -> col_unique_id + vectorized::PathInDataPtr _column_path; // the path of the sub-columns themselves // Record information about columns merged into a sparse column within a variant // `{"id": 100, "name" : "jack", "point" : 3.9}` @@ -298,13 +305,25 @@ class TabletSchema : public MetadataAdder { TabletSchema(); virtual ~TabletSchema(); - void init_from_pb(const TabletSchemaPB& schema, bool ignore_extracted_columns = false); + // Init from pb + // ignore_extracted_columns: ignore the extracted columns from variant column + // reuse_cached_column: reuse the cached column in the schema if they are the same, to reduce memory usage + void init_from_pb(const TabletSchemaPB& schema, bool ignore_extracted_columns = false, + bool reuse_cached_column = false); // Notice: Use deterministic way to serialize protobuf, // since serialize Map in protobuf may could lead to un-deterministic by default - static std::string deterministic_string_serialize(const TabletSchemaPB& schema_pb); + template + static std::string deterministic_string_serialize(const PbType& pb) { + std::string output; + google::protobuf::io::StringOutputStream string_output_stream(&output); + google::protobuf::io::CodedOutputStream output_stream(&string_output_stream); + output_stream.SetSerializationDeterministic(true); + pb.SerializeToCodedStream(&output_stream); + return output; + } void to_schema_pb(TabletSchemaPB* 
tablet_meta_pb) const; void append_column(TabletColumn column, ColumnType col_type = ColumnType::NORMAL); - void append_index(TabletIndex index); + void append_index(TabletIndex&& index); void update_index(const TabletColumn& column, TabletIndex index); void remove_index(int64_t index_id); void clear_index(); @@ -376,7 +395,15 @@ class TabletSchema : public MetadataAdder { void set_row_store_page_size(long page_size) { _row_store_page_size = page_size; } long row_store_page_size() const { return _row_store_page_size; } - const std::vector& indexes() const { return _indexes; } + const std::vector inverted_indexes() const { + std::vector inverted_indexes; + for (const auto& index : _indexes) { + if (index.index_type() == IndexType::INVERTED) { + inverted_indexes.emplace_back(&index); + } + } + return inverted_indexes; + } bool has_inverted_index() const { for (const auto& index : _indexes) { if (index.index_type() == IndexType::INVERTED) { @@ -385,17 +412,15 @@ class TabletSchema : public MetadataAdder { } return false; } - std::vector get_indexes_for_column(const TabletColumn& col) const; - bool has_inverted_index(const TabletColumn& col) const; - bool has_inverted_index_with_index_id(int64_t index_id, const std::string& suffix_path) const; - const TabletIndex* get_inverted_index_with_index_id(int64_t index_id, - const std::string& suffix_name) const; - // check_valid: check if this column supports inverted index + bool has_inverted_index_with_index_id(int64_t index_id) const; + // Check whether this column supports inverted index // Some columns (Float, Double, JSONB ...) from the variant do not support index, but they are listed in TabletIndex. - // If returned, the index file will not be found. 
- const TabletIndex* get_inverted_index(const TabletColumn& col, bool check_valid = true) const; - const TabletIndex* get_inverted_index(int32_t col_unique_id, - const std::string& suffix_path) const; + const TabletIndex* inverted_index(const TabletColumn& col) const; + + // Regardless of whether this column supports inverted index + // TabletIndex information will be returned as long as it exists. + const TabletIndex* inverted_index(int32_t col_unique_id, + const std::string& suffix_path = "") const; bool has_ngram_bf_index(int32_t col_unique_id) const; const TabletIndex* get_ngram_bf_index(int32_t col_unique_id) const; void update_indexes_from_thrift(const std::vector& indexes); @@ -507,10 +532,13 @@ class TabletSchema : public MetadataAdder { friend bool operator==(const TabletSchema& a, const TabletSchema& b); friend bool operator!=(const TabletSchema& a, const TabletSchema& b); + void clear_column_cache_handlers(); + KeysType _keys_type = DUP_KEYS; SortType _sort_type = SortType::LEXICAL; size_t _sort_col_num = 0; std::vector _cols; + std::vector _column_cache_handlers; std::vector _indexes; std::unordered_map _field_name_to_index; diff --git a/be/src/olap/tablet_schema_cache.cpp b/be/src/olap/tablet_schema_cache.cpp index e339c947bb97a4e..fd238fa5affb3f9 100644 --- a/be/src/olap/tablet_schema_cache.cpp +++ b/be/src/olap/tablet_schema_cache.cpp @@ -18,30 +18,45 @@ #include "olap/tablet_schema_cache.h" #include +#include +#include #include "bvar/bvar.h" #include "olap/tablet_schema.h" +#include "util/sha.h" bvar::Adder g_tablet_schema_cache_count("tablet_schema_cache_count"); bvar::Adder g_tablet_schema_cache_columns_count("tablet_schema_cache_columns_count"); +bvar::Adder g_tablet_schema_cache_hit_count("tablet_schema_cache_hit_count"); namespace doris { +// to reduce the memory consumption of the serialized TabletSchema as key. 
+// use sha256 to prevent from hash collision +static std::string get_key_signature(const std::string& origin) { + SHA256Digest digest; + digest.reset(origin.data(), origin.length()); + return std::string {digest.digest().data(), digest.digest().length()}; +} + std::pair TabletSchemaCache::insert(const std::string& key) { - auto* lru_handle = lookup(key); + std::string key_signature = get_key_signature(key); + auto* lru_handle = lookup(key_signature); TabletSchemaSPtr tablet_schema_ptr; if (lru_handle) { auto* value = (CacheValue*)LRUCachePolicy::value(lru_handle); tablet_schema_ptr = value->tablet_schema; + g_tablet_schema_cache_hit_count << 1; } else { auto* value = new CacheValue; tablet_schema_ptr = std::make_shared(); TabletSchemaPB pb; pb.ParseFromString(key); - tablet_schema_ptr->init_from_pb(pb); + // We should reuse the memory of the same TabletColumn object, set reuse_cached_column to true + tablet_schema_ptr->init_from_pb(pb, false, true); value->tablet_schema = tablet_schema_ptr; - lru_handle = LRUCachePolicy::insert(key, value, tablet_schema_ptr->num_columns(), 0, - CachePriority::NORMAL); + lru_handle = LRUCachePolicy::insert(key_signature, value, tablet_schema_ptr->num_columns(), + 0, CachePriority::NORMAL); g_tablet_schema_cache_count << 1; g_tablet_schema_cache_columns_count << tablet_schema_ptr->num_columns(); } diff --git a/be/src/olap/task/engine_checksum_task.cpp b/be/src/olap/task/engine_checksum_task.cpp index d0c4b0e45f468ef..05ecfc0401b6d04 100644 --- a/be/src/olap/task/engine_checksum_task.cpp +++ b/be/src/olap/task/engine_checksum_task.cpp @@ -93,7 +93,7 @@ Status EngineChecksumTask::_compute_checksum() { } size_t input_size = 0; for (const auto& rowset : input_rowsets) { - input_size += rowset->data_disk_size(); + input_size += rowset->total_disk_size(); } auto res = reader.init(reader_params); diff --git a/be/src/olap/task/engine_storage_migration_task.cpp b/be/src/olap/task/engine_storage_migration_task.cpp index 
21be34a334dd8df..a300e6e0f09fa3d 100644 --- a/be/src/olap/task/engine_storage_migration_task.cpp +++ b/be/src/olap/task/engine_storage_migration_task.cpp @@ -407,11 +407,8 @@ Status EngineStorageMigrationTask::_copy_index_and_data_files( if (tablet_schema.get_inverted_index_storage_format() == InvertedIndexStorageFormatPB::V1) { - for (const auto& index : tablet_schema.indexes()) { - if (index.index_type() != IndexType::INVERTED) { - continue; - } - auto index_id = index.index_id(); + for (const auto& index : tablet_schema.inverted_indexes()) { + auto index_id = index->index_id(); auto index_file = _tablet->get_segment_index_filepath(rowset_id, segment_index, index_id); auto snapshot_segment_index_file_path = diff --git a/be/src/olap/task/index_builder.cpp b/be/src/olap/task/index_builder.cpp index 38a52d1d2118aa6..d7c450d7e44156a 100644 --- a/be/src/olap/task/index_builder.cpp +++ b/be/src/olap/task/index_builder.cpp @@ -68,8 +68,11 @@ Status IndexBuilder::update_inverted_index_info() { _output_rowsets.reserve(_input_rowsets.size()); _pending_rs_guards.reserve(_input_rowsets.size()); for (auto&& input_rowset : _input_rowsets) { - if (!input_rowset->is_local()) [[unlikely]] { - DCHECK(false) << _tablet->tablet_id() << ' ' << input_rowset->rowset_id(); + bool is_local_rowset = input_rowset->is_local(); + DBUG_EXECUTE_IF("IndexBuilder::update_inverted_index_info_is_local_rowset", + { is_local_rowset = false; }) + if (!is_local_rowset) [[unlikely]] { + // DCHECK(false) << _tablet->tablet_id() << ' ' << input_rowset->rowset_id(); return Status::InternalError("should be local rowset. 
tablet_id={} rowset_id={}", _tablet->tablet_id(), input_rowset->rowset_id().to_string()); @@ -81,6 +84,9 @@ Status IndexBuilder::update_inverted_index_info() { size_t total_index_size = 0; auto* beta_rowset = reinterpret_cast(input_rowset.get()); auto size_st = beta_rowset->get_inverted_index_size(&total_index_size); + DBUG_EXECUTE_IF("IndexBuilder::update_inverted_index_info_size_st_not_ok", { + size_st = Status::Error("debug point: get fs failed"); + }) if (!size_st.ok() && !size_st.is() && !size_st.is()) { return size_st; @@ -94,13 +100,19 @@ Status IndexBuilder::update_inverted_index_info() { auto column_name = t_inverted_index.columns[0]; auto column_idx = output_rs_tablet_schema->field_index(column_name); if (column_idx < 0) { - LOG(WARNING) << "referenced column was missing. " - << "[column=" << column_name << " referenced_column=" << column_idx - << "]"; - continue; + if (!t_inverted_index.column_unique_ids.empty()) { + auto column_unique_id = t_inverted_index.column_unique_ids[0]; + column_idx = output_rs_tablet_schema->field_index(column_unique_id); + } + if (column_idx < 0) { + LOG(WARNING) << "referenced column was missing. 
" + << "[column=" << column_name + << " referenced_column=" << column_idx << "]"; + continue; + } } auto column = output_rs_tablet_schema->column(column_idx); - const auto* index_meta = output_rs_tablet_schema->get_inverted_index(column); + const auto* index_meta = output_rs_tablet_schema->inverted_index(column); if (index_meta == nullptr) { LOG(ERROR) << "failed to find column: " << column_name << " index_id: " << t_inverted_index.index_id; @@ -136,12 +148,7 @@ Status IndexBuilder::update_inverted_index_info() { return Status::Error( "indexes count cannot be negative"); } - int32_t indexes_size = 0; - for (auto index : output_rs_tablet_schema->indexes()) { - if (index.index_type() == IndexType::INVERTED) { - indexes_size++; - } - } + int32_t indexes_size = output_rs_tablet_schema->inverted_indexes().size(); if (indexes_count != indexes_size) { return Status::Error( "indexes count not equal to expected"); @@ -159,11 +166,11 @@ Status IndexBuilder::update_inverted_index_info() { LOG(WARNING) << "referenced column was missing. 
" << "[column=" << t_inverted_index.columns[0] << " referenced_column=" << column_uid << "]"; - output_rs_tablet_schema->append_index(index); + output_rs_tablet_schema->append_index(std::move(index)); continue; } const TabletColumn& col = output_rs_tablet_schema->column_by_uid(column_uid); - const TabletIndex* exist_index = output_rs_tablet_schema->get_inverted_index(col); + const TabletIndex* exist_index = output_rs_tablet_schema->inverted_index(col); if (exist_index && exist_index->index_id() != index.index_id()) { LOG(WARNING) << fmt::format( "column: {} has a exist inverted index, but the index id not equal " @@ -173,7 +180,7 @@ Status IndexBuilder::update_inverted_index_info() { without_index_uids.insert(exist_index->index_id()); output_rs_tablet_schema->remove_index(exist_index->index_id()); } - output_rs_tablet_schema->append_index(index); + output_rs_tablet_schema->append_index(std::move(index)); } } // construct input rowset reader @@ -207,13 +214,12 @@ Status IndexBuilder::update_inverted_index_info() { InvertedIndexStorageFormatPB::V1) { if (_is_drop_op) { VLOG_DEBUG << "data_disk_size:" << input_rowset_meta->data_disk_size() - << " total_disk_size:" << input_rowset_meta->data_disk_size() + << " total_disk_size:" << input_rowset_meta->total_disk_size() << " index_disk_size:" << input_rowset_meta->index_disk_size() << " drop_index_size:" << drop_index_size; rowset_meta->set_total_disk_size(input_rowset_meta->total_disk_size() - drop_index_size); - rowset_meta->set_data_disk_size(input_rowset_meta->data_disk_size() - - drop_index_size); + rowset_meta->set_data_disk_size(input_rowset_meta->data_disk_size()); rowset_meta->set_index_disk_size(input_rowset_meta->index_disk_size() - drop_index_size); } else { @@ -229,6 +235,11 @@ Status IndexBuilder::update_inverted_index_info() { std::string {InvertedIndexDescriptor::get_index_file_path_prefix(seg_path)}, output_rs_tablet_schema->get_inverted_index_storage_format()); auto st = idx_file_reader->init(); + 
DBUG_EXECUTE_IF( + "IndexBuilder::update_inverted_index_info_index_file_reader_init_not_ok", { + st = Status::Error( + "debug point: reader init error"); + }) if (!st.ok() && !st.is()) { return st; } @@ -238,7 +249,7 @@ Status IndexBuilder::update_inverted_index_info() { } rowset_meta->set_total_disk_size(input_rowset_meta->total_disk_size() - total_index_size); - rowset_meta->set_data_disk_size(input_rowset_meta->data_disk_size() - total_index_size); + rowset_meta->set_data_disk_size(input_rowset_meta->data_disk_size()); rowset_meta->set_index_disk_size(input_rowset_meta->index_disk_size() - total_index_size); } @@ -262,8 +273,11 @@ Status IndexBuilder::update_inverted_index_info() { Status IndexBuilder::handle_single_rowset(RowsetMetaSharedPtr output_rowset_meta, std::vector& segments) { - if (!output_rowset_meta->is_local()) [[unlikely]] { - DCHECK(false) << _tablet->tablet_id() << ' ' << output_rowset_meta->rowset_id(); + bool is_local_rowset = output_rowset_meta->is_local(); + DBUG_EXECUTE_IF("IndexBuilder::handle_single_rowset_is_local_rowset", + { is_local_rowset = false; }) + if (!is_local_rowset) [[unlikely]] { + // DCHECK(false) << _tablet->tablet_id() << ' ' << output_rowset_meta->rowset_id(); return Status::InternalError("should be local rowset. 
tablet_id={} rowset_id={}", _tablet->tablet_id(), output_rowset_meta->rowset_id().to_string()); @@ -280,6 +294,8 @@ Status IndexBuilder::handle_single_rowset(RowsetMetaSharedPtr output_rowset_meta for (auto& seg_ptr : segments) { auto idx_file_reader_iter = _inverted_index_file_readers.find( std::make_pair(output_rowset_meta->rowset_id().to_string(), seg_ptr->id())); + DBUG_EXECUTE_IF("IndexBuilder::handle_single_rowset_can_not_find_reader_drop_op", + { idx_file_reader_iter = _inverted_index_file_readers.end(); }) if (idx_file_reader_iter == _inverted_index_file_readers.end()) { LOG(ERROR) << "idx_file_reader_iter" << output_rowset_meta->rowset_id() << ":" << seg_ptr->id() << " cannot be found"; @@ -292,10 +308,20 @@ Status IndexBuilder::handle_single_rowset(RowsetMetaSharedPtr output_rowset_meta _tablet->tablet_path(), output_rowset_meta->rowset_id().to_string(), seg_ptr->id()))}; + std::string index_path = + InvertedIndexDescriptor::get_index_file_path_v2(index_path_prefix); + io::FileWriterPtr file_writer; + Status st = fs->create_file(index_path, &file_writer); + if (!st.ok()) { + LOG(WARNING) << "failed to create writable file. 
path=" << index_path + << ", err: " << st; + return st; + } auto inverted_index_file_writer = std::make_unique( fs, std::move(index_path_prefix), output_rowset_meta->rowset_id().to_string(), seg_ptr->id(), - output_rowset_schema->get_inverted_index_storage_format()); + output_rowset_schema->get_inverted_index_storage_format(), + std::move(file_writer)); RETURN_IF_ERROR(inverted_index_file_writer->initialize(dirs)); // create inverted index writer for (auto& index_meta : _dropped_inverted_indexes) { @@ -313,8 +339,7 @@ Status IndexBuilder::handle_single_rowset(RowsetMetaSharedPtr output_rowset_meta inverted_index_size += inverted_index_writer->get_index_file_total_size(); } _inverted_index_file_writers.clear(); - output_rowset_meta->set_data_disk_size(output_rowset_meta->data_disk_size() + - inverted_index_size); + output_rowset_meta->set_data_disk_size(output_rowset_meta->data_disk_size()); output_rowset_meta->set_total_disk_size(output_rowset_meta->total_disk_size() + inverted_index_size); output_rowset_meta->set_index_disk_size(output_rowset_meta->index_disk_size() + @@ -341,15 +366,27 @@ Status IndexBuilder::handle_single_rowset(RowsetMetaSharedPtr output_rowset_meta InvertedIndexStorageFormatPB::V2) { auto idx_file_reader_iter = _inverted_index_file_readers.find( std::make_pair(output_rowset_meta->rowset_id().to_string(), seg_ptr->id())); + DBUG_EXECUTE_IF("IndexBuilder::handle_single_rowset_can_not_find_reader", + { idx_file_reader_iter = _inverted_index_file_readers.end(); }) if (idx_file_reader_iter == _inverted_index_file_readers.end()) { LOG(ERROR) << "idx_file_reader_iter" << output_rowset_meta->rowset_id() << ":" << seg_ptr->id() << " cannot be found"; continue; } + std::string index_path = + InvertedIndexDescriptor::get_index_file_path_v2(index_path_prefix); + io::FileWriterPtr file_writer; + Status st = fs->create_file(index_path, &file_writer); + if (!st.ok()) { + LOG(WARNING) << "failed to create writable file. 
path=" << index_path + << ", err: " << st; + return st; + } auto dirs = DORIS_TRY(idx_file_reader_iter->second->get_all_directories()); inverted_index_file_writer = std::make_unique( fs, index_path_prefix, output_rowset_meta->rowset_id().to_string(), - seg_ptr->id(), output_rowset_schema->get_inverted_index_storage_format()); + seg_ptr->id(), output_rowset_schema->get_inverted_index_storage_format(), + std::move(file_writer)); RETURN_IF_ERROR(inverted_index_file_writer->initialize(dirs)); } else { inverted_index_file_writer = std::make_unique( @@ -363,25 +400,42 @@ Status IndexBuilder::handle_single_rowset(RowsetMetaSharedPtr output_rowset_meta auto column_name = inverted_index.columns[0]; auto column_idx = output_rowset_schema->field_index(column_name); if (column_idx < 0) { - LOG(WARNING) << "referenced column was missing. " - << "[column=" << column_name << " referenced_column=" << column_idx - << "]"; - continue; + if (!inverted_index.column_unique_ids.empty()) { + column_idx = output_rowset_schema->field_index( + inverted_index.column_unique_ids[0]); + } + if (column_idx < 0) { + LOG(WARNING) << "referenced column was missing. 
" + << "[column=" << column_name + << " referenced_column=" << column_idx << "]"; + continue; + } } auto column = output_rowset_schema->column(column_idx); - if (!InvertedIndexColumnWriter::check_support_inverted_index(column)) { + // variant column is not support for building index + auto is_support_inverted_index = + InvertedIndexColumnWriter::check_support_inverted_index(column); + DBUG_EXECUTE_IF("IndexBuilder::handle_single_rowset_support_inverted_index", + { is_support_inverted_index = false; }) + if (!is_support_inverted_index) { continue; } - DCHECK(output_rowset_schema->has_inverted_index_with_index_id(index_id, "")); + DCHECK(output_rowset_schema->has_inverted_index_with_index_id(index_id)); _olap_data_convertor->add_column_data_convertor(column); return_columns.emplace_back(column_idx); std::unique_ptr field(FieldFactory::create(column)); - const auto* index_meta = output_rowset_schema->get_inverted_index(column); + const auto* index_meta = output_rowset_schema->inverted_index(column); std::unique_ptr inverted_index_builder; try { RETURN_IF_ERROR(segment_v2::InvertedIndexColumnWriter::create( field.get(), &inverted_index_builder, inverted_index_file_writer.get(), index_meta)); + DBUG_EXECUTE_IF( + "IndexBuilder::handle_single_rowset_index_column_writer_create_error", { + _CLTHROWA(CL_ERR_IO, + "debug point: " + "handle_single_rowset_index_column_writer_create_error"); + }) } catch (const std::exception& e) { return Status::Error( "CLuceneError occured: {}", e.what()); @@ -412,6 +466,10 @@ Status IndexBuilder::handle_single_rowset(RowsetMetaSharedPtr output_rowset_meta std::make_shared(output_rowset_schema->columns(), return_columns); std::unique_ptr iter; auto res = seg_ptr->new_iterator(schema, read_options, &iter); + DBUG_EXECUTE_IF("IndexBuilder::handle_single_rowset_create_iterator_error", { + res = Status::Error( + "debug point: handle_single_rowset_create_iterator_error"); + }) if (!res.ok()) { LOG(WARNING) << "failed to create iterator[" << 
seg_ptr->id() << "]: " << res.to_string(); @@ -422,7 +480,7 @@ Status IndexBuilder::handle_single_rowset(RowsetMetaSharedPtr output_rowset_meta output_rowset_schema->create_block(return_columns)); while (true) { auto status = iter->next_batch(block.get()); - DBUG_EXECUTE_IF("IndexBuilder::handle_single_rowset", { + DBUG_EXECUTE_IF("IndexBuilder::handle_single_rowset_iterator_next_batch_error", { status = Status::Error( "next_batch fault injection"); }); @@ -437,8 +495,15 @@ Status IndexBuilder::handle_single_rowset(RowsetMetaSharedPtr output_rowset_meta } // write inverted index data - if (_write_inverted_index_data(output_rowset_schema, iter->data_id(), - block.get()) != Status::OK()) { + status = _write_inverted_index_data(output_rowset_schema, iter->data_id(), + block.get()); + DBUG_EXECUTE_IF( + "IndexBuilder::handle_single_rowset_write_inverted_index_data_error", { + status = Status::Error( + "debug point: " + "handle_single_rowset_write_inverted_index_data_error"); + }) + if (!status.ok()) { return Status::Error( "failed to write block."); } @@ -451,6 +516,10 @@ Status IndexBuilder::handle_single_rowset(RowsetMetaSharedPtr output_rowset_meta if (_inverted_index_builders[writer_sign]) { RETURN_IF_ERROR(_inverted_index_builders[writer_sign]->finish()); } + DBUG_EXECUTE_IF("IndexBuilder::handle_single_rowset_index_build_finish_error", { + _CLTHROWA(CL_ERR_IO, + "debug point: handle_single_rowset_index_build_finish_error"); + }) } catch (const std::exception& e) { return Status::Error( "CLuceneError occured: {}", e.what()); @@ -461,6 +530,10 @@ Status IndexBuilder::handle_single_rowset(RowsetMetaSharedPtr output_rowset_meta } for (auto&& [seg_id, inverted_index_file_writer] : _inverted_index_file_writers) { auto st = inverted_index_file_writer->close(); + DBUG_EXECUTE_IF("IndexBuilder::handle_single_rowset_file_writer_close_error", { + st = Status::Error( + "debug point: handle_single_rowset_file_writer_close_error"); + }) if (!st.ok()) { LOG(ERROR) << "close 
inverted_index_writer error:" << st; return st; @@ -469,8 +542,7 @@ Status IndexBuilder::handle_single_rowset(RowsetMetaSharedPtr output_rowset_meta } _inverted_index_builders.clear(); _inverted_index_file_writers.clear(); - output_rowset_meta->set_data_disk_size(output_rowset_meta->data_disk_size() + - inverted_index_size); + output_rowset_meta->set_data_disk_size(output_rowset_meta->data_disk_size()); output_rowset_meta->set_total_disk_size(output_rowset_meta->total_disk_size() + inverted_index_size); output_rowset_meta->set_index_disk_size(output_rowset_meta->index_disk_size() + @@ -491,15 +563,28 @@ Status IndexBuilder::_write_inverted_index_data(TabletSchemaSPtr tablet_schema, auto index_id = inverted_index.index_id; auto column_name = inverted_index.columns[0]; auto column_idx = tablet_schema->field_index(column_name); + DBUG_EXECUTE_IF("IndexBuilder::_write_inverted_index_data_column_idx_is_negative", + { column_idx = -1; }) if (column_idx < 0) { - LOG(WARNING) << "referenced column was missing. " - << "[column=" << column_name << " referenced_column=" << column_idx << "]"; - continue; + if (!inverted_index.column_unique_ids.empty()) { + auto column_unique_id = inverted_index.column_unique_ids[0]; + column_idx = tablet_schema->field_index(column_unique_id); + } + if (column_idx < 0) { + LOG(WARNING) << "referenced column was missing. 
" + << "[column=" << column_name << " referenced_column=" << column_idx + << "]"; + continue; + } } auto column = tablet_schema->column(column_idx); auto writer_sign = std::make_pair(segment_idx, index_id); std::unique_ptr field(FieldFactory::create(column)); auto converted_result = _olap_data_convertor->convert_column_data(i); + DBUG_EXECUTE_IF("IndexBuilder::_write_inverted_index_data_convert_column_data_error", { + converted_result.first = Status::Error( + "debug point: _write_inverted_index_data_convert_column_data_error"); + }) if (converted_result.first != Status::OK()) { LOG(WARNING) << "failed to convert block, errcode: " << converted_result.first; return converted_result.first; @@ -551,10 +636,20 @@ Status IndexBuilder::_add_nullable(const std::string& column_name, field->get_sub_field(0)->size(), reinterpret_cast(data), reinterpret_cast(nested_null_map), offsets_ptr, num_rows)); } + DBUG_EXECUTE_IF("IndexBuilder::_add_nullable_add_array_values_error", { + _CLTHROWA(CL_ERR_IO, "debug point: _add_nullable_add_array_values_error"); + }) } catch (const std::exception& e) { return Status::Error( "CLuceneError occured: {}", e.what()); } + // we should refresh nullmap for array + for (int row_id = 0; row_id < num_rows; row_id++) { + if (null_map && null_map[row_id] == 1) { + RETURN_IF_ERROR( + _inverted_index_builders[index_writer_sign]->add_array_nulls(row_id)); + } + } return Status::OK(); } @@ -569,6 +664,8 @@ Status IndexBuilder::_add_nullable(const std::string& column_name, } *ptr += field->size() * step; offset += step; + DBUG_EXECUTE_IF("IndexBuilder::_add_nullable_throw_exception", + { _CLTHROWA(CL_ERR_IO, "debug point: _add_nullable_throw_exception"); }) } while (offset < num_rows); } catch (const std::exception& e) { return Status::Error("CLuceneError occured: {}", @@ -601,6 +698,8 @@ Status IndexBuilder::_add_data(const std::string& column_name, RETURN_IF_ERROR(_inverted_index_builders[index_writer_sign]->add_values( column_name, *ptr, num_rows)); } + 
DBUG_EXECUTE_IF("IndexBuilder::_add_data_throw_exception", + { _CLTHROWA(CL_ERR_IO, "debug point: _add_data_throw_exception"); }) } catch (const std::exception& e) { return Status::Error("CLuceneError occured: {}", e.what()); @@ -626,6 +725,8 @@ Status IndexBuilder::handle_inverted_index_data() { Status IndexBuilder::do_build_inverted_index() { LOG(INFO) << "begin to do_build_inverted_index, tablet=" << _tablet->tablet_id() << ", is_drop_op=" << _is_drop_op; + DBUG_EXECUTE_IF("IndexBuilder::do_build_inverted_index_alter_inverted_indexes_empty", + { _alter_inverted_indexes.clear(); }) if (_alter_inverted_indexes.empty()) { return Status::OK(); } @@ -692,6 +793,10 @@ Status IndexBuilder::do_build_inverted_index() { // modify rowsets in memory st = modify_rowsets(); + DBUG_EXECUTE_IF("IndexBuilder::do_build_inverted_index_modify_rowsets_status_error", { + st = Status::Error( + "debug point: do_build_inverted_index_modify_rowsets_status_error"); + }) if (!st.ok()) { LOG(WARNING) << "failed to modify rowsets in memory. 
" << "tablet=" << _tablet->tablet_id() << ", error=" << st; @@ -749,7 +854,10 @@ Status IndexBuilder::modify_rowsets(const Merger::Statistics* stats) { void IndexBuilder::gc_output_rowset() { for (auto&& output_rowset : _output_rowsets) { - if (!output_rowset->is_local()) { + auto is_local_rowset = output_rowset->is_local(); + DBUG_EXECUTE_IF("IndexBuilder::gc_output_rowset_is_local_rowset", + { is_local_rowset = false; }) + if (!is_local_rowset) { _tablet->record_unused_remote_rowset(output_rowset->rowset_id(), output_rowset->rowset_meta()->resource_id(), output_rowset->num_segments()); diff --git a/be/src/pipeline/common/agg_utils.h b/be/src/pipeline/common/agg_utils.h index a3cc175b1ed0a25..135bc67712345f9 100644 --- a/be/src/pipeline/common/agg_utils.h +++ b/be/src/pipeline/common/agg_utils.h @@ -80,23 +80,19 @@ using AggregatedMethodVariants = std::variant< vectorized::UInt256, AggDataNullable>>, vectorized::MethodSingleNullableColumn< vectorized::MethodStringNoCache>, - vectorized::MethodKeysFixed, false>, - vectorized::MethodKeysFixed, true>, - vectorized::MethodKeysFixed, false>, - vectorized::MethodKeysFixed, true>, - vectorized::MethodKeysFixed, false>, - vectorized::MethodKeysFixed, true>, - vectorized::MethodKeysFixed, false>, - vectorized::MethodKeysFixed, true>>; + vectorized::MethodKeysFixed>, + vectorized::MethodKeysFixed>, + vectorized::MethodKeysFixed>, + vectorized::MethodKeysFixed>>; struct AggregatedDataVariants : public DataVariants { + vectorized::MethodOneNumber, vectorized::DataWithNullKey> { AggregatedDataWithoutKey without_key = nullptr; - template void init(const std::vector& data_types, HashKeyType type) { + bool nullable = data_types.size() == 1 && data_types[0]->is_nullable(); + switch (type) { case HashKeyType::without_key: break; @@ -104,28 +100,28 @@ struct AggregatedDataVariants method_variant.emplace>(); break; case HashKeyType::int8_key: - emplace_single, nullable>(); + emplace_single>(nullable); break; case 
HashKeyType::int16_key: - emplace_single, nullable>(); + emplace_single>(nullable); break; case HashKeyType::int32_key: - emplace_single, nullable>(); + emplace_single>(nullable); break; case HashKeyType::int32_key_phase2: - emplace_single(); + emplace_single(nullable); break; case HashKeyType::int64_key: - emplace_single, nullable>(); + emplace_single>(nullable); break; case HashKeyType::int64_key_phase2: - emplace_single(); + emplace_single(nullable); break; case HashKeyType::int128_key: - emplace_single, nullable>(); + emplace_single>(nullable); break; case HashKeyType::int256_key: - emplace_single, nullable>(); + emplace_single>(nullable); break; case HashKeyType::string_key: if (nullable) { @@ -138,24 +134,20 @@ struct AggregatedDataVariants } break; case HashKeyType::fixed64: - method_variant - .emplace, nullable>>( - get_key_sizes(data_types)); + method_variant.emplace>>( + get_key_sizes(data_types)); break; case HashKeyType::fixed128: - method_variant - .emplace, nullable>>( - get_key_sizes(data_types)); + method_variant.emplace>>( + get_key_sizes(data_types)); break; case HashKeyType::fixed136: - method_variant - .emplace, nullable>>( - get_key_sizes(data_types)); + method_variant.emplace>>( + get_key_sizes(data_types)); break; case HashKeyType::fixed256: - method_variant - .emplace, nullable>>( - get_key_sizes(data_types)); + method_variant.emplace>>( + get_key_sizes(data_types)); break; default: throw Exception(ErrorCode::INTERNAL_ERROR, diff --git a/be/src/pipeline/common/distinct_agg_utils.h b/be/src/pipeline/common/distinct_agg_utils.h index c7ecbd2142c7f0f..806039d5a36a4b4 100644 --- a/be/src/pipeline/common/distinct_agg_utils.h +++ b/be/src/pipeline/common/distinct_agg_utils.h @@ -72,48 +72,43 @@ using DistinctMethodVariants = std::variant< vectorized::DataWithNullKey>>>, vectorized::MethodSingleNullableColumn>>, - vectorized::MethodKeysFixed, false>, - vectorized::MethodKeysFixed, true>, - vectorized::MethodKeysFixed, false>, - 
vectorized::MethodKeysFixed, true>, - vectorized::MethodKeysFixed, false>, - vectorized::MethodKeysFixed, true>, - vectorized::MethodKeysFixed, false>, - vectorized::MethodKeysFixed, true>>; + vectorized::MethodKeysFixed>, + vectorized::MethodKeysFixed>, + vectorized::MethodKeysFixed>, + vectorized::MethodKeysFixed>>; struct DistinctDataVariants : public DataVariants { - template + vectorized::MethodOneNumber, vectorized::DataWithNullKey> { void init(const std::vector& data_types, HashKeyType type) { + bool nullable = data_types.size() == 1 && data_types[0]->is_nullable(); switch (type) { case HashKeyType::serialized: method_variant.emplace>(); break; case HashKeyType::int8_key: - emplace_single, nullable>(); + emplace_single>(nullable); break; case HashKeyType::int16_key: - emplace_single, nullable>(); + emplace_single>(nullable); break; case HashKeyType::int32_key: - emplace_single, nullable>(); + emplace_single>(nullable); break; case HashKeyType::int32_key_phase2: - emplace_single, nullable>(); + emplace_single>(nullable); break; case HashKeyType::int64_key: - emplace_single, nullable>(); + emplace_single>(nullable); break; case HashKeyType::int64_key_phase2: - emplace_single, nullable>(); + emplace_single>(nullable); break; case HashKeyType::int128_key: - emplace_single, nullable>(); + emplace_single>(nullable); break; case HashKeyType::int256_key: - emplace_single, nullable>(); + emplace_single>(nullable); break; case HashKeyType::string_key: if (nullable) { @@ -126,23 +121,19 @@ struct DistinctDataVariants } break; case HashKeyType::fixed64: - method_variant.emplace< - vectorized::MethodKeysFixed, nullable>>( + method_variant.emplace>>( get_key_sizes(data_types)); break; case HashKeyType::fixed128: - method_variant.emplace< - vectorized::MethodKeysFixed, nullable>>( + method_variant.emplace>>( get_key_sizes(data_types)); break; case HashKeyType::fixed136: - method_variant.emplace< - vectorized::MethodKeysFixed, nullable>>( + method_variant.emplace>>( 
get_key_sizes(data_types)); break; case HashKeyType::fixed256: - method_variant.emplace< - vectorized::MethodKeysFixed, nullable>>( + method_variant.emplace>>( get_key_sizes(data_types)); break; default: diff --git a/be/src/pipeline/common/join_utils.h b/be/src/pipeline/common/join_utils.h index 5be3e4af2f374bd..e214d1a52931a90 100644 --- a/be/src/pipeline/common/join_utils.h +++ b/be/src/pipeline/common/join_utils.h @@ -36,43 +36,29 @@ using JoinOpVariants = std::integral_constant, std::integral_constant>; -using SerializedHashTableContext = vectorized::MethodSerialized>; -using I8HashTableContext = vectorized::PrimaryTypeHashTableContext; -using I16HashTableContext = vectorized::PrimaryTypeHashTableContext; -using I32HashTableContext = vectorized::PrimaryTypeHashTableContext; -using I64HashTableContext = vectorized::PrimaryTypeHashTableContext; -using I128HashTableContext = vectorized::PrimaryTypeHashTableContext; -using I256HashTableContext = vectorized::PrimaryTypeHashTableContext; -using MethodOneString = vectorized::MethodStringNoCache>; -template -using I64FixedKeyHashTableContext = - vectorized::FixedKeyHashTableContext; - -template -using I128FixedKeyHashTableContext = - vectorized::FixedKeyHashTableContext; +template +using PrimaryTypeHashTableContext = vectorized::MethodOneNumber>>; -template -using I256FixedKeyHashTableContext = - vectorized::FixedKeyHashTableContext; +template +using FixedKeyHashTableContext = vectorized::MethodKeysFixed>>; -template -using I136FixedKeyHashTableContext = - vectorized::FixedKeyHashTableContext; +using SerializedHashTableContext = vectorized::MethodSerialized>; +using MethodOneString = vectorized::MethodStringNoCache>; -using HashTableVariants = - std::variant, - I64FixedKeyHashTableContext, I128FixedKeyHashTableContext, - I128FixedKeyHashTableContext, I256FixedKeyHashTableContext, - I256FixedKeyHashTableContext, I136FixedKeyHashTableContext, - I136FixedKeyHashTableContext, MethodOneString>; +using HashTableVariants = 
std::variant< + std::monostate, SerializedHashTableContext, PrimaryTypeHashTableContext, + PrimaryTypeHashTableContext, + PrimaryTypeHashTableContext, + PrimaryTypeHashTableContext, + PrimaryTypeHashTableContext, + PrimaryTypeHashTableContext, + FixedKeyHashTableContext, FixedKeyHashTableContext, + FixedKeyHashTableContext, + FixedKeyHashTableContext, MethodOneString>; struct JoinDataVariants { HashTableVariants method_variant; - template void init(const std::vector& data_types, HashKeyType type) { // todo: support single column nullable context switch (type) { @@ -80,69 +66,40 @@ struct JoinDataVariants { method_variant.emplace(); break; case HashKeyType::int8_key: - if (nullable) { - method_variant.emplace>( - get_key_sizes(data_types)); - } else { - method_variant.emplace(); - } + method_variant.emplace>(); break; case HashKeyType::int16_key: - if (nullable) { - method_variant.emplace>( - get_key_sizes(data_types)); - } else { - method_variant.emplace(); - } + method_variant.emplace>(); break; case HashKeyType::int32_key: - if (nullable) { - method_variant.emplace>( - get_key_sizes(data_types)); - } else { - method_variant.emplace(); - } + method_variant.emplace>(); break; case HashKeyType::int64_key: - if (nullable) { - method_variant.emplace>( - get_key_sizes(data_types)); - } else { - method_variant.emplace(); - } + method_variant.emplace>(); break; case HashKeyType::int128_key: - if (nullable) { - method_variant.emplace>( - get_key_sizes(data_types)); - } else { - method_variant.emplace(); - } + method_variant.emplace>(); break; case HashKeyType::int256_key: - if (nullable) { - method_variant.emplace(); - } else { - method_variant.emplace(); - } + method_variant.emplace>(); break; case HashKeyType::string_key: method_variant.emplace(); break; case HashKeyType::fixed64: - method_variant.emplace>( + method_variant.emplace>( get_key_sizes(data_types)); break; case HashKeyType::fixed128: - method_variant.emplace>( + method_variant.emplace>( 
get_key_sizes(data_types)); break; case HashKeyType::fixed136: - method_variant.emplace>( + method_variant.emplace>( get_key_sizes(data_types)); break; case HashKeyType::fixed256: - method_variant.emplace>( + method_variant.emplace>( get_key_sizes(data_types)); break; default: diff --git a/be/src/pipeline/common/partition_sort_utils.h b/be/src/pipeline/common/partition_sort_utils.h index 38bc8744dc14623..9317a783ba68bfe 100644 --- a/be/src/pipeline/common/partition_sort_utils.h +++ b/be/src/pipeline/common/partition_sort_utils.h @@ -123,57 +123,41 @@ struct PartitionBlocks { using PartitionDataPtr = PartitionBlocks*; using PartitionDataWithStringKey = PHHashMap; using PartitionDataWithShortStringKey = StringHashMap; -using PartitionDataWithUInt8Key = PHHashMap; -using PartitionDataWithUInt16Key = PHHashMap; -using PartitionDataWithUInt32Key = - PHHashMap>; -using PartitionDataWithUInt64Key = - PHHashMap>; -using PartitionDataWithUInt128Key = - PHHashMap>; -using PartitionDataWithUInt256Key = - PHHashMap>; -using PartitionDataWithUInt136Key = - PHHashMap>; + +template +using PartitionData = PHHashMap>; + +template +using PartitionDataSingle = vectorized::MethodOneNumber>; + +template +using PartitionDataSingleNullable = vectorized::MethodSingleNullableColumn< + vectorized::MethodOneNumber>>>; using PartitionedMethodVariants = std::variant< std::monostate, vectorized::MethodSerialized, - vectorized::MethodOneNumber, - vectorized::MethodOneNumber, - vectorized::MethodOneNumber, - vectorized::MethodOneNumber, - vectorized::MethodOneNumber, - vectorized::MethodOneNumber, - vectorized::MethodSingleNullableColumn>>, - vectorized::MethodSingleNullableColumn>>, - vectorized::MethodSingleNullableColumn>>, - vectorized::MethodSingleNullableColumn>>, - vectorized::MethodSingleNullableColumn>>, - vectorized::MethodSingleNullableColumn>>, - vectorized::MethodKeysFixed, - vectorized::MethodKeysFixed, - vectorized::MethodKeysFixed, - vectorized::MethodKeysFixed, - 
vectorized::MethodKeysFixed, - vectorized::MethodKeysFixed, - vectorized::MethodKeysFixed, - vectorized::MethodKeysFixed, + PartitionDataSingle, PartitionDataSingle, + PartitionDataSingle, PartitionDataSingle, + PartitionDataSingle, PartitionDataSingle, + PartitionDataSingleNullable, + PartitionDataSingleNullable, + PartitionDataSingleNullable, + PartitionDataSingleNullable, + PartitionDataSingleNullable, + PartitionDataSingleNullable, + vectorized::MethodKeysFixed>, + vectorized::MethodKeysFixed>, + vectorized::MethodKeysFixed>, + vectorized::MethodKeysFixed>, vectorized::MethodStringNoCache, vectorized::MethodSingleNullableColumn>>>; struct PartitionedHashMapVariants : public DataVariants { - template + vectorized::MethodOneNumber, vectorized::DataWithNullKey> { void init(const std::vector& data_types, HashKeyType type) { + bool nullable = data_types.size() == 1 && data_types[0]->is_nullable(); switch (type) { case HashKeyType::without_key: { break; @@ -183,27 +167,27 @@ struct PartitionedHashMapVariants break; } case HashKeyType::int8_key: { - emplace_single(); + emplace_single>(nullable); break; } case HashKeyType::int16_key: { - emplace_single(); + emplace_single>(nullable); break; } case HashKeyType::int32_key: { - emplace_single(); + emplace_single>(nullable); break; } case HashKeyType::int64_key: { - emplace_single(); + emplace_single>(nullable); break; } case HashKeyType::int128_key: { - emplace_single(); + emplace_single>(nullable); break; } case HashKeyType::int256_key: { - emplace_single(); + emplace_single>(nullable); break; } case HashKeyType::string_key: { @@ -218,24 +202,20 @@ struct PartitionedHashMapVariants break; } case HashKeyType::fixed64: - method_variant - .emplace>( - get_key_sizes(data_types)); + method_variant.emplace>>( + get_key_sizes(data_types)); break; case HashKeyType::fixed128: - method_variant - .emplace>( - get_key_sizes(data_types)); + method_variant.emplace>>( + get_key_sizes(data_types)); break; case HashKeyType::fixed136: - 
method_variant - .emplace>( - get_key_sizes(data_types)); + method_variant.emplace>>( + get_key_sizes(data_types)); break; case HashKeyType::fixed256: - method_variant - .emplace>( - get_key_sizes(data_types)); + method_variant.emplace>>( + get_key_sizes(data_types)); break; default: throw Exception(ErrorCode::INTERNAL_ERROR, diff --git a/be/src/pipeline/common/set_utils.h b/be/src/pipeline/common/set_utils.h index 014546be124ced7..ed64035fb4289da 100644 --- a/be/src/pipeline/common/set_utils.h +++ b/be/src/pipeline/common/set_utils.h @@ -25,10 +25,9 @@ namespace doris { -template +template using SetFixedKeyHashTableContext = - vectorized::MethodKeysFixed>, - has_null>; + vectorized::MethodKeysFixed>>; template using SetPrimaryTypeHashTableContext = @@ -47,59 +46,84 @@ using SetHashTableVariants = SetPrimaryTypeHashTableContext, SetPrimaryTypeHashTableContext, SetPrimaryTypeHashTableContext, - SetFixedKeyHashTableContext, - SetFixedKeyHashTableContext, - SetFixedKeyHashTableContext, - SetFixedKeyHashTableContext, - SetFixedKeyHashTableContext, - SetFixedKeyHashTableContext, - SetFixedKeyHashTableContext, - SetFixedKeyHashTableContext>; + SetFixedKeyHashTableContext, + SetFixedKeyHashTableContext, + SetFixedKeyHashTableContext, + SetFixedKeyHashTableContext>; struct SetDataVariants { SetHashTableVariants method_variant; - template void init(const std::vector& data_types, HashKeyType type) { + bool nullable = data_types.size() == 1 && data_types[0]->is_nullable(); switch (type) { case HashKeyType::serialized: method_variant.emplace(); break; case HashKeyType::int8_key: - method_variant.emplace>(); + if (nullable) { + method_variant.emplace>( + get_key_sizes(data_types)); + } else { + method_variant.emplace>(); + } break; case HashKeyType::int16_key: - method_variant.emplace>(); + if (nullable) { + method_variant.emplace>( + get_key_sizes(data_types)); + } else { + method_variant.emplace>(); + } break; case HashKeyType::int32_key: - method_variant.emplace>(); + if 
(nullable) { + method_variant.emplace>( + get_key_sizes(data_types)); + } else { + method_variant.emplace>(); + } break; case HashKeyType::int64_key: - method_variant.emplace>(); + if (nullable) { + method_variant.emplace>( + get_key_sizes(data_types)); + } else { + method_variant.emplace>(); + } break; case HashKeyType::int128_key: - method_variant.emplace>(); + if (nullable) { + method_variant.emplace>( + get_key_sizes(data_types)); + } else { + method_variant.emplace>(); + } break; case HashKeyType::int256_key: - method_variant.emplace>(); + if (nullable) { + method_variant.emplace(); + } else { + method_variant.emplace>(); + } break; case HashKeyType::string_key: method_variant.emplace(); break; case HashKeyType::fixed64: - method_variant.emplace>( + method_variant.emplace>( get_key_sizes(data_types)); break; case HashKeyType::fixed128: - method_variant.emplace>( + method_variant.emplace>( get_key_sizes(data_types)); break; case HashKeyType::fixed136: - method_variant.emplace>( + method_variant.emplace>( get_key_sizes(data_types)); break; case HashKeyType::fixed256: - method_variant.emplace>( + method_variant.emplace>( get_key_sizes(data_types)); break; default: diff --git a/be/src/pipeline/dependency.cpp b/be/src/pipeline/dependency.cpp index 1d450d164a1efb7..5fef018423df25d 100644 --- a/be/src/pipeline/dependency.cpp +++ b/be/src/pipeline/dependency.cpp @@ -32,15 +32,16 @@ #include "vec/spill/spill_stream_manager.h" namespace doris::pipeline { - +#include "common/compile_check_begin.h" Dependency* BasicSharedState::create_source_dependency(int operator_id, int node_id, - std::string name) { + const std::string& name) { source_deps.push_back(std::make_shared(operator_id, node_id, name + "_DEPENDENCY")); source_deps.back()->set_shared_state(this); return source_deps.back().get(); } -Dependency* BasicSharedState::create_sink_dependency(int dest_id, int node_id, std::string name) { +Dependency* BasicSharedState::create_sink_dependency(int dest_id, int node_id, + 
const std::string& name) { sink_deps.push_back(std::make_shared(dest_id, node_id, name + "_DEPENDENCY", true)); sink_deps.back()->set_shared_state(this); return sink_deps.back().get(); @@ -105,16 +106,6 @@ std::string RuntimeFilterDependency::debug_string(int indentation_level) { return fmt::to_string(debug_string_buffer); } -Dependency* RuntimeFilterDependency::is_blocked_by(PipelineTask* task) { - std::unique_lock lc(_task_lock); - auto ready = _ready.load(); - if (!ready && task) { - _add_block_task(task); - task->_blocked_dep = this; - } - return ready ? nullptr : this; -} - void RuntimeFilterTimer::call_timeout() { _parent->set_ready(); } @@ -267,8 +258,8 @@ bool AggSharedState::do_limit_filter(vectorized::Block* block, size_t num_rows, need_computes.data()); } - auto set_computes_arr = [](auto* __restrict res, auto* __restrict computes, int rows) { - for (int i = 0; i < rows; ++i) { + auto set_computes_arr = [](auto* __restrict res, auto* __restrict computes, size_t rows) { + for (size_t i = 0; i < rows; ++i) { computes[i] = computes[i] == res[i]; } }; diff --git a/be/src/pipeline/dependency.h b/be/src/pipeline/dependency.h index 8060ee8362dede3..4cc3aceaeebdfae 100644 --- a/be/src/pipeline/dependency.h +++ b/be/src/pipeline/dependency.h @@ -17,6 +17,7 @@ #pragma once +#include #include #include @@ -27,7 +28,6 @@ #include #include "common/logging.h" -#include "concurrentqueue.h" #include "gutil/integral_types.h" #include "pipeline/common/agg_utils.h" #include "pipeline/common/join_utils.h" @@ -46,7 +46,7 @@ class VSlotRef; } // namespace doris::vectorized namespace doris::pipeline { - +#include "common/compile_check_begin.h" class Dependency; class PipelineTask; struct BasicSharedState; @@ -81,17 +81,15 @@ struct BasicSharedState { virtual ~BasicSharedState() = default; - Dependency* create_source_dependency(int operator_id, int node_id, std::string name); + Dependency* create_source_dependency(int operator_id, int node_id, const std::string& name); - 
Dependency* create_sink_dependency(int dest_id, int node_id, std::string name); + Dependency* create_sink_dependency(int dest_id, int node_id, const std::string& name); }; class Dependency : public std::enable_shared_from_this { public: ENABLE_FACTORY_CREATOR(Dependency); - Dependency(int id, int node_id, std::string name) - : _id(id), _node_id(node_id), _name(std::move(name)), _ready(false) {} - Dependency(int id, int node_id, std::string name, bool ready) + Dependency(int id, int node_id, std::string name, bool ready = false) : _id(id), _node_id(node_id), _name(std::move(name)), _ready(ready) {} virtual ~Dependency() = default; @@ -278,8 +276,6 @@ class RuntimeFilterDependency final : public Dependency { : Dependency(id, node_id, name), _runtime_filter(runtime_filter) {} std::string debug_string(int indentation_level = 0) override; - Dependency* is_blocked_by(PipelineTask* task) override; - private: const IRuntimeFilter* _runtime_filter = nullptr; }; @@ -504,7 +500,7 @@ struct SpillSortSharedState : public BasicSharedState, ~SpillSortSharedState() override = default; // This number specifies the maximum size of sub blocks - static constexpr int SORT_BLOCK_SPILL_BATCH_BYTES = 8 * 1024 * 1024; + static constexpr size_t SORT_BLOCK_SPILL_BATCH_BYTES = 8 * 1024 * 1024; void update_spill_block_batch_row_count(const vectorized::Block* block) { auto rows = block->rows(); if (rows > 0 && 0 == avg_row_bytes) { @@ -525,7 +521,7 @@ struct SpillSortSharedState : public BasicSharedState, std::deque sorted_streams; size_t avg_row_bytes = 0; - int spill_block_batch_row_count; + size_t spill_block_batch_row_count; }; struct UnionSharedState : public BasicSharedState { @@ -606,8 +602,9 @@ struct HashJoinSharedState : public JoinSharedState { ENABLE_FACTORY_CREATOR(HashJoinSharedState) // mark the join column whether support null eq std::vector is_null_safe_eq_join; + // mark the build hash table whether it needs to store null value - std::vector store_null_in_hash_table; + 
std::vector serialize_null_into_key; std::shared_ptr arena = std::make_shared(); // maybe share hash table with other fragment instances @@ -677,7 +674,7 @@ struct SetSharedState : public BasicSharedState { std::vector child_exprs_lists; /// init in build side - int child_quantity; + size_t child_quantity; vectorized::VExprContextSPtrs build_child_exprs; std::vector probe_finished_children_dependency; @@ -867,5 +864,5 @@ struct LocalMergeExchangeSharedState : public LocalExchangeSharedState { std::vector _queues_mem_usage; const int64_t _each_queue_limit; }; - +#include "common/compile_check_end.h" } // namespace doris::pipeline diff --git a/be/src/pipeline/exec/aggregation_sink_operator.cpp b/be/src/pipeline/exec/aggregation_sink_operator.cpp index 5fb14c025850b43..27400fba474eefa 100644 --- a/be/src/pipeline/exec/aggregation_sink_operator.cpp +++ b/be/src/pipeline/exec/aggregation_sink_operator.cpp @@ -20,6 +20,7 @@ #include #include +#include "common/cast_set.h" #include "common/status.h" #include "pipeline/exec/operator.h" #include "runtime/primitive_type.h" @@ -63,17 +64,13 @@ Status AggSinkLocalState::init(RuntimeState* state, LocalSinkStateInfo& info) { Base::profile(), "MemoryUsageSerializeKeyArena", TUnit::BYTES, 1); _build_timer = ADD_TIMER(Base::profile(), "BuildTime"); - _serialize_key_timer = ADD_TIMER(Base::profile(), "SerializeKeyTime"); - _exec_timer = ADD_TIMER(Base::profile(), "ExecTime"); _merge_timer = ADD_TIMER(Base::profile(), "MergeTime"); _expr_timer = ADD_TIMER(Base::profile(), "ExprTime"); - _serialize_data_timer = ADD_TIMER(Base::profile(), "SerializeDataTime"); _deserialize_data_timer = ADD_TIMER(Base::profile(), "DeserializeAndMergeTime"); _hash_table_compute_timer = ADD_TIMER(Base::profile(), "HashTableComputeTime"); _hash_table_limit_compute_timer = ADD_TIMER(Base::profile(), "DoLimitComputeTime"); _hash_table_emplace_timer = ADD_TIMER(Base::profile(), "HashTableEmplaceTime"); _hash_table_input_counter = ADD_COUNTER(Base::profile(), 
"HashTableInputCount", TUnit::UNIT); - _max_row_size_counter = ADD_COUNTER(Base::profile(), "MaxRowSizeInBytes", TUnit::UNIT); return Status::OK(); } @@ -737,7 +734,7 @@ Status AggSinkOperatorX::init(const TPlanNode& tnode, RuntimeState* state) { RETURN_IF_ERROR(vectorized::AggFnEvaluator::create( _pool, tnode.agg_node.aggregate_functions[i], tnode.agg_node.__isset.agg_sort_infos ? tnode.agg_node.agg_sort_infos[i] : dummy, - &evaluator)); + tnode.agg_node.grouping_exprs.empty(), &evaluator)); _aggregate_evaluators.push_back(evaluator); } @@ -818,7 +815,8 @@ Status AggSinkOperatorX::open(RuntimeState* state) { // check output type if (_needs_finalize) { RETURN_IF_ERROR(vectorized::AggFnEvaluator::check_agg_fn_output( - _probe_expr_ctxs.size(), _aggregate_evaluators, _agg_fn_output_row_descriptor)); + cast_set(_probe_expr_ctxs.size()), _aggregate_evaluators, + _agg_fn_output_row_descriptor)); } RETURN_IF_ERROR(vectorized::VExpr::open(_probe_expr_ctxs, state)); diff --git a/be/src/pipeline/exec/aggregation_sink_operator.h b/be/src/pipeline/exec/aggregation_sink_operator.h index 8271f1451b43205..21ee640613789ee 100644 --- a/be/src/pipeline/exec/aggregation_sink_operator.h +++ b/be/src/pipeline/exec/aggregation_sink_operator.h @@ -102,11 +102,8 @@ class AggSinkLocalState : public PipelineXSinkLocalState { RuntimeProfile::Counter* _hash_table_input_counter = nullptr; RuntimeProfile::Counter* _build_timer = nullptr; RuntimeProfile::Counter* _expr_timer = nullptr; - RuntimeProfile::Counter* _serialize_key_timer = nullptr; RuntimeProfile::Counter* _merge_timer = nullptr; - RuntimeProfile::Counter* _serialize_data_timer = nullptr; RuntimeProfile::Counter* _deserialize_data_timer = nullptr; - RuntimeProfile::Counter* _max_row_size_counter = nullptr; RuntimeProfile::Counter* _hash_table_memory_usage = nullptr; RuntimeProfile::Counter* _hash_table_size_counter = nullptr; RuntimeProfile::Counter* _serialize_key_arena_memory_usage = nullptr; @@ -152,7 +149,6 @@ class 
AggSinkOperatorX final : public DataSinkOperatorX { : DataDistribution(ExchangeType::HASH_SHUFFLE, _partition_exprs); } bool require_data_distribution() const override { return _is_colocate; } - bool require_shuffled_data_distribution() const override { return !_probe_expr_ctxs.empty(); } size_t get_revocable_mem_size(RuntimeState* state) const; AggregatedDataVariants* get_agg_data(RuntimeState* state) { diff --git a/be/src/pipeline/exec/aggregation_source_operator.cpp b/be/src/pipeline/exec/aggregation_source_operator.cpp index 6d4cd291079cb66..9feb3493068f979 100644 --- a/be/src/pipeline/exec/aggregation_source_operator.cpp +++ b/be/src/pipeline/exec/aggregation_source_operator.cpp @@ -30,20 +30,18 @@ namespace doris::pipeline { AggLocalState::AggLocalState(RuntimeState* state, OperatorXBase* parent) : Base(state, parent), _get_results_timer(nullptr), - _serialize_result_timer(nullptr), _hash_table_iterate_timer(nullptr), _insert_keys_to_column_timer(nullptr), - _serialize_data_timer(nullptr) {} + _insert_values_to_column_timer(nullptr) {} Status AggLocalState::init(RuntimeState* state, LocalStateInfo& info) { RETURN_IF_ERROR(Base::init(state, info)); SCOPED_TIMER(exec_time_counter()); SCOPED_TIMER(_init_timer); _get_results_timer = ADD_TIMER(profile(), "GetResultsTime"); - _serialize_result_timer = ADD_TIMER(profile(), "SerializeResultTime"); _hash_table_iterate_timer = ADD_TIMER(profile(), "HashTableIterateTime"); _insert_keys_to_column_timer = ADD_TIMER(profile(), "InsertKeysToColumnTime"); - _serialize_data_timer = ADD_TIMER(profile(), "SerializeDataTime"); + _insert_values_to_column_timer = ADD_TIMER(profile(), "InsertValuesToColumnTime"); _merge_timer = ADD_TIMER(Base::profile(), "MergeTime"); _deserialize_data_timer = ADD_TIMER(Base::profile(), "DeserializeAndMergeTime"); @@ -58,7 +56,7 @@ Status AggLocalState::init(RuntimeState* state, LocalStateInfo& info) { std::placeholders::_1, std::placeholders::_2, std::placeholders::_3); } else { - 
_executor.get_result = std::bind(&AggLocalState::_serialize_without_key, this, + _executor.get_result = std::bind(&AggLocalState::_get_results_without_key, this, std::placeholders::_1, std::placeholders::_2, std::placeholders::_3); } @@ -69,8 +67,8 @@ Status AggLocalState::init(RuntimeState* state, LocalStateInfo& info) { std::placeholders::_2, std::placeholders::_3); } else { _executor.get_result = std::bind( - &AggLocalState::_serialize_with_serialized_key_result, this, - std::placeholders::_1, std::placeholders::_2, std::placeholders::_3); + &AggLocalState::_get_results_with_serialized_key, this, std::placeholders::_1, + std::placeholders::_2, std::placeholders::_3); } } @@ -94,18 +92,9 @@ Status AggLocalState::_create_agg_status(vectorized::AggregateDataPtr data) { return Status::OK(); } -Status AggLocalState::_destroy_agg_status(vectorized::AggregateDataPtr data) { - auto& shared_state = *Base::_shared_state; - for (int i = 0; i < shared_state.aggregate_evaluators.size(); ++i) { - shared_state.aggregate_evaluators[i]->function()->destroy( - data + shared_state.offsets_of_aggregate_states[i]); - } - return Status::OK(); -} - -Status AggLocalState::_serialize_with_serialized_key_result(RuntimeState* state, - vectorized::Block* block, bool* eos) { - SCOPED_TIMER(_serialize_result_timer); +Status AggLocalState::_get_results_with_serialized_key(RuntimeState* state, + vectorized::Block* block, bool* eos) { + SCOPED_TIMER(_get_results_timer); auto& shared_state = *_shared_state; size_t key_size = _shared_state->probe_expr_ctxs.size(); size_t agg_size = _shared_state->aggregate_evaluators.size(); @@ -125,7 +114,6 @@ Status AggLocalState::_serialize_with_serialized_key_result(RuntimeState* state, } } - SCOPED_TIMER(_get_results_timer); std::visit( vectorized::Overload { [&](std::monostate& arg) -> void { @@ -181,7 +169,7 @@ Status AggLocalState::_serialize_with_serialized_key_result(RuntimeState* state, } { - SCOPED_TIMER(_serialize_data_timer); + 
SCOPED_TIMER(_insert_values_to_column_timer); for (size_t i = 0; i < shared_state.aggregate_evaluators.size(); ++i) { value_data_types[i] = shared_state.aggregate_evaluators[i] ->function() @@ -333,13 +321,13 @@ Status AggLocalState::_get_with_serialized_key_result(RuntimeState* state, vecto return Status::OK(); } -Status AggLocalState::_serialize_without_key(RuntimeState* state, vectorized::Block* block, - bool* eos) { +Status AggLocalState::_get_results_without_key(RuntimeState* state, vectorized::Block* block, + bool* eos) { + SCOPED_TIMER(_get_results_timer); auto& shared_state = *_shared_state; // 1. `child(0)->rows_returned() == 0` mean not data from child // in level two aggregation node should return NULL result // level one aggregation node set `eos = true` return directly - SCOPED_TIMER(_serialize_result_timer); if (UNLIKELY(_shared_state->input_num_rows == 0)) { *eos = true; return Status::OK(); @@ -573,17 +561,6 @@ template Status AggSourceOperatorX::merge_with_serialized_key_helper( template Status AggSourceOperatorX::merge_with_serialized_key_helper( RuntimeState* state, vectorized::Block* block); -size_t AggLocalState::_get_hash_table_size() { - return std::visit( - vectorized::Overload {[&](std::monostate& arg) -> size_t { - throw doris::Exception(ErrorCode::INTERNAL_ERROR, - "uninited hash table"); - return 0; - }, - [&](auto& agg_method) { return agg_method.hash_table->size(); }}, - _shared_state->agg_data->method_variant); -} - void AggLocalState::_emplace_into_hash_table(vectorized::AggregateDataPtr* places, vectorized::ColumnRawPtrs& key_columns, size_t num_rows) { diff --git a/be/src/pipeline/exec/aggregation_source_operator.h b/be/src/pipeline/exec/aggregation_source_operator.h index 473a051ae3574df..6de2bf93dbc758f 100644 --- a/be/src/pipeline/exec/aggregation_source_operator.h +++ b/be/src/pipeline/exec/aggregation_source_operator.h @@ -47,13 +47,12 @@ class AggLocalState final : public PipelineXLocalState { friend class AggSourceOperatorX; 
Status _get_without_key_result(RuntimeState* state, vectorized::Block* block, bool* eos); - Status _serialize_without_key(RuntimeState* state, vectorized::Block* block, bool* eos); + Status _get_results_without_key(RuntimeState* state, vectorized::Block* block, bool* eos); Status _get_with_serialized_key_result(RuntimeState* state, vectorized::Block* block, bool* eos); - Status _serialize_with_serialized_key_result(RuntimeState* state, vectorized::Block* block, - bool* eos); + Status _get_results_with_serialized_key(RuntimeState* state, vectorized::Block* block, + bool* eos); Status _create_agg_status(vectorized::AggregateDataPtr data); - Status _destroy_agg_status(vectorized::AggregateDataPtr data); void _make_nullable_output_key(vectorized::Block* block) { if (block->rows() != 0) { auto& shared_state = *Base ::_shared_state; @@ -68,16 +67,14 @@ class AggLocalState final : public PipelineXLocalState { vectorized::ColumnRawPtrs& key_columns, size_t num_rows); void _emplace_into_hash_table(vectorized::AggregateDataPtr* places, vectorized::ColumnRawPtrs& key_columns, size_t num_rows); - size_t _get_hash_table_size(); vectorized::PODArray _places; std::vector _deserialize_buffer; RuntimeProfile::Counter* _get_results_timer = nullptr; - RuntimeProfile::Counter* _serialize_result_timer = nullptr; RuntimeProfile::Counter* _hash_table_iterate_timer = nullptr; RuntimeProfile::Counter* _insert_keys_to_column_timer = nullptr; - RuntimeProfile::Counter* _serialize_data_timer = nullptr; + RuntimeProfile::Counter* _insert_values_to_column_timer = nullptr; RuntimeProfile::Counter* _hash_table_compute_timer = nullptr; RuntimeProfile::Counter* _hash_table_emplace_timer = nullptr; diff --git a/be/src/pipeline/exec/analytic_sink_operator.cpp b/be/src/pipeline/exec/analytic_sink_operator.cpp index afe9aeab8fdb847..abde34a1d0255bc 100644 --- a/be/src/pipeline/exec/analytic_sink_operator.cpp +++ b/be/src/pipeline/exec/analytic_sink_operator.cpp @@ -30,8 +30,10 @@ Status 
AnalyticSinkLocalState::init(RuntimeState* state, LocalSinkStateInfo& inf RETURN_IF_ERROR(PipelineXSinkLocalState::init(state, info)); SCOPED_TIMER(exec_time_counter()); SCOPED_TIMER(_init_timer); - _blocks_memory_usage = ADD_COUNTER_WITH_LEVEL(_profile, "MemoryUsageBlocks", TUnit::BYTES, 1); - _evaluation_timer = ADD_TIMER(profile(), "EvaluationTime"); + _evaluation_timer = ADD_TIMER(profile(), "GetPartitionBoundTime"); + _compute_agg_data_timer = ADD_TIMER(profile(), "ComputeAggDataTime"); + _compute_partition_by_timer = ADD_TIMER(profile(), "ComputePartitionByTime"); + _compute_order_by_timer = ADD_TIMER(profile(), "ComputeOrderByTime"); return Status::OK(); } @@ -288,35 +290,41 @@ Status AnalyticSinkOperatorX::sink(doris::RuntimeState* state, vectorized::Block } } - for (size_t i = 0; i < _agg_functions_size; - ++i) { //insert _agg_input_columns, execute calculate for its - for (size_t j = 0; j < local_state._agg_expr_ctxs[i].size(); ++j) { - RETURN_IF_ERROR(_insert_range_column( - input_block, local_state._agg_expr_ctxs[i][j], - local_state._shared_state->agg_input_columns[i][j].get(), block_rows)); + { + SCOPED_TIMER(local_state._compute_agg_data_timer); + for (size_t i = 0; i < _agg_functions_size; + ++i) { //insert _agg_input_columns, execute calculate for its + for (size_t j = 0; j < local_state._agg_expr_ctxs[i].size(); ++j) { + RETURN_IF_ERROR(_insert_range_column( + input_block, local_state._agg_expr_ctxs[i][j], + local_state._shared_state->agg_input_columns[i][j].get(), block_rows)); + } } } - //record column idx in block - for (size_t i = 0; i < local_state._shared_state->partition_by_eq_expr_ctxs.size(); ++i) { - int result_col_id = -1; - RETURN_IF_ERROR(local_state._shared_state->partition_by_eq_expr_ctxs[i]->execute( - input_block, &result_col_id)); - DCHECK_GE(result_col_id, 0); - local_state._shared_state->partition_by_column_idxs[i] = result_col_id; + { + SCOPED_TIMER(local_state._compute_partition_by_timer); + for (size_t i = 0; i < 
local_state._shared_state->partition_by_eq_expr_ctxs.size(); ++i) { + int result_col_id = -1; + RETURN_IF_ERROR(local_state._shared_state->partition_by_eq_expr_ctxs[i]->execute( + input_block, &result_col_id)); + DCHECK_GE(result_col_id, 0); + local_state._shared_state->partition_by_column_idxs[i] = result_col_id; + } } - for (size_t i = 0; i < local_state._shared_state->order_by_eq_expr_ctxs.size(); ++i) { - int result_col_id = -1; - RETURN_IF_ERROR(local_state._shared_state->order_by_eq_expr_ctxs[i]->execute( - input_block, &result_col_id)); - DCHECK_GE(result_col_id, 0); - local_state._shared_state->ordey_by_column_idxs[i] = result_col_id; + { + SCOPED_TIMER(local_state._compute_order_by_timer); + for (size_t i = 0; i < local_state._shared_state->order_by_eq_expr_ctxs.size(); ++i) { + int result_col_id = -1; + RETURN_IF_ERROR(local_state._shared_state->order_by_eq_expr_ctxs[i]->execute( + input_block, &result_col_id)); + DCHECK_GE(result_col_id, 0); + local_state._shared_state->ordey_by_column_idxs[i] = result_col_id; + } } - int64_t block_mem_usage = input_block->allocated_bytes(); - COUNTER_UPDATE(local_state._memory_used_counter, block_mem_usage); + COUNTER_UPDATE(local_state._memory_used_counter, input_block->allocated_bytes()); COUNTER_SET(local_state._peak_memory_usage_counter, local_state._memory_used_counter->value()); - COUNTER_UPDATE(local_state._blocks_memory_usage, block_mem_usage); //TODO: if need improvement, the is a tips to maintain a free queue, //so the memory could reuse, no need to new/delete again; diff --git a/be/src/pipeline/exec/analytic_sink_operator.h b/be/src/pipeline/exec/analytic_sink_operator.h index 1a0a671cf9fcaab..e04b220ee351e7f 100644 --- a/be/src/pipeline/exec/analytic_sink_operator.h +++ b/be/src/pipeline/exec/analytic_sink_operator.h @@ -58,7 +58,9 @@ class AnalyticSinkLocalState : public PipelineXSinkLocalState _agg_expr_ctxs; }; @@ -88,9 +90,6 @@ class AnalyticSinkOperatorX final : public 
DataSinkOperatorXAddHighWaterMarkCounter("MemoryUsageBlocks", TUnit::BYTES, "", 1); - _evaluation_timer = ADD_TIMER(profile(), "EvaluationTime"); + _evaluation_timer = ADD_TIMER(profile(), "GetPartitionBoundTime"); + _execute_timer = ADD_TIMER(profile(), "ExecuteTime"); + _get_next_timer = ADD_TIMER(profile(), "GetNextTime"); + _get_result_timer = ADD_TIMER(profile(), "GetResultsTime"); return Status::OK(); } @@ -233,12 +236,6 @@ Status AnalyticLocalState::open(RuntimeState* state) { std::placeholders::_1); } } - _executor.insert_result = - std::bind(&AnalyticLocalState::_insert_result_info, this, std::placeholders::_1); - _executor.execute = - std::bind(&AnalyticLocalState::_execute_for_win_func, this, std::placeholders::_1, - std::placeholders::_2, std::placeholders::_3, std::placeholders::_4); - _create_agg_status(); return Status::OK(); } @@ -282,6 +279,7 @@ void AnalyticLocalState::_destroy_agg_status() { void AnalyticLocalState::_execute_for_win_func(int64_t partition_start, int64_t partition_end, int64_t frame_start, int64_t frame_end) { + SCOPED_TIMER(_execute_timer); for (size_t i = 0; i < _agg_functions_size; ++i) { std::vector agg_columns; for (int j = 0; j < _shared_state->agg_input_columns[i].size(); ++j) { @@ -300,6 +298,7 @@ void AnalyticLocalState::_execute_for_win_func(int64_t partition_start, int64_t } void AnalyticLocalState::_insert_result_info(int64_t current_block_rows) { + SCOPED_TIMER(_get_result_timer); int64_t current_block_row_pos = _shared_state->input_block_first_row_positions[_output_block_index]; int64_t get_result_start = _shared_state->current_row_position - current_block_row_pos; @@ -344,6 +343,7 @@ void AnalyticLocalState::_insert_result_info(int64_t current_block_rows) { } Status AnalyticLocalState::_get_next_for_rows(size_t current_block_rows) { + SCOPED_TIMER(_get_next_timer); while (_shared_state->current_row_position < _shared_state->partition_by_end.pos && _window_end_position < current_block_rows) { int64_t range_start, 
range_end; @@ -367,31 +367,33 @@ Status AnalyticLocalState::_get_next_for_rows(size_t current_block_rows) { // Make sure range_start <= range_end range_start = std::min(range_start, range_end); } - _executor.execute(_partition_by_start.pos, _shared_state->partition_by_end.pos, range_start, - range_end); - _executor.insert_result(current_block_rows); + _execute_for_win_func(_partition_by_start.pos, _shared_state->partition_by_end.pos, + range_start, range_end); + _insert_result_info(current_block_rows); } return Status::OK(); } Status AnalyticLocalState::_get_next_for_partition(size_t current_block_rows) { + SCOPED_TIMER(_get_next_timer); if (_next_partition) { - _executor.execute(_partition_by_start.pos, _shared_state->partition_by_end.pos, - _partition_by_start.pos, _shared_state->partition_by_end.pos); + _execute_for_win_func(_partition_by_start.pos, _shared_state->partition_by_end.pos, + _partition_by_start.pos, _shared_state->partition_by_end.pos); } - _executor.insert_result(current_block_rows); + _insert_result_info(current_block_rows); return Status::OK(); } Status AnalyticLocalState::_get_next_for_range(size_t current_block_rows) { + SCOPED_TIMER(_get_next_timer); while (_shared_state->current_row_position < _shared_state->partition_by_end.pos && _window_end_position < current_block_rows) { if (_shared_state->current_row_position >= _order_by_end.pos) { _update_order_by_range(); - _executor.execute(_partition_by_start.pos, _shared_state->partition_by_end.pos, - _order_by_start.pos, _order_by_end.pos); + _execute_for_win_func(_partition_by_start.pos, _shared_state->partition_by_end.pos, + _order_by_start.pos, _order_by_end.pos); } - _executor.insert_result(current_block_rows); + _insert_result_info(current_block_rows); } return Status::OK(); } @@ -500,11 +502,13 @@ Status AnalyticSourceOperatorX::init(const TPlanNode& tnode, RuntimeState* state RETURN_IF_ERROR(OperatorX::init(tnode, state)); const TAnalyticNode& analytic_node = tnode.analytic_node; size_t 
agg_size = analytic_node.analytic_functions.size(); - for (int i = 0; i < agg_size; ++i) { vectorized::AggFnEvaluator* evaluator = nullptr; + // Window function treats all NullableAggregateFunction as AlwaysNullable. + // Its behavior is same with executed without group by key. + // https://github.com/apache/doris/pull/40693 RETURN_IF_ERROR(vectorized::AggFnEvaluator::create( - _pool, analytic_node.analytic_functions[i], {}, &evaluator)); + _pool, analytic_node.analytic_functions[i], {}, /*wihout_key*/ true, &evaluator)); _agg_functions.emplace_back(evaluator); } @@ -536,7 +540,7 @@ Status AnalyticSourceOperatorX::get_block(RuntimeState* state, vectorized::Block local_state.init_result_columns(); size_t current_block_rows = local_state._shared_state->input_blocks[local_state._output_block_index].rows(); - static_cast(local_state._executor.get_next(current_block_rows)); + RETURN_IF_ERROR(local_state._executor.get_next(current_block_rows)); if (local_state._window_end_position == current_block_rows) { break; } diff --git a/be/src/pipeline/exec/analytic_source_operator.h b/be/src/pipeline/exec/analytic_source_operator.h index 0080ad5e03c8b02..8f44b77f567e559 100644 --- a/be/src/pipeline/exec/analytic_source_operator.h +++ b/be/src/pipeline/exec/analytic_source_operator.h @@ -96,17 +96,15 @@ class AnalyticLocalState final : public PipelineXLocalState std::vector _agg_functions; RuntimeProfile::Counter* _evaluation_timer = nullptr; + RuntimeProfile::Counter* _execute_timer = nullptr; + RuntimeProfile::Counter* _get_next_timer = nullptr; + RuntimeProfile::Counter* _get_result_timer = nullptr; RuntimeProfile::HighWaterMarkCounter* _blocks_memory_usage = nullptr; - using vectorized_execute = std::function; using vectorized_get_next = std::function; - using vectorized_get_result = std::function; struct executor { - vectorized_execute execute; vectorized_get_next get_next; - vectorized_get_result insert_result; }; executor _executor; diff --git 
a/be/src/pipeline/exec/cache_source_operator.cpp b/be/src/pipeline/exec/cache_source_operator.cpp index 2e9b21976f841ae..cace8465fc2d463 100644 --- a/be/src/pipeline/exec/cache_source_operator.cpp +++ b/be/src/pipeline/exec/cache_source_operator.cpp @@ -65,7 +65,7 @@ Status CacheSourceLocalState::init(RuntimeState* state, LocalStateInfo& info) { // 3. lookup the cache and find proper slot order hit_cache = QueryCache::instance()->lookup(_cache_key, _version, &_query_cache_handle); - _runtime_profile->add_info_string("HitCache", hit_cache ? "1" : "0"); + _runtime_profile->add_info_string("HitCache", std::to_string(hit_cache)); if (hit_cache && !cache_param.force_refresh_query_cache) { _hit_cache_results = _query_cache_handle.get_cache_result(); auto hit_cache_slot_orders = _query_cache_handle.get_cache_slot_orders(); @@ -125,13 +125,16 @@ Status CacheSourceOperatorX::get_block(RuntimeState* state, vectorized::Block* b if (local_state._hit_cache_results == nullptr) { Defer insert_cache([&] { - if (*eos && local_state._need_insert_cache) { - local_state._runtime_profile->add_info_string("InsertCache", "1"); - local_state._global_cache->insert(local_state._cache_key, local_state._version, - local_state._local_cache_blocks, - local_state._slot_orders, - local_state._current_query_cache_bytes); - local_state._local_cache_blocks.clear(); + if (*eos) { + local_state._runtime_profile->add_info_string( + "InsertCache", std::to_string(local_state._need_insert_cache)); + if (local_state._need_insert_cache) { + local_state._global_cache->insert(local_state._cache_key, local_state._version, + local_state._local_cache_blocks, + local_state._slot_orders, + local_state._current_query_cache_bytes); + local_state._local_cache_blocks.clear(); + } } }); @@ -162,7 +165,6 @@ Status CacheSourceOperatorX::get_block(RuntimeState* state, vectorized::Block* b // over the max bytes, pass through the data, no need to do cache local_state._local_cache_blocks.clear(); 
local_state._need_insert_cache = false; - local_state._runtime_profile->add_info_string("InsertCache", "0"); } else { local_state._local_cache_blocks.emplace_back(std::move(output_block)); } diff --git a/be/src/pipeline/exec/datagen_operator.cpp b/be/src/pipeline/exec/datagen_operator.cpp index faa6359e87490be..d400953799e5bbb 100644 --- a/be/src/pipeline/exec/datagen_operator.cpp +++ b/be/src/pipeline/exec/datagen_operator.cpp @@ -36,7 +36,9 @@ DataGenSourceOperatorX::DataGenSourceOperatorX(ObjectPool* pool, const TPlanNode : OperatorX(pool, tnode, operator_id, descs), _tuple_id(tnode.data_gen_scan_node.tuple_id), _tuple_desc(nullptr), - _runtime_filter_descs(tnode.runtime_filters) {} + _runtime_filter_descs(tnode.runtime_filters) { + _is_serial_operator = tnode.__isset.is_serial_operator && tnode.is_serial_operator; +} Status DataGenSourceOperatorX::init(const TPlanNode& tnode, RuntimeState* state) { RETURN_IF_ERROR(OperatorX::init(tnode, state)); @@ -68,17 +70,25 @@ Status DataGenSourceOperatorX::get_block(RuntimeState* state, vectorized::Block* RETURN_IF_CANCELLED(state); auto& local_state = get_local_state(state); SCOPED_TIMER(local_state.exec_time_counter()); - Status res = local_state._table_func->get_next(state, block, eos); - RETURN_IF_ERROR(vectorized::VExprContext::filter_block(local_state._conjuncts, block, - block->columns())); + { + SCOPED_TIMER(local_state._table_function_execution_timer); + RETURN_IF_ERROR(local_state._table_func->get_next(state, block, eos)); + } + { + SCOPED_TIMER(local_state._filter_timer); + RETURN_IF_ERROR(vectorized::VExprContext::filter_block(local_state._conjuncts, block, + block->columns())); + } local_state.reached_limit(block, eos); - return res; + return Status::OK(); } Status DataGenLocalState::init(RuntimeState* state, LocalStateInfo& info) { SCOPED_TIMER(exec_time_counter()); SCOPED_TIMER(_init_timer); RETURN_IF_ERROR(PipelineXLocalState<>::init(state, info)); + _table_function_execution_timer = ADD_TIMER(profile(), 
"TableFunctionExecutionTime"); + _filter_timer = ADD_TIMER(profile(), "FilterTime"); auto& p = _parent->cast(); _table_func = std::make_shared(p._tuple_id, p._tuple_desc); _table_func->set_tuple_desc(p._tuple_desc); @@ -87,8 +97,8 @@ Status DataGenLocalState::init(RuntimeState* state, LocalStateInfo& info) { // TODO: use runtime filter to filte result block, maybe this node need derive from vscan_node. for (const auto& filter_desc : p._runtime_filter_descs) { std::shared_ptr runtime_filter; - RETURN_IF_ERROR(state->register_consumer_runtime_filter( - filter_desc, p.ignore_data_distribution(), p.node_id(), &runtime_filter)); + RETURN_IF_ERROR(state->register_consumer_runtime_filter(filter_desc, p.is_serial_operator(), + p.node_id(), &runtime_filter)); runtime_filter->init_profile(_runtime_profile.get()); } return Status::OK(); diff --git a/be/src/pipeline/exec/datagen_operator.h b/be/src/pipeline/exec/datagen_operator.h index c63ef97bb7a40f6..bada5ec4080d08b 100644 --- a/be/src/pipeline/exec/datagen_operator.h +++ b/be/src/pipeline/exec/datagen_operator.h @@ -44,6 +44,8 @@ class DataGenLocalState final : public PipelineXLocalState<> { private: friend class DataGenSourceOperatorX; std::shared_ptr _table_func; + RuntimeProfile::Counter* _table_function_execution_timer = nullptr; + RuntimeProfile::Counter* _filter_timer = nullptr; }; class DataGenSourceOperatorX final : public OperatorX { diff --git a/be/src/pipeline/exec/distinct_streaming_aggregation_operator.cpp b/be/src/pipeline/exec/distinct_streaming_aggregation_operator.cpp index a59af8ce7b474ab..bb282fd118e5c0b 100644 --- a/be/src/pipeline/exec/distinct_streaming_aggregation_operator.cpp +++ b/be/src/pipeline/exec/distinct_streaming_aggregation_operator.cpp @@ -71,7 +71,6 @@ Status DistinctStreamingAggLocalState::init(RuntimeState* state, LocalStateInfo& SCOPED_TIMER(Base::exec_time_counter()); SCOPED_TIMER(Base::_init_timer); _build_timer = ADD_TIMER(Base::profile(), "BuildTime"); - _exec_timer = 
ADD_TIMER(Base::profile(), "ExecTime"); _hash_table_compute_timer = ADD_TIMER(Base::profile(), "HashTableComputeTime"); _hash_table_emplace_timer = ADD_TIMER(Base::profile(), "HashTableEmplaceTime"); _hash_table_input_counter = ADD_COUNTER(Base::profile(), "HashTableInputCount", TUnit::UNIT); @@ -355,7 +354,7 @@ Status DistinctStreamingAggOperatorX::init(const TPlanNode& tnode, RuntimeState* RETURN_IF_ERROR(vectorized::AggFnEvaluator::create( _pool, tnode.agg_node.aggregate_functions[i], tnode.agg_node.__isset.agg_sort_infos ? tnode.agg_node.agg_sort_infos[i] : dummy, - &evaluator)); + tnode.agg_node.grouping_exprs.empty(), &evaluator)); _aggregate_evaluators.push_back(evaluator); } diff --git a/be/src/pipeline/exec/distinct_streaming_aggregation_operator.h b/be/src/pipeline/exec/distinct_streaming_aggregation_operator.h index 1f7a21190ad7696..4c5fcd5efa74b99 100644 --- a/be/src/pipeline/exec/distinct_streaming_aggregation_operator.h +++ b/be/src/pipeline/exec/distinct_streaming_aggregation_operator.h @@ -116,9 +116,6 @@ class DistinctStreamingAggOperatorX final } bool require_data_distribution() const override { return _is_colocate; } - bool require_shuffled_data_distribution() const override { - return _needs_finalize || (!_probe_expr_ctxs.empty() && !_is_streaming_preagg); - } private: friend class DistinctStreamingAggLocalState; diff --git a/be/src/pipeline/exec/es_scan_operator.cpp b/be/src/pipeline/exec/es_scan_operator.cpp index 7b846e715f32d26..2cb3cd5e0b29ce1 100644 --- a/be/src/pipeline/exec/es_scan_operator.cpp +++ b/be/src/pipeline/exec/es_scan_operator.cpp @@ -44,12 +44,10 @@ static std::string get_host_and_port(const std::vector& Status EsScanLocalState::_init_profile() { RETURN_IF_ERROR(Base::_init_profile()); - _es_profile.reset(new RuntimeProfile("EsIterator")); - Base::_scanner_profile->add_child(_es_profile.get(), true, nullptr); - _rows_read_counter = ADD_COUNTER(_es_profile, "RowsRead", TUnit::UNIT); - _read_timer = ADD_TIMER(_es_profile, 
"TotalRawReadTime(*)"); - _materialize_timer = ADD_TIMER(_es_profile, "MaterializeTupleTime(*)"); + _blocks_read_counter = ADD_COUNTER(_runtime_profile, "BlocksRead", TUnit::UNIT); + _read_timer = ADD_TIMER(_runtime_profile, "TotalRawReadTime(*)"); + _materialize_timer = ADD_TIMER(_runtime_profile, "MaterializeTupleTime(*)"); return Status::OK(); } diff --git a/be/src/pipeline/exec/es_scan_operator.h b/be/src/pipeline/exec/es_scan_operator.h index 4e80150d0ba8c67..2ae562e4fc7f328 100644 --- a/be/src/pipeline/exec/es_scan_operator.h +++ b/be/src/pipeline/exec/es_scan_operator.h @@ -52,13 +52,12 @@ class EsScanLocalState final : public ScanLocalState { Status _init_scanners(std::list* scanners) override; std::vector> _scan_ranges; - std::unique_ptr _es_profile; // FIXME: non-static data member '_rows_read_counter' of 'EsScanLocalState' shadows member inherited from type 'ScanLocalStateBase' #ifdef __clang__ #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wshadow-field" #endif - RuntimeProfile::Counter* _rows_read_counter = nullptr; + RuntimeProfile::Counter* _blocks_read_counter = nullptr; #ifdef __clang__ #pragma clang diagnostic pop #endif diff --git a/be/src/pipeline/exec/exchange_sink_buffer.cpp b/be/src/pipeline/exec/exchange_sink_buffer.cpp index 016802f8f73bd8e..7163299d766f4e8 100644 --- a/be/src/pipeline/exec/exchange_sink_buffer.cpp +++ b/be/src/pipeline/exec/exchange_sink_buffer.cpp @@ -235,7 +235,7 @@ Status ExchangeSinkBuffer::_send_rpc(InstanceLoId id) { auto send_callback = request.channel->get_send_callback(id, request.eos); send_callback->cntl_->set_timeout_ms(request.channel->_brpc_timeout_ms); - if (config::exchange_sink_ignore_eovercrowded) { + if (config::execution_ignore_eovercrowded) { send_callback->cntl_->ignore_eovercrowded(); } send_callback->addFailedHandler([&, weak_task_ctx = weak_task_exec_ctx()]( @@ -313,7 +313,7 @@ Status ExchangeSinkBuffer::_send_rpc(InstanceLoId id) { } auto send_callback = 
request.channel->get_send_callback(id, request.eos); send_callback->cntl_->set_timeout_ms(request.channel->_brpc_timeout_ms); - if (config::exchange_sink_ignore_eovercrowded) { + if (config::execution_ignore_eovercrowded) { send_callback->cntl_->ignore_eovercrowded(); } send_callback->addFailedHandler([&, weak_task_ctx = weak_task_exec_ctx()]( diff --git a/be/src/pipeline/exec/exchange_sink_buffer.h b/be/src/pipeline/exec/exchange_sink_buffer.h index 2ff7a20086470a1..13692532a335a42 100644 --- a/be/src/pipeline/exec/exchange_sink_buffer.h +++ b/be/src/pipeline/exec/exchange_sink_buffer.h @@ -195,7 +195,6 @@ class ExchangeSinkBuffer final : public HasTaskExecutionCtx { private: friend class ExchangeSinkLocalState; - void _set_ready_to_finish(bool all_done); phmap::flat_hash_map> _instance_to_package_queue_mutex; diff --git a/be/src/pipeline/exec/exchange_sink_operator.cpp b/be/src/pipeline/exec/exchange_sink_operator.cpp index 3f12b4458cdbdec..1f91af01aa1f6bb 100644 --- a/be/src/pipeline/exec/exchange_sink_operator.cpp +++ b/be/src/pipeline/exec/exchange_sink_operator.cpp @@ -38,12 +38,7 @@ #include "vec/exprs/vexpr.h" namespace doris::pipeline { - -Status ExchangeSinkLocalState::serialize_block(vectorized::Block* src, PBlock* dest, - int num_receivers) { - return _parent->cast().serialize_block(*this, src, dest, num_receivers); -} - +#include "common/compile_check_begin.h" bool ExchangeSinkLocalState::transfer_large_data_by_brpc() const { return _parent->cast()._transfer_large_data_by_brpc; } @@ -61,14 +56,10 @@ Status ExchangeSinkLocalState::init(RuntimeState* state, LocalSinkStateInfo& inf _local_sent_rows = ADD_COUNTER(_profile, "LocalSentRows", TUnit::UNIT); _serialize_batch_timer = ADD_TIMER(_profile, "SerializeBatchTime"); _compress_timer = ADD_TIMER(_profile, "CompressTime"); - _brpc_send_timer = ADD_TIMER(_profile, "BrpcSendTime"); - _brpc_wait_timer = ADD_TIMER(_profile, "BrpcSendTime.Wait"); _local_send_timer = ADD_TIMER(_profile, "LocalSendTime"); 
_split_block_hash_compute_timer = ADD_TIMER(_profile, "SplitBlockHashComputeTime"); - _split_block_distribute_by_channel_timer = - ADD_TIMER(_profile, "SplitBlockDistributeByChannelTime"); + _distribute_rows_into_channels_timer = ADD_TIMER(_profile, "DistributeRowsIntoChannelsTime"); _blocks_sent_counter = ADD_COUNTER_WITH_LEVEL(_profile, "BlocksProduced", TUnit::UNIT, 1); - _rows_sent_counter = ADD_COUNTER_WITH_LEVEL(_profile, "RowsProduced", TUnit::UNIT, 1); _overall_throughput = _profile->add_derived_counter( "OverallThroughput", TUnit::BYTES_PER_SECOND, [this]() { @@ -141,7 +132,7 @@ Status ExchangeSinkLocalState::open(RuntimeState* state) { std::mt19937 g(rd()); shuffle(channels.begin(), channels.end(), g); } - int local_size = 0; + size_t local_size = 0; for (int i = 0; i < channels.size(); ++i) { RETURN_IF_ERROR(channels[i]->open(state)); if (channels[i]->is_local()) { @@ -151,6 +142,8 @@ Status ExchangeSinkLocalState::open(RuntimeState* state) { } only_local_exchange = local_size == channels.size(); + _rpc_channels_num = channels.size() - local_size; + PUniqueId id; id.set_hi(_state->query_id().hi); id.set_lo(_state->query_id().lo); @@ -288,7 +281,7 @@ Status ExchangeSinkLocalState::_send_new_partition_batch() { vectorized::Block tmp_block = _row_distribution._batching_block->to_block(); // Borrow out, for lval ref auto& p = _parent->cast(); - // these order is only. + // these order is unique. // 1. clear batching stats(and flag goes true) so that we won't make a new batching process in dealing batched block. // 2. deal batched block // 3. now reuse the column of lval block. cuz write doesn't real adjust it. it generate a new block from that. 
@@ -389,7 +382,6 @@ void ExchangeSinkOperatorX::_handle_eof_channel(RuntimeState* state, ChannelPtrT Status ExchangeSinkOperatorX::sink(RuntimeState* state, vectorized::Block* block, bool eos) { auto& local_state = get_local_state(state); COUNTER_UPDATE(local_state.rows_input_counter(), (int64_t)block->rows()); - COUNTER_UPDATE(local_state.rows_sent_counter(), (int64_t)block->rows()); SCOPED_TIMER(local_state.exec_time_counter()); bool all_receiver_eof = true; for (auto& channel : local_state.channels) { @@ -431,14 +423,15 @@ Status ExchangeSinkOperatorX::sink(RuntimeState* state, vectorized::Block* block { bool serialized = false; RETURN_IF_ERROR(local_state._serializer.next_serialized_block( - block, block_holder->get_block(), local_state.channels.size(), &serialized, - eos)); + block, block_holder->get_block(), local_state._rpc_channels_num, + &serialized, eos)); if (serialized) { auto cur_block = local_state._serializer.get_block()->to_block(); if (!cur_block.empty()) { + DCHECK(eos || local_state._serializer.is_local()) << debug_string(state, 0); RETURN_IF_ERROR(local_state._serializer.serialize_block( &cur_block, block_holder->get_block(), - local_state.channels.size())); + local_state._rpc_channels_num)); } else { block_holder->reset_block(); } @@ -504,10 +497,12 @@ Status ExchangeSinkOperatorX::sink(RuntimeState* state, vectorized::Block* block old_channel_mem_usage += channel->mem_usage(); } if (_part_type == TPartitionType::HASH_PARTITIONED) { + SCOPED_TIMER(local_state._distribute_rows_into_channels_timer); RETURN_IF_ERROR(channel_add_rows( state, local_state.channels, local_state._partition_count, local_state._partitioner->get_channel_ids().get(), rows, block, eos)); } else { + SCOPED_TIMER(local_state._distribute_rows_into_channels_timer); RETURN_IF_ERROR(channel_add_rows( state, local_state.channels, local_state._partition_count, local_state._partitioner->get_channel_ids().get(), rows, block, eos)); @@ -556,10 +551,13 @@ Status 
ExchangeSinkOperatorX::sink(RuntimeState* state, vectorized::Block* block local_state._row_distribution._deal_batched = true; RETURN_IF_ERROR(local_state._send_new_partition_batch()); } - // the convert_block maybe different with block after execute exprs - // when send data we still use block - RETURN_IF_ERROR(channel_add_rows_with_idx(state, local_state.channels, num_channels, - channel2rows, block, eos)); + { + SCOPED_TIMER(local_state._distribute_rows_into_channels_timer); + // the convert_block maybe different with block after execute exprs + // when send data we still use block + RETURN_IF_ERROR(channel_add_rows_with_idx(state, local_state.channels, num_channels, + channel2rows, block, eos)); + } int64_t new_channel_mem_usage = 0; for (const auto& channel : local_state.channels) { new_channel_mem_usage += channel->mem_usage(); @@ -579,8 +577,12 @@ Status ExchangeSinkOperatorX::sink(RuntimeState* state, vectorized::Block* block } std::vector> assignments = local_state.scale_writer_partitioning_exchanger->accept(block); - RETURN_IF_ERROR(channel_add_rows_with_idx( - state, local_state.channels, local_state.channels.size(), assignments, block, eos)); + { + SCOPED_TIMER(local_state._distribute_rows_into_channels_timer); + RETURN_IF_ERROR(channel_add_rows_with_idx(state, local_state.channels, + local_state.channels.size(), assignments, + block, eos)); + } int64_t new_channel_mem_usage = 0; for (const auto& channel : local_state.channels) { @@ -635,24 +637,6 @@ Status ExchangeSinkOperatorX::sink(RuntimeState* state, vectorized::Block* block return final_st; } -Status ExchangeSinkOperatorX::serialize_block(ExchangeSinkLocalState& state, vectorized::Block* src, - PBlock* dest, int num_receivers) { - { - SCOPED_TIMER(state.serialize_batch_timer()); - dest->Clear(); - size_t uncompressed_bytes = 0; - size_t compressed_bytes = 0; - RETURN_IF_ERROR(src->serialize(_state->be_exec_version(), dest, &uncompressed_bytes, - &compressed_bytes, _compression_type, - 
_transfer_large_data_by_brpc)); - COUNTER_UPDATE(state.bytes_sent_counter(), compressed_bytes * num_receivers); - COUNTER_UPDATE(state.uncompressed_bytes_counter(), uncompressed_bytes * num_receivers); - COUNTER_UPDATE(state.compress_timer(), src->get_compress_time()); - } - - return Status::OK(); -} - void ExchangeSinkLocalState::register_channels(pipeline::ExchangeSinkBuffer* buffer) { for (auto& channel : channels) { channel->register_exchange_buffer(buffer); @@ -661,7 +645,7 @@ void ExchangeSinkLocalState::register_channels(pipeline::ExchangeSinkBuffer* buf Status ExchangeSinkOperatorX::channel_add_rows( RuntimeState* state, std::vector>& channels, - int num_channels, const uint32_t* __restrict channel_ids, int rows, + size_t num_channels, const uint32_t* __restrict channel_ids, size_t rows, vectorized::Block* block, bool eos) { std::vector> channel2rows; channel2rows.resize(num_channels); @@ -676,7 +660,7 @@ Status ExchangeSinkOperatorX::channel_add_rows( Status ExchangeSinkOperatorX::channel_add_rows_with_idx( RuntimeState* state, std::vector>& channels, - int num_channels, std::vector>& channel2rows, + size_t num_channels, std::vector>& channel2rows, vectorized::Block* block, bool eos) { Status status = Status::OK(); for (int i = 0; i < num_channels; ++i) { diff --git a/be/src/pipeline/exec/exchange_sink_operator.h b/be/src/pipeline/exec/exchange_sink_operator.h index 6b936d4b12ce119..63d502900054703 100644 --- a/be/src/pipeline/exec/exchange_sink_operator.h +++ b/be/src/pipeline/exec/exchange_sink_operator.h @@ -77,27 +77,13 @@ class ExchangeSinkLocalState final : public PipelineXSinkLocalState<> { Status open(RuntimeState* state) override; Status close(RuntimeState* state, Status exec_status) override; Dependency* finishdependency() override { return _finish_dependency.get(); } - Status serialize_block(vectorized::Block* src, PBlock* dest, int num_receivers = 1); void register_channels(pipeline::ExchangeSinkBuffer* buffer); - RuntimeProfile::Counter* 
brpc_wait_timer() { return _brpc_wait_timer; } RuntimeProfile::Counter* blocks_sent_counter() { return _blocks_sent_counter; } - RuntimeProfile::Counter* rows_sent_counter() { return _rows_sent_counter; } RuntimeProfile::Counter* local_send_timer() { return _local_send_timer; } RuntimeProfile::Counter* local_bytes_send_counter() { return _local_bytes_send_counter; } RuntimeProfile::Counter* local_sent_rows() { return _local_sent_rows; } - RuntimeProfile::Counter* brpc_send_timer() { return _brpc_send_timer; } - RuntimeProfile::Counter* serialize_batch_timer() { return _serialize_batch_timer; } - RuntimeProfile::Counter* split_block_distribute_by_channel_timer() { - return _split_block_distribute_by_channel_timer; - } - RuntimeProfile::Counter* bytes_sent_counter() { return _bytes_sent_counter; } - RuntimeProfile::Counter* split_block_hash_compute_timer() { - return _split_block_hash_compute_timer; - } RuntimeProfile::Counter* merge_block_timer() { return _merge_block_timer; } - RuntimeProfile::Counter* compress_timer() { return _compress_timer; } - RuntimeProfile::Counter* uncompressed_bytes_counter() { return _uncompressed_bytes_counter; } [[nodiscard]] bool transfer_large_data_by_brpc() const; bool is_finished() const override { return _reach_limit.load(); } void set_reach_limit() { _reach_limit = true; }; @@ -129,16 +115,13 @@ class ExchangeSinkLocalState final : public PipelineXSinkLocalState<> { std::unique_ptr _sink_buffer = nullptr; RuntimeProfile::Counter* _serialize_batch_timer = nullptr; RuntimeProfile::Counter* _compress_timer = nullptr; - RuntimeProfile::Counter* _brpc_send_timer = nullptr; - RuntimeProfile::Counter* _brpc_wait_timer = nullptr; RuntimeProfile::Counter* _bytes_sent_counter = nullptr; RuntimeProfile::Counter* _uncompressed_bytes_counter = nullptr; RuntimeProfile::Counter* _local_sent_rows = nullptr; RuntimeProfile::Counter* _local_send_timer = nullptr; RuntimeProfile::Counter* _split_block_hash_compute_timer = nullptr; - 
RuntimeProfile::Counter* _split_block_distribute_by_channel_timer = nullptr; + RuntimeProfile::Counter* _distribute_rows_into_channels_timer = nullptr; RuntimeProfile::Counter* _blocks_sent_counter = nullptr; - RuntimeProfile::Counter* _rows_sent_counter = nullptr; // Throughput per total time spent in sender RuntimeProfile::Counter* _overall_throughput = nullptr; // Used to counter send bytes under local data exchange @@ -153,6 +136,7 @@ class ExchangeSinkLocalState final : public PipelineXSinkLocalState<> { int _sender_id; std::shared_ptr _broadcast_pb_mem_limiter; + size_t _rpc_channels_num = 0; vectorized::BlockSerializer _serializer; std::shared_ptr _queue_dependency = nullptr; @@ -178,7 +162,7 @@ class ExchangeSinkLocalState final : public PipelineXSinkLocalState<> { */ std::vector> _local_channels_dependency; std::unique_ptr _partitioner; - int _partition_count; + size_t _partition_count; std::shared_ptr _finish_dependency; @@ -221,8 +205,6 @@ class ExchangeSinkOperatorX final : public DataSinkOperatorX>& channels, - int num_channels, const uint32_t* channel_ids, int rows, - vectorized::Block* block, bool eos); + size_t num_channels, const uint32_t* __restrict channel_ids, + size_t rows, vectorized::Block* block, bool eos); Status channel_add_rows_with_idx(RuntimeState* state, std::vector>& channels, - int num_channels, + size_t num_channels, std::vector>& channel2rows, vectorized::Block* block, bool eos); RuntimeState* _state = nullptr; diff --git a/be/src/pipeline/exec/exchange_source_operator.cpp b/be/src/pipeline/exec/exchange_source_operator.cpp index 844e6decd646a9c..eafefa2e4c06bbe 100644 --- a/be/src/pipeline/exec/exchange_source_operator.cpp +++ b/be/src/pipeline/exec/exchange_source_operator.cpp @@ -78,6 +78,10 @@ Status ExchangeLocalState::init(RuntimeState* state, LocalStateInfo& info) { TUnit ::TIME_NS, timer_name, 1); } + get_data_from_recvr_timer = ADD_TIMER(_runtime_profile, "GetDataFromRecvrTime"); + filter_timer = 
ADD_TIMER(_runtime_profile, "FilterTime"); + create_merger_timer = ADD_TIMER(_runtime_profile, "CreateMergerTime"); + return Status::OK(); } @@ -105,7 +109,9 @@ ExchangeSourceOperatorX::ExchangeSourceOperatorX(ObjectPool* pool, const TPlanNo std::vector(tnode.nullable_tuples.begin(), tnode.nullable_tuples.begin() + tnode.exchange_node.input_row_tuples.size())), - _offset(tnode.exchange_node.__isset.offset ? tnode.exchange_node.offset : 0) {} + _offset(tnode.exchange_node.__isset.offset ? tnode.exchange_node.offset : 0) { + _is_serial_operator = tnode.__isset.is_serial_operator && tnode.is_serial_operator; +} Status ExchangeSourceOperatorX::init(const TPlanNode& tnode, RuntimeState* state) { RETURN_IF_ERROR(OperatorX::init(tnode, state)); @@ -142,15 +148,22 @@ Status ExchangeSourceOperatorX::get_block(RuntimeState* state, vectorized::Block }); SCOPED_TIMER(local_state.exec_time_counter()); if (_is_merging && !local_state.is_ready) { + SCOPED_TIMER(local_state.create_merger_timer); RETURN_IF_ERROR(local_state.stream_recvr->create_merger( local_state.vsort_exec_exprs.lhs_ordering_expr_ctxs(), _is_asc_order, _nulls_first, state->batch_size(), _limit, _offset)); local_state.is_ready = true; return Status::OK(); } - auto status = local_state.stream_recvr->get_next(block, eos); - RETURN_IF_ERROR(doris::vectorized::VExprContext::filter_block(local_state.conjuncts(), block, - block->columns())); + { + SCOPED_TIMER(local_state.get_data_from_recvr_timer); + RETURN_IF_ERROR(local_state.stream_recvr->get_next(block, eos)); + } + { + SCOPED_TIMER(local_state.filter_timer); + RETURN_IF_ERROR(doris::vectorized::VExprContext::filter_block(local_state.conjuncts(), + block, block->columns())); + } // In vsortrunmerger, it will set eos=true, and block not empty // so that eos==true, could not make sure that block not have valid data if (!*eos || block->rows() > 0) { @@ -174,7 +187,7 @@ Status ExchangeSourceOperatorX::get_block(RuntimeState* state, vectorized::Block 
local_state.set_num_rows_returned(_limit); } } - return status; + return Status::OK(); } Status ExchangeLocalState::close(RuntimeState* state) { diff --git a/be/src/pipeline/exec/exchange_source_operator.h b/be/src/pipeline/exec/exchange_source_operator.h index 0fe3dcbb590b7d6..f938f5007d16430 100644 --- a/be/src/pipeline/exec/exchange_source_operator.h +++ b/be/src/pipeline/exec/exchange_source_operator.h @@ -59,6 +59,9 @@ class ExchangeLocalState final : public PipelineXLocalState<> { std::vector> deps; std::vector metrics; + RuntimeProfile::Counter* get_data_from_recvr_timer = nullptr; + RuntimeProfile::Counter* filter_timer = nullptr; + RuntimeProfile::Counter* create_merger_timer = nullptr; }; class ExchangeSourceOperatorX final : public OperatorX { @@ -81,7 +84,7 @@ class ExchangeSourceOperatorX final : public OperatorX { [[nodiscard]] bool is_merging() const { return _is_merging; } DataDistribution required_data_distribution() const override { - if (OperatorX::ignore_data_distribution()) { + if (OperatorX::is_serial_operator()) { return {ExchangeType::NOOP}; } return _partition_type == TPartitionType::HASH_PARTITIONED diff --git a/be/src/pipeline/exec/group_commit_block_sink_operator.cpp b/be/src/pipeline/exec/group_commit_block_sink_operator.cpp index e0171b41ab1ee8b..9f99d55d3ea9893 100644 --- a/be/src/pipeline/exec/group_commit_block_sink_operator.cpp +++ b/be/src/pipeline/exec/group_commit_block_sink_operator.cpp @@ -64,6 +64,7 @@ Status GroupCommitBlockSinkLocalState::open(RuntimeState* state) { } Status GroupCommitBlockSinkLocalState::_initialize_load_queue() { + SCOPED_TIMER(_init_load_queue_timer); auto& p = _parent->cast(); if (_state->exec_env()->wal_mgr()->is_running()) { RETURN_IF_ERROR(_state->exec_env()->group_commit_mgr()->get_first_block_load_queue( @@ -238,6 +239,17 @@ Status GroupCommitBlockSinkLocalState::_add_blocks(RuntimeState* state, return Status::OK(); } +Status GroupCommitBlockSinkLocalState::init(RuntimeState* state, 
LocalSinkStateInfo& info) { + RETURN_IF_ERROR(Base::init(state, info)); + SCOPED_TIMER(exec_time_counter()); + SCOPED_TIMER(_init_timer); + _init_load_queue_timer = ADD_TIMER(_profile, "InitLoadQueueTime"); + _valid_and_convert_block_timer = ADD_TIMER(_profile, "ValidAndConvertBlockTime"); + _find_partition_timer = ADD_TIMER(_profile, "FindPartitionTime"); + _append_blocks_timer = ADD_TIMER(_profile, "AppendBlocksTime"); + return Status::OK(); +} + Status GroupCommitBlockSinkOperatorX::init(const TDataSink& t_sink) { RETURN_IF_ERROR(Base::init(t_sink)); DCHECK(t_sink.__isset.olap_table_sink); @@ -318,10 +330,15 @@ Status GroupCommitBlockSinkOperatorX::sink(RuntimeState* state, vectorized::Bloc std::shared_ptr block; bool has_filtered_rows = false; - RETURN_IF_ERROR(local_state._block_convertor->validate_and_convert_block( - state, input_block, block, local_state._output_vexpr_ctxs, rows, has_filtered_rows)); + { + SCOPED_TIMER(local_state._valid_and_convert_block_timer); + RETURN_IF_ERROR(local_state._block_convertor->validate_and_convert_block( + state, input_block, block, local_state._output_vexpr_ctxs, rows, + has_filtered_rows)); + } local_state._has_filtered_rows = false; if (!local_state._vpartition->is_auto_partition()) { + SCOPED_TIMER(local_state._find_partition_timer); //reuse vars for find_partition local_state._partitions.assign(rows, nullptr); local_state._filter_bitmap.Reset(rows); @@ -351,23 +368,26 @@ Status GroupCommitBlockSinkOperatorX::sink(RuntimeState* state, vectorized::Bloc } } } - - if (local_state._block_convertor->num_filtered_rows() > 0 || local_state._has_filtered_rows) { - auto cloneBlock = block->clone_without_columns(); - auto res_block = vectorized::MutableBlock::build_mutable_block(&cloneBlock); - for (int i = 0; i < rows; ++i) { - if (local_state._block_convertor->filter_map()[i]) { - continue; - } - if (local_state._filter_bitmap.Get(i)) { - continue; + { + SCOPED_TIMER(local_state._append_blocks_timer); + if 
(local_state._block_convertor->num_filtered_rows() > 0 || + local_state._has_filtered_rows) { + auto cloneBlock = block->clone_without_columns(); + auto res_block = vectorized::MutableBlock::build_mutable_block(&cloneBlock); + for (int i = 0; i < rows; ++i) { + if (local_state._block_convertor->filter_map()[i]) { + continue; + } + if (local_state._filter_bitmap.Get(i)) { + continue; + } + res_block.add_row(block.get(), i); } - res_block.add_row(block.get(), i); + block->swap(res_block.to_block()); } - block->swap(res_block.to_block()); + // add block into block queue + RETURN_IF_ERROR(local_state._add_block(state, block)); } - // add block into block queue - RETURN_IF_ERROR(local_state._add_block(state, block)); return wind_up(); } diff --git a/be/src/pipeline/exec/group_commit_block_sink_operator.h b/be/src/pipeline/exec/group_commit_block_sink_operator.h index 32ca0613652ae47..e469aee8df595c0 100644 --- a/be/src/pipeline/exec/group_commit_block_sink_operator.h +++ b/be/src/pipeline/exec/group_commit_block_sink_operator.h @@ -42,8 +42,8 @@ class GroupCommitBlockSinkLocalState final : public PipelineXSinkLocalState dependencies() const override { @@ -79,6 +79,11 @@ class GroupCommitBlockSinkLocalState final : public PipelineXSinkLocalState _finish_dependency; std::shared_ptr _create_plan_dependency = nullptr; std::shared_ptr _put_block_dependency = nullptr; + + RuntimeProfile::Counter* _init_load_queue_timer = nullptr; + RuntimeProfile::Counter* _valid_and_convert_block_timer = nullptr; + RuntimeProfile::Counter* _find_partition_timer = nullptr; + RuntimeProfile::Counter* _append_blocks_timer = nullptr; }; class GroupCommitBlockSinkOperatorX final diff --git a/be/src/pipeline/exec/group_commit_scan_operator.cpp b/be/src/pipeline/exec/group_commit_scan_operator.cpp index 9577639813a7604..141a5e7bf770c56 100644 --- a/be/src/pipeline/exec/group_commit_scan_operator.cpp +++ b/be/src/pipeline/exec/group_commit_scan_operator.cpp @@ -31,6 +31,7 @@ 
GroupCommitOperatorX::GroupCommitOperatorX(ObjectPool* pool, const TPlanNode& tn Status GroupCommitOperatorX::get_block(RuntimeState* state, vectorized::Block* block, bool* eos) { auto& local_state = get_local_state(state); + SCOPED_TIMER(local_state.exec_time_counter()); bool find_node = false; while (!find_node && !*eos) { RETURN_IF_ERROR(local_state.load_block_queue->get_block(state, block, &find_node, eos, diff --git a/be/src/pipeline/exec/hashjoin_build_sink.cpp b/be/src/pipeline/exec/hashjoin_build_sink.cpp index 5ead4ba13a389c1..37de9ac93d839f5 100644 --- a/be/src/pipeline/exec/hashjoin_build_sink.cpp +++ b/be/src/pipeline/exec/hashjoin_build_sink.cpp @@ -43,7 +43,7 @@ Status HashJoinBuildSinkLocalState::init(RuntimeState* state, LocalSinkStateInfo _shared_state->join_op_variants = p._join_op_variants; _shared_state->is_null_safe_eq_join = p._is_null_safe_eq_join; - _shared_state->store_null_in_hash_table = p._store_null_in_hash_table; + _shared_state->serialize_null_into_key = p._serialize_null_into_key; _build_expr_ctxs.resize(p._build_expr_ctxs.size()); for (size_t i = 0; i < _build_expr_ctxs.size(); i++) { RETURN_IF_ERROR(p._build_expr_ctxs[i]->clone(state, _build_expr_ctxs[i])); @@ -51,19 +51,19 @@ Status HashJoinBuildSinkLocalState::init(RuntimeState* state, LocalSinkStateInfo _shared_state->build_exprs_size = _build_expr_ctxs.size(); _should_build_hash_table = true; + profile()->add_info_string("BroadcastJoin", std::to_string(p._is_broadcast_join)); if (p._is_broadcast_join) { - profile()->add_info_string("BroadcastJoin", "true"); if (state->enable_share_hash_table_for_broadcast_join()) { _should_build_hash_table = info.task_idx == 0; if (_should_build_hash_table) { - profile()->add_info_string("ShareHashTableEnabled", "true"); p._shared_hashtable_controller->set_builder_and_consumers( state->fragment_instance_id(), p.node_id()); } - } else { - profile()->add_info_string("ShareHashTableEnabled", "false"); } } + 
profile()->add_info_string("BuildShareHashTable", std::to_string(_should_build_hash_table)); + profile()->add_info_string("ShareHashTableEnabled", + std::to_string(state->enable_share_hash_table_for_broadcast_join())); if (!_should_build_hash_table) { _dependency->block(); _finish_dependency->block(); @@ -72,6 +72,7 @@ Status HashJoinBuildSinkLocalState::init(RuntimeState* state, LocalSinkStateInfo _finish_dependency->shared_from_this()); } + _runtime_filter_init_timer = ADD_TIMER(profile(), "RuntimeFilterInitTime"); _build_blocks_memory_usage = ADD_COUNTER_WITH_LEVEL(profile(), "MemoryUsageBuildBlocks", TUnit::BYTES, 1); _hash_table_memory_usage = @@ -81,13 +82,10 @@ Status HashJoinBuildSinkLocalState::init(RuntimeState* state, LocalSinkStateInfo // Build phase auto* record_profile = _should_build_hash_table ? profile() : faker_runtime_profile(); - _build_table_timer = ADD_TIMER(profile(), "BuildTableTime"); - _build_side_merge_block_timer = ADD_TIMER(profile(), "BuildSideMergeBlockTime"); + _build_table_timer = ADD_TIMER(profile(), "BuildHashTableTime"); + _build_side_merge_block_timer = ADD_TIMER(profile(), "MergeBuildBlockTime"); _build_table_insert_timer = ADD_TIMER(record_profile, "BuildTableInsertTime"); _build_expr_call_timer = ADD_TIMER(record_profile, "BuildExprCallTime"); - _build_side_compute_hash_timer = ADD_TIMER(record_profile, "BuildSideHashComputingTime"); - - _allocate_resource_timer = ADD_TIMER(profile(), "AllocateResourceTime"); // Hash Table Init RETURN_IF_ERROR(_hash_table_init(state)); @@ -227,33 +225,22 @@ Status HashJoinBuildSinkLocalState::_extract_join_column( vectorized::Block& block, vectorized::ColumnUInt8::MutablePtr& null_map, vectorized::ColumnRawPtrs& raw_ptrs, const std::vector& res_col_ids) { auto& shared_state = *_shared_state; - auto& p = _parent->cast(); for (size_t i = 0; i < shared_state.build_exprs_size; ++i) { - if (p._should_convert_to_nullable[i]) { + const auto* column = 
block.get_by_position(res_col_ids[i]).column.get(); + if (!column->is_nullable() && shared_state.serialize_null_into_key[i]) { _key_columns_holder.emplace_back( vectorized::make_nullable(block.get_by_position(res_col_ids[i]).column)); raw_ptrs[i] = _key_columns_holder.back().get(); - continue; - } - - if (shared_state.is_null_safe_eq_join[i]) { - raw_ptrs[i] = block.get_by_position(res_col_ids[i]).column.get(); + } else if (const auto* nullable = check_and_get_column(*column); + !shared_state.serialize_null_into_key[i] && nullable) { + // update nulllmap and split nested out of ColumnNullable when serialize_null_into_key is false and column is nullable + const auto& col_nested = nullable->get_nested_column(); + const auto& col_nullmap = nullable->get_null_map_data(); + DCHECK(null_map != nullptr); + vectorized::VectorizedUtils::update_null_map(null_map->get_data(), col_nullmap); + raw_ptrs[i] = &col_nested; } else { - const auto* column = block.get_by_position(res_col_ids[i]).column.get(); - if (const auto* nullable = check_and_get_column(*column)) { - const auto& col_nested = nullable->get_nested_column(); - const auto& col_nullmap = nullable->get_null_map_data(); - - if (shared_state.store_null_in_hash_table[i]) { - raw_ptrs[i] = nullable; - } else { - DCHECK(null_map != nullptr); - vectorized::VectorizedUtils::update_null_map(null_map->get_data(), col_nullmap); - raw_ptrs[i] = &col_nested; - } - } else { - raw_ptrs[i] = column; - } + raw_ptrs[i] = column; } } return Status::OK(); @@ -267,7 +254,6 @@ Status HashJoinBuildSinkLocalState::process_build_block(RuntimeState* state, if (UNLIKELY(rows == 0)) { return Status::OK(); } - COUNTER_UPDATE(_build_rows_counter, rows); block.replace_if_overflow(); vectorized::ColumnRawPtrs raw_ptrs(_build_expr_ctxs.size()); @@ -284,13 +270,9 @@ Status HashJoinBuildSinkLocalState::process_build_block(RuntimeState* state, .data()[0] = 1; } } - // TODO: Now we are not sure whether a column is nullable only by ExecNode's `row_desc` - 
// so we have to initialize this flag by the first build block. - if (!_has_set_need_null_map_for_build) { - _has_set_need_null_map_for_build = true; - _set_build_ignore_flag(block, _build_col_ids); - } - if (p._short_circuit_for_null_in_build_side || _build_side_ignore_null) { + + _set_build_side_has_external_nullmap(block, _build_col_ids); + if (_build_side_has_external_nullmap) { null_map_val = vectorized::ColumnUInt8::create(); null_map_val->get_data().assign(rows, (uint8_t)0); } @@ -300,27 +282,23 @@ Status HashJoinBuildSinkLocalState::process_build_block(RuntimeState* state, st = std::visit( vectorized::Overload { - [&](std::monostate& arg, auto join_op, auto has_null_value, + [&](std::monostate& arg, auto join_op, auto short_circuit_for_null_in_build_side, auto with_other_conjuncts) -> Status { LOG(FATAL) << "FATAL: uninited hash table"; __builtin_unreachable(); return Status::OK(); }, - [&](auto&& arg, auto&& join_op, auto has_null_value, - auto short_circuit_for_null_in_build_side, + [&](auto&& arg, auto&& join_op, auto short_circuit_for_null_in_build_side, auto with_other_conjuncts) -> Status { using HashTableCtxType = std::decay_t; using JoinOpType = std::decay_t; ProcessHashTableBuild hash_table_build_process( rows, raw_ptrs, this, state->batch_size(), state); auto st = hash_table_build_process.template run< - JoinOpType::value, has_null_value, - short_circuit_for_null_in_build_side, with_other_conjuncts>( - arg, - has_null_value || short_circuit_for_null_in_build_side - ? &null_map_val->get_data() - : nullptr, + JoinOpType::value, short_circuit_for_null_in_build_side, + with_other_conjuncts>( + arg, null_map_val ? 
&null_map_val->get_data() : nullptr, &_shared_state->_has_null_in_build_side); COUNTER_SET(_memory_used_counter, _build_blocks_memory_usage->value() + @@ -330,22 +308,24 @@ Status HashJoinBuildSinkLocalState::process_build_block(RuntimeState* state, return st; }}, _shared_state->hash_table_variants->method_variant, _shared_state->join_op_variants, - vectorized::make_bool_variant(_build_side_ignore_null), vectorized::make_bool_variant(p._short_circuit_for_null_in_build_side), vectorized::make_bool_variant((p._have_other_join_conjunct))); return st; } -void HashJoinBuildSinkLocalState::_set_build_ignore_flag(vectorized::Block& block, - const std::vector& res_col_ids) { +void HashJoinBuildSinkLocalState::_set_build_side_has_external_nullmap( + vectorized::Block& block, const std::vector& res_col_ids) { auto& p = _parent->cast(); + if (p._short_circuit_for_null_in_build_side) { + _build_side_has_external_nullmap = true; + return; + } for (size_t i = 0; i < _build_expr_ctxs.size(); ++i) { - if (!_shared_state->is_null_safe_eq_join[i] && !p._short_circuit_for_null_in_build_side) { - const auto* column = block.get_by_position(res_col_ids[i]).column.get(); - if (check_and_get_column(*column)) { - _build_side_ignore_null |= !_shared_state->store_null_in_hash_table[i]; - } + const auto* column = block.get_by_position(res_col_ids[i]).column.get(); + if (column->is_nullable() && !_shared_state->serialize_null_into_key[i]) { + _build_side_has_external_nullmap = true; + return; } } } @@ -359,7 +339,7 @@ Status HashJoinBuildSinkLocalState::_hash_table_init(RuntimeState* state) { /// For 'null safe equal' join, /// the build key column maybe be converted to nullable from non-nullable. 
- if (p._should_convert_to_nullable[i]) { + if (p._serialize_null_into_key[i]) { data_type = vectorized::make_nullable(data_type); } data_types.emplace_back(std::move(data_type)); @@ -393,10 +373,6 @@ Status HashJoinBuildSinkOperatorX::init(const TPlanNode& tnode, RuntimeState* st _hash_output_slot_ids = tnode.hash_join_node.hash_output_slot_ids; } - const bool build_stores_null = _join_op == TJoinOp::RIGHT_OUTER_JOIN || - _join_op == TJoinOp::FULL_OUTER_JOIN || - _join_op == TJoinOp::RIGHT_ANTI_JOIN; - const std::vector& eq_join_conjuncts = tnode.hash_join_node.eq_join_conjuncts; for (const auto& eq_join_conjunct : eq_join_conjuncts) { vectorized::VExprContextSPtr build_ctx; @@ -430,16 +406,18 @@ Status HashJoinBuildSinkOperatorX::init(const TPlanNode& tnode, RuntimeState* st (eq_join_conjunct.right.nodes[0].is_nullable || eq_join_conjunct.left.nodes[0].is_nullable); - const bool should_convert_to_nullable = is_null_safe_equal && - !eq_join_conjunct.right.nodes[0].is_nullable && - eq_join_conjunct.left.nodes[0].is_nullable; _is_null_safe_eq_join.push_back(is_null_safe_equal); - _should_convert_to_nullable.emplace_back(should_convert_to_nullable); - // if is null aware, build join column and probe join column both need dispose null value - _store_null_in_hash_table.emplace_back( - is_null_safe_equal || - (_build_expr_ctxs.back()->root()->is_nullable() && build_stores_null)); + if (eq_join_conjuncts.size() == 1) { + // single column key serialize method must use nullmap for represent null to instead serialize null into key + _serialize_null_into_key.emplace_back(false); + } else if (is_null_safe_equal) { + // use serialize null into key to represent multi column null value + _serialize_null_into_key.emplace_back(true); + } else { + // on normal conditions, because null!=null, it can be expressed directly with nullmap. 
+ _serialize_null_into_key.emplace_back(false); + } } return Status::OK(); diff --git a/be/src/pipeline/exec/hashjoin_build_sink.h b/be/src/pipeline/exec/hashjoin_build_sink.h index 69aa6843b84ecb4..45aa1e8c8a262dc 100644 --- a/be/src/pipeline/exec/hashjoin_build_sink.h +++ b/be/src/pipeline/exec/hashjoin_build_sink.h @@ -56,7 +56,8 @@ class HashJoinBuildSinkLocalState final protected: Status _hash_table_init(RuntimeState* state); - void _set_build_ignore_flag(vectorized::Block& block, const std::vector& res_col_ids); + void _set_build_side_has_external_nullmap(vectorized::Block& block, + const std::vector& res_col_ids); Status _do_evaluate(vectorized::Block& block, vectorized::VExprContextSPtrs& exprs, RuntimeProfile::Counter& expr_call_timer, std::vector& res_col_ids); std::vector _convert_block_to_null(vectorized::Block& block); @@ -79,7 +80,6 @@ class HashJoinBuildSinkLocalState final vectorized::MutableBlock _build_side_mutable_block; std::shared_ptr _runtime_filter_slots; - bool _has_set_need_null_map_for_build = false; /* * The comparison result of a null value with any other value is null, @@ -87,21 +87,19 @@ class HashJoinBuildSinkLocalState final * the result of an equality condition involving null should be false, * so null does not need to be added to the hash table. 
*/ - bool _build_side_ignore_null = false; + bool _build_side_has_external_nullmap = false; std::vector _build_col_ids; std::shared_ptr _finish_dependency; RuntimeProfile::Counter* _build_table_timer = nullptr; RuntimeProfile::Counter* _build_expr_call_timer = nullptr; RuntimeProfile::Counter* _build_table_insert_timer = nullptr; - RuntimeProfile::Counter* _build_side_compute_hash_timer = nullptr; RuntimeProfile::Counter* _build_side_merge_block_timer = nullptr; - RuntimeProfile::Counter* _allocate_resource_timer = nullptr; - RuntimeProfile::Counter* _build_blocks_memory_usage = nullptr; RuntimeProfile::Counter* _hash_table_memory_usage = nullptr; RuntimeProfile::Counter* _build_arena_memory_usage = nullptr; + RuntimeProfile::Counter* _runtime_filter_init_timer = nullptr; }; class HashJoinBuildSinkOperatorX final @@ -130,8 +128,8 @@ class HashJoinBuildSinkOperatorX final if (_join_op == TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN) { return {ExchangeType::NOOP}; } else if (_is_broadcast_join) { - return _child->ignore_data_distribution() ? DataDistribution(ExchangeType::PASS_TO_ONE) - : DataDistribution(ExchangeType::NOOP); + return _child->is_serial_operator() ? 
DataDistribution(ExchangeType::PASS_TO_ONE) + : DataDistribution(ExchangeType::NOOP); } return _join_distribution == TJoinDistributionType::BUCKET_SHUFFLE || _join_distribution == TJoinDistributionType::COLOCATE @@ -139,9 +137,6 @@ class HashJoinBuildSinkOperatorX final : DataDistribution(ExchangeType::HASH_SHUFFLE, _partition_exprs); } - bool require_shuffled_data_distribution() const override { - return _join_op != TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN && !_is_broadcast_join; - } bool is_shuffled_operator() const override { return _join_distribution == TJoinDistributionType::PARTITIONED; } @@ -157,13 +152,11 @@ class HashJoinBuildSinkOperatorX final // build expr vectorized::VExprContextSPtrs _build_expr_ctxs; // mark the build hash table whether it needs to store null value - std::vector _store_null_in_hash_table; + std::vector _serialize_null_into_key; // mark the join column whether support null eq std::vector _is_null_safe_eq_join; - std::vector _should_convert_to_nullable; - bool _is_broadcast_join = false; std::shared_ptr _shared_hashtable_controller; @@ -187,12 +180,12 @@ struct ProcessHashTableBuild { _batch_size(batch_size), _state(state) {} - template + template Status run(HashTableContext& hash_table_ctx, vectorized::ConstNullMapPtr null_map, bool* has_null_key) { - if (short_circuit_for_null || ignore_null) { + if (null_map) { // first row is mocked and is null + // TODO: Need to test the for loop. break may better for (uint32_t i = 1; i < _rows; i++) { if ((*null_map)[i]) { *has_null_key = true; @@ -210,8 +203,21 @@ struct ProcessHashTableBuild { hash_table_ctx.init_serialized_keys(_build_raw_ptrs, _rows, null_map ? 
null_map->data() : nullptr, true, true, hash_table_ctx.hash_table->get_bucket_size()); - hash_table_ctx.hash_table->template build( - hash_table_ctx.keys, hash_table_ctx.bucket_nums.data(), _rows); + // only 2 cases need to access the null value in hash table + bool keep_null_key = false; + if ((JoinOpType == TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN || + JoinOpType == TJoinOp::NULL_AWARE_LEFT_SEMI_JOIN) && + with_other_conjuncts) { + //null aware join with other conjuncts + keep_null_key = true; + } else if (_parent->_shared_state->is_null_safe_eq_join.size() == 1 && + _parent->_shared_state->is_null_safe_eq_join[0]) { + // single null safe eq + keep_null_key = true; + } + + hash_table_ctx.hash_table->build(hash_table_ctx.keys, hash_table_ctx.bucket_nums.data(), + _rows, keep_null_key); hash_table_ctx.bucket_nums.resize(_batch_size); hash_table_ctx.bucket_nums.shrink_to_fit(); @@ -231,4 +237,4 @@ struct ProcessHashTableBuild { }; } // namespace doris::pipeline -#include "common/compile_check_end.h" \ No newline at end of file +#include "common/compile_check_end.h" diff --git a/be/src/pipeline/exec/hashjoin_probe_operator.cpp b/be/src/pipeline/exec/hashjoin_probe_operator.cpp index 8ee041f57592caa..426bfcb219dc042 100644 --- a/be/src/pipeline/exec/hashjoin_probe_operator.cpp +++ b/be/src/pipeline/exec/hashjoin_probe_operator.cpp @@ -19,6 +19,7 @@ #include +#include "common/cast_set.h" #include "common/logging.h" #include "pipeline/exec/operator.h" #include "runtime/descriptors.h" @@ -56,13 +57,11 @@ Status HashJoinProbeLocalState::init(RuntimeState* state, LocalStateInfo& info) _probe_arena_memory_usage = profile()->AddHighWaterMarkCounter("MemoryUsageProbeKeyArena", TUnit::BYTES, "", 1); // Probe phase - _probe_next_timer = ADD_TIMER(profile(), "ProbeFindNextTime"); _probe_expr_call_timer = ADD_TIMER(profile(), "ProbeExprCallTime"); _search_hashtable_timer = ADD_TIMER(profile(), "ProbeWhenSearchHashTableTime"); _build_side_output_timer = ADD_TIMER(profile(), 
"ProbeWhenBuildSideOutputTime"); _probe_side_output_timer = ADD_TIMER(profile(), "ProbeWhenProbeSideOutputTime"); - _probe_process_hashtable_timer = ADD_TIMER(profile(), "ProbeWhenProcessHashTableTime"); - _process_other_join_conjunct_timer = ADD_TIMER(profile(), "OtherJoinConjunctTime"); + _non_equal_join_conjuncts_timer = ADD_TIMER(profile(), "NonEqualJoinConjunctEvaluationTime"); _init_probe_side_timer = ADD_TIMER(profile(), "InitProbeSideTime"); return Status::OK(); } @@ -153,11 +152,9 @@ Status HashJoinProbeLocalState::close(RuntimeState* state) { bool HashJoinProbeLocalState::_need_probe_null_map(vectorized::Block& block, const std::vector& res_col_ids) { for (size_t i = 0; i < _probe_expr_ctxs.size(); ++i) { - if (!_shared_state->is_null_safe_eq_join[i]) { - auto column = block.get_by_position(res_col_ids[i]).column.get(); - if (check_and_get_column(*column)) { - return true; - } + const auto* column = block.get_by_position(res_col_ids[i]).column.get(); + if (column->is_nullable() && !_shared_state->serialize_null_into_key[i]) { + return true; } } return false; @@ -230,7 +227,6 @@ HashJoinProbeOperatorX::HashJoinProbeOperatorX(ObjectPool* pool, const TPlanNode Status HashJoinProbeOperatorX::pull(doris::RuntimeState* state, vectorized::Block* output_block, bool* eos) const { auto& local_state = get_local_state(state); - SCOPED_TIMER(local_state._probe_timer); if (local_state._shared_state->short_circuit_for_probe) { // If we use a short-circuit strategy, should return empty block directly. 
*eos = true; @@ -289,21 +285,19 @@ Status HashJoinProbeOperatorX::pull(doris::RuntimeState* state, vectorized::Bloc if (local_state._probe_index < local_state._probe_block.rows()) { DCHECK(local_state._has_set_need_null_map_for_probe); std::visit( - [&](auto&& arg, auto&& process_hashtable_ctx, auto need_null_map_for_probe, - auto ignore_null) { + [&](auto&& arg, auto&& process_hashtable_ctx, auto need_judge_null) { using HashTableProbeType = std::decay_t; if constexpr (!std::is_same_v) { using HashTableCtxType = std::decay_t; if constexpr (!std::is_same_v) { - st = process_hashtable_ctx - .template process( - arg, - need_null_map_for_probe - ? &local_state._null_map_column->get_data() - : nullptr, - mutable_join_block, &temp_block, - local_state._probe_block.rows(), _is_mark_join, - _have_other_join_conjunct); + st = process_hashtable_ctx.template process( + arg, + local_state._null_map_column + ? &local_state._null_map_column->get_data() + : nullptr, + mutable_join_block, &temp_block, + cast_set(local_state._probe_block.rows()), + _is_mark_join, _have_other_join_conjunct); } else { st = Status::InternalError("uninited hash table"); } @@ -313,8 +307,8 @@ Status HashJoinProbeOperatorX::pull(doris::RuntimeState* state, vectorized::Bloc }, local_state._shared_state->hash_table_variants->method_variant, *local_state._process_hashtable_ctx_variants, - vectorized::make_bool_variant(local_state._need_null_map_for_probe), - vectorized::make_bool_variant(local_state._shared_state->probe_ignore_null)); + vectorized::make_bool_variant(local_state._need_null_map_for_probe && + local_state._shared_state->probe_ignore_null)); } else if (local_state._probe_eos) { if (_is_right_semi_anti || (_is_outer_join && _join_op != TJoinOp::LEFT_OUTER_JOIN)) { std::visit( @@ -323,7 +317,7 @@ Status HashJoinProbeOperatorX::pull(doris::RuntimeState* state, vectorized::Bloc if constexpr (!std::is_same_v) { using HashTableCtxType = std::decay_t; if constexpr (!std::is_same_v) { - st = 
process_hashtable_ctx.process_data_in_hashtable( + st = process_hashtable_ctx.finish_probing( arg, mutable_join_block, &temp_block, eos, _is_mark_join); } else { st = Status::InternalError("uninited hash table"); @@ -382,34 +376,22 @@ Status HashJoinProbeLocalState::_extract_join_column(vectorized::Block& block, } auto& shared_state = *_shared_state; - auto& p = _parent->cast(); for (size_t i = 0; i < shared_state.build_exprs_size; ++i) { - if (p._should_convert_to_nullable[i]) { + const auto* column = block.get_by_position(res_col_ids[i]).column.get(); + if (!column->is_nullable() && shared_state.serialize_null_into_key[i]) { _key_columns_holder.emplace_back( vectorized::make_nullable(block.get_by_position(res_col_ids[i]).column)); _probe_columns[i] = _key_columns_holder.back().get(); - continue; - } - - if (shared_state.is_null_safe_eq_join[i]) { - _probe_columns[i] = block.get_by_position(res_col_ids[i]).column.get(); + } else if (const auto* nullable = check_and_get_column(*column); + nullable && !shared_state.serialize_null_into_key[i]) { + // update nulllmap and split nested out of ColumnNullable when serialize_null_into_key is false and column is nullable + const auto& col_nested = nullable->get_nested_column(); + const auto& col_nullmap = nullable->get_null_map_data(); + DCHECK(_null_map_column != nullptr); + vectorized::VectorizedUtils::update_null_map(_null_map_column->get_data(), col_nullmap); + _probe_columns[i] = &col_nested; } else { - const auto* column = block.get_by_position(res_col_ids[i]).column.get(); - if (const auto* nullable = check_and_get_column(*column)) { - const auto& col_nested = nullable->get_nested_column(); - const auto& col_nullmap = nullable->get_null_map_data(); - - DCHECK(_null_map_column != nullptr); - vectorized::VectorizedUtils::update_null_map(_null_map_column->get_data(), - col_nullmap); - if (shared_state.store_null_in_hash_table[i]) { - _probe_columns[i] = nullable; - } else { - _probe_columns[i] = &col_nested; - } - } 
else { - _probe_columns[i] = column; - } + _probe_columns[i] = column; } } return Status::OK(); @@ -530,20 +512,6 @@ Status HashJoinProbeOperatorX::init(const TPlanNode& tnode, RuntimeState* state) null_aware || (_probe_expr_ctxs.back()->root()->is_nullable() && probe_dispose_null); conjuncts_index++; - const bool is_null_safe_equal = eq_join_conjunct.__isset.opcode && - (eq_join_conjunct.opcode == TExprOpcode::EQ_FOR_NULL) && - (eq_join_conjunct.right.nodes[0].is_nullable || - eq_join_conjunct.left.nodes[0].is_nullable); - - /// If it's right anti join, - /// we should convert the probe to nullable if the build side is nullable. - /// And if it is 'null safe equal', - /// we must make sure the build side and the probe side are both nullable or non-nullable. - const bool should_convert_to_nullable = - (is_null_safe_equal || _join_op == TJoinOp::RIGHT_ANTI_JOIN) && - !eq_join_conjunct.left.nodes[0].is_nullable && - eq_join_conjunct.right.nodes[0].is_nullable; - _should_convert_to_nullable.emplace_back(should_convert_to_nullable); } for (size_t i = 0; i < _probe_expr_ctxs.size(); ++i) { _probe_ignore_null |= !probe_not_ignore_null[i]; diff --git a/be/src/pipeline/exec/hashjoin_probe_operator.h b/be/src/pipeline/exec/hashjoin_probe_operator.h index 917c2692b44d610..1bdb9d13347d09e 100644 --- a/be/src/pipeline/exec/hashjoin_probe_operator.h +++ b/be/src/pipeline/exec/hashjoin_probe_operator.h @@ -117,14 +117,12 @@ class HashJoinProbeLocalState final std::make_unique(); RuntimeProfile::Counter* _probe_expr_call_timer = nullptr; - RuntimeProfile::Counter* _probe_next_timer = nullptr; RuntimeProfile::Counter* _probe_side_output_timer = nullptr; - RuntimeProfile::Counter* _probe_process_hashtable_timer = nullptr; RuntimeProfile::HighWaterMarkCounter* _probe_arena_memory_usage = nullptr; RuntimeProfile::Counter* _search_hashtable_timer = nullptr; RuntimeProfile::Counter* _init_probe_side_timer = nullptr; RuntimeProfile::Counter* _build_side_output_timer = nullptr; - 
RuntimeProfile::Counter* _process_other_join_conjunct_timer = nullptr; + RuntimeProfile::Counter* _non_equal_join_conjuncts_timer = nullptr; }; class HashJoinProbeOperatorX final : public JoinProbeOperatorX { @@ -152,9 +150,6 @@ class HashJoinProbeOperatorX final : public JoinProbeOperatorX _should_convert_to_nullable; - vectorized::DataTypes _right_table_data_types; vectorized::DataTypes _left_table_data_types; std::vector _hash_output_slot_ids; diff --git a/be/src/pipeline/exec/jdbc_table_sink_operator.cpp b/be/src/pipeline/exec/jdbc_table_sink_operator.cpp index 10fd0d8e40bf253..29c881d1c281000 100644 --- a/be/src/pipeline/exec/jdbc_table_sink_operator.cpp +++ b/be/src/pipeline/exec/jdbc_table_sink_operator.cpp @@ -47,6 +47,7 @@ Status JdbcTableSinkOperatorX::open(RuntimeState* state) { Status JdbcTableSinkOperatorX::sink(RuntimeState* state, vectorized::Block* block, bool eos) { auto& local_state = get_local_state(state); SCOPED_TIMER(local_state.exec_time_counter()); + COUNTER_UPDATE(local_state.rows_input_counter(), (int64_t)block->rows()); RETURN_IF_ERROR(local_state.sink(state, block, eos)); return Status::OK(); } diff --git a/be/src/pipeline/exec/join/process_hash_table_probe.h b/be/src/pipeline/exec/join/process_hash_table_probe.h index bf4a4d5763c02ea..14e0edd977f57bc 100644 --- a/be/src/pipeline/exec/join/process_hash_table_probe.h +++ b/be/src/pipeline/exec/join/process_hash_table_probe.h @@ -55,20 +55,20 @@ struct ProcessHashTableProbe { int last_probe_index, bool all_match_one, bool have_other_join_conjunct); - template + template Status process(HashTableType& hash_table_ctx, ConstNullMapPtr null_map, vectorized::MutableBlock& mutable_block, vectorized::Block* output_block, - size_t probe_rows, bool is_mark_join, bool have_other_join_conjunct); + uint32_t probe_rows, bool is_mark_join, bool have_other_join_conjunct); // Only process the join with no other join conjunct, because of no other join conjunt // the output block struct is same with mutable 
block. we can do more opt on it and simplify // the logic of probe // TODO: opt the visited here to reduce the size of hash table - template + template Status do_process(HashTableType& hash_table_ctx, ConstNullMapPtr null_map, vectorized::MutableBlock& mutable_block, vectorized::Block* output_block, - size_t probe_rows); + uint32_t probe_rows); // In the presence of other join conjunct, the process of join become more complicated. // each matching join column need to be processed by other join conjunct. so the struct of mutable block // and output block may be different @@ -87,13 +87,12 @@ struct ProcessHashTableProbe { // Process full outer join/ right join / right semi/anti join to output the join result // in hash table template - Status process_data_in_hashtable(HashTableType& hash_table_ctx, - vectorized::MutableBlock& mutable_block, - vectorized::Block* output_block, bool* eos, bool is_mark_join); + Status finish_probing(HashTableType& hash_table_ctx, vectorized::MutableBlock& mutable_block, + vectorized::Block* output_block, bool* eos, bool is_mark_join); /// For null aware join with other conjuncts, if the probe key of one row on left side is null, /// we should make this row match with all rows in build side. 
- size_t _process_probe_null_key(uint32_t probe_idx); + uint32_t _process_probe_null_key(uint32_t probe_idx); pipeline::HashJoinProbeLocalState* _parent = nullptr; const int _batch_size; @@ -136,10 +135,10 @@ struct ProcessHashTableProbe { RuntimeProfile::Counter* _init_probe_side_timer = nullptr; RuntimeProfile::Counter* _build_side_output_timer = nullptr; RuntimeProfile::Counter* _probe_side_output_timer = nullptr; - RuntimeProfile::Counter* _probe_process_hashtable_timer = nullptr; + RuntimeProfile::Counter* _finish_probe_phase_timer = nullptr; - int _right_col_idx; - int _right_col_len; + size_t _right_col_idx; + size_t _right_col_len; }; } // namespace pipeline diff --git a/be/src/pipeline/exec/join/process_hash_table_probe_impl.h b/be/src/pipeline/exec/join/process_hash_table_probe_impl.h index 667c7a468d70fc3..05cd3d7d9e0590f 100644 --- a/be/src/pipeline/exec/join/process_hash_table_probe_impl.h +++ b/be/src/pipeline/exec/join/process_hash_table_probe_impl.h @@ -19,6 +19,7 @@ #include +#include "common/cast_set.h" #include "common/status.h" #include "pipeline/exec/hashjoin_probe_operator.h" #include "process_hash_table_probe.h" @@ -29,7 +30,7 @@ #include "vec/exprs/vexpr_context.h" namespace doris::pipeline { - +#include "common/compile_check_begin.h" template ProcessHashTableProbe::ProcessHashTableProbe(HashJoinProbeLocalState* parent, int batch_size) @@ -55,7 +56,7 @@ ProcessHashTableProbe::ProcessHashTableProbe(HashJoinProbeLocalState _init_probe_side_timer(parent->_init_probe_side_timer), _build_side_output_timer(parent->_build_side_output_timer), _probe_side_output_timer(parent->_probe_side_output_timer), - _probe_process_hashtable_timer(parent->_probe_process_hashtable_timer), + _finish_probe_phase_timer(parent->_finish_probe_phase_timer), _right_col_idx((_is_right_semi_anti && !_have_other_join_conjunct) ? 
0 : _parent->left_table_data_types().size()), @@ -186,13 +187,13 @@ typename HashTableType::State ProcessHashTableProbe::_init_probe_sid } template -template +template Status ProcessHashTableProbe::do_process(HashTableType& hash_table_ctx, vectorized::ConstNullMapPtr null_map, vectorized::MutableBlock& mutable_block, vectorized::Block* output_block, - size_t probe_rows) { + uint32_t probe_rows) { if (_right_col_len && !_build_block) { return Status::InternalError("build block is nullptr"); } @@ -205,8 +206,8 @@ Status ProcessHashTableProbe::do_process(HashTableType& hash_table_c SCOPED_TIMER(_init_probe_side_timer); _init_probe_side( hash_table_ctx, probe_rows, with_other_conjuncts, - need_null_map_for_probe ? null_map->data() : nullptr, - need_null_map_for_probe && ignore_null && + null_map ? null_map->data() : nullptr, + need_judge_null && (JoinOpType == doris::TJoinOp::LEFT_ANTI_JOIN || JoinOpType == doris::TJoinOp::LEFT_SEMI_JOIN || JoinOpType == doris::TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN || @@ -216,7 +217,7 @@ Status ProcessHashTableProbe::do_process(HashTableType& hash_table_c auto& mcol = mutable_block.mutable_columns(); const bool has_mark_join_conjunct = !_parent->_mark_join_conjuncts.empty(); - int current_offset = 0; + uint32_t current_offset = 0; if constexpr ((JoinOpType == doris::TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN || JoinOpType == doris::TJoinOp::NULL_AWARE_LEFT_SEMI_JOIN) && with_other_conjuncts) { @@ -254,13 +255,12 @@ Status ProcessHashTableProbe::do_process(HashTableType& hash_table_c } } else { SCOPED_TIMER(_search_hashtable_timer); - auto [new_probe_idx, new_build_idx, - new_current_offset] = hash_table_ctx.hash_table->template find_batch < JoinOpType, - with_other_conjuncts, is_mark_join, - need_null_map_for_probe && - ignore_null > (hash_table_ctx.keys, hash_table_ctx.bucket_nums.data(), - probe_index, build_index, probe_rows, _probe_indexs.data(), - _probe_visited, _build_indexs.data(), has_mark_join_conjunct); + auto [new_probe_idx, 
new_build_idx, new_current_offset] = + hash_table_ctx.hash_table->template find_batch( + hash_table_ctx.keys, hash_table_ctx.bucket_nums.data(), probe_index, + build_index, cast_set(probe_rows), _probe_indexs.data(), + _probe_visited, _build_indexs.data(), has_mark_join_conjunct); probe_index = new_probe_idx; build_index = new_build_idx; current_offset = new_current_offset; @@ -304,12 +304,12 @@ Status ProcessHashTableProbe::do_process(HashTableType& hash_table_c } template -size_t ProcessHashTableProbe::_process_probe_null_key(uint32_t probe_index) { +uint32_t ProcessHashTableProbe::_process_probe_null_key(uint32_t probe_index) { const auto rows = _build_block->rows(); DCHECK_LT(_build_index_for_null_probe_key, rows); DCHECK_LT(0, _build_index_for_null_probe_key); - size_t matched_cnt = 0; + uint32_t matched_cnt = 0; for (; _build_index_for_null_probe_key < rows && matched_cnt < _batch_size; ++matched_cnt) { _probe_indexs[matched_cnt] = probe_index; _build_indexs[matched_cnt] = _build_index_for_null_probe_key++; @@ -502,8 +502,8 @@ Status ProcessHashTableProbe::do_other_join_conjuncts(vectorized::Bl return Status::OK(); } - SCOPED_TIMER(_parent->_process_other_join_conjunct_timer); - int orig_columns = output_block->columns(); + SCOPED_TIMER(_parent->_non_equal_join_conjuncts_timer); + size_t orig_columns = output_block->columns(); vectorized::IColumn::Filter other_conjunct_filter(row_count, 1); { bool can_be_filter_all = false; @@ -617,10 +617,11 @@ Status ProcessHashTableProbe::do_other_join_conjuncts(vectorized::Bl template template -Status ProcessHashTableProbe::process_data_in_hashtable( - HashTableType& hash_table_ctx, vectorized::MutableBlock& mutable_block, - vectorized::Block* output_block, bool* eos, bool is_mark_join) { - SCOPED_TIMER(_probe_process_hashtable_timer); +Status ProcessHashTableProbe::finish_probing(HashTableType& hash_table_ctx, + vectorized::MutableBlock& mutable_block, + vectorized::Block* output_block, bool* eos, + bool is_mark_join) { 
+ SCOPED_TIMER(_finish_probe_phase_timer); auto& mcol = mutable_block.mutable_columns(); if (is_mark_join) { std::unique_ptr mark_column = @@ -673,19 +674,19 @@ Status ProcessHashTableProbe::process_data_in_hashtable( } template -template +template Status ProcessHashTableProbe::process(HashTableType& hash_table_ctx, vectorized::ConstNullMapPtr null_map, vectorized::MutableBlock& mutable_block, vectorized::Block* output_block, - size_t probe_rows, bool is_mark_join, + uint32_t probe_rows, bool is_mark_join, bool have_other_join_conjunct) { Status res; std::visit( [&](auto is_mark_join, auto have_other_join_conjunct) { - res = do_process( - hash_table_ctx, null_map, mutable_block, output_block, probe_rows); + res = do_process(hash_table_ctx, null_map, mutable_block, + output_block, probe_rows); }, vectorized::make_bool_variant(is_mark_join), vectorized::make_bool_variant(have_other_join_conjunct)); @@ -701,50 +702,32 @@ struct ExtractType { }; #define INSTANTIATION(JoinOpType, T) \ - template Status \ - ProcessHashTableProbe::process::Type>( \ - ExtractType::Type & hash_table_ctx, vectorized::ConstNullMapPtr null_map, \ - vectorized::MutableBlock & mutable_block, vectorized::Block * output_block, \ - size_t probe_rows, bool is_mark_join, bool have_other_join_conjunct); \ - template Status \ - ProcessHashTableProbe::process::Type>( \ + template Status ProcessHashTableProbe::process::Type>( \ ExtractType::Type & hash_table_ctx, vectorized::ConstNullMapPtr null_map, \ vectorized::MutableBlock & mutable_block, vectorized::Block * output_block, \ - size_t probe_rows, bool is_mark_join, bool have_other_join_conjunct); \ - template Status \ - ProcessHashTableProbe::process::Type>( \ + uint32_t probe_rows, bool is_mark_join, bool have_other_join_conjunct); \ + template Status ProcessHashTableProbe::process::Type>( \ ExtractType::Type & hash_table_ctx, vectorized::ConstNullMapPtr null_map, \ vectorized::MutableBlock & mutable_block, vectorized::Block * output_block, \ - 
size_t probe_rows, bool is_mark_join, bool have_other_join_conjunct); \ - template Status \ - ProcessHashTableProbe::process::Type>( \ - ExtractType::Type & hash_table_ctx, vectorized::ConstNullMapPtr null_map, \ - vectorized::MutableBlock & mutable_block, vectorized::Block * output_block, \ - size_t probe_rows, bool is_mark_join, bool have_other_join_conjunct); \ - \ - template Status \ - ProcessHashTableProbe::process_data_in_hashtable::Type>( \ + uint32_t probe_rows, bool is_mark_join, bool have_other_join_conjunct); \ + template Status ProcessHashTableProbe::finish_probing::Type>( \ ExtractType::Type & hash_table_ctx, vectorized::MutableBlock & mutable_block, \ vectorized::Block * output_block, bool* eos, bool is_mark_join); -#define INSTANTIATION_FOR(JoinOpType) \ - template struct ProcessHashTableProbe; \ - \ - INSTANTIATION(JoinOpType, (SerializedHashTableContext)); \ - INSTANTIATION(JoinOpType, (I8HashTableContext)); \ - INSTANTIATION(JoinOpType, (I16HashTableContext)); \ - INSTANTIATION(JoinOpType, (I32HashTableContext)); \ - INSTANTIATION(JoinOpType, (I64HashTableContext)); \ - INSTANTIATION(JoinOpType, (I128HashTableContext)); \ - INSTANTIATION(JoinOpType, (I256HashTableContext)); \ - INSTANTIATION(JoinOpType, (I64FixedKeyHashTableContext)); \ - INSTANTIATION(JoinOpType, (I64FixedKeyHashTableContext)); \ - INSTANTIATION(JoinOpType, (I128FixedKeyHashTableContext)); \ - INSTANTIATION(JoinOpType, (I128FixedKeyHashTableContext)); \ - INSTANTIATION(JoinOpType, (I256FixedKeyHashTableContext)); \ - INSTANTIATION(JoinOpType, (I256FixedKeyHashTableContext)); \ - INSTANTIATION(JoinOpType, (I136FixedKeyHashTableContext)); \ - INSTANTIATION(JoinOpType, (MethodOneString)); \ - INSTANTIATION(JoinOpType, (I136FixedKeyHashTableContext)); - +#define INSTANTIATION_FOR(JoinOpType) \ + template struct ProcessHashTableProbe; \ + \ + INSTANTIATION(JoinOpType, (SerializedHashTableContext)); \ + INSTANTIATION(JoinOpType, (PrimaryTypeHashTableContext)); \ + 
INSTANTIATION(JoinOpType, (PrimaryTypeHashTableContext)); \ + INSTANTIATION(JoinOpType, (PrimaryTypeHashTableContext)); \ + INSTANTIATION(JoinOpType, (PrimaryTypeHashTableContext)); \ + INSTANTIATION(JoinOpType, (PrimaryTypeHashTableContext)); \ + INSTANTIATION(JoinOpType, (PrimaryTypeHashTableContext)); \ + INSTANTIATION(JoinOpType, (FixedKeyHashTableContext)); \ + INSTANTIATION(JoinOpType, (FixedKeyHashTableContext)); \ + INSTANTIATION(JoinOpType, (FixedKeyHashTableContext)); \ + INSTANTIATION(JoinOpType, (FixedKeyHashTableContext)); \ + INSTANTIATION(JoinOpType, (MethodOneString)); +#include "common/compile_check_end.h" } // namespace doris::pipeline diff --git a/be/src/pipeline/exec/join_build_sink_operator.cpp b/be/src/pipeline/exec/join_build_sink_operator.cpp index fc0d3b8746077ba..8b3f5cd98ff7c06 100644 --- a/be/src/pipeline/exec/join_build_sink_operator.cpp +++ b/be/src/pipeline/exec/join_build_sink_operator.cpp @@ -33,15 +33,11 @@ Status JoinBuildSinkLocalState::init(RuntimeState* stat PipelineXSinkLocalState::profile()->add_info_string("JoinType", to_string(p._join_op)); - _build_rows_counter = ADD_COUNTER(PipelineXSinkLocalState::profile(), - "BuildRows", TUnit::UNIT); _publish_runtime_filter_timer = ADD_TIMER(PipelineXSinkLocalState::profile(), "PublishRuntimeFilterTime"); - _runtime_filter_compute_timer = ADD_TIMER(PipelineXSinkLocalState::profile(), - "RuntimeFilterComputeTime"); - _runtime_filter_init_timer = - ADD_TIMER(PipelineXSinkLocalState::profile(), "RuntimeFilterInitTime"); + _runtime_filter_compute_timer = + ADD_TIMER(PipelineXSinkLocalState::profile(), "BuildRuntimeFilterTime"); return Status::OK(); } diff --git a/be/src/pipeline/exec/join_build_sink_operator.h b/be/src/pipeline/exec/join_build_sink_operator.h index 714e0c341906781..9d79a97397ff776 100644 --- a/be/src/pipeline/exec/join_build_sink_operator.h +++ b/be/src/pipeline/exec/join_build_sink_operator.h @@ -39,10 +39,8 @@ class JoinBuildSinkLocalState : public 
PipelineXSinkLocalState template friend class JoinBuildSinkOperatorX; - RuntimeProfile::Counter* _build_rows_counter = nullptr; RuntimeProfile::Counter* _publish_runtime_filter_timer = nullptr; RuntimeProfile::Counter* _runtime_filter_compute_timer = nullptr; - RuntimeProfile::Counter* _runtime_filter_init_timer = nullptr; std::vector> _runtime_filters; }; diff --git a/be/src/pipeline/exec/join_probe_operator.cpp b/be/src/pipeline/exec/join_probe_operator.cpp index 76dc75a90d8f3c4..11b5b29c8b556b1 100644 --- a/be/src/pipeline/exec/join_probe_operator.cpp +++ b/be/src/pipeline/exec/join_probe_operator.cpp @@ -29,11 +29,10 @@ Status JoinProbeLocalState::init(RuntimeState* state, LocalStateInfo& info) { RETURN_IF_ERROR(Base::init(state, info)); - _probe_timer = ADD_TIMER(Base::profile(), "ProbeTime"); _join_filter_timer = ADD_TIMER(Base::profile(), "JoinFilterTimer"); _build_output_block_timer = ADD_TIMER(Base::profile(), "BuildOutputBlock"); _probe_rows_counter = ADD_COUNTER_WITH_LEVEL(Base::profile(), "ProbeRows", TUnit::UNIT, 1); - + _finish_probe_phase_timer = ADD_TIMER(Base::profile(), "FinishProbePhaseTime"); return Status::OK(); } diff --git a/be/src/pipeline/exec/join_probe_operator.h b/be/src/pipeline/exec/join_probe_operator.h index 3f68c73d04b1612..078806cea4fc5ac 100644 --- a/be/src/pipeline/exec/join_probe_operator.h +++ b/be/src/pipeline/exec/join_probe_operator.h @@ -49,10 +49,10 @@ class JoinProbeLocalState : public PipelineXLocalState { size_t _mark_column_id = -1; - RuntimeProfile::Counter* _probe_timer = nullptr; RuntimeProfile::Counter* _probe_rows_counter = nullptr; RuntimeProfile::Counter* _join_filter_timer = nullptr; RuntimeProfile::Counter* _build_output_block_timer = nullptr; + RuntimeProfile::Counter* _finish_probe_phase_timer = nullptr; std::unique_ptr _child_block = nullptr; bool _child_eos = false; diff --git a/be/src/pipeline/exec/memory_scratch_sink_operator.cpp b/be/src/pipeline/exec/memory_scratch_sink_operator.cpp index 
1d022f9304fd0de..2c69c0e2b2ba9fc 100644 --- a/be/src/pipeline/exec/memory_scratch_sink_operator.cpp +++ b/be/src/pipeline/exec/memory_scratch_sink_operator.cpp @@ -33,6 +33,9 @@ Status MemoryScratchSinkLocalState::init(RuntimeState* state, LocalSinkStateInfo RETURN_IF_ERROR(Base::init(state, info)); SCOPED_TIMER(exec_time_counter()); SCOPED_TIMER(_init_timer); + _get_arrow_schema_timer = ADD_TIMER(_profile, "GetArrowSchemaTime"); + _convert_block_to_arrow_batch_timer = ADD_TIMER(_profile, "ConvertBlockToArrowBatchTime"); + _evaluation_timer = ADD_TIMER(_profile, "EvaluationTime"); // create queue state->exec_env()->result_queue_mgr()->create_queue(state->fragment_instance_id(), &_queue); @@ -92,13 +95,22 @@ Status MemoryScratchSinkOperatorX::sink(RuntimeState* state, vectorized::Block* // Exec vectorized expr here to speed up, block.rows() == 0 means expr exec // failed, just return the error status vectorized::Block block; - RETURN_IF_ERROR(vectorized::VExprContext::get_output_block_after_execute_exprs( - local_state._output_vexpr_ctxs, *input_block, &block)); + { + SCOPED_TIMER(local_state._evaluation_timer); + RETURN_IF_ERROR(vectorized::VExprContext::get_output_block_after_execute_exprs( + local_state._output_vexpr_ctxs, *input_block, &block)); + } std::shared_ptr block_arrow_schema; - // After expr executed, use recaculated schema as final schema - RETURN_IF_ERROR(convert_block_arrow_schema(block, &block_arrow_schema, state->timezone())); - RETURN_IF_ERROR(convert_to_arrow_batch(block, block_arrow_schema, arrow::default_memory_pool(), - &result, _timezone_obj)); + { + SCOPED_TIMER(local_state._get_arrow_schema_timer); + // After expr executed, use recaculated schema as final schema + RETURN_IF_ERROR(get_arrow_schema(block, &block_arrow_schema, state->timezone())); + } + { + SCOPED_TIMER(local_state._convert_block_to_arrow_batch_timer); + RETURN_IF_ERROR(convert_to_arrow_batch( + block, block_arrow_schema, arrow::default_memory_pool(), &result, _timezone_obj)); 
+ } local_state._queue->blocking_put(result); if (local_state._queue->size() > config::max_memory_sink_batch_count) { local_state._queue_dependency->block(); diff --git a/be/src/pipeline/exec/memory_scratch_sink_operator.h b/be/src/pipeline/exec/memory_scratch_sink_operator.h index 69c0fa14042ef28..c74659d15b96f29 100644 --- a/be/src/pipeline/exec/memory_scratch_sink_operator.h +++ b/be/src/pipeline/exec/memory_scratch_sink_operator.h @@ -45,6 +45,9 @@ class MemoryScratchSinkLocalState final : public PipelineXSinkLocalState _queue_dependency = nullptr; + RuntimeProfile::Counter* _get_arrow_schema_timer = nullptr; + RuntimeProfile::Counter* _convert_block_to_arrow_batch_timer = nullptr; + RuntimeProfile::Counter* _evaluation_timer = nullptr; }; class MemoryScratchSinkOperatorX final : public DataSinkOperatorX { diff --git a/be/src/pipeline/exec/multi_cast_data_stream_source.cpp b/be/src/pipeline/exec/multi_cast_data_stream_source.cpp index 71204f1285ce7b1..e45e59d17e27b37 100644 --- a/be/src/pipeline/exec/multi_cast_data_stream_source.cpp +++ b/be/src/pipeline/exec/multi_cast_data_stream_source.cpp @@ -40,6 +40,9 @@ Status MultiCastDataStreamSourceLocalState::init(RuntimeState* state, LocalState auto& p = _parent->cast(); _shared_state->multi_cast_data_streamer->set_dep_by_sender_idx(p._consumer_id, _dependency); _wait_for_rf_timer = ADD_TIMER(_runtime_profile, "WaitForRuntimeFilter"); + _filter_timer = ADD_TIMER(_runtime_profile, "FilterTime"); + _get_data_timer = ADD_TIMER(_runtime_profile, "GetDataTime"); + _materialize_data_timer = ADD_TIMER(_runtime_profile, "MaterializeDataTime"); // init profile for runtime filter RuntimeFilterConsumer::_init_profile(profile()); init_runtime_filter_dependency(_filter_dependencies, p.operator_id(), p.node_id(), @@ -86,15 +89,19 @@ Status MultiCastDataStreamerSourceOperatorX::get_block(RuntimeState* state, if (!local_state._output_expr_contexts.empty()) { output_block = &tmp_block; } - 
RETURN_IF_ERROR(local_state._shared_state->multi_cast_data_streamer->pull(_consumer_id, - output_block, eos)); - + { + SCOPED_TIMER(local_state._get_data_timer); + RETURN_IF_ERROR(local_state._shared_state->multi_cast_data_streamer->pull( + _consumer_id, output_block, eos)); + } if (!local_state._conjuncts.empty()) { + SCOPED_TIMER(local_state._filter_timer); RETURN_IF_ERROR(vectorized::VExprContext::filter_block(local_state._conjuncts, output_block, output_block->columns())); } if (!local_state._output_expr_contexts.empty() && output_block->rows() > 0) { + SCOPED_TIMER(local_state._materialize_data_timer); RETURN_IF_ERROR(vectorized::VExprContext::get_output_block_after_execute_exprs( local_state._output_expr_contexts, *output_block, block, true)); vectorized::materialize_block_inplace(*block); diff --git a/be/src/pipeline/exec/multi_cast_data_stream_source.h b/be/src/pipeline/exec/multi_cast_data_stream_source.h index 2059f706cad3f50..57410bf8d9568a5 100644 --- a/be/src/pipeline/exec/multi_cast_data_stream_source.h +++ b/be/src/pipeline/exec/multi_cast_data_stream_source.h @@ -68,6 +68,9 @@ class MultiCastDataStreamSourceLocalState final : public PipelineXLocalState> _filter_dependencies; RuntimeProfile::Counter* _wait_for_rf_timer = nullptr; + RuntimeProfile::Counter* _filter_timer = nullptr; + RuntimeProfile::Counter* _get_data_timer = nullptr; + RuntimeProfile::Counter* _materialize_data_timer = nullptr; }; class MultiCastDataStreamerSourceOperatorX final diff --git a/be/src/pipeline/exec/multi_cast_data_streamer.cpp b/be/src/pipeline/exec/multi_cast_data_streamer.cpp index 3e629093e23b97a..25c939695f90efd 100644 --- a/be/src/pipeline/exec/multi_cast_data_streamer.cpp +++ b/be/src/pipeline/exec/multi_cast_data_streamer.cpp @@ -23,16 +23,14 @@ namespace doris::pipeline { #include "common/compile_check_begin.h" -MultiCastBlock::MultiCastBlock(vectorized::Block* block, int used_count, int un_finish_copy, - size_t mem_size) - : _used_count(used_count), 
_un_finish_copy(un_finish_copy), _mem_size(mem_size) { +MultiCastBlock::MultiCastBlock(vectorized::Block* block, int un_finish_copy, size_t mem_size) + : _un_finish_copy(un_finish_copy), _mem_size(mem_size) { _block = vectorized::Block::create_unique(block->get_columns_with_type_and_name()); block->clear(); } Status MultiCastDataStreamer::pull(int sender_idx, doris::vectorized::Block* block, bool* eos) { int* un_finish_copy = nullptr; - int use_count = 0; { std::lock_guard l(_mutex); auto& pos_to_pull = _sender_pos_to_read[sender_idx]; @@ -43,8 +41,6 @@ Status MultiCastDataStreamer::pull(int sender_idx, doris::vectorized::Block* blo _cumulative_mem_size -= pos_to_pull->_mem_size; - pos_to_pull->_used_count--; - use_count = pos_to_pull->_used_count; un_finish_copy = &pos_to_pull->_un_finish_copy; pos_to_pull++; @@ -56,12 +52,7 @@ Status MultiCastDataStreamer::pull(int sender_idx, doris::vectorized::Block* blo *eos = _eos and pos_to_pull == end; } - if (use_count == 0) { - // will clear _multi_cast_blocks - _wait_copy_block(block, *un_finish_copy); - } else { - _copy_block(block, *un_finish_copy); - } + _copy_block(block, *un_finish_copy); return Status::OK(); } @@ -71,21 +62,13 @@ void MultiCastDataStreamer::_copy_block(vectorized::Block* block, int& un_finish for (int i = 0; i < block->columns(); ++i) { block->get_by_position(i).column = block->get_by_position(i).column->clone_resized(rows); } - std::unique_lock l(_mutex); un_finish_copy--; if (un_finish_copy == 0) { - l.unlock(); - _cv.notify_one(); + _multi_cast_blocks.pop_front(); } } -void MultiCastDataStreamer::_wait_copy_block(vectorized::Block* block, int& un_finish_copy) { - std::unique_lock l(_mutex); - _cv.wait(l, [&]() { return un_finish_copy == 0; }); - _multi_cast_blocks.pop_front(); -} - Status MultiCastDataStreamer::push(RuntimeState* state, doris::vectorized::Block* block, bool eos) { auto rows = block->rows(); COUNTER_UPDATE(_process_rows, rows); @@ -96,8 +79,7 @@ Status 
MultiCastDataStreamer::push(RuntimeState* state, doris::vectorized::Block { std::lock_guard l(_mutex); - _multi_cast_blocks.emplace_back(block, _cast_sender_count, _cast_sender_count - 1, - block_mem_size); + _multi_cast_blocks.emplace_back(block, _cast_sender_count, block_mem_size); // last elem auto end = std::prev(_multi_cast_blocks.end()); for (int i = 0; i < _sender_pos_to_read.size(); ++i) { diff --git a/be/src/pipeline/exec/multi_cast_data_streamer.h b/be/src/pipeline/exec/multi_cast_data_streamer.h index 07e64016363f656..51a73cf0c2b053e 100644 --- a/be/src/pipeline/exec/multi_cast_data_streamer.h +++ b/be/src/pipeline/exec/multi_cast_data_streamer.h @@ -23,10 +23,11 @@ namespace doris::pipeline { class Dependency; struct MultiCastBlock { - MultiCastBlock(vectorized::Block* block, int used_count, int need_copy, size_t mem_size); + MultiCastBlock(vectorized::Block* block, int need_copy, size_t mem_size); std::unique_ptr _block; - int _used_count; + // Each block is copied during pull. If _un_finish_copy == 0, + // it indicates that this block has been fully used and can be released. 
int _un_finish_copy; size_t _mem_size; }; @@ -69,14 +70,10 @@ class MultiCastDataStreamer { void _block_reading(int sender_idx); void _copy_block(vectorized::Block* block, int& un_finish_copy); - - void _wait_copy_block(vectorized::Block* block, int& un_finish_copy); - const RowDescriptor& _row_desc; RuntimeProfile* _profile = nullptr; std::list _multi_cast_blocks; std::vector::iterator> _sender_pos_to_read; - std::condition_variable _cv; std::mutex _mutex; bool _eos = false; int _cast_sender_count = 0; diff --git a/be/src/pipeline/exec/nested_loop_join_build_operator.cpp b/be/src/pipeline/exec/nested_loop_join_build_operator.cpp index 59020a5df437bde..83b378e792c3fa3 100644 --- a/be/src/pipeline/exec/nested_loop_join_build_operator.cpp +++ b/be/src/pipeline/exec/nested_loop_join_build_operator.cpp @@ -139,7 +139,6 @@ Status NestedLoopJoinBuildSinkOperatorX::sink(doris::RuntimeState* state, vector } if (eos) { - COUNTER_UPDATE(local_state._build_rows_counter, local_state._build_rows); RuntimeFilterBuild rf_ctx(&local_state); RETURN_IF_ERROR(rf_ctx(state)); diff --git a/be/src/pipeline/exec/nested_loop_join_build_operator.h b/be/src/pipeline/exec/nested_loop_join_build_operator.h index f2ca259754b661c..d6e72799f97d92d 100644 --- a/be/src/pipeline/exec/nested_loop_join_build_operator.h +++ b/be/src/pipeline/exec/nested_loop_join_build_operator.h @@ -76,8 +76,8 @@ class NestedLoopJoinBuildSinkOperatorX final if (_join_op == TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN) { return {ExchangeType::NOOP}; } - return _child->ignore_data_distribution() ? DataDistribution(ExchangeType::BROADCAST) - : DataDistribution(ExchangeType::NOOP); + return _child->is_serial_operator() ? 
DataDistribution(ExchangeType::BROADCAST) + : DataDistribution(ExchangeType::NOOP); } private: diff --git a/be/src/pipeline/exec/nested_loop_join_probe_operator.cpp b/be/src/pipeline/exec/nested_loop_join_probe_operator.cpp index d0fb4ee19a58249..afa1a2e59b798ce 100644 --- a/be/src/pipeline/exec/nested_loop_join_probe_operator.cpp +++ b/be/src/pipeline/exec/nested_loop_join_probe_operator.cpp @@ -43,6 +43,10 @@ Status NestedLoopJoinProbeLocalState::init(RuntimeState* state, LocalStateInfo& SCOPED_TIMER(exec_time_counter()); SCOPED_TIMER(_init_timer); _loop_join_timer = ADD_TIMER(profile(), "LoopGenerateJoin"); + _output_temp_blocks_timer = ADD_TIMER(profile(), "OutputTempBlocksTime"); + _update_visited_flags_timer = ADD_TIMER(profile(), "UpdateVisitedFlagsTime"); + _join_conjuncts_evaluation_timer = ADD_TIMER(profile(), "JoinConjunctsEvaluationTime"); + _filtered_by_join_conjuncts_timer = ADD_TIMER(profile(), "FilteredByJoinConjunctsTime"); return Status::OK(); } @@ -168,23 +172,26 @@ Status NestedLoopJoinProbeLocalState::generate_join_block_data(RuntimeState* sta _process_left_child_block(_join_block, now_process_build_block); } - if constexpr (set_probe_side_flag) { - RETURN_IF_ERROR( - (_do_filtering_and_update_visited_flags( - &_join_block, !p._is_left_semi_anti))); - _update_additional_flags(&_join_block); - // If this join operation is left outer join or full outer join, when - // `_left_side_process_count`, means all rows from build - // side have been joined with _left_side_process_count, we should output current - // probe row with null from build side. 
- if (_left_side_process_count) { - _finalize_current_phase( - _join_block, state->batch_size()); + { + SCOPED_TIMER(_finish_probe_phase_timer); + if constexpr (set_probe_side_flag) { + RETURN_IF_ERROR( + (_do_filtering_and_update_visited_flags( + &_join_block, !p._is_left_semi_anti))); + _update_additional_flags(&_join_block); + // If this join operation is left outer join or full outer join, when + // `_left_side_process_count`, means all rows from build + // side have been joined with _left_side_process_count, we should output current + // probe row with null from build side. + if (_left_side_process_count) { + _finalize_current_phase( + _join_block, state->batch_size()); + } + } else if (_left_side_process_count && p._is_mark_join && + _shared_state->build_blocks.empty()) { + _append_left_data_with_null(_join_block); } - } else if (_left_side_process_count && p._is_mark_join && - _shared_state->build_blocks.empty()) { - _append_left_data_with_null(_join_block); } } @@ -377,6 +384,7 @@ void NestedLoopJoinProbeLocalState::_append_left_data_with_null(vectorized::Bloc void NestedLoopJoinProbeLocalState::_process_left_child_block( vectorized::Block& block, const vectorized::Block& now_process_build_block) const { + SCOPED_TIMER(_output_temp_blocks_timer); auto& p = _parent->cast(); auto dst_columns = block.mutate_columns(); const size_t max_added_rows = now_process_build_block.rows(); @@ -485,6 +493,7 @@ Status NestedLoopJoinProbeOperatorX::push(doris::RuntimeState* state, vectorized set_build_side_flag, set_probe_side_flag>( state, join_op_variants); }; + SCOPED_TIMER(local_state._loop_join_timer); RETURN_IF_ERROR( std::visit(func, local_state._shared_state->join_op_variants, vectorized::make_bool_variant(_match_all_build || _is_right_semi_anti), diff --git a/be/src/pipeline/exec/nested_loop_join_probe_operator.h b/be/src/pipeline/exec/nested_loop_join_probe_operator.h index 5b0fec159e28bf1..c744e6acdc507e3 100644 --- 
a/be/src/pipeline/exec/nested_loop_join_probe_operator.h +++ b/be/src/pipeline/exec/nested_loop_join_probe_operator.h @@ -68,42 +68,48 @@ class NestedLoopJoinProbeLocalState final size_t build_block_idx, size_t processed_blocks_num, bool materialize, Filter& filter) { - if constexpr (SetBuildSideFlag) { - for (size_t i = 0; i < processed_blocks_num; i++) { - auto& build_side_flag = - assert_cast( - _shared_state->build_side_visited_flags[build_block_idx].get()) - ->get_data(); - auto* __restrict build_side_flag_data = build_side_flag.data(); - auto cur_sz = build_side_flag.size(); - const size_t offset = _build_offset_stack.top(); - _build_offset_stack.pop(); - for (size_t j = 0; j < cur_sz; j++) { - build_side_flag_data[j] |= filter[offset + j]; + { + SCOPED_TIMER(_update_visited_flags_timer); + if constexpr (SetBuildSideFlag) { + for (size_t i = 0; i < processed_blocks_num; i++) { + auto& build_side_flag = + assert_cast( + _shared_state->build_side_visited_flags[build_block_idx].get()) + ->get_data(); + auto* __restrict build_side_flag_data = build_side_flag.data(); + auto cur_sz = build_side_flag.size(); + const size_t offset = _build_offset_stack.top(); + _build_offset_stack.pop(); + for (size_t j = 0; j < cur_sz; j++) { + build_side_flag_data[j] |= filter[offset + j]; + } + build_block_idx = build_block_idx == 0 ? _shared_state->build_blocks.size() - 1 + : build_block_idx - 1; } - build_block_idx = build_block_idx == 0 ? _shared_state->build_blocks.size() - 1 - : build_block_idx - 1; } - } - if constexpr (SetProbeSideFlag) { - int64_t end = filter.size(); - for (int i = _left_block_pos == _child_block->rows() ? _left_block_pos - 1 - : _left_block_pos; - i >= _left_block_start_pos; i--) { - int64_t offset = 0; - if (!_probe_offset_stack.empty()) { - offset = _probe_offset_stack.top(); - _probe_offset_stack.pop(); - } - if (!_cur_probe_row_visited_flags[i]) { - _cur_probe_row_visited_flags[i] = - simd::contain_byte(filter.data() + offset, end - offset, 1) ? 
1 - : 0; + if constexpr (SetProbeSideFlag) { + int64_t end = filter.size(); + for (int i = _left_block_pos == _child_block->rows() ? _left_block_pos - 1 + : _left_block_pos; + i >= _left_block_start_pos; i--) { + int64_t offset = 0; + if (!_probe_offset_stack.empty()) { + offset = _probe_offset_stack.top(); + _probe_offset_stack.pop(); + } + if (!_cur_probe_row_visited_flags[i]) { + _cur_probe_row_visited_flags[i] = + simd::contain_byte(filter.data() + offset, end - offset, 1) + ? 1 + : 0; + } + end = offset; } - end = offset; } } + if (materialize) { + SCOPED_TIMER(_filtered_by_join_conjuncts_timer); vectorized::Block::filter_block_internal(block, filter, column_to_keep); } else { CLEAR_BLOCK @@ -125,8 +131,11 @@ class NestedLoopJoinProbeLocalState final if (LIKELY(!_join_conjuncts.empty() && block->rows() > 0)) { vectorized::IColumn::Filter filter(block->rows(), 1); bool can_filter_all = false; - RETURN_IF_ERROR(vectorized::VExprContext::execute_conjuncts( - _join_conjuncts, nullptr, IgnoreNull, block, &filter, &can_filter_all)); + { + SCOPED_TIMER(_join_conjuncts_evaluation_timer); + RETURN_IF_ERROR(vectorized::VExprContext::execute_conjuncts( + _join_conjuncts, nullptr, IgnoreNull, block, &filter, &can_filter_all)); + } if (can_filter_all) { CLEAR_BLOCK @@ -185,6 +194,10 @@ class NestedLoopJoinProbeLocalState final vectorized::VExprContextSPtrs _join_conjuncts; RuntimeProfile::Counter* _loop_join_timer = nullptr; + RuntimeProfile::Counter* _output_temp_blocks_timer = nullptr; + RuntimeProfile::Counter* _update_visited_flags_timer = nullptr; + RuntimeProfile::Counter* _join_conjuncts_evaluation_timer = nullptr; + RuntimeProfile::Counter* _filtered_by_join_conjuncts_timer = nullptr; }; class NestedLoopJoinProbeOperatorX final diff --git a/be/src/pipeline/exec/olap_scan_operator.cpp b/be/src/pipeline/exec/olap_scan_operator.cpp index 0d1cb362ea00bde..124f2d1c70ec937 100644 --- a/be/src/pipeline/exec/olap_scan_operator.cpp +++ 
b/be/src/pipeline/exec/olap_scan_operator.cpp @@ -43,6 +43,9 @@ namespace doris::pipeline { Status OlapScanLocalState::_init_profile() { RETURN_IF_ERROR(ScanLocalState::_init_profile()); + // Rows read from storage. + // Include the rows read from doris page cache. + _scan_rows = ADD_COUNTER(_runtime_profile, "ScanRows", TUnit::UNIT); // 1. init segment profile _segment_profile.reset(new RuntimeProfile("SegmentIterator")); _scanner_profile->add_child(_segment_profile.get(), true, nullptr); @@ -58,22 +61,20 @@ Status OlapScanLocalState::_init_profile() { _block_load_counter = ADD_COUNTER(_segment_profile, "BlocksLoad", TUnit::UNIT); _block_fetch_timer = ADD_TIMER(_scanner_profile, "BlockFetchTime"); _delete_bitmap_get_agg_timer = ADD_TIMER(_scanner_profile, "DeleteBitmapGetAggTime"); - _sync_rowset_timer = ADD_TIMER(_scanner_profile, "SyncRowsetTime"); - _block_convert_timer = ADD_TIMER(_scanner_profile, "BlockConvertTime"); + if (config::is_cloud_mode()) { + _sync_rowset_timer = ADD_TIMER(_scanner_profile, "SyncRowsetTime"); + } _block_init_timer = ADD_TIMER(_segment_profile, "BlockInitTime"); _block_init_seek_timer = ADD_TIMER(_segment_profile, "BlockInitSeekTime"); _block_init_seek_counter = ADD_COUNTER(_segment_profile, "BlockInitSeekCount", TUnit::UNIT); - _block_conditions_filtered_timer = ADD_TIMER(_segment_profile, "BlockConditionsFilteredTime"); - _block_conditions_filtered_bf_timer = - ADD_TIMER(_segment_profile, "BlockConditionsFilteredBloomFilterTime"); + _segment_generate_row_range_timer = ADD_TIMER(_segment_profile, "GenerateRowRangeTime"); + _segment_generate_row_range_by_bf_timer = + ADD_TIMER(_segment_profile, "GenerateRowRangeByBloomFilterIndexTime"); _collect_iterator_merge_next_timer = ADD_TIMER(_segment_profile, "CollectIteratorMergeTime"); - _collect_iterator_normal_next_timer = ADD_TIMER(_segment_profile, "CollectIteratorNormalTime"); - _block_conditions_filtered_zonemap_timer = - ADD_TIMER(_segment_profile, 
"BlockConditionsFilteredZonemapTime"); - _block_conditions_filtered_zonemap_rp_timer = - ADD_TIMER(_segment_profile, "BlockConditionsFilteredZonemapRuntimePredicateTime"); - _block_conditions_filtered_dict_timer = - ADD_TIMER(_segment_profile, "BlockConditionsFilteredDictTime"); + _segment_generate_row_range_by_zonemap_timer = + ADD_TIMER(_segment_profile, "GenerateRowRangeByZoneMapIndexTime"); + _segment_generate_row_range_by_dict_timer = + ADD_TIMER(_segment_profile, "GenerateRowRangeByDictTime"); _rows_vec_cond_filtered_counter = ADD_COUNTER(_segment_profile, "RowsVectorPredFiltered", TUnit::UNIT); @@ -86,10 +87,11 @@ Status OlapScanLocalState::_init_profile() { _vec_cond_timer = ADD_TIMER(_segment_profile, "VectorPredEvalTime"); _short_cond_timer = ADD_TIMER(_segment_profile, "ShortPredEvalTime"); _expr_filter_timer = ADD_TIMER(_segment_profile, "ExprFilterEvalTime"); - _first_read_timer = ADD_TIMER(_segment_profile, "FirstReadTime"); - _second_read_timer = ADD_TIMER(_segment_profile, "SecondReadTime"); - _first_read_seek_timer = ADD_TIMER(_segment_profile, "FirstReadSeekTime"); - _first_read_seek_counter = ADD_COUNTER(_segment_profile, "FirstReadSeekCount", TUnit::UNIT); + _predicate_column_read_timer = ADD_TIMER(_segment_profile, "PredicateColumnReadTime"); + _non_predicate_column_read_timer = ADD_TIMER(_segment_profile, "NonPredicateColumnReadTime"); + _predicate_column_read_seek_timer = ADD_TIMER(_segment_profile, "PredicateColumnReadSeekTime"); + _predicate_column_read_seek_counter = + ADD_COUNTER(_segment_profile, "PredicateColumnReadSeekCount", TUnit::UNIT); _lazy_read_timer = ADD_TIMER(_segment_profile, "LazyReadTime"); _lazy_read_seek_timer = ADD_TIMER(_segment_profile, "LazyReadSeekTime"); @@ -99,7 +101,7 @@ Status OlapScanLocalState::_init_profile() { _stats_filtered_counter = ADD_COUNTER(_segment_profile, "RowsStatsFiltered", TUnit::UNIT); _stats_rp_filtered_counter = - ADD_COUNTER(_segment_profile, "RowsZonemapRuntimePredicateFiltered", 
TUnit::UNIT); + ADD_COUNTER(_segment_profile, "RowsZoneMapRuntimePredicateFiltered", TUnit::UNIT); _bf_filtered_counter = ADD_COUNTER(_segment_profile, "RowsBloomFilterFiltered", TUnit::UNIT); _dict_filtered_counter = ADD_COUNTER(_segment_profile, "RowsDictFiltered", TUnit::UNIT); _del_filtered_counter = ADD_COUNTER(_scanner_profile, "RowsDelFiltered", TUnit::UNIT); @@ -130,8 +132,6 @@ Status OlapScanLocalState::_init_profile() { ADD_TIMER(_segment_profile, "InvertedIndexQueryNullBitmapTime"); _inverted_index_query_bitmap_copy_timer = ADD_TIMER(_segment_profile, "InvertedIndexQueryBitmapCopyTime"); - _inverted_index_query_bitmap_op_timer = - ADD_TIMER(_segment_profile, "InvertedIndexQueryBitmapOpTime"); _inverted_index_searcher_open_timer = ADD_TIMER(_segment_profile, "InvertedIndexSearcherOpenTime"); _inverted_index_searcher_search_timer = @@ -143,7 +143,7 @@ Status OlapScanLocalState::_init_profile() { _inverted_index_downgrade_count_counter = ADD_COUNTER(_segment_profile, "InvertedIndexDowngradeCount", TUnit::UNIT); - _output_index_result_column_timer = ADD_TIMER(_segment_profile, "OutputIndexResultColumnTimer"); + _output_index_result_column_timer = ADD_TIMER(_segment_profile, "OutputIndexResultColumnTime"); _filtered_segment_counter = ADD_COUNTER(_segment_profile, "NumSegmentFiltered", TUnit::UNIT); _total_segment_counter = ADD_COUNTER(_segment_profile, "NumSegmentTotal", TUnit::UNIT); _tablet_counter = ADD_COUNTER(_runtime_profile, "TabletNum", TUnit::UNIT); @@ -278,8 +278,9 @@ Status OlapScanLocalState::_init_scanners(std::list* s scan_range->version.data() + scan_range->version.size(), version); tablets.emplace_back(std::move(tablet), version); } - int64_t duration_ns = 0; + if (config::is_cloud_mode()) { + int64_t duration_ns = 0; SCOPED_RAW_TIMER(&duration_ns); std::vector> tasks; tasks.reserve(_scan_ranges.size()); @@ -289,8 +290,8 @@ Status OlapScanLocalState::_init_scanners(std::list* s }); } RETURN_IF_ERROR(cloud::bthread_fork_join(tasks, 10)); + 
_sync_rowset_timer->update(duration_ns); } - _sync_rowset_timer->update(duration_ns); if (enable_parallel_scan && !p._should_run_serial && !has_cpu_limit && p._push_down_agg_type == TPushAggOp::NONE && @@ -331,25 +332,6 @@ Status OlapScanLocalState::_init_scanners(std::list* s int scanners_per_tablet = std::max(1, 64 / (int)_scan_ranges.size()); - auto build_new_scanner = [&](BaseTabletSPtr tablet, int64_t version, - const std::vector& key_ranges) { - COUNTER_UPDATE(_key_range_counter, key_ranges.size()); - auto scanner = vectorized::NewOlapScanner::create_shared( - this, vectorized::NewOlapScanner::Params { - state(), - _scanner_profile.get(), - key_ranges, - std::move(tablet), - version, - {}, - p._limit, - p._olap_scan_node.is_preaggregation, - }); - RETURN_IF_ERROR(scanner->prepare(state(), _conjuncts)); - scanners->push_back(std::move(scanner)); - return Status::OK(); - }; - for (auto& scan_range : _scan_ranges) { auto tablet = DORIS_TRY(ExecEnv::get_tablet(scan_range->tablet_id)); int64_t version = 0; @@ -375,7 +357,21 @@ Status OlapScanLocalState::_init_scanners(std::list* s ++j, ++i) { scanner_ranges.push_back((*ranges)[i].get()); } - RETURN_IF_ERROR(build_new_scanner(tablet, version, scanner_ranges)); + + COUNTER_UPDATE(_key_range_counter, scanner_ranges.size()); + auto scanner = vectorized::NewOlapScanner::create_shared( + this, vectorized::NewOlapScanner::Params { + state(), + _scanner_profile.get(), + scanner_ranges, + std::move(tablet), + version, + {}, + p._limit, + p._olap_scan_node.is_preaggregation, + }); + RETURN_IF_ERROR(scanner->prepare(state(), _conjuncts)); + scanners->push_back(std::move(scanner)); } } diff --git a/be/src/pipeline/exec/olap_scan_operator.h b/be/src/pipeline/exec/olap_scan_operator.h index c972c7ce99a288a..9e8624b3a0b2550 100644 --- a/be/src/pipeline/exec/olap_scan_operator.h +++ b/be/src/pipeline/exec/olap_scan_operator.h @@ -97,11 +97,8 @@ class OlapScanLocalState final : public ScanLocalState { std::unique_ptr 
_segment_profile; - RuntimeProfile::Counter* _num_disks_accessed_counter = nullptr; - RuntimeProfile::Counter* _tablet_counter = nullptr; RuntimeProfile::Counter* _key_range_counter = nullptr; - RuntimeProfile::Counter* _rows_pushed_cond_filtered_counter = nullptr; RuntimeProfile::Counter* _reader_init_timer = nullptr; RuntimeProfile::Counter* _scanner_init_timer = nullptr; RuntimeProfile::Counter* _process_conjunct_timer = nullptr; @@ -139,23 +136,19 @@ class OlapScanLocalState final : public ScanLocalState { RuntimeProfile::Counter* _block_init_timer = nullptr; RuntimeProfile::Counter* _block_init_seek_timer = nullptr; RuntimeProfile::Counter* _block_init_seek_counter = nullptr; - RuntimeProfile::Counter* _block_conditions_filtered_timer = nullptr; - RuntimeProfile::Counter* _block_conditions_filtered_bf_timer = nullptr; + RuntimeProfile::Counter* _segment_generate_row_range_timer = nullptr; + RuntimeProfile::Counter* _segment_generate_row_range_by_bf_timer = nullptr; RuntimeProfile::Counter* _collect_iterator_merge_next_timer = nullptr; - RuntimeProfile::Counter* _collect_iterator_normal_next_timer = nullptr; - RuntimeProfile::Counter* _block_conditions_filtered_zonemap_timer = nullptr; - RuntimeProfile::Counter* _block_conditions_filtered_zonemap_rp_timer = nullptr; - RuntimeProfile::Counter* _block_conditions_filtered_dict_timer = nullptr; - RuntimeProfile::Counter* _first_read_timer = nullptr; - RuntimeProfile::Counter* _second_read_timer = nullptr; - RuntimeProfile::Counter* _first_read_seek_timer = nullptr; - RuntimeProfile::Counter* _first_read_seek_counter = nullptr; + RuntimeProfile::Counter* _segment_generate_row_range_by_zonemap_timer = nullptr; + RuntimeProfile::Counter* _segment_generate_row_range_by_dict_timer = nullptr; + RuntimeProfile::Counter* _predicate_column_read_timer = nullptr; + RuntimeProfile::Counter* _non_predicate_column_read_timer = nullptr; + RuntimeProfile::Counter* _predicate_column_read_seek_timer = nullptr; + 
RuntimeProfile::Counter* _predicate_column_read_seek_counter = nullptr; RuntimeProfile::Counter* _lazy_read_timer = nullptr; RuntimeProfile::Counter* _lazy_read_seek_timer = nullptr; RuntimeProfile::Counter* _lazy_read_seek_counter = nullptr; - RuntimeProfile::Counter* _block_convert_timer = nullptr; - // total pages read // used by segment v2 RuntimeProfile::Counter* _total_pages_num_counter = nullptr; @@ -175,7 +168,6 @@ class OlapScanLocalState final : public ScanLocalState { RuntimeProfile::Counter* _inverted_index_query_cache_miss_counter = nullptr; RuntimeProfile::Counter* _inverted_index_query_timer = nullptr; RuntimeProfile::Counter* _inverted_index_query_bitmap_copy_timer = nullptr; - RuntimeProfile::Counter* _inverted_index_query_bitmap_op_timer = nullptr; RuntimeProfile::Counter* _inverted_index_searcher_open_timer = nullptr; RuntimeProfile::Counter* _inverted_index_searcher_search_timer = nullptr; RuntimeProfile::Counter* _inverted_index_searcher_cache_hit_counter = nullptr; diff --git a/be/src/pipeline/exec/operator.cpp b/be/src/pipeline/exec/operator.cpp index 6e3099db7486bc7..3b5174d87c0f7fd 100644 --- a/be/src/pipeline/exec/operator.cpp +++ b/be/src/pipeline/exec/operator.cpp @@ -74,6 +74,7 @@ #include "pipeline/exec/union_source_operator.h" #include "pipeline/local_exchange/local_exchange_sink_operator.h" #include "pipeline/local_exchange/local_exchange_source_operator.h" +#include "pipeline/pipeline.h" #include "util/debug_util.h" #include "util/runtime_profile.h" #include "util/string_util.h" @@ -116,11 +117,16 @@ std::string PipelineXSinkLocalState::name_suffix() { }() + ")"; } -DataDistribution DataSinkOperatorXBase::required_data_distribution() const { - return _child && _child->ignore_data_distribution() +DataDistribution OperatorBase::required_data_distribution() const { + return _child && _child->is_serial_operator() && !is_source() ? 
DataDistribution(ExchangeType::PASSTHROUGH) : DataDistribution(ExchangeType::NOOP); } + +bool OperatorBase::require_shuffled_data_distribution() const { + return Pipeline::is_hash_exchange(required_data_distribution().distribution_type); +} + const RowDescriptor& OperatorBase::row_desc() const { return _child->row_desc(); } @@ -660,7 +666,7 @@ Status AsyncWriterSink::close(RuntimeState* state, Status exec_s if (_writer) { Status st = _writer->get_writer_status(); if (exec_status.ok()) { - _writer->force_close(state->is_cancelled() ? Status::Cancelled("Cancelled") + _writer->force_close(state->is_cancelled() ? state->cancel_reason() : Status::Cancelled("force close")); } else { _writer->force_close(exec_status); diff --git a/be/src/pipeline/exec/operator.h b/be/src/pipeline/exec/operator.h index 5df0a19498f3952..6053b1a2f48e875 100644 --- a/be/src/pipeline/exec/operator.h +++ b/be/src/pipeline/exec/operator.h @@ -118,7 +118,8 @@ class OperatorBase { _followed_by_shuffled_operator = followed_by_shuffled_operator; } [[nodiscard]] virtual bool is_shuffled_operator() const { return false; } - [[nodiscard]] virtual bool require_shuffled_data_distribution() const { return false; } + [[nodiscard]] virtual DataDistribution required_data_distribution() const; + [[nodiscard]] virtual bool require_shuffled_data_distribution() const; protected: OperatorPtr _child = nullptr; @@ -483,7 +484,6 @@ class DataSinkOperatorXBase : public OperatorBase { } [[nodiscard]] virtual std::shared_ptr create_shared_state() const = 0; - [[nodiscard]] virtual DataDistribution required_data_distribution() const; Status close(RuntimeState* state) override { return Status::InternalError("Should not reach here!"); @@ -496,8 +496,6 @@ class DataSinkOperatorXBase : public OperatorBase { [[nodiscard]] bool is_sink() const override { return true; } - [[nodiscard]] bool is_source() const override { return false; } - static Status close(RuntimeState* state, Status exec_status) { auto result = 
state->get_sink_local_state_result(); if (!result) { @@ -652,19 +650,7 @@ class OperatorXBase : public OperatorBase { throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, _op_name); } [[nodiscard]] std::string get_name() const override { return _op_name; } - [[nodiscard]] virtual DataDistribution required_data_distribution() const { - return _child && _child->ignore_data_distribution() && !is_source() - ? DataDistribution(ExchangeType::PASSTHROUGH) - : DataDistribution(ExchangeType::NOOP); - } - [[nodiscard]] virtual bool ignore_data_distribution() const { - return _child ? _child->ignore_data_distribution() : _ignore_data_distribution; - } - [[nodiscard]] bool ignore_data_hash_distribution() const { - return _child ? _child->ignore_data_hash_distribution() : _ignore_data_distribution; - } [[nodiscard]] virtual bool need_more_input_data(RuntimeState* state) const { return true; } - void set_ignore_data_distribution() { _ignore_data_distribution = true; } Status open(RuntimeState* state) override; @@ -735,8 +721,6 @@ class OperatorXBase : public OperatorBase { bool has_output_row_desc() const { return _output_row_descriptor != nullptr; } - [[nodiscard]] bool is_source() const override { return false; } - [[nodiscard]] virtual Status get_block_after_projects(RuntimeState* state, vectorized::Block* block, bool* eos); @@ -746,6 +730,9 @@ class OperatorXBase : public OperatorBase { void set_parallel_tasks(int parallel_tasks) { _parallel_tasks = parallel_tasks; } int parallel_tasks() const { return _parallel_tasks; } + // To keep compatibility with older FE + void set_serial_operator() { _is_serial_operator = true; } + protected: template friend class PipelineXLocalState; @@ -779,7 +766,6 @@ class OperatorXBase : public OperatorBase { uint32_t _debug_point_count = 0; std::string _op_name; - bool _ignore_data_distribution = false; int _parallel_tasks = 0; //_keep_origin is used to avoid copying during projection, @@ -850,9 +836,9 @@ class StatefulOperatorX : public 
OperatorX { template requires(std::is_base_of_v) -class AsyncWriterSink : public PipelineXSinkLocalState { +class AsyncWriterSink : public PipelineXSinkLocalState { public: - using Base = PipelineXSinkLocalState; + using Base = PipelineXSinkLocalState; AsyncWriterSink(DataSinkOperatorXBase* parent, RuntimeState* state) : Base(parent, state), _async_writer_dependency(nullptr) { _finish_dependency = diff --git a/be/src/pipeline/exec/partitioned_aggregation_sink_operator.h b/be/src/pipeline/exec/partitioned_aggregation_sink_operator.h index 6b3a74c83df97ce..15f6b22387a8e29 100644 --- a/be/src/pipeline/exec/partitioned_aggregation_sink_operator.h +++ b/be/src/pipeline/exec/partitioned_aggregation_sink_operator.h @@ -309,9 +309,6 @@ class PartitionedAggSinkOperatorX : public DataSinkOperatorXrequire_data_distribution(); } - bool require_shuffled_data_distribution() const override { - return _agg_sink_operator->require_shuffled_data_distribution(); - } Status set_child(OperatorPtr child) override { RETURN_IF_ERROR(DataSinkOperatorX::set_child(child)); diff --git a/be/src/pipeline/exec/partitioned_hash_join_probe_operator.h b/be/src/pipeline/exec/partitioned_hash_join_probe_operator.h index 3aab11f62d883e6..f8fc0780b6fc3f4 100644 --- a/be/src/pipeline/exec/partitioned_hash_join_probe_operator.h +++ b/be/src/pipeline/exec/partitioned_hash_join_probe_operator.h @@ -165,9 +165,6 @@ class PartitionedHashJoinProbeOperatorX final _distribution_partition_exprs)); } - bool require_shuffled_data_distribution() const override { - return _join_op != TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN; - } bool is_shuffled_operator() const override { return _join_distribution == TJoinDistributionType::PARTITIONED; } diff --git a/be/src/pipeline/exec/partitioned_hash_join_sink_operator.h b/be/src/pipeline/exec/partitioned_hash_join_sink_operator.h index c768d7518b95c96..8e89763b50a9d5a 100644 --- a/be/src/pipeline/exec/partitioned_hash_join_sink_operator.h +++ 
b/be/src/pipeline/exec/partitioned_hash_join_sink_operator.h @@ -115,9 +115,6 @@ class PartitionedHashJoinSinkOperatorX _distribution_partition_exprs); } - bool require_shuffled_data_distribution() const override { - return _join_op != TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN; - } bool is_shuffled_operator() const override { return _join_distribution == TJoinDistributionType::PARTITIONED; } diff --git a/be/src/pipeline/exec/repeat_operator.cpp b/be/src/pipeline/exec/repeat_operator.cpp index dba4f27af7c385d..5c94d43f0d1e05d 100644 --- a/be/src/pipeline/exec/repeat_operator.cpp +++ b/be/src/pipeline/exec/repeat_operator.cpp @@ -46,6 +46,16 @@ Status RepeatLocalState::open(RuntimeState* state) { return Status::OK(); } +Status RepeatLocalState::init(RuntimeState* state, LocalStateInfo& info) { + RETURN_IF_ERROR(Base::init(state, info)); + SCOPED_TIMER(exec_time_counter()); + SCOPED_TIMER(_init_timer); + _evaluate_input_timer = ADD_TIMER(profile(), "EvaluateInputDataTime"); + _get_repeat_data_timer = ADD_TIMER(profile(), "GetRepeatDataTime"); + _filter_timer = ADD_TIMER(profile(), "FilterTime"); + return Status::OK(); +} + Status RepeatOperatorX::init(const TPlanNode& tnode, RuntimeState* state) { RETURN_IF_ERROR(OperatorXBase::init(tnode, state)); RETURN_IF_ERROR(vectorized::VExpr::create_expr_trees(tnode.repeat_node.exprs, _expr_ctxs)); @@ -166,23 +176,24 @@ Status RepeatLocalState::add_grouping_id_column(std::size_t rows, std::size_t& c Status RepeatOperatorX::push(RuntimeState* state, vectorized::Block* input_block, bool eos) const { auto& local_state = get_local_state(state); + SCOPED_TIMER(local_state._evaluate_input_timer); local_state._child_eos = eos; - auto& _intermediate_block = local_state._intermediate_block; - auto& _expr_ctxs = local_state._expr_ctxs; - DCHECK(!_intermediate_block || _intermediate_block->rows() == 0); + auto& intermediate_block = local_state._intermediate_block; + auto& expr_ctxs = local_state._expr_ctxs; + DCHECK(!intermediate_block || 
intermediate_block->rows() == 0); if (input_block->rows() > 0) { - _intermediate_block = vectorized::Block::create_unique(); + intermediate_block = vectorized::Block::create_unique(); - for (auto& expr : _expr_ctxs) { + for (auto& expr : expr_ctxs) { int result_column_id = -1; RETURN_IF_ERROR(expr->execute(input_block, &result_column_id)); DCHECK(result_column_id != -1); input_block->get_by_position(result_column_id).column = input_block->get_by_position(result_column_id) .column->convert_to_full_column_if_const(); - _intermediate_block->insert(input_block->get_by_position(result_column_id)); + intermediate_block->insert(input_block->get_by_position(result_column_id)); } - DCHECK_EQ(_expr_ctxs.size(), _intermediate_block->columns()); + DCHECK_EQ(expr_ctxs.size(), intermediate_block->columns()); } return Status::OK(); @@ -202,33 +213,39 @@ Status RepeatOperatorX::pull(doris::RuntimeState* state, vectorized::Block* outp } DCHECK(output_block->rows() == 0); - if (_intermediate_block && _intermediate_block->rows() > 0) { - RETURN_IF_ERROR(local_state.get_repeated_block(_intermediate_block.get(), _repeat_id_idx, - output_block)); + { + SCOPED_TIMER(local_state._get_repeat_data_timer); + if (_intermediate_block && _intermediate_block->rows() > 0) { + RETURN_IF_ERROR(local_state.get_repeated_block(_intermediate_block.get(), + _repeat_id_idx, output_block)); - _repeat_id_idx++; + _repeat_id_idx++; - int size = _repeat_id_list.size(); - if (_repeat_id_idx >= size) { - _intermediate_block->clear(); + int size = _repeat_id_list.size(); + if (_repeat_id_idx >= size) { + _intermediate_block->clear(); + _child_block.clear_column_data(_child->row_desc().num_materialized_slots()); + _repeat_id_idx = 0; + } + } else if (local_state._expr_ctxs.empty()) { + auto m_block = vectorized::VectorizedUtils::build_mutable_mem_reuse_block( + output_block, _output_slots); + auto rows = _child_block.rows(); + auto& columns = m_block.mutable_columns(); + + for (int repeat_id_idx = 0; 
repeat_id_idx < _repeat_id_list.size(); repeat_id_idx++) { + std::size_t cur_col = 0; + RETURN_IF_ERROR( + local_state.add_grouping_id_column(rows, cur_col, columns, repeat_id_idx)); + } _child_block.clear_column_data(_child->row_desc().num_materialized_slots()); - _repeat_id_idx = 0; } - } else if (local_state._expr_ctxs.empty()) { - auto m_block = vectorized::VectorizedUtils::build_mutable_mem_reuse_block(output_block, - _output_slots); - auto rows = _child_block.rows(); - auto& columns = m_block.mutable_columns(); - - for (int repeat_id_idx = 0; repeat_id_idx < _repeat_id_list.size(); repeat_id_idx++) { - std::size_t cur_col = 0; - RETURN_IF_ERROR( - local_state.add_grouping_id_column(rows, cur_col, columns, repeat_id_idx)); - } - _child_block.clear_column_data(_child->row_desc().num_materialized_slots()); } - RETURN_IF_ERROR(vectorized::VExprContext::filter_block(local_state._conjuncts, output_block, - output_block->columns())); + { + SCOPED_TIMER(local_state._filter_timer); + RETURN_IF_ERROR(vectorized::VExprContext::filter_block(local_state._conjuncts, output_block, + output_block->columns())); + } *eos = _child_eos && _child_block.rows() == 0; local_state.reached_limit(output_block, eos); return Status::OK(); diff --git a/be/src/pipeline/exec/repeat_operator.h b/be/src/pipeline/exec/repeat_operator.h index 22398df372ae654..31f88f37231aaae 100644 --- a/be/src/pipeline/exec/repeat_operator.h +++ b/be/src/pipeline/exec/repeat_operator.h @@ -36,6 +36,7 @@ class RepeatLocalState final : public PipelineXLocalState { using Base = PipelineXLocalState; RepeatLocalState(RuntimeState* state, OperatorXBase* parent); + Status init(RuntimeState* state, LocalStateInfo& info) override; Status open(RuntimeState* state) override; Status get_repeated_block(vectorized::Block* child_block, int repeat_id_idx, @@ -53,6 +54,10 @@ class RepeatLocalState final : public PipelineXLocalState { int _repeat_id_idx; std::unique_ptr _intermediate_block; vectorized::VExprContextSPtrs 
_expr_ctxs; + + RuntimeProfile::Counter* _evaluate_input_timer = nullptr; + RuntimeProfile::Counter* _get_repeat_data_timer = nullptr; + RuntimeProfile::Counter* _filter_timer = nullptr; }; class RepeatOperatorX final : public StatefulOperatorX { diff --git a/be/src/pipeline/exec/result_file_sink_operator.cpp b/be/src/pipeline/exec/result_file_sink_operator.cpp index 93026427b86d56d..bc4e4c88d14ca7f 100644 --- a/be/src/pipeline/exec/result_file_sink_operator.cpp +++ b/be/src/pipeline/exec/result_file_sink_operator.cpp @@ -85,12 +85,6 @@ Status ResultFileSinkLocalState::init(RuntimeState* state, LocalSinkStateInfo& i SCOPED_TIMER(_init_timer); _sender_id = info.sender_id; - _brpc_wait_timer = ADD_TIMER(_profile, "BrpcSendTime.Wait"); - _local_send_timer = ADD_TIMER(_profile, "LocalSendTime"); - _brpc_send_timer = ADD_TIMER(_profile, "BrpcSendTime"); - _split_block_distribute_by_channel_timer = - ADD_TIMER(_profile, "SplitBlockDistributeByChannelTime"); - _brpc_send_timer = ADD_TIMER(_profile, "BrpcSendTime"); auto& p = _parent->cast(); CHECK(p._file_opts.get() != nullptr); // create sender @@ -101,6 +95,7 @@ Status ResultFileSinkLocalState::init(RuntimeState* state, LocalSinkStateInfo& i state->fragment_instance_id(), p._buf_size, &_sender, state->execution_timeout(), state->batch_size())); } + _sender->set_dependency(state->fragment_instance_id(), _dependency->shared_from_this()); // create writer _writer.reset(new (std::nothrow) vectorized::VFileResultWriter( diff --git a/be/src/pipeline/exec/result_file_sink_operator.h b/be/src/pipeline/exec/result_file_sink_operator.h index 7268efe4de40654..e9f2b8eeb9c6700 100644 --- a/be/src/pipeline/exec/result_file_sink_operator.h +++ b/be/src/pipeline/exec/result_file_sink_operator.h @@ -40,26 +40,12 @@ class ResultFileSinkLocalState final [[nodiscard]] int sender_id() const { return _sender_id; } - RuntimeProfile::Counter* brpc_wait_timer() { return _brpc_wait_timer; } - RuntimeProfile::Counter* local_send_timer() { return 
_local_send_timer; } - RuntimeProfile::Counter* brpc_send_timer() { return _brpc_send_timer; } - RuntimeProfile::Counter* merge_block_timer() { return _merge_block_timer; } - RuntimeProfile::Counter* split_block_distribute_by_channel_timer() { - return _split_block_distribute_by_channel_timer; - } - private: friend class ResultFileSinkOperatorX; std::shared_ptr _sender; std::shared_ptr _block_holder; - RuntimeProfile::Counter* _brpc_wait_timer = nullptr; - RuntimeProfile::Counter* _local_send_timer = nullptr; - RuntimeProfile::Counter* _brpc_send_timer = nullptr; - RuntimeProfile::Counter* _merge_block_timer = nullptr; - RuntimeProfile::Counter* _split_block_distribute_by_channel_timer = nullptr; - int _sender_id; }; diff --git a/be/src/pipeline/exec/result_sink_operator.cpp b/be/src/pipeline/exec/result_sink_operator.cpp index f04ace2e292595c..15612168affd898 100644 --- a/be/src/pipeline/exec/result_sink_operator.cpp +++ b/be/src/pipeline/exec/result_sink_operator.cpp @@ -17,13 +17,12 @@ #include "result_sink_operator.h" +#include #include #include -#include #include "common/config.h" -#include "common/object_pool.h" #include "exec/rowid_fetcher.h" #include "pipeline/exec/operator.h" #include "runtime/buffer_control_block.h" @@ -41,13 +40,12 @@ Status ResultSinkLocalState::init(RuntimeState* state, LocalSinkStateInfo& info) RETURN_IF_ERROR(Base::init(state, info)); SCOPED_TIMER(exec_time_counter()); SCOPED_TIMER(_init_timer); + _fetch_row_id_timer = ADD_TIMER(profile(), "FetchRowIdTime"); + _write_data_timer = ADD_TIMER(profile(), "WriteDataTime"); static const std::string timer_name = "WaitForDependencyTime"; _wait_for_dependency_timer = ADD_TIMER_WITH_LEVEL(_profile, timer_name, 1); auto fragment_instance_id = state->fragment_instance_id(); - _blocks_sent_counter = ADD_COUNTER_WITH_LEVEL(_profile, "BlocksProduced", TUnit::UNIT, 1); - _rows_sent_counter = ADD_COUNTER_WITH_LEVEL(_profile, "RowsProduced", TUnit::UNIT, 1); - if 
(state->query_options().enable_parallel_result_sink) { _sender = _parent->cast()._sender; } else { @@ -146,12 +144,15 @@ Status ResultSinkOperatorX::open(RuntimeState* state) { Status ResultSinkOperatorX::sink(RuntimeState* state, vectorized::Block* block, bool eos) { auto& local_state = get_local_state(state); SCOPED_TIMER(local_state.exec_time_counter()); - COUNTER_UPDATE(local_state.rows_sent_counter(), (int64_t)block->rows()); - COUNTER_UPDATE(local_state.blocks_sent_counter(), 1); + COUNTER_UPDATE(local_state.rows_input_counter(), (int64_t)block->rows()); if (_fetch_option.use_two_phase_fetch && block->rows() > 0) { + SCOPED_TIMER(local_state._fetch_row_id_timer); RETURN_IF_ERROR(_second_phase_fetch_data(state, block)); } - RETURN_IF_ERROR(local_state._writer->write(state, *block)); + { + SCOPED_TIMER(local_state._write_data_timer); + RETURN_IF_ERROR(local_state._writer->write(state, *block)); + } if (_fetch_option.use_two_phase_fetch) { // Block structure may be changed by calling _second_phase_fetch_data(). 
// So we should clear block in case of unmatched columns @@ -191,9 +192,10 @@ Status ResultSinkLocalState::close(RuntimeState* state, Status exec_status) { final_status = st; } - LOG_INFO("Query {} result sink closed with status {} and has written {} rows", - print_id(state->query_id()), final_status.to_string_no_stack(), - _writer->get_written_rows()); + VLOG_NOTICE << fmt::format( + "Query {} result sink closed with status {} and has written {} rows", + print_id(state->query_id()), final_status.to_string_no_stack(), + _writer->get_written_rows()); } // close sender, this is normal path end diff --git a/be/src/pipeline/exec/result_sink_operator.h b/be/src/pipeline/exec/result_sink_operator.h index 3c503096ecb51e8..339c167825643bd 100644 --- a/be/src/pipeline/exec/result_sink_operator.h +++ b/be/src/pipeline/exec/result_sink_operator.h @@ -128,8 +128,6 @@ class ResultSinkLocalState final : public PipelineXSinkLocalState _sender = nullptr; std::shared_ptr _writer = nullptr; - RuntimeProfile::Counter* _blocks_sent_counter = nullptr; - RuntimeProfile::Counter* _rows_sent_counter = nullptr; + + RuntimeProfile::Counter* _fetch_row_id_timer = nullptr; + RuntimeProfile::Counter* _write_data_timer = nullptr; }; class ResultSinkOperatorX final : public DataSinkOperatorX { diff --git a/be/src/pipeline/exec/scan_operator.cpp b/be/src/pipeline/exec/scan_operator.cpp index 4f3c97bab717b64..f55ef7da36981c1 100644 --- a/be/src/pipeline/exec/scan_operator.cpp +++ b/be/src/pipeline/exec/scan_operator.cpp @@ -73,7 +73,7 @@ Status ScanLocalState::init(RuntimeState* state, LocalStateInfo& info) SCOPED_TIMER(exec_time_counter()); SCOPED_TIMER(_init_timer); auto& p = _parent->cast(); - RETURN_IF_ERROR(RuntimeFilterConsumer::init(state, p.ignore_data_distribution())); + RETURN_IF_ERROR(RuntimeFilterConsumer::init(state, p.is_serial_operator())); // init profile for runtime filter RuntimeFilterConsumer::_init_profile(profile()); init_runtime_filter_dependency(_filter_dependencies, 
p.operator_id(), p.node_id(), @@ -375,7 +375,7 @@ Status ScanLocalState::_normalize_bloom_filter(vectorized::VExpr* expr, vectorized::VExprContext* expr_ctx, SlotDescriptor* slot, PushDownType* pdt) { if (TExprNodeType::BLOOM_PRED == expr->node_type()) { - DCHECK(expr->children().size() == 1); + DCHECK(expr->get_num_children() == 1); PushDownType temp_pdt = _should_push_down_bloom_filter(); if (temp_pdt != PushDownType::UNACCEPTABLE) { _filter_predicates.bloom_filters.emplace_back(slot->col_name(), @@ -391,7 +391,7 @@ Status ScanLocalState::_normalize_bitmap_filter(vectorized::VExpr* expr vectorized::VExprContext* expr_ctx, SlotDescriptor* slot, PushDownType* pdt) { if (TExprNodeType::BITMAP_PRED == expr->node_type()) { - DCHECK(expr->children().size() == 1); + DCHECK(expr->get_num_children() == 1); PushDownType temp_pdt = _should_push_down_bitmap_filter(); if (temp_pdt != PushDownType::UNACCEPTABLE) { _filter_predicates.bitmap_filters.emplace_back(slot->col_name(), @@ -620,7 +620,7 @@ Status ScanLocalState::_normalize_in_and_eq_predicate(vectorized::VExpr range.intersection(temp_range); *pdt = PushDownType::ACCEPTABLE; } else if (TExprNodeType::BINARY_PRED == expr->node_type()) { - DCHECK(expr->children().size() == 2); + DCHECK(expr->get_num_children() == 2); auto eq_checker = [](const std::string& fn_name) { return fn_name == "eq"; }; StringRef value; @@ -769,7 +769,7 @@ Status ScanLocalState::_normalize_not_in_and_not_eq_predicate( iter->next(); } } else if (TExprNodeType::BINARY_PRED == expr->node_type()) { - DCHECK(expr->children().size() == 2); + DCHECK(expr->get_num_children() == 2); auto ne_checker = [](const std::string& fn_name) { return fn_name == "ne"; }; StringRef value; @@ -924,7 +924,7 @@ Status ScanLocalState::_normalize_noneq_binary_predicate( vectorized::VExpr* expr, vectorized::VExprContext* expr_ctx, SlotDescriptor* slot, ColumnValueRange& range, PushDownType* pdt) { if (TExprNodeType::BINARY_PRED == expr->node_type()) { - 
DCHECK(expr->children().size() == 2); + DCHECK(expr->get_num_children() == 2); auto noneq_checker = [](const std::string& fn_name) { return fn_name != "ne" && fn_name != "eq" && fn_name != "eq_for_null"; @@ -990,7 +990,7 @@ Status ScanLocalState::_start_scanners( auto& p = _parent->cast(); _scanner_ctx = vectorized::ScannerContext::create_shared( state(), this, p._output_tuple_desc, p.output_row_descriptor(), scanners, p.limit(), - _scan_dependency, p.ignore_data_distribution()); + _scan_dependency, p.is_serial_operator()); return Status::OK(); } @@ -1048,13 +1048,10 @@ Status ScanLocalState::_init_profile() { ADD_COUNTER(_scanner_profile, "NewlyCreateFreeBlocksNum", TUnit::UNIT); _scale_up_scanners_counter = ADD_COUNTER(_scanner_profile, "NumScaleUpScanners", TUnit::UNIT); // time of transfer thread to wait for block from scan thread - _scanner_wait_batch_timer = ADD_TIMER(_scanner_profile, "ScannerBatchWaitTime"); _scanner_sched_counter = ADD_COUNTER(_scanner_profile, "ScannerSchedCount", TUnit::UNIT); - _scanner_ctx_sched_time = ADD_TIMER(_scanner_profile, "ScannerCtxSchedTime"); _scan_timer = ADD_TIMER(_scanner_profile, "ScannerGetBlockTime"); _scan_cpu_timer = ADD_TIMER(_scanner_profile, "ScannerCpuTime"); - _convert_block_timer = ADD_TIMER(_scanner_profile, "ScannerConvertBlockTime"); _filter_timer = ADD_TIMER(_scanner_profile, "ScannerFilterTime"); // time of scan thread to wait for worker thread of the thread pool @@ -1145,6 +1142,8 @@ ScanOperatorX::ScanOperatorX(ObjectPool* pool, const TPlanNode& : OperatorX(pool, tnode, operator_id, descs), _runtime_filter_descs(tnode.runtime_filters), _parallel_tasks(parallel_tasks) { + OperatorX::_is_serial_operator = + tnode.__isset.is_serial_operator && tnode.is_serial_operator; if (tnode.__isset.push_down_count) { _push_down_count = tnode.push_down_count; } @@ -1282,6 +1281,7 @@ Status ScanOperatorX::get_block(RuntimeState* state, vectorized: if (*eos) { // reach limit, stop the scanners. 
local_state._scanner_ctx->stop_scanners(state); + local_state._scanner_profile->add_info_string("EOS", "True"); } return Status::OK(); diff --git a/be/src/pipeline/exec/scan_operator.h b/be/src/pipeline/exec/scan_operator.h index bf650cb8495935c..5d41c800383bd06 100644 --- a/be/src/pipeline/exec/scan_operator.h +++ b/be/src/pipeline/exec/scan_operator.h @@ -102,8 +102,6 @@ class ScanLocalStateBase : public PipelineXLocalState<>, public RuntimeFilterCon std::shared_ptr _scanner_profile; RuntimeProfile::Counter* _scanner_sched_counter = nullptr; - RuntimeProfile::Counter* _scanner_ctx_sched_time = nullptr; - RuntimeProfile::Counter* _scanner_wait_batch_timer = nullptr; RuntimeProfile::Counter* _scanner_wait_worker_timer = nullptr; // Num of newly created free blocks when running query RuntimeProfile::Counter* _newly_create_free_blocks_num = nullptr; @@ -114,8 +112,6 @@ class ScanLocalStateBase : public PipelineXLocalState<>, public RuntimeFilterCon // time of get block from scanner RuntimeProfile::Counter* _scan_timer = nullptr; RuntimeProfile::Counter* _scan_cpu_timer = nullptr; - // time of convert input block to output block from scanner - RuntimeProfile::Counter* _convert_block_timer = nullptr; // time of filter output block from scanner RuntimeProfile::Counter* _filter_timer = nullptr; RuntimeProfile::Counter* _memory_usage_counter = nullptr; @@ -383,8 +379,8 @@ class ScanOperatorX : public OperatorX { TPushAggOp::type get_push_down_agg_type() { return _push_down_agg_type; } DataDistribution required_data_distribution() const override { - if (OperatorX::ignore_data_distribution()) { - // `ignore_data_distribution()` returns true means we ignore the distribution. + if (OperatorX::is_serial_operator()) { + // `is_serial_operator()` returns true means we ignore the distribution. 
return {ExchangeType::NOOP}; } return {ExchangeType::BUCKET_HASH_SHUFFLE}; diff --git a/be/src/pipeline/exec/set_probe_sink_operator.cpp b/be/src/pipeline/exec/set_probe_sink_operator.cpp index 37db9afacfcacd4..813dad3ad79de64 100644 --- a/be/src/pipeline/exec/set_probe_sink_operator.cpp +++ b/be/src/pipeline/exec/set_probe_sink_operator.cpp @@ -71,12 +71,16 @@ Status SetProbeSinkOperatorX::sink(RuntimeState* state, vectorized auto probe_rows = in_block->rows(); if (probe_rows > 0) { - RETURN_IF_ERROR(_extract_probe_column(local_state, *in_block, local_state._probe_columns, - _cur_child_id)); + { + SCOPED_TIMER(local_state._extract_probe_data_timer); + RETURN_IF_ERROR(_extract_probe_column(local_state, *in_block, + local_state._probe_columns, _cur_child_id)); + } RETURN_IF_ERROR(std::visit( [&](auto&& arg) -> Status { using HashTableCtxType = std::decay_t; if constexpr (!std::is_same_v) { + SCOPED_TIMER(local_state._probe_timer); vectorized::HashTableProbe process_hashtable_ctx(&local_state, probe_rows); return process_hashtable_ctx.mark_data_in_hashtable(arg); @@ -99,6 +103,9 @@ Status SetProbeSinkLocalState::init(RuntimeState* state, LocalSink RETURN_IF_ERROR(Base::init(state, info)); SCOPED_TIMER(exec_time_counter()); SCOPED_TIMER(_init_timer); + + _probe_timer = ADD_TIMER(Base::profile(), "ProbeTime"); + _extract_probe_data_timer = ADD_TIMER(Base::profile(), "ExtractProbeDataTime"); Parent& parent = _parent->cast(); _shared_state->probe_finished_children_dependency[parent._cur_child_id] = _dependency; _dependency->block(); diff --git a/be/src/pipeline/exec/set_probe_sink_operator.h b/be/src/pipeline/exec/set_probe_sink_operator.h index ab53f5358c2a919..368ea812cdfe013 100644 --- a/be/src/pipeline/exec/set_probe_sink_operator.h +++ b/be/src/pipeline/exec/set_probe_sink_operator.h @@ -60,6 +60,9 @@ class SetProbeSinkLocalState final : public PipelineXSinkLocalState @@ -96,8 +99,6 @@ class SetProbeSinkOperatorX final : public DataSinkOperatorX 
create_shared_state() const override { return nullptr; } private: diff --git a/be/src/pipeline/exec/set_sink_operator.cpp b/be/src/pipeline/exec/set_sink_operator.cpp index e2f684d19f5e077..539134e53e7fe21 100644 --- a/be/src/pipeline/exec/set_sink_operator.cpp +++ b/be/src/pipeline/exec/set_sink_operator.cpp @@ -24,6 +24,7 @@ #include "vec/core/materialize_block.h" namespace doris::pipeline { +#include "common/compile_check_begin.h" template Status SetSinkOperatorX::sink(RuntimeState* state, vectorized::Block* in_block, @@ -39,8 +40,10 @@ Status SetSinkOperatorX::sink(RuntimeState* state, vectorized::Blo auto& valid_element_in_hash_tbl = local_state._shared_state->valid_element_in_hash_tbl; if (in_block->rows() != 0) { - RETURN_IF_ERROR(local_state._mutable_block.merge(*in_block)); - + { + SCOPED_TIMER(local_state._merge_block_timer); + RETURN_IF_ERROR(local_state._mutable_block.merge(*in_block)); + } if (local_state._mutable_block.rows() > std::numeric_limits::max()) { return Status::NotSupported("set operator do not support build table rows over:" + std::to_string(std::numeric_limits::max())); @@ -48,6 +51,7 @@ Status SetSinkOperatorX::sink(RuntimeState* state, vectorized::Blo } if (eos || local_state._mutable_block.allocated_bytes() >= BUILD_BLOCK_MAX_SIZE) { + SCOPED_TIMER(local_state._build_timer); build_block = local_state._mutable_block.to_block(); RETURN_IF_ERROR(_process_build_block(local_state, build_block, state)); local_state._mutable_block.clear(); @@ -87,14 +91,14 @@ Status SetSinkOperatorX::_process_build_block( vectorized::materialize_block_inplace(block); vectorized::ColumnRawPtrs raw_ptrs(_child_exprs.size()); RETURN_IF_ERROR(_extract_build_column(local_state, block, raw_ptrs, rows)); - + auto st = Status::OK(); std::visit( [&](auto&& arg) { using HashTableCtxType = std::decay_t; if constexpr (!std::is_same_v) { vectorized::HashTableBuild hash_table_build_process(&local_state, rows, raw_ptrs, state); - static_cast(hash_table_build_process(arg, 
local_state._arena)); + st = hash_table_build_process(arg, local_state._arena); } else { LOG(FATAL) << "FATAL: uninited hash table"; __builtin_unreachable(); @@ -102,7 +106,7 @@ Status SetSinkOperatorX::_process_build_block( }, local_state._shared_state->hash_table_variants->method_variant); - return Status::OK(); + return st; } template @@ -119,7 +123,7 @@ Status SetSinkOperatorX::_extract_build_column( rows = is_all_const ? 1 : rows; for (size_t i = 0; i < _child_exprs.size(); ++i) { - int result_col_id = result_locs[i]; + size_t result_col_id = result_locs[i]; if (is_all_const) { block.get_by_position(result_col_id).column = @@ -151,6 +155,7 @@ Status SetSinkLocalState::init(RuntimeState* state, LocalSinkState RETURN_IF_ERROR(PipelineXSinkLocalState::init(state, info)); SCOPED_TIMER(exec_time_counter()); SCOPED_TIMER(_init_timer); + _merge_block_timer = ADD_TIMER(_profile, "MergeBlocksTime"); _build_timer = ADD_TIMER(_profile, "BuildTime"); auto& parent = _parent->cast(); _shared_state->probe_finished_children_dependency[parent._cur_child_id] = _dependency; diff --git a/be/src/pipeline/exec/set_sink_operator.h b/be/src/pipeline/exec/set_sink_operator.h index 1c08eddc141f2e8..ba387d97b413600 100644 --- a/be/src/pipeline/exec/set_sink_operator.h +++ b/be/src/pipeline/exec/set_sink_operator.h @@ -23,6 +23,7 @@ #include "operator.h" namespace doris { +#include "common/compile_check_begin.h" namespace vectorized { template @@ -48,14 +49,14 @@ class SetSinkLocalState final : public PipelineXSinkLocalState { private: friend class SetSinkOperatorX; - template - friend struct vectorized::HashTableBuild; - RuntimeProfile::Counter* _build_timer; // time to build hash table vectorized::MutableBlock _mutable_block; // every child has its result expr list vectorized::VExprContextSPtrs _child_exprs; vectorized::Arena _arena; + + RuntimeProfile::Counter* _merge_block_timer = nullptr; + RuntimeProfile::Counter* _build_timer = nullptr; }; template @@ -93,7 +94,6 @@ class 
SetSinkOperatorX final : public DataSinkOperatorX @@ -106,13 +106,14 @@ class SetSinkOperatorX final : public DataSinkOperatorX _partition_exprs; using OperatorBase::_child; }; +#include "common/compile_check_end.h" } // namespace pipeline } // namespace doris diff --git a/be/src/pipeline/exec/set_source_operator.cpp b/be/src/pipeline/exec/set_source_operator.cpp index 278e2bb70140d81..ebcd13ddf14ce40 100644 --- a/be/src/pipeline/exec/set_source_operator.cpp +++ b/be/src/pipeline/exec/set_source_operator.cpp @@ -23,12 +23,14 @@ #include "pipeline/exec/operator.h" namespace doris::pipeline { - +#include "common/compile_check_begin.h" template Status SetSourceLocalState::init(RuntimeState* state, LocalStateInfo& info) { RETURN_IF_ERROR(Base::init(state, info)); SCOPED_TIMER(exec_time_counter()); SCOPED_TIMER(_init_timer); + _get_data_timer = ADD_TIMER(_runtime_profile, "GetDataTime"); + _filter_timer = ADD_TIMER(_runtime_profile, "FilterTime"); _shared_state->probe_finished_children_dependency.resize( _parent->cast>()._child_quantity, nullptr); return Status::OK(); @@ -75,21 +77,26 @@ Status SetSourceOperatorX::get_block(RuntimeState* state, vectoriz auto& local_state = get_local_state(state); SCOPED_TIMER(local_state.exec_time_counter()); _create_mutable_cols(local_state, block); - auto st = std::visit( - [&](auto&& arg) -> Status { - using HashTableCtxType = std::decay_t; - if constexpr (!std::is_same_v) { - return _get_data_in_hashtable(local_state, arg, block, - state->batch_size(), eos); - } else { - LOG(FATAL) << "FATAL: uninited hash table"; - __builtin_unreachable(); - } - }, - local_state._shared_state->hash_table_variants->method_variant); - RETURN_IF_ERROR(st); - RETURN_IF_ERROR(vectorized::VExprContext::filter_block(local_state._conjuncts, block, - block->columns())); + { + SCOPED_TIMER(local_state._get_data_timer); + RETURN_IF_ERROR(std::visit( + [&](auto&& arg) -> Status { + using HashTableCtxType = std::decay_t; + if constexpr (!std::is_same_v) { + 
return _get_data_in_hashtable(local_state, arg, block, + state->batch_size(), eos); + } else { + LOG(FATAL) << "FATAL: uninited hash table"; + __builtin_unreachable(); + } + }, + local_state._shared_state->hash_table_variants->method_variant)); + } + { + SCOPED_TIMER(local_state._filter_timer); + RETURN_IF_ERROR(vectorized::VExprContext::filter_block(local_state._conjuncts, block, + block->columns())); + } local_state.reached_limit(block, eos); return Status::OK(); } @@ -115,7 +122,7 @@ template Status SetSourceOperatorX::_get_data_in_hashtable( SetSourceLocalState& local_state, HashTableContext& hash_table_ctx, vectorized::Block* output_block, const int batch_size, bool* eos) { - int left_col_len = local_state._left_table_data_types.size(); + size_t left_col_len = local_state._left_table_data_types.size(); hash_table_ctx.init_iterator(); auto& iter = hash_table_ctx.iterator; auto block_size = 0; diff --git a/be/src/pipeline/exec/set_source_operator.h b/be/src/pipeline/exec/set_source_operator.h index 5157a2f9c979fe6..976ffde3bf23eae 100644 --- a/be/src/pipeline/exec/set_source_operator.h +++ b/be/src/pipeline/exec/set_source_operator.h @@ -26,7 +26,7 @@ namespace doris { class RuntimeState; namespace pipeline { - +#include "common/compile_check_begin.h" template class SetSourceOperatorX; @@ -46,6 +46,9 @@ class SetSourceLocalState final : public PipelineXLocalState { std::vector _mutable_cols; //record build column type vectorized::DataTypes _left_table_data_types; + + RuntimeProfile::Counter* _get_data_timer = nullptr; + RuntimeProfile::Counter* _filter_timer = nullptr; }; template @@ -82,8 +85,8 @@ class SetSourceOperatorX final : public OperatorX& local_state, RowRefListWithFlags& value, int& block_size); - const int _child_quantity; + const size_t _child_quantity; }; - +#include "common/compile_check_end.h" } // namespace pipeline } // namespace doris diff --git a/be/src/pipeline/exec/sort_sink_operator.cpp b/be/src/pipeline/exec/sort_sink_operator.cpp index 
6d6684437b81249..faec4961af93b7f 100644 --- a/be/src/pipeline/exec/sort_sink_operator.cpp +++ b/be/src/pipeline/exec/sort_sink_operator.cpp @@ -32,6 +32,8 @@ Status SortSinkLocalState::init(RuntimeState* state, LocalSinkStateInfo& info) { SCOPED_TIMER(_init_timer); _sort_blocks_memory_usage = ADD_COUNTER_WITH_LEVEL(_profile, "MemoryUsageSortBlocks", TUnit::BYTES, 1); + _append_blocks_timer = ADD_TIMER(profile(), "AppendBlockTime"); + _update_runtime_predicate_timer = ADD_TIMER(profile(), "UpdateRuntimePredicateTime"); return Status::OK(); } @@ -119,7 +121,10 @@ Status SortSinkOperatorX::sink(doris::RuntimeState* state, vectorized::Block* in SCOPED_TIMER(local_state.exec_time_counter()); COUNTER_UPDATE(local_state.rows_input_counter(), (int64_t)in_block->rows()); if (in_block->rows() > 0) { - RETURN_IF_ERROR(local_state._shared_state->sorter->append_block(in_block)); + { + SCOPED_TIMER(local_state._append_blocks_timer); + RETURN_IF_ERROR(local_state._shared_state->sorter->append_block(in_block)); + } int64_t data_size = local_state._shared_state->sorter->data_size(); COUNTER_SET(local_state._sort_blocks_memory_usage, data_size); COUNTER_SET(local_state._memory_used_counter, data_size); @@ -128,6 +133,7 @@ Status SortSinkOperatorX::sink(doris::RuntimeState* state, vectorized::Block* in RETURN_IF_CANCELLED(state); if (state->get_query_ctx()->has_runtime_predicate(_node_id)) { + SCOPED_TIMER(local_state._update_runtime_predicate_timer); auto& predicate = state->get_query_ctx()->get_runtime_predicate(_node_id); if (predicate.enable()) { vectorized::Field new_top = local_state._shared_state->sorter->get_top_value(); diff --git a/be/src/pipeline/exec/sort_sink_operator.h b/be/src/pipeline/exec/sort_sink_operator.h index 0829c38b40f0b58..6bf87164e71026e 100644 --- a/be/src/pipeline/exec/sort_sink_operator.h +++ b/be/src/pipeline/exec/sort_sink_operator.h @@ -46,6 +46,8 @@ class SortSinkLocalState : public PipelineXSinkLocalState { // topn top value vectorized::Field 
old_top {vectorized::Field::Types::Null}; + RuntimeProfile::Counter* _append_blocks_timer = nullptr; + RuntimeProfile::Counter* _update_runtime_predicate_timer = nullptr; }; class SortSinkOperatorX final : public DataSinkOperatorX { @@ -73,7 +75,6 @@ class SortSinkOperatorX final : public DataSinkOperatorX { return {ExchangeType::NOOP}; } } - bool require_shuffled_data_distribution() const override { return _is_analytic_sort; } bool require_data_distribution() const override { return _is_colocate; } size_t get_revocable_mem_size(RuntimeState* state) const; diff --git a/be/src/pipeline/exec/streaming_aggregation_operator.cpp b/be/src/pipeline/exec/streaming_aggregation_operator.cpp index 603a1a216103eed..cf5071d62e47378 100644 --- a/be/src/pipeline/exec/streaming_aggregation_operator.cpp +++ b/be/src/pipeline/exec/streaming_aggregation_operator.cpp @@ -22,6 +22,7 @@ #include #include +#include "common/cast_set.h" #include "common/compiler_util.h" // IWYU pragma: keep #include "pipeline/exec/operator.h" #include "vec/exprs/vectorized_agg_fn.h" @@ -93,25 +94,18 @@ Status StreamingAggLocalState::init(RuntimeState* state, LocalStateInfo& info) { "MemoryUsageSerializeKeyArena", TUnit::BYTES, "", 1); _build_timer = ADD_TIMER(Base::profile(), "BuildTime"); - _build_table_convert_timer = ADD_TIMER(Base::profile(), "BuildConvertToPartitionedTime"); - _serialize_key_timer = ADD_TIMER(Base::profile(), "SerializeKeyTime"); - _exec_timer = ADD_TIMER(Base::profile(), "ExecTime"); _merge_timer = ADD_TIMER(Base::profile(), "MergeTime"); _expr_timer = ADD_TIMER(Base::profile(), "ExprTime"); - _serialize_data_timer = ADD_TIMER(Base::profile(), "SerializeDataTime"); + _insert_values_to_column_timer = ADD_TIMER(Base::profile(), "InsertValuesToColumnTime"); _deserialize_data_timer = ADD_TIMER(Base::profile(), "DeserializeAndMergeTime"); _hash_table_compute_timer = ADD_TIMER(Base::profile(), "HashTableComputeTime"); _hash_table_emplace_timer = ADD_TIMER(Base::profile(), 
"HashTableEmplaceTime"); _hash_table_input_counter = ADD_COUNTER(Base::profile(), "HashTableInputCount", TUnit::UNIT); - _max_row_size_counter = ADD_COUNTER(Base::profile(), "MaxRowSizeInBytes", TUnit::UNIT); _hash_table_size_counter = ADD_COUNTER(profile(), "HashTableSize", TUnit::UNIT); - _queue_byte_size_counter = ADD_COUNTER(profile(), "MaxSizeInBlockQueue", TUnit::BYTES); - _queue_size_counter = ADD_COUNTER(profile(), "MaxSizeOfBlockQueue", TUnit::UNIT); _streaming_agg_timer = ADD_TIMER(profile(), "StreamingAggTime"); _build_timer = ADD_TIMER(profile(), "BuildTime"); _expr_timer = ADD_TIMER(Base::profile(), "ExprTime"); _get_results_timer = ADD_TIMER(profile(), "GetResultsTime"); - _serialize_result_timer = ADD_TIMER(profile(), "SerializeResultTime"); _hash_table_iterate_timer = ADD_TIMER(profile(), "HashTableIterateTime"); _insert_keys_to_column_timer = ADD_TIMER(profile(), "InsertKeysToColumnTime"); @@ -679,7 +673,7 @@ Status StreamingAggLocalState::_pre_agg_with_serialized_key(doris::vectorized::B } for (int i = 0; i != _aggregate_evaluators.size(); ++i) { - SCOPED_TIMER(_serialize_data_timer); + SCOPED_TIMER(_insert_values_to_column_timer); RETURN_IF_ERROR( _aggregate_evaluators[i]->streaming_agg_serialize_to_column( in_block, value_columns[i], rows, @@ -848,12 +842,12 @@ Status StreamingAggLocalState::_get_with_serialized_key_result(RuntimeState* sta return Status::OK(); } -Status StreamingAggLocalState::_serialize_without_key(RuntimeState* state, vectorized::Block* block, - bool* eos) { +Status StreamingAggLocalState::_get_results_without_key(RuntimeState* state, + vectorized::Block* block, bool* eos) { // 1. 
`child(0)->rows_returned() == 0` mean not data from child // in level two aggregation node should return NULL result // level one aggregation node set `eos = true` return directly - SCOPED_TIMER(_serialize_result_timer); + SCOPED_TIMER(_get_results_timer); if (UNLIKELY(_input_num_rows == 0)) { *eos = true; return Status::OK(); @@ -892,10 +886,10 @@ Status StreamingAggLocalState::_serialize_without_key(RuntimeState* state, vecto return Status::OK(); } -Status StreamingAggLocalState::_serialize_with_serialized_key_result(RuntimeState* state, - vectorized::Block* block, - bool* eos) { - SCOPED_TIMER(_serialize_result_timer); +Status StreamingAggLocalState::_get_results_with_serialized_key(RuntimeState* state, + vectorized::Block* block, + bool* eos) { + SCOPED_TIMER(_get_results_timer); auto& p = _parent->cast(); int key_size = _probe_expr_ctxs.size(); int agg_size = _aggregate_evaluators.size(); @@ -914,7 +908,6 @@ Status StreamingAggLocalState::_serialize_with_serialized_key_result(RuntimeStat } } - SCOPED_TIMER(_get_results_timer); std::visit( vectorized::Overload { [&](std::monostate& arg) -> void { @@ -970,7 +963,7 @@ Status StreamingAggLocalState::_serialize_with_serialized_key_result(RuntimeStat } { - SCOPED_TIMER(_serialize_data_timer); + SCOPED_TIMER(_insert_values_to_column_timer); for (size_t i = 0; i < _aggregate_evaluators.size(); ++i) { value_data_types[i] = _aggregate_evaluators[i]->function()->get_serialized_type(); @@ -1152,7 +1145,7 @@ Status StreamingAggOperatorX::init(const TPlanNode& tnode, RuntimeState* state) RETURN_IF_ERROR(vectorized::AggFnEvaluator::create( _pool, tnode.agg_node.aggregate_functions[i], tnode.agg_node.__isset.agg_sort_infos ? 
tnode.agg_node.agg_sort_infos[i] : dummy, - &evaluator)); + tnode.agg_node.grouping_exprs.empty(), &evaluator)); _aggregate_evaluators.push_back(evaluator); } @@ -1225,7 +1218,8 @@ Status StreamingAggOperatorX::open(RuntimeState* state) { // check output type if (_needs_finalize) { RETURN_IF_ERROR(vectorized::AggFnEvaluator::check_agg_fn_output( - _probe_expr_ctxs.size(), _aggregate_evaluators, _agg_fn_output_row_descriptor)); + cast_set(_probe_expr_ctxs.size()), _aggregate_evaluators, + _agg_fn_output_row_descriptor)); } RETURN_IF_ERROR(vectorized::VExpr::open(_probe_expr_ctxs, state)); diff --git a/be/src/pipeline/exec/streaming_aggregation_operator.h b/be/src/pipeline/exec/streaming_aggregation_operator.h index 9a84b694635a462..b695880ac2857bd 100644 --- a/be/src/pipeline/exec/streaming_aggregation_operator.h +++ b/be/src/pipeline/exec/streaming_aggregation_operator.h @@ -65,11 +65,11 @@ class StreamingAggLocalState final : public PipelineXLocalState void _update_memusage_with_serialized_key(); Status _init_hash_method(const vectorized::VExprContextSPtrs& probe_exprs); Status _get_without_key_result(RuntimeState* state, vectorized::Block* block, bool* eos); - Status _serialize_without_key(RuntimeState* state, vectorized::Block* block, bool* eos); + Status _get_results_without_key(RuntimeState* state, vectorized::Block* block, bool* eos); Status _get_with_serialized_key_result(RuntimeState* state, vectorized::Block* block, bool* eos); - Status _serialize_with_serialized_key_result(RuntimeState* state, vectorized::Block* block, - bool* eos); + Status _get_results_with_serialized_key(RuntimeState* state, vectorized::Block* block, + bool* eos); template Status _merge_with_serialized_key_helper(vectorized::Block* block); @@ -83,25 +83,19 @@ class StreamingAggLocalState final : public PipelineXLocalState Status _create_agg_status(vectorized::AggregateDataPtr data); size_t _get_hash_table_size(); - RuntimeProfile::Counter* _queue_byte_size_counter = nullptr; - 
RuntimeProfile::Counter* _queue_size_counter = nullptr; RuntimeProfile::Counter* _streaming_agg_timer = nullptr; RuntimeProfile::Counter* _hash_table_compute_timer = nullptr; RuntimeProfile::Counter* _hash_table_emplace_timer = nullptr; RuntimeProfile::Counter* _hash_table_input_counter = nullptr; RuntimeProfile::Counter* _build_timer = nullptr; RuntimeProfile::Counter* _expr_timer = nullptr; - RuntimeProfile::Counter* _build_table_convert_timer = nullptr; - RuntimeProfile::Counter* _serialize_key_timer = nullptr; RuntimeProfile::Counter* _merge_timer = nullptr; - RuntimeProfile::Counter* _serialize_data_timer = nullptr; + RuntimeProfile::Counter* _insert_values_to_column_timer = nullptr; RuntimeProfile::Counter* _deserialize_data_timer = nullptr; - RuntimeProfile::Counter* _max_row_size_counter = nullptr; RuntimeProfile::Counter* _hash_table_memory_usage = nullptr; RuntimeProfile::HighWaterMarkCounter* _serialize_key_arena_memory_usage = nullptr; RuntimeProfile::Counter* _hash_table_size_counter = nullptr; RuntimeProfile::Counter* _get_results_timer = nullptr; - RuntimeProfile::Counter* _serialize_result_timer = nullptr; RuntimeProfile::Counter* _hash_table_iterate_timer = nullptr; RuntimeProfile::Counter* _insert_keys_to_column_timer = nullptr; @@ -136,13 +130,13 @@ class StreamingAggLocalState final : public PipelineXLocalState if constexpr (NeedFinalize) { return local_state->_get_without_key_result(state, block, eos); } else { - return local_state->_serialize_without_key(state, block, eos); + return local_state->_get_results_without_key(state, block, eos); } } else { if constexpr (NeedFinalize) { return local_state->_get_with_serialized_key_result(state, block, eos); } else { - return local_state->_serialize_with_serialized_key_result(state, block, eos); + return local_state->_get_results_with_serialized_key(state, block, eos); } } } diff --git a/be/src/pipeline/exec/table_function_operator.cpp b/be/src/pipeline/exec/table_function_operator.cpp index 
38e69f7cb0e897e..c1621470f435b4f 100644 --- a/be/src/pipeline/exec/table_function_operator.cpp +++ b/be/src/pipeline/exec/table_function_operator.cpp @@ -32,6 +32,18 @@ namespace doris::pipeline { TableFunctionLocalState::TableFunctionLocalState(RuntimeState* state, OperatorXBase* parent) : PipelineXLocalState<>(state, parent), _child_block(vectorized::Block::create_unique()) {} +Status TableFunctionLocalState::init(RuntimeState* state, LocalStateInfo& info) { + RETURN_IF_ERROR(PipelineXLocalState<>::init(state, info)); + SCOPED_TIMER(exec_time_counter()); + SCOPED_TIMER(_init_timer); + _init_function_timer = ADD_TIMER(_runtime_profile, "InitTableFunctionTime"); + _process_rows_timer = ADD_TIMER(_runtime_profile, "ProcessRowsTime"); + _copy_data_timer = ADD_TIMER(_runtime_profile, "CopyDataTime"); + _filter_timer = ADD_TIMER(_runtime_profile, "FilterTime"); + _repeat_data_timer = ADD_TIMER(_runtime_profile, "RepeatDataTime"); + return Status::OK(); +} + Status TableFunctionLocalState::open(RuntimeState* state) { SCOPED_TIMER(PipelineXLocalState<>::exec_time_counter()); SCOPED_TIMER(PipelineXLocalState<>::_open_timer); @@ -59,6 +71,7 @@ void TableFunctionLocalState::_copy_output_slots( if (!_current_row_insert_times) { return; } + SCOPED_TIMER(_copy_data_timer); auto& p = _parent->cast(); for (auto index : p._output_slot_indexs) { auto src_column = _child_block->get_by_position(index).column; @@ -197,15 +210,18 @@ Status TableFunctionLocalState::get_expanded_block(RuntimeState* state, columns[index]->insert_many_defaults(row_size - columns[index]->size()); } - // 3. eval conjuncts - RETURN_IF_ERROR(vectorized::VExprContext::filter_block(_conjuncts, output_block, - output_block->columns())); + { + SCOPED_TIMER(_filter_timer); // 3. 
eval conjuncts + RETURN_IF_ERROR(vectorized::VExprContext::filter_block(_conjuncts, output_block, + output_block->columns())); + } *eos = _child_eos && _cur_child_offset == -1; return Status::OK(); } void TableFunctionLocalState::process_next_child_row() { + SCOPED_TIMER(_process_rows_timer); _cur_child_offset++; if (_cur_child_offset >= _child_block->rows()) { diff --git a/be/src/pipeline/exec/table_function_operator.h b/be/src/pipeline/exec/table_function_operator.h index 75b1608fad7112a..81160acb7f7611c 100644 --- a/be/src/pipeline/exec/table_function_operator.h +++ b/be/src/pipeline/exec/table_function_operator.h @@ -37,6 +37,7 @@ class TableFunctionLocalState final : public PipelineXLocalState<> { TableFunctionLocalState(RuntimeState* state, OperatorXBase* parent); ~TableFunctionLocalState() override = default; + Status init(RuntimeState* state, LocalStateInfo& infos) override; Status open(RuntimeState* state) override; Status close(RuntimeState* state) override { for (auto* fn : _fns) { @@ -67,6 +68,12 @@ class TableFunctionLocalState final : public PipelineXLocalState<> { std::unique_ptr _child_block; int _current_row_insert_times = 0; bool _child_eos = false; + + RuntimeProfile::Counter* _init_function_timer = nullptr; + RuntimeProfile::Counter* _process_rows_timer = nullptr; + RuntimeProfile::Counter* _copy_data_timer = nullptr; + RuntimeProfile::Counter* _filter_timer = nullptr; + RuntimeProfile::Counter* _repeat_data_timer = nullptr; }; class TableFunctionOperatorX final : public StatefulOperatorX { @@ -93,6 +100,7 @@ class TableFunctionOperatorX final : public StatefulOperatorXprocess_init(input_block, state)); } local_state.process_next_child_row(); diff --git a/be/src/pipeline/exec/union_sink_operator.cpp b/be/src/pipeline/exec/union_sink_operator.cpp index 288fc131037fabb..8467eeb1d5467a6 100644 --- a/be/src/pipeline/exec/union_sink_operator.cpp +++ b/be/src/pipeline/exec/union_sink_operator.cpp @@ -32,6 +32,7 @@ Status 
UnionSinkLocalState::init(RuntimeState* state, LocalSinkStateInfo& info) RETURN_IF_ERROR(Base::init(state, info)); SCOPED_TIMER(exec_time_counter()); SCOPED_TIMER(_init_timer); + _expr_timer = ADD_TIMER(_profile, "ExprTime"); auto& p = _parent->cast(); _shared_state->data_queue.set_sink_dependency(_dependency, p._cur_child_id); return Status::OK(); diff --git a/be/src/pipeline/exec/union_sink_operator.h b/be/src/pipeline/exec/union_sink_operator.h index f939950143ae920..aa94ed9a73038fb 100644 --- a/be/src/pipeline/exec/union_sink_operator.h +++ b/be/src/pipeline/exec/union_sink_operator.h @@ -55,6 +55,7 @@ class UnionSinkLocalState final : public PipelineXSinkLocalState { @@ -136,6 +137,7 @@ class UnionSinkOperatorX final : public DataSinkOperatorX { Status materialize_block(RuntimeState* state, vectorized::Block* src_block, int child_idx, vectorized::Block* res_block) { auto& local_state = get_local_state(state); + SCOPED_TIMER(local_state._expr_timer); const auto& child_exprs = local_state._child_expr; vectorized::ColumnsWithTypeAndName colunms; for (size_t i = 0; i < child_exprs.size(); ++i) { diff --git a/be/src/pipeline/local_exchange/local_exchange_source_operator.h b/be/src/pipeline/local_exchange/local_exchange_source_operator.h index c0da5c8120c1e93..3c706d50182538d 100644 --- a/be/src/pipeline/local_exchange/local_exchange_source_operator.h +++ b/be/src/pipeline/local_exchange/local_exchange_source_operator.h @@ -81,9 +81,6 @@ class LocalExchangeSourceOperatorX final : public OperatorX void Exchanger::_enqueue_data_and_set_ready(int channel_id, LocalExchangeSinkLocalState& local_state, @@ -170,11 +171,11 @@ Status ShuffleExchanger::get_block(RuntimeState* state, vectorized::Block* block Status ShuffleExchanger::_split_rows(RuntimeState* state, const uint32_t* __restrict channel_ids, vectorized::Block* block, LocalExchangeSinkLocalState& local_state) { - const auto rows = block->rows(); + const auto rows = cast_set(block->rows()); auto row_idx = 
std::make_shared>(rows); { local_state._partition_rows_histogram.assign(_num_partitions + 1, 0); - for (size_t i = 0; i < rows; ++i) { + for (int32_t i = 0; i < rows; ++i) { local_state._partition_rows_histogram[channel_ids[i]]++; } for (int32_t i = 1; i <= _num_partitions; ++i) { @@ -212,7 +213,7 @@ Status ShuffleExchanger::_split_rows(RuntimeState* state, const uint32_t* __rest */ const auto& map = local_state._parent->cast() ._shuffle_idx_to_instance_idx; - new_block_wrapper->ref(map.size()); + new_block_wrapper->ref(cast_set(map.size())); for (const auto& it : map) { DCHECK(it.second >= 0 && it.second < _num_partitions) << it.first << " : " << it.second << " " << _num_partitions; @@ -225,7 +226,7 @@ Status ShuffleExchanger::_split_rows(RuntimeState* state, const uint32_t* __rest new_block_wrapper->unref(local_state._shared_state, local_state._channel_id); } } - } else if (_num_senders != _num_sources || _ignore_source_data_distribution) { + } else if (_num_senders != _num_sources) { // In this branch, data just should be distributed equally into all instances. 
new_block_wrapper->ref(_num_partitions); for (size_t i = 0; i < _num_partitions; i++) { @@ -241,7 +242,7 @@ Status ShuffleExchanger::_split_rows(RuntimeState* state, const uint32_t* __rest } else { DCHECK(!bucket_seq_to_instance_idx.empty()); new_block_wrapper->ref(_num_partitions); - for (size_t i = 0; i < _num_partitions; i++) { + for (int i = 0; i < _num_partitions; i++) { uint32_t start = local_state._partition_rows_histogram[i]; uint32_t size = local_state._partition_rows_histogram[i + 1] - start; if (size > 0) { @@ -426,7 +427,7 @@ Status BroadcastExchanger::sink(RuntimeState* state, vectorized::Block* in_block local_state._shared_state->add_total_mem_usage(wrapper->data_block.allocated_bytes(), local_state._channel_id); wrapper->ref(_num_partitions); - for (size_t i = 0; i < _num_partitions; i++) { + for (int i = 0; i < _num_partitions; i++) { _enqueue_data_and_set_ready(i, local_state, {wrapper, {0, wrapper->data_block.rows()}}); } @@ -500,11 +501,11 @@ Status AdaptivePassthroughExchanger::_split_rows(RuntimeState* state, const uint32_t* __restrict channel_ids, vectorized::Block* block, LocalExchangeSinkLocalState& local_state) { - const auto rows = block->rows(); + const auto rows = cast_set(block->rows()); auto row_idx = std::make_shared>(rows); { local_state._partition_rows_histogram.assign(_num_partitions + 1, 0); - for (size_t i = 0; i < rows; ++i) { + for (int32_t i = 0; i < rows; ++i) { local_state._partition_rows_histogram[channel_ids[i]]++; } for (int32_t i = 1; i <= _num_partitions; ++i) { @@ -517,7 +518,7 @@ Status AdaptivePassthroughExchanger::_split_rows(RuntimeState* state, local_state._partition_rows_histogram[channel_ids[i]]--; } } - for (size_t i = 0; i < _num_partitions; i++) { + for (int32_t i = 0; i < _num_partitions; i++) { const size_t start = local_state._partition_rows_histogram[i]; const size_t size = local_state._partition_rows_histogram[i + 1] - start; if (size > 0) { diff --git a/be/src/pipeline/local_exchange/local_exchanger.h 
b/be/src/pipeline/local_exchange/local_exchanger.h index 01b55816ba8aad4..bf052ac3b924ca5 100644 --- a/be/src/pipeline/local_exchange/local_exchanger.h +++ b/be/src/pipeline/local_exchange/local_exchanger.h @@ -21,6 +21,7 @@ #include "pipeline/exec/operator.h" namespace doris::pipeline { +#include "common/compile_check_begin.h" class LocalExchangeSourceLocalState; class LocalExchangeSinkLocalState; @@ -217,24 +218,21 @@ class ShuffleExchanger : public Exchanger { protected: ShuffleExchanger(int running_sink_operators, int num_sources, int num_partitions, - bool ignore_source_data_distribution, int free_block_limit) + int free_block_limit) : Exchanger(running_sink_operators, num_sources, num_partitions, - free_block_limit), - _ignore_source_data_distribution(ignore_source_data_distribution) { + free_block_limit) { _data_queue.resize(num_partitions); } Status _split_rows(RuntimeState* state, const uint32_t* __restrict channel_ids, vectorized::Block* block, LocalExchangeSinkLocalState& local_state); - - const bool _ignore_source_data_distribution = false; }; class BucketShuffleExchanger final : public ShuffleExchanger { ENABLE_FACTORY_CREATOR(BucketShuffleExchanger); BucketShuffleExchanger(int running_sink_operators, int num_sources, int num_partitions, - bool ignore_source_data_distribution, int free_block_limit) + int free_block_limit) : ShuffleExchanger(running_sink_operators, num_sources, num_partitions, - ignore_source_data_distribution, free_block_limit) {} + free_block_limit) {} ~BucketShuffleExchanger() override = default; ExchangeType get_type() const override { return ExchangeType::BUCKET_HASH_SHUFFLE; } }; @@ -351,5 +349,5 @@ class AdaptivePassthroughExchanger : public Exchanger { std::atomic_bool _is_pass_through = false; std::atomic_int32_t _total_block = 0; }; - +#include "common/compile_check_end.h" } // namespace doris::pipeline diff --git a/be/src/pipeline/pipeline.cpp b/be/src/pipeline/pipeline.cpp index 5b93fbdf1f8480c..96da754daa5d983 100644 --- 
a/be/src/pipeline/pipeline.cpp +++ b/be/src/pipeline/pipeline.cpp @@ -39,6 +39,7 @@ bool Pipeline::need_to_local_exchange(const DataDistribution target_data_distrib [&](OperatorPtr op) -> bool { return op->is_serial_operator(); })) { return false; } + // If all operators are serial and sink is not serial, we should improve parallelism for sink. if (std::all_of(_operators.begin(), _operators.end(), [&](OperatorPtr op) -> bool { return op->is_serial_operator(); })) { if (!_sink->is_serial_operator()) { @@ -46,21 +47,22 @@ bool Pipeline::need_to_local_exchange(const DataDistribution target_data_distrib } } else if (std::any_of(_operators.begin(), _operators.end(), [&](OperatorPtr op) -> bool { return op->is_serial_operator(); })) { + // If non-serial operators exist, we should improve parallelism for those. return true; } if (target_data_distribution.distribution_type != ExchangeType::BUCKET_HASH_SHUFFLE && target_data_distribution.distribution_type != ExchangeType::HASH_SHUFFLE) { + // Always do local exchange if non-hash-partition exchanger is required. + // For example, `PASSTHROUGH` exchanger is always required to distribute data evenly. return true; - } else if (_operators.front()->ignore_data_hash_distribution()) { - if (_data_distribution.distribution_type == target_data_distribution.distribution_type && - (_data_distribution.partition_exprs.empty() || - target_data_distribution.partition_exprs.empty())) { - return true; - } - return _data_distribution.distribution_type != target_data_distribution.distribution_type && - !(is_hash_exchange(_data_distribution.distribution_type) && - is_hash_exchange(target_data_distribution.distribution_type)); + } else if (_operators.front()->is_serial_operator()) { + DCHECK(std::all_of(_operators.begin(), _operators.end(), + [&](OperatorPtr op) -> bool { return op->is_serial_operator(); }) && + _sink->is_serial_operator()) + << debug_string(); + // All operators and sink are serial in this path. 
+ return false; } else { return _data_distribution.distribution_type != target_data_distribution.distribution_type && !(is_hash_exchange(_data_distribution.distribution_type) && @@ -71,7 +73,6 @@ bool Pipeline::need_to_local_exchange(const DataDistribution target_data_distrib Status Pipeline::add_operator(OperatorPtr& op, const int parallelism) { if (parallelism > 0 && op->is_serial_operator()) { set_num_tasks(parallelism); - op->set_ignore_data_distribution(); } op->set_parallel_tasks(num_tasks()); _operators.emplace_back(op); diff --git a/be/src/pipeline/pipeline.h b/be/src/pipeline/pipeline.h index ef0ae9e9a75aa23..b969186b178bf7e 100644 --- a/be/src/pipeline/pipeline.h +++ b/be/src/pipeline/pipeline.h @@ -25,12 +25,13 @@ #include #include +#include "common/cast_set.h" #include "common/status.h" #include "pipeline/exec/operator.h" #include "util/runtime_profile.h" namespace doris::pipeline { - +#include "common/compile_check_begin.h" class PipelineFragmentContext; class Pipeline; @@ -43,8 +44,7 @@ class Pipeline : public std::enable_shared_from_this { friend class PipelineFragmentContext; public: - explicit Pipeline(PipelineId pipeline_id, int num_tasks, - std::weak_ptr context, int num_tasks_of_parent) + explicit Pipeline(PipelineId pipeline_id, int num_tasks, int num_tasks_of_parent) : _pipeline_id(pipeline_id), _num_tasks(num_tasks), _num_tasks_of_parent(num_tasks_of_parent) { @@ -85,7 +85,9 @@ class Pipeline : public std::enable_shared_from_this { std::vector>& children() { return _children; } void set_children(std::shared_ptr child) { _children.push_back(child); } - void set_children(std::vector> children) { _children = children; } + void set_children(std::vector> children) { + _children = std::move(children); + } void incr_created_tasks(int i, PipelineTask* task) { _num_tasks_created++; @@ -114,15 +116,16 @@ class Pipeline : public std::enable_shared_from_this { int num_tasks() const { return _num_tasks; } bool close_task() { return 
_num_tasks_running.fetch_sub(1) == 1; } - std::string debug_string() { + std::string debug_string() const { fmt::memory_buffer debug_string_buffer; fmt::format_to(debug_string_buffer, "Pipeline [id: {}, _num_tasks: {}, _num_tasks_created: {}]", _pipeline_id, _num_tasks, _num_tasks_created); - for (size_t i = 0; i < _operators.size(); i++) { + for (int i = 0; i < _operators.size(); i++) { fmt::format_to(debug_string_buffer, "\n{}", _operators[i]->debug_string(i)); } - fmt::format_to(debug_string_buffer, "\n{}", _sink->debug_string(_operators.size())); + fmt::format_to(debug_string_buffer, "\n{}", + _sink->debug_string(cast_set(_operators.size()))); return fmt::to_string(debug_string_buffer); } @@ -168,5 +171,5 @@ class Pipeline : public std::enable_shared_from_this { // Parallelism of parent pipeline. const int _num_tasks_of_parent; }; - +#include "common/compile_check_end.h" } // namespace doris::pipeline diff --git a/be/src/pipeline/pipeline_fragment_context.cpp b/be/src/pipeline/pipeline_fragment_context.cpp index fd3baefa76f9c71..d14a0d0c3cd4a7a 100644 --- a/be/src/pipeline/pipeline_fragment_context.cpp +++ b/be/src/pipeline/pipeline_fragment_context.cpp @@ -215,7 +215,6 @@ PipelinePtr PipelineFragmentContext::add_pipeline(PipelinePtr parent, int idx) { PipelineId id = _next_pipeline_id++; auto pipeline = std::make_shared( id, parent ? std::min(parent->num_tasks(), _num_instances) : _num_instances, - std::dynamic_pointer_cast(shared_from_this()), parent ? 
parent->num_tasks() : _num_instances); if (idx >= 0) { _pipelines.insert(_pipelines.begin() + idx, pipeline); @@ -236,8 +235,6 @@ Status PipelineFragmentContext::prepare(const doris::TPipelineFragmentParams& re if (request.__isset.query_options && request.query_options.__isset.execution_timeout) { _timeout = request.query_options.execution_timeout; } - _use_serial_source = - request.fragment.__isset.use_serial_source && request.fragment.use_serial_source; _fragment_level_profile = std::make_unique("PipelineContext"); _prepare_timer = ADD_TIMER(_fragment_level_profile, "PrepareTime"); @@ -255,11 +252,6 @@ Status PipelineFragmentContext::prepare(const doris::TPipelineFragmentParams& re auto* fragment_context = this; - LOG_INFO("PipelineFragmentContext::prepare") - .tag("query_id", print_id(_query_id)) - .tag("fragment_id", _fragment_id) - .tag("pthread_id", (uintptr_t)pthread_self()); - if (request.query_options.__isset.is_report_success) { fragment_context->set_is_report_success(request.query_options.is_report_success); } @@ -704,6 +696,9 @@ Status PipelineFragmentContext::_create_tree_helper(ObjectPool* pool, (followed_by_shuffled_operator || op->is_shuffled_operator()) && require_shuffled_data_distribution; + if (num_children == 0) { + _use_serial_source = op->is_serial_operator(); + } // rely on that tnodes is preorder of the plan for (int i = 0; i < num_children; i++) { ++*node_idx; @@ -736,8 +731,7 @@ Status PipelineFragmentContext::_add_local_exchange_impl( int idx, ObjectPool* pool, PipelinePtr cur_pipe, PipelinePtr new_pip, DataDistribution data_distribution, bool* do_local_exchange, int num_buckets, const std::map& bucket_seq_to_instance_idx, - const std::map& shuffle_idx_to_instance_idx, - const bool ignore_data_hash_distribution) { + const std::map& shuffle_idx_to_instance_idx) { auto& operators = cur_pipe->operators(); const auto downstream_pipeline_id = cur_pipe->id(); auto local_exchange_id = next_operator_id(); @@ -778,29 +772,32 @@ Status 
PipelineFragmentContext::_add_local_exchange_impl( std::max(cur_pipe->num_tasks(), _num_instances), use_global_hash_shuffle ? _total_instances : _num_instances, _runtime_state->query_options().__isset.local_exchange_free_blocks_limit - ? _runtime_state->query_options().local_exchange_free_blocks_limit + ? cast_set( + _runtime_state->query_options().local_exchange_free_blocks_limit) : 0); break; case ExchangeType::BUCKET_HASH_SHUFFLE: shared_state->exchanger = BucketShuffleExchanger::create_unique( std::max(cur_pipe->num_tasks(), _num_instances), _num_instances, num_buckets, - ignore_data_hash_distribution, _runtime_state->query_options().__isset.local_exchange_free_blocks_limit - ? _runtime_state->query_options().local_exchange_free_blocks_limit + ? cast_set( + _runtime_state->query_options().local_exchange_free_blocks_limit) : 0); break; case ExchangeType::PASSTHROUGH: shared_state->exchanger = PassthroughExchanger::create_unique( cur_pipe->num_tasks(), _num_instances, _runtime_state->query_options().__isset.local_exchange_free_blocks_limit - ? _runtime_state->query_options().local_exchange_free_blocks_limit + ? cast_set( + _runtime_state->query_options().local_exchange_free_blocks_limit) : 0); break; case ExchangeType::BROADCAST: shared_state->exchanger = BroadcastExchanger::create_unique( cur_pipe->num_tasks(), _num_instances, _runtime_state->query_options().__isset.local_exchange_free_blocks_limit - ? _runtime_state->query_options().local_exchange_free_blocks_limit + ? cast_set( + _runtime_state->query_options().local_exchange_free_blocks_limit) : 0); break; case ExchangeType::PASS_TO_ONE: @@ -809,13 +806,15 @@ Status PipelineFragmentContext::_add_local_exchange_impl( shared_state->exchanger = PassToOneExchanger::create_unique( cur_pipe->num_tasks(), _num_instances, _runtime_state->query_options().__isset.local_exchange_free_blocks_limit - ? _runtime_state->query_options().local_exchange_free_blocks_limit + ? 
cast_set(_runtime_state->query_options() + .local_exchange_free_blocks_limit) : 0); } else { shared_state->exchanger = BroadcastExchanger::create_unique( cur_pipe->num_tasks(), _num_instances, _runtime_state->query_options().__isset.local_exchange_free_blocks_limit - ? _runtime_state->query_options().local_exchange_free_blocks_limit + ? cast_set(_runtime_state->query_options() + .local_exchange_free_blocks_limit) : 0); } break; @@ -830,7 +829,8 @@ Status PipelineFragmentContext::_add_local_exchange_impl( shared_state->exchanger = LocalMergeSortExchanger::create_unique( sort_source, cur_pipe->num_tasks(), _num_instances, _runtime_state->query_options().__isset.local_exchange_free_blocks_limit - ? _runtime_state->query_options().local_exchange_free_blocks_limit + ? cast_set( + _runtime_state->query_options().local_exchange_free_blocks_limit) : 0); break; } @@ -838,7 +838,8 @@ Status PipelineFragmentContext::_add_local_exchange_impl( shared_state->exchanger = AdaptivePassthroughExchanger::create_unique( cur_pipe->num_tasks(), _num_instances, _runtime_state->query_options().__isset.local_exchange_free_blocks_limit - ? _runtime_state->query_options().local_exchange_free_blocks_limit + ? 
cast_set( + _runtime_state->query_options().local_exchange_free_blocks_limit) : 0); break; default: @@ -914,8 +915,7 @@ Status PipelineFragmentContext::_add_local_exchange( int pip_idx, int idx, int node_id, ObjectPool* pool, PipelinePtr cur_pipe, DataDistribution data_distribution, bool* do_local_exchange, int num_buckets, const std::map& bucket_seq_to_instance_idx, - const std::map& shuffle_idx_to_instance_idx, - const bool ignore_data_distribution) { + const std::map& shuffle_idx_to_instance_idx) { if (_num_instances <= 1 || cur_pipe->num_tasks_of_parent() <= 1) { return Status::OK(); } @@ -930,7 +930,7 @@ Status PipelineFragmentContext::_add_local_exchange( auto new_pip = add_pipeline(cur_pipe, pip_idx + 1); RETURN_IF_ERROR(_add_local_exchange_impl( idx, pool, cur_pipe, new_pip, data_distribution, do_local_exchange, num_buckets, - bucket_seq_to_instance_idx, shuffle_idx_to_instance_idx, ignore_data_distribution)); + bucket_seq_to_instance_idx, shuffle_idx_to_instance_idx)); CHECK(total_op_num + 1 == cur_pipe->operators().size() + new_pip->operators().size()) << "total_op_num: " << total_op_num @@ -944,7 +944,7 @@ Status PipelineFragmentContext::_add_local_exchange( cast_set(new_pip->operators().size()), pool, new_pip, add_pipeline(new_pip, pip_idx + 2), DataDistribution(ExchangeType::PASSTHROUGH), do_local_exchange, num_buckets, bucket_seq_to_instance_idx, - shuffle_idx_to_instance_idx, ignore_data_distribution)); + shuffle_idx_to_instance_idx)); } return Status::OK(); } @@ -970,13 +970,8 @@ Status PipelineFragmentContext::_plan_local_exchange( // scan node. so here use `_num_instance` to replace the `num_buckets` to prevent dividing 0 // still keep colocate plan after local shuffle RETURN_IF_ERROR(_plan_local_exchange( - _pipelines[pip_idx]->operators().front()->ignore_data_hash_distribution() || - num_buckets == 0 - ? 
_num_instances - : num_buckets, - pip_idx, _pipelines[pip_idx], bucket_seq_to_instance_idx, - shuffle_idx_to_instance_idx, - _pipelines[pip_idx]->operators().front()->ignore_data_hash_distribution())); + _use_serial_source || num_buckets == 0 ? _num_instances : num_buckets, pip_idx, + _pipelines[pip_idx], bucket_seq_to_instance_idx, shuffle_idx_to_instance_idx)); } return Status::OK(); } @@ -984,8 +979,7 @@ Status PipelineFragmentContext::_plan_local_exchange( Status PipelineFragmentContext::_plan_local_exchange( int num_buckets, int pip_idx, PipelinePtr pip, const std::map& bucket_seq_to_instance_idx, - const std::map& shuffle_idx_to_instance_idx, - const bool ignore_data_hash_distribution) { + const std::map& shuffle_idx_to_instance_idx) { int idx = 1; bool do_local_exchange = false; do { @@ -997,8 +991,7 @@ Status PipelineFragmentContext::_plan_local_exchange( RETURN_IF_ERROR(_add_local_exchange( pip_idx, idx, ops[idx]->node_id(), _runtime_state->obj_pool(), pip, ops[idx]->required_data_distribution(), &do_local_exchange, num_buckets, - bucket_seq_to_instance_idx, shuffle_idx_to_instance_idx, - ignore_data_hash_distribution)); + bucket_seq_to_instance_idx, shuffle_idx_to_instance_idx)); } if (do_local_exchange) { // If local exchange is needed for current operator, we will split this pipeline to @@ -1015,8 +1008,7 @@ Status PipelineFragmentContext::_plan_local_exchange( RETURN_IF_ERROR(_add_local_exchange( pip_idx, idx, pip->sink()->node_id(), _runtime_state->obj_pool(), pip, pip->sink()->required_data_distribution(), &do_local_exchange, num_buckets, - bucket_seq_to_instance_idx, shuffle_idx_to_instance_idx, - ignore_data_hash_distribution)); + bucket_seq_to_instance_idx, shuffle_idx_to_instance_idx)); } return Status::OK(); } @@ -1200,6 +1192,7 @@ Status PipelineFragmentContext::_create_operator(ObjectPool* pool, const TPlanNo std::stringstream error_msg; bool enable_query_cache = request.fragment.__isset.query_cache_param; + bool fe_with_old_version = false; 
switch (tnode.node_type) { case TPlanNodeType::OLAP_SCAN_NODE: { op.reset(new OlapScanOperatorX( @@ -1207,10 +1200,7 @@ Status PipelineFragmentContext::_create_operator(ObjectPool* pool, const TPlanNo enable_query_cache ? request.fragment.query_cache_param : TQueryCacheParam {})); RETURN_IF_ERROR(cur_pipe->add_operator( op, request.__isset.parallel_instances ? request.parallel_instances : 0)); - if (request.__isset.parallel_instances) { - cur_pipe->set_num_tasks(request.parallel_instances); - op->set_ignore_data_distribution(); - } + fe_with_old_version = !tnode.__isset.is_serial_operator; break; } case TPlanNodeType::GROUP_COMMIT_SCAN_NODE: { @@ -1221,10 +1211,7 @@ Status PipelineFragmentContext::_create_operator(ObjectPool* pool, const TPlanNo op.reset(new GroupCommitOperatorX(pool, tnode, next_operator_id(), descs, _num_instances)); RETURN_IF_ERROR(cur_pipe->add_operator( op, request.__isset.parallel_instances ? request.parallel_instances : 0)); - if (request.__isset.parallel_instances) { - cur_pipe->set_num_tasks(request.parallel_instances); - op->set_ignore_data_distribution(); - } + fe_with_old_version = !tnode.__isset.is_serial_operator; break; } case doris::TPlanNodeType::JDBC_SCAN_NODE: { @@ -1237,20 +1224,14 @@ Status PipelineFragmentContext::_create_operator(ObjectPool* pool, const TPlanNo "Jdbc scan node is disabled, you can change be config enable_java_support " "to true and restart be."); } - if (request.__isset.parallel_instances) { - cur_pipe->set_num_tasks(request.parallel_instances); - op->set_ignore_data_distribution(); - } + fe_with_old_version = !tnode.__isset.is_serial_operator; break; } case doris::TPlanNodeType::FILE_SCAN_NODE: { op.reset(new FileScanOperatorX(pool, tnode, next_operator_id(), descs, _num_instances)); RETURN_IF_ERROR(cur_pipe->add_operator( op, request.__isset.parallel_instances ? 
request.parallel_instances : 0)); - if (request.__isset.parallel_instances) { - cur_pipe->set_num_tasks(request.parallel_instances); - op->set_ignore_data_distribution(); - } + fe_with_old_version = !tnode.__isset.is_serial_operator; break; } case TPlanNodeType::ES_SCAN_NODE: @@ -1258,10 +1239,7 @@ Status PipelineFragmentContext::_create_operator(ObjectPool* pool, const TPlanNo op.reset(new EsScanOperatorX(pool, tnode, next_operator_id(), descs, _num_instances)); RETURN_IF_ERROR(cur_pipe->add_operator( op, request.__isset.parallel_instances ? request.parallel_instances : 0)); - if (request.__isset.parallel_instances) { - cur_pipe->set_num_tasks(request.parallel_instances); - op->set_ignore_data_distribution(); - } + fe_with_old_version = !tnode.__isset.is_serial_operator; break; } case TPlanNodeType::EXCHANGE_NODE: { @@ -1270,10 +1248,7 @@ Status PipelineFragmentContext::_create_operator(ObjectPool* pool, const TPlanNo op.reset(new ExchangeSourceOperatorX(pool, tnode, next_operator_id(), descs, num_senders)); RETURN_IF_ERROR(cur_pipe->add_operator( op, request.__isset.parallel_instances ? request.parallel_instances : 0)); - if (request.__isset.parallel_instances) { - op->set_ignore_data_distribution(); - cur_pipe->set_num_tasks(request.parallel_instances); - } + fe_with_old_version = !tnode.__isset.is_serial_operator; break; } case TPlanNodeType::AGGREGATION_NODE: { @@ -1635,10 +1610,7 @@ Status PipelineFragmentContext::_create_operator(ObjectPool* pool, const TPlanNo op.reset(new DataGenSourceOperatorX(pool, tnode, next_operator_id(), descs)); RETURN_IF_ERROR(cur_pipe->add_operator( op, request.__isset.parallel_instances ? 
request.parallel_instances : 0)); - if (request.__isset.parallel_instances) { - cur_pipe->set_num_tasks(request.parallel_instances); - op->set_ignore_data_distribution(); - } + fe_with_old_version = !tnode.__isset.is_serial_operator; break; } case TPlanNodeType::SCHEMA_SCAN_NODE: { @@ -1663,6 +1635,10 @@ Status PipelineFragmentContext::_create_operator(ObjectPool* pool, const TPlanNo return Status::InternalError("Unsupported exec type in pipeline: {}", print_plan_node_type(tnode.node_type)); } + if (request.__isset.parallel_instances && fe_with_old_version) { + cur_pipe->set_num_tasks(request.parallel_instances); + op->set_serial_operator(); + } return Status::OK(); } diff --git a/be/src/pipeline/pipeline_fragment_context.h b/be/src/pipeline/pipeline_fragment_context.h index 6caa0e5c1067229..289f5c8236522f9 100644 --- a/be/src/pipeline/pipeline_fragment_context.h +++ b/be/src/pipeline/pipeline_fragment_context.h @@ -153,22 +153,19 @@ class PipelineFragmentContext : public TaskExecutionContext { const std::map& shuffle_idx_to_instance_idx); Status _plan_local_exchange(int num_buckets, int pip_idx, PipelinePtr pip, const std::map& bucket_seq_to_instance_idx, - const std::map& shuffle_idx_to_instance_idx, - const bool ignore_data_distribution); + const std::map& shuffle_idx_to_instance_idx); void _inherit_pipeline_properties(const DataDistribution& data_distribution, PipelinePtr pipe_with_source, PipelinePtr pipe_with_sink); Status _add_local_exchange(int pip_idx, int idx, int node_id, ObjectPool* pool, PipelinePtr cur_pipe, DataDistribution data_distribution, bool* do_local_exchange, int num_buckets, const std::map& bucket_seq_to_instance_idx, - const std::map& shuffle_idx_to_instance_idx, - const bool ignore_data_distribution); + const std::map& shuffle_idx_to_instance_idx); Status _add_local_exchange_impl(int idx, ObjectPool* pool, PipelinePtr cur_pipe, PipelinePtr new_pip, DataDistribution data_distribution, bool* do_local_exchange, int num_buckets, const 
std::map& bucket_seq_to_instance_idx, - const std::map& shuffle_idx_to_instance_idx, - const bool ignore_data_hash_distribution); + const std::map& shuffle_idx_to_instance_idx); Status _build_pipeline_tasks(const doris::TPipelineFragmentParams& request, ThreadPool* thread_pool); diff --git a/be/src/pipeline/pipeline_task.cpp b/be/src/pipeline/pipeline_task.cpp index e06b8028c9c7308..6f9e59c82919663 100644 --- a/be/src/pipeline/pipeline_task.cpp +++ b/be/src/pipeline/pipeline_task.cpp @@ -181,7 +181,7 @@ void PipelineTask::_init_profile() { _sink_timer = ADD_CHILD_TIMER(_task_profile, "SinkTime", exec_time); _close_timer = ADD_CHILD_TIMER(_task_profile, "CloseTime", exec_time); - _wait_worker_timer = ADD_TIMER(_task_profile, "WaitWorkerTime"); + _wait_worker_timer = ADD_TIMER_WITH_LEVEL(_task_profile, "WaitWorkerTime", 1); _schedule_counts = ADD_COUNTER(_task_profile, "NumScheduleTimes", TUnit::UNIT); _yield_counts = ADD_COUNTER(_task_profile, "NumYieldTimes", TUnit::UNIT); @@ -216,10 +216,6 @@ Status PipelineTask::_open() { return Status::OK(); } -void PipelineTask::set_task_queue(TaskQueue* task_queue) { - _task_queue = task_queue; -} - bool PipelineTask::_wait_to_start() { // Before task starting, we should make sure // 1. Execution dependency is ready (which is controlled by FE 2-phase commit) @@ -247,6 +243,12 @@ bool PipelineTask::_wait_to_start() { } bool PipelineTask::_is_blocked() { + Defer defer([this] { + if (_blocked_dep != nullptr) { + _task_profile->add_info_string("TaskState", "Blocked"); + _task_profile->add_info_string("BlockedByDependency", _blocked_dep->name()); + } + }); // `_dry_run = true` means we do not need data from source operator. 
if (!_dry_run) { for (int i = _read_dependencies.size() - 1; i >= 0; i--) { @@ -328,6 +330,8 @@ Status PipelineTask::execute(bool* eos) { RETURN_IF_ERROR(_open()); } + _task_profile->add_info_string("TaskState", "Runnable"); + _task_profile->add_info_string("BlockedByDependency", ""); while (!_fragment_context->is_canceled()) { if (_is_blocked()) { return Status::OK(); @@ -391,6 +395,7 @@ Status PipelineTask::execute(bool* eos) { *eos = status.is() ? true : *eos; if (*eos) { // just return, the scheduler will do finish work _eos = true; + _task_profile->add_info_string("TaskState", "Finished"); return Status::OK(); } } diff --git a/be/src/pipeline/pipeline_task.h b/be/src/pipeline/pipeline_task.h index febc9634c49f237..3b4627f589dc543 100644 --- a/be/src/pipeline/pipeline_task.h +++ b/be/src/pipeline/pipeline_task.h @@ -41,7 +41,7 @@ class PipelineFragmentContext; namespace doris::pipeline { -class TaskQueue; +class MultiCoreTaskQueue; class PriorityTaskQueue; class Dependency; @@ -159,8 +159,8 @@ class PipelineTask { } } - void set_task_queue(TaskQueue* task_queue); - TaskQueue* get_task_queue() { return _task_queue; } + void set_task_queue(MultiCoreTaskQueue* task_queue) { _task_queue = task_queue; } + MultiCoreTaskQueue* get_task_queue() { return _task_queue; } static constexpr auto THREAD_TIME_SLICE = 100'000'000ULL; @@ -257,7 +257,7 @@ class PipelineTask { uint32_t _schedule_time = 0; std::unique_ptr _block; PipelineFragmentContext* _fragment_context = nullptr; - TaskQueue* _task_queue = nullptr; + MultiCoreTaskQueue* _task_queue = nullptr; // used for priority queue // it may be visited by different thread but there is no race condition diff --git a/be/src/pipeline/query_cache/query_cache.h b/be/src/pipeline/query_cache/query_cache.h index a905831b530578d..827c516ad75f07f 100644 --- a/be/src/pipeline/query_cache/query_cache.h +++ b/be/src/pipeline/query_cache/query_cache.h @@ -37,6 +37,7 @@ #include "runtime/memory/mem_tracker.h" #include "util/slice.h" 
#include "util/time.h" +#include "vec/core/block.h" namespace doris { diff --git a/be/src/pipeline/task_queue.cpp b/be/src/pipeline/task_queue.cpp index ea9fb09e260c0b2..ea812ca9b12dd66 100644 --- a/be/src/pipeline/task_queue.cpp +++ b/be/src/pipeline/task_queue.cpp @@ -27,8 +27,7 @@ #include "runtime/workload_group/workload_group.h" namespace doris::pipeline { - -TaskQueue::~TaskQueue() = default; +#include "common/compile_check_begin.h" PipelineTask* SubTaskQueue::try_take(bool is_steal) { if (_queue.empty()) { @@ -121,7 +120,7 @@ Status PriorityTaskQueue::push(PipelineTask* task) { // update empty queue's runtime, to avoid too high priority if (_sub_queues[level].empty() && - _queue_level_min_vruntime > _sub_queues[level].get_vruntime()) { + double(_queue_level_min_vruntime) > _sub_queues[level].get_vruntime()) { _sub_queues[level].adjust_runtime(_queue_level_min_vruntime); } @@ -133,44 +132,35 @@ Status PriorityTaskQueue::push(PipelineTask* task) { MultiCoreTaskQueue::~MultiCoreTaskQueue() = default; -MultiCoreTaskQueue::MultiCoreTaskQueue(int core_size) : TaskQueue(core_size), _closed(false) { - _prio_task_queue_list = - std::make_shared>>(core_size); - for (int i = 0; i < core_size; i++) { - (*_prio_task_queue_list)[i] = std::make_unique(); - } -} +MultiCoreTaskQueue::MultiCoreTaskQueue(int core_size) + : _prio_task_queues(core_size), _closed(false), _core_size(core_size) {} void MultiCoreTaskQueue::close() { if (_closed) { return; } _closed = true; - for (int i = 0; i < _core_size; ++i) { - (*_prio_task_queue_list)[i]->close(); - } - std::atomic_store(&_prio_task_queue_list, - std::shared_ptr>>(nullptr)); + // close all priority task queue + std::ranges::for_each(_prio_task_queues, + [](auto& prio_task_queue) { prio_task_queue.close(); }); } PipelineTask* MultiCoreTaskQueue::take(int core_id) { PipelineTask* task = nullptr; - auto prio_task_queue_list = - std::atomic_load_explicit(&_prio_task_queue_list, std::memory_order_relaxed); while (!_closed) { - 
DCHECK(prio_task_queue_list->size() > core_id) - << " list size: " << prio_task_queue_list->size() << " core_id: " << core_id + DCHECK(_prio_task_queues.size() > core_id) + << " list size: " << _prio_task_queues.size() << " core_id: " << core_id << " _core_size: " << _core_size << " _next_core: " << _next_core.load(); - task = (*prio_task_queue_list)[core_id]->try_take(false); + task = _prio_task_queues[core_id].try_take(false); if (task) { task->set_core_id(core_id); break; } - task = _steal_take(core_id, *prio_task_queue_list); + task = _steal_take(core_id); if (task) { break; } - task = (*prio_task_queue_list)[core_id]->take(WAIT_CORE_TASK_TIMEOUT_MS /* timeout_ms */); + task = _prio_task_queues[core_id].take(WAIT_CORE_TASK_TIMEOUT_MS /* timeout_ms */); if (task) { task->set_core_id(core_id); break; @@ -182,8 +172,7 @@ PipelineTask* MultiCoreTaskQueue::take(int core_id) { return task; } -PipelineTask* MultiCoreTaskQueue::_steal_take( - int core_id, std::vector>& prio_task_queue_list) { +PipelineTask* MultiCoreTaskQueue::_steal_take(int core_id) { DCHECK(core_id < _core_size); int next_id = core_id; for (int i = 1; i < _core_size; ++i) { @@ -192,7 +181,7 @@ PipelineTask* MultiCoreTaskQueue::_steal_take( next_id = 0; } DCHECK(next_id < _core_size); - auto task = prio_task_queue_list[next_id]->try_take(true); + auto task = _prio_task_queues[next_id].try_take(true); if (task) { task->set_core_id(next_id); return task; @@ -212,17 +201,13 @@ Status MultiCoreTaskQueue::push_back(PipelineTask* task) { Status MultiCoreTaskQueue::push_back(PipelineTask* task, int core_id) { DCHECK(core_id < _core_size); task->put_in_runnable_queue(); - auto prio_task_queue_list = - std::atomic_load_explicit(&_prio_task_queue_list, std::memory_order_relaxed); - return (*prio_task_queue_list)[core_id]->push(task); + return _prio_task_queues[core_id].push(task); } void MultiCoreTaskQueue::update_statistics(PipelineTask* task, int64_t time_spent) { task->inc_runtime_ns(time_spent); - auto 
prio_task_queue_list = - std::atomic_load_explicit(&_prio_task_queue_list, std::memory_order_relaxed); - (*prio_task_queue_list)[task->get_core_id()]->inc_sub_queue_runtime(task->get_queue_level(), - time_spent); + _prio_task_queues[task->get_core_id()].inc_sub_queue_runtime(task->get_queue_level(), + time_spent); } } // namespace doris::pipeline \ No newline at end of file diff --git a/be/src/pipeline/task_queue.h b/be/src/pipeline/task_queue.h index b389ebc2c515177..1651eb50cac4aba 100644 --- a/be/src/pipeline/task_queue.h +++ b/be/src/pipeline/task_queue.h @@ -32,30 +32,7 @@ #include "pipeline_task.h" namespace doris::pipeline { - -class TaskQueue { -public: - TaskQueue(int core_size) : _core_size(core_size) {} - virtual ~TaskQueue(); - virtual void close() = 0; - // Get the task by core id. - // TODO: To think the logic is useful? - virtual PipelineTask* take(int core_id) = 0; - - // push from scheduler - virtual Status push_back(PipelineTask* task) = 0; - - // push from worker - virtual Status push_back(PipelineTask* task, int core_id) = 0; - - virtual void update_statistics(PipelineTask* task, int64_t time_spent) {} - - int cores() const { return _core_size; } - -protected: - int _core_size; - static constexpr auto WAIT_CORE_TASK_TIMEOUT_MS = 100; -}; +#include "common/compile_check_begin.h" class SubTaskQueue { friend class PriorityTaskQueue; @@ -70,11 +47,13 @@ class SubTaskQueue { // note: // runtime is the time consumed by the actual execution of the task // vruntime(means virtual runtime) = runtime / _level_factor - double get_vruntime() { return _runtime / _level_factor; } + double get_vruntime() { return double(_runtime) / _level_factor; } void inc_runtime(uint64_t delta_time) { _runtime += delta_time; } - void adjust_runtime(uint64_t vruntime) { this->_runtime = uint64_t(vruntime * _level_factor); } + void adjust_runtime(uint64_t vruntime) { + this->_runtime = uint64_t(double(vruntime) * _level_factor); + } bool empty() { return _queue.empty(); } @@ 
-124,31 +103,35 @@ class PriorityTaskQueue { }; // Need consider NUMA architecture -class MultiCoreTaskQueue : public TaskQueue { +class MultiCoreTaskQueue { public: explicit MultiCoreTaskQueue(int core_size); - ~MultiCoreTaskQueue() override; + ~MultiCoreTaskQueue(); - void close() override; + void close(); // Get the task by core id. - PipelineTask* take(int core_id) override; + PipelineTask* take(int core_id); // TODO combine these methods to `push_back(task, core_id = -1)` - Status push_back(PipelineTask* task) override; + Status push_back(PipelineTask* task); + + Status push_back(PipelineTask* task, int core_id); - Status push_back(PipelineTask* task, int core_id) override; + void update_statistics(PipelineTask* task, int64_t time_spent); - void update_statistics(PipelineTask* task, int64_t time_spent) override; + int cores() const { return _core_size; } private: - PipelineTask* _steal_take( - int core_id, std::vector>& prio_task_queue_list); + PipelineTask* _steal_take(int core_id); - std::shared_ptr>> _prio_task_queue_list; + std::vector _prio_task_queues; std::atomic _next_core = 0; std::atomic _closed; -}; + int _core_size; + static constexpr auto WAIT_CORE_TASK_TIMEOUT_MS = 100; +}; +#include "common/compile_check_end.h" } // namespace doris::pipeline diff --git a/be/src/pipeline/task_scheduler.cpp b/be/src/pipeline/task_scheduler.cpp index 475d3a8065f8b45..45898e764175b2b 100644 --- a/be/src/pipeline/task_scheduler.cpp +++ b/be/src/pipeline/task_scheduler.cpp @@ -44,14 +44,14 @@ #include "vec/runtime/vdatetime_value.h" namespace doris::pipeline { - +#include "common/compile_check_begin.h" TaskScheduler::~TaskScheduler() { stop(); LOG(INFO) << "Task scheduler " << _name << " shutdown"; } Status TaskScheduler::start() { - int cores = _task_queue->cores(); + int cores = _task_queue.cores(); RETURN_IF_ERROR(ThreadPoolBuilder(_name) .set_min_threads(cores) .set_max_threads(cores) @@ -60,14 +60,14 @@ Status TaskScheduler::start() { .build(&_fix_thread_pool)); 
LOG_INFO("TaskScheduler set cores").tag("size", cores); _markers.resize(cores, true); - for (size_t i = 0; i < cores; ++i) { + for (int i = 0; i < cores; ++i) { RETURN_IF_ERROR(_fix_thread_pool->submit_func([this, i] { _do_work(i); })); } return Status::OK(); } Status TaskScheduler::schedule_task(PipelineTask* task) { - return _task_queue->push_back(task); + return _task_queue.push_back(task); } // after _close_task, task maybe destructed. @@ -97,19 +97,19 @@ void _close_task(PipelineTask* task, Status exec_status) { task->fragment_context()->close_a_pipeline(task->pipeline_id()); } -void TaskScheduler::_do_work(size_t index) { +void TaskScheduler::_do_work(int index) { while (_markers[index]) { - auto* task = _task_queue->take(index); + auto* task = _task_queue.take(index); if (!task) { continue; } if (task->is_running()) { - static_cast(_task_queue->push_back(task, index)); + static_cast(_task_queue.push_back(task, index)); continue; } task->log_detail_if_need(); task->set_running(true); - task->set_task_queue(_task_queue.get()); + task->set_task_queue(&_task_queue); auto* fragment_ctx = task->fragment_context(); bool canceled = fragment_ctx->is_canceled(); @@ -189,9 +189,7 @@ void TaskScheduler::_do_work(size_t index) { void TaskScheduler::stop() { if (!_shutdown) { - if (_task_queue) { - _task_queue->close(); - } + _task_queue.close(); if (_fix_thread_pool) { for (size_t i = 0; i < _markers.size(); ++i) { _markers[i] = false; diff --git a/be/src/pipeline/task_scheduler.h b/be/src/pipeline/task_scheduler.h index 9a20807ea268e88..bdb5bec1776f584 100644 --- a/be/src/pipeline/task_scheduler.h +++ b/be/src/pipeline/task_scheduler.h @@ -31,24 +31,20 @@ #include "gutil/ref_counted.h" #include "pipeline_task.h" #include "runtime/workload_group/workload_group.h" +#include "task_queue.h" #include "util/thread.h" namespace doris { class ExecEnv; class ThreadPool; - -namespace pipeline { -class TaskQueue; -} // namespace pipeline } // namespace doris namespace 
doris::pipeline { class TaskScheduler { public: - TaskScheduler(ExecEnv* exec_env, std::shared_ptr task_queue, std::string name, - CgroupCpuCtl* cgroup_cpu_ctl) - : _task_queue(std::move(task_queue)), + TaskScheduler(int core_num, std::string name, CgroupCpuCtl* cgroup_cpu_ctl) + : _task_queue(core_num), _shutdown(false), _name(std::move(name)), _cgroup_cpu_ctl(cgroup_cpu_ctl) {} @@ -65,12 +61,12 @@ class TaskScheduler { private: std::unique_ptr _fix_thread_pool; - std::shared_ptr _task_queue; + MultiCoreTaskQueue _task_queue; std::vector _markers; bool _shutdown; std::string _name; CgroupCpuCtl* _cgroup_cpu_ctl = nullptr; - void _do_work(size_t index); + void _do_work(int index); }; } // namespace doris::pipeline \ No newline at end of file diff --git a/be/src/runtime/descriptors.cpp b/be/src/runtime/descriptors.cpp index cc6f9050ac39153..bea11feb916f108 100644 --- a/be/src/runtime/descriptors.cpp +++ b/be/src/runtime/descriptors.cpp @@ -286,8 +286,7 @@ JdbcTableDescriptor::JdbcTableDescriptor(const TTableDescriptor& tdesc) _connection_pool_max_size(tdesc.jdbcTable.connection_pool_max_size), _connection_pool_max_wait_time(tdesc.jdbcTable.connection_pool_max_wait_time), _connection_pool_max_life_time(tdesc.jdbcTable.connection_pool_max_life_time), - _connection_pool_keep_alive(tdesc.jdbcTable.connection_pool_keep_alive), - _enable_connection_pool(tdesc.jdbcTable.enable_connection_pool) {} + _connection_pool_keep_alive(tdesc.jdbcTable.connection_pool_keep_alive) {} std::string JdbcTableDescriptor::debug_string() const { fmt::memory_buffer buf; @@ -295,14 +294,13 @@ std::string JdbcTableDescriptor::debug_string() const { buf, "JDBCTable({} ,_jdbc_catalog_id = {}, _jdbc_resource_name={} ,_jdbc_driver_url={} " ",_jdbc_driver_class={} ,_jdbc_driver_checksum={} ,_jdbc_url={} " - ",_jdbc_table_name={} ,_jdbc_user={} ,_jdbc_passwd={} " - ",_enable_connection_pool={},_connection_pool_min_size={} " + ",_jdbc_table_name={} ,_jdbc_user={} ,_jdbc_passwd={} 
,_connection_pool_min_size={} " ",_connection_pool_max_size={} ,_connection_pool_max_wait_time={} " ",_connection_pool_max_life_time={} ,_connection_pool_keep_alive={})", TableDescriptor::debug_string(), _jdbc_catalog_id, _jdbc_resource_name, _jdbc_driver_url, _jdbc_driver_class, _jdbc_driver_checksum, _jdbc_url, - _jdbc_table_name, _jdbc_user, _jdbc_passwd, _enable_connection_pool, - _connection_pool_min_size, _connection_pool_max_size, _connection_pool_max_wait_time, + _jdbc_table_name, _jdbc_user, _jdbc_passwd, _connection_pool_min_size, + _connection_pool_max_size, _connection_pool_max_wait_time, _connection_pool_max_life_time, _connection_pool_keep_alive); return fmt::to_string(buf); } diff --git a/be/src/runtime/descriptors.h b/be/src/runtime/descriptors.h index b5797b0d016d751..b807c5675430383 100644 --- a/be/src/runtime/descriptors.h +++ b/be/src/runtime/descriptors.h @@ -319,7 +319,6 @@ class JdbcTableDescriptor : public TableDescriptor { int32_t connection_pool_max_wait_time() const { return _connection_pool_max_wait_time; } int32_t connection_pool_max_life_time() const { return _connection_pool_max_life_time; } bool connection_pool_keep_alive() const { return _connection_pool_keep_alive; } - bool enable_connection_pool() const { return _enable_connection_pool; } private: int64_t _jdbc_catalog_id; @@ -336,7 +335,6 @@ class JdbcTableDescriptor : public TableDescriptor { int32_t _connection_pool_max_wait_time; int32_t _connection_pool_max_life_time; bool _connection_pool_keep_alive; - bool _enable_connection_pool; }; class TupleDescriptor { diff --git a/be/src/runtime/exec_env.cpp b/be/src/runtime/exec_env.cpp index c714db2d5e40fa5..e41cc982a7482cd 100644 --- a/be/src/runtime/exec_env.cpp +++ b/be/src/runtime/exec_env.cpp @@ -54,7 +54,10 @@ void ExecEnv::set_write_cooldown_meta_executors() { #endif // BE_TEST Result ExecEnv::get_tablet(int64_t tablet_id) { - return GetInstance()->storage_engine().get_tablet(tablet_id); + auto storage_engine = 
GetInstance()->_storage_engine.get(); + return storage_engine != nullptr + ? storage_engine->get_tablet(tablet_id) + : ResultError(Status::InternalError("failed to get tablet {}", tablet_id)); } const std::string& ExecEnv::token() const { diff --git a/be/src/runtime/exec_env.h b/be/src/runtime/exec_env.h index 399c2a7ce052dfb..b1617744eac6ba0 100644 --- a/be/src/runtime/exec_env.h +++ b/be/src/runtime/exec_env.h @@ -101,6 +101,7 @@ class FrontendServiceClient; class FileMetaCache; class GroupCommitMgr; class TabletSchemaCache; +class TabletColumnObjectPool; class UserFunctionCache; class SchemaCache; class StoragePageCache; @@ -110,6 +111,7 @@ class RowCache; class DummyLRUCache; class CacheManager; class ProcessProfile; +class HeapProfiler; class WalManager; class DNSCache; @@ -274,6 +276,9 @@ class ExecEnv { void set_cache_manager(CacheManager* cm) { this->_cache_manager = cm; } void set_process_profile(ProcessProfile* pp) { this->_process_profile = pp; } void set_tablet_schema_cache(TabletSchemaCache* c) { this->_tablet_schema_cache = c; } + void set_tablet_column_object_pool(TabletColumnObjectPool* c) { + this->_tablet_column_object_pool = c; + } void set_storage_page_cache(StoragePageCache* c) { this->_storage_page_cache = c; } void set_segment_loader(SegmentLoader* sl) { this->_segment_loader = sl; } void set_routine_load_task_executor(RoutineLoadTaskExecutor* r) { @@ -299,6 +304,7 @@ class ExecEnv { std::map get_running_frontends(); TabletSchemaCache* get_tablet_schema_cache() { return _tablet_schema_cache; } + TabletColumnObjectPool* get_tablet_column_object_pool() { return _tablet_column_object_pool; } SchemaCache* schema_cache() { return _schema_cache; } StoragePageCache* get_storage_page_cache() { return _storage_page_cache; } SegmentLoader* segment_loader() { return _segment_loader; } @@ -306,6 +312,7 @@ class ExecEnv { RowCache* get_row_cache() { return _row_cache; } CacheManager* get_cache_manager() { return _cache_manager; } ProcessProfile* 
get_process_profile() { return _process_profile; } + HeapProfiler* get_heap_profiler() { return _heap_profiler; } segment_v2::InvertedIndexSearcherCache* get_inverted_index_searcher_cache() { return _inverted_index_searcher_cache; } @@ -437,6 +444,7 @@ class ExecEnv { // these redundancy header could introduce potential bug, at least, more header means slow compile. // So we choose to use raw pointer, please remember to delete these pointer in deconstructor. TabletSchemaCache* _tablet_schema_cache = nullptr; + TabletColumnObjectPool* _tablet_column_object_pool = nullptr; std::unique_ptr _storage_engine; SchemaCache* _schema_cache = nullptr; StoragePageCache* _storage_page_cache = nullptr; @@ -445,6 +453,7 @@ class ExecEnv { RowCache* _row_cache = nullptr; CacheManager* _cache_manager = nullptr; ProcessProfile* _process_profile = nullptr; + HeapProfiler* _heap_profiler = nullptr; segment_v2::InvertedIndexSearcherCache* _inverted_index_searcher_cache = nullptr; segment_v2::InvertedIndexQueryCache* _inverted_index_query_cache = nullptr; QueryCache* _query_cache = nullptr; diff --git a/be/src/runtime/exec_env_init.cpp b/be/src/runtime/exec_env_init.cpp index d9eedc6d8c5dfe3..e43524b2d2a00b9 100644 --- a/be/src/runtime/exec_env_init.cpp +++ b/be/src/runtime/exec_env_init.cpp @@ -53,6 +53,7 @@ #include "olap/schema_cache.h" #include "olap/segment_loader.h" #include "olap/storage_engine.h" +#include "olap/tablet_column_object_pool.h" #include "olap/tablet_schema_cache.h" #include "olap/wal/wal_manager.h" #include "pipeline/pipeline_tracing.h" @@ -71,6 +72,7 @@ #include "runtime/load_path_mgr.h" #include "runtime/load_stream_mgr.h" #include "runtime/memory/cache_manager.h" +#include "runtime/memory/heap_profiler.h" #include "runtime/memory/mem_tracker.h" #include "runtime/memory/mem_tracker_limiter.h" #include "runtime/memory/thread_mem_tracker_mgr.h" @@ -338,6 +340,9 @@ Status ExecEnv::_init(const std::vector& store_paths, _tablet_schema_cache = 
TabletSchemaCache::create_global_schema_cache(config::tablet_schema_cache_capacity); + _tablet_column_object_pool = TabletColumnObjectPool::create_global_column_cache( + config::tablet_schema_cache_capacity); + // Storage engine doris::EngineOptions options; options.store_paths = store_paths; @@ -380,9 +385,8 @@ Status ExecEnv::init_pipeline_task_scheduler() { LOG_INFO("pipeline executors_size set ").tag("size", executors_size); // TODO pipeline workload group combie two blocked schedulers. - auto t_queue = std::make_shared(executors_size); _without_group_task_scheduler = - new pipeline::TaskScheduler(this, t_queue, "PipeNoGSchePool", nullptr); + new pipeline::TaskScheduler(executors_size, "PipeNoGSchePool", nullptr); RETURN_IF_ERROR(_without_group_task_scheduler->start()); _runtime_filter_timer_queue = new doris::pipeline::RuntimeFilterTimerQueue(); @@ -441,8 +445,11 @@ void ExecEnv::init_file_cache_factory(std::vector& cache_paths } for (const auto& status : cache_status) { if (!status.ok()) { - LOG(FATAL) << "failed to init file cache, err: " << status; - exit(-1); + if (!doris::config::ignore_broken_disk) { + LOG(FATAL) << "failed to init file cache, err: " << status; + exit(-1); + } + LOG(WARNING) << "failed to init file cache, err: " << status; } } } @@ -452,6 +459,7 @@ Status ExecEnv::_init_mem_env() { std::stringstream ss; // 1. init mem tracker _process_profile = ProcessProfile::create_global_instance(); + _heap_profiler = HeapProfiler::create_global_instance(); init_mem_tracker(); thread_context()->thread_mem_tracker_mgr->init(); #if defined(USE_MEM_TRACKER) && !defined(__SANITIZE_ADDRESS__) && !defined(ADDRESS_SANITIZER) && \ @@ -674,7 +682,7 @@ void ExecEnv::destroy() { SAFE_STOP(_write_cooldown_meta_executors); // StorageEngine must be destoried before _page_no_cache_mem_tracker.reset and _cache_manager destory - // shouldn't use SAFE_STOP. otherwise will lead to twice stop. 
+ SAFE_STOP(_storage_engine); _storage_engine.reset(); SAFE_STOP(_spill_stream_mgr); @@ -775,6 +783,7 @@ void ExecEnv::destroy() { SAFE_DELETE(_dns_cache); SAFE_DELETE(_process_profile); + SAFE_DELETE(_heap_profiler); _s_tracking_memory = false; diff --git a/be/src/runtime/fragment_mgr.cpp b/be/src/runtime/fragment_mgr.cpp index 26fb098c76dfc51..18aacb452a64770 100644 --- a/be/src/runtime/fragment_mgr.cpp +++ b/be/src/runtime/fragment_mgr.cpp @@ -299,6 +299,10 @@ Status FragmentMgr::trigger_pipeline_context_report( // including the final status when execution finishes. void FragmentMgr::coordinator_callback(const ReportStatusRequest& req) { DCHECK(req.status.ok() || req.done); // if !status.ok() => done + if (req.coord_addr.hostname == "external") { + // External query (flink/spark read tablets) does not need to report to FE. + return; + } Status exec_status = req.status; Status coord_status; FrontendServiceConnection coord(_exec_env->frontend_client_cache(), req.coord_addr, @@ -572,17 +576,19 @@ Status FragmentMgr::exec_plan_fragment(const TPipelineFragmentParams& params, } } +// Stage 2. prepare finished. then get FE instruction to execute Status FragmentMgr::start_query_execution(const PExecPlanFragmentStartRequest* request) { + TUniqueId query_id; + query_id.__set_hi(request->query_id().hi()); + query_id.__set_lo(request->query_id().lo()); std::shared_ptr q_ctx = nullptr; { std::lock_guard lock(_lock); - TUniqueId query_id; - query_id.__set_hi(request->query_id().hi()); - query_id.__set_lo(request->query_id().lo()); q_ctx = _get_or_erase_query_ctx(query_id); } if (q_ctx) { q_ctx->set_ready_to_execute(Status::OK()); + LOG_INFO("Query {} start execution", print_id(query_id)); } else { return Status::InternalError( "Failed to get query fragments context. 
Query may be " @@ -602,7 +608,6 @@ void FragmentMgr::remove_pipeline_context( .count(); g_fragment_executing_count << -1; g_fragment_last_active_time.set_value(now); - LOG_INFO("Removing query {} fragment {}", print_id(query_id), f_context->get_fragment_id()); _pipeline_map.erase({query_id, f_context->get_fragment_id()}); } } @@ -656,6 +661,7 @@ Status FragmentMgr::_get_query_ctx(const Params& params, TUniqueId query_id, boo return Status::OK(); } + // First time a fragment of a query arrived. print logs. LOG(INFO) << "query_id: " << print_id(query_id) << ", coord_addr: " << params.coord << ", total fragment num on current host: " << params.fragment_num_on_host << ", fe process uuid: " << params.query_options.fe_process_uuid @@ -665,7 +671,7 @@ Status FragmentMgr::_get_query_ctx(const Params& params, TUniqueId query_id, boo // This may be a first fragment request of the query. // Create the query fragments context. query_ctx = QueryContext::create_shared(query_id, _exec_env, params.query_options, - params.coord, pipeline, params.is_nereids, + params.coord, params.is_nereids, params.current_connect_fe, query_source); SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(query_ctx->query_mem_tracker); RETURN_IF_ERROR(DescriptorTbl::create(&(query_ctx->obj_pool), params.desc_tbl, @@ -684,7 +690,6 @@ Status FragmentMgr::_get_query_ctx(const Params& params, TUniqueId query_id, boo } _set_scan_concurrency(params, query_ctx.get()); - const bool is_pipeline = std::is_same_v; if (params.__isset.workload_groups && !params.workload_groups.empty()) { uint64_t tg_id = params.workload_groups[0].id; @@ -695,21 +700,14 @@ Status FragmentMgr::_get_query_ctx(const Params& params, TUniqueId query_id, boo RETURN_IF_ERROR(query_ctx->set_workload_group(workload_group_ptr)); _exec_env->runtime_query_statistics_mgr()->set_workload_group_id(print_id(query_id), tg_id); - - LOG(INFO) << "Query/load id: " << print_id(query_ctx->query_id()) - << ", use workload group: " << 
workload_group_ptr->debug_string() - << ", is pipeline: " << ((int)is_pipeline); } else { - LOG(INFO) << "Query/load id: " << print_id(query_ctx->query_id()) - << " carried group info but can not find group in be"; + LOG(WARNING) << "Query/load id: " << print_id(query_ctx->query_id()) + << " can't find its workload group " << tg_id; } } // There is some logic in query ctx's dctor, we could not check if exists and delete the // temp query ctx now. For example, the query id maybe removed from workload group's queryset. _query_ctx_map.insert(std::make_pair(query_ctx->query_id(), query_ctx)); - LOG(INFO) << "Register query/load memory tracker, query/load id: " - << print_id(query_ctx->query_id()) - << " limit: " << PrettyPrinter::print(query_ctx->mem_limit(), TUnit::BYTES); } return Status::OK(); } @@ -797,31 +795,33 @@ Status FragmentMgr::exec_plan_fragment(const TPipelineFragmentParams& params, query_ctx->set_merge_controller_handler(handler); } - for (const auto& local_param : params.local_params) { - const TUniqueId& fragment_instance_id = local_param.fragment_instance_id; + { + // (query_id, fragment_id) is executed only on one BE, locks _pipeline_map. 
std::lock_guard lock(_lock); - auto iter = _pipeline_map.find({params.query_id, params.fragment_id}); - if (iter != _pipeline_map.end()) { - return Status::InternalError("exec_plan_fragment input duplicated fragment_id({})", - params.fragment_id); + for (const auto& local_param : params.local_params) { + const TUniqueId& fragment_instance_id = local_param.fragment_instance_id; + auto iter = _pipeline_map.find({params.query_id, params.fragment_id}); + if (iter != _pipeline_map.end()) { + return Status::InternalError( + "exec_plan_fragment query_id({}) input duplicated fragment_id({})", + print_id(params.query_id), params.fragment_id); + } + query_ctx->fragment_instance_ids.push_back(fragment_instance_id); } - query_ctx->fragment_instance_ids.push_back(fragment_instance_id); - } - - if (!params.__isset.need_wait_execution_trigger || !params.need_wait_execution_trigger) { - query_ctx->set_ready_to_execute_only(); - } - int64 now = duration_cast( - std::chrono::system_clock::now().time_since_epoch()) - .count(); - { + int64 now = duration_cast( + std::chrono::system_clock::now().time_since_epoch()) + .count(); g_fragment_executing_count << 1; g_fragment_last_active_time.set_value(now); - std::lock_guard lock(_lock); // TODO: simplify this mapping _pipeline_map.insert({{params.query_id, params.fragment_id}, context}); } + + if (!params.__isset.need_wait_execution_trigger || !params.need_wait_execution_trigger) { + query_ctx->set_ready_to_execute_only(); + } + query_ctx->set_pipeline_context(params.fragment_id, context); RETURN_IF_ERROR(context->submit()); @@ -892,11 +892,20 @@ void FragmentMgr::cancel_worker() { running_queries_on_all_fes.clear(); } + std::vector> ctx; { std::lock_guard lock(_lock); + ctx.reserve(_pipeline_map.size()); for (auto& pipeline_itr : _pipeline_map) { - pipeline_itr.second->clear_finished_tasks(); + ctx.push_back(pipeline_itr.second); } + } + for (auto& c : ctx) { + c->clear_finished_tasks(); + } + + { + std::lock_guard lock(_lock); for (auto 
it = _query_ctx_map.begin(); it != _query_ctx_map.end();) { if (auto q_ctx = it->second.lock()) { if (q_ctx->is_timeout(now)) { @@ -1031,6 +1040,7 @@ void FragmentMgr::debug(std::stringstream& ss) {} */ Status FragmentMgr::exec_external_plan_fragment(const TScanOpenParams& params, const TQueryPlanInfo& t_query_plan_info, + const TUniqueId& query_id, const TUniqueId& fragment_instance_id, std::vector* selected_columns) { // set up desc tbl @@ -1071,8 +1081,9 @@ Status FragmentMgr::exec_external_plan_fragment(const TScanOpenParams& params, // assign the param used for executing of PlanFragment-self TPipelineInstanceParams fragment_exec_params; - exec_fragment_params.query_id = t_query_plan_info.query_id; + exec_fragment_params.query_id = query_id; fragment_exec_params.fragment_instance_id = fragment_instance_id; + exec_fragment_params.coord.hostname = "external"; std::map<::doris::TPlanNodeId, std::vector> per_node_scan_ranges; std::vector scan_ranges; std::vector tablet_ids = params.tablet_ids; @@ -1122,7 +1133,6 @@ Status FragmentMgr::exec_external_plan_fragment(const TScanOpenParams& params, Status FragmentMgr::apply_filterv2(const PPublishFilterRequestV2* request, butil::IOBufAsZeroCopyInputStream* attach_data) { - bool is_pipeline = request->has_is_pipeline() && request->is_pipeline(); int64_t start_apply = MonotonicMillis(); std::shared_ptr pip_context; @@ -1134,24 +1144,18 @@ Status FragmentMgr::apply_filterv2(const PPublishFilterRequestV2* request, { std::unique_lock lock(_lock); for (auto fragment_id : fragment_ids) { - if (is_pipeline) { - auto iter = _pipeline_map.find( - {UniqueId(request->query_id()).to_thrift(), fragment_id}); - if (iter == _pipeline_map.end()) { - LOG(WARNING) << "No pipeline fragment is found: Query-ID = " - << request->query_id() << " fragment_id = " << fragment_id; - continue; - } - pip_context = iter->second; - - DCHECK(pip_context != nullptr); - runtime_filter_mgr = pip_context->get_query_ctx()->runtime_filter_mgr(); - 
query_thread_context = {pip_context->get_query_ctx()->query_id(), - pip_context->get_query_ctx()->query_mem_tracker, - pip_context->get_query_ctx()->workload_group()}; - } else { - return Status::InternalError("Non-pipeline is disabled!"); + auto iter = + _pipeline_map.find({UniqueId(request->query_id()).to_thrift(), fragment_id}); + if (iter == _pipeline_map.end()) { + continue; } + pip_context = iter->second; + + DCHECK(pip_context != nullptr); + runtime_filter_mgr = pip_context->get_query_ctx()->runtime_filter_mgr(); + query_thread_context = {pip_context->get_query_ctx()->query_id(), + pip_context->get_query_ctx()->query_mem_tracker, + pip_context->get_query_ctx()->workload_group()}; break; } } diff --git a/be/src/runtime/fragment_mgr.h b/be/src/runtime/fragment_mgr.h index 41b63db0b23ad9a..20b2fd8cdc20631 100644 --- a/be/src/runtime/fragment_mgr.h +++ b/be/src/runtime/fragment_mgr.h @@ -112,6 +112,7 @@ class FragmentMgr : public RestMonitorIface { // execute external query, all query info are packed in TScanOpenParams Status exec_external_plan_fragment(const TScanOpenParams& params, const TQueryPlanInfo& t_query_plan_info, + const TUniqueId& query_id, const TUniqueId& fragment_instance_id, std::vector* selected_columns); diff --git a/be/src/runtime/load_stream.cpp b/be/src/runtime/load_stream.cpp index 80cd167260c04df..88c64eb517c3689 100644 --- a/be/src/runtime/load_stream.cpp +++ b/be/src/runtime/load_stream.cpp @@ -31,11 +31,14 @@ #include #include "bvar/bvar.h" +#include "cloud/config.h" #include "common/signal_handler.h" #include "exec/tablet_info.h" #include "gutil/ref_counted.h" +#include "olap/tablet.h" #include "olap/tablet_fwd.h" #include "olap/tablet_schema.h" +#include "runtime/exec_env.h" #include "runtime/fragment_mgr.h" #include "runtime/load_channel.h" #include "runtime/load_stream_mgr.h" @@ -149,6 +152,14 @@ Status TabletStream::append_data(const PStreamHeader& header, butil::IOBuf* data signal::set_signal_task_id(_load_id); 
g_load_stream_flush_running_threads << -1; auto st = _load_stream_writer->append_data(new_segid, header.offset(), buf, file_type); + if (!st.ok() && !config::is_cloud_mode()) { + auto res = ExecEnv::get_tablet(_id); + TabletSharedPtr tablet = + res.has_value() ? std::dynamic_pointer_cast(res.value()) : nullptr; + if (tablet) { + tablet->report_error(st); + } + } if (eos && st.ok()) { DBUG_EXECUTE_IF("TabletStream.append_data.unknown_file_type", { file_type = static_cast(-1); }); diff --git a/be/src/runtime/load_stream_writer.cpp b/be/src/runtime/load_stream_writer.cpp index 37243fab14bdb35..2e987edc7bd3bad 100644 --- a/be/src/runtime/load_stream_writer.cpp +++ b/be/src/runtime/load_stream_writer.cpp @@ -201,7 +201,7 @@ Status LoadStreamWriter::add_segment(uint32_t segid, const SegmentStatistics& st } DBUG_EXECUTE_IF("LoadStreamWriter.add_segment.size_not_match", { segment_file_size++; }); - if (segment_file_size + inverted_file_size != stat.data_size) { + if (segment_file_size != stat.data_size) { return Status::Corruption( "add_segment failed, segment stat {} does not match, file size={}, inverted file " "size={}, stat.data_size={}, tablet id={}", diff --git a/be/src/runtime/memory/cache_policy.h b/be/src/runtime/memory/cache_policy.h index 666d32bdb56e4d8..e7e1c73e7cbb41d 100644 --- a/be/src/runtime/memory/cache_policy.h +++ b/be/src/runtime/memory/cache_policy.h @@ -48,7 +48,8 @@ class CachePolicy { CLOUD_TXN_DELETE_BITMAP_CACHE = 17, NONE = 18, // not be used FOR_UT_CACHE_NUMBER = 19, - QUERY_CACHE = 20 + QUERY_CACHE = 20, + TABLET_COLUMN_OBJECT_POOL = 21, }; static std::string type_string(CacheType type) { @@ -93,6 +94,8 @@ class CachePolicy { return "ForUTCacheNumber"; case CacheType::QUERY_CACHE: return "QueryCache"; + case CacheType::TABLET_COLUMN_OBJECT_POOL: + return "TabletColumnObjectPool"; default: LOG(FATAL) << "not match type of cache policy :" << static_cast(type); } @@ -119,7 +122,8 @@ class CachePolicy { {"CreateTabletRRIdxCache", 
CacheType::CREATE_TABLET_RR_IDX_CACHE}, {"CloudTabletCache", CacheType::CLOUD_TABLET_CACHE}, {"CloudTxnDeleteBitmapCache", CacheType::CLOUD_TXN_DELETE_BITMAP_CACHE}, - {"ForUTCacheNumber", CacheType::FOR_UT_CACHE_NUMBER}}; + {"ForUTCacheNumber", CacheType::FOR_UT_CACHE_NUMBER}, + {"TabletColumnObjectPool", CacheType::TABLET_COLUMN_OBJECT_POOL}}; static CacheType string_to_type(std::string type) { if (StringToType.contains(type)) { diff --git a/be/src/runtime/memory/heap_profiler.cpp b/be/src/runtime/memory/heap_profiler.cpp new file mode 100644 index 000000000000000..01ed82f76ef6d15 --- /dev/null +++ b/be/src/runtime/memory/heap_profiler.cpp @@ -0,0 +1,130 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "runtime/memory/heap_profiler.h" + +#ifdef USE_JEMALLOC +#include "jemalloc/jemalloc.h" +#endif +#include "agent/utils.h" +#include "common/config.h" +#include "io/fs/local_file_system.h" + +namespace doris { + +void HeapProfiler::set_prof_active(bool prof) { +#ifdef USE_JEMALLOC + std::lock_guard guard(_mutex); + try { + int err = jemallctl("prof.active", nullptr, nullptr, &prof, 1); + err |= jemallctl("prof.thread_active_init", nullptr, nullptr, &prof, 1); + if (err) { + LOG(WARNING) << "jemalloc heap profiling start failed, " << err; + } else { + LOG(WARNING) << "jemalloc heap profiling started"; + } + } catch (...) { + LOG(WARNING) << "jemalloc heap profiling start failed"; + } +#endif +} + +bool HeapProfiler::get_prof_dump(const std::string& profile_file_name) { +#ifdef USE_JEMALLOC + std::lock_guard guard(_mutex); + const char* file_name_ptr = profile_file_name.c_str(); + try { + int err = jemallctl("prof.dump", nullptr, nullptr, &file_name_ptr, sizeof(const char*)); + if (err) { + LOG(WARNING) << "dump heap profile failed, " << err; + return false; + } else { + LOG(INFO) << "dump heap profile to " << profile_file_name; + return true; + } + } catch (...) 
{ + LOG(WARNING) << "dump heap profile failed"; + return false; + } +#else + return false; +#endif +} + +static std::string jeprof_profile_to_dot(const std::string& profile_file_name) { + AgentUtils util; + const static std::string jeprof_path = fmt::format("{}/bin/jeprof", std::getenv("DORIS_HOME")); + const static std::string binary_path = + fmt::format("{}/lib/doris_be", std::getenv("DORIS_HOME")); + // https://doris.apache.org/community/developer-guide/debug-tool/#3-jeprof-parses-heap-profile + std::string jeprof_cmd = + fmt::format("{} --dot {} {}", jeprof_path, binary_path, profile_file_name); + std::string msg; + bool rc = util.exec_cmd(jeprof_cmd, &msg); + if (!rc) { + LOG(WARNING) << "jeprof profile to dot failed: " << msg; + } + return msg; +} + +void HeapProfiler::heap_profiler_start() { + set_prof_active(true); +} + +void HeapProfiler::heap_profiler_stop() { + set_prof_active(false); +} + +bool HeapProfiler::check_heap_profiler() { +#ifdef USE_JEMALLOC + size_t value = 0; + size_t sz = sizeof(value); + jemallctl("prof.active", &value, &sz, nullptr, 0); + return value; +#else + return false; +#endif +} + +std::string HeapProfiler::dump_heap_profile() { + if (!config::jeprofile_dir.empty()) { + auto st = io::global_local_filesystem()->create_directory(config::jeprofile_dir); + if (!st.ok()) { + LOG(WARNING) << "create jeprofile dir failed."; + return ""; + } + } + std::string profile_file_name = + fmt::format("{}/jeheap_dump.{}.{}.{}.heap", config::jeprofile_dir, std::time(nullptr), + getpid(), rand()); + if (get_prof_dump(profile_file_name)) { + return profile_file_name; + } else { + return ""; + } +} + +std::string HeapProfiler::dump_heap_profile_to_dot() { + std::string profile_file_name = dump_heap_profile(); + if (!profile_file_name.empty()) { + return jeprof_profile_to_dot(profile_file_name); + } else { + return ""; + } +} + +} // namespace doris diff --git a/be/src/runtime/memory/heap_profiler.h b/be/src/runtime/memory/heap_profiler.h new file mode 
100644 index 000000000000000..7f156351200b3ac --- /dev/null +++ b/be/src/runtime/memory/heap_profiler.h @@ -0,0 +1,43 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include "runtime/exec_env.h" + +namespace doris { + +class HeapProfiler { +public: + static HeapProfiler* create_global_instance() { return new HeapProfiler(); } + static HeapProfiler* instance() { return ExecEnv::GetInstance()->get_heap_profiler(); } + HeapProfiler() = default; + + void heap_profiler_start(); + void heap_profiler_stop(); + bool check_heap_profiler(); + std::string dump_heap_profile(); + std::string dump_heap_profile_to_dot(); + +private: + void set_prof_active(bool prof); + bool get_prof_dump(const std::string& profile_file_name); + + std::mutex _mutex; +}; + +} // namespace doris diff --git a/be/src/runtime/query_context.cpp b/be/src/runtime/query_context.cpp index 80f59d7101d3c70..811fa6002b5cf53 100644 --- a/be/src/runtime/query_context.cpp +++ b/be/src/runtime/query_context.cpp @@ -26,11 +26,9 @@ #include #include #include -#include #include #include "common/logging.h" -#include "olap/olap_common.h" #include "pipeline/dependency.h" #include "pipeline/pipeline_fragment_context.h" #include "runtime/exec_env.h" 
@@ -74,12 +72,11 @@ const std::string toString(QuerySource queryType) { QueryContext::QueryContext(TUniqueId query_id, ExecEnv* exec_env, const TQueryOptions& query_options, TNetworkAddress coord_addr, - bool is_pipeline, bool is_nereids, TNetworkAddress current_connect_fe, + bool is_nereids, TNetworkAddress current_connect_fe, QuerySource query_source) : _timeout_second(-1), _query_id(query_id), _exec_env(exec_env), - _is_pipeline(is_pipeline), _is_nereids(is_nereids), _query_options(query_options), _query_source(query_source) { @@ -180,8 +177,7 @@ QueryContext::~QueryContext() { } } - //TODO: check if pipeline and tracing both enabled - if (_is_pipeline && ExecEnv::GetInstance()->pipeline_tracer_context()->enabled()) [[unlikely]] { + if (ExecEnv::GetInstance()->pipeline_tracer_context()->enabled()) [[unlikely]] { try { ExecEnv::GetInstance()->pipeline_tracer_context()->end_query(_query_id, group_id); } catch (std::exception& e) { @@ -198,7 +194,8 @@ QueryContext::~QueryContext() { _exec_env->spill_stream_mgr()->async_cleanup_query(_query_id); DorisMetrics::instance()->query_ctx_cnt->increment(-1); - LOG_INFO("Query {} deconstructed, {}", print_id(this->_query_id), mem_tracker_msg); + // the only one msg shows query's end. any other msg should append to it if need. 
+ LOG_INFO("Query {} deconstructed, mem_tracker: {}", print_id(this->_query_id), mem_tracker_msg); } void QueryContext::set_ready_to_execute(Status reason) { diff --git a/be/src/runtime/query_context.h b/be/src/runtime/query_context.h index 1a05b784d5bc5c4..ef753ee62259b45 100644 --- a/be/src/runtime/query_context.h +++ b/be/src/runtime/query_context.h @@ -79,8 +79,8 @@ class QueryContext { public: QueryContext(TUniqueId query_id, ExecEnv* exec_env, const TQueryOptions& query_options, - TNetworkAddress coord_addr, bool is_pipeline, bool is_nereids, - TNetworkAddress current_connect_fe, QuerySource query_type); + TNetworkAddress coord_addr, bool is_nereids, TNetworkAddress current_connect_fe, + QuerySource query_type); ~QueryContext(); @@ -246,7 +246,6 @@ class QueryContext { ExecEnv* _exec_env = nullptr; MonotonicStopWatch _query_watcher; int64_t _bytes_limit = 0; - bool _is_pipeline = false; bool _is_nereids = false; std::atomic _running_big_mem_op_num = 0; diff --git a/be/src/runtime/runtime_filter_mgr.cpp b/be/src/runtime/runtime_filter_mgr.cpp index 08a229c0ecf72b6..1a238787207b173 100644 --- a/be/src/runtime/runtime_filter_mgr.cpp +++ b/be/src/runtime/runtime_filter_mgr.cpp @@ -29,6 +29,7 @@ #include #include +#include "common/config.h" #include "common/logging.h" #include "common/status.h" #include "exprs/bloom_filter_func.h" @@ -343,8 +344,10 @@ Status RuntimeFilterMergeControllerEntity::send_filter_size(const PSendFilterSiz auto* pquery_id = closure->request_->mutable_query_id(); pquery_id->set_hi(_state->query_id.hi()); pquery_id->set_lo(_state->query_id.lo()); - closure->cntl_->set_timeout_ms(std::min(3600, _state->execution_timeout) * 1000); - closure->cntl_->ignore_eovercrowded(); + closure->cntl_->set_timeout_ms(get_execution_rpc_timeout_ms(_state->execution_timeout)); + if (config::execution_ignore_eovercrowded) { + closure->cntl_->ignore_eovercrowded(); + } closure->request_->set_filter_id(filter_id); 
closure->request_->set_filter_size(cnt_val->global_size); @@ -449,15 +452,17 @@ Status RuntimeFilterMergeControllerEntity::merge(const PMergeFilterRequest* requ DummyBrpcCallback::create_shared()); closure->request_->set_filter_id(request->filter_id()); - closure->request_->set_is_pipeline(request->has_is_pipeline() && - request->is_pipeline()); closure->request_->set_merge_time(merge_time); *closure->request_->mutable_query_id() = request->query_id(); if (has_attachment) { closure->cntl_->request_attachment().append(request_attachment); } - closure->cntl_->set_timeout_ms(std::min(3600, _state->execution_timeout) * 1000); - closure->cntl_->ignore_eovercrowded(); + + closure->cntl_->set_timeout_ms(get_execution_rpc_timeout_ms(_state->execution_timeout)); + if (config::execution_ignore_eovercrowded) { + closure->cntl_->ignore_eovercrowded(); + } + // set fragment-id if (target.__isset.target_fragment_ids) { for (auto& target_fragment_id : target.target_fragment_ids) { diff --git a/be/src/runtime/runtime_state.h b/be/src/runtime/runtime_state.h index 34ce79ec7a749a9..abc823bc25b291d 100644 --- a/be/src/runtime/runtime_state.h +++ b/be/src/runtime/runtime_state.h @@ -38,6 +38,7 @@ #include "agent/be_exec_version_manager.h" #include "cctz/time_zone.h" #include "common/compiler_util.h" // IWYU pragma: keep +#include "common/config.h" #include "common/factory_creator.h" #include "common/status.h" #include "gutil/integral_types.h" @@ -51,6 +52,10 @@ namespace doris { class IRuntimeFilter; +inline int32_t get_execution_rpc_timeout_ms(int32_t execution_timeout_sec) { + return std::min(config::execution_max_rpc_timeout_sec, execution_timeout_sec) * 1000; +} + namespace pipeline { class PipelineXLocalStateBase; class PipelineXSinkLocalStateBase; diff --git a/be/src/runtime/stream_load/stream_load_context.h b/be/src/runtime/stream_load/stream_load_context.h index 9d1601372f877d4..93f76fad4e613cd 100644 --- a/be/src/runtime/stream_load/stream_load_context.h +++ 
b/be/src/runtime/stream_load/stream_load_context.h @@ -164,9 +164,10 @@ class StreamLoadContext { // the following members control the max progress of a consuming // process. if any of them reach, the consuming will finish. - int64_t max_interval_s = 5; - int64_t max_batch_rows = 100000; - int64_t max_batch_size = 100 * 1024 * 1024; // 100MB + // same as values set in fe/fe-core/src/main/java/org/apache/doris/load/routineload/RoutineLoadJob.java + int64_t max_interval_s = 60; + int64_t max_batch_rows = 20000000; + int64_t max_batch_size = 1024 * 1024 * 1024; // 1GB // for parse json-data std::string data_format = ""; diff --git a/be/src/runtime/workload_group/workload_group.cpp b/be/src/runtime/workload_group/workload_group.cpp index 0488e9ec83c6c25..c6a3c07adda1dde 100644 --- a/be/src/runtime/workload_group/workload_group.cpp +++ b/be/src/runtime/workload_group/workload_group.cpp @@ -468,10 +468,9 @@ void WorkloadGroup::upsert_task_scheduler(WorkloadGroupInfo* tg_info, ExecEnv* e if (executors_size <= 0) { executors_size = CpuInfo::num_cores(); } - auto task_queue = std::make_shared(executors_size); std::unique_ptr pipeline_task_scheduler = - std::make_unique(exec_env, std::move(task_queue), - "Pipe_" + tg_name, cg_cpu_ctl_ptr); + std::make_unique(executors_size, "Pipe_" + tg_name, + cg_cpu_ctl_ptr); Status ret = pipeline_task_scheduler->start(); if (ret.ok()) { _task_sched = std::move(pipeline_task_scheduler); diff --git a/be/src/service/backend_service.cpp b/be/src/service/backend_service.cpp index d56aa49b19b1cf1..86d47add0dadade 100644 --- a/be/src/service/backend_service.cpp +++ b/be/src/service/backend_service.cpp @@ -353,11 +353,8 @@ void _ingest_binlog(StorageEngine& engine, IngestBinlogArg* arg) { std::vector segment_index_file_names; auto tablet_schema = rowset_meta->tablet_schema(); if (tablet_schema->get_inverted_index_storage_format() == InvertedIndexStorageFormatPB::V1) { - for (const auto& index : tablet_schema->indexes()) { - if (index.index_type() 
!= IndexType::INVERTED) { - continue; - } - auto index_id = index.index_id(); + for (const auto& index : tablet_schema->inverted_indexes()) { + auto index_id = index->index_id(); for (int64_t segment_index = 0; segment_index < num_segments; ++segment_index) { auto get_segment_index_file_size_url = fmt::format( "{}?method={}&tablet_id={}&rowset_id={}&segment_index={}&segment_index_id={" @@ -379,7 +376,7 @@ void _ingest_binlog(StorageEngine& engine, IngestBinlogArg* arg) { rowset_meta->rowset_id().to_string(), segment_index); segment_index_file_names.push_back(InvertedIndexDescriptor::get_index_file_path_v1( InvertedIndexDescriptor::get_index_file_path_prefix(segment_path), index_id, - index.get_index_suffix())); + index->get_index_suffix())); status = HttpClient::execute_with_retry(max_retry, 1, get_segment_index_file_size_cb); @@ -802,6 +799,11 @@ void BaseBackendService::submit_routine_load_task(TStatus& t_status, void BaseBackendService::open_scanner(TScanOpenResult& result_, const TScanOpenParams& params) { TStatus t_status; TUniqueId fragment_instance_id = generate_uuid(); + // A query_id is randomly generated to replace t_query_plan_info.query_id. + // external query does not need to report anything to FE, so the query_id can be changed. + // Otherwise, multiple independent concurrent open tablet scanners have the same query_id. + // when one of the scanners ends, the other scanners will be canceled through FragmentMgr.cancel(query_id). 
+ TUniqueId query_id = generate_uuid(); std::shared_ptr p_context; static_cast(_exec_env->external_scan_context_mgr()->create_scan_context(&p_context)); p_context->fragment_instance_id = fragment_instance_id; @@ -838,13 +840,18 @@ void BaseBackendService::open_scanner(TScanOpenResult& result_, const TScanOpenP << " deserialize error, should not be modified after returned Doris FE processed"; exec_st = Status::InvalidArgument(msg.str()); } - p_context->query_id = t_query_plan_info.query_id; + p_context->query_id = query_id; } std::vector selected_columns; if (exec_st.ok()) { // start the scan procedure + LOG(INFO) << fmt::format( + "exec external scanner, old_query_id = {}, new_query_id = {}, fragment_instance_id " + "= {}", + print_id(t_query_plan_info.query_id), print_id(query_id), + print_id(fragment_instance_id)); exec_st = _exec_env->fragment_mgr()->exec_external_plan_fragment( - params, t_query_plan_info, fragment_instance_id, &selected_columns); + params, t_query_plan_info, query_id, fragment_instance_id, &selected_columns); } exec_st.to_thrift(&t_status); //return status diff --git a/be/src/service/http_service.cpp b/be/src/service/http_service.cpp index 9330867ded65a11..e7b920796a1b985 100644 --- a/be/src/service/http_service.cpp +++ b/be/src/service/http_service.cpp @@ -203,7 +203,20 @@ Status HttpService::start() { static_cast(PprofActions::setup(_env, _ev_http_server.get(), _pool)); // register jeprof actions - static_cast(JeprofileActions::setup(_env, _ev_http_server.get(), _pool)); + SetJeHeapProfileActiveActions* set_jeheap_profile_active_action = + _pool.add(new SetJeHeapProfileActiveActions(_env)); + _ev_http_server->register_handler(HttpMethod::GET, "/jeheap/active/{prof_value}", + set_jeheap_profile_active_action); + + DumpJeHeapProfileToDotActions* dump_jeheap_profile_to_dot_action = + _pool.add(new DumpJeHeapProfileToDotActions(_env)); + _ev_http_server->register_handler(HttpMethod::GET, "/jeheap/dump", + dump_jeheap_profile_to_dot_action); + + 
DumpJeHeapProfileActions* dump_jeheap_profile_action = + _pool.add(new DumpJeHeapProfileActions(_env)); + _ev_http_server->register_handler(HttpMethod::GET, "/jeheap/dump_only", + dump_jeheap_profile_action); // register metrics { @@ -374,7 +387,7 @@ void HttpService::register_local_handler(StorageEngine& engine) { _ev_http_server->register_handler(HttpMethod::POST, "/api/pad_rowset", pad_rowset_action); ReportAction* report_tablet_action = _pool.add(new ReportAction( - _env, TPrivilegeHier::GLOBAL, TPrivilegeType::ADMIN, "REPORT_OLAP_TABLE")); + _env, TPrivilegeHier::GLOBAL, TPrivilegeType::ADMIN, "REPORT_OLAP_TABLET")); _ev_http_server->register_handler(HttpMethod::GET, "/api/report/tablet", report_tablet_action); ReportAction* report_disk_action = _pool.add(new ReportAction( diff --git a/be/src/service/internal_service.cpp b/be/src/service/internal_service.cpp index 8217bd11bb91563..89b43ec52235013 100644 --- a/be/src/service/internal_service.cpp +++ b/be/src/service/internal_service.cpp @@ -886,13 +886,10 @@ void PInternalService::fetch_arrow_flight_schema(google::protobuf::RpcController Status PInternalService::_tablet_fetch_data(const PTabletKeyLookupRequest* request, PTabletKeyLookupResponse* response) { - PointQueryExecutor lookup_util; - RETURN_IF_ERROR(lookup_util.init(request, response)); - RETURN_IF_ERROR(lookup_util.lookup_up()); - if (VLOG_DEBUG_IS_ON) { - VLOG_DEBUG << lookup_util.print_profile(); - } - LOG_EVERY_N(INFO, 500) << lookup_util.print_profile(); + PointQueryExecutor executor; + RETURN_IF_ERROR(executor.init(request, response)); + RETURN_IF_ERROR(executor.lookup_up()); + executor.print_profile(); return Status::OK(); } @@ -1159,7 +1156,10 @@ void PInternalService::fetch_remote_tablet_schema(google::protobuf::RpcControlle LOG(WARNING) << "tablet does not exist, tablet id is " << tablet_id; continue; } - tablet_schemas.push_back(res.value()->merged_tablet_schema()); + auto schema = res.value()->merged_tablet_schema(); + if (schema != 
nullptr) { + tablet_schemas.push_back(schema); + } } if (!tablet_schemas.empty()) { // merge all diff --git a/be/src/service/point_query_executor.cpp b/be/src/service/point_query_executor.cpp index 9719a672b8dff49..74dab4663403301 100644 --- a/be/src/service/point_query_executor.cpp +++ b/be/src/service/point_query_executor.cpp @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -39,6 +40,7 @@ #include "olap/olap_tuple.h" #include "olap/row_cursor.h" #include "olap/rowset/beta_rowset.h" +#include "olap/rowset/rowset_fwd.h" #include "olap/storage_engine.h" #include "olap/tablet_manager.h" #include "olap/tablet_schema.h" @@ -313,34 +315,48 @@ Status PointQueryExecutor::lookup_up() { return Status::OK(); } -std::string PointQueryExecutor::print_profile() { +void PointQueryExecutor::print_profile() { auto init_us = _profile_metrics.init_ns.value() / 1000; auto init_key_us = _profile_metrics.init_key_ns.value() / 1000; auto lookup_key_us = _profile_metrics.lookup_key_ns.value() / 1000; auto lookup_data_us = _profile_metrics.lookup_data_ns.value() / 1000; auto output_data_us = _profile_metrics.output_data_ns.value() / 1000; + auto load_segments_key_us = _profile_metrics.load_segment_key_stage_ns.value() / 1000; + auto load_segments_data_us = _profile_metrics.load_segment_data_stage_ns.value() / 1000; auto total_us = init_us + lookup_key_us + lookup_data_us + output_data_us; auto read_stats = _profile_metrics.read_stats; - return fmt::format( - "" + const std::string stats_str = fmt::format( "[lookup profile:{}us] init:{}us, init_key:{}us," - "" - "" - "lookup_key:{}us, lookup_data:{}us, output_data:{}us, hit_lookup_cache:{}" - "" - "" + " lookup_key:{}us, load_segments_key:{}us, lookup_data:{}us, load_segments_data:{}us," + " output_data:{}us, " + "hit_lookup_cache:{}" ", is_binary_row:{}, output_columns:{}, total_keys:{}, row_cache_hits:{}" ", hit_cached_pages:{}, total_pages_read:{}, compressed_bytes_read:{}, " "io_latency:{}ns, " 
"uncompressed_bytes_read:{}, result_data_bytes:{}, row_hits:{}" - ", rs_column_uid:{}" - "", - total_us, init_us, init_key_us, lookup_key_us, lookup_data_us, output_data_us, - _profile_metrics.hit_lookup_cache, _binary_row_format, _reusable->output_exprs().size(), - _row_read_ctxs.size(), _profile_metrics.row_cache_hits, read_stats.cached_pages_num, + ", rs_column_uid:{}, bytes_read_from_local:{}, bytes_read_from_remote:{}, " + "local_io_timer:{}, remote_io_timer:{}, local_write_timer:{}", + total_us, init_us, init_key_us, lookup_key_us, load_segments_key_us, lookup_data_us, + load_segments_data_us, output_data_us, _profile_metrics.hit_lookup_cache, + _binary_row_format, _reusable->output_exprs().size(), _row_read_ctxs.size(), + _profile_metrics.row_cache_hits, read_stats.cached_pages_num, read_stats.total_pages_num, read_stats.compressed_bytes_read, read_stats.io_ns, read_stats.uncompressed_bytes_read, _profile_metrics.result_data_bytes, _row_hits, - _reusable->rs_column_uid()); + _reusable->rs_column_uid(), + _profile_metrics.read_stats.file_cache_stats.bytes_read_from_local, + _profile_metrics.read_stats.file_cache_stats.bytes_read_from_remote, + _profile_metrics.read_stats.file_cache_stats.local_io_timer, + _profile_metrics.read_stats.file_cache_stats.remote_io_timer, + _profile_metrics.read_stats.file_cache_stats.write_cache_io_timer); + + constexpr static int kSlowThreholdUs = 50 * 1000; // 50ms + if (total_us > kSlowThreholdUs) { + LOG(WARNING) << "slow query, " << stats_str; + } else if (VLOG_DEBUG_IS_ON) { + VLOG_DEBUG << stats_str; + } else { + LOG_EVERY_N(INFO, 1000) << stats_str; + } } Status PointQueryExecutor::_init_keys(const PTabletKeyLookupRequest* request) { @@ -380,6 +396,17 @@ Status PointQueryExecutor::_lookup_row_key() { specified_rowsets = _tablet->get_rowset_by_ids(nullptr); } std::vector> segment_caches(specified_rowsets.size()); + // init segment_cache + { + SCOPED_TIMER(&_profile_metrics.load_segment_key_stage_ns); + for (size_t i = 0; i 
< specified_rowsets.size(); i++) { + auto& rs = specified_rowsets[i]; + segment_caches[i] = std::make_unique(); + RETURN_IF_ERROR(SegmentLoader::instance()->load_segments( + std::static_pointer_cast(rs), segment_caches[i].get(), true, true, + &_profile_metrics.read_stats)); + } + } for (size_t i = 0; i < _row_read_ctxs.size(); ++i) { RowLocation location; if (!config::disable_storage_row_cache) { @@ -396,7 +423,8 @@ Status PointQueryExecutor::_lookup_row_key() { auto rowset_ptr = std::make_unique(); st = (_tablet->lookup_row_key(_row_read_ctxs[i]._primary_key, nullptr, false, specified_rowsets, &location, INT32_MAX /*rethink?*/, - segment_caches, rowset_ptr.get(), false)); + segment_caches, rowset_ptr.get(), false, nullptr, + &_profile_metrics.read_stats)); if (st.is()) { continue; } @@ -459,7 +487,11 @@ Status PointQueryExecutor::_lookup_row_data() { BetaRowsetSharedPtr rowset = std::static_pointer_cast(_tablet->get_rowset(row_loc.rowset_id)); SegmentCacheHandle segment_cache; - RETURN_IF_ERROR(SegmentLoader::instance()->load_segments(rowset, &segment_cache, true)); + { + SCOPED_TIMER(&_profile_metrics.load_segment_data_stage_ns); + RETURN_IF_ERROR( + SegmentLoader::instance()->load_segments(rowset, &segment_cache, true)); + } // find segment auto it = std::find_if(segment_cache.get_segments().cbegin(), segment_cache.get_segments().cend(), diff --git a/be/src/service/point_query_executor.h b/be/src/service/point_query_executor.h index b22dc5bfd1d73f3..89f4ecff9b137ac 100644 --- a/be/src/service/point_query_executor.h +++ b/be/src/service/point_query_executor.h @@ -276,12 +276,16 @@ struct Metrics { init_key_ns(TUnit::TIME_NS), lookup_key_ns(TUnit::TIME_NS), lookup_data_ns(TUnit::TIME_NS), - output_data_ns(TUnit::TIME_NS) {} + output_data_ns(TUnit::TIME_NS), + load_segment_key_stage_ns(TUnit::TIME_NS), + load_segment_data_stage_ns(TUnit::TIME_NS) {} RuntimeProfile::Counter init_ns; RuntimeProfile::Counter init_key_ns; RuntimeProfile::Counter lookup_key_ns; 
RuntimeProfile::Counter lookup_data_ns; RuntimeProfile::Counter output_data_ns; + RuntimeProfile::Counter load_segment_key_stage_ns; + RuntimeProfile::Counter load_segment_data_stage_ns; OlapReaderStatistics read_stats; size_t row_cache_hits = 0; bool hit_lookup_cache = false; @@ -297,7 +301,9 @@ class PointQueryExecutor { Status lookup_up(); - std::string print_profile(); + void print_profile(); + + const OlapReaderStatistics& read_stats() const { return _read_stats; } private: Status _init_keys(const PTabletKeyLookupRequest* request); diff --git a/be/src/util/arrow/row_batch.cpp b/be/src/util/arrow/row_batch.cpp index 2c6ed52ddde65fa..0cbb6bcd0c8916d 100644 --- a/be/src/util/arrow/row_batch.cpp +++ b/be/src/util/arrow/row_batch.cpp @@ -157,17 +157,8 @@ Status convert_to_arrow_type(const TypeDescriptor& type, std::shared_ptr* field, - const std::string& timezone) { - std::shared_ptr type; - RETURN_IF_ERROR(convert_to_arrow_type(desc->type(), &type, timezone)); - *field = arrow::field(desc->col_name(), type, desc->is_nullable()); - return Status::OK(); -} - -Status convert_block_arrow_schema(const vectorized::Block& block, - std::shared_ptr* result, - const std::string& timezone) { +Status get_arrow_schema(const vectorized::Block& block, std::shared_ptr* result, + const std::string& timezone) { std::vector> fields; for (const auto& type_and_name : block) { std::shared_ptr arrow_type; @@ -180,21 +171,6 @@ Status convert_block_arrow_schema(const vectorized::Block& block, return Status::OK(); } -Status convert_to_arrow_schema(const RowDescriptor& row_desc, - std::shared_ptr* result, - const std::string& timezone) { - std::vector> fields; - for (auto tuple_desc : row_desc.tuple_descriptors()) { - for (auto desc : tuple_desc->slots()) { - std::shared_ptr field; - RETURN_IF_ERROR(convert_to_arrow_field(desc, &field, timezone)); - fields.push_back(field); - } - } - *result = arrow::schema(std::move(fields)); - return Status::OK(); -} - Status 
convert_expr_ctxs_arrow_schema(const vectorized::VExprContextSPtrs& output_vexpr_ctxs, std::shared_ptr* result, const std::string& timezone) { diff --git a/be/src/util/arrow/row_batch.h b/be/src/util/arrow/row_batch.h index 9a33719a1cfbcce..3993003baf6e95c 100644 --- a/be/src/util/arrow/row_batch.h +++ b/be/src/util/arrow/row_batch.h @@ -45,12 +45,8 @@ Status convert_to_arrow_type(const TypeDescriptor& type, std::shared_ptr* result, const std::string& timezone); - -Status convert_block_arrow_schema(const vectorized::Block& block, - std::shared_ptr* result, - const std::string& timezone); +Status get_arrow_schema(const vectorized::Block& block, std::shared_ptr* result, + const std::string& timezone); Status convert_expr_ctxs_arrow_schema(const vectorized::VExprContextSPtrs& output_vexpr_ctxs, std::shared_ptr* result, diff --git a/be/src/util/block_compression.cpp b/be/src/util/block_compression.cpp index d13c0c091b9ceda..d1788b0948a6f2c 100644 --- a/be/src/util/block_compression.cpp +++ b/be/src/util/block_compression.cpp @@ -45,6 +45,7 @@ #include #include #include +#include #include #include "common/config.h" diff --git a/be/src/util/hash_util.hpp b/be/src/util/hash_util.hpp index dc70b1c9f9c40b7..e9ac72c5ccdcb46 100644 --- a/be/src/util/hash_util.hpp +++ b/be/src/util/hash_util.hpp @@ -134,7 +134,7 @@ class HashUtil { static const uint32_t MURMUR3_32_SEED = 104729; // modify from https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp - static uint32_t murmur_hash3_32(const void* key, int32_t len, uint32_t seed) { + static uint32_t murmur_hash3_32(const void* key, int64_t len, uint32_t seed) { uint32_t out = 0; murmur_hash3_x86_32(key, len, seed, &out); return out; @@ -227,7 +227,7 @@ class HashUtil { // Our hash function is MurmurHash2, 64 bit version. // It was modified in order to provide the same result in // big and little endian archs (endian neutral). 
- static uint64_t murmur_hash64A(const void* key, int32_t len, unsigned int seed) { + static uint64_t murmur_hash64A(const void* key, int64_t len, unsigned int seed) { const uint64_t m = MURMUR_PRIME; const int r = 47; uint64_t h = seed ^ (len * m); diff --git a/be/src/util/jni-util.cpp b/be/src/util/jni-util.cpp index 02d20ed9a4fe809..6ad0790ef0859ef 100644 --- a/be/src/util/jni-util.cpp +++ b/be/src/util/jni-util.cpp @@ -317,6 +317,7 @@ Status JniUtil::GetJniExceptionMsg(JNIEnv* env, bool log_stack, const string& pr } jobject JniUtil::convert_to_java_map(JNIEnv* env, const std::map& map) { + //TODO: ADD EXCEPTION CHECK. jclass hashmap_class = env->FindClass("java/util/HashMap"); jmethodID hashmap_constructor = env->GetMethodID(hashmap_class, "", "(I)V"); jobject hashmap_object = env->NewObject(hashmap_class, hashmap_constructor, map.size()); @@ -399,16 +400,26 @@ std::map JniUtil::convert_to_cpp_map(JNIEnv* env, jobj Status JniUtil::GetGlobalClassRef(JNIEnv* env, const char* class_str, jclass* class_ref) { *class_ref = NULL; - jclass local_cl = env->FindClass(class_str); - RETURN_ERROR_IF_EXC(env); + JNI_CALL_METHOD_CHECK_EXCEPTION_DELETE_REF(jclass, local_cl, env, FindClass(class_str)); RETURN_IF_ERROR(LocalToGlobalRef(env, local_cl, reinterpret_cast(class_ref))); - env->DeleteLocalRef(local_cl); - RETURN_ERROR_IF_EXC(env); return Status::OK(); } Status JniUtil::LocalToGlobalRef(JNIEnv* env, jobject local_ref, jobject* global_ref) { *global_ref = env->NewGlobalRef(local_ref); + // NewGlobalRef: + // Returns a global reference to the given obj. + // + //May return NULL if: + // obj refers to null + // the system has run out of memory + // obj was a weak global reference and has already been garbage collected + if (*global_ref == NULL) { + return Status::InternalError( + "LocalToGlobalRef fail,global ref is NULL,maybe the system has run out of memory."); + } + + //NewGlobalRef not throw exception,maybe we just need check NULL. 
RETURN_ERROR_IF_EXC(env); return Status::OK(); } diff --git a/be/src/util/jni-util.h b/be/src/util/jni-util.h index 666a5e526dfbdaf..df332951afebb8a 100644 --- a/be/src/util/jni-util.h +++ b/be/src/util/jni-util.h @@ -28,6 +28,7 @@ #include "common/status.h" #include "jni_md.h" +#include "util/defer_op.h" #include "util/thrift_util.h" #ifdef USE_HADOOP_HDFS @@ -38,12 +39,25 @@ extern "C" JNIEnv* getJNIEnv(void); namespace doris { class JniUtil; -#define RETURN_ERROR_IF_EXC(env) \ - do { \ - jthrowable exc = (env)->ExceptionOccurred(); \ - if (exc != nullptr) return JniUtil::GetJniExceptionMsg(env); \ +#define RETURN_ERROR_IF_EXC(env) \ + do { \ + if (env->ExceptionCheck()) [[unlikely]] \ + return JniUtil::GetJniExceptionMsg(env); \ } while (false) +#define JNI_CALL_METHOD_CHECK_EXCEPTION_DELETE_REF(type, result, env, func) \ + type result = env->func; \ + DEFER(env->DeleteLocalRef(result)); \ + RETURN_ERROR_IF_EXC(env) + +#define JNI_CALL_METHOD_CHECK_EXCEPTION(type, result, env, func) \ + type result = env->func; \ + RETURN_ERROR_IF_EXC(env) + +//In order to reduce the potential risks caused by not handling exceptions, +// you need to refer to https://docs.oracle.com/javase/8/docs/technotes/guides/jni/spec/functions.html +// to confirm whether the jni method will throw an exception. + class JniUtil { public: static Status Init() WARN_UNUSED_RESULT; @@ -65,6 +79,10 @@ class JniUtil { return Status::OK(); } + //jclass is generally a local reference. + //Method ID and field ID values are forever. + //If you want to use the jclass across multiple threads or multiple calls into the JNI code you need + // to create a global reference to it with GetGlobalClassRef(). 
static Status GetGlobalClassRef(JNIEnv* env, const char* class_str, jclass* class_ref) WARN_UNUSED_RESULT; diff --git a/be/src/util/jvm_metrics.cpp b/be/src/util/jvm_metrics.cpp index fc30d1073acdc6a..4cb71f5e827878d 100644 --- a/be/src/util/jvm_metrics.cpp +++ b/be/src/util/jvm_metrics.cpp @@ -22,7 +22,9 @@ #include #include "common/config.h" +#include "util/defer_op.h" #include "util/metrics.h" + namespace doris { #define DEFINE_JVM_SIZE_BYTES_METRIC(name, type) \ @@ -90,9 +92,13 @@ JvmMetrics::JvmMetrics(MetricRegistry* registry, JNIEnv* env) { break; } try { - _jvm_stats.init(env); + Status st = _jvm_stats.init(env); + if (!st) { + LOG(WARNING) << "jvm Stats Init Fail. " << st.to_string(); + break; + } } catch (...) { - LOG(WARNING) << "JVM STATS INIT FAIL"; + LOG(WARNING) << "jvm Stats Throw Exception Init Fail."; break; } if (!_jvm_stats.init_complete()) { @@ -133,21 +139,22 @@ JvmMetrics::JvmMetrics(MetricRegistry* registry, JNIEnv* env) { void JvmMetrics::update() { static long fail_count = 0; - bool have_exception = false; try { - _jvm_stats.refresh(this); + Status st = _jvm_stats.refresh(this); + if (!st) { + fail_count++; + LOG(WARNING) << "Jvm Stats update Fail! " << st.to_string(); + } else { + fail_count = 0; + } } catch (...) { - have_exception = true; - LOG(WARNING) << "JVM MONITOR UPDATE FAIL!"; + LOG(WARNING) << "Jvm Stats update throw Exception!"; fail_count++; } //When 30 consecutive exceptions occur, turn off jvm information collection. 
- if (!have_exception) { - fail_count = 0; - } if (fail_count >= 30) { - LOG(WARNING) << "JVM MONITOR CLOSE!"; + LOG(WARNING) << "Jvm Stats CLOSE!"; _jvm_stats.set_complete(false); _server_entity->deregister_hook(_s_hook_name); @@ -182,193 +189,257 @@ void JvmMetrics::update() { } } -void JvmStats::init(JNIEnv* ENV) { - env = ENV; - _managementFactoryClass = env->FindClass("java/lang/management/ManagementFactory"); - if (_managementFactoryClass == nullptr) { - LOG(WARNING) - << "Class java/lang/management/ManagementFactory Not Find.JVM monitoring fails."; - return; - } +Status JvmStats::init(JNIEnv* env) { + RETURN_IF_ERROR(JniUtil::GetGlobalClassRef(env, "java/lang/management/ManagementFactory", + &_managementFactoryClass)); - _getMemoryMXBeanMethod = env->GetStaticMethodID(_managementFactoryClass, "getMemoryMXBean", - "()Ljava/lang/management/MemoryMXBean;"); + JNI_CALL_METHOD_CHECK_EXCEPTION(, _getMemoryMXBeanMethod, env, + GetStaticMethodID(_managementFactoryClass, "getMemoryMXBean", + "()Ljava/lang/management/MemoryMXBean;")); - _memoryUsageClass = env->FindClass("java/lang/management/MemoryUsage"); - if (_memoryUsageClass == nullptr) { - LOG(WARNING) << "Class java/lang/management/MemoryUsage Not Find.JVM monitoring fails."; - return; - } - _getMemoryUsageUsedMethod = env->GetMethodID(_memoryUsageClass, "getUsed", "()J"); - _getMemoryUsageCommittedMethod = env->GetMethodID(_memoryUsageClass, "getCommitted", "()J"); - _getMemoryUsageMaxMethod = env->GetMethodID(_memoryUsageClass, "getMax", "()J"); + RETURN_IF_ERROR(JniUtil::GetGlobalClassRef(env, "java/lang/management/MemoryUsage", + &_memoryUsageClass)); - _memoryMXBeanClass = env->FindClass("java/lang/management/MemoryMXBean"); - if (_memoryMXBeanClass == nullptr) { - LOG(WARNING) << "Class java/lang/management/MemoryMXBean Not Find.JVM monitoring fails."; - return; - } - _getHeapMemoryUsageMethod = env->GetMethodID(_memoryMXBeanClass, "getHeapMemoryUsage", - "()Ljava/lang/management/MemoryUsage;"); - 
_getNonHeapMemoryUsageMethod = env->GetMethodID(_memoryMXBeanClass, "getNonHeapMemoryUsage", - "()Ljava/lang/management/MemoryUsage;"); + JNI_CALL_METHOD_CHECK_EXCEPTION(, _getMemoryUsageUsedMethod, env, + GetMethodID(_memoryUsageClass, "getUsed", "()J")); - _getMemoryPoolMXBeansMethod = env->GetStaticMethodID( - _managementFactoryClass, "getMemoryPoolMXBeans", "()Ljava/util/List;"); + JNI_CALL_METHOD_CHECK_EXCEPTION(, _getMemoryUsageCommittedMethod, env, + GetMethodID(_memoryUsageClass, "getCommitted", "()J")); - _listClass = env->FindClass("java/util/List"); - if (_listClass == nullptr) { - LOG(WARNING) << "Class java/util/List Not Find.JVM monitoring fails."; - return; - } - _getListSizeMethod = env->GetMethodID(_listClass, "size", "()I"); - _getListUseIndexMethod = env->GetMethodID(_listClass, "get", "(I)Ljava/lang/Object;"); + JNI_CALL_METHOD_CHECK_EXCEPTION(, _getMemoryUsageMaxMethod, env, + GetMethodID(_memoryUsageClass, "getMax", "()J")); - _memoryPoolMXBeanClass = env->FindClass("java/lang/management/MemoryPoolMXBean"); - if (_memoryPoolMXBeanClass == nullptr) { - LOG(WARNING) - << "Class java/lang/management/MemoryPoolMXBean Not Find.JVM monitoring fails."; - return; - } - _getMemoryPoolMXBeanUsageMethod = env->GetMethodID(_memoryPoolMXBeanClass, "getUsage", - "()Ljava/lang/management/MemoryUsage;"); - _getMemoryPollMXBeanPeakMethod = env->GetMethodID(_memoryPoolMXBeanClass, "getPeakUsage", - "()Ljava/lang/management/MemoryUsage;"); - _getMemoryPollMXBeanNameMethod = - env->GetMethodID(_memoryPoolMXBeanClass, "getName", "()Ljava/lang/String;"); - - _getThreadMXBeanMethod = env->GetStaticMethodID(_managementFactoryClass, "getThreadMXBean", - "()Ljava/lang/management/ThreadMXBean;"); - - _getGarbageCollectorMXBeansMethod = env->GetStaticMethodID( - _managementFactoryClass, "getGarbageCollectorMXBeans", "()Ljava/util/List;"); - - _garbageCollectorMXBeanClass = env->FindClass("java/lang/management/GarbageCollectorMXBean"); - if (_garbageCollectorMXBeanClass 
== nullptr) { - LOG(WARNING) << "Class java/lang/management/GarbageCollectorMXBean Not Find.JVM monitoring " - "fails."; - return; - } - _getGCNameMethod = - env->GetMethodID(_garbageCollectorMXBeanClass, "getName", "()Ljava/lang/String;"); - _getGCCollectionCountMethod = - env->GetMethodID(_garbageCollectorMXBeanClass, "getCollectionCount", "()J"); - _getGCCollectionTimeMethod = - env->GetMethodID(_garbageCollectorMXBeanClass, "getCollectionTime", "()J"); - - _threadMXBeanClass = env->FindClass("java/lang/management/ThreadMXBean"); - if (_threadMXBeanClass == nullptr) { - LOG(WARNING) << "Class java/lang/management/ThreadMXBean Not Find.JVM monitoring fails."; - return; - } - _getAllThreadIdsMethod = env->GetMethodID(_threadMXBeanClass, "getAllThreadIds", "()[J"); - _getThreadInfoMethod = env->GetMethodID(_threadMXBeanClass, "getThreadInfo", - "([JI)[Ljava/lang/management/ThreadInfo;"); - _getPeakThreadCountMethod = env->GetMethodID(_threadMXBeanClass, "getPeakThreadCount", "()I"); - - _threadInfoClass = env->FindClass("java/lang/management/ThreadInfo"); - if (_threadInfoClass == nullptr) { - LOG(WARNING) << "Class java/lang/management/ThreadInfo Not Find.JVM monitoring fails."; - return; - } + RETURN_IF_ERROR(JniUtil::GetGlobalClassRef(env, "java/lang/management/MemoryMXBean", + &_memoryMXBeanClass)); - _getThreadStateMethod = - env->GetMethodID(_threadInfoClass, "getThreadState", "()Ljava/lang/Thread$State;"); + JNI_CALL_METHOD_CHECK_EXCEPTION(, _getHeapMemoryUsageMethod, env, + GetMethodID(_memoryMXBeanClass, "getHeapMemoryUsage", + "()Ljava/lang/management/MemoryUsage;")); + JNI_CALL_METHOD_CHECK_EXCEPTION(, _getNonHeapMemoryUsageMethod, env, + GetMethodID(_memoryMXBeanClass, "getNonHeapMemoryUsage", + "()Ljava/lang/management/MemoryUsage;")); - _threadStateClass = env->FindClass("java/lang/Thread$State"); - if (_threadStateClass == nullptr) { - LOG(WARNING) << "Class java/lang/Thread$State Not Find.JVM monitoring fails."; - return; - } + 
JNI_CALL_METHOD_CHECK_EXCEPTION( + , _getMemoryPoolMXBeansMethod, env, + GetStaticMethodID(_managementFactoryClass, "getMemoryPoolMXBeans", + "()Ljava/util/List;")); - jfieldID newThreadFieldID = - env->GetStaticFieldID(_threadStateClass, "NEW", "Ljava/lang/Thread$State;"); - jfieldID runnableThreadFieldID = - env->GetStaticFieldID(_threadStateClass, "RUNNABLE", "Ljava/lang/Thread$State;"); - jfieldID blockedThreadFieldID = - env->GetStaticFieldID(_threadStateClass, "BLOCKED", "Ljava/lang/Thread$State;"); - jfieldID waitingThreadFieldID = - env->GetStaticFieldID(_threadStateClass, "WAITING", "Ljava/lang/Thread$State;"); - jfieldID timedWaitingThreadFieldID = - env->GetStaticFieldID(_threadStateClass, "TIMED_WAITING", "Ljava/lang/Thread$State;"); - jfieldID terminatedThreadFieldID = - env->GetStaticFieldID(_threadStateClass, "TERMINATED", "Ljava/lang/Thread$State;"); - - _newThreadStateObj = env->GetStaticObjectField(_threadStateClass, newThreadFieldID); - _runnableThreadStateObj = env->GetStaticObjectField(_threadStateClass, runnableThreadFieldID); - _blockedThreadStateObj = env->GetStaticObjectField(_threadStateClass, blockedThreadFieldID); - _waitingThreadStateObj = env->GetStaticObjectField(_threadStateClass, waitingThreadFieldID); - _timedWaitingThreadStateObj = - env->GetStaticObjectField(_threadStateClass, timedWaitingThreadFieldID); - _terminatedThreadStateObj = - env->GetStaticObjectField(_threadStateClass, terminatedThreadFieldID); + RETURN_IF_ERROR(JniUtil::GetGlobalClassRef(env, "java/util/List", &_listClass)); - LOG(INFO) << "Start JVM monitoring."; + JNI_CALL_METHOD_CHECK_EXCEPTION(, _getListSizeMethod, env, + GetMethodID(_listClass, "size", "()I")); + + JNI_CALL_METHOD_CHECK_EXCEPTION(, _getListUseIndexMethod, env, + GetMethodID(_listClass, "get", "(I)Ljava/lang/Object;")); + + RETURN_IF_ERROR(JniUtil::GetGlobalClassRef(env, "java/lang/management/MemoryPoolMXBean", + &_memoryPoolMXBeanClass)); + + JNI_CALL_METHOD_CHECK_EXCEPTION(, 
_getMemoryPoolMXBeanUsageMethod, env, + GetMethodID(_memoryPoolMXBeanClass, "getUsage", + "()Ljava/lang/management/MemoryUsage;")); + + JNI_CALL_METHOD_CHECK_EXCEPTION(, _getMemoryPollMXBeanPeakMethod, env, + GetMethodID(_memoryPoolMXBeanClass, "getPeakUsage", + "()Ljava/lang/management/MemoryUsage;")); + JNI_CALL_METHOD_CHECK_EXCEPTION( + , _getMemoryPollMXBeanNameMethod, env, + GetMethodID(_memoryPoolMXBeanClass, "getName", "()Ljava/lang/String;")); + + JNI_CALL_METHOD_CHECK_EXCEPTION(, _getThreadMXBeanMethod, env, + GetStaticMethodID(_managementFactoryClass, "getThreadMXBean", + "()Ljava/lang/management/ThreadMXBean;")); + + JNI_CALL_METHOD_CHECK_EXCEPTION( + , _getGarbageCollectorMXBeansMethod, env, + GetStaticMethodID(_managementFactoryClass, "getGarbageCollectorMXBeans", + "()Ljava/util/List;")); + + RETURN_IF_ERROR(JniUtil::GetGlobalClassRef(env, "java/lang/management/GarbageCollectorMXBean", + &_garbageCollectorMXBeanClass)); + + JNI_CALL_METHOD_CHECK_EXCEPTION( + , _getGCNameMethod, env, + GetMethodID(_garbageCollectorMXBeanClass, "getName", "()Ljava/lang/String;")); + + JNI_CALL_METHOD_CHECK_EXCEPTION( + , _getGCCollectionCountMethod, env, + GetMethodID(_garbageCollectorMXBeanClass, "getCollectionCount", "()J")); + + JNI_CALL_METHOD_CHECK_EXCEPTION( + , _getGCCollectionTimeMethod, env, + GetMethodID(_garbageCollectorMXBeanClass, "getCollectionTime", "()J")); + + RETURN_IF_ERROR(JniUtil::GetGlobalClassRef(env, "java/lang/management/ThreadMXBean", + &_threadMXBeanClass)); + + JNI_CALL_METHOD_CHECK_EXCEPTION(, + + _getAllThreadIdsMethod, env, + GetMethodID(_threadMXBeanClass, "getAllThreadIds", "()[J")); + + JNI_CALL_METHOD_CHECK_EXCEPTION(, + + _getThreadInfoMethod, env, + GetMethodID(_threadMXBeanClass, "getThreadInfo", + "([JI)[Ljava/lang/management/ThreadInfo;")); + + JNI_CALL_METHOD_CHECK_EXCEPTION(, + + _getPeakThreadCountMethod, env, + GetMethodID(_threadMXBeanClass, "getPeakThreadCount", "()I")); + + RETURN_IF_ERROR( + JniUtil::GetGlobalClassRef(env, 
"java/lang/management/ThreadInfo", &_threadInfoClass)); + + JNI_CALL_METHOD_CHECK_EXCEPTION( + , + + _getThreadStateMethod, env, + GetMethodID(_threadInfoClass, "getThreadState", "()Ljava/lang/Thread$State;")); + + RETURN_IF_ERROR(JniUtil::GetGlobalClassRef(env, "java/lang/Thread$State", &_threadStateClass)); + + JNI_CALL_METHOD_CHECK_EXCEPTION( + jfieldID, newThreadFieldID, env, + GetStaticFieldID(_threadStateClass, "NEW", "Ljava/lang/Thread$State;")); + + JNI_CALL_METHOD_CHECK_EXCEPTION( + jfieldID, runnableThreadFieldID, env, + GetStaticFieldID(_threadStateClass, "RUNNABLE", "Ljava/lang/Thread$State;")); + + JNI_CALL_METHOD_CHECK_EXCEPTION( + jfieldID, blockedThreadFieldID, env, + GetStaticFieldID(_threadStateClass, "BLOCKED", "Ljava/lang/Thread$State;")); + JNI_CALL_METHOD_CHECK_EXCEPTION( + jfieldID, waitingThreadFieldID, env, + GetStaticFieldID(_threadStateClass, "WAITING", "Ljava/lang/Thread$State;")); + + JNI_CALL_METHOD_CHECK_EXCEPTION( + jfieldID, timedWaitingThreadFieldID, env, + GetStaticFieldID(_threadStateClass, "TIMED_WAITING", "Ljava/lang/Thread$State;")); + JNI_CALL_METHOD_CHECK_EXCEPTION( + jfieldID, terminatedThreadFieldID, env, + GetStaticFieldID(_threadStateClass, "TERMINATED", "Ljava/lang/Thread$State;")); + + JNI_CALL_METHOD_CHECK_EXCEPTION(jobject, newThreadStateObj, env, + GetStaticObjectField(_threadStateClass, newThreadFieldID)); + RETURN_IF_ERROR(JniUtil::LocalToGlobalRef(env, newThreadStateObj, &_newThreadStateObj)); + + JNI_CALL_METHOD_CHECK_EXCEPTION(jobject, runnableThreadStateObj, env, + GetStaticObjectField(_threadStateClass, runnableThreadFieldID)); + RETURN_IF_ERROR( + JniUtil::LocalToGlobalRef(env, runnableThreadStateObj, &_runnableThreadStateObj)); + + JNI_CALL_METHOD_CHECK_EXCEPTION(jobject, blockedThreadStateObj, env, + GetStaticObjectField(_threadStateClass, blockedThreadFieldID)); + RETURN_IF_ERROR(JniUtil::LocalToGlobalRef(env, blockedThreadStateObj, &_blockedThreadStateObj)); + + JNI_CALL_METHOD_CHECK_EXCEPTION(jobject, 
waitingThreadStateObj, env, + GetStaticObjectField(_threadStateClass, waitingThreadFieldID)); + RETURN_IF_ERROR(JniUtil::LocalToGlobalRef(env, waitingThreadStateObj, &_waitingThreadStateObj)); + + JNI_CALL_METHOD_CHECK_EXCEPTION( + jobject, timedWaitingThreadStateObj, env, + GetStaticObjectField(_threadStateClass, timedWaitingThreadFieldID)); + RETURN_IF_ERROR(JniUtil::LocalToGlobalRef(env, timedWaitingThreadStateObj, + &_timedWaitingThreadStateObj)); + + JNI_CALL_METHOD_CHECK_EXCEPTION( + jobject, terminatedThreadStateObj, env, + GetStaticObjectField(_threadStateClass, terminatedThreadFieldID)); + RETURN_IF_ERROR( + JniUtil::LocalToGlobalRef(env, terminatedThreadStateObj, &_terminatedThreadStateObj)); _init_complete = true; - return; + + LOG(INFO) << "Start JVM monitoring."; + return Status::OK(); } -void JvmStats::refresh(JvmMetrics* jvm_metrics) { +Status JvmStats::refresh(JvmMetrics* jvm_metrics) const { if (!_init_complete) { - return; + return Status::InternalError("Jvm Stats not init complete."); } - Status st = JniUtil::GetJNIEnv(&env); - if (!st.ok()) { - LOG(WARNING) << "JVM STATS GET JNI ENV FAIL"; - return; - } + JNIEnv* env = nullptr; + RETURN_IF_ERROR(JniUtil::GetJNIEnv(&env)); + + JNI_CALL_METHOD_CHECK_EXCEPTION_DELETE_REF( + jobject, memoryMXBeanObj, env, + CallStaticObjectMethod(_managementFactoryClass, _getMemoryMXBeanMethod)); + + JNI_CALL_METHOD_CHECK_EXCEPTION_DELETE_REF( + jobject, heapMemoryUsageObj, env, + CallObjectMethod(memoryMXBeanObj, _getHeapMemoryUsageMethod)); - jobject memoryMXBeanObj = - env->CallStaticObjectMethod(_managementFactoryClass, _getMemoryMXBeanMethod); + JNI_CALL_METHOD_CHECK_EXCEPTION(jlong, heapMemoryUsed, env, + CallLongMethod(heapMemoryUsageObj, _getMemoryUsageUsedMethod)); - jobject heapMemoryUsageObj = env->CallObjectMethod(memoryMXBeanObj, _getHeapMemoryUsageMethod); + JNI_CALL_METHOD_CHECK_EXCEPTION( + jlong, heapMemoryCommitted, env, + CallLongMethod(heapMemoryUsageObj, _getMemoryUsageCommittedMethod)); - jlong 
heapMemoryUsed = env->CallLongMethod(heapMemoryUsageObj, _getMemoryUsageUsedMethod); - jlong heapMemoryCommitted = - env->CallLongMethod(heapMemoryUsageObj, _getMemoryUsageCommittedMethod); - jlong heapMemoryMax = env->CallLongMethod(heapMemoryUsageObj, _getMemoryUsageMaxMethod); + JNI_CALL_METHOD_CHECK_EXCEPTION(jlong, heapMemoryMax, env, + CallLongMethod(heapMemoryUsageObj, _getMemoryUsageMaxMethod)); jvm_metrics->jvm_heap_size_bytes_used->set_value(heapMemoryUsed < 0 ? 0 : heapMemoryUsed); jvm_metrics->jvm_heap_size_bytes_committed->set_value( heapMemoryCommitted < 0 ? 0 : heapMemoryCommitted); jvm_metrics->jvm_heap_size_bytes_max->set_value(heapMemoryMax < 0 ? 0 : heapMemoryMax); - jobject nonHeapMemoryUsageObj = - env->CallObjectMethod(memoryMXBeanObj, _getNonHeapMemoryUsageMethod); + JNI_CALL_METHOD_CHECK_EXCEPTION_DELETE_REF( + jobject, nonHeapMemoryUsageObj, env, + CallObjectMethod(memoryMXBeanObj, _getNonHeapMemoryUsageMethod)); - jlong nonHeapMemoryCommitted = - env->CallLongMethod(nonHeapMemoryUsageObj, _getMemoryUsageCommittedMethod); - jlong nonHeapMemoryUsed = env->CallLongMethod(nonHeapMemoryUsageObj, _getMemoryUsageUsedMethod); + JNI_CALL_METHOD_CHECK_EXCEPTION( + jlong, nonHeapMemoryCommitted, env, + CallLongMethod(nonHeapMemoryUsageObj, _getMemoryUsageCommittedMethod)); + + JNI_CALL_METHOD_CHECK_EXCEPTION( + jlong, nonHeapMemoryUsed, env, + CallLongMethod(nonHeapMemoryUsageObj, _getMemoryUsageUsedMethod)); jvm_metrics->jvm_non_heap_size_bytes_committed->set_value( nonHeapMemoryCommitted < 0 ? 0 : nonHeapMemoryCommitted); jvm_metrics->jvm_non_heap_size_bytes_used->set_value(nonHeapMemoryUsed < 0 ? 
0 : nonHeapMemoryUsed); - jobject memoryPoolMXBeansList = - env->CallStaticObjectMethod(_managementFactoryClass, _getMemoryPoolMXBeansMethod); + JNI_CALL_METHOD_CHECK_EXCEPTION_DELETE_REF( + jobject, memoryPoolMXBeansList, env, + CallStaticObjectMethod(_managementFactoryClass, _getMemoryPoolMXBeansMethod)); - jint size = env->CallIntMethod(memoryPoolMXBeansList, _getListSizeMethod); + JNI_CALL_METHOD_CHECK_EXCEPTION(jint, size, env, + CallIntMethod(memoryPoolMXBeansList, _getListSizeMethod)); for (int i = 0; i < size; ++i) { - jobject memoryPoolMXBean = - env->CallObjectMethod(memoryPoolMXBeansList, _getListUseIndexMethod, i); - jobject usageObject = - env->CallObjectMethod(memoryPoolMXBean, _getMemoryPoolMXBeanUsageMethod); + JNI_CALL_METHOD_CHECK_EXCEPTION_DELETE_REF( + jobject, memoryPoolMXBean, env, + CallObjectMethod(memoryPoolMXBeansList, _getListUseIndexMethod, i)); + + JNI_CALL_METHOD_CHECK_EXCEPTION_DELETE_REF( + jobject, usageObject, env, + CallObjectMethod(memoryPoolMXBean, _getMemoryPoolMXBeanUsageMethod)); + + JNI_CALL_METHOD_CHECK_EXCEPTION(jlong, used, env, + CallLongMethod(usageObject, _getMemoryUsageUsedMethod)); + + JNI_CALL_METHOD_CHECK_EXCEPTION(jlong, max, env, + CallLongMethod(usageObject, _getMemoryUsageMaxMethod)); - jlong used = env->CallLongMethod(usageObject, _getMemoryUsageUsedMethod); - jlong max = env->CallLongMethod(usageObject, _getMemoryUsageMaxMethod); + JNI_CALL_METHOD_CHECK_EXCEPTION_DELETE_REF( + jobject, peakUsageObject, env, + CallObjectMethod(memoryPoolMXBean, _getMemoryPollMXBeanPeakMethod)); - jobject peakUsageObject = - env->CallObjectMethod(memoryPoolMXBean, _getMemoryPollMXBeanPeakMethod); + JNI_CALL_METHOD_CHECK_EXCEPTION(jlong, peakUsed, env, + CallLongMethod(peakUsageObject, _getMemoryUsageUsedMethod)); - jlong peakUsed = env->CallLongMethod(peakUsageObject, _getMemoryUsageUsedMethod); + JNI_CALL_METHOD_CHECK_EXCEPTION_DELETE_REF( + jobject, name, env, + CallObjectMethod(memoryPoolMXBean, 
_getMemoryPollMXBeanNameMethod)); - jstring name = - (jstring)env->CallObjectMethod(memoryPoolMXBean, _getMemoryPollMXBeanNameMethod); - const char* nameStr = env->GetStringUTFChars(name, nullptr); + const char* nameStr = env->GetStringUTFChars( + (jstring)name, nullptr); // GetStringUTFChars not throw exception if (nameStr != nullptr) { auto it = _memoryPoolName.find(nameStr); if (it == _memoryPoolName.end()) { @@ -385,36 +456,46 @@ void JvmStats::refresh(JvmMetrics* jvm_metrics) { jvm_metrics->jvm_old_size_bytes_max->set_value(max < 0 ? 0 : max); } - env->ReleaseStringUTFChars(name, nameStr); + env->ReleaseStringUTFChars((jstring)name, + nameStr); // ReleaseStringUTFChars not throw exception } - env->DeleteLocalRef(memoryPoolMXBean); - env->DeleteLocalRef(usageObject); - env->DeleteLocalRef(peakUsageObject); } + JNI_CALL_METHOD_CHECK_EXCEPTION_DELETE_REF( + jobject, threadMXBean, env, + CallStaticObjectMethod(_managementFactoryClass, _getThreadMXBeanMethod)); - jobject threadMXBean = - env->CallStaticObjectMethod(_managementFactoryClass, _getThreadMXBeanMethod); + JNI_CALL_METHOD_CHECK_EXCEPTION_DELETE_REF( + jobject, threadIdsObject, env, CallObjectMethod(threadMXBean, _getAllThreadIdsMethod)); - jlongArray threadIds = (jlongArray)env->CallObjectMethod(threadMXBean, _getAllThreadIdsMethod); - jint threadCount = env->GetArrayLength(threadIds); + auto threadIds = (jlongArray)threadIdsObject; - jobjectArray threadInfos = - (jobjectArray)env->CallObjectMethod(threadMXBean, _getThreadInfoMethod, threadIds, 0); + JNI_CALL_METHOD_CHECK_EXCEPTION(jint, threadCount, env, GetArrayLength(threadIds)); + + JNI_CALL_METHOD_CHECK_EXCEPTION_DELETE_REF( + jobject, threadInfos, env, + CallObjectMethod(threadMXBean, _getThreadInfoMethod, (jlongArray)threadIds, 0)); int threadsNew = 0, threadsRunnable = 0, threadsBlocked = 0, threadsWaiting = 0, threadsTimedWaiting = 0, threadsTerminated = 0; - jint peakThreadCount = env->CallIntMethod(threadMXBean, _getPeakThreadCountMethod); + + 
JNI_CALL_METHOD_CHECK_EXCEPTION(jint, peakThreadCount, env, + CallIntMethod(threadMXBean, _getPeakThreadCountMethod)); jvm_metrics->jvm_thread_peak_count->set_value(peakThreadCount < 0 ? 0 : peakThreadCount); jvm_metrics->jvm_thread_count->set_value(threadCount < 0 ? 0 : threadCount); for (int i = 0; i < threadCount; i++) { - jobject threadInfo = env->GetObjectArrayElement(threadInfos, i); + JNI_CALL_METHOD_CHECK_EXCEPTION(jobject, threadInfo, env, + GetObjectArrayElement((jobjectArray)threadInfos, i)); + if (threadInfo == nullptr) { continue; } - jobject threadState = env->CallObjectMethod(threadInfo, _getThreadStateMethod); + JNI_CALL_METHOD_CHECK_EXCEPTION_DELETE_REF( + jobject, threadState, env, CallObjectMethod(threadInfo, _getThreadStateMethod)); + + //IsSameObject not throw exception if (env->IsSameObject(threadState, _newThreadStateObj)) { threadsNew++; } else if (env->IsSameObject(threadState, _runnableThreadStateObj)) { @@ -428,8 +509,6 @@ void JvmStats::refresh(JvmMetrics* jvm_metrics) { } else if (env->IsSameObject(threadState, _terminatedThreadStateObj)) { threadsTerminated++; } - env->DeleteLocalRef(threadInfo); - env->DeleteLocalRef(threadState); } jvm_metrics->jvm_thread_new_count->set_value(threadsNew < 0 ? 0 : threadsNew); @@ -441,18 +520,27 @@ void JvmStats::refresh(JvmMetrics* jvm_metrics) { jvm_metrics->jvm_thread_terminated_count->set_value(threadsTerminated < 0 ? 
0 : threadsTerminated); - jobject gcMXBeansList = - env->CallStaticObjectMethod(_managementFactoryClass, _getGarbageCollectorMXBeansMethod); + JNI_CALL_METHOD_CHECK_EXCEPTION_DELETE_REF( + jobject, gcMXBeansList, env, + CallStaticObjectMethod(_managementFactoryClass, _getGarbageCollectorMXBeansMethod)); - jint numCollectors = env->CallIntMethod(gcMXBeansList, _getListSizeMethod); + JNI_CALL_METHOD_CHECK_EXCEPTION(jint, numCollectors, env, + CallIntMethod(gcMXBeansList, _getListSizeMethod)); for (int i = 0; i < numCollectors; i++) { - jobject gcMXBean = env->CallObjectMethod(gcMXBeansList, _getListUseIndexMethod, i); + JNI_CALL_METHOD_CHECK_EXCEPTION_DELETE_REF( + jobject, gcMXBean, env, CallObjectMethod(gcMXBeansList, _getListUseIndexMethod, i)); + + JNI_CALL_METHOD_CHECK_EXCEPTION_DELETE_REF(jobject, gcName, env, + CallObjectMethod(gcMXBean, _getGCNameMethod)); - jstring gcName = (jstring)env->CallObjectMethod(gcMXBean, _getGCNameMethod); - jlong gcCollectionCount = env->CallLongMethod(gcMXBean, _getGCCollectionCountMethod); - jlong gcCollectionTime = env->CallLongMethod(gcMXBean, _getGCCollectionTimeMethod); - const char* gcNameStr = env->GetStringUTFChars(gcName, NULL); + JNI_CALL_METHOD_CHECK_EXCEPTION(jlong, gcCollectionCount, env, + CallLongMethod(gcMXBean, _getGCCollectionCountMethod)); + + JNI_CALL_METHOD_CHECK_EXCEPTION(jlong, gcCollectionTime, env, + CallLongMethod(gcMXBean, _getGCCollectionTimeMethod)); + + const char* gcNameStr = env->GetStringUTFChars((jstring)gcName, NULL); if (gcNameStr != nullptr) { if (strcmp(gcNameStr, "G1 Young Generation") == 0) { jvm_metrics->jvm_gc_g1_young_generation_count->set_value(gcCollectionCount); @@ -463,31 +551,40 @@ void JvmStats::refresh(JvmMetrics* jvm_metrics) { jvm_metrics->jvm_gc_g1_old_generation_time_ms->set_value(gcCollectionTime); } - env->ReleaseStringUTFChars(gcName, gcNameStr); + env->ReleaseStringUTFChars((jstring)gcName, gcNameStr); } - env->DeleteLocalRef(gcMXBean); } - 
env->DeleteLocalRef(memoryMXBeanObj); - env->DeleteLocalRef(heapMemoryUsageObj); - env->DeleteLocalRef(nonHeapMemoryUsageObj); - env->DeleteLocalRef(memoryPoolMXBeansList); - env->DeleteLocalRef(threadMXBean); - env->DeleteLocalRef(gcMXBeansList); + + return Status::OK(); } JvmStats::~JvmStats() { if (!_init_complete) { return; } try { - env->DeleteLocalRef(_newThreadStateObj); - env->DeleteLocalRef(_runnableThreadStateObj); - env->DeleteLocalRef(_blockedThreadStateObj); - env->DeleteLocalRef(_waitingThreadStateObj); - env->DeleteLocalRef(_timedWaitingThreadStateObj); - env->DeleteLocalRef(_terminatedThreadStateObj); + JNIEnv* env = nullptr; + Status st = JniUtil::GetJNIEnv(&env); + if (!st.ok()) { + return; + } + env->DeleteGlobalRef(_managementFactoryClass); + env->DeleteGlobalRef(_memoryUsageClass); + env->DeleteGlobalRef(_memoryMXBeanClass); + env->DeleteGlobalRef(_listClass); + env->DeleteGlobalRef(_memoryPoolMXBeanClass); + env->DeleteGlobalRef(_threadMXBeanClass); + env->DeleteGlobalRef(_threadInfoClass); + env->DeleteGlobalRef(_threadStateClass); + env->DeleteGlobalRef(_garbageCollectorMXBeanClass); + + env->DeleteGlobalRef(_newThreadStateObj); + env->DeleteGlobalRef(_runnableThreadStateObj); + env->DeleteGlobalRef(_blockedThreadStateObj); + env->DeleteGlobalRef(_waitingThreadStateObj); + env->DeleteGlobalRef(_timedWaitingThreadStateObj); + env->DeleteGlobalRef(_terminatedThreadStateObj); } catch (...) { - // When be is killed, DeleteLocalRef may fail. // In order to exit more gracefully, we catch the exception here. 
} } diff --git a/be/src/util/jvm_metrics.h b/be/src/util/jvm_metrics.h index 459a3cbf938f79b..78346c022b0abae 100644 --- a/be/src/util/jvm_metrics.h +++ b/be/src/util/jvm_metrics.h @@ -27,7 +27,6 @@ class JvmMetrics; class JvmStats { private: - JNIEnv* env = nullptr; jclass _managementFactoryClass = nullptr; jmethodID _getMemoryMXBeanMethod = nullptr; jclass _memoryUsageClass = nullptr; @@ -96,11 +95,10 @@ class JvmStats { bool _init_complete = false; public: - // JvmStats(JNIEnv* ENV); - void init(JNIEnv* ENV); + Status init(JNIEnv* env); bool init_complete() const { return _init_complete; } void set_complete(bool val) { _init_complete = val; } - void refresh(JvmMetrics* jvm_metrics); + Status refresh(JvmMetrics* jvm_metrics) const; ~JvmStats(); }; diff --git a/be/src/util/murmur_hash3.cpp b/be/src/util/murmur_hash3.cpp index 96568d6978e2251..edd1c44f3384737 100644 --- a/be/src/util/murmur_hash3.cpp +++ b/be/src/util/murmur_hash3.cpp @@ -85,7 +85,7 @@ FORCE_INLINE uint64_t fmix64(uint64_t k) { //----------------------------------------------------------------------------- -void murmur_hash3_x86_32(const void* key, int len, uint32_t seed, void* out) { +void murmur_hash3_x86_32(const void* key, int64_t len, uint32_t seed, void* out) { const uint8_t* data = (const uint8_t*)key; const int nblocks = len / 4; @@ -435,7 +435,7 @@ void murmur_hash3_x64_128(const void* key, const int len, const uint32_t seed, v ((uint64_t*)out)[1] = h2; } -void murmur_hash3_x64_64(const void* key, const int len, const uint64_t seed, void* out) { +void murmur_hash3_x64_64(const void* key, const int64_t len, const uint64_t seed, void* out) { const uint8_t* data = (const uint8_t*)key; const int nblocks = len / 8; uint64_t h1 = seed; diff --git a/be/src/util/murmur_hash3.h b/be/src/util/murmur_hash3.h index c8e8964bf6a20e1..249966460221a36 100644 --- a/be/src/util/murmur_hash3.h +++ b/be/src/util/murmur_hash3.h @@ -25,12 +25,12 @@ typedef unsigned __int64 uint64_t; 
//----------------------------------------------------------------------------- -void murmur_hash3_x86_32(const void* key, int len, uint32_t seed, void* out); +void murmur_hash3_x86_32(const void* key, int64_t len, uint32_t seed, void* out); void murmur_hash3_x86_128(const void* key, int len, uint32_t seed, void* out); void murmur_hash3_x64_128(const void* key, int len, uint32_t seed, void* out); -void murmur_hash3_x64_64(const void* key, int len, uint64_t seed, void* out); +void murmur_hash3_x64_64(const void* key, int64_t len, uint64_t seed, void* out); //----------------------------------------------------------------------------- diff --git a/be/src/util/mysql_row_buffer.cpp b/be/src/util/mysql_row_buffer.cpp index 4823920508a9404..3e20a2d9de72fec 100644 --- a/be/src/util/mysql_row_buffer.cpp +++ b/be/src/util/mysql_row_buffer.cpp @@ -87,9 +87,9 @@ MysqlRowBuffer::MysqlRowBuffer() _len_pos(0) {} template -void MysqlRowBuffer::start_binary_row(uint32_t num_cols) { +void MysqlRowBuffer::start_binary_row(uint64_t num_cols) { assert(is_binary_format); - int bit_fields = (num_cols + 9) / 8; + auto bit_fields = (num_cols + 9) / 8; reserve(bit_fields + 1); memset(_pos, 0, 1 + bit_fields); _pos += bit_fields + 1; diff --git a/be/src/util/mysql_row_buffer.h b/be/src/util/mysql_row_buffer.h index b740efa7764ed1f..50b17c91c170ca8 100644 --- a/be/src/util/mysql_row_buffer.h +++ b/be/src/util/mysql_row_buffer.h @@ -62,7 +62,7 @@ class MysqlRowBuffer { // Prepare for binary row buffer // init bitmap - void start_binary_row(uint32_t num_cols); + void start_binary_row(uint64_t num_cols); // TODO(zhaochun): add signed/unsigned support int push_tinyint(int8_t data); diff --git a/be/src/util/s3_util.cpp b/be/src/util/s3_util.cpp index b2f4cdc3ce78857..18058469ee45660 100644 --- a/be/src/util/s3_util.cpp +++ b/be/src/util/s3_util.cpp @@ -401,15 +401,15 @@ S3Conf S3Conf::get_s3_conf(const cloud::ObjectStoreInfoPB& info) { S3Conf ret { .bucket = info.bucket(), .prefix = 
info.prefix(), - .client_conf { - .endpoint = info.endpoint(), - .region = info.region(), - .ak = info.ak(), - .sk = info.sk(), - .token {}, - .bucket = info.bucket(), - .provider = io::ObjStorageType::AWS, - }, + .client_conf {.endpoint = info.endpoint(), + .region = info.region(), + .ak = info.ak(), + .sk = info.sk(), + .token {}, + .bucket = info.bucket(), + .provider = io::ObjStorageType::AWS, + .use_virtual_addressing = + info.has_use_path_style() ? !info.use_path_style() : true}, .sse_enabled = info.sse_enabled(), }; diff --git a/be/src/util/simd/bits.h b/be/src/util/simd/bits.h index 7e2e7c8202569d2..5953c651dc6f782 100644 --- a/be/src/util/simd/bits.h +++ b/be/src/util/simd/bits.h @@ -19,6 +19,7 @@ #include #include +#include #include #if defined(__ARM_NEON) && defined(__aarch64__) @@ -27,8 +28,7 @@ #include "util/sse_util.hpp" -namespace doris { -namespace simd { +namespace doris::simd { consteval auto bits_mask_length() { #if defined(__ARM_NEON) && defined(__aarch64__) @@ -70,7 +70,7 @@ inline uint64_t bytes16_mask_to_bits64_mask(const uint8_t* data) { inline uint32_t bytes32_mask_to_bits32_mask(const uint8_t* data) { #ifdef __AVX2__ auto zero32 = _mm256_setzero_si256(); - uint32_t mask = static_cast(_mm256_movemask_epi8( + auto mask = static_cast(_mm256_movemask_epi8( _mm256_cmpgt_epi8(_mm256_loadu_si256(reinterpret_cast(data)), zero32))); #elif defined(__SSE2__) auto zero16 = _mm_setzero_si128(); @@ -125,8 +125,10 @@ void iterate_through_bits_mask(Func func, decltype(bytes_mask_to_bits_mask(nullp #endif } -inline size_t count_zero_num(const int8_t* __restrict data, size_t size) { - size_t num = 0; +template + requires requires { std::is_unsigned_v; } +inline T count_zero_num(const int8_t* __restrict data, T size) { + T num = 0; const int8_t* end = data + size; #if defined(__SSE2__) && defined(__POPCNT__) const __m128i zero16 = _mm_setzero_si128(); @@ -138,13 +140,13 @@ inline size_t count_zero_num(const int8_t* __restrict data, size_t size) { 
_mm_loadu_si128(reinterpret_cast(data)), zero16))) | (static_cast(_mm_movemask_epi8(_mm_cmpeq_epi8( _mm_loadu_si128(reinterpret_cast(data + 16)), zero16))) - << 16u) | + << 16U) | (static_cast(_mm_movemask_epi8(_mm_cmpeq_epi8( _mm_loadu_si128(reinterpret_cast(data + 32)), zero16))) - << 32u) | + << 32U) | (static_cast(_mm_movemask_epi8(_mm_cmpeq_epi8( _mm_loadu_si128(reinterpret_cast(data + 48)), zero16))) - << 48u)); + << 48U)); } #endif for (; data < end; ++data) { @@ -153,9 +155,10 @@ inline size_t count_zero_num(const int8_t* __restrict data, size_t size) { return num; } -inline size_t count_zero_num(const int8_t* __restrict data, const uint8_t* __restrict null_map, - size_t size) { - size_t num = 0; +template + requires requires { std::is_unsigned_v; } +inline T count_zero_num(const int8_t* __restrict data, const uint8_t* __restrict null_map, T size) { + T num = 0; const int8_t* end = data + size; #if defined(__SSE2__) && defined(__POPCNT__) const __m128i zero16 = _mm_setzero_si128(); @@ -172,19 +175,19 @@ inline size_t count_zero_num(const int8_t* __restrict data, const uint8_t* __res _mm_loadu_si128(reinterpret_cast(data + 16)), zero16), _mm_loadu_si128(reinterpret_cast(null_map + 16))))) - << 16u) | + << 16U) | (static_cast(_mm_movemask_epi8(_mm_or_si128( _mm_cmpeq_epi8( _mm_loadu_si128(reinterpret_cast(data + 32)), zero16), _mm_loadu_si128(reinterpret_cast(null_map + 32))))) - << 32u) | + << 32U) | (static_cast(_mm_movemask_epi8(_mm_or_si128( _mm_cmpeq_epi8( _mm_loadu_si128(reinterpret_cast(data + 48)), zero16), _mm_loadu_si128(reinterpret_cast(null_map + 48))))) - << 48u)); + << 48U)); } #endif for (; data < end; ++data, ++null_map) { @@ -235,5 +238,4 @@ inline size_t find_zero(const std::vector& vec, size_t start) { return find_byte(vec, start, 0); } -} // namespace simd -} // namespace doris +} // namespace doris::simd diff --git a/be/src/util/string_parser.hpp b/be/src/util/string_parser.hpp index 67ab41cc1c7bee6..5771434c4c6321a 100644 --- 
a/be/src/util/string_parser.hpp +++ b/be/src/util/string_parser.hpp @@ -128,7 +128,7 @@ class StringParser { // Convert a string s representing a number in given base into a decimal number. template - static inline T string_to_int(const char* __restrict s, int len, int base, + static inline T string_to_int(const char* __restrict s, int64_t len, int base, ParseResult* result) { T ans = string_to_int_internal(s, len, base, result); if (LIKELY(*result == PARSE_SUCCESS)) { @@ -207,7 +207,7 @@ class StringParser { // Convert a string s representing a number in given base into a decimal number. // Return PARSE_FAILURE on leading whitespace. Trailing whitespace is allowed. template - static inline T string_to_int_internal(const char* __restrict s, int len, int base, + static inline T string_to_int_internal(const char* __restrict s, int64_t len, int base, ParseResult* result); // Converts an ascii string to an integer of type T assuming it cannot overflow @@ -385,7 +385,7 @@ T StringParser::string_to_unsigned_int_internal(const char* __restrict s, int le } template -T StringParser::string_to_int_internal(const char* __restrict s, int len, int base, +T StringParser::string_to_int_internal(const char* __restrict s, int64_t len, int base, ParseResult* result) { typedef typename std::make_unsigned::type UnsignedT; UnsignedT val = 0; diff --git a/be/src/vec/aggregate_functions/aggregate_function.h b/be/src/vec/aggregate_functions/aggregate_function.h index cd1f8922e1b4595..39de0324d1415fb 100644 --- a/be/src/vec/aggregate_functions/aggregate_function.h +++ b/be/src/vec/aggregate_functions/aggregate_function.h @@ -20,6 +20,8 @@ #pragma once +#include "common/exception.h" +#include "common/status.h" #include "util/defer_op.h" #include "vec/columns/column_complex.h" #include "vec/columns/column_string.h" @@ -30,6 +32,7 @@ #include "vec/core/column_numbers.h" #include "vec/core/field.h" #include "vec/core/types.h" +#include "vec/data_types/data_type_nullable.h" #include 
"vec/data_types/data_type_string.h" namespace doris::vectorized { @@ -115,21 +118,21 @@ class IAggregateFunction { * Additional parameter arena should be used instead of standard memory allocator if the addition requires memory allocation. */ virtual void add(AggregateDataPtr __restrict place, const IColumn** columns, ssize_t row_num, - Arena* arena) const = 0; + Arena*) const = 0; virtual void add_many(AggregateDataPtr __restrict place, const IColumn** columns, - std::vector& rows, Arena* arena) const {} + std::vector& rows, Arena*) const {} /// Merges state (on which place points to) with other state of current aggregation function. virtual void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, - Arena* arena) const = 0; + Arena*) const = 0; virtual void merge_vec(const AggregateDataPtr* places, size_t offset, ConstAggregateDataPtr rhs, - Arena* arena, const size_t num_rows) const = 0; + Arena*, const size_t num_rows) const = 0; // same as merge_vec, but only call "merge" function when place is not nullptr virtual void merge_vec_selected(const AggregateDataPtr* places, size_t offset, - ConstAggregateDataPtr rhs, Arena* arena, + ConstAggregateDataPtr rhs, Arena*, const size_t num_rows) const = 0; /// Serializes state (to transmit it over the network, for example). @@ -146,21 +149,21 @@ class IAggregateFunction { /// Deserializes state. This function is called only for empty (just created) states. 
virtual void deserialize(AggregateDataPtr __restrict place, BufferReadable& buf, - Arena* arena) const = 0; + Arena*) const = 0; - virtual void deserialize_vec(AggregateDataPtr places, const ColumnString* column, Arena* arena, + virtual void deserialize_vec(AggregateDataPtr places, const ColumnString* column, Arena*, size_t num_rows) const = 0; virtual void deserialize_and_merge_vec(const AggregateDataPtr* places, size_t offset, - AggregateDataPtr rhs, const IColumn* column, - Arena* arena, const size_t num_rows) const = 0; + AggregateDataPtr rhs, const IColumn* column, Arena*, + const size_t num_rows) const = 0; virtual void deserialize_and_merge_vec_selected(const AggregateDataPtr* places, size_t offset, AggregateDataPtr rhs, const IColumn* column, - Arena* arena, const size_t num_rows) const = 0; + Arena*, const size_t num_rows) const = 0; - virtual void deserialize_from_column(AggregateDataPtr places, const IColumn& column, - Arena* arena, size_t num_rows) const = 0; + virtual void deserialize_from_column(AggregateDataPtr places, const IColumn& column, Arena*, + size_t num_rows) const = 0; /// Deserializes state and merge it with current aggregation function. virtual void deserialize_and_merge(AggregateDataPtr __restrict place, @@ -169,10 +172,10 @@ class IAggregateFunction { virtual void deserialize_and_merge_from_column_range(AggregateDataPtr __restrict place, const IColumn& column, size_t begin, - size_t end, Arena* arena) const = 0; + size_t end, Arena*) const = 0; virtual void deserialize_and_merge_from_column(AggregateDataPtr __restrict place, - const IColumn& column, Arena* arena) const = 0; + const IColumn& column, Arena*) const = 0; /// Inserts results into a column. virtual void insert_result_into(ConstAggregateDataPtr __restrict place, IColumn& to) const = 0; @@ -185,33 +188,32 @@ class IAggregateFunction { * and do a single call to "add_batch" for devirtualization and inlining. 
*/ virtual void add_batch(size_t batch_size, AggregateDataPtr* places, size_t place_offset, - const IColumn** columns, Arena* arena, bool agg_many = false) const = 0; + const IColumn** columns, Arena*, bool agg_many = false) const = 0; // same as add_batch, but only call "add" function when place is not nullptr virtual void add_batch_selected(size_t batch_size, AggregateDataPtr* places, - size_t place_offset, const IColumn** columns, - Arena* arena) const = 0; + size_t place_offset, const IColumn** columns, Arena*) const = 0; /** The same for single place. */ virtual void add_batch_single_place(size_t batch_size, AggregateDataPtr place, - const IColumn** columns, Arena* arena) const = 0; + const IColumn** columns, Arena*) const = 0; // only used at agg reader virtual void add_batch_range(size_t batch_begin, size_t batch_end, AggregateDataPtr place, - const IColumn** columns, Arena* arena, bool has_null = false) = 0; + const IColumn** columns, Arena*, bool has_null = false) = 0; // only used at window function virtual void add_range_single_place(int64_t partition_start, int64_t partition_end, int64_t frame_start, int64_t frame_end, AggregateDataPtr place, const IColumn** columns, - Arena* arena) const = 0; + Arena*) const = 0; virtual void streaming_agg_serialize(const IColumn** columns, BufferWritable& buf, - const size_t num_rows, Arena* arena) const = 0; + const size_t num_rows, Arena*) const = 0; virtual void streaming_agg_serialize_to_column(const IColumn** columns, MutableColumnPtr& dst, - const size_t num_rows, Arena* arena) const = 0; + const size_t num_rows, Arena*) const = 0; const DataTypes& get_argument_types() const { return argument_types; } @@ -223,6 +225,10 @@ class IAggregateFunction { virtual AggregateFunctionPtr transmit_to_stable() { return nullptr; } + /// Verify function signature + virtual Status verify_result_type(const bool without_key, const DataTypes& argument_types, + const DataTypePtr result_type) const = 0; + protected: DataTypes 
argument_types; int version {}; @@ -495,6 +501,43 @@ class IAggregateFunctionHelper : public IAggregateFunction { arena); assert_cast(this)->merge(place, rhs, arena); } + + Status verify_result_type(const bool without_key, const DataTypes& argument_types_with_nullable, + const DataTypePtr result_type_with_nullable) const override { + DataTypePtr function_result_type = assert_cast(this)->get_return_type(); + + if (function_result_type->equals(*result_type_with_nullable)) { + return Status::OK(); + } + + if (!remove_nullable(function_result_type) + ->equals(*remove_nullable(result_type_with_nullable))) { + return Status::InternalError( + "Result type of {} is not matched, planner expect {}, but get {}, with group " + "by: " + "{}", + get_name(), result_type_with_nullable->get_name(), + function_result_type->get_name(), !without_key); + } + + if (without_key == true) { + if (result_type_with_nullable->is_nullable()) { + // This branch is decicated for NullableAggregateFunction. + // When they are executed without group by key, the result from planner will be AlwaysNullable + // since Planer does not know whether there are any invalid input at runtime, if so, the result + // should be Null, so the result type must be nullable. + // Backend will wrap a ColumnNullable in this situation. For example: AggLocalState::_get_without_key_result + return Status::OK(); + } + } + + // Executed with group by key, result type must be exactly same with the return type from Planner. + return Status::InternalError( + "Result type of {} is not matched, planner expect {}, but get {}, with group by: " + "{}", + get_name(), result_type_with_nullable->get_name(), function_result_type->get_name(), + !without_key); + } }; /// Implements several methods for manipulation with data. T - type of structure with data for aggregation. 
diff --git a/be/src/vec/aggregate_functions/aggregate_function_avg.h b/be/src/vec/aggregate_functions/aggregate_function_avg.h index 8a18a88839b4db4..62fbb8078ea949e 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_avg.h +++ b/be/src/vec/aggregate_functions/aggregate_function_avg.h @@ -184,7 +184,7 @@ class AggregateFunctionAvg final column.get_data().push_back(this->data(place).template result()); } - void deserialize_from_column(AggregateDataPtr places, const IColumn& column, Arena* arena, + void deserialize_from_column(AggregateDataPtr places, const IColumn& column, Arena*, size_t num_rows) const override { auto& col = assert_cast(column); DCHECK(col.size() >= num_rows) << "source column's size should greater than num_rows"; @@ -205,7 +205,7 @@ class AggregateFunctionAvg final } void streaming_agg_serialize_to_column(const IColumn** columns, MutableColumnPtr& dst, - const size_t num_rows, Arena* arena) const override { + const size_t num_rows, Arena*) const override { auto* src_data = assert_cast(*columns[0]).get_data().data(); auto& dst_col = assert_cast(*dst); dst_col.set_item_size(sizeof(Data)); @@ -219,7 +219,7 @@ class AggregateFunctionAvg final } void deserialize_and_merge_from_column(AggregateDataPtr __restrict place, const IColumn& column, - Arena* arena) const override { + Arena*) const override { auto& col = assert_cast(column); const size_t num_rows = column.size(); DCHECK(col.size() >= num_rows) << "source column's size should greater than num_rows"; @@ -233,7 +233,7 @@ class AggregateFunctionAvg final void deserialize_and_merge_from_column_range(AggregateDataPtr __restrict place, const IColumn& column, size_t begin, size_t end, - Arena* arena) const override { + Arena*) const override { DCHECK(end <= column.size() && begin <= end) << ", begin:" << begin << ", end:" << end << ", column.size():" << column.size(); auto& col = assert_cast(column); @@ -245,19 +245,19 @@ class AggregateFunctionAvg final } void deserialize_and_merge_vec(const 
AggregateDataPtr* places, size_t offset, - AggregateDataPtr rhs, const IColumn* column, Arena* arena, + AggregateDataPtr rhs, const IColumn* column, Arena*, const size_t num_rows) const override { - this->deserialize_from_column(rhs, *column, arena, num_rows); + this->deserialize_from_column(rhs, *column, nullptr, num_rows); DEFER({ this->destroy_vec(rhs, num_rows); }); - this->merge_vec(places, offset, rhs, arena, num_rows); + this->merge_vec(places, offset, rhs, nullptr, num_rows); } void deserialize_and_merge_vec_selected(const AggregateDataPtr* places, size_t offset, - AggregateDataPtr rhs, const IColumn* column, - Arena* arena, const size_t num_rows) const override { - this->deserialize_from_column(rhs, *column, arena, num_rows); + AggregateDataPtr rhs, const IColumn* column, Arena*, + const size_t num_rows) const override { + this->deserialize_from_column(rhs, *column, nullptr, num_rows); DEFER({ this->destroy_vec(rhs, num_rows); }); - this->merge_vec_selected(places, offset, rhs, arena, num_rows); + this->merge_vec_selected(places, offset, rhs, nullptr, num_rows); } void serialize_without_key_to_column(ConstAggregateDataPtr __restrict place, diff --git a/be/src/vec/aggregate_functions/aggregate_function_bitmap.h b/be/src/vec/aggregate_functions/aggregate_function_bitmap.h index 6c504b91bf4abd1..b0619a63e1ffe89 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_bitmap.h +++ b/be/src/vec/aggregate_functions/aggregate_function_bitmap.h @@ -159,7 +159,7 @@ class AggregateFunctionBitmapSerializationHelper : IAggregateFunctionDataHelper(argument_types_) {} void streaming_agg_serialize_to_column(const IColumn** columns, MutableColumnPtr& dst, - const size_t num_rows, Arena* arena) const override { + const size_t num_rows, Arena*) const override { if (version >= BITMAP_SERDE) { auto& col = assert_cast(*dst); char place[sizeof(Data)]; @@ -171,11 +171,11 @@ class AggregateFunctionBitmapSerializationHelper assert_cast(this)->destroy(place); }); 
assert_cast(this)->add(place, columns, - i, arena); + i, nullptr); data[i] = std::move(this->data(place).value); } } else { - BaseHelper::streaming_agg_serialize_to_column(columns, dst, num_rows, arena); + BaseHelper::streaming_agg_serialize_to_column(columns, dst, num_rows, nullptr); } } @@ -194,7 +194,7 @@ class AggregateFunctionBitmapSerializationHelper } void deserialize_and_merge_from_column(AggregateDataPtr __restrict place, const IColumn& column, - Arena* arena) const override { + Arena*) const override { if (version >= BITMAP_SERDE) { auto& col = assert_cast(column); const size_t num_rows = column.size(); @@ -204,13 +204,13 @@ class AggregateFunctionBitmapSerializationHelper this->data(place).merge(data[i]); } } else { - BaseHelper::deserialize_and_merge_from_column(place, column, arena); + BaseHelper::deserialize_and_merge_from_column(place, column, nullptr); } } void deserialize_and_merge_from_column_range(AggregateDataPtr __restrict place, const IColumn& column, size_t begin, size_t end, - Arena* arena) const override { + Arena*) const override { DCHECK(end <= column.size() && begin <= end) << ", begin:" << begin << ", end:" << end << ", column.size():" << column.size(); if (version >= BITMAP_SERDE) { @@ -220,12 +220,12 @@ class AggregateFunctionBitmapSerializationHelper this->data(place).merge(data[i]); } } else { - BaseHelper::deserialize_and_merge_from_column_range(place, column, begin, end, arena); + BaseHelper::deserialize_and_merge_from_column_range(place, column, begin, end, nullptr); } } void deserialize_and_merge_vec(const AggregateDataPtr* places, size_t offset, - AggregateDataPtr rhs, const IColumn* column, Arena* arena, + AggregateDataPtr rhs, const IColumn* column, Arena*, const size_t num_rows) const override { if (version >= BITMAP_SERDE) { const auto& col = assert_cast(*column); @@ -234,13 +234,13 @@ class AggregateFunctionBitmapSerializationHelper this->data(places[i] + offset).merge(data[i]); } } else { - 
BaseHelper::deserialize_and_merge_vec(places, offset, rhs, column, arena, num_rows); + BaseHelper::deserialize_and_merge_vec(places, offset, rhs, column, nullptr, num_rows); } } void deserialize_and_merge_vec_selected(const AggregateDataPtr* places, size_t offset, - AggregateDataPtr rhs, const IColumn* column, - Arena* arena, const size_t num_rows) const override { + AggregateDataPtr rhs, const IColumn* column, Arena*, + const size_t num_rows) const override { if (version >= BITMAP_SERDE) { const auto& col = assert_cast(*column); const auto* data = col.get_data().data(); @@ -250,7 +250,7 @@ class AggregateFunctionBitmapSerializationHelper } } } else { - BaseHelper::deserialize_and_merge_vec_selected(places, offset, rhs, column, arena, + BaseHelper::deserialize_and_merge_vec_selected(places, offset, rhs, column, nullptr, num_rows); } } diff --git a/be/src/vec/aggregate_functions/aggregate_function_bitmap_agg.h b/be/src/vec/aggregate_functions/aggregate_function_bitmap_agg.h index 19352e022fa7a27..5747faf1b8e8c12 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_bitmap_agg.h +++ b/be/src/vec/aggregate_functions/aggregate_function_bitmap_agg.h @@ -72,7 +72,7 @@ class AggregateFunctionBitmapAgg final DataTypePtr get_return_type() const override { return std::make_shared(); } void add(AggregateDataPtr __restrict place, const IColumn** columns, ssize_t row_num, - Arena* arena) const override { + Arena*) const override { DCHECK_LT(row_num, columns[0]->size()); if constexpr (arg_nullable) { auto& nullable_col = @@ -90,7 +90,7 @@ class AggregateFunctionBitmapAgg final } void add_batch_single_place(size_t batch_size, AggregateDataPtr place, const IColumn** columns, - Arena* arena) const override { + Arena*) const override { if constexpr (arg_nullable) { auto& nullable_column = assert_cast(*columns[0]); const auto& column = @@ -111,7 +111,7 @@ class AggregateFunctionBitmapAgg final void reset(AggregateDataPtr place) const override { this->data(place).reset(); } 
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, - Arena* arena) const override { + Arena*) const override { this->data(place).merge(this->data(rhs)); } @@ -130,7 +130,7 @@ class AggregateFunctionBitmapAgg final } void streaming_agg_serialize_to_column(const IColumn** columns, MutableColumnPtr& dst, - const size_t num_rows, Arena* arena) const override { + const size_t num_rows, Arena*) const override { auto& col = assert_cast(*dst); char place[sizeof(Data)]; col.resize(num_rows); @@ -138,12 +138,12 @@ class AggregateFunctionBitmapAgg final for (size_t i = 0; i != num_rows; ++i) { this->create(place); DEFER({ this->destroy(place); }); - this->add(place, columns, i, arena); + this->add(place, columns, i, nullptr); data[i] = std::move(this->data(place).value); } } - void deserialize_from_column(AggregateDataPtr places, const IColumn& column, Arena* arena, + void deserialize_from_column(AggregateDataPtr places, const IColumn& column, Arena*, size_t num_rows) const override { auto& col = assert_cast(column); DCHECK(col.size() >= num_rows) << "source column's size should greater than num_rows"; @@ -165,7 +165,7 @@ class AggregateFunctionBitmapAgg final } void deserialize_and_merge_from_column(AggregateDataPtr __restrict place, const IColumn& column, - Arena* arena) const override { + Arena*) const override { auto& col = assert_cast(column); const size_t num_rows = column.size(); auto* data = col.get_data().data(); @@ -177,7 +177,7 @@ class AggregateFunctionBitmapAgg final void deserialize_and_merge_from_column_range(AggregateDataPtr __restrict place, const IColumn& column, size_t begin, size_t end, - Arena* arena) const override { + Arena*) const override { DCHECK(end <= column.size() && begin <= end) << ", begin:" << begin << ", end:" << end << ", column.size():" << column.size(); auto& col = assert_cast(column); @@ -188,7 +188,7 @@ class AggregateFunctionBitmapAgg final } void deserialize_and_merge_vec(const AggregateDataPtr* places, size_t 
offset, - AggregateDataPtr rhs, const IColumn* column, Arena* arena, + AggregateDataPtr rhs, const IColumn* column, Arena*, const size_t num_rows) const override { const auto& col = assert_cast(*column); const auto* data = col.get_data().data(); @@ -198,8 +198,8 @@ class AggregateFunctionBitmapAgg final } void deserialize_and_merge_vec_selected(const AggregateDataPtr* places, size_t offset, - AggregateDataPtr rhs, const IColumn* column, - Arena* arena, const size_t num_rows) const override { + AggregateDataPtr rhs, const IColumn* column, Arena*, + const size_t num_rows) const override { const auto& col = assert_cast(*column); const auto* data = col.get_data().data(); for (size_t i = 0; i != num_rows; ++i) { diff --git a/be/src/vec/aggregate_functions/aggregate_function_count.h b/be/src/vec/aggregate_functions/aggregate_function_count.h index 62aa869771c0a53..7b54d074683b043 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_count.h +++ b/be/src/vec/aggregate_functions/aggregate_function_count.h @@ -91,7 +91,7 @@ class AggregateFunctionCount final assert_cast(to).get_data().push_back(data(place).count); } - void deserialize_from_column(AggregateDataPtr places, const IColumn& column, Arena* arena, + void deserialize_from_column(AggregateDataPtr places, const IColumn& column, Arena*, size_t num_rows) const override { auto data = assert_cast(column).get_data().data(); memcpy(places, data, sizeof(Data) * num_rows); @@ -111,7 +111,7 @@ class AggregateFunctionCount final } void streaming_agg_serialize_to_column(const IColumn** columns, MutableColumnPtr& dst, - const size_t num_rows, Arena* arena) const override { + const size_t num_rows, Arena*) const override { auto& dst_col = assert_cast(*dst); DCHECK(dst_col.item_size() == sizeof(Data)) << "size is not equal: " << dst_col.item_size() << " " << sizeof(Data); @@ -124,7 +124,7 @@ class AggregateFunctionCount final } void deserialize_and_merge_from_column(AggregateDataPtr __restrict place, const IColumn& 
column, - Arena* arena) const override { + Arena*) const override { auto& col = assert_cast(column); const size_t num_rows = column.size(); auto* data = reinterpret_cast(col.get_data().data()); @@ -135,7 +135,7 @@ class AggregateFunctionCount final void deserialize_and_merge_from_column_range(AggregateDataPtr __restrict place, const IColumn& column, size_t begin, size_t end, - Arena* arena) const override { + Arena*) const override { DCHECK(end <= column.size() && begin <= end) << ", begin:" << begin << ", end:" << end << ", column.size():" << column.size(); auto& col = assert_cast(column); @@ -146,19 +146,19 @@ class AggregateFunctionCount final } void deserialize_and_merge_vec(const AggregateDataPtr* places, size_t offset, - AggregateDataPtr rhs, const IColumn* column, Arena* arena, + AggregateDataPtr rhs, const IColumn* column, Arena*, const size_t num_rows) const override { - this->deserialize_from_column(rhs, *column, arena, num_rows); + this->deserialize_from_column(rhs, *column, nullptr, num_rows); DEFER({ this->destroy_vec(rhs, num_rows); }); - this->merge_vec(places, offset, rhs, arena, num_rows); + this->merge_vec(places, offset, rhs, nullptr, num_rows); } void deserialize_and_merge_vec_selected(const AggregateDataPtr* places, size_t offset, - AggregateDataPtr rhs, const IColumn* column, - Arena* arena, const size_t num_rows) const override { - this->deserialize_from_column(rhs, *column, arena, num_rows); + AggregateDataPtr rhs, const IColumn* column, Arena*, + const size_t num_rows) const override { + this->deserialize_from_column(rhs, *column, nullptr, num_rows); DEFER({ this->destroy_vec(rhs, num_rows); }); - this->merge_vec_selected(places, offset, rhs, arena, num_rows); + this->merge_vec_selected(places, offset, rhs, nullptr, num_rows); } void serialize_without_key_to_column(ConstAggregateDataPtr __restrict place, @@ -229,7 +229,7 @@ class AggregateFunctionCountNotNullUnary final } } - void deserialize_from_column(AggregateDataPtr places, const 
IColumn& column, Arena* arena, + void deserialize_from_column(AggregateDataPtr places, const IColumn& column, Arena*, size_t num_rows) const override { auto data = assert_cast(column).get_data().data(); memcpy(places, data, sizeof(Data) * num_rows); @@ -249,7 +249,7 @@ class AggregateFunctionCountNotNullUnary final } void streaming_agg_serialize_to_column(const IColumn** columns, MutableColumnPtr& dst, - const size_t num_rows, Arena* arena) const override { + const size_t num_rows, Arena*) const override { auto& col = assert_cast(*dst); DCHECK(col.item_size() == sizeof(Data)) << "size is not equal: " << col.item_size() << " " << sizeof(Data); @@ -263,7 +263,7 @@ class AggregateFunctionCountNotNullUnary final } void deserialize_and_merge_from_column(AggregateDataPtr __restrict place, const IColumn& column, - Arena* arena) const override { + Arena*) const override { auto& col = assert_cast(column); const size_t num_rows = column.size(); auto* data = reinterpret_cast(col.get_data().data()); @@ -274,7 +274,7 @@ class AggregateFunctionCountNotNullUnary final void deserialize_and_merge_from_column_range(AggregateDataPtr __restrict place, const IColumn& column, size_t begin, size_t end, - Arena* arena) const override { + Arena*) const override { DCHECK(end <= column.size() && begin <= end) << ", begin:" << begin << ", end:" << end << ", column.size():" << column.size(); auto& col = assert_cast(column); @@ -286,19 +286,19 @@ class AggregateFunctionCountNotNullUnary final } void deserialize_and_merge_vec(const AggregateDataPtr* places, size_t offset, - AggregateDataPtr rhs, const IColumn* column, Arena* arena, + AggregateDataPtr rhs, const IColumn* column, Arena*, const size_t num_rows) const override { - this->deserialize_from_column(rhs, *column, arena, num_rows); + this->deserialize_from_column(rhs, *column, nullptr, num_rows); DEFER({ this->destroy_vec(rhs, num_rows); }); - this->merge_vec(places, offset, rhs, arena, num_rows); + this->merge_vec(places, offset, rhs, 
nullptr, num_rows); } void deserialize_and_merge_vec_selected(const AggregateDataPtr* places, size_t offset, - AggregateDataPtr rhs, const IColumn* column, - Arena* arena, const size_t num_rows) const override { - this->deserialize_from_column(rhs, *column, arena, num_rows); + AggregateDataPtr rhs, const IColumn* column, Arena*, + const size_t num_rows) const override { + this->deserialize_from_column(rhs, *column, nullptr, num_rows); DEFER({ this->destroy_vec(rhs, num_rows); }); - this->merge_vec_selected(places, offset, rhs, arena, num_rows); + this->merge_vec_selected(places, offset, rhs, nullptr, num_rows); } void serialize_without_key_to_column(ConstAggregateDataPtr __restrict place, diff --git a/be/src/vec/aggregate_functions/aggregate_function_count_by_enum.h b/be/src/vec/aggregate_functions/aggregate_function_count_by_enum.h index 5d4a3dde3550a12..1f5093de68263e0 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_count_by_enum.h +++ b/be/src/vec/aggregate_functions/aggregate_function_count_by_enum.h @@ -197,7 +197,7 @@ class AggregateFunctionCountByEnum final DataTypePtr get_return_type() const override { return std::make_shared(); } void add(AggregateDataPtr __restrict place, const IColumn** columns, ssize_t row_num, - Arena* arena) const override { + Arena*) const override { for (int i = 0; i < arg_count; i++) { const auto* nullable_column = check_and_get_column(columns[i]); if (nullable_column == nullptr) { @@ -217,7 +217,7 @@ class AggregateFunctionCountByEnum final void reset(AggregateDataPtr place) const override { this->data(place).reset(); } void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, - Arena* arena) const override { + Arena*) const override { this->data(place).merge(this->data(rhs)); } diff --git a/be/src/vec/aggregate_functions/aggregate_function_group_concat.h b/be/src/vec/aggregate_functions/aggregate_function_group_concat.h index a62ffb8da619f99..a0cac9ab78016d6 100644 --- 
a/be/src/vec/aggregate_functions/aggregate_function_group_concat.h +++ b/be/src/vec/aggregate_functions/aggregate_function_group_concat.h @@ -43,20 +43,27 @@ class IColumn; namespace doris::vectorized { struct AggregateFunctionGroupConcatData { - std::string data; + ColumnString::Chars data; std::string separator; bool inited = false; void add(StringRef ref, StringRef sep) { + auto delta_size = ref.size; if (!inited) { - inited = true; separator.assign(sep.data, sep.data + sep.size); } else { - data += separator; + delta_size += separator.size(); } + auto offset = data.size(); + data.resize(data.size() + delta_size); - data.resize(data.length() + ref.size); - memcpy(data.data() + data.length() - ref.size, ref.data, ref.size); + if (!inited) { + inited = true; + } else { + memcpy(data.data() + offset, separator.data(), separator.size()); + offset += separator.size(); + } + memcpy(data.data() + offset, ref.data, ref.size); } void merge(const AggregateFunctionGroupConcatData& rhs) { @@ -67,17 +74,23 @@ struct AggregateFunctionGroupConcatData { if (!inited) { inited = true; separator = rhs.separator; - data = rhs.data; + data.assign(rhs.data); } else { - data += separator; - data += rhs.data; + auto offset = data.size(); + + auto delta_size = separator.size() + rhs.data.size(); + data.resize(data.size() + delta_size); + + memcpy(data.data() + offset, separator.data(), separator.size()); + offset += separator.size(); + memcpy(data.data() + offset, rhs.data.data(), rhs.data.size()); } } - const std::string& get() const { return data; } + StringRef get() const { return StringRef {data.data(), data.size()}; } void write(BufferWritable& buf) const { - write_binary(data, buf); + write_binary(StringRef {data.data(), data.size()}, buf); write_binary(separator, buf); write_binary(inited, buf); } @@ -89,7 +102,7 @@ struct AggregateFunctionGroupConcatData { } void reset() { - data = ""; + data.clear(); separator = ""; inited = false; } @@ -150,8 +163,8 @@ class 
AggregateFunctionGroupConcat final } void insert_result_into(ConstAggregateDataPtr __restrict place, IColumn& to) const override { - const std::string& result = this->data(place).get(); - assert_cast(to).insert_data(result.c_str(), result.length()); + const auto result = this->data(place).get(); + assert_cast(to).insert_data(result.data, result.size); } }; diff --git a/be/src/vec/aggregate_functions/aggregate_function_histogram.h b/be/src/vec/aggregate_functions/aggregate_function_histogram.h index 25fc6957321586e..1d2c5725ed370f2 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_histogram.h +++ b/be/src/vec/aggregate_functions/aggregate_function_histogram.h @@ -192,7 +192,7 @@ class AggregateFunctionHistogram final DataTypePtr get_return_type() const override { return std::make_shared(); } void add(AggregateDataPtr __restrict place, const IColumn** columns, ssize_t row_num, - Arena* arena) const override { + Arena*) const override { if constexpr (has_input_param) { Int32 input_max_num_buckets = assert_cast(columns[1])->get_element(row_num); @@ -220,7 +220,7 @@ class AggregateFunctionHistogram final void reset(AggregateDataPtr place) const override { this->data(place).reset(); } void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, - Arena* arena) const override { + Arena*) const override { this->data(place).merge(this->data(rhs)); } diff --git a/be/src/vec/aggregate_functions/aggregate_function_hll_union_agg.h b/be/src/vec/aggregate_functions/aggregate_function_hll_union_agg.h index 1cf6dc7f2a29a9a..44835194eb4b88d 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_hll_union_agg.h +++ b/be/src/vec/aggregate_functions/aggregate_function_hll_union_agg.h @@ -122,7 +122,7 @@ class AggregateFunctionHLLUnion } void add(AggregateDataPtr __restrict place, const IColumn** columns, ssize_t row_num, - Arena* arena) const override { + Arena*) const override { this->data(place).add(columns[0], row_num); } diff --git 
a/be/src/vec/aggregate_functions/aggregate_function_java_udaf.h b/be/src/vec/aggregate_functions/aggregate_function_java_udaf.h index d314cba7a656a9f..d16da1a34e66e38 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_java_udaf.h +++ b/be/src/vec/aggregate_functions/aggregate_function_java_udaf.h @@ -148,6 +148,7 @@ struct AggregateJavaUdafData { jbyteArray arr = env->NewByteArray(len); env->SetByteArrayRegion(arr, 0, len, reinterpret_cast(serialize_data.data())); env->CallNonvirtualVoidMethod(executor_obj, executor_cl, executor_merge_id, place, arr); + RETURN_IF_ERROR(JniUtil::GetJniExceptionMsg(env)); jbyte* pBytes = env->GetByteArrayElements(arr, nullptr); env->ReleaseByteArrayElements(arr, pBytes, JNI_ABORT); env->DeleteLocalRef(arr); @@ -332,7 +333,7 @@ class AggregateJavaUdaf final } void add_batch(size_t batch_size, AggregateDataPtr* places, size_t place_offset, - const IColumn** columns, Arena* /*arena*/, bool /*agg_many*/) const override { + const IColumn** columns, Arena*, bool /*agg_many*/) const override { int64_t places_address = reinterpret_cast(places); Status st = this->data(_exec_place) .add(places_address, false, columns, 0, batch_size, argument_types, @@ -343,7 +344,7 @@ class AggregateJavaUdaf final } void add_batch_single_place(size_t batch_size, AggregateDataPtr place, const IColumn** columns, - Arena* /*arena*/) const override { + Arena*) const override { int64_t places_address = reinterpret_cast(place); Status st = this->data(_exec_place) .add(places_address, true, columns, 0, batch_size, argument_types, 0); @@ -354,7 +355,7 @@ class AggregateJavaUdaf final void add_range_single_place(int64_t partition_start, int64_t partition_end, int64_t frame_start, int64_t frame_end, AggregateDataPtr place, const IColumn** columns, - Arena* arena) const override { + Arena*) const override { frame_start = std::max(frame_start, partition_start); frame_end = std::min(frame_end, partition_end); int64_t places_address = reinterpret_cast(place); 
diff --git a/be/src/vec/aggregate_functions/aggregate_function_linear_histogram.h b/be/src/vec/aggregate_functions/aggregate_function_linear_histogram.h index 80572e4c2235db9..173324b9463750e 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_linear_histogram.h +++ b/be/src/vec/aggregate_functions/aggregate_function_linear_histogram.h @@ -199,7 +199,7 @@ class AggregateFunctionLinearHistogram final DataTypePtr get_return_type() const override { return std::make_shared(); } void add(AggregateDataPtr __restrict place, const IColumn** columns, ssize_t row_num, - Arena* arena) const override { + Arena*) const override { double interval = assert_cast(*columns[1]) .get_data()[row_num]; @@ -233,7 +233,7 @@ class AggregateFunctionLinearHistogram final void reset(AggregateDataPtr place) const override { this->data(place).reset(); } void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, - Arena* arena) const override { + Arena*) const override { this->data(place).merge(this->data(rhs)); } diff --git a/be/src/vec/aggregate_functions/aggregate_function_map.h b/be/src/vec/aggregate_functions/aggregate_function_map.h index d56cbf21f31136f..3ec25cdc7061525 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_map.h +++ b/be/src/vec/aggregate_functions/aggregate_function_map.h @@ -203,7 +203,7 @@ class AggregateFunctionMapAgg final } void add(AggregateDataPtr __restrict place, const IColumn** columns, ssize_t row_num, - Arena* arena) const override { + Arena*) const override { if (columns[0]->is_nullable()) { const auto& nullable_col = assert_cast(*columns[0]); @@ -234,7 +234,7 @@ class AggregateFunctionMapAgg final void reset(AggregateDataPtr place) const override { this->data(place).reset(); } void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, - Arena* arena) const override { + Arena*) const override { this->data(place).merge(this->data(rhs)); } @@ -248,7 +248,7 @@ class AggregateFunctionMapAgg final } void 
streaming_agg_serialize_to_column(const IColumn** columns, MutableColumnPtr& dst, - const size_t num_rows, Arena* arena) const override { + const size_t num_rows, Arena*) const override { auto& col = assert_cast(*dst); for (size_t i = 0; i != num_rows; ++i) { Field key, value; @@ -263,7 +263,7 @@ class AggregateFunctionMapAgg final } } - void deserialize_from_column(AggregateDataPtr places, const IColumn& column, Arena* arena, + void deserialize_from_column(AggregateDataPtr places, const IColumn& column, Arena*, size_t num_rows) const override { const auto& col = assert_cast(column); auto* data = &(this->data(places)); @@ -282,7 +282,7 @@ class AggregateFunctionMapAgg final } void deserialize_and_merge_from_column(AggregateDataPtr __restrict place, const IColumn& column, - Arena* arena) const override { + Arena*) const override { auto& col = assert_cast(column); const size_t num_rows = column.size(); for (size_t i = 0; i != num_rows; ++i) { @@ -293,7 +293,7 @@ class AggregateFunctionMapAgg final void deserialize_and_merge_from_column_range(AggregateDataPtr __restrict place, const IColumn& column, size_t begin, size_t end, - Arena* arena) const override { + Arena*) const override { DCHECK(end <= column.size() && begin <= end) << ", begin:" << begin << ", end:" << end << ", column.size():" << column.size(); const auto& col = assert_cast(column); @@ -304,7 +304,7 @@ class AggregateFunctionMapAgg final } void deserialize_and_merge_vec(const AggregateDataPtr* places, size_t offset, - AggregateDataPtr rhs, const IColumn* column, Arena* arena, + AggregateDataPtr rhs, const IColumn* column, Arena*, const size_t num_rows) const override { const auto& col = assert_cast(*column); for (size_t i = 0; i != num_rows; ++i) { @@ -314,8 +314,8 @@ class AggregateFunctionMapAgg final } void deserialize_and_merge_vec_selected(const AggregateDataPtr* places, size_t offset, - AggregateDataPtr rhs, const IColumn* column, - Arena* arena, const size_t num_rows) const override { + 
AggregateDataPtr rhs, const IColumn* column, Arena*, + const size_t num_rows) const override { const auto& col = assert_cast(*column); for (size_t i = 0; i != num_rows; ++i) { if (places[i]) { diff --git a/be/src/vec/aggregate_functions/aggregate_function_min_max.h b/be/src/vec/aggregate_functions/aggregate_function_min_max.h index a5423cd72f511a4..efc2854ff149c89 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_min_max.h +++ b/be/src/vec/aggregate_functions/aggregate_function_min_max.h @@ -104,7 +104,7 @@ struct SingleValueDataFixed { } } - void read(BufferReadable& buf, Arena* arena) { + void read(BufferReadable& buf, Arena*) { read_binary(has_value, buf); if (has()) { read_binary(value, buf); @@ -123,53 +123,53 @@ struct SingleValueDataFixed { value = to.value; } - bool change_if_less(const IColumn& column, size_t row_num, Arena* arena) { + bool change_if_less(const IColumn& column, size_t row_num, Arena*) { if (!has() || assert_cast&, TypeCheckOnRelease::DISABLE>(column) .get_data()[row_num] < value) { - change(column, row_num, arena); + change(column, row_num, nullptr); return true; } else { return false; } } - bool change_if_less(const Self& to, Arena* arena) { + bool change_if_less(const Self& to, Arena*) { if (to.has() && (!has() || to.value < value)) { - change(to, arena); + change(to, nullptr); return true; } else { return false; } } - bool change_if_greater(const IColumn& column, size_t row_num, Arena* arena) { + bool change_if_greater(const IColumn& column, size_t row_num, Arena*) { if (!has() || assert_cast&, TypeCheckOnRelease::DISABLE>(column) .get_data()[row_num] > value) { - change(column, row_num, arena); + change(column, row_num, nullptr); return true; } else { return false; } } - bool change_if_greater(const Self& to, Arena* arena) { + bool change_if_greater(const Self& to, Arena*) { if (to.has() && (!has() || to.value > value)) { - change(to, arena); + change(to, nullptr); return true; } else { return false; } } - void 
change_first_time(const IColumn& column, size_t row_num, Arena* arena) { + void change_first_time(const IColumn& column, size_t row_num, Arena*) { if (UNLIKELY(!has())) { - change(column, row_num, arena); + change(column, row_num, nullptr); } } - void change_first_time(const Self& to, Arena* arena) { + void change_first_time(const Self& to, Arena*) { if (UNLIKELY(!has() && to.has())) { - change(to, arena); + change(to, nullptr); } } }; @@ -226,7 +226,7 @@ struct SingleValueDataDecimal { } } - void read(BufferReadable& buf, Arena* arena) { + void read(BufferReadable& buf, Arena*) { read_binary(has_value, buf); if (has()) { read_binary(value, buf); @@ -245,53 +245,53 @@ struct SingleValueDataDecimal { value = to.value; } - bool change_if_less(const IColumn& column, size_t row_num, Arena* arena) { + bool change_if_less(const IColumn& column, size_t row_num, Arena*) { if (!has() || assert_cast&, TypeCheckOnRelease::DISABLE>(column) .get_data()[row_num] < value) { - change(column, row_num, arena); + change(column, row_num, nullptr); return true; } else { return false; } } - bool change_if_less(const Self& to, Arena* arena) { + bool change_if_less(const Self& to, Arena*) { if (to.has() && (!has() || to.value < value)) { - change(to, arena); + change(to, nullptr); return true; } else { return false; } } - bool change_if_greater(const IColumn& column, size_t row_num, Arena* arena) { + bool change_if_greater(const IColumn& column, size_t row_num, Arena*) { if (!has() || assert_cast&, TypeCheckOnRelease::DISABLE>(column) .get_data()[row_num] > value) { - change(column, row_num, arena); + change(column, row_num, nullptr); return true; } else { return false; } } - bool change_if_greater(const Self& to, Arena* arena) { + bool change_if_greater(const Self& to, Arena*) { if (to.has() && (!has() || to.value > value)) { - change(to, arena); + change(to, nullptr); return true; } else { return false; } } - void change_first_time(const IColumn& column, size_t row_num, Arena* arena) { 
+ void change_first_time(const IColumn& column, size_t row_num, Arena*) { if (UNLIKELY(!has())) { - change(column, row_num, arena); + change(column, row_num, nullptr); } } - void change_first_time(const Self& to, Arena* arena) { + void change_first_time(const Self& to, Arena*) { if (UNLIKELY(!has() && to.has())) { - change(to, arena); + change(to, nullptr); } } }; @@ -349,7 +349,7 @@ struct SingleValueDataString { } } - void read(BufferReadable& buf, Arena* arena) { + void read(BufferReadable& buf, Arena*) { Int32 rhs_size; read_binary(rhs_size, buf); @@ -380,7 +380,7 @@ struct SingleValueDataString { StringRef get_string_ref() const { return StringRef(get_data(), size); } /// Assuming to.has() - void change_impl(StringRef value, Arena* arena) { + void change_impl(StringRef value, Arena*) { Int32 value_size = value.size; if (value_size <= MAX_SMALL_STRING_SIZE) { @@ -402,64 +402,64 @@ struct SingleValueDataString { } } - void change(const IColumn& column, size_t row_num, Arena* arena) { + void change(const IColumn& column, size_t row_num, Arena*) { change_impl( assert_cast(column).get_data_at( row_num), - arena); + nullptr); } - void change(const Self& to, Arena* arena) { change_impl(to.get_string_ref(), arena); } + void change(const Self& to, Arena*) { change_impl(to.get_string_ref(), nullptr); } - bool change_if_less(const IColumn& column, size_t row_num, Arena* arena) { + bool change_if_less(const IColumn& column, size_t row_num, Arena*) { if (!has() || assert_cast(column).get_data_at( row_num) < get_string_ref()) { - change(column, row_num, arena); + change(column, row_num, nullptr); return true; } else { return false; } } - bool change_if_greater(const IColumn& column, size_t row_num, Arena* arena) { + bool change_if_greater(const IColumn& column, size_t row_num, Arena*) { if (!has() || assert_cast(column).get_data_at( row_num) > get_string_ref()) { - change(column, row_num, arena); + change(column, row_num, nullptr); return true; } else { return false; } } - 
bool change_if_less(const Self& to, Arena* arena) { + bool change_if_less(const Self& to, Arena*) { if (to.has() && (!has() || to.get_string_ref() < get_string_ref())) { - change(to, arena); + change(to, nullptr); return true; } else { return false; } } - bool change_if_greater(const Self& to, Arena* arena) { + bool change_if_greater(const Self& to, Arena*) { if (to.has() && (!has() || to.get_string_ref() > get_string_ref())) { - change(to, arena); + change(to, nullptr); return true; } else { return false; } } - void change_first_time(const IColumn& column, size_t row_num, Arena* arena) { + void change_first_time(const IColumn& column, size_t row_num, Arena*) { if (UNLIKELY(!has())) { - change(column, row_num, arena); + change(column, row_num, nullptr); } } - void change_first_time(const Self& to, Arena* arena) { + void change_first_time(const Self& to, Arena*) { if (UNLIKELY(!has() && to.has())) { - change(to, arena); + change(to, nullptr); } } }; @@ -472,15 +472,15 @@ struct AggregateFunctionMaxData : public Data { AggregateFunctionMaxData() { reset(); } - void change_if_better(const IColumn& column, size_t row_num, Arena* arena) { + void change_if_better(const IColumn& column, size_t row_num, Arena*) { if constexpr (Data::IsFixedLength) { this->change_if(column, row_num, false); } else { - this->change_if_greater(column, row_num, arena); + this->change_if_greater(column, row_num, nullptr); } } - void change_if_better(const Self& to, Arena* arena) { this->change_if_greater(to, arena); } + void change_if_better(const Self& to, Arena*) { this->change_if_greater(to, nullptr); } void reset() { if constexpr (Data::IsFixedLength) { @@ -500,14 +500,14 @@ struct AggregateFunctionMinData : Data { AggregateFunctionMinData() { reset(); } - void change_if_better(const IColumn& column, size_t row_num, Arena* arena) { + void change_if_better(const IColumn& column, size_t row_num, Arena*) { if constexpr (Data::IsFixedLength) { this->change_if(column, row_num, true); } else { - 
this->change_if_less(column, row_num, arena); + this->change_if_less(column, row_num, nullptr); } } - void change_if_better(const Self& to, Arena* arena) { this->change_if_less(to, arena); } + void change_if_better(const Self& to, Arena*) { this->change_if_less(to, nullptr); } void reset() { if constexpr (Data::IsFixedLength) { @@ -525,10 +525,10 @@ struct AggregateFunctionAnyData : Data { using Data::IsFixedLength; constexpr static bool IS_ANY = true; - void change_if_better(const IColumn& column, size_t row_num, Arena* arena) { - this->change_first_time(column, row_num, arena); + void change_if_better(const IColumn& column, size_t row_num, Arena*) { + this->change_first_time(column, row_num, nullptr); } - void change_if_better(const Self& to, Arena* arena) { this->change_first_time(to, arena); } + void change_if_better(const Self& to, Arena*) { this->change_first_time(to, nullptr); } static const char* name() { return "any"; } }; @@ -560,25 +560,25 @@ class AggregateFunctionsSingleValue final DataTypePtr get_return_type() const override { return type; } void add(AggregateDataPtr __restrict place, const IColumn** columns, ssize_t row_num, - Arena* arena) const override { - this->data(place).change_if_better(*columns[0], row_num, arena); + Arena*) const override { + this->data(place).change_if_better(*columns[0], row_num, nullptr); } void add_batch_single_place(size_t batch_size, AggregateDataPtr place, const IColumn** columns, - Arena* arena) const override { + Arena*) const override { if constexpr (Data::IS_ANY) { DCHECK_GT(batch_size, 0); - this->data(place).change_if_better(*columns[0], 0, arena); + this->data(place).change_if_better(*columns[0], 0, nullptr); } else { - Base::add_batch_single_place(batch_size, place, columns, arena); + Base::add_batch_single_place(batch_size, place, columns, nullptr); } } void reset(AggregateDataPtr place) const override { this->data(place).reset(); } void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, - 
Arena* arena) const override { - this->data(place).change_if_better(this->data(rhs), arena); + Arena*) const override { + this->data(place).change_if_better(this->data(rhs), nullptr); } void serialize(ConstAggregateDataPtr __restrict place, BufferWritable& buf) const override { @@ -586,15 +586,15 @@ class AggregateFunctionsSingleValue final } void deserialize(AggregateDataPtr __restrict place, BufferReadable& buf, - Arena* arena) const override { - this->data(place).read(buf, arena); + Arena*) const override { + this->data(place).read(buf, nullptr); } void insert_result_into(ConstAggregateDataPtr __restrict place, IColumn& to) const override { this->data(place).insert_result_into(to); } - void deserialize_from_column(AggregateDataPtr places, const IColumn& column, Arena* arena, + void deserialize_from_column(AggregateDataPtr places, const IColumn& column, Arena*, size_t num_rows) const override { if constexpr (Data::IsFixedLength) { const auto& col = assert_cast(column); @@ -604,7 +604,7 @@ class AggregateFunctionsSingleValue final data[i] = column_data[i]; } } else { - Base::deserialize_from_column(places, column, arena, num_rows); + Base::deserialize_from_column(places, column, nullptr, num_rows); } } @@ -623,63 +623,63 @@ class AggregateFunctionsSingleValue final } void streaming_agg_serialize_to_column(const IColumn** columns, MutableColumnPtr& dst, - const size_t num_rows, Arena* arena) const override { + const size_t num_rows, Arena*) const override { if constexpr (Data::IsFixedLength) { auto& dst_column = assert_cast(*dst); dst_column.resize(num_rows); auto* dst_data = reinterpret_cast(dst_column.get_data().data()); for (size_t i = 0; i != num_rows; ++i) { - dst_data[i].change(*columns[0], i, arena); + dst_data[i].change(*columns[0], i, nullptr); } } else { - Base::streaming_agg_serialize_to_column(columns, dst, num_rows, arena); + Base::streaming_agg_serialize_to_column(columns, dst, num_rows, nullptr); } } void 
deserialize_and_merge_from_column(AggregateDataPtr __restrict place, const IColumn& column, - Arena* arena) const override { + Arena*) const override { if constexpr (Data::IsFixedLength) { const auto& col = assert_cast(column); auto* column_data = reinterpret_cast(col.get_data().data()); const size_t num_rows = column.size(); for (size_t i = 0; i != num_rows; ++i) { - this->data(place).change_if_better(column_data[i], arena); + this->data(place).change_if_better(column_data[i], nullptr); } } else { - Base::deserialize_and_merge_from_column(place, column, arena); + Base::deserialize_and_merge_from_column(place, column, nullptr); } } void deserialize_and_merge_from_column_range(AggregateDataPtr __restrict place, const IColumn& column, size_t begin, size_t end, - Arena* arena) const override { + Arena*) const override { if constexpr (Data::IsFixedLength) { DCHECK(end <= column.size() && begin <= end) << ", begin:" << begin << ", end:" << end << ", column.size():" << column.size(); auto& col = assert_cast(column); auto* data = reinterpret_cast(col.get_data().data()); for (size_t i = begin; i <= end; ++i) { - this->data(place).change_if_better(data[i], arena); + this->data(place).change_if_better(data[i], nullptr); } } else { - Base::deserialize_and_merge_from_column_range(place, column, begin, end, arena); + Base::deserialize_and_merge_from_column_range(place, column, begin, end, nullptr); } } void deserialize_and_merge_vec(const AggregateDataPtr* places, size_t offset, - AggregateDataPtr rhs, const IColumn* column, Arena* arena, + AggregateDataPtr rhs, const IColumn* column, Arena*, const size_t num_rows) const override { - this->deserialize_from_column(rhs, *column, arena, num_rows); + this->deserialize_from_column(rhs, *column, nullptr, num_rows); DEFER({ this->destroy_vec(rhs, num_rows); }); - this->merge_vec(places, offset, rhs, arena, num_rows); + this->merge_vec(places, offset, rhs, nullptr, num_rows); } void deserialize_and_merge_vec_selected(const 
AggregateDataPtr* places, size_t offset, - AggregateDataPtr rhs, const IColumn* column, - Arena* arena, const size_t num_rows) const override { - this->deserialize_from_column(rhs, *column, arena, num_rows); + AggregateDataPtr rhs, const IColumn* column, Arena*, + const size_t num_rows) const override { + this->deserialize_from_column(rhs, *column, nullptr, num_rows); DEFER({ this->destroy_vec(rhs, num_rows); }); - this->merge_vec_selected(places, offset, rhs, arena, num_rows); + this->merge_vec_selected(places, offset, rhs, nullptr, num_rows); } void serialize_without_key_to_column(ConstAggregateDataPtr __restrict place, diff --git a/be/src/vec/aggregate_functions/aggregate_function_min_max_by.h b/be/src/vec/aggregate_functions/aggregate_function_min_max_by.h index 4caded0011a81b6..5c73ac9aa67cbe4 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_min_max_by.h +++ b/be/src/vec/aggregate_functions/aggregate_function_min_max_by.h @@ -64,7 +64,7 @@ struct BitmapValueData { } } - void read(BufferReadable& buf, Arena* arena) { + void read(BufferReadable& buf, Arena*) { read_binary(has_value, buf); if (has()) { DataTypeBitMap::deserialize_as_stream(value, buf); @@ -101,9 +101,9 @@ struct AggregateFunctionMinMaxByBaseData { key.write(buf); } - void read(BufferReadable& buf, Arena* arena) { - value.read(buf, arena); - key.read(buf, arena); + void read(BufferReadable& buf, Arena*) { + value.read(buf, nullptr); + key.read(buf, nullptr); } }; @@ -111,15 +111,15 @@ template struct AggregateFunctionMaxByData : public AggregateFunctionMinMaxByBaseData { using Self = AggregateFunctionMaxByData; void change_if_better(const IColumn& value_column, const IColumn& key_column, size_t row_num, - Arena* arena) { - if (this->key.change_if_greater(key_column, row_num, arena)) { - this->value.change(value_column, row_num, arena); + Arena*) { + if (this->key.change_if_greater(key_column, row_num, nullptr)) { + this->value.change(value_column, row_num, nullptr); } } - void 
change_if_better(const Self& to, Arena* arena) { - if (this->key.change_if_greater(to.key, arena)) { - this->value.change(to.value, arena); + void change_if_better(const Self& to, Arena*) { + if (this->key.change_if_greater(to.key, nullptr)) { + this->value.change(to.value, nullptr); } } @@ -130,15 +130,15 @@ template struct AggregateFunctionMinByData : public AggregateFunctionMinMaxByBaseData { using Self = AggregateFunctionMinByData; void change_if_better(const IColumn& value_column, const IColumn& key_column, size_t row_num, - Arena* arena) { - if (this->key.change_if_less(key_column, row_num, arena)) { - this->value.change(value_column, row_num, arena); + Arena*) { + if (this->key.change_if_less(key_column, row_num, nullptr)) { + this->value.change(value_column, row_num, nullptr); } } - void change_if_better(const Self& to, Arena* arena) { - if (this->key.change_if_less(to.key, arena)) { - this->value.change(to.value, arena); + void change_if_better(const Self& to, Arena*) { + if (this->key.change_if_less(to.key, nullptr)) { + this->value.change(to.value, nullptr); } } @@ -169,15 +169,15 @@ class AggregateFunctionsMinMaxBy final DataTypePtr get_return_type() const override { return value_type; } void add(AggregateDataPtr __restrict place, const IColumn** columns, ssize_t row_num, - Arena* arena) const override { - this->data(place).change_if_better(*columns[0], *columns[1], row_num, arena); + Arena*) const override { + this->data(place).change_if_better(*columns[0], *columns[1], row_num, nullptr); } void reset(AggregateDataPtr place) const override { this->data(place).reset(); } void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, - Arena* arena) const override { - this->data(place).change_if_better(this->data(rhs), arena); + Arena*) const override { + this->data(place).change_if_better(this->data(rhs), nullptr); } void serialize(ConstAggregateDataPtr __restrict place, BufferWritable& buf) const override { @@ -185,8 +185,8 @@ class 
AggregateFunctionsMinMaxBy final } void deserialize(AggregateDataPtr __restrict place, BufferReadable& buf, - Arena* arena) const override { - this->data(place).read(buf, arena); + Arena*) const override { + this->data(place).read(buf, nullptr); } void insert_result_into(ConstAggregateDataPtr __restrict place, IColumn& to) const override { diff --git a/be/src/vec/aggregate_functions/aggregate_function_percentile.h b/be/src/vec/aggregate_functions/aggregate_function_percentile.h index a1e739d8758fa78..0766c59f3de1c34 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_percentile.h +++ b/be/src/vec/aggregate_functions/aggregate_function_percentile.h @@ -433,7 +433,7 @@ class AggregateFunctionPercentile final } void add_batch_single_place(size_t batch_size, AggregateDataPtr place, const IColumn** columns, - Arena* arena) const override { + Arena*) const override { const auto& sources = assert_cast(*columns[0]); const auto& quantile = diff --git a/be/src/vec/aggregate_functions/aggregate_function_reader_first_last.h b/be/src/vec/aggregate_functions/aggregate_function_reader_first_last.h index 60ab42b5298e8ed..1a6ac288583b3eb 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_reader_first_last.h +++ b/be/src/vec/aggregate_functions/aggregate_function_reader_first_last.h @@ -223,7 +223,7 @@ class ReaderFunctionData final } void add(AggregateDataPtr place, const IColumn** columns, ssize_t row_num, - Arena* arena) const override { + Arena*) const override { this->data(place).add(row_num, columns); } @@ -231,7 +231,7 @@ class ReaderFunctionData final void add_range_single_place(int64_t partition_start, int64_t partition_end, int64_t frame_start, int64_t frame_end, AggregateDataPtr place, const IColumn** columns, - Arena* arena) const override { + Arena*) const override { throw doris::Exception(ErrorCode::INTERNAL_ERROR, "ReaderFunctionData do not support add_range_single_place"); __builtin_unreachable(); diff --git 
a/be/src/vec/aggregate_functions/aggregate_function_rpc.h b/be/src/vec/aggregate_functions/aggregate_function_rpc.h index c92e96aaf9d9356..f055d2c8c103a0a 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_rpc.h +++ b/be/src/vec/aggregate_functions/aggregate_function_rpc.h @@ -364,7 +364,7 @@ class AggregateRpcUdaf final } void add_batch_single_place(size_t batch_size, AggregateDataPtr place, const IColumn** columns, - Arena* arena) const override { + Arena*) const override { static_cast(this->data(place).add(columns, 0, batch_size, argument_types)); } diff --git a/be/src/vec/aggregate_functions/aggregate_function_sort.h b/be/src/vec/aggregate_functions/aggregate_function_sort.h index 145a07d5446b5c4..981580f8e6ac38b 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_sort.h +++ b/be/src/vec/aggregate_functions/aggregate_function_sort.h @@ -142,12 +142,12 @@ class AggregateFunctionSort } void add(AggregateDataPtr __restrict place, const IColumn** columns, ssize_t row_num, - Arena* arena) const override { + Arena*) const override { this->data(place).add(columns, _arguments.size(), row_num); } void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, - Arena* arena) const override { + Arena*) const override { this->data(place).merge(this->data(rhs)); } @@ -156,7 +156,7 @@ class AggregateFunctionSort } void deserialize(AggregateDataPtr __restrict place, BufferReadable& buf, - Arena* arena) const override { + Arena*) const override { this->data(place).deserialize(buf); } diff --git a/be/src/vec/aggregate_functions/aggregate_function_sum.h b/be/src/vec/aggregate_functions/aggregate_function_sum.h index 846104915b1e692..13fb3864bd1aaff 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_sum.h +++ b/be/src/vec/aggregate_functions/aggregate_function_sum.h @@ -126,7 +126,7 @@ class AggregateFunctionSum final column.get_data().push_back(this->data(place).get()); } - void deserialize_from_column(AggregateDataPtr places, const 
IColumn& column, Arena* arena, + void deserialize_from_column(AggregateDataPtr places, const IColumn& column, Arena*, size_t num_rows) const override { auto& col = assert_cast(column); auto* data = col.get_data().data(); @@ -147,7 +147,7 @@ class AggregateFunctionSum final } void streaming_agg_serialize_to_column(const IColumn** columns, MutableColumnPtr& dst, - const size_t num_rows, Arena* arena) const override { + const size_t num_rows, Arena*) const override { auto& col = assert_cast(*dst); auto& src = assert_cast(*columns[0]); DCHECK(col.item_size() == sizeof(Data)) @@ -162,7 +162,7 @@ class AggregateFunctionSum final } void deserialize_and_merge_from_column(AggregateDataPtr __restrict place, const IColumn& column, - Arena* arena) const override { + Arena*) const override { auto& col = assert_cast(column); const size_t num_rows = column.size(); auto* data = reinterpret_cast(col.get_data().data()); @@ -173,7 +173,7 @@ class AggregateFunctionSum final void deserialize_and_merge_from_column_range(AggregateDataPtr __restrict place, const IColumn& column, size_t begin, size_t end, - Arena* arena) const override { + Arena*) const override { DCHECK(end <= column.size() && begin <= end) << ", begin:" << begin << ", end:" << end << ", column.size():" << column.size(); auto& col = assert_cast(column); @@ -184,19 +184,19 @@ class AggregateFunctionSum final } void deserialize_and_merge_vec(const AggregateDataPtr* places, size_t offset, - AggregateDataPtr rhs, const IColumn* column, Arena* arena, + AggregateDataPtr rhs, const IColumn* column, Arena*, const size_t num_rows) const override { - this->deserialize_from_column(rhs, *column, arena, num_rows); + this->deserialize_from_column(rhs, *column, nullptr, num_rows); DEFER({ this->destroy_vec(rhs, num_rows); }); - this->merge_vec(places, offset, rhs, arena, num_rows); + this->merge_vec(places, offset, rhs, nullptr, num_rows); } void deserialize_and_merge_vec_selected(const AggregateDataPtr* places, size_t offset, - 
AggregateDataPtr rhs, const IColumn* column, - Arena* arena, const size_t num_rows) const override { - this->deserialize_from_column(rhs, *column, arena, num_rows); + AggregateDataPtr rhs, const IColumn* column, Arena*, + const size_t num_rows) const override { + this->deserialize_from_column(rhs, *column, nullptr, num_rows); DEFER({ this->destroy_vec(rhs, num_rows); }); - this->merge_vec_selected(places, offset, rhs, arena, num_rows); + this->merge_vec_selected(places, offset, rhs, nullptr, num_rows); } void serialize_without_key_to_column(ConstAggregateDataPtr __restrict place, diff --git a/be/src/vec/aggregate_functions/aggregate_function_uniq.h b/be/src/vec/aggregate_functions/aggregate_function_uniq.h index e97923a08e6a2df..a3bdad635057fd1 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_uniq.h +++ b/be/src/vec/aggregate_functions/aggregate_function_uniq.h @@ -141,7 +141,7 @@ class AggregateFunctionUniq final } void add_batch(size_t batch_size, AggregateDataPtr* places, size_t place_offset, - const IColumn** columns, Arena* arena, bool /*agg_many*/) const override { + const IColumn** columns, Arena*, bool /*agg_many*/) const override { std::vector keys_container; const KeyType* keys = get_keys(keys_container, *columns[0], batch_size); @@ -175,7 +175,7 @@ class AggregateFunctionUniq final } void add_batch_single_place(size_t batch_size, AggregateDataPtr place, const IColumn** columns, - Arena* arena) const override { + Arena*) const override { std::vector keys_container; const KeyType* keys = get_keys(keys_container, *columns[0], batch_size); auto& set = this->data(place).set; @@ -197,7 +197,7 @@ class AggregateFunctionUniq final } void deserialize_and_merge(AggregateDataPtr __restrict place, AggregateDataPtr __restrict rhs, - BufferReadable& buf, Arena* arena) const override { + BufferReadable& buf, Arena*) const override { auto& set = this->data(place).set; UInt64 size; read_var_uint(size, buf); @@ -212,7 +212,7 @@ class AggregateFunctionUniq 
final } void deserialize(AggregateDataPtr __restrict place, BufferReadable& buf, - Arena* arena) const override { + Arena*) const override { auto& set = this->data(place).set; UInt64 size; read_var_uint(size, buf); diff --git a/be/src/vec/aggregate_functions/aggregate_function_uniq_distribute_key.h b/be/src/vec/aggregate_functions/aggregate_function_uniq_distribute_key.h index 4c3fa67e1626aea..90d137c62384f6d 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_uniq_distribute_key.h +++ b/be/src/vec/aggregate_functions/aggregate_function_uniq_distribute_key.h @@ -112,7 +112,7 @@ class AggregateFunctionUniqDistributeKey final } void add_batch(size_t batch_size, AggregateDataPtr* places, size_t place_offset, - const IColumn** columns, Arena* arena, bool /*agg_many*/) const override { + const IColumn** columns, Arena*, bool /*agg_many*/) const override { std::vector keys_container; const KeyType* keys = get_keys(keys_container, *columns[0], batch_size); @@ -133,7 +133,7 @@ class AggregateFunctionUniqDistributeKey final } void add_batch_single_place(size_t batch_size, AggregateDataPtr place, const IColumn** columns, - Arena* arena) const override { + Arena*) const override { std::vector keys_container; const KeyType* keys = get_keys(keys_container, *columns[0], batch_size); auto& set = this->data(place).set; @@ -164,7 +164,7 @@ class AggregateFunctionUniqDistributeKey final assert_cast(to).get_data().push_back(this->data(place).count); } - void deserialize_from_column(AggregateDataPtr places, const IColumn& column, Arena* arena, + void deserialize_from_column(AggregateDataPtr places, const IColumn& column, Arena*, size_t num_rows) const override { auto data = reinterpret_cast( assert_cast(column).get_data().data()); @@ -188,7 +188,7 @@ class AggregateFunctionUniqDistributeKey final } void streaming_agg_serialize_to_column(const IColumn** columns, MutableColumnPtr& dst, - const size_t num_rows, Arena* arena) const override { + const size_t num_rows, Arena*) 
const override { auto& dst_col = assert_cast(*dst); CHECK(dst_col.item_size() == sizeof(UInt64)) << "size is not equal: " << dst_col.item_size() << " " << sizeof(UInt64); @@ -200,7 +200,7 @@ class AggregateFunctionUniqDistributeKey final } void deserialize_and_merge_from_column(AggregateDataPtr __restrict place, const IColumn& column, - Arena* arena) const override { + Arena*) const override { auto& col = assert_cast(column); const size_t num_rows = column.size(); auto* data = reinterpret_cast(col.get_data().data()); @@ -211,7 +211,7 @@ class AggregateFunctionUniqDistributeKey final void deserialize_and_merge_from_column_range(AggregateDataPtr __restrict place, const IColumn& column, size_t begin, size_t end, - Arena* arena) const override { + Arena*) const override { CHECK(end <= column.size() && begin <= end) << ", begin:" << begin << ", end:" << end << ", column.size():" << column.size(); auto& col = assert_cast(column); @@ -222,19 +222,19 @@ class AggregateFunctionUniqDistributeKey final } void deserialize_and_merge_vec(const AggregateDataPtr* places, size_t offset, - AggregateDataPtr rhs, const IColumn* column, Arena* arena, + AggregateDataPtr rhs, const IColumn* column, Arena*, const size_t num_rows) const override { - this->deserialize_from_column(rhs, *column, arena, num_rows); + this->deserialize_from_column(rhs, *column, nullptr, num_rows); DEFER({ this->destroy_vec(rhs, num_rows); }); - this->merge_vec(places, offset, rhs, arena, num_rows); + this->merge_vec(places, offset, rhs, nullptr, num_rows); } void deserialize_and_merge_vec_selected(const AggregateDataPtr* places, size_t offset, - AggregateDataPtr rhs, const IColumn* column, - Arena* arena, const size_t num_rows) const override { - this->deserialize_from_column(rhs, *column, arena, num_rows); + AggregateDataPtr rhs, const IColumn* column, Arena*, + const size_t num_rows) const override { + this->deserialize_from_column(rhs, *column, nullptr, num_rows); DEFER({ this->destroy_vec(rhs, num_rows); }); 
- this->merge_vec_selected(places, offset, rhs, arena, num_rows); + this->merge_vec_selected(places, offset, rhs, nullptr, num_rows); } void serialize_without_key_to_column(ConstAggregateDataPtr __restrict place, diff --git a/be/src/vec/aggregate_functions/aggregate_function_window.h b/be/src/vec/aggregate_functions/aggregate_function_window.h index 517871e2fb642d3..cb038fe31168b9e 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_window.h +++ b/be/src/vec/aggregate_functions/aggregate_function_window.h @@ -66,7 +66,7 @@ class WindowFunctionRowNumber final void add_range_single_place(int64_t partition_start, int64_t partition_end, int64_t frame_start, int64_t frame_end, AggregateDataPtr place, const IColumn** columns, - Arena* arena) const override { + Arena*) const override { ++data(place).count; } @@ -104,7 +104,7 @@ class WindowFunctionRank final : public IAggregateFunctionDataHelperdata(place).add_range_single_place(partition_start, partition_end, frame_start, frame_end, columns); } @@ -554,7 +554,7 @@ class WindowFunctionData final } void add(AggregateDataPtr place, const IColumn** columns, ssize_t row_num, - Arena* arena) const override { + Arena*) const override { throw doris::Exception(ErrorCode::INTERNAL_ERROR, "WindowFunctionLeadLagData do not support add"); __builtin_unreachable(); diff --git a/be/src/vec/columns/column_array.cpp b/be/src/vec/columns/column_array.cpp index b1ecf8475833e97..ca1f49a67f067f6 100644 --- a/be/src/vec/columns/column_array.cpp +++ b/be/src/vec/columns/column_array.cpp @@ -25,6 +25,7 @@ #include #include +#include "common/status.h" #include "vec/columns/column_const.h" #include "vec/columns/column_nullable.h" #include "vec/columns/column_string.h" @@ -130,7 +131,7 @@ Field ColumnArray::operator[](size_t n) const { if (size > max_array_size_as_field) throw doris::Exception( - ErrorCode::INTERNAL_ERROR, + ErrorCode::INVALID_ARGUMENT, "Array of size {}, is too large to be manipulated as single field, maximum size {}", 
size, max_array_size_as_field); @@ -147,7 +148,7 @@ void ColumnArray::get(size_t n, Field& res) const { if (size > max_array_size_as_field) throw doris::Exception( - ErrorCode::INTERNAL_ERROR, + ErrorCode::INVALID_ARGUMENT, "Array of size {}, is too large to be manipulated as single field, maximum size {}", size, max_array_size_as_field); diff --git a/be/src/vec/columns/column_map.cpp b/be/src/vec/columns/column_map.cpp index 7550803dac24823..85964ca967b095a 100644 --- a/be/src/vec/columns/column_map.cpp +++ b/be/src/vec/columns/column_map.cpp @@ -105,7 +105,7 @@ Field ColumnMap::operator[](size_t n) const { size_t element_size = size_at(n); if (element_size > max_array_size_as_field) { - throw doris::Exception(doris::ErrorCode::INTERNAL_ERROR, + throw doris::Exception(doris::ErrorCode::INVALID_ARGUMENT, "element size {} is too large to be manipulated as single map " "field, maximum size {}", element_size, max_array_size_as_field); diff --git a/be/src/vec/columns/column_object.cpp b/be/src/vec/columns/column_object.cpp index acf27cac8486493..3d392e2addbae57 100644 --- a/be/src/vec/columns/column_object.cpp +++ b/be/src/vec/columns/column_object.cpp @@ -1311,14 +1311,14 @@ rapidjson::Value* find_leaf_node_by_path(rapidjson::Value& json, const PathInDat // 2. nested array with only nulls, eg. [null. null],todo: think a better way to deal distinguish array null value and real null value. // 3. empty root jsonb value(not null) // 4. 
type is nothing -bool skip_empty_json(const ColumnNullable* nullable, const DataTypePtr& type, int row, - const PathInData& path) { +bool skip_empty_json(const ColumnNullable* nullable, const DataTypePtr& type, + TypeIndex base_type_id, int row, const PathInData& path) { // skip nulls if (nullable && nullable->is_null_at(row)) { return true; } // check if it is empty nested json array, then skip - if (type->equals(*ColumnObject::NESTED_TYPE)) { + if (base_type_id == TypeIndex::VARIANT && type->equals(*ColumnObject::NESTED_TYPE)) { Field field = (*nullable)[row]; if (field.get_type() == Field::Types::Array) { const auto& array = field.get(); @@ -1338,7 +1338,7 @@ bool skip_empty_json(const ColumnNullable* nullable, const DataTypePtr& type, in return true; } // skip nothing type - if (WhichDataType(remove_nullable(get_base_type_of_array(type))).is_nothing()) { + if (base_type_id == TypeIndex::Nothing) { return true; } return false; @@ -1346,17 +1346,19 @@ bool skip_empty_json(const ColumnNullable* nullable, const DataTypePtr& type, in Status find_and_set_leave_value(const IColumn* column, const PathInData& path, const DataTypeSerDeSPtr& type_serde, const DataTypePtr& type, - rapidjson::Value& root, + TypeIndex base_type_index, rapidjson::Value& root, rapidjson::Document::AllocatorType& allocator, Arena& mem_pool, int row) { +#ifndef NDEBUG // sanitize type and column if (column->get_name() != type->create_column()->get_name()) { return Status::InternalError( "failed to set value for path {}, expected type {}, but got {} at row {}", path.get_path(), type->get_name(), column->get_name(), row); } +#endif const auto* nullable = check_and_get_column(column); - if (skip_empty_json(nullable, type, row, path)) { + if (skip_empty_json(nullable, type, base_type_index, row, path)) { return Status::OK(); } // TODO could cache the result of leaf nodes with it's path info @@ -1476,11 +1478,12 @@ Status ColumnObject::serialize_one_row_to_json_format(int row, rapidjson::String 
VLOG_DEBUG << "dump structure " << JsonFunctions::print_json_value(*doc_structure); #endif for (const auto& subcolumn : subcolumns) { - RETURN_IF_ERROR(find_and_set_leave_value(subcolumn->data.get_finalized_column_ptr(), - subcolumn->path, - subcolumn->data.get_least_common_type_serde(), - subcolumn->data.get_least_common_type(), root, - doc_structure->GetAllocator(), mem_pool, row)); + RETURN_IF_ERROR(find_and_set_leave_value( + subcolumn->data.get_finalized_column_ptr(), subcolumn->path, + subcolumn->data.get_least_common_type_serde(), + subcolumn->data.get_least_common_type(), + subcolumn->data.least_common_type.get_base_type_id(), root, + doc_structure->GetAllocator(), mem_pool, row)); if (subcolumn->path.empty() && !root.IsObject()) { // root was modified, only handle root node break; @@ -1549,10 +1552,11 @@ Status ColumnObject::merge_sparse_to_root_column() { ++null_count; continue; } - bool succ = find_and_set_leave_value(column, subcolumn->path, - subcolumn->data.get_least_common_type_serde(), - subcolumn->data.get_least_common_type(), root, - doc_structure->GetAllocator(), mem_pool, i); + bool succ = find_and_set_leave_value( + column, subcolumn->path, subcolumn->data.get_least_common_type_serde(), + subcolumn->data.get_least_common_type(), + subcolumn->data.least_common_type.get_base_type_id(), root, + doc_structure->GetAllocator(), mem_pool, i); if (succ && subcolumn->path.empty() && !root.IsObject()) { // root was modified, only handle root node break; diff --git a/be/src/vec/common/columns_hashing.h b/be/src/vec/common/columns_hashing.h index 4bdbf51444fbbf8..6a59c5964e47a4e 100644 --- a/be/src/vec/common/columns_hashing.h +++ b/be/src/vec/common/columns_hashing.h @@ -38,11 +38,6 @@ namespace doris::vectorized { using Sizes = std::vector; -inline bool has_nullable_key(const std::vector& data_types) { - return std::ranges::any_of(data_types.begin(), data_types.end(), - [](auto t) { return t->is_nullable(); }); -} - inline Sizes get_key_sizes(const 
std::vector& data_types) { Sizes key_sizes; for (const auto& data_type : data_types) { @@ -101,17 +96,14 @@ struct HashMethodSerialized }; /// For the case when all keys are of fixed length, and they fit in N (for example, 128) bits. -template +template struct HashMethodKeysFixed - : private columns_hashing_impl::BaseStateKeysFixed, - public columns_hashing_impl::HashMethodBase< - HashMethodKeysFixed, Value, Mapped, - false> { - using Self = HashMethodKeysFixed; + : public columns_hashing_impl::HashMethodBase, + Value, Mapped, false> { + using Self = HashMethodKeysFixed; using BaseHashed = columns_hashing_impl::HashMethodBase; - using Base = columns_hashing_impl::BaseStateKeysFixed; - HashMethodKeysFixed(const ColumnRawPtrs& key_columns) : Base(key_columns) {} + HashMethodKeysFixed(const ColumnRawPtrs& key_columns) {} }; template diff --git a/be/src/vec/common/columns_hashing_impl.h b/be/src/vec/common/columns_hashing_impl.h index 2665d9b797903a5..a11ec17ec705276 100644 --- a/be/src/vec/common/columns_hashing_impl.h +++ b/be/src/vec/common/columns_hashing_impl.h @@ -149,64 +149,6 @@ class HashMethodBase { } }; -template -struct MappedCache : public PaddedPODArray {}; - -template <> -struct MappedCache {}; - -/// This class is designed to provide the functionality that is required for -/// supporting nullable keys in HashMethodKeysFixed. If there are -/// no nullable keys, this class is merely implemented as an empty shell. -template -class BaseStateKeysFixed; - -/// Case where nullable keys are supported. 
-template -class BaseStateKeysFixed { -protected: - BaseStateKeysFixed(const ColumnRawPtrs& key_columns) { - null_maps.reserve(key_columns.size()); - actual_columns.reserve(key_columns.size()); - - for (const auto& col : key_columns) { - if (auto* nullable_col = check_and_get_column(col)) { - actual_columns.push_back(&nullable_col->get_nested_column()); - null_maps.push_back(&nullable_col->get_null_map_column()); - } else { - actual_columns.push_back(col); - null_maps.push_back(nullptr); - } - } - } - - /// Return the columns which actually contain the values of the keys. - /// For a given key column, if it is nullable, we return its nested - /// column. Otherwise we return the key column itself. - const ColumnRawPtrs& get_actual_columns() const { return actual_columns; } - - const ColumnRawPtrs& get_nullmap_columns() const { return null_maps; } - -private: - ColumnRawPtrs actual_columns; - ColumnRawPtrs null_maps; -}; - -/// Case where nullable keys are not supported. -template -class BaseStateKeysFixed { -protected: - BaseStateKeysFixed(const ColumnRawPtrs& columns) : actual_columns(columns) {} - - const ColumnRawPtrs& get_actual_columns() const { return actual_columns; } - - const ColumnRawPtrs& get_nullmap_columns() const { return null_maps; } - -private: - ColumnRawPtrs actual_columns; - ColumnRawPtrs null_maps; -}; - } // namespace columns_hashing_impl } // namespace ColumnsHashing diff --git a/be/src/vec/common/hash_table/hash_key_type.h b/be/src/vec/common/hash_table/hash_key_type.h index 38802fe716711f4..2c14e4ab687f87b 100644 --- a/be/src/vec/common/hash_table/hash_key_type.h +++ b/be/src/vec/common/hash_table/hash_key_type.h @@ -97,16 +97,16 @@ inline HashKeyType get_hash_key_type(const std::vector& return HashKeyType::without_key; } - if (!data_types[0]->have_maximum_size_of_value()) { - if (is_string(data_types[0])) { + auto t = remove_nullable(data_types[0]); + // serialized cannot be used in the case of single column, because the join operator will 
have some processing of column nullable, resulting in incorrect serialized results. + if (!t->have_maximum_size_of_value()) { + if (is_string(t)) { return HashKeyType::string_key; - } else { - return HashKeyType::serialized; } + throw Exception(ErrorCode::INTERNAL_ERROR, "meet invalid type, type={}", t->get_name()); } - size_t size = - data_types[0]->get_maximum_size_of_value_in_memory() - data_types[0]->is_nullable(); + size_t size = t->get_maximum_size_of_value_in_memory(); if (size == sizeof(vectorized::UInt8)) { return HashKeyType::int8_key; } else if (size == sizeof(vectorized::UInt16)) { @@ -121,7 +121,7 @@ inline HashKeyType get_hash_key_type(const std::vector& return HashKeyType::int256_key; } else { throw Exception(ErrorCode::INTERNAL_ERROR, "meet invalid type size, size={}, type={}", size, - data_types[0]->get_name()); + t->get_name()); } } diff --git a/be/src/vec/common/hash_table/hash_map.h b/be/src/vec/common/hash_table/hash_map.h index 018d134d875ca86..448ddd5b7c5dbe1 100644 --- a/be/src/vec/common/hash_table/hash_map.h +++ b/be/src/vec/common/hash_table/hash_map.h @@ -198,9 +198,6 @@ template , typename Grower = HashTableGrower<>, typename Allocator = HashTableAllocator> using HashMap = HashMapTable, Hash, Grower, Allocator>; -template > -using NormalHashMap = HashMapTable, Hash>; - template > using JoinHashMap = JoinHashTable; diff --git a/be/src/vec/common/hash_table/hash_map_context.h b/be/src/vec/common/hash_table/hash_map_context.h index 973f04f064fea34..16a793d75008c80 100644 --- a/be/src/vec/common/hash_table/hash_map_context.h +++ b/be/src/vec/common/hash_table/hash_map_context.h @@ -27,7 +27,6 @@ #include "vec/common/arena.h" #include "vec/common/assert_cast.h" #include "vec/common/columns_hashing.h" -#include "vec/common/hash_table/partitioned_hash_map.h" #include "vec/common/hash_table/string_hash_map.h" #include "vec/common/string_ref.h" #include "vec/common/typeid_cast.h" @@ -375,7 +374,7 @@ struct MethodOneNumber : public MethodBase { } 
}; -template +template struct MethodKeysFixed : public MethodBase { using Base = MethodBase; using typename Base::Key; @@ -384,8 +383,7 @@ struct MethodKeysFixed : public MethodBase { using Base::hash_table; using Base::iterator; - using State = ColumnsHashing::HashMethodKeysFixed; + using State = ColumnsHashing::HashMethodKeysFixed; // need keep until the hash probe end. use only in join std::vector build_stored_keys; @@ -469,20 +467,22 @@ struct MethodKeysFixed : public MethodBase { bool is_build = false, uint32_t bucket_size = 0) override { ColumnRawPtrs actual_columns; ColumnRawPtrs null_maps; - if (has_nullable_keys) { - actual_columns.reserve(key_columns.size()); - null_maps.reserve(key_columns.size()); - for (const auto& col : key_columns) { - if (const auto* nullable_col = check_and_get_column(col)) { - actual_columns.push_back(&nullable_col->get_nested_column()); - null_maps.push_back(&nullable_col->get_null_map_column()); - } else { - actual_columns.push_back(col); - null_maps.push_back(nullptr); - } + actual_columns.reserve(key_columns.size()); + null_maps.reserve(key_columns.size()); + bool has_nullable_key = false; + + for (const auto& col : key_columns) { + if (const auto* nullable_col = check_and_get_column(col)) { + actual_columns.push_back(&nullable_col->get_nested_column()); + null_maps.push_back(&nullable_col->get_null_map_column()); + has_nullable_key = true; + } else { + actual_columns.push_back(col); + null_maps.push_back(nullptr); } - } else { - actual_columns = key_columns; + } + if (!has_nullable_key) { + null_maps.clear(); } if (is_build) { @@ -503,7 +503,13 @@ struct MethodKeysFixed : public MethodBase { void insert_keys_into_columns(std::vector& input_keys, MutableColumns& key_columns, const size_t num_rows) override { // In any hash key value, column values to be read start just after the bitmap, if it exists. - size_t pos = has_nullable_keys ? 
get_bitmap_size(key_columns.size()) : 0; + size_t pos = 0; + for (size_t i = 0; i < key_columns.size(); ++i) { + if (key_columns[i]->is_nullable()) { + pos = get_bitmap_size(key_columns.size()); + break; + } + } for (size_t i = 0; i < key_columns.size(); ++i) { size_t size = key_sizes[i]; @@ -607,10 +613,4 @@ struct MethodSingleNullableColumn : public SingleColumnMethod { } }; -template -using PrimaryTypeHashTableContext = MethodOneNumber>>; - -template -using FixedKeyHashTableContext = MethodKeysFixed>, has_null>; - } // namespace doris::vectorized \ No newline at end of file diff --git a/be/src/vec/common/hash_table/hash_map_util.h b/be/src/vec/common/hash_table/hash_map_util.h index 200e6372ea8ac4e..d949fafecf95025 100644 --- a/be/src/vec/common/hash_table/hash_map_util.h +++ b/be/src/vec/common/hash_table/hash_map_util.h @@ -33,13 +33,10 @@ inline std::vector get_data_types( template Status init_hash_method(DataVariants* data, const std::vector& data_types, bool is_first_phase) { - auto type = get_hash_key_type_with_phase(get_hash_key_type(data_types), !is_first_phase); + auto type = HashKeyType::EMPTY; try { - if (has_nullable_key(data_types)) { - data->template init(data_types, type); - } else { - data->template init(data_types, type); - } + type = get_hash_key_type_with_phase(get_hash_key_type(data_types), !is_first_phase); + data->init(data_types, type); } catch (const Exception& e) { // method_variant may meet valueless_by_exception, so we set it to monostate data->method_variant.template emplace(); @@ -48,7 +45,7 @@ Status init_hash_method(DataVariants* data, const std::vectormethod_variant.valueless_by_exception()); - if (type != HashKeyType::without_key && + if (type != HashKeyType::without_key && type != HashKeyType::EMPTY && data->method_variant.index() == 0) { // index is 0 means variant is monostate return Status::InternalError("method_variant init failed"); } @@ -57,15 +54,15 @@ Status init_hash_method(DataVariants* data, const std::vector typename 
MethodNullable, template typename MethodOneNumber, - template typename MethodFixed, template typename DataNullable> + template typename DataNullable> struct DataVariants { DataVariants() = default; DataVariants(const DataVariants&) = delete; DataVariants& operator=(const DataVariants&) = delete; MethodVariants method_variant; - template - void emplace_single() { + template + void emplace_single(bool nullable) { if (nullable) { method_variant.template emplace>>>(); } else { diff --git a/be/src/vec/common/hash_table/hash_table.h b/be/src/vec/common/hash_table/hash_table.h index 490cd5016927c25..809868e2beea86f 100644 --- a/be/src/vec/common/hash_table/hash_table.h +++ b/be/src/vec/common/hash_table/hash_table.h @@ -419,28 +419,12 @@ class HashTable : private boost::noncopyable, Cell* buf = nullptr; /// A piece of memory for all elements except the element with zero key. Grower grower; int64_t _resize_timer_ns; - // the bucket count threshold above which it's converted to partioned hash table - // > 0: enable convert dynamically - // 0: convert is disabled - int _partitioned_threshold = 0; - // if need resize and bucket count after resize will be >= _partitioned_threshold, - // this flag is set to true, and resize does not actually happen, - // PartitionedHashTable will convert this hash table to partitioned hash table - bool _need_partition = false; //factor that will trigger growing the hash table on insert. static constexpr float MAX_BUCKET_OCCUPANCY_FRACTION = 0.5f; mutable size_t collisions = 0; - void set_partitioned_threshold(int threshold) { _partitioned_threshold = threshold; } - - bool check_if_need_partition(size_t bucket_count) { - return _partitioned_threshold > 0 && bucket_count >= _partitioned_threshold; - } - - bool need_partition() { return _need_partition; } - /// Find a cell with the same key or an empty cell, starting from the specified position and further along the collision resolution chain. 
size_t ALWAYS_INLINE find_cell(const Key& x, size_t hash_value, size_t place_value) const { while (!buf[place_value].is_zero(*this) && @@ -609,8 +593,6 @@ class HashTable : private boost::noncopyable, std::swap(buf, rhs.buf); std::swap(m_size, rhs.m_size); std::swap(grower, rhs.grower); - std::swap(_need_partition, rhs._need_partition); - std::swap(_partitioned_threshold, rhs._partitioned_threshold); Hash::operator=(std::move(rhs)); // NOLINT(bugprone-use-after-move) Allocator::operator=(std::move(rhs)); // NOLINT(bugprone-use-after-move) @@ -740,12 +722,10 @@ class HashTable : private boost::noncopyable, throw; } - if (LIKELY(!_need_partition)) { - // The hash table was rehashed, so we have to re-find the key. - size_t new_place = find_cell(key, hash_value, grower.place(hash_value)); - assert(!buf[new_place].is_zero(*this)); - it = &buf[new_place]; - } + // The hash table was rehashed, so we have to re-find the key. + size_t new_place = find_cell(key, hash_value, grower.place(hash_value)); + assert(!buf[new_place].is_zero(*this)); + it = &buf[new_place]; } } @@ -776,12 +756,10 @@ class HashTable : private boost::noncopyable, throw; } - if (LIKELY(!_need_partition)) { - // The hash table was rehashed, so we have to re-find the key. - size_t new_place = find_cell(key, hash_value, grower.place(hash_value)); - assert(!buf[new_place].is_zero(*this)); - it = &buf[new_place]; - } + // The hash table was rehashed, so we have to re-find the key. + size_t new_place = find_cell(key, hash_value, grower.place(hash_value)); + assert(!buf[new_place].is_zero(*this)); + it = &buf[new_place]; } } @@ -1060,13 +1038,6 @@ class HashTable : private boost::noncopyable, } else new_grower.increase_size(); - // new bucket count exceed partitioned hash table bucket count threshold, - // don't resize and set need partition flag - if (check_if_need_partition(new_grower.buf_size())) { - _need_partition = true; - return; - } - /// Expand the space. 
buf = reinterpret_cast(Allocator::realloc(buf, get_buffer_size_in_bytes(), new_grower.buf_size() * sizeof(Cell))); diff --git a/be/src/vec/common/hash_table/hash_table_set_build.h b/be/src/vec/common/hash_table/hash_table_set_build.h index f9aeeeef14c94e6..b90cafc088376a2 100644 --- a/be/src/vec/common/hash_table/hash_table_set_build.h +++ b/be/src/vec/common/hash_table/hash_table_set_build.h @@ -24,7 +24,7 @@ constexpr size_t CHECK_FRECUENCY = 65536; template struct HashTableBuild { template - HashTableBuild(Parent* parent, int rows, ColumnRawPtrs& build_raw_ptrs, RuntimeState* state) + HashTableBuild(Parent* parent, size_t rows, ColumnRawPtrs& build_raw_ptrs, RuntimeState* state) : _rows(rows), _build_raw_ptrs(build_raw_ptrs), _state(state) {} Status operator()(HashTableContext& hash_table_ctx, Arena& arena) { @@ -50,7 +50,7 @@ struct HashTableBuild { } private: - const int _rows; + const size_t _rows; ColumnRawPtrs& _build_raw_ptrs; RuntimeState* _state = nullptr; }; diff --git a/be/src/vec/common/hash_table/join_hash_table.h b/be/src/vec/common/hash_table/join_hash_table.h index 317987541cdbe1c..25ca8844cd280f2 100644 --- a/be/src/vec/common/hash_table/join_hash_table.h +++ b/be/src/vec/common/hash_table/join_hash_table.h @@ -71,20 +71,16 @@ class JoinHashTable { std::vector& get_visited() { return visited; } - template - void build(const Key* __restrict keys, const uint32_t* __restrict bucket_nums, - size_t num_elem) { + void build(const Key* __restrict keys, const uint32_t* __restrict bucket_nums, size_t num_elem, + bool keep_null_key) { build_keys = keys; for (size_t i = 1; i < num_elem; i++) { uint32_t bucket_num = bucket_nums[i]; next[i] = first[bucket_num]; first[bucket_num] = i; } - if constexpr ((JoinOpType != TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN && - JoinOpType != TJoinOp::NULL_AWARE_LEFT_SEMI_JOIN) || - !with_other_conjuncts) { - /// Only null aware join with other conjuncts need to access the null value in hash table - first[bucket_size] = 0; // index 
= bucket_num means null + if (!keep_null_key) { + first[bucket_size] = 0; // index = bucket_size means null } } @@ -142,7 +138,7 @@ class JoinHashTable { JoinOpType == TJoinOp::RIGHT_SEMI_JOIN) { return _find_batch_right_semi_anti(keys, build_idx_map, probe_idx, probe_rows); } - return std::tuple {0, 0U, 0}; + return std::tuple {0, 0U, 0U}; } /** @@ -163,7 +159,7 @@ class JoinHashTable { uint32_t* __restrict build_idxs, uint8_t* __restrict null_flags, bool picking_null_keys) { - auto matched_cnt = 0; + uint32_t matched_cnt = 0; const auto batch_size = max_batch_size; auto do_the_probe = [&]() { @@ -274,7 +270,7 @@ class JoinHashTable { uint32_t* __restrict build_idxs) { static_assert(JoinOpType == TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN || JoinOpType == TJoinOp::NULL_AWARE_LEFT_SEMI_JOIN); - auto matched_cnt = 0; + uint32_t matched_cnt = 0; const auto batch_size = max_batch_size; while (probe_idx < probe_rows && matched_cnt < batch_size) { @@ -300,14 +296,14 @@ class JoinHashTable { } probe_idx++; } - return std::tuple {probe_idx, 0U, 0}; + return std::tuple {probe_idx, 0U, 0U}; } template auto _find_batch_left_semi_anti(const Key* __restrict keys, const uint32_t* __restrict build_idx_map, int probe_idx, int probe_rows, uint32_t* __restrict probe_idxs) { - auto matched_cnt = 0; + uint32_t matched_cnt = 0; const auto batch_size = max_batch_size; while (probe_idx < probe_rows && matched_cnt < batch_size) { @@ -334,7 +330,7 @@ class JoinHashTable { auto _find_batch_conjunct(const Key* __restrict keys, const uint32_t* __restrict build_idx_map, int probe_idx, uint32_t build_idx, int probe_rows, uint32_t* __restrict probe_idxs, uint32_t* __restrict build_idxs) { - auto matched_cnt = 0; + uint32_t matched_cnt = 0; const auto batch_size = max_batch_size; auto do_the_probe = [&]() { @@ -405,7 +401,7 @@ class JoinHashTable { uint32_t build_idx, int probe_rows, uint32_t* __restrict probe_idxs, bool& probe_visited, uint32_t* __restrict build_idxs) { - auto matched_cnt = 0; + 
uint32_t matched_cnt = 0; const auto batch_size = max_batch_size; auto do_the_probe = [&]() { diff --git a/be/src/vec/common/hash_table/partitioned_hash_map.h b/be/src/vec/common/hash_table/partitioned_hash_map.h deleted file mode 100644 index a2db6fece35207f..000000000000000 --- a/be/src/vec/common/hash_table/partitioned_hash_map.h +++ /dev/null @@ -1,60 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
-// This file is copied from -// https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/HashTable/TwoLevelHashMap.h -// and modified by Doris -#pragma once - -#include "vec/common/hash_table/hash_map.h" -#include "vec/common/hash_table/partitioned_hash_table.h" -#include "vec/common/hash_table/ph_hash_map.h" -namespace doris { -template -class PartitionedHashMapTable : public PartitionedHashTable { -public: - using Impl = ImplTable; - using Base = PartitionedHashTable; - using Key = typename ImplTable::key_type; - using LookupResult = typename Impl::LookupResult; - - auto& ALWAYS_INLINE operator[](const Key& x) { - LookupResult it; - bool inserted = false; - this->emplace(x, it, inserted); - - if (inserted) { - new (lookup_result_get_mapped(it)) Base::mapped_type(); - } - - return *lookup_result_get_mapped(it); - } - - template - void for_each_mapped(Func&& func) { - for (auto& v : *this) { - func(v.get_second()); - } - } -}; - -template > -using PartitionedHashMap = - PartitionedHashMapTable>>; - -template > -using PHNormalHashMap = PHHashMap; -} // namespace doris \ No newline at end of file diff --git a/be/src/vec/common/hash_table/partitioned_hash_table.h b/be/src/vec/common/hash_table/partitioned_hash_table.h deleted file mode 100644 index c6a19b36d3a0c38..000000000000000 --- a/be/src/vec/common/hash_table/partitioned_hash_table.h +++ /dev/null @@ -1,550 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. -// This file is copied from -// https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/HashTable/TwoLevelHashTable.h -// and modified by Doris -#pragma once - -#include "vec/common/hash_table/hash_table.h" - -/** Partitioned hash table. - * Represents 16 (or 1ULL << BITS_FOR_SUB_TABLE) small hash tables (sub table count of the first level). - * To determine which one to use, one of the bytes of the hash function is taken. - * - * Usually works a little slower than a simple hash table. - * However, it has advantages in some cases: - * - if you need to merge two hash tables together, then you can easily parallelize it by sub tables; - * - delay during resizes is amortized, since the small hash tables will be resized separately; - * - in theory, resizes are cache-local in a larger range of sizes. - */ - -template -struct PartitionedHashTableGrower : public HashTableGrowerWithPrecalculation { - /// Increase the size of the hash table. - void increase_size() { this->increase_size_degree(this->size_degree() >= 15 ? 
1 : 2); } -}; - -template -class PartitionedHashTable : private boost::noncopyable, Impl::Hash { -public: - using key_type = typename Impl::key_type; - using mapped_type = typename Impl::mapped_type; - using value_type = typename Impl::value_type; - using cell_type = typename Impl::cell_type; - using Key = typename Impl::key_type; - - using LookupResult = typename Impl::LookupResult; - using ConstLookupResult = typename Impl::ConstLookupResult; - -protected: - using Self = PartitionedHashTable; - -private: - static constexpr size_t NUM_LEVEL1_SUB_TABLES = 1ULL << BITS_FOR_SUB_TABLE; - static constexpr size_t MAX_SUB_TABLE = NUM_LEVEL1_SUB_TABLES - 1; - - //factor that will trigger growing the hash table on insert. - static constexpr float MAX_SUB_TABLE_OCCUPANCY_FRACTION = 0.5f; - - Impl level0_sub_table; - Impl level1_sub_tables[NUM_LEVEL1_SUB_TABLES]; - - bool _is_partitioned = false; - - int64_t _convert_timer_ns = 0; - -public: - PartitionedHashTable() = default; - - PartitionedHashTable(PartitionedHashTable&& rhs) { *this = std::move(rhs); } - - PartitionedHashTable& operator=(PartitionedHashTable&& rhs) { - std::swap(_is_partitioned, rhs._is_partitioned); - - level0_sub_table = std::move(rhs.level0_sub_table); - for (size_t i = 0; i < NUM_LEVEL1_SUB_TABLES; ++i) { - level1_sub_tables[i] = std::move(rhs.level1_sub_tables[i]); - } - - return *this; - } - - size_t hash(const Key& x) const { return level0_sub_table.hash(x); } - - float get_factor() const { return MAX_SUB_TABLE_OCCUPANCY_FRACTION; } - - int64_t get_convert_timer_value() const { return _convert_timer_ns; } - - bool should_be_shrink(int64_t valid_row) const { - if (_is_partitioned) { - return false; - } else { - return level0_sub_table.should_be_shrink(valid_row); - } - } - - size_t size() { - size_t count = 0; - if (_is_partitioned) { - for (auto i = 0u; i < this->NUM_LEVEL1_SUB_TABLES; ++i) { - count += this->level1_sub_tables[i].size(); - } - } else { - count = level0_sub_table.size(); - } - 
return count; - } - - void init_buf_size(size_t reserve_for_num_elements) { - if (_is_partitioned) { - for (auto& impl : level1_sub_tables) { - impl.init_buf_size(reserve_for_num_elements / NUM_LEVEL1_SUB_TABLES); - } - } else { - if (level0_sub_table.check_if_need_partition(reserve_for_num_elements)) { - level0_sub_table.clear_and_shrink(); - _is_partitioned = true; - - for (size_t i = 0; i < NUM_LEVEL1_SUB_TABLES; ++i) { - level1_sub_tables[i].init_buf_size(reserve_for_num_elements / - NUM_LEVEL1_SUB_TABLES); - } - } else { - level0_sub_table.init_buf_size(reserve_for_num_elements); - } - } - } - - void delete_zero_key(Key key) { - if (_is_partitioned) { - const auto key_hash = hash(key); - size_t sub_table_idx = get_sub_table_from_hash(key_hash); - level1_sub_tables[sub_table_idx].delete_zero_key(key); - } else { - level0_sub_table.delete_zero_key(key); - } - } - - int64_t get_collisions() const { - size_t collisions = level0_sub_table.get_collisions(); - for (size_t i = 0; i < NUM_LEVEL1_SUB_TABLES; i++) { - collisions += level1_sub_tables[i].get_collisions(); - } - return collisions; - } - - size_t get_buffer_size_in_bytes() const { - if (_is_partitioned) { - size_t buff_size = 0; - for (const auto& impl : level1_sub_tables) buff_size += impl.get_buffer_size_in_bytes(); - return buff_size; - } else { - return level0_sub_table.get_buffer_size_in_bytes(); - } - } - - size_t get_buffer_size_in_cells() const { - if (_is_partitioned) { - size_t buff_size = 0; - for (const auto& impl : level1_sub_tables) buff_size += impl.get_buffer_size_in_cells(); - return buff_size; - } else { - return level0_sub_table.get_buffer_size_in_cells(); - } - } - - std::vector get_buffer_sizes_in_cells() const { - std::vector sizes; - if (_is_partitioned) { - for (size_t i = 0; i < NUM_LEVEL1_SUB_TABLES; ++i) { - sizes.push_back(level1_sub_tables[i].get_buffer_size_in_cells()); - } - } else { - sizes.push_back(level0_sub_table.get_buffer_size_in_cells()); - } - return sizes; - } - - 
void reset_resize_timer() { - if (_is_partitioned) { - for (auto& impl : level1_sub_tables) { - impl.reset_resize_timer(); - } - } else { - level0_sub_table.reset_resize_timer(); - } - } - int64_t get_resize_timer_value() const { - if (_is_partitioned) { - int64_t resize_timer_ns = 0; - for (const auto& impl : level1_sub_tables) { - resize_timer_ns += impl.get_resize_timer_value(); - } - return resize_timer_ns; - } else { - return level0_sub_table.get_resize_timer_value(); - } - } - - bool has_null_key_data() const { return false; } - template - char* get_null_key_data() { - return nullptr; - } - -protected: - typename Impl::iterator begin_of_next_non_empty_sub_table_idx(size_t& sub_table_idx) { - while (sub_table_idx != NUM_LEVEL1_SUB_TABLES && level1_sub_tables[sub_table_idx].empty()) - ++sub_table_idx; - - if (sub_table_idx != NUM_LEVEL1_SUB_TABLES) return level1_sub_tables[sub_table_idx].begin(); - - --sub_table_idx; - return level1_sub_tables[MAX_SUB_TABLE].end(); - } - - typename Impl::const_iterator begin_of_next_non_empty_sub_table_idx( - size_t& sub_table_idx) const { - while (sub_table_idx != NUM_LEVEL1_SUB_TABLES && level1_sub_tables[sub_table_idx].empty()) - ++sub_table_idx; - - if (sub_table_idx != NUM_LEVEL1_SUB_TABLES) return level1_sub_tables[sub_table_idx].begin(); - - --sub_table_idx; - return level1_sub_tables[MAX_SUB_TABLE].end(); - } - -public: - void set_partitioned_threshold(int threshold) { - level0_sub_table.set_partitioned_threshold(threshold); - } - - class iterator /// NOLINT - { - Self* container {}; - size_t sub_table_idx {}; - typename Impl::iterator current_it {}; - - friend class PartitionedHashTable; - - iterator(Self* container_, size_t sub_table_idx_, typename Impl::iterator current_it_) - : container(container_), sub_table_idx(sub_table_idx_), current_it(current_it_) {} - - public: - iterator() = default; - - bool operator==(const iterator& rhs) const { - return sub_table_idx == rhs.sub_table_idx && current_it == rhs.current_it; 
- } - bool operator!=(const iterator& rhs) const { return !(*this == rhs); } - - iterator& operator++() { - ++current_it; - if (container->_is_partitioned) { - if (current_it == container->level1_sub_tables[sub_table_idx].end()) { - ++sub_table_idx; - current_it = container->begin_of_next_non_empty_sub_table_idx(sub_table_idx); - } - } - - return *this; - } - - auto& operator*() { return *current_it; } - auto* operator->() { return current_it.get_ptr(); } - - auto* get_ptr() { return current_it.get_ptr(); } - size_t get_hash() { return current_it.get_hash(); } - }; - - class const_iterator /// NOLINT - { - Self* container {}; - size_t sub_table_idx {}; - typename Impl::const_iterator current_it {}; - - friend class PartitionedHashTable; - - const_iterator(Self* container_, size_t sub_table_idx_, - typename Impl::const_iterator current_it_) - : container(container_), sub_table_idx(sub_table_idx_), current_it(current_it_) {} - - public: - const_iterator() = default; - const_iterator(const iterator& rhs) - : container(rhs.container), - sub_table_idx(rhs.sub_table_idx), - current_it(rhs.current_it) {} /// NOLINT - - bool operator==(const const_iterator& rhs) const { - return sub_table_idx == rhs.sub_table_idx && current_it == rhs.current_it; - } - bool operator!=(const const_iterator& rhs) const { return !(*this == rhs); } - - const_iterator& operator++() { - ++current_it; - if (container->_is_partitioned) { - if (current_it == container->level1_sub_tables[sub_table_idx].end()) { - ++sub_table_idx; - current_it = container->begin_of_next_non_empty_sub_table_idx(sub_table_idx); - } - } - - return *this; - } - - const auto& operator*() const { return *current_it; } - const auto* operator->() const { return current_it->get_ptr(); } - - const auto* get_ptr() const { return current_it.get_ptr(); } - size_t get_hash() const { return current_it.get_hash(); } - }; - - const_iterator begin() const { - if (_is_partitioned) { - size_t sub_table_idx = 0; - typename 
Impl::const_iterator impl_it = - begin_of_next_non_empty_sub_table_idx(sub_table_idx); - return {this, sub_table_idx, impl_it}; - } else { - return {this, NUM_LEVEL1_SUB_TABLES, level0_sub_table.begin()}; - } - } - - iterator begin() { - if (_is_partitioned) { - size_t sub_table_idx = 0; - typename Impl::iterator impl_it = begin_of_next_non_empty_sub_table_idx(sub_table_idx); - return {this, sub_table_idx, impl_it}; - } else { - return {this, NUM_LEVEL1_SUB_TABLES, level0_sub_table.begin()}; - } - } - - const_iterator end() const { - if (_is_partitioned) { - return {this, MAX_SUB_TABLE, level1_sub_tables[MAX_SUB_TABLE].end()}; - } else { - return {this, NUM_LEVEL1_SUB_TABLES, level0_sub_table.end()}; - } - } - iterator end() { - if (_is_partitioned) { - return {this, MAX_SUB_TABLE, level1_sub_tables[MAX_SUB_TABLE].end()}; - } else { - return {this, NUM_LEVEL1_SUB_TABLES, level0_sub_table.end()}; - } - } - - /// Insert a value. In the case of any more complex values, it is better to use the `emplace` function. 
- std::pair ALWAYS_INLINE insert(const value_type& x) { - size_t hash_value = hash(cell_type::get_key(x)); - - std::pair res; - emplace(cell_type::get_key(x), res.first, res.second, hash_value); - - if (res.second) insert_set_mapped(lookup_result_get_mapped(res.first), x); - - return res; - } - - void expanse_for_add_elem(size_t num_elem) { - if (_is_partitioned) { - size_t num_elem_per_sub_table = - (num_elem + NUM_LEVEL1_SUB_TABLES - 1) / NUM_LEVEL1_SUB_TABLES; - for (size_t i = 0; i < NUM_LEVEL1_SUB_TABLES; ++i) { - level1_sub_tables[i].expanse_for_add_elem(num_elem_per_sub_table); - } - } else { - level0_sub_table.expanse_for_add_elem(num_elem); - if (UNLIKELY(level0_sub_table.need_partition())) { - convert_to_partitioned(); - } - } - } - - template - void ALWAYS_INLINE prefetch(const Key& key, size_t hash_value) { - if (_is_partitioned) { - const auto sub_table_idx = get_sub_table_from_hash(hash_value); - level1_sub_tables[sub_table_idx].template prefetch(hash_value); - } else { - level0_sub_table.template prefetch(hash_value); - } - } - - /** Insert the key, - * return an iterator to a position that can be used for `placement new` of value, - * as well as the flag - whether a new key was inserted. - * - * You have to make `placement new` values if you inserted a new key, - * since when destroying a hash table, the destructor will be invoked for it! - * - * Example usage: - * - * Map::iterator it; - * bool inserted; - * map.emplace(key, it, inserted); - * if (inserted) - * new(&it->second) Mapped(value); - */ - template - void ALWAYS_INLINE emplace(KeyHolder&& key_holder, LookupResult& it, bool& inserted) { - size_t hash_value = hash(key_holder); - emplace(key_holder, it, inserted, hash_value); - } - - /// Same, but with a precalculated values of hash function. 
- template - void ALWAYS_INLINE emplace(KeyHolder&& key_holder, LookupResult& it, bool& inserted, - size_t hash_value) { - if (_is_partitioned) { - size_t sub_table_idx = get_sub_table_from_hash(hash_value); - level1_sub_tables[sub_table_idx].emplace(key_holder, it, inserted, hash_value); - } else { - level0_sub_table.emplace(key_holder, it, inserted, hash_value); - if (UNLIKELY(level0_sub_table.need_partition())) { - convert_to_partitioned(); - - // The hash table was converted to partitioned, so we have to re-find the key. - size_t sub_table_id = get_sub_table_from_hash(hash_value); - it = level1_sub_tables[sub_table_id].find(key_holder, hash_value); - } - } - } - - template - void ALWAYS_INLINE emplace(KeyHolder&& key_holder, LookupResult& it, size_t hash_value, - bool& inserted) { - emplace(key_holder, it, inserted, hash_value); - } - - template - void ALWAYS_INLINE lazy_emplace(KeyHolder&& key_holder, LookupResult& it, Func&& f) { - size_t hash_value = hash(key_holder); - lazy_emplace(key_holder, it, hash_value, std::forward(f)); - } - - template - void ALWAYS_INLINE lazy_emplace(KeyHolder&& key_holder, LookupResult& it, size_t hash_value, - Func&& f) { - if (_is_partitioned) { - size_t sub_table_idx = get_sub_table_from_hash(hash_value); - level1_sub_tables[sub_table_idx].lazy_emplace(key_holder, it, hash_value, - std::forward(f)); - } else { - level0_sub_table.lazy_emplace(key_holder, it, hash_value, std::forward(f)); - if (UNLIKELY(level0_sub_table.need_partition())) { - convert_to_partitioned(); - - // The hash table was converted to partitioned, so we have to re-find the key. 
- size_t sub_table_id = get_sub_table_from_hash(hash_value); - it = level1_sub_tables[sub_table_id].find(key_holder, hash_value); - } - } - } - - LookupResult ALWAYS_INLINE find(Key x, size_t hash_value) { - if (_is_partitioned) { - size_t sub_table_idx = get_sub_table_from_hash(hash_value); - return level1_sub_tables[sub_table_idx].find(x, hash_value); - } else { - return level0_sub_table.find(x, hash_value); - } - } - - ConstLookupResult ALWAYS_INLINE find(Key x, size_t hash_value) const { - return const_cast*>(this)->find(x, hash_value); - } - - LookupResult ALWAYS_INLINE find(Key x) { return find(x, hash(x)); } - - ConstLookupResult ALWAYS_INLINE find(Key x) const { return find(x, hash(x)); } - - size_t size() const { - if (_is_partitioned) { - size_t res = 0; - for (size_t i = 0; i < NUM_LEVEL1_SUB_TABLES; ++i) { - res += level1_sub_tables[i].size(); - } - return res; - } else { - return level0_sub_table.size(); - } - } - - std::vector sizes() const { - std::vector sizes; - if (_is_partitioned) { - for (size_t i = 0; i < NUM_LEVEL1_SUB_TABLES; ++i) { - sizes.push_back(level1_sub_tables[i].size()); - } - } else { - sizes.push_back(level0_sub_table.size()); - } - return sizes; - } - - bool empty() const { - if (_is_partitioned) { - for (size_t i = 0; i < NUM_LEVEL1_SUB_TABLES; ++i) - if (!level1_sub_tables[i].empty()) return false; - return true; - } else { - return level0_sub_table.empty(); - } - } - - bool add_elem_size_overflow(size_t row) const { - return !_is_partitioned && level0_sub_table.add_elem_size_overflow(row); - } - -private: - void convert_to_partitioned() { - SCOPED_RAW_TIMER(&_convert_timer_ns); - - DCHECK(!_is_partitioned); - _is_partitioned = true; - - auto bucket_count = level0_sub_table.get_buffer_size_in_cells(); - for (size_t i = 0; i < NUM_LEVEL1_SUB_TABLES; ++i) { - level1_sub_tables[i] = std::move(Impl(bucket_count / NUM_LEVEL1_SUB_TABLES)); - } - - auto it = level0_sub_table.begin(); - - /// It is assumed that the zero key (stored 
separately) is first in iteration order. - if (it != level0_sub_table.end() && it.get_ptr()->is_zero(level0_sub_table)) { - insert(it->get_value()); - ++it; - } - - for (; it != level0_sub_table.end(); ++it) { - const auto* cell = it.get_ptr(); - size_t hash_value = cell->get_hash(level0_sub_table); - size_t sub_table_idx = get_sub_table_from_hash(hash_value); - level1_sub_tables[sub_table_idx].insert_unique_non_zero(cell, hash_value); - } - - level0_sub_table.clear_and_shrink(); - } - - /// NOTE Bad for hash tables with more than 2^32 cells. - static size_t get_sub_table_from_hash(size_t hash_value) { - return (hash_value >> (32 - BITS_FOR_SUB_TABLE)) & MAX_SUB_TABLE; - } -}; diff --git a/be/src/vec/common/hash_table/ph_hash_map.h b/be/src/vec/common/hash_table/ph_hash_map.h index 50cf218dc87c065..de3204252234278 100644 --- a/be/src/vec/common/hash_table/ph_hash_map.h +++ b/be/src/vec/common/hash_table/ph_hash_map.h @@ -30,8 +30,7 @@ ALWAYS_INLINE inline auto lookup_result_get_mapped(std::pair* return &(it->second); } -template , - bool PartitionedHashTable = false> +template > class PHHashMap : private boost::noncopyable { public: using Self = PHHashMap; @@ -58,9 +57,6 @@ class PHHashMap : private boost::noncopyable { PHHashMap& operator=(PHHashMap&& rhs) { _hash_map.clear(); _hash_map = std::move(rhs._hash_map); - std::swap(_need_partition, rhs._need_partition); - std::swap(_partitioned_threshold, rhs._partitioned_threshold); - return *this; } @@ -130,19 +126,11 @@ class PHHashMap : private boost::noncopyable { inserted = true; ctor(key_holder, nullptr); }); - - if constexpr (PartitionedHashTable) { - _check_if_need_partition(); - } } template void ALWAYS_INLINE lazy_emplace(KeyHolder&& key_holder, LookupResult& it, Func&& f) { it = &*_hash_map.lazy_emplace(key_holder, [&](const auto& ctor) { f(ctor, key_holder); }); - - if constexpr (PartitionedHashTable) { - _check_if_need_partition(); - } } template @@ -157,10 +145,6 @@ class PHHashMap : private 
boost::noncopyable { ctor(key, mapped_type()); } }); - - if constexpr (PartitionedHashTable) { - _check_if_need_partition(); - } } template @@ -168,10 +152,6 @@ class PHHashMap : private boost::noncopyable { Func&& f) { it = &*_hash_map.lazy_emplace_with_hash(key, hash_value, [&](const auto& ctor) { f(ctor, key, key); }); - - if constexpr (PartitionedHashTable) { - _check_if_need_partition(); - } } void ALWAYS_INLINE insert(const Key& key, size_t hash_value, const Mapped& value) { @@ -225,18 +205,6 @@ class PHHashMap : private boost::noncopyable { } bool has_null_key_data() const { return false; } - bool need_partition() { return _need_partition; } - - void set_partitioned_threshold(int threshold) { _partitioned_threshold = threshold; } - - bool check_if_need_partition(size_t bucket_count) { - if constexpr (PartitionedHashTable) { - return _partitioned_threshold > 0 && bucket_count >= _partitioned_threshold; - } else { - return false; - } - } - bool empty() const { return _hash_map.empty(); } void clear_and_shrink() { _hash_map.clear(); } @@ -244,19 +212,5 @@ class PHHashMap : private boost::noncopyable { void expanse_for_add_elem(size_t num_elem) { _hash_map.reserve(num_elem); } private: - void _check_if_need_partition() { - if (UNLIKELY(check_if_need_partition(_hash_map.size() + 1))) { - _need_partition = add_elem_size_overflow(1); - } - } - HashMapImpl _hash_map; - // the bucket count threshold above which it's converted to partioned hash table - // > 0: enable convert dynamically - // 0: convert is disabled - int _partitioned_threshold = 0; - // if need resize and bucket count after resize will be >= _partitioned_threshold, - // this flag is set to true, and resize does not actually happen, - // PartitionedHashTable will convert this hash table to partitioned hash table - bool _need_partition; }; diff --git a/be/src/vec/common/schema_util.cpp b/be/src/vec/common/schema_util.cpp index adaee6e9fe6a5d1..9692c02fda31e97 100644 --- 
a/be/src/vec/common/schema_util.cpp +++ b/be/src/vec/common/schema_util.cpp @@ -360,6 +360,7 @@ void update_least_sparse_column(const std::vector& schemas, void inherit_column_attributes(const TabletColumn& source, TabletColumn& target, TabletSchemaSPtr& target_schema) { + DCHECK(target.is_extracted_column()); if (target.type() != FieldType::OLAP_FIELD_TYPE_TINYINT && target.type() != FieldType::OLAP_FIELD_TYPE_ARRAY && target.type() != FieldType::OLAP_FIELD_TYPE_DOUBLE && @@ -368,18 +369,18 @@ void inherit_column_attributes(const TabletColumn& source, TabletColumn& target, target.set_is_bf_column(source.is_bf_column()); } target.set_aggregation_method(source.aggregation()); - const auto* source_index_meta = target_schema->get_inverted_index(source.unique_id(), ""); + const auto* source_index_meta = target_schema->inverted_index(source.unique_id()); if (source_index_meta != nullptr) { // add index meta TabletIndex index_info = *source_index_meta; index_info.set_escaped_escaped_index_suffix_path(target.path_info_ptr()->get_path()); - // get_inverted_index: No need to check, just inherit directly - const auto* target_index_meta = target_schema->get_inverted_index(target, false); + const auto* target_index_meta = target_schema->inverted_index( + target.parent_unique_id(), target.path_info_ptr()->get_path()); if (target_index_meta != nullptr) { // already exist target_schema->update_index(target, index_info); } else { - target_schema->append_index(index_info); + target_schema->append_index(std::move(index_info)); } } } @@ -591,4 +592,20 @@ Status extract(ColumnPtr source, const PathInData& path, MutableColumnPtr& dst) return Status::OK(); } +bool has_schema_index_diff(const TabletSchema* new_schema, const TabletSchema* old_schema, + int32_t new_col_idx, int32_t old_col_idx) { + const auto& column_new = new_schema->column(new_col_idx); + const auto& column_old = old_schema->column(old_col_idx); + + if (column_new.is_bf_column() != column_old.is_bf_column() || + 
column_new.has_bitmap_index() != column_old.has_bitmap_index()) { + return true; + } + + bool new_schema_has_inverted_index = new_schema->inverted_index(column_new); + bool old_schema_has_inverted_index = old_schema->inverted_index(column_old); + + return new_schema_has_inverted_index != old_schema_has_inverted_index; +} + } // namespace doris::vectorized::schema_util diff --git a/be/src/vec/common/schema_util.h b/be/src/vec/common/schema_util.h index 080e6331dc1dd05..8ceb97a915630d8 100644 --- a/be/src/vec/common/schema_util.h +++ b/be/src/vec/common/schema_util.h @@ -121,4 +121,7 @@ Status extract(ColumnPtr source, const PathInData& path, MutableColumnPtr& dst); std::string dump_column(DataTypePtr type, const ColumnPtr& col); +bool has_schema_index_diff(const TabletSchema* new_schema, const TabletSchema* old_schema, + int32_t new_col_idx, int32_t old_col_idx); + } // namespace doris::vectorized::schema_util diff --git a/be/src/vec/core/block.cpp b/be/src/vec/core/block.cpp index 2eb06e3c6a553e3..11075335fb17afe 100644 --- a/be/src/vec/core/block.cpp +++ b/be/src/vec/core/block.cpp @@ -1083,7 +1083,7 @@ Status MutableBlock::add_rows(const Block* block, size_t row_begin, size_t lengt return Status::OK(); } -Status MutableBlock::add_rows(const Block* block, std::vector rows) { +Status MutableBlock::add_rows(const Block* block, const std::vector& rows) { RETURN_IF_CATCH_EXCEPTION({ DCHECK_LE(columns(), block->columns()); const auto& block_data = block->get_columns_with_type_and_name(); @@ -1093,7 +1093,7 @@ Status MutableBlock::add_rows(const Block* block, std::vector rows) { auto& dst = _columns[i]; const auto& src = *block_data[i].column.get(); dst->reserve(dst->size() + length); - for (size_t row : rows) { + for (auto row : rows) { // we can introduce a new function like `insert_assume_reserved` for IColumn. 
dst->insert_from(src, row); } diff --git a/be/src/vec/core/block.h b/be/src/vec/core/block.h index bbcdd9472ae178c..2242db3f9058c29 100644 --- a/be/src/vec/core/block.h +++ b/be/src/vec/core/block.h @@ -194,7 +194,9 @@ class Block { // Skip the rows in block, use in OFFSET, LIMIT operation void skip_num_rows(int64_t& offset); - size_t columns() const { return data.size(); } + /// As the assumption we used around, the number of columns won't exceed int16 range. so no need to worry when we + /// assign it to int32. + uint32_t columns() const { return static_cast(data.size()); } /// Checks that every column in block is not nullptr and has same number of elements. void check_number_of_rows(bool allow_null_columns = false) const; @@ -624,7 +626,7 @@ class MutableBlock { Status add_rows(const Block* block, const uint32_t* row_begin, const uint32_t* row_end, const std::vector* column_offset = nullptr); Status add_rows(const Block* block, size_t row_begin, size_t length); - Status add_rows(const Block* block, std::vector rows); + Status add_rows(const Block* block, const std::vector& rows); /// remove the column with the specified name void erase(const String& name); diff --git a/be/src/vec/core/column_numbers.h b/be/src/vec/core/column_numbers.h index 25c8912f73c2ba4..32a0dd5804ab221 100644 --- a/be/src/vec/core/column_numbers.h +++ b/be/src/vec/core/column_numbers.h @@ -20,11 +20,10 @@ #pragma once -#include +#include #include namespace doris::vectorized { -using ColumnNumbers = std::vector; - +using ColumnNumbers = std::vector; } diff --git a/be/src/vec/core/decimal_comparison.h b/be/src/vec/core/decimal_comparison.h index c5c1e40e2ff4dff..9e9d9ad399ae04f 100644 --- a/be/src/vec/core/decimal_comparison.h +++ b/be/src/vec/core/decimal_comparison.h @@ -79,7 +79,7 @@ class DecimalComparison { using ArrayA = typename ColVecA::Container; using ArrayB = typename ColVecB::Container; - DecimalComparison(Block& block, size_t result, const ColumnWithTypeAndName& col_left, + 
DecimalComparison(Block& block, uint32_t result, const ColumnWithTypeAndName& col_left, const ColumnWithTypeAndName& col_right) { if (!apply(block, result, col_left, col_right)) { LOG(FATAL) << fmt::format("Wrong decimal comparison with {} and {}", @@ -87,7 +87,7 @@ class DecimalComparison { } } - static bool apply(Block& block, size_t result [[maybe_unused]], + static bool apply(Block& block, uint32_t result [[maybe_unused]], const ColumnWithTypeAndName& col_left, const ColumnWithTypeAndName& col_right) { if constexpr (_actual) { diff --git a/be/src/vec/data_types/data_type_string.cpp b/be/src/vec/data_types/data_type_string.cpp index d2c2ae2c0b03a2b..878e6c319a103ba 100644 --- a/be/src/vec/data_types/data_type_string.cpp +++ b/be/src/vec/data_types/data_type_string.cpp @@ -27,6 +27,8 @@ #include #include "agent/be_exec_version_manager.h" +#include "common/exception.h" +#include "common/status.h" #include "vec/columns/column.h" #include "vec/columns/column_const.h" #include "vec/columns/column_string.h" @@ -81,7 +83,7 @@ bool DataTypeString::equals(const IDataType& rhs) const { int64_t DataTypeString::get_uncompressed_serialized_bytes(const IColumn& column, int be_exec_version) const { if (be_exec_version >= USE_CONST_SERDE) { - auto size = sizeof(bool) + sizeof(size_t) + sizeof(size_t); + int64_t size = sizeof(bool) + sizeof(size_t) + sizeof(size_t); bool is_const_column = is_column_const(column); const IColumn* string_column = &column; if (is_const_column) { @@ -99,9 +101,15 @@ int64_t DataTypeString::get_uncompressed_serialized_bytes(const IColumn& column, upper_int32(offsets_size))); } size += sizeof(size_t); - if (auto bytes = data_column.get_chars().size(); bytes <= SERIALIZED_MEM_SIZE_LIMIT) { + if (size_t bytes = data_column.get_chars().size(); bytes <= SERIALIZED_MEM_SIZE_LIMIT) { size += bytes; } else { + if (bytes > LZ4_MAX_INPUT_SIZE) { + throw Exception(ErrorCode::BUFFER_OVERFLOW, + "LZ4_compressBound meet invalid input size, input_size={}, " + 
"LZ4_MAX_INPUT_SIZE={}", + bytes, LZ4_MAX_INPUT_SIZE); + } size += sizeof(size_t) + std::max(bytes, (size_t)LZ4_compressBound(bytes)); } return size; diff --git a/be/src/vec/data_types/serde/data_type_ipv6_serde.cpp b/be/src/vec/data_types/serde/data_type_ipv6_serde.cpp index f09b6feb4a25ed1..612c9ce42227dd9 100644 --- a/be/src/vec/data_types/serde/data_type_ipv6_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_ipv6_serde.cpp @@ -69,25 +69,21 @@ Status DataTypeIPv6SerDe::write_column_to_mysql(const IColumn& column, } void DataTypeIPv6SerDe::read_one_cell_from_jsonb(IColumn& column, const JsonbValue* arg) const { - IPv6 val = 0; - const auto* str_value = static_cast(arg); - ReadBuffer rb(reinterpret_cast(str_value->getBlob()), - str_value->getBlobLen()); - if (!read_ipv6_text_impl(val, rb)) { - throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "parse ipv6 fail, string: '{}'", - rb.to_string()); - } - assert_cast(column).insert_value(val); + const auto* str_value = static_cast(arg); + column.deserialize_and_insert_from_arena(str_value->getBlob()); } void DataTypeIPv6SerDe::write_one_cell_to_jsonb(const IColumn& column, JsonbWriterT& result, Arena* mem_pool, int col_id, int row_num) const { - // we make ipv6 as string in jsonb + // we make ipv6 as BinaryValue in jsonb result.writeKey(col_id); - IPv6 data = assert_cast(column).get_element(row_num); - IPv6Value ipv6_value(data); - result.writeString(ipv6_value.to_string()); + const char* begin = nullptr; + // maybe serialize_value_into_arena should move to here later. 
+ StringRef value = column.serialize_value_into_arena(row_num, *mem_pool, begin); + result.writeStartBinary(); + result.writeBinary(value.data, value.size); + result.writeEndBinary(); } Status DataTypeIPv6SerDe::serialize_one_cell_to_json(const IColumn& column, int row_num, diff --git a/be/src/vec/exec/format/json/new_json_reader.cpp b/be/src/vec/exec/format/json/new_json_reader.cpp index 4db2d62b9949e77..7dfc3c528cd88e9 100644 --- a/be/src/vec/exec/format/json/new_json_reader.cpp +++ b/be/src/vec/exec/format/json/new_json_reader.cpp @@ -333,6 +333,11 @@ Status NewJsonReader::get_parsed_schema(std::vector* col_names, objectValue = _json_doc; } + if (!objectValue->IsObject()) { + return Status::DataQualityError("JSON data is not an object. but: {}", + objectValue->GetType()); + } + // use jsonpaths to col_names if (!_parsed_jsonpaths.empty()) { for (auto& _parsed_jsonpath : _parsed_jsonpaths) { diff --git a/be/src/vec/exec/format/orc/vorc_reader.cpp b/be/src/vec/exec/format/orc/vorc_reader.cpp index 6b6639f2feb2446..5d6ae4024c75610 100644 --- a/be/src/vec/exec/format/orc/vorc_reader.cpp +++ b/be/src/vec/exec/format/orc/vorc_reader.cpp @@ -773,7 +773,7 @@ Status OrcReader::set_fill_columns( visit_slot(child.get()); } } else if (VInPredicate* in_predicate = typeid_cast(filter_impl)) { - if (in_predicate->children().size() > 0) { + if (in_predicate->get_num_children() > 0) { visit_slot(in_predicate->children()[0].get()); } } else { @@ -857,28 +857,79 @@ Status OrcReader::set_fill_columns( if (_colname_to_value_range == nullptr || !_init_search_argument(_colname_to_value_range)) { _lazy_read_ctx.can_lazy_read = false; } + try { + _row_reader_options.range(_range_start_offset, _range_size); + _row_reader_options.setTimezoneName(_ctz == "CST" ? 
"Asia/Shanghai" : _ctz); + _row_reader_options.include(_read_cols); + _row_reader_options.setEnableLazyDecoding(true); - if (!_lazy_read_ctx.can_lazy_read) { - for (auto& kv : _lazy_read_ctx.predicate_partition_columns) { - _lazy_read_ctx.partition_columns.emplace(kv.first, kv.second); + uint64_t number_of_stripes = _reader->getNumberOfStripes(); + auto all_stripes_needed = _reader->getNeedReadStripes(_row_reader_options); + + int64_t range_end_offset = _range_start_offset + _range_size; + + // If you set "orc_tiny_stripe_threshold_bytes" = 0, the use tiny stripes merge io optimization will not be used. + int64_t orc_tiny_stripe_threshold_bytes = 8L * 1024L * 1024L; + int64_t orc_once_max_read_bytes = 8L * 1024L * 1024L; + int64_t orc_max_merge_distance_bytes = 1L * 1024L * 1024L; + + if (_state != nullptr) { + orc_tiny_stripe_threshold_bytes = + _state->query_options().orc_tiny_stripe_threshold_bytes; + orc_once_max_read_bytes = _state->query_options().orc_once_max_read_bytes; + orc_max_merge_distance_bytes = _state->query_options().orc_max_merge_distance_bytes; } - for (auto& kv : _lazy_read_ctx.predicate_missing_columns) { - _lazy_read_ctx.missing_columns.emplace(kv.first, kv.second); + + bool all_tiny_stripes = true; + std::vector tiny_stripe_ranges; + + for (uint64_t i = 0; i < number_of_stripes; i++) { + std::unique_ptr strip_info = _reader->getStripe(i); + uint64_t strip_start_offset = strip_info->getOffset(); + uint64_t strip_end_offset = strip_start_offset + strip_info->getLength(); + + if (strip_start_offset >= range_end_offset || strip_end_offset < _range_start_offset || + !all_stripes_needed[i]) { + continue; + } + if (strip_info->getLength() > orc_tiny_stripe_threshold_bytes) { + all_tiny_stripes = false; + break; + } + + tiny_stripe_ranges.emplace_back(strip_start_offset, strip_end_offset); } - } + if (all_tiny_stripes && number_of_stripes > 0) { + std::vector prefetch_merge_ranges = + io::PrefetchRange::merge_adjacent_seq_ranges(tiny_stripe_ranges, + 
orc_max_merge_distance_bytes, + orc_once_max_read_bytes); + auto range_finder = + std::make_shared(std::move(prefetch_merge_ranges)); - _fill_all_columns = true; + auto* orc_input_stream_ptr = static_cast(_reader->getStream()); + orc_input_stream_ptr->set_all_tiny_stripes(); + auto& orc_file_reader = orc_input_stream_ptr->get_file_reader(); + auto orc_inner_reader = orc_input_stream_ptr->get_inner_reader(); + orc_file_reader = std::make_shared(_profile, orc_inner_reader, + range_finder); + } - // create orc row reader - try { - _row_reader_options.range(_range_start_offset, _range_size); - _row_reader_options.setTimezoneName(_ctz == "CST" ? "Asia/Shanghai" : _ctz); - _row_reader_options.include(_read_cols); + if (!_lazy_read_ctx.can_lazy_read) { + for (auto& kv : _lazy_read_ctx.predicate_partition_columns) { + _lazy_read_ctx.partition_columns.emplace(kv.first, kv.second); + } + for (auto& kv : _lazy_read_ctx.predicate_missing_columns) { + _lazy_read_ctx.missing_columns.emplace(kv.first, kv.second); + } + } + + _fill_all_columns = true; + // create orc row reader if (_lazy_read_ctx.can_lazy_read) { _row_reader_options.filter(_lazy_read_ctx.predicate_orc_columns); _orc_filter = std::unique_ptr(new ORCFilterImpl(this)); } - _row_reader_options.setEnableLazyDecoding(true); if (!_lazy_read_ctx.conjuncts.empty()) { _string_dict_filter = std::make_unique(this); } @@ -2415,6 +2466,9 @@ MutableColumnPtr OrcReader::_convert_dict_column_to_string_column( void ORCFileInputStream::beforeReadStripe( std::unique_ptr current_strip_information, std::vector selected_columns) { + if (_is_all_tiny_stripes) { + return; + } if (_file_reader != nullptr) { _file_reader->collect_profile_before_close(); } diff --git a/be/src/vec/exec/format/orc/vorc_reader.h b/be/src/vec/exec/format/orc/vorc_reader.h index 4aad5637ef544e4..0807f4949e58508 100644 --- a/be/src/vec/exec/format/orc/vorc_reader.h +++ b/be/src/vec/exec/format/orc/vorc_reader.h @@ -34,6 +34,7 @@ #include "common/status.h" #include 
"exec/olap_common.h" #include "io/file_factory.h" +#include "io/fs/buffered_reader.h" #include "io/fs/file_reader.h" #include "io/fs/file_reader_writer_fwd.h" #include "olap/olap_common.h" @@ -642,7 +643,11 @@ class ORCFileInputStream : public orc::InputStream, public ProfileCollector { _io_ctx(io_ctx), _profile(profile) {} - ~ORCFileInputStream() override = default; + ~ORCFileInputStream() override { + if (_file_reader != nullptr) { + _file_reader->collect_profile_before_close(); + } + } uint64_t getLength() const override { return _file_reader->size(); } @@ -655,6 +660,12 @@ class ORCFileInputStream : public orc::InputStream, public ProfileCollector { void beforeReadStripe(std::unique_ptr current_strip_information, std::vector selected_columns) override; + void set_all_tiny_stripes() { _is_all_tiny_stripes = true; } + + io::FileReaderSPtr& get_file_reader() { return _file_reader; } + + io::FileReaderSPtr& get_inner_reader() { return _inner_reader; } + protected: void _collect_profile_at_runtime() override {}; void _collect_profile_before_close() override; @@ -663,10 +674,10 @@ class ORCFileInputStream : public orc::InputStream, public ProfileCollector { const std::string& _file_name; io::FileReaderSPtr _inner_reader; io::FileReaderSPtr _file_reader; + bool _is_all_tiny_stripes = false; // Owned by OrcReader OrcReader::Statistics* _statistics = nullptr; const io::IOContext* _io_ctx = nullptr; RuntimeProfile* _profile = nullptr; }; - } // namespace doris::vectorized diff --git a/be/src/vec/exec/format/parquet/vparquet_reader.cpp b/be/src/vec/exec/format/parquet/vparquet_reader.cpp index 6c4e4983c70a43f..1a3ae2f885e4a06 100644 --- a/be/src/vec/exec/format/parquet/vparquet_reader.cpp +++ b/be/src/vec/exec/format/parquet/vparquet_reader.cpp @@ -412,7 +412,7 @@ Status ParquetReader::set_fill_columns( visit_slot(child.get()); } } else if (VInPredicate* in_predicate = typeid_cast(filter_impl)) { - if (in_predicate->children().size() > 0) { + if 
(in_predicate->get_num_children() > 0) { visit_slot(in_predicate->children()[0].get()); } } else { diff --git a/be/src/vec/exec/format/table/paimon_jni_reader.cpp b/be/src/vec/exec/format/table/paimon_jni_reader.cpp index a9ec243cf460b88..30358eace1aae2b 100644 --- a/be/src/vec/exec/format/table/paimon_jni_reader.cpp +++ b/be/src/vec/exec/format/table/paimon_jni_reader.cpp @@ -61,6 +61,9 @@ PaimonJniReader::PaimonJniReader(const std::vector& file_slot_d std::to_string(range.table_format_params.paimon_params.last_update_time); params["required_fields"] = join(column_names, ","); params["columns_types"] = join(column_types, "#"); + if (range.table_format_params.paimon_params.__isset.paimon_table) { + params["paimon_table"] = range.table_format_params.paimon_params.paimon_table; + } // Used to create paimon option for (auto& kv : range.table_format_params.paimon_params.paimon_options) { diff --git a/be/src/vec/exec/jni_connector.cpp b/be/src/vec/exec/jni_connector.cpp index f06524944ffc510..a7b0d5144ee623f 100644 --- a/be/src/vec/exec/jni_connector.cpp +++ b/be/src/vec/exec/jni_connector.cpp @@ -80,16 +80,13 @@ Status JniConnector::open(RuntimeState* state, RuntimeProfile* profile) { batch_size = _state->batch_size(); } RETURN_IF_ERROR(JniUtil::GetJNIEnv(&env)); - if (env == nullptr) { - return Status::InternalError("Failed to get/create JVM"); - } SCOPED_TIMER(_open_scanner_time); _scanner_params.emplace("time_zone", _state->timezone()); RETURN_IF_ERROR(_init_jni_scanner(env, batch_size)); // Call org.apache.doris.common.jni.JniScanner#open env->CallVoidMethod(_jni_scanner_obj, _jni_scanner_open); - _scanner_opened = true; RETURN_ERROR_IF_EXC(env); + _scanner_opened = true; return Status::OK(); } diff --git a/be/src/vec/exec/scan/new_es_scanner.cpp b/be/src/vec/exec/scan/new_es_scanner.cpp index d59aebd98c73411..fae83854be09109 100644 --- a/be/src/vec/exec/scan/new_es_scanner.cpp +++ b/be/src/vec/exec/scan/new_es_scanner.cpp @@ -169,8 +169,7 @@ Status 
NewEsScanner::_get_block_impl(RuntimeState* state, Block* block, bool* eo } Status NewEsScanner::_get_next(std::vector& columns) { - auto read_timer = _local_state->cast()._read_timer; - SCOPED_TIMER(read_timer); + SCOPED_TIMER(_local_state->cast()._read_timer); if (_line_eof && _batch_eof) { _es_eof = true; return Status::OK(); @@ -185,12 +184,8 @@ Status NewEsScanner::_get_next(std::vector& column } } - auto rows_read_counter = - _local_state->cast()._rows_read_counter; - auto materialize_timer = - _local_state->cast()._materialize_timer; - COUNTER_UPDATE(rows_read_counter, 1); - SCOPED_TIMER(materialize_timer); + COUNTER_UPDATE(_local_state->cast()._blocks_read_counter, 1); + SCOPED_TIMER(_local_state->cast()._materialize_timer); RETURN_IF_ERROR(_es_scroll_parser->fill_columns(_tuple_desc, columns, &_line_eof, _docvalue_context, _state->timezone_obj())); if (!_line_eof) { diff --git a/be/src/vec/exec/scan/new_jdbc_scanner.cpp b/be/src/vec/exec/scan/new_jdbc_scanner.cpp index a23e83e2426c078..7eaa9ab3eab7881 100644 --- a/be/src/vec/exec/scan/new_jdbc_scanner.cpp +++ b/be/src/vec/exec/scan/new_jdbc_scanner.cpp @@ -89,7 +89,6 @@ Status NewJdbcScanner::prepare(RuntimeState* state, const VExprContextSPtrs& con _jdbc_param.connection_pool_max_life_time = jdbc_table->connection_pool_max_life_time(); _jdbc_param.connection_pool_max_wait_time = jdbc_table->connection_pool_max_wait_time(); _jdbc_param.connection_pool_keep_alive = jdbc_table->connection_pool_keep_alive(); - _jdbc_param.enable_connection_pool = jdbc_table->enable_connection_pool(); _local_state->scanner_profile()->add_info_string("JdbcDriverClass", _jdbc_param.driver_class); _local_state->scanner_profile()->add_info_string("JdbcDriverUrl", _jdbc_param.driver_path); diff --git a/be/src/vec/exec/scan/new_olap_scanner.cpp b/be/src/vec/exec/scan/new_olap_scanner.cpp index 602406186553222..f40f30f5b16e67e 100644 --- a/be/src/vec/exec/scan/new_olap_scanner.cpp +++ b/be/src/vec/exec/scan/new_olap_scanner.cpp @@ 
-226,8 +226,7 @@ Status NewOlapScanner::init() { Status NewOlapScanner::open(RuntimeState* state) { RETURN_IF_ERROR(VScanner::open(state)); - auto* timer = ((pipeline::OlapScanLocalState*)_local_state)->_reader_init_timer; - SCOPED_TIMER(timer); + SCOPED_TIMER(_local_state->cast()._reader_init_timer); auto res = _tablet_reader->init(_tablet_reader_params); if (!res.ok()) { @@ -543,11 +542,9 @@ void NewOlapScanner::_update_realtime_counters() { const OlapReaderStatistics& stats = _tablet_reader->stats(); COUNTER_UPDATE(local_state->_read_compressed_counter, stats.compressed_bytes_read); COUNTER_UPDATE(local_state->_scan_bytes, stats.compressed_bytes_read); - _scan_bytes += stats.compressed_bytes_read; _tablet_reader->mutable_stats()->compressed_bytes_read = 0; COUNTER_UPDATE(local_state->_scan_rows, stats.raw_rows_read); - _scan_rows += stats.raw_rows_read; // if raw_rows_read is reset, scanNode will scan all table rows which may cause BE crash _tablet_reader->mutable_stats()->raw_rows_read = 0; } @@ -562,97 +559,92 @@ void NewOlapScanner::_collect_profile_before_close() { VScanner::_collect_profile_before_close(); #ifndef INCR_COUNTER -#define INCR_COUNTER(Parent) \ - COUNTER_UPDATE(Parent->_io_timer, stats.io_ns); \ - COUNTER_UPDATE(Parent->_read_compressed_counter, stats.compressed_bytes_read); \ - COUNTER_UPDATE(Parent->_scan_bytes, stats.compressed_bytes_read); \ - _scan_bytes += stats.compressed_bytes_read; \ - COUNTER_UPDATE(Parent->_decompressor_timer, stats.decompress_ns); \ - COUNTER_UPDATE(Parent->_read_uncompressed_counter, stats.uncompressed_bytes_read); \ - COUNTER_UPDATE(Parent->_block_load_timer, stats.block_load_ns); \ - COUNTER_UPDATE(Parent->_block_load_counter, stats.blocks_load); \ - COUNTER_UPDATE(Parent->_block_fetch_timer, stats.block_fetch_ns); \ - COUNTER_UPDATE(Parent->_delete_bitmap_get_agg_timer, stats.delete_bitmap_get_agg_ns); \ - COUNTER_UPDATE(Parent->_block_convert_timer, stats.block_convert_ns); \ - 
COUNTER_UPDATE(Parent->_scan_rows, stats.raw_rows_read); \ - _scan_rows += _tablet_reader->mutable_stats()->raw_rows_read; \ - COUNTER_UPDATE(Parent->_vec_cond_timer, stats.vec_cond_ns); \ - COUNTER_UPDATE(Parent->_short_cond_timer, stats.short_cond_ns); \ - COUNTER_UPDATE(Parent->_expr_filter_timer, stats.expr_filter_ns); \ - COUNTER_UPDATE(Parent->_block_init_timer, stats.block_init_ns); \ - COUNTER_UPDATE(Parent->_block_init_seek_timer, stats.block_init_seek_ns); \ - COUNTER_UPDATE(Parent->_block_init_seek_counter, stats.block_init_seek_num); \ - COUNTER_UPDATE(Parent->_block_conditions_filtered_timer, stats.block_conditions_filtered_ns); \ - COUNTER_UPDATE(Parent->_block_conditions_filtered_bf_timer, \ - stats.block_conditions_filtered_bf_ns); \ - COUNTER_UPDATE(Parent->_collect_iterator_merge_next_timer, \ - stats.collect_iterator_merge_next_timer); \ - COUNTER_UPDATE(Parent->_block_conditions_filtered_zonemap_timer, \ - stats.block_conditions_filtered_zonemap_ns); \ - COUNTER_UPDATE(Parent->_block_conditions_filtered_zonemap_rp_timer, \ - stats.block_conditions_filtered_zonemap_rp_ns); \ - COUNTER_UPDATE(Parent->_block_conditions_filtered_dict_timer, \ - stats.block_conditions_filtered_dict_ns); \ - COUNTER_UPDATE(Parent->_first_read_timer, stats.first_read_ns); \ - COUNTER_UPDATE(Parent->_second_read_timer, stats.second_read_ns); \ - COUNTER_UPDATE(Parent->_first_read_seek_timer, stats.block_first_read_seek_ns); \ - COUNTER_UPDATE(Parent->_first_read_seek_counter, stats.block_first_read_seek_num); \ - COUNTER_UPDATE(Parent->_lazy_read_timer, stats.lazy_read_ns); \ - COUNTER_UPDATE(Parent->_lazy_read_seek_timer, stats.block_lazy_read_seek_ns); \ - COUNTER_UPDATE(Parent->_lazy_read_seek_counter, stats.block_lazy_read_seek_num); \ - COUNTER_UPDATE(Parent->_output_col_timer, stats.output_col_ns); \ - COUNTER_UPDATE(Parent->_rows_vec_cond_filtered_counter, stats.rows_vec_cond_filtered); \ - COUNTER_UPDATE(Parent->_rows_short_circuit_cond_filtered_counter, \ - 
stats.rows_short_circuit_cond_filtered); \ - COUNTER_UPDATE(Parent->_rows_vec_cond_input_counter, stats.vec_cond_input_rows); \ - COUNTER_UPDATE(Parent->_rows_short_circuit_cond_input_counter, \ - stats.short_circuit_cond_input_rows); \ - for (auto& [id, info] : stats.filter_info) { \ - Parent->add_filter_info(id, info); \ - } \ - COUNTER_UPDATE(Parent->_stats_filtered_counter, stats.rows_stats_filtered); \ - COUNTER_UPDATE(Parent->_stats_rp_filtered_counter, stats.rows_stats_rp_filtered); \ - COUNTER_UPDATE(Parent->_dict_filtered_counter, stats.rows_dict_filtered); \ - COUNTER_UPDATE(Parent->_bf_filtered_counter, stats.rows_bf_filtered); \ - COUNTER_UPDATE(Parent->_del_filtered_counter, stats.rows_del_filtered); \ - COUNTER_UPDATE(Parent->_del_filtered_counter, stats.rows_del_by_bitmap); \ - COUNTER_UPDATE(Parent->_del_filtered_counter, stats.rows_vec_del_cond_filtered); \ - COUNTER_UPDATE(Parent->_conditions_filtered_counter, stats.rows_conditions_filtered); \ - COUNTER_UPDATE(Parent->_key_range_filtered_counter, stats.rows_key_range_filtered); \ - COUNTER_UPDATE(Parent->_total_pages_num_counter, stats.total_pages_num); \ - COUNTER_UPDATE(Parent->_cached_pages_num_counter, stats.cached_pages_num); \ - COUNTER_UPDATE(Parent->_bitmap_index_filter_counter, stats.rows_bitmap_index_filtered); \ - COUNTER_UPDATE(Parent->_bitmap_index_filter_timer, stats.bitmap_index_filter_timer); \ - COUNTER_UPDATE(Parent->_inverted_index_filter_counter, stats.rows_inverted_index_filtered); \ - COUNTER_UPDATE(Parent->_inverted_index_filter_timer, stats.inverted_index_filter_timer); \ - COUNTER_UPDATE(Parent->_inverted_index_query_cache_hit_counter, \ - stats.inverted_index_query_cache_hit); \ - COUNTER_UPDATE(Parent->_inverted_index_query_cache_miss_counter, \ - stats.inverted_index_query_cache_miss); \ - COUNTER_UPDATE(Parent->_inverted_index_query_timer, stats.inverted_index_query_timer); \ - COUNTER_UPDATE(Parent->_inverted_index_query_null_bitmap_timer, \ - 
stats.inverted_index_query_null_bitmap_timer); \ - COUNTER_UPDATE(Parent->_inverted_index_query_bitmap_copy_timer, \ - stats.inverted_index_query_bitmap_copy_timer); \ - COUNTER_UPDATE(Parent->_inverted_index_query_bitmap_op_timer, \ - stats.inverted_index_query_bitmap_op_timer); \ - COUNTER_UPDATE(Parent->_inverted_index_searcher_open_timer, \ - stats.inverted_index_searcher_open_timer); \ - COUNTER_UPDATE(Parent->_inverted_index_searcher_search_timer, \ - stats.inverted_index_searcher_search_timer); \ - COUNTER_UPDATE(Parent->_inverted_index_searcher_cache_hit_counter, \ - stats.inverted_index_searcher_cache_hit); \ - COUNTER_UPDATE(Parent->_inverted_index_searcher_cache_miss_counter, \ - stats.inverted_index_searcher_cache_miss); \ - COUNTER_UPDATE(Parent->_inverted_index_downgrade_count_counter, \ - stats.inverted_index_downgrade_count); \ - if (config::enable_file_cache) { \ - io::FileCacheProfileReporter cache_profile(Parent->_segment_profile.get()); \ - cache_profile.update(&stats.file_cache_stats); \ - } \ - COUNTER_UPDATE(Parent->_output_index_result_column_timer, \ - stats.output_index_result_column_timer); \ - COUNTER_UPDATE(Parent->_filtered_segment_counter, stats.filtered_segment_number); \ +#define INCR_COUNTER(Parent) \ + COUNTER_UPDATE(Parent->_io_timer, stats.io_ns); \ + COUNTER_UPDATE(Parent->_read_compressed_counter, stats.compressed_bytes_read); \ + COUNTER_UPDATE(Parent->_scan_bytes, stats.compressed_bytes_read); \ + COUNTER_UPDATE(Parent->_decompressor_timer, stats.decompress_ns); \ + COUNTER_UPDATE(Parent->_read_uncompressed_counter, stats.uncompressed_bytes_read); \ + COUNTER_UPDATE(Parent->_block_load_timer, stats.block_load_ns); \ + COUNTER_UPDATE(Parent->_block_load_counter, stats.blocks_load); \ + COUNTER_UPDATE(Parent->_block_fetch_timer, stats.block_fetch_ns); \ + COUNTER_UPDATE(Parent->_delete_bitmap_get_agg_timer, stats.delete_bitmap_get_agg_ns); \ + COUNTER_UPDATE(Parent->_scan_rows, stats.raw_rows_read); \ + 
COUNTER_UPDATE(Parent->_vec_cond_timer, stats.vec_cond_ns); \ + COUNTER_UPDATE(Parent->_short_cond_timer, stats.short_cond_ns); \ + COUNTER_UPDATE(Parent->_expr_filter_timer, stats.expr_filter_ns); \ + COUNTER_UPDATE(Parent->_block_init_timer, stats.block_init_ns); \ + COUNTER_UPDATE(Parent->_block_init_seek_timer, stats.block_init_seek_ns); \ + COUNTER_UPDATE(Parent->_block_init_seek_counter, stats.block_init_seek_num); \ + COUNTER_UPDATE(Parent->_segment_generate_row_range_timer, stats.generate_row_ranges_ns); \ + COUNTER_UPDATE(Parent->_segment_generate_row_range_by_bf_timer, \ + stats.generate_row_ranges_by_bf_ns); \ + COUNTER_UPDATE(Parent->_collect_iterator_merge_next_timer, \ + stats.collect_iterator_merge_next_timer); \ + COUNTER_UPDATE(Parent->_segment_generate_row_range_by_zonemap_timer, \ + stats.generate_row_ranges_by_zonemap_ns); \ + COUNTER_UPDATE(Parent->_segment_generate_row_range_by_dict_timer, \ + stats.generate_row_ranges_by_dict_ns); \ + COUNTER_UPDATE(Parent->_predicate_column_read_timer, stats.predicate_column_read_ns); \ + COUNTER_UPDATE(Parent->_non_predicate_column_read_timer, stats.non_predicate_read_ns); \ + COUNTER_UPDATE(Parent->_predicate_column_read_seek_timer, \ + stats.predicate_column_read_seek_ns); \ + COUNTER_UPDATE(Parent->_predicate_column_read_seek_counter, \ + stats.predicate_column_read_seek_num); \ + COUNTER_UPDATE(Parent->_lazy_read_timer, stats.lazy_read_ns); \ + COUNTER_UPDATE(Parent->_lazy_read_seek_timer, stats.block_lazy_read_seek_ns); \ + COUNTER_UPDATE(Parent->_lazy_read_seek_counter, stats.block_lazy_read_seek_num); \ + COUNTER_UPDATE(Parent->_output_col_timer, stats.output_col_ns); \ + COUNTER_UPDATE(Parent->_rows_vec_cond_filtered_counter, stats.rows_vec_cond_filtered); \ + COUNTER_UPDATE(Parent->_rows_short_circuit_cond_filtered_counter, \ + stats.rows_short_circuit_cond_filtered); \ + COUNTER_UPDATE(Parent->_rows_vec_cond_input_counter, stats.vec_cond_input_rows); \ + 
COUNTER_UPDATE(Parent->_rows_short_circuit_cond_input_counter, \ + stats.short_circuit_cond_input_rows); \ + for (auto& [id, info] : stats.filter_info) { \ + Parent->add_filter_info(id, info); \ + } \ + COUNTER_UPDATE(Parent->_stats_filtered_counter, stats.rows_stats_filtered); \ + COUNTER_UPDATE(Parent->_stats_rp_filtered_counter, stats.rows_stats_rp_filtered); \ + COUNTER_UPDATE(Parent->_dict_filtered_counter, stats.rows_dict_filtered); \ + COUNTER_UPDATE(Parent->_bf_filtered_counter, stats.rows_bf_filtered); \ + COUNTER_UPDATE(Parent->_del_filtered_counter, stats.rows_del_filtered); \ + COUNTER_UPDATE(Parent->_del_filtered_counter, stats.rows_del_by_bitmap); \ + COUNTER_UPDATE(Parent->_del_filtered_counter, stats.rows_vec_del_cond_filtered); \ + COUNTER_UPDATE(Parent->_conditions_filtered_counter, stats.rows_conditions_filtered); \ + COUNTER_UPDATE(Parent->_key_range_filtered_counter, stats.rows_key_range_filtered); \ + COUNTER_UPDATE(Parent->_total_pages_num_counter, stats.total_pages_num); \ + COUNTER_UPDATE(Parent->_cached_pages_num_counter, stats.cached_pages_num); \ + COUNTER_UPDATE(Parent->_bitmap_index_filter_counter, stats.rows_bitmap_index_filtered); \ + COUNTER_UPDATE(Parent->_bitmap_index_filter_timer, stats.bitmap_index_filter_timer); \ + COUNTER_UPDATE(Parent->_inverted_index_filter_counter, stats.rows_inverted_index_filtered); \ + COUNTER_UPDATE(Parent->_inverted_index_filter_timer, stats.inverted_index_filter_timer); \ + COUNTER_UPDATE(Parent->_inverted_index_query_cache_hit_counter, \ + stats.inverted_index_query_cache_hit); \ + COUNTER_UPDATE(Parent->_inverted_index_query_cache_miss_counter, \ + stats.inverted_index_query_cache_miss); \ + COUNTER_UPDATE(Parent->_inverted_index_query_timer, stats.inverted_index_query_timer); \ + COUNTER_UPDATE(Parent->_inverted_index_query_null_bitmap_timer, \ + stats.inverted_index_query_null_bitmap_timer); \ + COUNTER_UPDATE(Parent->_inverted_index_query_bitmap_copy_timer, \ + 
stats.inverted_index_query_bitmap_copy_timer); \ + COUNTER_UPDATE(Parent->_inverted_index_searcher_open_timer, \ + stats.inverted_index_searcher_open_timer); \ + COUNTER_UPDATE(Parent->_inverted_index_searcher_search_timer, \ + stats.inverted_index_searcher_search_timer); \ + COUNTER_UPDATE(Parent->_inverted_index_searcher_cache_hit_counter, \ + stats.inverted_index_searcher_cache_hit); \ + COUNTER_UPDATE(Parent->_inverted_index_searcher_cache_miss_counter, \ + stats.inverted_index_searcher_cache_miss); \ + COUNTER_UPDATE(Parent->_inverted_index_downgrade_count_counter, \ + stats.inverted_index_downgrade_count); \ + if (config::enable_file_cache) { \ + io::FileCacheProfileReporter cache_profile(Parent->_segment_profile.get()); \ + cache_profile.update(&stats.file_cache_stats); \ + } \ + COUNTER_UPDATE(Parent->_output_index_result_column_timer, \ + stats.output_index_result_column_timer); \ + COUNTER_UPDATE(Parent->_filtered_segment_counter, stats.filtered_segment_number); \ COUNTER_UPDATE(Parent->_total_segment_counter, stats.total_segment_number); // Update counters for NewOlapScanner @@ -665,11 +657,12 @@ void NewOlapScanner::_collect_profile_before_close() { #undef INCR_COUNTER #endif // Update metrics - DorisMetrics::instance()->query_scan_bytes->increment(_scan_bytes); - DorisMetrics::instance()->query_scan_rows->increment(_scan_rows); + DorisMetrics::instance()->query_scan_bytes->increment( + local_state->_read_compressed_counter->value()); + DorisMetrics::instance()->query_scan_rows->increment(local_state->_scan_rows->value()); auto& tablet = _tablet_reader_params.tablet; - tablet->query_scan_bytes->increment(_scan_bytes); - tablet->query_scan_rows->increment(_scan_rows); + tablet->query_scan_bytes->increment(local_state->_read_compressed_counter->value()); + tablet->query_scan_rows->increment(local_state->_scan_rows->value()); tablet->query_scan_count->increment(1); if (_query_statistics) { _query_statistics->add_scan_bytes_from_local_storage( diff --git 
a/be/src/vec/exec/scan/new_olap_scanner.h b/be/src/vec/exec/scan/new_olap_scanner.h index 44c300f446e6ea8..fd1246b120ba770 100644 --- a/be/src/vec/exec/scan/new_olap_scanner.h +++ b/be/src/vec/exec/scan/new_olap_scanner.h @@ -101,8 +101,6 @@ class NewOlapScanner : public VScanner { std::unordered_set _tablet_columns_convert_to_null_set; // ========= profiles ========== - int64_t _scan_bytes = 0; - int64_t _scan_rows = 0; bool _profile_updated = false; }; } // namespace vectorized diff --git a/be/src/vec/exec/scan/scanner_context.cpp b/be/src/vec/exec/scan/scanner_context.cpp index ee1d60d29024244..bea222bd0f35b00 100644 --- a/be/src/vec/exec/scan/scanner_context.cpp +++ b/be/src/vec/exec/scan/scanner_context.cpp @@ -80,8 +80,6 @@ Status ScannerContext::init() { _scanner_profile = _local_state->_scanner_profile; _scanner_sched_counter = _local_state->_scanner_sched_counter; _newly_create_free_blocks_num = _local_state->_newly_create_free_blocks_num; - _scanner_wait_batch_timer = _local_state->_scanner_wait_batch_timer; - _scanner_ctx_sched_time = _local_state->_scanner_ctx_sched_time; _scale_up_scanners_counter = _local_state->_scale_up_scanners_counter; _scanner_memory_used_counter = _local_state->_memory_used_counter; @@ -224,10 +222,6 @@ Status ScannerContext::init() { return Status::OK(); } -std::string ScannerContext::parent_name() { - return _local_state->get_name(); -} - vectorized::BlockUPtr ScannerContext::get_free_block(bool force) { vectorized::BlockUPtr block = nullptr; if (_free_blocks.try_dequeue(block)) { @@ -257,18 +251,13 @@ void ScannerContext::return_free_block(vectorized::BlockUPtr block) { } } -bool ScannerContext::empty_in_queue(int id) { - std::lock_guard l(_transfer_lock); - return _blocks_queue.empty(); -} - Status ScannerContext::submit_scan_task(std::shared_ptr scan_task) { _scanner_sched_counter->update(1); _num_scheduled_scanners++; return _scanner_scheduler_global->submit(shared_from_this(), scan_task); } -void 
ScannerContext::append_block_to_queue(std::shared_ptr scan_task) { +void ScannerContext::push_back_scan_task(std::shared_ptr scan_task) { if (scan_task->status_ok()) { for (const auto& [block, _] : scan_task->cached_blocks) { if (block->rows() > 0) { @@ -287,12 +276,12 @@ void ScannerContext::append_block_to_queue(std::shared_ptr scan_task) if (_last_scale_up_time == 0) { _last_scale_up_time = UnixMillis(); } - if (_blocks_queue.empty() && _last_fetch_time != 0) { + if (_tasks_queue.empty() && _last_fetch_time != 0) { // there's no block in queue before current block, so the consumer is waiting _total_wait_block_time += UnixMillis() - _last_fetch_time; } _num_scheduled_scanners--; - _blocks_queue.emplace_back(scan_task); + _tasks_queue.emplace_back(scan_task); _dependency->set_ready(); } @@ -308,9 +297,9 @@ Status ScannerContext::get_block_from_queue(RuntimeState* state, vectorized::Blo _set_scanner_done(); return _process_status; } - if (!_blocks_queue.empty() && !done()) { + if (!_tasks_queue.empty() && !done()) { _last_fetch_time = UnixMillis(); - auto scan_task = _blocks_queue.front(); + auto scan_task = _tasks_queue.front(); DCHECK(scan_task); // The abnormal status of scanner may come from the execution of the scanner itself, @@ -335,7 +324,7 @@ Status ScannerContext::get_block_from_queue(RuntimeState* state, vectorized::Blo return_free_block(std::move(current_block)); } else { // This scan task do not have any cached blocks. 
- _blocks_queue.pop_front(); + _tasks_queue.pop_front(); // current scanner is finished, and no more data to read if (scan_task->is_eos()) { _num_finished_scanners++; @@ -374,13 +363,13 @@ Status ScannerContext::get_block_from_queue(RuntimeState* state, vectorized::Blo RETURN_IF_ERROR(_try_to_scale_up()); } - if (_num_finished_scanners == _all_scanners.size() && _blocks_queue.empty()) { + if (_num_finished_scanners == _all_scanners.size() && _tasks_queue.empty()) { _set_scanner_done(); _is_finished = true; } *eos = done(); - if (_blocks_queue.empty()) { + if (_tasks_queue.empty()) { _dependency->block(); } return Status::OK(); @@ -466,11 +455,6 @@ Status ScannerContext::validate_block_schema(Block* block) { return Status::OK(); } -void ScannerContext::set_status_on_error(const Status& status) { - std::lock_guard l(_transfer_lock); - _process_status = status; -} - void ScannerContext::stop_scanners(RuntimeState* state) { std::lock_guard l(_transfer_lock); if (_should_stop) { @@ -483,7 +467,7 @@ void ScannerContext::stop_scanners(RuntimeState* state) { sc->_scanner->try_stop(); } } - _blocks_queue.clear(); + _tasks_queue.clear(); // TODO yiguolei, call mark close to scanners if (state->enable_profile()) { std::stringstream scanner_statistics; @@ -533,11 +517,11 @@ void ScannerContext::stop_scanners(RuntimeState* state) { std::string ScannerContext::debug_string() { return fmt::format( - "id: {}, total scanners: {}, blocks in queue: {}," + "id: {}, total scanners: {}, pending tasks: {}," " _should_stop: {}, _is_finished: {}, free blocks: {}," " limit: {}, _num_running_scanners: {}, _max_thread_num: {}," " _max_bytes_in_queue: {}, query_id: {}", - ctx_id, _all_scanners.size(), _blocks_queue.size(), _should_stop, _is_finished, + ctx_id, _all_scanners.size(), _tasks_queue.size(), _should_stop, _is_finished, _free_blocks.size_approx(), limit, _num_scheduled_scanners, _max_thread_num, _max_bytes_in_queue, print_id(_query_id)); } diff --git 
a/be/src/vec/exec/scan/scanner_context.h b/be/src/vec/exec/scan/scanner_context.h index 85669765df89ef0..c70313c98bca65a 100644 --- a/be/src/vec/exec/scan/scanner_context.h +++ b/be/src/vec/exec/scan/scanner_context.h @@ -75,7 +75,6 @@ class ScanTask { public: std::weak_ptr scanner; std::list> cached_blocks; - uint64_t last_submit_time; // nanoseconds void set_status(Status _status) { if (_status.is()) { @@ -112,7 +111,7 @@ class ScannerContext : public std::enable_shared_from_this, ~ScannerContext() override { SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(_query_thread_context.query_mem_tracker); - _blocks_queue.clear(); + _tasks_queue.clear(); vectorized::BlockUPtr block; while (_free_blocks.try_dequeue(block)) { // do nothing @@ -143,37 +142,25 @@ class ScannerContext : public std::enable_shared_from_this, // set the `eos` to `ScanTask::eos` if there is no more data in current scanner Status submit_scan_task(std::shared_ptr scan_task); - // append the running scanner and its cached block to `_blocks_queue` - void append_block_to_queue(std::shared_ptr scan_task); - - void set_status_on_error(const Status& status); + // Push back a scan task. 
+ void push_back_scan_task(std::shared_ptr scan_task); // Return true if this ScannerContext need no more process bool done() const { return _is_finished || _should_stop; } - bool is_finished() { return _is_finished.load(); } - bool should_stop() { return _should_stop.load(); } std::string debug_string(); RuntimeState* state() { return _state; } - void incr_ctx_scheduling_time(int64_t num) { _scanner_ctx_sched_time->update(num); } - std::string parent_name(); - - bool empty_in_queue(int id); SimplifiedScanScheduler* get_scan_scheduler() { return _scanner_scheduler; } void stop_scanners(RuntimeState* state); - int32_t get_max_thread_num() const { return _max_thread_num; } - void set_max_thread_num(int32_t num) { _max_thread_num = num; } - int batch_size() const { return _batch_size; } // the unique id of this context std::string ctx_id; TUniqueId _query_id; - int32_t queue_idx = -1; ThreadPoolToken* thread_token = nullptr; bool _should_reset_thread_name = true; @@ -195,7 +182,7 @@ class ScannerContext : public std::enable_shared_from_this, const RowDescriptor* _output_row_descriptor = nullptr; std::mutex _transfer_lock; - std::list> _blocks_queue; + std::list> _tasks_queue; Status _process_status = Status::OK(); std::atomic_bool _should_stop = false; @@ -223,8 +210,6 @@ class ScannerContext : public std::enable_shared_from_this, // This counter refers to scan operator's local state RuntimeProfile::Counter* _scanner_memory_used_counter = nullptr; RuntimeProfile::Counter* _newly_create_free_blocks_num = nullptr; - RuntimeProfile::Counter* _scanner_wait_batch_timer = nullptr; - RuntimeProfile::Counter* _scanner_ctx_sched_time = nullptr; RuntimeProfile::Counter* _scale_up_scanners_counter = nullptr; QueryThreadContext _query_thread_context; std::shared_ptr _dependency = nullptr; diff --git a/be/src/vec/exec/scan/scanner_scheduler.cpp b/be/src/vec/exec/scan/scanner_scheduler.cpp index 3ad4e758e799809..385b581d2a57250 100644 --- 
a/be/src/vec/exec/scan/scanner_scheduler.cpp +++ b/be/src/vec/exec/scan/scanner_scheduler.cpp @@ -123,7 +123,6 @@ Status ScannerScheduler::init(ExecEnv* env) { Status ScannerScheduler::submit(std::shared_ptr ctx, std::shared_ptr scan_task) { - scan_task->last_submit_time = GetCurrentTimeNanos(); if (ctx->done()) { return Status::OK(); } @@ -154,7 +153,7 @@ Status ScannerScheduler::submit(std::shared_ptr ctx, if (!status.ok()) { scanner_ref->set_status(status); - ctx->append_block_to_queue(scanner_ref); + ctx->push_back_scan_task(scanner_ref); } }); if (!s.ok()) { @@ -184,7 +183,7 @@ Status ScannerScheduler::submit(std::shared_ptr ctx, if (!status.ok()) { scanner_ref->set_status(status); - ctx->append_block_to_queue(scanner_ref); + ctx->push_back_scan_task(scanner_ref); } }; SimplifiedScanTask simple_scan_task = {work_func, ctx}; @@ -212,8 +211,6 @@ std::unique_ptr ScannerScheduler::new_limited_scan_pool_token( void ScannerScheduler::_scanner_scan(std::shared_ptr ctx, std::shared_ptr scan_task) { - // record the time from scanner submission to actual execution in nanoseconds - ctx->incr_ctx_scheduling_time(GetCurrentTimeNanos() - scan_task->last_submit_time); auto task_lock = ctx->task_exec_ctx(); if (task_lock == nullptr) { return; @@ -343,7 +340,7 @@ void ScannerScheduler::_scanner_scan(std::shared_ptr ctx, scanner->mark_to_need_to_close(); } scan_task->set_eos(eos); - ctx->append_block_to_queue(scan_task); + ctx->push_back_scan_task(scan_task); } void ScannerScheduler::_register_metrics() { diff --git a/be/src/vec/exec/scan/vfile_scanner.cpp b/be/src/vec/exec/scan/vfile_scanner.cpp index c3f4d12f9dc12be..9353887799207dd 100644 --- a/be/src/vec/exec/scan/vfile_scanner.cpp +++ b/be/src/vec/exec/scan/vfile_scanner.cpp @@ -126,8 +126,6 @@ Status VFileScanner::prepare(RuntimeState* state, const VExprContextSPtrs& conju _open_reader_timer = ADD_TIMER(_local_state->scanner_profile(), "FileScannerOpenReaderTime"); _cast_to_input_block_timer = 
ADD_TIMER(_local_state->scanner_profile(), "FileScannerCastInputBlockTime"); - _fill_path_columns_timer = - ADD_TIMER(_local_state->scanner_profile(), "FileScannerFillPathColumnTime"); _fill_missing_columns_timer = ADD_TIMER(_local_state->scanner_profile(), "FileScannerFillMissingColumnTime"); _pre_filter_timer = ADD_TIMER(_local_state->scanner_profile(), "FileScannerPreFilterTimer"); @@ -137,8 +135,6 @@ Status VFileScanner::prepare(RuntimeState* state, const VExprContextSPtrs& conju _not_found_file_counter = ADD_COUNTER(_local_state->scanner_profile(), "NotFoundFileNum", TUnit::UNIT); _file_counter = ADD_COUNTER(_local_state->scanner_profile(), "FileNumber", TUnit::UNIT); - _has_fully_rf_file_counter = - ADD_COUNTER(_local_state->scanner_profile(), "HasFullyRfFileNumber", TUnit::UNIT); _file_cache_statistics.reset(new io::FileCacheStatistics()); _io_ctx.reset(new io::IOContext()); @@ -219,7 +215,7 @@ Status VFileScanner::_process_late_arrival_conjuncts() { _discard_conjuncts(); } if (_applied_rf_num == _total_rf_num) { - COUNTER_UPDATE(_has_fully_rf_file_counter, 1); + _local_state->scanner_profile()->add_info_string("ApplyAllRuntimeFilters", "True"); } return Status::OK(); } @@ -432,7 +428,7 @@ Status VFileScanner::_cast_to_input_block(Block* block) { } SCOPED_TIMER(_cast_to_input_block_timer); // cast primitive type(PT0) to primitive type(PT1) - size_t idx = 0; + uint32_t idx = 0; for (auto& slot_desc : _input_tuple_desc->slots()) { if (_name_to_col_type.find(slot_desc->col_name()) == _name_to_col_type.end()) { // skip columns which does not exist in file diff --git a/be/src/vec/exec/scan/vfile_scanner.h b/be/src/vec/exec/scan/vfile_scanner.h index 750a1371d7ec295..86171d634ac693f 100644 --- a/be/src/vec/exec/scan/vfile_scanner.h +++ b/be/src/vec/exec/scan/vfile_scanner.h @@ -180,14 +180,12 @@ class VFileScanner : public VScanner { RuntimeProfile::Counter* _get_block_timer = nullptr; RuntimeProfile::Counter* _open_reader_timer = nullptr; RuntimeProfile::Counter* 
_cast_to_input_block_timer = nullptr; - RuntimeProfile::Counter* _fill_path_columns_timer = nullptr; RuntimeProfile::Counter* _fill_missing_columns_timer = nullptr; RuntimeProfile::Counter* _pre_filter_timer = nullptr; RuntimeProfile::Counter* _convert_to_output_block_timer = nullptr; RuntimeProfile::Counter* _empty_file_counter = nullptr; RuntimeProfile::Counter* _not_found_file_counter = nullptr; RuntimeProfile::Counter* _file_counter = nullptr; - RuntimeProfile::Counter* _has_fully_rf_file_counter = nullptr; const std::unordered_map* _col_name_to_slot_id = nullptr; // single slot filter conjuncts @@ -216,7 +214,6 @@ class VFileScanner : public VScanner { Status _truncate_char_or_varchar_columns(Block* block); void _truncate_char_or_varchar_column(Block* block, int idx, int len); Status _generate_fill_columns(); - Status _handle_dynamic_block(Block* block); Status _process_conjuncts_for_dict_filter(); Status _process_late_arrival_conjuncts(); void _get_slot_ids(VExpr* expr, std::vector* slot_ids); diff --git a/be/src/vec/exec/scan/vscanner.cpp b/be/src/vec/exec/scan/vscanner.cpp index ae255f85a7f6047..97bf563db1fa580 100644 --- a/be/src/vec/exec/scan/vscanner.cpp +++ b/be/src/vec/exec/scan/vscanner.cpp @@ -113,8 +113,7 @@ Status VScanner::get_block(RuntimeState* state, Block* block, bool* eof) { // 1. Get input block from scanner { // get block time - auto* timer = _local_state->_scan_timer; - SCOPED_TIMER(timer); + SCOPED_TIMER(_local_state->_scan_timer); RETURN_IF_ERROR(_get_block_impl(state, block, eof)); if (*eof) { DCHECK(block->rows() == 0); @@ -128,8 +127,7 @@ Status VScanner::get_block(RuntimeState* state, Block* block, bool* eof) { // 2. Filter the output block finally. 
{ - auto* timer = _local_state->_filter_timer; - SCOPED_TIMER(timer); + SCOPED_TIMER(_local_state->_filter_timer); RETURN_IF_ERROR(_filter_output_block(block)); } // record rows return (after filter) for _limit check diff --git a/be/src/vec/exec/vjdbc_connector.cpp b/be/src/vec/exec/vjdbc_connector.cpp index 98acb43bcd47ee2..28baac567cc5d14 100644 --- a/be/src/vec/exec/vjdbc_connector.cpp +++ b/be/src/vec/exec/vjdbc_connector.cpp @@ -95,26 +95,23 @@ Status JdbcConnector::open(RuntimeState* state, bool read) { RETURN_IF_ERROR(JniUtil::get_jni_scanner_class(env, JDBC_EXECUTOR_FACTORY_CLASS, &_executor_factory_clazz)); - _executor_factory_ctor_id = - env->GetStaticMethodID(_executor_factory_clazz, "getExecutorClass", - "(Lorg/apache/doris/thrift/TOdbcTableType;)Ljava/lang/String;"); - if (_executor_factory_ctor_id == nullptr) { - return Status::InternalError("Failed to find method ID for getExecutorClass"); - } + JNI_CALL_METHOD_CHECK_EXCEPTION( + , _executor_factory_ctor_id, env, + GetStaticMethodID(_executor_factory_clazz, "getExecutorClass", + "(Lorg/apache/doris/thrift/TOdbcTableType;)Ljava/lang/String;")); jobject jtable_type = _get_java_table_type(env, _conn_param.table_type); - jstring executor_name = (jstring)env->CallStaticObjectMethod( - _executor_factory_clazz, _executor_factory_ctor_id, jtable_type); - if (executor_name == nullptr) { - return Status::InternalError("getExecutorClass returned null"); - } - const char* executor_name_str = env->GetStringUTFChars(executor_name, nullptr); + JNI_CALL_METHOD_CHECK_EXCEPTION_DELETE_REF( + jobject, executor_name, env, + CallStaticObjectMethod(_executor_factory_clazz, _executor_factory_ctor_id, + jtable_type)); + + const char* executor_name_str = env->GetStringUTFChars((jstring)executor_name, nullptr); RETURN_IF_ERROR(JniUtil::get_jni_scanner_class(env, executor_name_str, &_executor_clazz)); env->DeleteLocalRef(jtable_type); - env->ReleaseStringUTFChars(executor_name, executor_name_str); - 
env->DeleteLocalRef(executor_name); + env->ReleaseStringUTFChars((jstring)executor_name, executor_name_str); #undef GET_BASIC_JAVA_CLAZZ RETURN_IF_ERROR(_register_func_id(env)); @@ -155,7 +152,6 @@ Status JdbcConnector::open(RuntimeState* state, bool read) { } ctor_params.__set_op(read ? TJdbcOperation::READ : TJdbcOperation::WRITE); ctor_params.__set_table_type(_conn_param.table_type); - ctor_params.__set_enable_connection_pool(_conn_param.enable_connection_pool); ctor_params.__set_connection_pool_min_size(_conn_param.connection_pool_min_size); ctor_params.__set_connection_pool_max_size(_conn_param.connection_pool_max_size); ctor_params.__set_connection_pool_max_wait_time(_conn_param.connection_pool_max_wait_time); @@ -191,14 +187,19 @@ Status JdbcConnector::test_connection() { RETURN_IF_ERROR(JniUtil::GetJNIEnv(&env)); env->CallNonvirtualVoidMethod(_executor_obj, _executor_clazz, _executor_test_connection_id); - return JniUtil::GetJniExceptionMsg(env); + RETURN_ERROR_IF_EXC(env); + return Status::OK(); } Status JdbcConnector::clean_datasource() { + if (!_is_open) { + return Status::OK(); + } JNIEnv* env = nullptr; RETURN_IF_ERROR(JniUtil::GetJNIEnv(&env)); env->CallNonvirtualVoidMethod(_executor_obj, _executor_clazz, _executor_clean_datasource_id); - return JniUtil::GetJniExceptionMsg(env); + RETURN_ERROR_IF_EXC(env); + return Status::OK(); } Status JdbcConnector::query() { @@ -258,8 +259,8 @@ Status JdbcConnector::get_next(bool* eos, Block* block, int batch_size) { RETURN_IF_ERROR(JniUtil::GetJniExceptionMsg(env)); env->DeleteLocalRef(map); - std::vector all_columns; - for (size_t i = 0; i < column_size; ++i) { + std::vector all_columns; + for (uint32_t i = 0; i < column_size; ++i) { all_columns.push_back(i); } SCOPED_RAW_TIMER(&_jdbc_statistic._fill_block_timer); @@ -306,7 +307,7 @@ Status JdbcConnector::exec_stmt_write(Block* block, const VExprContextSPtrs& out env->CallNonvirtualIntMethod(_executor_obj, _executor_clazz, _executor_stmt_write_id, 
hashmap_object); env->DeleteLocalRef(hashmap_object); - RETURN_IF_ERROR(JniUtil::GetJniExceptionMsg(env)); + RETURN_ERROR_IF_EXC(env); *num_rows_sent = block->rows(); return Status::OK(); } @@ -316,7 +317,7 @@ Status JdbcConnector::begin_trans() { JNIEnv* env = nullptr; RETURN_IF_ERROR(JniUtil::GetJNIEnv(&env)); env->CallNonvirtualVoidMethod(_executor_obj, _executor_clazz, _executor_begin_trans_id); - RETURN_IF_ERROR(JniUtil::GetJniExceptionMsg(env)); + RETURN_ERROR_IF_EXC(env); _is_in_transaction = true; } return Status::OK(); @@ -329,7 +330,8 @@ Status JdbcConnector::abort_trans() { JNIEnv* env = nullptr; RETURN_IF_ERROR(JniUtil::GetJNIEnv(&env)); env->CallNonvirtualVoidMethod(_executor_obj, _executor_clazz, _executor_abort_trans_id); - return JniUtil::GetJniExceptionMsg(env); + RETURN_ERROR_IF_EXC(env); + return Status::OK(); } Status JdbcConnector::finish_trans() { @@ -337,7 +339,7 @@ Status JdbcConnector::finish_trans() { JNIEnv* env = nullptr; RETURN_IF_ERROR(JniUtil::GetJNIEnv(&env)); env->CallNonvirtualVoidMethod(_executor_obj, _executor_clazz, _executor_finish_trans_id); - RETURN_IF_ERROR(JniUtil::GetJniExceptionMsg(env)); + RETURN_ERROR_IF_EXC(env); _is_in_transaction = false; } return Status::OK(); diff --git a/be/src/vec/exec/vjdbc_connector.h b/be/src/vec/exec/vjdbc_connector.h index 066a95de554444b..954b0abfa78f0ce 100644 --- a/be/src/vec/exec/vjdbc_connector.h +++ b/be/src/vec/exec/vjdbc_connector.h @@ -61,7 +61,6 @@ struct JdbcConnectorParam { int32_t connection_pool_max_wait_time = -1; int32_t connection_pool_max_life_time = -1; bool connection_pool_keep_alive = false; - bool enable_connection_pool; const TupleDescriptor* tuple_desc = nullptr; }; diff --git a/be/src/vec/exprs/lambda_function/varray_filter_function.cpp b/be/src/vec/exprs/lambda_function/varray_filter_function.cpp index 0a77ebd673790a9..f698df0c52d92ce 100644 --- a/be/src/vec/exprs/lambda_function/varray_filter_function.cpp +++ 
b/be/src/vec/exprs/lambda_function/varray_filter_function.cpp @@ -20,7 +20,6 @@ #include #include #include -#include #include "common/status.h" #include "vec/aggregate_functions/aggregate_function.h" @@ -36,16 +35,11 @@ #include "vec/data_types/data_type.h" #include "vec/exprs/lambda_function/lambda_function.h" #include "vec/exprs/lambda_function/lambda_function_factory.h" -#include "vec/exprs/vexpr.h" #include "vec/utils/util.hpp" -namespace doris { -namespace vectorized { -class VExprContext; -} // namespace vectorized -} // namespace doris - namespace doris::vectorized { +#include "common/compile_check_begin.h" +class VExprContext; class ArrayFilterFunction : public LambdaFunction { ENABLE_FACTORY_CREATOR(ArrayFilterFunction); @@ -78,7 +72,7 @@ class ArrayFilterFunction : public LambdaFunction { auto second_column = block->get_by_position(arguments[1]).column->convert_to_full_column_if_const(); - int input_rows = first_column->size(); + auto input_rows = first_column->size(); auto first_outside_null_map = ColumnUInt8::create(input_rows, 0); auto first_arg_column = first_column; if (first_arg_column->is_nullable()) { @@ -89,7 +83,7 @@ class ArrayFilterFunction : public LambdaFunction { VectorizedUtils::update_null_map(first_outside_null_map->get_data(), column_array_nullmap.get_data()); } - const ColumnArray& first_col_array = assert_cast(*first_arg_column); + const auto& first_col_array = assert_cast(*first_arg_column); const auto& first_off_data = assert_cast(first_col_array.get_offsets_column()) .get_data(); @@ -113,7 +107,7 @@ class ArrayFilterFunction : public LambdaFunction { VectorizedUtils::update_null_map(second_outside_null_map->get_data(), column_array_nullmap.get_data()); } - const ColumnArray& second_col_array = assert_cast(*second_arg_column); + const auto& second_col_array = assert_cast(*second_arg_column); const auto& second_off_data = assert_cast( second_col_array.get_offsets_column()) .get_data(); @@ -180,4 +174,6 @@ class ArrayFilterFunction : 
public LambdaFunction { void register_function_array_filter(doris::vectorized::LambdaFunctionFactory& factory) { factory.register_function(); } + +#include "common/compile_check_end.h" } // namespace doris::vectorized diff --git a/be/src/vec/exprs/lambda_function/varray_map_function.cpp b/be/src/vec/exprs/lambda_function/varray_map_function.cpp index 609f5dcebdac859..f80cffa166eac44 100644 --- a/be/src/vec/exprs/lambda_function/varray_map_function.cpp +++ b/be/src/vec/exprs/lambda_function/varray_map_function.cpp @@ -18,7 +18,6 @@ #include #include #include -#include #include "common/status.h" #include "vec/aggregate_functions/aggregate_function.h" @@ -36,16 +35,12 @@ #include "vec/data_types/data_type_nullable.h" #include "vec/exprs/lambda_function/lambda_function.h" #include "vec/exprs/lambda_function/lambda_function_factory.h" -#include "vec/exprs/vexpr.h" #include "vec/utils/util.hpp" -namespace doris { -namespace vectorized { -class VExprContext; -} // namespace vectorized -} // namespace doris - namespace doris::vectorized { +#include "common/compile_check_begin.h" + +class VExprContext; class ArrayMapFunction : public LambdaFunction { ENABLE_FACTORY_CREATOR(ArrayMapFunction); @@ -80,7 +75,7 @@ class ArrayMapFunction : public LambdaFunction { 0); // offset column MutableColumnPtr array_column_offset; - int nested_array_column_rows = 0; + size_t nested_array_column_rows = 0; ColumnPtr first_array_offsets = nullptr; //2. 
get the result column from executed expr, and the needed is nested column of array @@ -109,23 +104,23 @@ class ArrayMapFunction : public LambdaFunction { } // here is the array column - const ColumnArray& col_array = assert_cast(*column_array); + const auto& col_array = assert_cast(*column_array); const auto& col_type = assert_cast(*type_array); if (i == 0) { nested_array_column_rows = col_array.get_data_ptr()->size(); first_array_offsets = col_array.get_offsets_ptr(); - auto& off_data = assert_cast( + const auto& off_data = assert_cast( col_array.get_offsets_column()); array_column_offset = off_data.clone_resized(col_array.get_offsets_column().size()); } else { // select array_map((x,y)->x+y,c_array1,[0,1,2,3]) from array_test2; // c_array1: [0,1,2,3,4,5,6,7,8,9] - auto& array_offsets = + const auto& array_offsets = assert_cast(*first_array_offsets) .get_data(); if (nested_array_column_rows != col_array.get_data_ptr()->size() || - (array_offsets.size() > 0 && + (!array_offsets.empty() && memcmp(array_offsets.data(), col_array.get_offsets().data(), sizeof(array_offsets[0]) * array_offsets.size()) != 0)) { return Status::InvalidArgument( @@ -192,4 +187,6 @@ class ArrayMapFunction : public LambdaFunction { void register_function_array_map(doris::vectorized::LambdaFunctionFactory& factory) { factory.register_function(); } + +#include "common/compile_check_end.h" } // namespace doris::vectorized diff --git a/be/src/vec/exprs/table_function/table_function_factory.cpp b/be/src/vec/exprs/table_function/table_function_factory.cpp index 0bef185351dc879..332eaed37d44838 100644 --- a/be/src/vec/exprs/table_function/table_function_factory.cpp +++ b/be/src/vec/exprs/table_function/table_function_factory.cpp @@ -33,6 +33,7 @@ #include "vec/exprs/table_function/vexplode_map.h" #include "vec/exprs/table_function/vexplode_numbers.h" #include "vec/exprs/table_function/vexplode_split.h" +#include "vec/exprs/table_function/vposexplode.h" #include "vec/utils/util.hpp" namespace 
doris::vectorized { @@ -61,6 +62,7 @@ const std::unordered_map()}, {"explode_map", TableFunctionCreator {}}, {"explode_json_object", TableFunctionCreator {}}, + {"posexplode", TableFunctionCreator {}}, {"explode", TableFunctionCreator {}}}; Status TableFunctionFactory::get_fn(const TFunction& t_fn, ObjectPool* pool, TableFunction** fn) { diff --git a/be/src/vec/exprs/table_function/udf_table_function.cpp b/be/src/vec/exprs/table_function/udf_table_function.cpp index 82e727b3f5dee93..9aa850d68b7d206 100644 --- a/be/src/vec/exprs/table_function/udf_table_function.cpp +++ b/be/src/vec/exprs/table_function/udf_table_function.cpp @@ -24,6 +24,7 @@ #include "vec/columns/column_nullable.h" #include "vec/common/assert_cast.h" #include "vec/core/block.h" +#include "vec/core/column_numbers.h" #include "vec/core/types.h" #include "vec/data_types/data_type_array.h" #include "vec/data_types/data_type_factory.hpp" @@ -48,9 +49,6 @@ UDFTableFunction::UDFTableFunction(const TFunction& t_fn) : TableFunction(), _t_ Status UDFTableFunction::open() { JNIEnv* env = nullptr; RETURN_IF_ERROR(JniUtil::GetJNIEnv(&env)); - if (env == nullptr) { - return Status::InternalError("Failed to get/create JVM"); - } _jni_ctx = std::make_shared(); // Add a scoped cleanup jni reference object. This cleans up local refs made below. 
JniLocalFrame jni_frame; @@ -70,14 +68,22 @@ Status UDFTableFunction::open() { RETURN_IF_ERROR(jni_frame.push(env)); RETURN_IF_ERROR(SerializeThriftMsg(env, &ctor_params, &ctor_params_bytes)); RETURN_IF_ERROR(JniUtil::GetGlobalClassRef(env, EXECUTOR_CLASS, &_jni_ctx->executor_cl)); - _jni_ctx->executor_ctor_id = - env->GetMethodID(_jni_ctx->executor_cl, "", EXECUTOR_CTOR_SIGNATURE); - _jni_ctx->executor_evaluate_id = - env->GetMethodID(_jni_ctx->executor_cl, "evaluate", EXECUTOR_EVALUATE_SIGNATURE); - _jni_ctx->executor_close_id = - env->GetMethodID(_jni_ctx->executor_cl, "close", EXECUTOR_CLOSE_SIGNATURE); - _jni_ctx->executor = env->NewObject(_jni_ctx->executor_cl, _jni_ctx->executor_ctor_id, - ctor_params_bytes); + + JNI_CALL_METHOD_CHECK_EXCEPTION( + , _jni_ctx->executor_ctor_id, env, + GetMethodID(_jni_ctx->executor_cl, "", EXECUTOR_CTOR_SIGNATURE)); + + JNI_CALL_METHOD_CHECK_EXCEPTION( + , _jni_ctx->executor_evaluate_id, env, + GetMethodID(_jni_ctx->executor_cl, "evaluate", EXECUTOR_EVALUATE_SIGNATURE)); + + JNI_CALL_METHOD_CHECK_EXCEPTION( + , _jni_ctx->executor_close_id, env, + GetMethodID(_jni_ctx->executor_cl, "close", EXECUTOR_CLOSE_SIGNATURE)); + + JNI_CALL_METHOD_CHECK_EXCEPTION( + , _jni_ctx->executor, env, + NewObject(_jni_ctx->executor_cl, _jni_ctx->executor_ctor_id, ctor_params_bytes)); jbyte* pBytes = env->GetByteArrayElements(ctor_params_bytes, nullptr); env->ReleaseByteArrayElements(ctor_params_bytes, pBytes, JNI_ABORT); env->DeleteLocalRef(ctor_params_bytes); @@ -90,7 +96,7 @@ Status UDFTableFunction::open() { Status UDFTableFunction::process_init(Block* block, RuntimeState* state) { auto child_size = _expr_context->root()->children().size(); - std::vector child_column_idxs; + ColumnNumbers child_column_idxs; child_column_idxs.resize(child_size); for (int i = 0; i < child_size; ++i) { int result_id = -1; @@ -123,9 +129,10 @@ Status UDFTableFunction::process_init(Block* block, RuntimeState* state) { jobject output_map = 
JniUtil::convert_to_java_map(env, output_params); DCHECK(_jni_ctx != nullptr); DCHECK(_jni_ctx->executor != nullptr); - long output_address = env->CallLongMethod(_jni_ctx->executor, _jni_ctx->executor_evaluate_id, - input_map, output_map); - RETURN_IF_ERROR(JniUtil::GetJniExceptionMsg(env)); + JNI_CALL_METHOD_CHECK_EXCEPTION( + long, output_address, env, + CallLongMethod(_jni_ctx->executor, _jni_ctx->executor_evaluate_id, input_map, + output_map)); env->DeleteLocalRef(input_map); env->DeleteLocalRef(output_map); RETURN_IF_ERROR(JniConnector::fill_block(block, {_result_column_idx}, output_address)); diff --git a/be/src/vec/exprs/table_function/udf_table_function.h b/be/src/vec/exprs/table_function/udf_table_function.h index b09371984675dfa..b9707bf069398de 100644 --- a/be/src/vec/exprs/table_function/udf_table_function.h +++ b/be/src/vec/exprs/table_function/udf_table_function.h @@ -90,8 +90,8 @@ class UDFTableFunction final : public TableFunction { DataTypePtr _return_type = nullptr; ColumnPtr _array_result_column = nullptr; ColumnArrayExecutionData _array_column_detail; - size_t _result_column_idx = 0; // _array_result_column pos in block - size_t _array_offset = 0; // start offset of array[row_idx] + uint32_t _result_column_idx = 0; // _array_result_column pos in block + size_t _array_offset = 0; // start offset of array[row_idx] }; } // namespace doris::vectorized diff --git a/be/src/vec/exprs/table_function/vposexplode.cpp b/be/src/vec/exprs/table_function/vposexplode.cpp new file mode 100644 index 000000000000000..20d04a219f831ac --- /dev/null +++ b/be/src/vec/exprs/table_function/vposexplode.cpp @@ -0,0 +1,155 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "vec/exprs/table_function/vposexplode.h" + +#include + +#include +#include + +#include "common/status.h" +#include "vec/columns/column.h" +#include "vec/columns/column_nullable.h" +#include "vec/columns/columns_number.h" +#include "vec/common/assert_cast.h" +#include "vec/common/string_ref.h" +#include "vec/core/block.h" +#include "vec/core/column_with_type_and_name.h" +#include "vec/exprs/vexpr.h" +#include "vec/exprs/vexpr_context.h" + +namespace doris::vectorized { + +VPosExplodeTableFunction::VPosExplodeTableFunction() { + _fn_name = "posexplode"; +} + +Status VPosExplodeTableFunction::process_init(Block* block, RuntimeState* state) { + CHECK(_expr_context->root()->children().size() == 1) + << "VPosExplodeTableFunction only support 1 child but has " + << _expr_context->root()->children().size(); + + int value_column_idx = -1; + RETURN_IF_ERROR(_expr_context->root()->children()[0]->execute(_expr_context.get(), block, + &value_column_idx)); + + _collection_column = + block->get_by_position(value_column_idx).column->convert_to_full_column_if_const(); + + if (!extract_column_array_info(*_collection_column, _array_detail)) { + return Status::NotSupported("column type {} not supported now, only support array", + block->get_by_position(value_column_idx).column->get_name()); + } + if (is_column_nullable(*_collection_column)) { + _array_data_column = + assert_cast( + 
assert_cast(*_collection_column).get_nested_column()) + .get_data_ptr(); + } else { + _array_data_column = assert_cast(*_collection_column).get_data_ptr(); + } + return Status::OK(); +} + +void VPosExplodeTableFunction::process_row(size_t row_idx) { + DCHECK(row_idx < _collection_column->size()); + TableFunction::process_row(row_idx); + + if (!_array_detail.array_nullmap_data || !_array_detail.array_nullmap_data[row_idx]) { + _collection_offset = (*_array_detail.offsets_ptr)[row_idx - 1]; + _cur_size = (*_array_detail.offsets_ptr)[row_idx] - _collection_offset; + } +} + +void VPosExplodeTableFunction::process_close() { + _collection_column = nullptr; + _array_data_column = nullptr; + _array_detail.reset(); + _collection_offset = 0; +} + +void VPosExplodeTableFunction::get_same_many_values(MutableColumnPtr& column, int length) { + // now we only support array column explode to struct column + size_t pos = _collection_offset + _cur_offset; + // if current is empty array row, also append a default value + if (current_empty()) { + column->insert_many_defaults(length); + return; + } + ColumnStruct* ret = nullptr; + // this _is_nullable is whole output column's nullable + if (_is_nullable) { + ret = assert_cast( + assert_cast(column.get())->get_nested_column_ptr().get()); + assert_cast( + assert_cast(column.get())->get_null_map_column_ptr().get()) + ->insert_many_defaults(length); + } else if (column->is_column_struct()) { + ret = assert_cast(column.get()); + } else { + throw Exception(ErrorCode::INTERNAL_ERROR, + "only support array column explode to struct column"); + } + if (!ret || ret->tuple_size() != 2) { + throw Exception( + ErrorCode::INTERNAL_ERROR, + "only support array column explode to two column, but given: ", ret->tuple_size()); + } + auto& pose_column_nullable = assert_cast(ret->get_column(0)); + pose_column_nullable.get_null_map_column().insert_many_defaults(length); + assert_cast(pose_column_nullable.get_nested_column()) + .insert_many_vals(_cur_offset, 
length); + ret->get_column(1).insert_many_from(*_array_data_column, pos, length); +} + +int VPosExplodeTableFunction::get_value(MutableColumnPtr& column, int max_step) { + max_step = std::min(max_step, (int)(_cur_size - _cur_offset)); + size_t pos = _collection_offset + _cur_offset; + if (current_empty()) { + column->insert_default(); + max_step = 1; + } else { + ColumnStruct* struct_column = nullptr; + if (_is_nullable) { + auto* nullable_column = assert_cast(column.get()); + struct_column = + assert_cast(nullable_column->get_nested_column_ptr().get()); + auto* nullmap_column = + assert_cast(nullable_column->get_null_map_column_ptr().get()); + // here nullmap_column insert max_step many defaults as if array[row_idx] is NULL + // will be not update value, _cur_size = 0, means current_empty; + // so here could insert directly + nullmap_column->insert_many_defaults(max_step); + } else { + struct_column = assert_cast(column.get()); + } + if (!struct_column || struct_column->tuple_size() != 2) { + throw Exception(ErrorCode::INTERNAL_ERROR, + "only support array column explode to two column, but given: ", + struct_column->tuple_size()); + } + auto& pose_column_nullable = assert_cast(struct_column->get_column(0)); + pose_column_nullable.get_null_map_column().insert_many_defaults(max_step); + assert_cast(pose_column_nullable.get_nested_column()) + .insert_range_of_integer(_cur_offset, _cur_offset + max_step); + struct_column->get_column(1).insert_range_from(*_array_data_column, pos, max_step); + } + forward(max_step); + return max_step; +} +} // namespace doris::vectorized diff --git a/be/src/vec/exprs/table_function/vposexplode.h b/be/src/vec/exprs/table_function/vposexplode.h new file mode 100644 index 000000000000000..4e021fd58da918a --- /dev/null +++ b/be/src/vec/exprs/table_function/vposexplode.h @@ -0,0 +1,50 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include "common/status.h" +#include "vec/columns/column_map.h" +#include "vec/data_types/data_type.h" +#include "vec/data_types/data_type_array.h" +#include "vec/exprs/table_function/table_function.h" +#include "vec/functions/array/function_array_utils.h" + +namespace doris::vectorized { + +class VPosExplodeTableFunction : public TableFunction { + ENABLE_FACTORY_CREATOR(VPosExplodeTableFunction); + +public: + VPosExplodeTableFunction(); + + ~VPosExplodeTableFunction() override = default; + + Status process_init(Block* block, RuntimeState* state) override; + void process_row(size_t row_idx) override; + void process_close() override; + void get_same_many_values(MutableColumnPtr& column, int length) override; + int get_value(MutableColumnPtr& column, int max_step) override; + +private: + ColumnPtr _collection_column; + ColumnPtr _array_data_column; + ColumnArrayExecutionData _array_detail; + size_t _collection_offset; // start offset of array[row_idx] +}; + +} // namespace doris::vectorized diff --git a/be/src/vec/exprs/vbitmap_predicate.cpp b/be/src/vec/exprs/vbitmap_predicate.cpp index 8116311247b5b97..d801f8691fe3b8d 100644 --- a/be/src/vec/exprs/vbitmap_predicate.cpp +++ b/be/src/vec/exprs/vbitmap_predicate.cpp @@ -17,11 +17,8 @@ #include 
"vec/exprs/vbitmap_predicate.h" -#include - -#include +#include #include -#include #include "exprs/bitmapfilter_predicate.h" #include "gutil/integral_types.h" @@ -41,12 +38,12 @@ class RowDescriptor; class RuntimeState; class TExprNode; -namespace vectorized { -class VExprContext; -} // namespace vectorized } // namespace doris namespace doris::vectorized { +#include "common/compile_check_begin.h" + +class VExprContext; vectorized::VBitmapPredicate::VBitmapPredicate(const TExprNode& node) : VExpr(node), _filter(nullptr), _expr_name("bitmap_predicate") {} @@ -90,14 +87,14 @@ doris::Status vectorized::VBitmapPredicate::execute(vectorized::VExprContext* co arguments[i] = column_id; } // call function - size_t num_columns_without_result = block->columns(); + uint32_t num_columns_without_result = block->columns(); auto res_data_column = ColumnVector::create(block->rows()); ColumnPtr argument_column = block->get_by_position(arguments[0]).column->convert_to_full_column_if_const(); size_t sz = argument_column->size(); res_data_column->resize(sz); - auto ptr = ((ColumnVector*)res_data_column.get())->get_data().data(); + auto* ptr = res_data_column->get_data().data(); if (argument_column->is_nullable()) { auto column_nested = reinterpret_cast(argument_column.get()) @@ -134,4 +131,5 @@ void vectorized::VBitmapPredicate::set_filter(std::shared_ptr - +#include #include -#include #include "common/status.h" #include "exprs/bloom_filter_func.h" -#include "gutil/integral_types.h" #include "runtime/runtime_state.h" #include "vec/columns/column.h" #include "vec/columns/column_nullable.h" #include "vec/columns/column_vector.h" -#include "vec/common/string_ref.h" #include "vec/core/block.h" #include "vec/core/column_numbers.h" #include "vec/core/column_with_type_and_name.h" @@ -41,12 +37,12 @@ namespace doris { class RowDescriptor; class TExprNode; -namespace vectorized { -class VExprContext; -} // namespace vectorized } // namespace doris namespace doris::vectorized { +#include 
"common/compile_check_begin.h" + +class VExprContext; VBloomPredicate::VBloomPredicate(const TExprNode& node) : VExpr(node), _filter(nullptr), _expr_name("bloom_predicate") {} @@ -85,7 +81,7 @@ Status VBloomPredicate::execute(VExprContext* context, Block* block, int* result arguments[i] = column_id; } // call function - size_t num_columns_without_result = block->columns(); + auto num_columns_without_result = block->columns(); auto res_data_column = ColumnVector::create(block->rows()); ColumnPtr argument_column = @@ -112,4 +108,6 @@ const std::string& VBloomPredicate::expr_name() const { void VBloomPredicate::set_filter(std::shared_ptr& filter) { _filter = filter; } + +#include "common/compile_check_end.h" } // namespace doris::vectorized \ No newline at end of file diff --git a/be/src/vec/exprs/vcase_expr.cpp b/be/src/vec/exprs/vcase_expr.cpp index 222a8f5629af778..7e65a5bbb486c97 100644 --- a/be/src/vec/exprs/vcase_expr.cpp +++ b/be/src/vec/exprs/vcase_expr.cpp @@ -19,12 +19,8 @@ #include #include -#include -#include -#include #include -#include #include "common/status.h" #include "runtime/runtime_state.h" @@ -43,6 +39,7 @@ class RuntimeState; } // namespace doris namespace doris::vectorized { +#include "common/compile_check_begin.h" VCaseExpr::VCaseExpr(const TExprNode& node) : VExpr(node), @@ -61,8 +58,7 @@ Status VCaseExpr::prepare(RuntimeState* state, const RowDescriptor& desc, VExprC ColumnsWithTypeAndName argument_template; DataTypes arguments; - for (int i = 0; i < _children.size(); i++) { - auto child = _children[i]; + for (auto child : _children) { argument_template.emplace_back(nullptr, child->data_type(), child->expr_name()); arguments.emplace_back(child->data_type()); } @@ -113,7 +109,7 @@ Status VCaseExpr::execute(VExprContext* context, Block* block, int* result_colum } RETURN_IF_ERROR(check_constant(*block, arguments)); - size_t num_columns_without_result = block->columns(); + uint32_t num_columns_without_result = block->columns(); 
block->insert({nullptr, _data_type, _expr_name}); RETURN_IF_ERROR(_function->execute(context->fn_context(_fn_context_index), *block, arguments, @@ -132,7 +128,7 @@ std::string VCaseExpr::debug_string() const { out << "CaseExpr(has_case_expr=" << _has_case_expr << " has_else_expr=" << _has_else_expr << " function=" << _function_name << "){"; bool first = true; - for (auto& input_expr : children()) { + for (const auto& input_expr : children()) { if (first) { first = false; } else { @@ -143,4 +139,6 @@ std::string VCaseExpr::debug_string() const { out << "}"; return out.str(); } + +#include "common/compile_check_end.h" } // namespace doris::vectorized diff --git a/be/src/vec/exprs/vcast_expr.cpp b/be/src/vec/exprs/vcast_expr.cpp index 38f861add87224d..6e3a49cfc13e63c 100644 --- a/be/src/vec/exprs/vcast_expr.cpp +++ b/be/src/vec/exprs/vcast_expr.cpp @@ -41,6 +41,7 @@ class RuntimeState; } // namespace doris namespace doris::vectorized { +#include "common/compile_check_begin.h" doris::Status VCastExpr::prepare(doris::RuntimeState* state, const doris::RowDescriptor& desc, VExprContext* context) { @@ -57,14 +58,11 @@ doris::Status VCastExpr::prepare(doris::RuntimeState* state, const doris::RowDes // Using typeindex to indicate the datatype, not using type name because // type name is not stable, but type index is stable and immutable _cast_param_data_type = _target_data_type; - // Has to cast to int16_t or there will be compile error because there is no - // TypeIndexField - _cast_param = _cast_param_data_type->create_column_const_with_default_value(1); ColumnsWithTypeAndName argument_template; argument_template.reserve(2); argument_template.emplace_back(nullptr, child->data_type(), child_name); - argument_template.emplace_back(_cast_param, _cast_param_data_type, _target_data_type_name); + argument_template.emplace_back(nullptr, _cast_param_data_type, _target_data_type_name); _function = SimpleFunctionFactory::instance().get_function( function_name, argument_template, 
_data_type, {.enable_decimal256 = state->enable_decimal256()}); @@ -111,14 +109,14 @@ doris::Status VCastExpr::execute(VExprContext* context, doris::vectorized::Block RETURN_IF_ERROR(_children[0]->execute(context, block, &column_id)); // call function - size_t num_columns_without_result = block->columns(); + uint32_t num_columns_without_result = block->columns(); // prepare a column to save result block->insert({nullptr, _data_type, _expr_name}); auto state = Status::OK(); try { state = _function->execute(context->fn_context(_fn_context_index), *block, - {static_cast(column_id)}, num_columns_without_result, + {static_cast(column_id)}, num_columns_without_result, block->rows(), false); *result_column_id = num_columns_without_result; } catch (const Exception& e) { @@ -133,10 +131,10 @@ const std::string& VCastExpr::expr_name() const { std::string VCastExpr::debug_string() const { std::stringstream out; - out << "CastExpr(CAST " << _cast_param_data_type->get_name() << " to " + out << "CastExpr(CAST " << get_child(0)->data_type()->get_name() << " to " << _target_data_type->get_name() << "){"; bool first = true; - for (auto& input_expr : children()) { + for (const auto& input_expr : children()) { if (first) { first = false; } else { @@ -147,4 +145,6 @@ std::string VCastExpr::debug_string() const { out << "}"; return out.str(); } + +#include "common/compile_check_end.h" } // namespace doris::vectorized diff --git a/be/src/vec/exprs/vcast_expr.h b/be/src/vec/exprs/vcast_expr.h index 3c03cb42ffb02cf..f553d7682a3b167 100644 --- a/be/src/vec/exprs/vcast_expr.h +++ b/be/src/vec/exprs/vcast_expr.h @@ -61,7 +61,6 @@ class VCastExpr final : public VExpr { std::string _target_data_type_name; DataTypePtr _cast_param_data_type; - ColumnPtr _cast_param; static const constexpr char* function_name = "CAST"; }; diff --git a/be/src/vec/exprs/vcompound_pred.h b/be/src/vec/exprs/vcompound_pred.h index faed6788ba34ac5..8c65e6c8adbfefe 100644 --- a/be/src/vec/exprs/vcompound_pred.h +++ 
b/be/src/vec/exprs/vcompound_pred.h @@ -18,18 +18,21 @@ #pragma once #include +#include +#include + #include "common/status.h" #include "gutil/integral_types.h" #include "util/simd/bits.h" #include "vec/columns/column.h" #include "vec/columns/columns_number.h" #include "vec/common/assert_cast.h" -#include "vec/data_types/data_type_number.h" #include "vec/exprs/vectorized_fn_call.h" -#include "vec/exprs/vexpr.h" #include "vec/exprs/vexpr_context.h" +#include "vec/exprs/vexpr_fwd.h" namespace doris::vectorized { +#include "common/compile_check_begin.h" inline std::string compound_operator_to_string(TExprOpcode::type op) { if (op == TExprOpcode::COMPOUND_AND) { @@ -155,7 +158,7 @@ class VCompoundPred : public VectorizedFnCall { if (_can_fast_execute && fast_execute(context, block, result_column_id)) { return Status::OK(); } - if (children().size() == 1 || !_all_child_is_compound_and_not_const()) { + if (get_num_children() == 1 || !_all_child_is_compound_and_not_const()) { return VectorizedFnCall::execute(context, block, result_column_id); } @@ -168,7 +171,7 @@ class VCompoundPred : public VectorizedFnCall { bool lhs_is_nullable = lhs_column->is_nullable(); auto [lhs_data_column, lhs_null_map] = _get_raw_data_and_null_map(lhs_column, lhs_is_nullable); - int filted = simd::count_zero_num((int8_t*)lhs_data_column, size); + size_t filted = simd::count_zero_num((int8_t*)lhs_data_column, size); bool lhs_all_true = (filted == 0); bool lhs_all_false = (filted == size); @@ -196,7 +199,7 @@ class VCompoundPred : public VectorizedFnCall { auto rhs_nullable_column = _get_raw_data_and_null_map(rhs_column, rhs_is_nullable); rhs_data_column = rhs_nullable_column.first; rhs_null_map = rhs_nullable_column.second; - int filted = simd::count_zero_num((int8_t*)rhs_data_column, size); + size_t filted = simd::count_zero_num((int8_t*)rhs_data_column, size); rhs_all_true = (filted == 0); rhs_all_false = (filted == size); if (rhs_is_nullable) { @@ -340,13 +343,9 @@ class VCompoundPred : 
public VectorizedFnCall { } bool _all_child_is_compound_and_not_const() const { - for (auto child : _children) { - // we can make sure non const compound predicate's return column is allow modifyied locally. - if (child->is_constant() || !child->is_compound_predicate()) { - return false; - } - } - return true; + return std::ranges::all_of(_children, [](const VExprSPtr& arg) -> bool { + return arg->is_compound_predicate() && !arg->is_constant(); + }); } std::pair _get_raw_data_and_null_map(ColumnPtr column, @@ -371,4 +370,6 @@ class VCompoundPred : public VectorizedFnCall { TExprOpcode::type _op; }; + +#include "common/compile_check_end.h" } // namespace doris::vectorized diff --git a/be/src/vec/exprs/vdirect_in_predicate.h b/be/src/vec/exprs/vdirect_in_predicate.h index 7abd43a5e029033..c86a1e10b1da286 100644 --- a/be/src/vec/exprs/vdirect_in_predicate.h +++ b/be/src/vec/exprs/vdirect_in_predicate.h @@ -22,6 +22,8 @@ #include "vec/exprs/vexpr.h" namespace doris::vectorized { +#include "common/compile_check_begin.h" + class VDirectInPredicate final : public VExpr { ENABLE_FACTORY_CREATOR(VDirectInPredicate); @@ -52,7 +54,7 @@ class VDirectInPredicate final : public VExpr { Status execute_runtime_fitler(doris::vectorized::VExprContext* context, doris::vectorized::Block* block, int* result_column_id, - std::vector& args) override { + ColumnNumbers& args) override { return _do_execute(context, block, result_column_id, args); } @@ -62,7 +64,7 @@ class VDirectInPredicate final : public VExpr { private: Status _do_execute(VExprContext* context, Block* block, int* result_column_id, - std::vector& arguments) { + ColumnNumbers& arguments) { DCHECK(_open_finished || _getting_const_col); arguments.resize(_children.size()); for (int i = 0; i < _children.size(); ++i) { @@ -71,7 +73,7 @@ class VDirectInPredicate final : public VExpr { arguments[i] = column_id; } - size_t num_columns_without_result = block->columns(); + uint32_t num_columns_without_result = block->columns(); auto 
res_data_column = ColumnVector::create(block->rows()); ColumnPtr argument_column = block->get_by_position(arguments[0]).column->convert_to_full_column_if_const(); @@ -99,4 +101,6 @@ class VDirectInPredicate final : public VExpr { std::shared_ptr _filter; std::string _expr_name; }; + +#include "common/compile_check_end.h" } // namespace doris::vectorized \ No newline at end of file diff --git a/be/src/vec/exprs/vectorized_agg_fn.cpp b/be/src/vec/exprs/vectorized_agg_fn.cpp index 45ad573cb5d9df0..b5e5aa4738a0414 100644 --- a/be/src/vec/exprs/vectorized_agg_fn.cpp +++ b/be/src/vec/exprs/vectorized_agg_fn.cpp @@ -44,6 +44,8 @@ #include "vec/exprs/vexpr_context.h" #include "vec/utils/util.hpp" +static constexpr int64_t BE_VERSION_THAT_SUPPORT_NULLABLE_CHECK = 8; + namespace doris { class RowDescriptor; namespace vectorized { @@ -54,6 +56,7 @@ class IColumn; } // namespace doris namespace doris::vectorized { +#include "common/compile_check_begin.h" template AggregateFunctionPtr get_agg_state_function(const DataTypes& argument_types, @@ -63,9 +66,10 @@ AggregateFunctionPtr get_agg_state_function(const DataTypes& argument_types, argument_types, return_type); } -AggFnEvaluator::AggFnEvaluator(const TExprNode& desc) +AggFnEvaluator::AggFnEvaluator(const TExprNode& desc, const bool without_key) : _fn(desc.fn), _is_merge(desc.agg_expr.is_merge_agg), + _without_key(without_key), _return_type(TypeDescriptor::from_thrift(desc.fn.ret_type)) { bool nullable = true; if (desc.__isset.is_nullable) { @@ -83,8 +87,8 @@ AggFnEvaluator::AggFnEvaluator(const TExprNode& desc) } Status AggFnEvaluator::create(ObjectPool* pool, const TExpr& desc, const TSortInfo& sort_info, - AggFnEvaluator** result) { - *result = pool->add(AggFnEvaluator::create_unique(desc.nodes[0]).release()); + const bool without_key, AggFnEvaluator** result) { + *result = pool->add(AggFnEvaluator::create_unique(desc.nodes[0], without_key).release()); auto& agg_fn_evaluator = *result; int node_idx = 0; for (int i = 0; i < 
desc.nodes[0].num_children; ++i) { @@ -213,6 +217,13 @@ Status AggFnEvaluator::prepare(RuntimeState* state, const RowDescriptor& desc, _function = transform_to_sort_agg_function(_function, _argument_types_with_sort, _sort_description, state); } + + if (!AggregateFunctionSimpleFactory::is_foreach(_fn.name.function_name)) { + if (state->be_exec_version() >= BE_VERSION_THAT_SUPPORT_NULLABLE_CHECK) { + RETURN_IF_ERROR( + _function->verify_result_type(_without_key, argument_types, _data_type)); + } + } _expr_name = fmt::format("{}({})", _fn.name.function_name, child_expr_name); return Status::OK(); } @@ -320,6 +331,7 @@ AggFnEvaluator* AggFnEvaluator::clone(RuntimeState* state, ObjectPool* pool) { AggFnEvaluator::AggFnEvaluator(AggFnEvaluator& evaluator, RuntimeState* state) : _fn(evaluator._fn), _is_merge(evaluator._is_merge), + _without_key(evaluator._without_key), _argument_types_with_sort(evaluator._argument_types_with_sort), _real_argument_types(evaluator._real_argument_types), _return_type(evaluator._return_type), @@ -351,11 +363,11 @@ AggFnEvaluator::AggFnEvaluator(AggFnEvaluator& evaluator, RuntimeState* state) } } -Status AggFnEvaluator::check_agg_fn_output(int64_t key_size, +Status AggFnEvaluator::check_agg_fn_output(uint32_t key_size, const std::vector& agg_fn, const RowDescriptor& output_row_desc) { auto name_and_types = VectorizedUtils::create_name_and_data_types(output_row_desc); - for (int i = key_size, j = 0; i < name_and_types.size(); i++, j++) { + for (uint32_t i = key_size, j = 0; i < name_and_types.size(); i++, j++) { auto&& [name, column_type] = name_and_types[i]; auto agg_return_type = agg_fn[j]->function()->get_return_type(); if (!column_type->equals(*agg_return_type)) { @@ -370,4 +382,6 @@ Status AggFnEvaluator::check_agg_fn_output(int64_t key_size, } return Status::OK(); } + +#include "common/compile_check_end.h" } // namespace doris::vectorized diff --git a/be/src/vec/exprs/vectorized_agg_fn.h b/be/src/vec/exprs/vectorized_agg_fn.h index 
8e4f864c474058e..e47d3638646eb0d 100644 --- a/be/src/vec/exprs/vectorized_agg_fn.h +++ b/be/src/vec/exprs/vectorized_agg_fn.h @@ -17,8 +17,8 @@ #pragma once #include -#include +#include #include #include @@ -31,6 +31,8 @@ #include "vec/exprs/vexpr_fwd.h" namespace doris { +#include "common/compile_check_begin.h" + class RuntimeState; class SlotDescriptor; class ObjectPool; @@ -50,7 +52,7 @@ class AggFnEvaluator { public: static Status create(ObjectPool* pool, const TExpr& desc, const TSortInfo& sort_info, - AggFnEvaluator** result); + const bool without_key, AggFnEvaluator** result); Status prepare(RuntimeState* state, const RowDescriptor& desc, const SlotDescriptor* intermediate_slot_desc, @@ -97,7 +99,7 @@ class AggFnEvaluator { bool is_merge() const { return _is_merge; } const VExprContextSPtrs& input_exprs_ctxs() const { return _input_exprs_ctxs; } - static Status check_agg_fn_output(int64_t key_size, + static Status check_agg_fn_output(uint32_t key_size, const std::vector& agg_fn, const RowDescriptor& output_row_desc); @@ -109,8 +111,12 @@ class AggFnEvaluator { const TFunction _fn; const bool _is_merge; + // We need this flag to distinguish between the two types of aggregation functions: + // 1. executed without group by key (agg function used with window function is also regarded as this type) + // 2. 
executed with group by key + const bool _without_key; - AggFnEvaluator(const TExprNode& desc); + AggFnEvaluator(const TExprNode& desc, const bool without_key); AggFnEvaluator(AggFnEvaluator& evaluator, RuntimeState* state); Status _calc_argument_columns(Block* block); @@ -141,4 +147,5 @@ class AggFnEvaluator { }; } // namespace vectorized +#include "common/compile_check_end.h" } // namespace doris diff --git a/be/src/vec/exprs/vectorized_fn_call.cpp b/be/src/vec/exprs/vectorized_fn_call.cpp index 3192653a816f2c5..fc69b4047ceb5f6 100644 --- a/be/src/vec/exprs/vectorized_fn_call.cpp +++ b/be/src/vec/exprs/vectorized_fn_call.cpp @@ -22,10 +22,8 @@ #include #include -#include #include "common/config.h" -#include "common/consts.h" #include "common/status.h" #include "pipeline/pipeline_task.h" #include "runtime/runtime_state.h" @@ -49,6 +47,7 @@ class TExprNode; } // namespace doris namespace doris::vectorized { +#include "common/compile_check_begin.h" const std::string AGG_STATE_SUFFIX = "_state"; @@ -146,7 +145,7 @@ Status VectorizedFnCall::evaluate_inverted_index(VExprContext* context, uint32_t Status VectorizedFnCall::_do_execute(doris::vectorized::VExprContext* context, doris::vectorized::Block* block, int* result_column_id, - std::vector& args) { + ColumnNumbers& args) { if (is_const_and_have_executed()) { // const have executed in open function return get_result_from_const(block, _expr_name, result_column_id); } @@ -182,7 +181,7 @@ Status VectorizedFnCall::_do_execute(doris::vectorized::VExprContext* context, RETURN_IF_ERROR(check_constant(*block, args)); // call function - size_t num_columns_without_result = block->columns(); + uint32_t num_columns_without_result = block->columns(); // prepare a column to save result block->insert({nullptr, _data_type, _expr_name}); RETURN_IF_ERROR(_function->execute(context->fn_context(_fn_context_index), *block, args, @@ -193,13 +192,13 @@ Status VectorizedFnCall::_do_execute(doris::vectorized::VExprContext* context, Status 
VectorizedFnCall::execute_runtime_fitler(doris::vectorized::VExprContext* context, doris::vectorized::Block* block, - int* result_column_id, std::vector& args) { + int* result_column_id, ColumnNumbers& args) { return _do_execute(context, block, result_column_id, args); } Status VectorizedFnCall::execute(VExprContext* context, vectorized::Block* block, int* result_column_id) { - std::vector arguments; + ColumnNumbers arguments; return _do_execute(context, block, result_column_id, arguments); } @@ -247,10 +246,10 @@ bool VectorizedFnCall::equals(const VExpr& other) { if (this->_function_name != other_ptr->_function_name) { return false; } - if (this->children().size() != other_ptr->children().size()) { + if (get_num_children() != other_ptr->get_num_children()) { return false; } - for (size_t i = 0; i < this->children().size(); i++) { + for (uint16_t i = 0; i < get_num_children(); i++) { if (!this->get_child(i)->equals(*other_ptr->get_child(i))) { return false; } @@ -258,4 +257,5 @@ bool VectorizedFnCall::equals(const VExpr& other) { return true; } +#include "common/compile_check_end.h" } // namespace doris::vectorized diff --git a/be/src/vec/exprs/vectorized_fn_call.h b/be/src/vec/exprs/vectorized_fn_call.h index bae996136ddc319..cea328f413b620f 100644 --- a/be/src/vec/exprs/vectorized_fn_call.h +++ b/be/src/vec/exprs/vectorized_fn_call.h @@ -17,12 +17,10 @@ #pragma once #include -#include #include #include -#include "common/object_pool.h" #include "common/status.h" #include "udf/udf.h" #include "vec/core/column_numbers.h" @@ -35,14 +33,14 @@ namespace doris { class RowDescriptor; class RuntimeState; class TExprNode; +} // namespace doris + +namespace doris::vectorized { +#include "common/compile_check_begin.h" -namespace vectorized { class Block; class VExprContext; -} // namespace vectorized -} // namespace doris -namespace doris::vectorized { class VectorizedFnCall : public VExpr { ENABLE_FACTORY_CREATOR(VectorizedFnCall); @@ -51,7 +49,7 @@ class VectorizedFnCall 
: public VExpr { Status execute(VExprContext* context, Block* block, int* result_column_id) override; Status execute_runtime_fitler(doris::vectorized::VExprContext* context, doris::vectorized::Block* block, int* result_column_id, - std::vector& args) override; + ColumnNumbers& args) override; Status evaluate_inverted_index(VExprContext* context, uint32_t segment_num_rows) override; Status prepare(RuntimeState* state, const RowDescriptor& desc, VExprContext* context) override; Status open(RuntimeState* state, VExprContext* context, @@ -79,7 +77,8 @@ class VectorizedFnCall : public VExpr { private: Status _do_execute(doris::vectorized::VExprContext* context, doris::vectorized::Block* block, - int* result_column_id, std::vector& args); + int* result_column_id, ColumnNumbers& args); }; +#include "common/compile_check_end.h" } // namespace doris::vectorized diff --git a/be/src/vec/exprs/vexpr.cpp b/be/src/vec/exprs/vexpr.cpp index ba440231f4ee51c..c40bcbcab57f23c 100644 --- a/be/src/vec/exprs/vexpr.cpp +++ b/be/src/vec/exprs/vexpr.cpp @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -59,6 +60,8 @@ #include "vec/utils/util.hpp" namespace doris { +#include "common/compile_check_begin.h" + class RowDescriptor; class RuntimeState; @@ -628,7 +631,7 @@ Status VExpr::_evaluate_inverted_index(VExprContext* context, const FunctionBase VExprSPtrs children_exprs; // Reserve space to avoid multiple reallocations - const size_t estimated_size = children().size(); + const size_t estimated_size = get_num_children(); iterators.reserve(estimated_size); data_type_with_names.reserve(estimated_size); column_ids.reserve(estimated_size); @@ -642,7 +645,7 @@ Status VExpr::_evaluate_inverted_index(VExprContext* context, const FunctionBase for (const auto& child : children()) { if (child->node_type() == TExprNodeType::CAST_EXPR) { auto* cast_expr = assert_cast(child.get()); - DCHECK_EQ(cast_expr->children().size(), 1); + DCHECK_EQ(cast_expr->get_num_children(), 1); if 
(cast_expr->get_child(0)->is_slot_ref()) { auto* column_slot_ref = assert_cast(cast_expr->get_child(0).get()); auto column_id = column_slot_ref->column_id(); @@ -742,7 +745,7 @@ bool VExpr::fast_execute(doris::vectorized::VExprContext* context, doris::vector int* result_column_id) { if (context->get_inverted_index_context() && context->get_inverted_index_context()->get_inverted_index_result_column().contains(this)) { - size_t num_columns_without_result = block->columns(); + uint32_t num_columns_without_result = block->columns(); // prepare a column to save result auto result_column = context->get_inverted_index_context()->get_inverted_index_result_column()[this]; @@ -763,4 +766,5 @@ bool VExpr::equals(const VExpr& other) { return false; } +#include "common/compile_check_end.h" } // namespace doris::vectorized diff --git a/be/src/vec/exprs/vexpr.h b/be/src/vec/exprs/vexpr.h index b608f876456e546..3456fb431a48cb0 100644 --- a/be/src/vec/exprs/vexpr.h +++ b/be/src/vec/exprs/vexpr.h @@ -23,6 +23,7 @@ #include #include +#include #include #include #include @@ -55,6 +56,7 @@ class RowDescriptor; class RuntimeState; namespace vectorized { +#include "common/compile_check_begin.h" #define RETURN_IF_ERROR_OR_PREPARED(stmt) \ if (_prepared) { \ @@ -68,12 +70,13 @@ namespace vectorized { // the relatioinship between threads and classes. class VExpr { public: - // resize inserted param column to make sure column size equal to block.rows() - // and return param column index - static size_t insert_param(Block* block, ColumnWithTypeAndName&& elem, size_t size) { + // resize inserted param column to make sure column size equal to block.rows() and return param column index + // keep return type same with block::columns() + static uint32_t insert_param(Block* block, ColumnWithTypeAndName&& elem, size_t size) { // usually elem.column always is const column, so we just clone it. elem.column = elem.column->clone_resized(size); block->insert(std::move(elem)); + // just inserted. 
so no need to check underflow. return block->columns() - 1; } @@ -129,7 +132,7 @@ class VExpr { // Only the 4th parameter is used in the runtime filter. In and MinMax need overwrite the // interface virtual Status execute_runtime_fitler(VExprContext* context, Block* block, - int* result_column_id, std::vector& args) { + int* result_column_id, ColumnNumbers& args) { return execute(context, block, result_column_id); }; @@ -152,15 +155,17 @@ class VExpr { TExprOpcode::type op() const { return _opcode; } void add_child(const VExprSPtr& expr) { _children.push_back(expr); } - VExprSPtr get_child(int i) const { return _children[i]; } - int get_num_children() const { return _children.size(); } + VExprSPtr get_child(uint16_t i) const { return _children[i]; } + // Expr's children number is restricted by org.apache.doris.common.Config#expr_children_limit, 10000 default. and strongly not recommend to change. + // There's little to worry about it. uint16 is enough. + uint16_t get_num_children() const { return static_cast(_children.size()); } virtual bool is_rf_wrapper() const { return std::ranges::any_of(_children.begin(), _children.end(), [](VExprSPtr child) { return child->is_rf_wrapper(); }); } - virtual void do_judge_selectivity(int64_t filter_rows, int64_t input_rows) { + virtual void do_judge_selectivity(uint64_t filter_rows, uint64_t input_rows) { for (auto child : _children) { child->do_judge_selectivity(filter_rows, input_rows); } @@ -217,7 +222,7 @@ class VExpr { int fn_context_index() const { return _fn_context_index; } - static const VExprSPtr expr_without_cast(const VExprSPtr& expr) { + static VExprSPtr expr_without_cast(const VExprSPtr& expr) { if (expr->node_type() == TExprNodeType::CAST_EXPR) { return expr_without_cast(expr->_children[0]); } @@ -225,7 +230,7 @@ class VExpr { } // If this expr is a RuntimeFilterWrapper, this method will return an underlying rf expression - virtual const VExprSPtr get_impl() const { return {}; } + virtual VExprSPtr get_impl() 
const { return {}; } // If this expr is a BloomPredicate, this method will return a BloomFilterFunc virtual std::shared_ptr get_bloom_filter_func() const { @@ -300,7 +305,7 @@ class VExpr { TExprOpcode::type _opcode; TypeDescriptor _type; DataTypePtr _data_type; - VExprSPtrs _children; + VExprSPtrs _children; // in few hundreds TFunction _fn; /// Index to pass to ExprContext::fn_context() to retrieve this expr's FunctionContext. @@ -492,4 +497,5 @@ Status create_texpr_literal_node(const void* data, TExprNode* node, int precisio TExprNode create_texpr_node_from(const void* data, const PrimitiveType& type, int precision = 0, int scale = 0); +#include "common/compile_check_end.h" } // namespace doris diff --git a/be/src/vec/exprs/vexpr_context.cpp b/be/src/vec/exprs/vexpr_context.cpp index ff2238548ec55e9..b01ce2fade36602 100644 --- a/be/src/vec/exprs/vexpr_context.cpp +++ b/be/src/vec/exprs/vexpr_context.cpp @@ -17,9 +17,11 @@ #include "vec/exprs/vexpr_context.h" +#include #include #include +#include "common/cast_set.h" #include "common/compiler_util.h" // IWYU pragma: keep #include "common/exception.h" #include "runtime/runtime_state.h" @@ -27,6 +29,7 @@ #include "udf/udf.h" #include "util/simd/bits.h" #include "vec/columns/column_const.h" +#include "vec/core/column_numbers.h" #include "vec/core/column_with_type_and_name.h" #include "vec/core/columns_with_type_and_name.h" #include "vec/exprs/vexpr.h" @@ -36,6 +39,8 @@ class RowDescriptor; } // namespace doris namespace doris::vectorized { +#include "common/compile_check_begin.h" + VExprContext::~VExprContext() { // In runtime filter, only create expr context to get expr root, will not call // prepare or open, so that it is not need to call close. 
And call close may core @@ -118,7 +123,7 @@ int VExprContext::register_function_context(RuntimeState* state, const TypeDescr const std::vector& arg_types) { _fn_contexts.push_back(FunctionContext::create_context(state, return_type, arg_types)); _fn_contexts.back()->set_check_overflow_for_decimal(state->check_overflow_for_decimal()); - return _fn_contexts.size() - 1; + return static_cast(_fn_contexts.size()) - 1; } Status VExprContext::evaluate_inverted_index(uint32_t segment_num_rows) { @@ -146,11 +151,11 @@ Status VExprContext::filter_block(const VExprContextSPtrs& expr_contexts, Block* return Status::OK(); } - std::vector columns_to_filter(column_to_keep); + ColumnNumbers columns_to_filter(column_to_keep); std::iota(columns_to_filter.begin(), columns_to_filter.end(), 0); return execute_conjuncts_and_filter_block(expr_contexts, block, columns_to_filter, - column_to_keep); + static_cast(column_to_keep)); } Status VExprContext::execute_conjuncts(const VExprContextSPtrs& ctxs, @@ -164,7 +169,7 @@ Status VExprContext::execute_conjuncts(const VExprContextSPtrs& ctxs, const std::vector* filters, bool accept_null, Block* block, IColumn::Filter* result_filter, bool* can_filter_all) { - int rows = block->rows(); + size_t rows = block->rows(); DCHECK_EQ(result_filter->size(), rows); *can_filter_all = false; auto* __restrict result_filter_data = result_filter->data(); @@ -186,7 +191,7 @@ Status VExprContext::execute_conjuncts(const VExprContextSPtrs& ctxs, const auto* __restrict filter_data = filter.data(); const auto* __restrict null_map_data = nullable_column->get_null_map_data().data(); - int input_rows = + size_t input_rows = rows - (is_rf_wrapper ? simd::count_zero_num((int8*)result_filter_data, rows) : 0); @@ -200,7 +205,7 @@ Status VExprContext::execute_conjuncts(const VExprContextSPtrs& ctxs, } } - int output_rows = + size_t output_rows = rows - (is_rf_wrapper ? 
simd::count_zero_num((int8*)result_filter_data, rows) : 0); @@ -226,7 +231,7 @@ Status VExprContext::execute_conjuncts(const VExprContextSPtrs& ctxs, assert_cast(*filter_column).get_data(); const auto* __restrict filter_data = filter.data(); - int input_rows = + size_t input_rows = rows - (is_rf_wrapper ? simd::count_zero_num((int8*)result_filter_data, rows) : 0); @@ -234,7 +239,7 @@ Status VExprContext::execute_conjuncts(const VExprContextSPtrs& ctxs, result_filter_data[i] &= filter_data[i]; } - int output_rows = + size_t output_rows = rows - (is_rf_wrapper ? simd::count_zero_num((int8*)result_filter_data, rows) : 0); @@ -281,14 +286,14 @@ Status VExprContext::execute_conjuncts(const VExprContextSPtrs& conjuncts, Block for (const auto& conjunct : conjuncts) { int result_column_id = -1; RETURN_IF_ERROR(conjunct->execute(block, &result_column_id)); - auto& filter_column = + const auto& filter_column = unpack_if_const(block->get_by_position(result_column_id).column).first; - if (auto* nullable_column = check_and_get_column(*filter_column)) { + if (const auto* nullable_column = check_and_get_column(*filter_column)) { const ColumnPtr& nested_column = nullable_column->get_nested_column_ptr(); const IColumn::Filter& result = assert_cast(*nested_column).get_data(); - auto* __restrict filter_data = result.data(); - auto* __restrict null_map_data = nullable_column->get_null_map_data().data(); + const auto* __restrict filter_data = result.data(); + const auto* __restrict null_map_data = nullable_column->get_null_map_data().data(); DCHECK_EQ(rows, nullable_column->size()); for (size_t i = 0; i != rows; ++i) { @@ -300,7 +305,8 @@ Status VExprContext::execute_conjuncts(const VExprContextSPtrs& conjuncts, Block final_filter_ptr[i] = final_filter_ptr[i] & filter_data[i]; } } else { - auto* filter_data = assert_cast(*filter_column).get_data().data(); + const auto* filter_data = + assert_cast(*filter_column).get_data().data(); for (size_t i = 0; i != rows; ++i) { 
final_filter_ptr[i] = final_filter_ptr[i] & filter_data[i]; } @@ -320,6 +326,7 @@ Status VExprContext::execute_conjuncts_and_filter_block(const VExprContextSPtrs& execute_conjuncts(ctxs, nullptr, false, block, &result_filter, &can_filter_all)); if (can_filter_all) { for (auto& col : columns_to_filter) { + // NOLINTNEXTLINE(performance-move-const-arg) std::move(*block->get_by_position(col).column).assume_mutable()->clear(); } } else { @@ -352,6 +359,7 @@ Status VExprContext::execute_conjuncts_and_filter_block(const VExprContextSPtrs& RETURN_IF_ERROR(execute_conjuncts(ctxs, nullptr, false, block, &filter, &can_filter_all)); if (can_filter_all) { for (auto& col : columns_to_filter) { + // NOLINTNEXTLINE(performance-move-const-arg) std::move(*block->get_by_position(col).column).assume_mutable()->clear(); } } else { @@ -388,4 +396,5 @@ Status VExprContext::get_output_block_after_execute_exprs( return Status::OK(); } +#include "common/compile_check_end.h" } // namespace doris::vectorized diff --git a/be/src/vec/exprs/vin_predicate.cpp b/be/src/vec/exprs/vin_predicate.cpp index efd757ddd8b4d32..ce6270c2e62deaa 100644 --- a/be/src/vec/exprs/vin_predicate.cpp +++ b/be/src/vec/exprs/vin_predicate.cpp @@ -20,12 +20,10 @@ #include #include #include -#include #include -#include +#include #include -#include #include "common/status.h" #include "runtime/runtime_state.h" @@ -44,6 +42,7 @@ class RuntimeState; } // namespace doris namespace doris::vectorized { +#include "common/compile_check_begin.h" VInPredicate::VInPredicate(const TExprNode& node) : VExpr(node), _is_not_in(node.in_predicate.is_not_in) {} @@ -52,16 +51,15 @@ Status VInPredicate::prepare(RuntimeState* state, const RowDescriptor& desc, VExprContext* context) { RETURN_IF_ERROR_OR_PREPARED(VExpr::prepare(state, desc, context)); - if (_children.size() < 1) { + if (_children.empty()) { return Status::InternalError("no Function operator in."); } _expr_name = fmt::format("({} {} set)", _children[0]->expr_name(), _is_not_in 
? "not_in" : "in"); - DCHECK(_children.size() >= 1); ColumnsWithTypeAndName argument_template; - argument_template.reserve(_children.size()); + argument_template.reserve(get_num_children()); for (auto child : _children) { argument_template.emplace_back(nullptr, child->data_type(), child->expr_name()); } @@ -146,7 +144,7 @@ Status VInPredicate::execute(VExprContext* context, Block* block, int* result_co arguments[i] = column_id; } // call function - size_t num_columns_without_result = block->columns(); + uint32_t num_columns_without_result = block->columns(); // prepare a column to save result block->insert({nullptr, _data_type, _expr_name}); @@ -163,7 +161,7 @@ const std::string& VInPredicate::expr_name() const { std::string VInPredicate::debug_string() const { std::stringstream out; out << "InPredicate(" << children()[0]->debug_string() << " " << _is_not_in << ",["; - int num_children = children().size(); + int num_children = get_num_children(); for (int i = 1; i < num_children; ++i) { out << (i == 1 ? 
"" : " ") << children()[i]->debug_string(); @@ -173,4 +171,5 @@ std::string VInPredicate::debug_string() const { return out.str(); } +#include "common/compile_check_end.h" } // namespace doris::vectorized diff --git a/be/src/vec/exprs/vinfo_func.cpp b/be/src/vec/exprs/vinfo_func.cpp index c262882b317e90b..59140c4c267ca66 100644 --- a/be/src/vec/exprs/vinfo_func.cpp +++ b/be/src/vec/exprs/vinfo_func.cpp @@ -19,11 +19,8 @@ #include #include -#include #include -#include -#include #include "runtime/define_primitive_type.h" #include "runtime/types.h" @@ -32,13 +29,10 @@ #include "vec/core/types.h" #include "vec/data_types/data_type.h" -namespace doris { -namespace vectorized { -class VExprContext; -} // namespace vectorized -} // namespace doris - namespace doris::vectorized { +#include "common/compile_check_begin.h" + +class VExprContext; VInfoFunc::VInfoFunc(const TExprNode& node) : VExpr(node) { Field field; @@ -63,9 +57,10 @@ VInfoFunc::VInfoFunc(const TExprNode& node) : VExpr(node) { Status VInfoFunc::execute(VExprContext* context, vectorized::Block* block, int* result_column_id) { // Info function should return least one row, e.g. select current_user(). 
- size_t row_size = std::max(block->rows(), size_t(1)); + size_t row_size = std::max(block->rows(), 1UL); *result_column_id = VExpr::insert_param(block, {_column_ptr, _data_type, _expr_name}, row_size); return Status::OK(); } +#include "common/compile_check_end.h" } // namespace doris::vectorized diff --git a/be/src/vec/exprs/vliteral.cpp b/be/src/vec/exprs/vliteral.cpp index d4e8c8831065a7c..b1e3113b45af923 100644 --- a/be/src/vec/exprs/vliteral.cpp +++ b/be/src/vec/exprs/vliteral.cpp @@ -21,33 +21,21 @@ #include #include #include -#include -#include #include #include #include -#include -#include "common/exception.h" -#include "olap/olap_common.h" -#include "runtime/decimalv2_value.h" -#include "runtime/define_primitive_type.h" -#include "runtime/jsonb_value.h" -#include "runtime/large_int_value.h" -#include "runtime/types.h" -#include "util/string_parser.hpp" #include "vec/aggregate_functions/aggregate_function.h" #include "vec/columns/column.h" -#include "vec/common/string_ref.h" -#include "vec/common/typeid_cast.h" #include "vec/core/block.h" #include "vec/core/field.h" #include "vec/core/types.h" #include "vec/data_types/data_type_decimal.h" -#include "vec/runtime/vdatetime_value.h" namespace doris::vectorized { +#include "common/compile_check_begin.h" + class VExprContext; void VLiteral::init(const TExprNode& node) { @@ -70,15 +58,8 @@ Status VLiteral::execute(VExprContext* context, vectorized::Block* block, int* r } std::string VLiteral::value() const { - //TODO: dcheck the equality of size with 1. then use string with size to replace the ss. 
- std::stringstream out; - for (size_t i = 0; i < _column_ptr->size(); i++) { - if (i != 0) { - out << ", "; - } - out << _data_type->to_string(*_column_ptr, i); - } - return out.str(); + DCHECK(_column_ptr->size() == 1); + return _data_type->to_string(*_column_ptr, 0); } std::string VLiteral::debug_string() const { @@ -111,4 +92,5 @@ bool VLiteral::equals(const VExpr& other) { return true; } +#include "common/compile_check_end.h" } // namespace doris::vectorized diff --git a/be/src/vec/exprs/vmatch_predicate.cpp b/be/src/vec/exprs/vmatch_predicate.cpp index c80933df13c0bd4..333d0ef389f7249 100644 --- a/be/src/vec/exprs/vmatch_predicate.cpp +++ b/be/src/vec/exprs/vmatch_predicate.cpp @@ -17,6 +17,8 @@ #include "vec/exprs/vmatch_predicate.h" +#include + #ifdef __clang__ #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wshadow-field" @@ -26,8 +28,8 @@ #include // IWYU pragma: keep #include #include -#include +#include #include #include #include @@ -50,6 +52,8 @@ class RuntimeState; } // namespace doris namespace doris::vectorized { +#include "common/compile_check_begin.h" + using namespace doris::segment_v2; VMatchPredicate::VMatchPredicate(const TExprNode& node) : VExpr(node) { @@ -106,8 +110,8 @@ Status VMatchPredicate::prepare(RuntimeState* state, const RowDescriptor& desc, Status VMatchPredicate::open(RuntimeState* state, VExprContext* context, FunctionContext::FunctionStateScope scope) { DCHECK(_prepare_finished); - for (int i = 0; i < _children.size(); ++i) { - RETURN_IF_ERROR(_children[i]->open(state, context, scope)); + for (auto& i : _children) { + RETURN_IF_ERROR(i->open(state, context, scope)); } RETURN_IF_ERROR(VExpr::init_function_context(state, context, scope, _function)); if (scope == FunctionContext::THREAD_LOCAL || scope == FunctionContext::FRAGMENT_LOCAL) { @@ -161,7 +165,7 @@ Status VMatchPredicate::execute(VExprContext* context, Block* block, int* result arguments[i] = column_id; } // call function - size_t 
num_columns_without_result = block->columns(); + uint32_t num_columns_without_result = block->columns(); // prepare a column to save result block->insert({nullptr, _data_type, _expr_name}); RETURN_IF_ERROR(_function->execute(context->fn_context(_fn_context_index), *block, arguments, @@ -181,9 +185,9 @@ const std::string& VMatchPredicate::function_name() const { std::string VMatchPredicate::debug_string() const { std::stringstream out; out << "MatchPredicate(" << children()[0]->debug_string() << ",["; - int num_children = children().size(); + uint16_t num_children = get_num_children(); - for (int i = 1; i < num_children; ++i) { + for (uint16_t i = 1; i < num_children; ++i) { out << (i == 1 ? "" : " ") << children()[i]->debug_string(); } @@ -191,4 +195,5 @@ std::string VMatchPredicate::debug_string() const { return out.str(); } +#include "common/compile_check_end.h" } // namespace doris::vectorized \ No newline at end of file diff --git a/be/src/vec/exprs/vruntimefilter_wrapper.cpp b/be/src/vec/exprs/vruntimefilter_wrapper.cpp index bbd466327819f66..40910ab7a6b9fa4 100644 --- a/be/src/vec/exprs/vruntimefilter_wrapper.cpp +++ b/be/src/vec/exprs/vruntimefilter_wrapper.cpp @@ -18,26 +18,22 @@ #include "vec/exprs/vruntimefilter_wrapper.h" #include -#include -#include -#include -#include +#include -#include "util/defer_op.h" #include "util/runtime_profile.h" -#include "util/simd/bits.h" #include "vec/columns/column.h" #include "vec/columns/column_const.h" -#include "vec/columns/column_nullable.h" -#include "vec/columns/column_vector.h" #include "vec/core/block.h" +#include "vec/core/column_numbers.h" #include "vec/core/column_with_type_and_name.h" #include "vec/core/types.h" #include "vec/data_types/data_type.h" #include "vec/utils/util.hpp" namespace doris { +#include "common/compile_check_begin.h" + class RowDescriptor; class RuntimeState; class TExprNode; @@ -53,17 +49,18 @@ double get_comparison_ignore_thredhold() { double get_bloom_filter_ignore_thredhold() { return 
0.4; } - -namespace vectorized { -class VExprContext; -} // namespace vectorized } // namespace doris namespace doris::vectorized { -VRuntimeFilterWrapper::VRuntimeFilterWrapper(const TExprNode& node, const VExprSPtr& impl, +class VExprContext; + +VRuntimeFilterWrapper::VRuntimeFilterWrapper(const TExprNode& node, VExprSPtr impl, double ignore_thredhold, bool null_aware) - : VExpr(node), _impl(impl), _ignore_thredhold(ignore_thredhold), _null_aware(null_aware) { + : VExpr(node), + _impl(std::move(impl)), + _ignore_thredhold(ignore_thredhold), + _null_aware(null_aware) { reset_judge_selectivity(); } @@ -108,7 +105,7 @@ Status VRuntimeFilterWrapper::execute(VExprContext* context, Block* block, int* if (_getting_const_col) { _impl->set_getting_const_col(true); } - std::vector args; + ColumnNumbers args; RETURN_IF_ERROR(_impl->execute_runtime_fitler(context, block, result_column_id, args)); if (_getting_const_col) { _impl->set_getting_const_col(false); @@ -128,4 +125,5 @@ const std::string& VRuntimeFilterWrapper::expr_name() const { return _expr_name; } -} // namespace doris::vectorized \ No newline at end of file +#include "common/compile_check_end.h" +} // namespace doris::vectorized diff --git a/be/src/vec/exprs/vruntimefilter_wrapper.h b/be/src/vec/exprs/vruntimefilter_wrapper.h index 477d0dc8b1b4863..79e9361e854719c 100644 --- a/be/src/vec/exprs/vruntimefilter_wrapper.h +++ b/be/src/vec/exprs/vruntimefilter_wrapper.h @@ -20,10 +20,8 @@ #include #include #include -#include #include "common/config.h" -#include "common/object_pool.h" #include "common/status.h" #include "udf/udf.h" #include "util/runtime_profile.h" @@ -37,20 +35,19 @@ class TExprNode; double get_in_list_ignore_thredhold(size_t list_size); double get_comparison_ignore_thredhold(); double get_bloom_filter_ignore_thredhold(); - -namespace vectorized { -class Block; -class VExprContext; -} // namespace vectorized } // namespace doris namespace doris::vectorized { +#include "common/compile_check_begin.h" + 
+class Block; +class VExprContext; class VRuntimeFilterWrapper final : public VExpr { ENABLE_FACTORY_CREATOR(VRuntimeFilterWrapper); public: - VRuntimeFilterWrapper(const TExprNode& node, const VExprSPtr& impl, double ignore_thredhold, + VRuntimeFilterWrapper(const TExprNode& node, VExprSPtr impl, double ignore_thredhold, bool null_aware = false); ~VRuntimeFilterWrapper() override = default; Status execute(VExprContext* context, Block* block, int* result_column_id) override; @@ -62,7 +59,7 @@ class VRuntimeFilterWrapper final : public VExpr { const std::string& expr_name() const override; const VExprSPtrs& children() const override { return _impl->children(); } - const VExprSPtr get_impl() const override { return _impl; } + VExprSPtr get_impl() const override { return _impl; } void attach_profile_counter(RuntimeProfile::Counter* expr_filtered_rows_counter, RuntimeProfile::Counter* expr_input_rows_counter, @@ -80,12 +77,12 @@ class VRuntimeFilterWrapper final : public VExpr { template static void judge_selectivity(double ignore_threshold, int64_t filter_rows, int64_t input_rows, T& always_true) { - always_true = filter_rows / (input_rows * 1.0) < ignore_threshold; + always_true = filter_rows / (input_rows * 1.0L) < ignore_threshold; } bool is_rf_wrapper() const override { return true; } - void do_judge_selectivity(int64_t filter_rows, int64_t input_rows) override { + void do_judge_selectivity(uint64_t filter_rows, uint64_t input_rows) override { update_counters(filter_rows, input_rows); if (!_always_true) { @@ -114,8 +111,8 @@ class VRuntimeFilterWrapper final : public VExpr { // without recalculating. At the beginning of the next period, // reset_judge_selectivity is used to reset these variables. 
std::atomic_int _judge_counter = 0; - std::atomic_int _judge_input_rows = 0; - std::atomic_int _judge_filter_rows = 0; + std::atomic_uint64_t _judge_input_rows = 0; + std::atomic_uint64_t _judge_filter_rows = 0; std::atomic_int _always_true = false; RuntimeProfile::Counter* _expr_filtered_rows_counter = nullptr; @@ -129,4 +126,5 @@ class VRuntimeFilterWrapper final : public VExpr { using VRuntimeFilterPtr = std::shared_ptr; +#include "common/compile_check_end.h" } // namespace doris::vectorized \ No newline at end of file diff --git a/be/src/vec/exprs/vtopn_pred.h b/be/src/vec/exprs/vtopn_pred.h index 044bc28b2618ab8..ce756273d92018f 100644 --- a/be/src/vec/exprs/vtopn_pred.h +++ b/be/src/vec/exprs/vtopn_pred.h @@ -24,7 +24,7 @@ #include "runtime/query_context.h" #include "runtime/runtime_predicate.h" #include "runtime/runtime_state.h" -#include "vec/columns/columns_number.h" +#include "vec/core/column_numbers.h" #include "vec/data_types/data_type.h" #include "vec/exprs/vexpr.h" #include "vec/exprs/vslot_ref.h" @@ -32,6 +32,7 @@ #include "vec/utils/util.hpp" namespace doris::vectorized { +#include "common/compile_check_begin.h" // only used for dynamic topn filter class VTopNPred : public VExpr { @@ -94,10 +95,11 @@ class VTopNPred : public VExpr { int slot_id = -1; RETURN_IF_ERROR(_children[0]->execute(context, block, &slot_id)); + // if error(slot_id == -1), will return. 
+ ColumnNumbers arguments = {static_cast(slot_id), + static_cast(topn_value_id)}; - std::vector arguments = {(size_t)slot_id, (size_t)topn_value_id}; - - size_t num_columns_without_result = block->columns(); + uint32_t num_columns_without_result = block->columns(); block->insert({nullptr, _data_type, _expr_name}); RETURN_IF_ERROR(_function->execute(nullptr, *block, arguments, num_columns_without_result, block->rows(), false)); @@ -119,4 +121,6 @@ class VTopNPred : public VExpr { FunctionBasePtr _function; VExprContextSPtr _target_ctx; }; + +#include "common/compile_check_end.h" } // namespace doris::vectorized diff --git a/be/src/vec/functions/array/function_array_aggregation.cpp b/be/src/vec/functions/array/function_array_aggregation.cpp index 24d82f7894a3eb4..24ded2c7b41952c 100644 --- a/be/src/vec/functions/array/function_array_aggregation.cpp +++ b/be/src/vec/functions/array/function_array_aggregation.cpp @@ -205,7 +205,7 @@ struct ArrayAggregateImpl { } } - static Status execute(Block& block, const ColumnNumbers& arguments, size_t result, + static Status execute(Block& block, const ColumnNumbers& arguments, uint32_t result, const DataTypeArray* data_type_array, const ColumnArray& array) { ColumnPtr res; DataTypePtr type = data_type_array->get_nested_type(); diff --git a/be/src/vec/functions/array/function_array_apply.cpp b/be/src/vec/functions/array/function_array_apply.cpp index 426347c449b3b65..75425389dd975cc 100644 --- a/be/src/vec/functions/array/function_array_apply.cpp +++ b/be/src/vec/functions/array/function_array_apply.cpp @@ -72,7 +72,7 @@ class FunctionArrayApply : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { ColumnPtr src_column = block.get_by_position(arguments[0]).column->convert_to_full_column_if_const(); const auto& src_column_array = 
check_and_get_column(*src_column); diff --git a/be/src/vec/functions/array/function_array_binary.h b/be/src/vec/functions/array/function_array_binary.h index a41a82b8a784532..3a134e7392a40cf 100644 --- a/be/src/vec/functions/array/function_array_binary.h +++ b/be/src/vec/functions/array/function_array_binary.h @@ -50,7 +50,7 @@ class FunctionArrayBinary : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { const auto& [left_column, left_const] = unpack_if_const(block.get_by_position(arguments[0]).column); const auto& [right_column, right_const] = diff --git a/be/src/vec/functions/array/function_array_compact.h b/be/src/vec/functions/array/function_array_compact.h index c3acde4959a47d6..ef4ae5a76ad8923 100644 --- a/be/src/vec/functions/array/function_array_compact.h +++ b/be/src/vec/functions/array/function_array_compact.h @@ -66,7 +66,7 @@ class FunctionArrayCompact : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { ColumnPtr src_column = block.get_by_position(arguments[0]).column->convert_to_full_column_if_const(); const auto& src_column_array = check_and_get_column(*src_column); diff --git a/be/src/vec/functions/array/function_array_concat.cpp b/be/src/vec/functions/array/function_array_concat.cpp index a4f1bfd29ccd786..18d0b7b48c14dcd 100644 --- a/be/src/vec/functions/array/function_array_concat.cpp +++ b/be/src/vec/functions/array/function_array_concat.cpp @@ -68,7 +68,7 @@ class FunctionArrayConcat : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - const size_t result, size_t input_rows_count) const override { + const uint32_t result, 
size_t input_rows_count) const override { DataTypePtr column_type = block.get_by_position(arguments[0]).type; auto nested_type = assert_cast(*column_type).get_nested_type(); auto result_column = ColumnArray::create(nested_type->create_column(), diff --git a/be/src/vec/functions/array/function_array_constructor.cpp b/be/src/vec/functions/array/function_array_constructor.cpp index 6a26725ac9d00d4..50c53697d2600d3 100644 --- a/be/src/vec/functions/array/function_array_constructor.cpp +++ b/be/src/vec/functions/array/function_array_constructor.cpp @@ -72,7 +72,7 @@ class FunctionArrayConstructor : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { size_t num_element = arguments.size(); auto result_col = block.get_by_position(result).type->create_column(); auto* result_array_col = static_cast(result_col.get()); diff --git a/be/src/vec/functions/array/function_array_contains_all.cpp b/be/src/vec/functions/array/function_array_contains_all.cpp index de74b648aab0475..298289278bfa5f7 100644 --- a/be/src/vec/functions/array/function_array_contains_all.cpp +++ b/be/src/vec/functions/array/function_array_contains_all.cpp @@ -57,7 +57,7 @@ class FunctionArrayContainsAll : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { const auto& [left_column, left_is_const] = unpack_if_const(block.get_by_position(arguments[0]).column); const auto& [right_column, right_is_const] = diff --git a/be/src/vec/functions/array/function_array_count.cpp b/be/src/vec/functions/array/function_array_count.cpp index 2d8eca3c178f903..21520de0744ea13 100644 --- a/be/src/vec/functions/array/function_array_count.cpp +++ 
b/be/src/vec/functions/array/function_array_count.cpp @@ -48,7 +48,7 @@ class FunctionArrayCount : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { const auto& [src_column, src_const] = unpack_if_const(block.get_by_position(arguments[0]).column); const ColumnArray* array_column = nullptr; diff --git a/be/src/vec/functions/array/function_array_cum_sum.cpp b/be/src/vec/functions/array/function_array_cum_sum.cpp index 24750b55f6c8c20..2f93a2a83b1a897 100644 --- a/be/src/vec/functions/array/function_array_cum_sum.cpp +++ b/be/src/vec/functions/array/function_array_cum_sum.cpp @@ -97,7 +97,7 @@ class FunctionArrayCumSum : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - const size_t result, size_t input_rows_count) const override { + const uint32_t result, size_t input_rows_count) const override { auto src_arg = block.get_by_position(arguments[0]); ColumnPtr src_column = src_arg.column->convert_to_full_column_if_const(); diff --git a/be/src/vec/functions/array/function_array_difference.h b/be/src/vec/functions/array/function_array_difference.h index 9eca677f0336ce4..283ac206ce69b75 100644 --- a/be/src/vec/functions/array/function_array_difference.h +++ b/be/src/vec/functions/array/function_array_difference.h @@ -104,7 +104,7 @@ class FunctionArrayDifference : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { const ColumnWithTypeAndName& arg = block.get_by_position(arguments[0]); auto res_column = _execute_non_nullable(arg, input_rows_count); if (!res_column) { diff --git a/be/src/vec/functions/array/function_array_distance.h 
b/be/src/vec/functions/array/function_array_distance.h index fa05196e5e1c2c5..e03e52a0ce191fa 100644 --- a/be/src/vec/functions/array/function_array_distance.h +++ b/be/src/vec/functions/array/function_array_distance.h @@ -95,7 +95,7 @@ class FunctionArrayDistance : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { const auto& arg1 = block.get_by_position(arguments[0]); const auto& arg2 = block.get_by_position(arguments[1]); if (!_check_input_type(arg1.type) || !_check_input_type(arg2.type)) { @@ -123,7 +123,7 @@ class FunctionArrayDistance : public IFunction { // prepare return data auto dst = ColumnFloat64::create(input_rows_count); auto& dst_data = dst->get_data(); - auto dst_null_column = ColumnUInt8::create(input_rows_count); + auto dst_null_column = ColumnUInt8::create(input_rows_count, 0); auto& dst_null_data = dst_null_column->get_data(); const auto& offsets1 = *arr1.offsets_ptr; diff --git a/be/src/vec/functions/array/function_array_distinct.h b/be/src/vec/functions/array/function_array_distinct.h index a3b70aa60d73859..6f477f3b671dec2 100644 --- a/be/src/vec/functions/array/function_array_distinct.h +++ b/be/src/vec/functions/array/function_array_distinct.h @@ -76,7 +76,7 @@ class FunctionArrayDistinct : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { ColumnPtr src_column = block.get_by_position(arguments[0]).column->convert_to_full_column_if_const(); const auto& src_column_array = check_and_get_column(*src_column); diff --git a/be/src/vec/functions/array/function_array_element.h b/be/src/vec/functions/array/function_array_element.h index 4a9bffdbb3cc1d1..7a4807608c3ef98 100644 --- 
a/be/src/vec/functions/array/function_array_element.h +++ b/be/src/vec/functions/array/function_array_element.h @@ -17,6 +17,7 @@ // This file is copied from // https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/array/arrayElement.cpp // and modified by Doris + #pragma once #include @@ -96,8 +97,8 @@ class FunctionArrayElement : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { - auto dst_null_column = ColumnUInt8::create(input_rows_count); + uint32_t result, size_t input_rows_count) const override { + auto dst_null_column = ColumnUInt8::create(input_rows_count, 0); UInt8* dst_null_map = dst_null_column->get_data().data(); const UInt8* src_null_map = nullptr; ColumnsWithTypeAndName args; diff --git a/be/src/vec/functions/array/function_array_enumerate.cpp b/be/src/vec/functions/array/function_array_enumerate.cpp index 0ce927db897870c..0e8bca3e5cd3b1b 100644 --- a/be/src/vec/functions/array/function_array_enumerate.cpp +++ b/be/src/vec/functions/array/function_array_enumerate.cpp @@ -79,7 +79,7 @@ class FunctionArrayEnumerate : public IFunction { return return_type; } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { auto left_column = block.get_by_position(arguments[0]).column->convert_to_full_column_if_const(); const ColumnArray* array = diff --git a/be/src/vec/functions/array/function_array_enumerate_uniq.cpp b/be/src/vec/functions/array/function_array_enumerate_uniq.cpp index 167d008a7339d7f..a10124354be3ae3 100644 --- a/be/src/vec/functions/array/function_array_enumerate_uniq.cpp +++ b/be/src/vec/functions/array/function_array_enumerate_uniq.cpp @@ -117,7 +117,7 @@ class FunctionArrayEnumerateUniq : public IFunction { #endif // __GNUC__ Status 
execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { ColumnRawPtrs data_columns(arguments.size()); const ColumnArray::Offsets64* offsets = nullptr; ColumnPtr src_offsets; diff --git a/be/src/vec/functions/array/function_array_exists.cpp b/be/src/vec/functions/array/function_array_exists.cpp index daef5faf8494758..8621ecd35c02803 100644 --- a/be/src/vec/functions/array/function_array_exists.cpp +++ b/be/src/vec/functions/array/function_array_exists.cpp @@ -64,7 +64,7 @@ class FunctionArrayExists : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { // 1. get first array column const auto first_column = block.get_by_position(arguments[0]).column->convert_to_full_column_if_const(); diff --git a/be/src/vec/functions/array/function_array_filter.cpp b/be/src/vec/functions/array/function_array_filter.cpp index af7e7a4cb316b84..1a9cc5105b0f58b 100644 --- a/be/src/vec/functions/array/function_array_filter.cpp +++ b/be/src/vec/functions/array/function_array_filter.cpp @@ -63,7 +63,7 @@ class FunctionArrayFilter : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { //TODO: maybe need optimize not convert auto first_column = block.get_by_position(arguments[0]).column->convert_to_full_column_if_const(); diff --git a/be/src/vec/functions/array/function_array_first_or_last_index.cpp b/be/src/vec/functions/array/function_array_first_or_last_index.cpp index 144c7a9194fa738..773fc7b81998be1 100644 --- a/be/src/vec/functions/array/function_array_first_or_last_index.cpp +++ 
b/be/src/vec/functions/array/function_array_first_or_last_index.cpp @@ -67,7 +67,7 @@ class FunctionArrayFirstOrLastIndex : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { auto src_column = block.get_by_position(arguments[0]).column->convert_to_full_column_if_const(); const ColumnArray* array_column = nullptr; diff --git a/be/src/vec/functions/array/function_array_index.h b/be/src/vec/functions/array/function_array_index.h index ec31277c36c855b..e602d67a73b01c0 100644 --- a/be/src/vec/functions/array/function_array_index.h +++ b/be/src/vec/functions/array/function_array_index.h @@ -191,7 +191,7 @@ class FunctionArrayIndex : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { DBUG_EXECUTE_IF("array_func.array_contains", { auto req_id = DebugPoints::instance()->get_debug_param_or_default( "array_func.array_contains", "req_id", 0); @@ -217,9 +217,9 @@ class FunctionArrayIndex : public IFunction { const auto& right_chars = reinterpret_cast(right_column).get_chars(); // prepare return data - auto dst = ColumnVector::create(offsets.size()); + auto dst = ColumnVector::create(offsets.size(), 0); auto& dst_data = dst->get_data(); - auto dst_null_column = ColumnUInt8::create(offsets.size()); + auto dst_null_column = ColumnUInt8::create(offsets.size(), 0); auto& dst_null_data = dst_null_column->get_data(); // process @@ -286,9 +286,9 @@ class FunctionArrayIndex : public IFunction { const auto& right_data = reinterpret_cast(right_column).get_data(); // prepare return data - auto dst = ColumnVector::create(offsets.size()); + auto dst = ColumnVector::create(offsets.size(), 0); auto& dst_data = dst->get_data(); - auto 
dst_null_column = ColumnUInt8::create(offsets.size()); + auto dst_null_column = ColumnUInt8::create(offsets.size(), 0); auto& dst_null_data = dst_null_column->get_data(); // process @@ -348,7 +348,7 @@ class FunctionArrayIndex : public IFunction { return nullptr; } - Status _execute_dispatch(Block& block, const ColumnNumbers& arguments, size_t result, + Status _execute_dispatch(Block& block, const ColumnNumbers& arguments, uint32_t result, size_t input_rows_count) const { // extract array offsets and nested data auto left_column = diff --git a/be/src/vec/functions/array/function_array_join.h b/be/src/vec/functions/array/function_array_join.h index d184765edf65afe..f7ffd4dfca3449b 100644 --- a/be/src/vec/functions/array/function_array_join.h +++ b/be/src/vec/functions/array/function_array_join.h @@ -54,7 +54,7 @@ struct ArrayJoinImpl { return std::make_shared(); } - static Status execute(Block& block, const ColumnNumbers& arguments, size_t result, + static Status execute(Block& block, const ColumnNumbers& arguments, uint32_t result, const DataTypeArray* data_type_array, const ColumnArray& array) { ColumnPtr src_column = block.get_by_position(arguments[0]).column->convert_to_full_column_if_const(); diff --git a/be/src/vec/functions/array/function_array_mapped.h b/be/src/vec/functions/array/function_array_mapped.h index 19ffb8ce9c722fd..93f7d068e4b01dc 100644 --- a/be/src/vec/functions/array/function_array_mapped.h +++ b/be/src/vec/functions/array/function_array_mapped.h @@ -48,7 +48,7 @@ class FunctionArrayMapped : public IFunction { String get_name() const override { return name; } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { const auto& typed_column = block.get_by_position(arguments[0]); auto ptr = typed_column.column->convert_to_full_column_if_const(); const typename Impl::column_type* column_array; 
diff --git a/be/src/vec/functions/array/function_array_nary.h b/be/src/vec/functions/array/function_array_nary.h index 9d14a68560cef41..923b64473a8d1de 100644 --- a/be/src/vec/functions/array/function_array_nary.h +++ b/be/src/vec/functions/array/function_array_nary.h @@ -59,7 +59,7 @@ class FunctionArrayNary : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { ColumnPtr res_ptr; ColumnArrayExecutionDatas datas(arguments.size()); std::vector col_const(arguments.size()); diff --git a/be/src/vec/functions/array/function_array_pop.cpp b/be/src/vec/functions/array/function_array_pop.cpp index f9b5c161ae124e9..2182699e0205b5b 100644 --- a/be/src/vec/functions/array/function_array_pop.cpp +++ b/be/src/vec/functions/array/function_array_pop.cpp @@ -63,7 +63,7 @@ class FunctionArrayPop : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { auto array_column = block.get_by_position(arguments[0]).column->convert_to_full_column_if_const(); // extract src array column diff --git a/be/src/vec/functions/array/function_array_pushback.cpp b/be/src/vec/functions/array/function_array_pushback.cpp index 8dedd3e9a66e8be..d1152ca9739b983 100644 --- a/be/src/vec/functions/array/function_array_pushback.cpp +++ b/be/src/vec/functions/array/function_array_pushback.cpp @@ -61,7 +61,7 @@ class FunctionArrayPushback : public IFunction { }; Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { const auto& [src_column, src_const] = 
unpack_if_const(block.get_by_position(arguments[0]).column); const auto& [right_column, right_const] = diff --git a/be/src/vec/functions/array/function_array_pushfront.cpp b/be/src/vec/functions/array/function_array_pushfront.cpp index 28ba7ac6c36ff5c..c592999b108a356 100644 --- a/be/src/vec/functions/array/function_array_pushfront.cpp +++ b/be/src/vec/functions/array/function_array_pushfront.cpp @@ -64,7 +64,7 @@ class FunctionArrayPushfront : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { auto src_column = block.get_by_position(arguments[0]).column->convert_to_full_column_if_const(); // extract src array column diff --git a/be/src/vec/functions/array/function_array_range.cpp b/be/src/vec/functions/array/function_array_range.cpp index 1b71693f6407bcf..b6b814a26e339a3 100644 --- a/be/src/vec/functions/array/function_array_range.cpp +++ b/be/src/vec/functions/array/function_array_range.cpp @@ -80,7 +80,7 @@ class FunctionArrayRange : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { return Impl::execute_impl(context, block, arguments, result, input_rows_count); } }; @@ -127,7 +127,7 @@ struct RangeImplUtil { static constexpr auto name = get_function_name(); - static Status range_execute(Block& block, const ColumnNumbers& arguments, size_t result, + static Status range_execute(Block& block, const ColumnNumbers& arguments, uint32_t result, size_t input_rows_count) { DCHECK_EQ(arguments.size(), 3); auto return_nested_type = make_nullable(std::make_shared()); @@ -248,7 +248,7 @@ struct RangeOneImpl : public RangeImplUtil { } static Status execute_impl(FunctionContext* context, Block& block, - const 
ColumnNumbers& arguments, size_t result, + const ColumnNumbers& arguments, uint32_t result, size_t input_rows_count) { using ColumnType = std::conditional_t, ColumnInt32, ColumnDateTimeV2>; @@ -272,7 +272,7 @@ struct RangeTwoImpl : public RangeImplUtil { } static Status execute_impl(FunctionContext* context, Block& block, - const ColumnNumbers& arguments, size_t result, + const ColumnNumbers& arguments, uint32_t result, size_t input_rows_count) { auto step_column = ColumnInt32::create(input_rows_count, 1); block.insert({std::move(step_column), std::make_shared(), "step_column"}); @@ -291,7 +291,7 @@ struct RangeThreeImpl : public RangeImplUtil { } static Status execute_impl(FunctionContext* context, Block& block, - const ColumnNumbers& arguments, size_t result, + const ColumnNumbers& arguments, uint32_t result, size_t input_rows_count) { return (RangeImplUtil::range_execute)( block, arguments, result, input_rows_count); diff --git a/be/src/vec/functions/array/function_array_remove.h b/be/src/vec/functions/array/function_array_remove.h index 2645f87320fabbf..197b032b0f8a4be 100644 --- a/be/src/vec/functions/array/function_array_remove.h +++ b/be/src/vec/functions/array/function_array_remove.h @@ -73,7 +73,7 @@ class FunctionArrayRemove : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { // For default implementation of nulls args ColumnsWithTypeAndName args = {block.get_by_position(arguments[0]), block.get_by_position(arguments[1])}; diff --git a/be/src/vec/functions/array/function_array_reverse.h b/be/src/vec/functions/array/function_array_reverse.h index 07145426145b3c9..8567bc61158baba 100644 --- a/be/src/vec/functions/array/function_array_reverse.h +++ b/be/src/vec/functions/array/function_array_reverse.h @@ -28,7 +28,7 @@ namespace doris::vectorized { struct ArrayReverseImpl { - 
static Status _execute(Block& block, const ColumnNumbers& arguments, size_t result, + static Status _execute(Block& block, const ColumnNumbers& arguments, uint32_t result, size_t input_rows_count) { ColumnPtr src_column = block.get_by_position(arguments[0]).column->convert_to_full_column_if_const(); diff --git a/be/src/vec/functions/array/function_array_shuffle.cpp b/be/src/vec/functions/array/function_array_shuffle.cpp index 8d46aaa69333fa4..648b06318df3dc6 100644 --- a/be/src/vec/functions/array/function_array_shuffle.cpp +++ b/be/src/vec/functions/array/function_array_shuffle.cpp @@ -66,7 +66,7 @@ class FunctionArrayShuffle : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { ColumnPtr src_column = block.get_by_position(arguments[0]).column->convert_to_full_column_if_const(); const auto& src_column_array = assert_cast(*src_column); diff --git a/be/src/vec/functions/array/function_array_slice.h b/be/src/vec/functions/array/function_array_slice.h index 6cc5ece48ab57b5..2acd1d3fbe1fd43 100644 --- a/be/src/vec/functions/array/function_array_slice.h +++ b/be/src/vec/functions/array/function_array_slice.h @@ -69,7 +69,7 @@ class FunctionArraySlice : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { auto array_column = block.get_by_position(arguments[0]).column->convert_to_full_column_if_const(); auto offset_column = diff --git a/be/src/vec/functions/array/function_array_sort.h b/be/src/vec/functions/array/function_array_sort.h index 800a137742478b6..7b66336836ea12f 100644 --- a/be/src/vec/functions/array/function_array_sort.h +++ b/be/src/vec/functions/array/function_array_sort.h @@ -63,7 +63,7 @@ class 
FunctionArraySort : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { ColumnPtr src_column = block.get_by_position(arguments[0]).column->convert_to_full_column_if_const(); const auto& src_column_array = check_and_get_column(*src_column); diff --git a/be/src/vec/functions/array/function_array_sortby.cpp b/be/src/vec/functions/array/function_array_sortby.cpp index 15e296acb2440e4..499a5e37ab9380e 100644 --- a/be/src/vec/functions/array/function_array_sortby.cpp +++ b/be/src/vec/functions/array/function_array_sortby.cpp @@ -66,7 +66,7 @@ class FunctionArraySortBy : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { ColumnPtr argument_columns[2] = {nullptr, nullptr}; ColumnPtr argument_nullmap[2] = {nullptr, nullptr}; for (int i = 0; i < 2; ++i) { diff --git a/be/src/vec/functions/array/function_array_split.cpp b/be/src/vec/functions/array/function_array_split.cpp index 30e46d18c8fae04..7f7a847abc71f41 100644 --- a/be/src/vec/functions/array/function_array_split.cpp +++ b/be/src/vec/functions/array/function_array_split.cpp @@ -61,7 +61,7 @@ class FunctionArraySplit : public IFunction { }; Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { // (Array((Int))) auto src_column = block.get_by_position(arguments[0]).column->convert_to_full_column_if_const(); diff --git a/be/src/vec/functions/array/function_array_with_constant.cpp b/be/src/vec/functions/array/function_array_with_constant.cpp index 16e3947714cb627..ba385535ebb3a68 100644 --- 
a/be/src/vec/functions/array/function_array_with_constant.cpp +++ b/be/src/vec/functions/array/function_array_with_constant.cpp @@ -75,7 +75,7 @@ class FunctionArrayWithConstant : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { auto num = block.get_by_position(arguments[FunctionType::param_num_idx]) .column->convert_to_full_column_if_const(); num = num->is_nullable() @@ -92,8 +92,8 @@ class FunctionArrayWithConstant : public IFunction { for (size_t i = 0; i < input_rows_count; ++i) { auto array_size = num->get_int(i); if (UNLIKELY(array_size < 0) || UNLIKELY(array_size > max_array_size_as_field)) { - return Status::RuntimeError("Array size should in range(0, {}) in function: {}", - max_array_size_as_field, get_name()); + return Status::InvalidArgument("Array size should in range(0, {}) in function: {}", + max_array_size_as_field, get_name()); } offset += array_size; offsets.push_back(offset); diff --git a/be/src/vec/functions/array/function_array_zip.cpp b/be/src/vec/functions/array/function_array_zip.cpp index 2f9b94454e1f5e9..217a8039421a1c1 100644 --- a/be/src/vec/functions/array/function_array_zip.cpp +++ b/be/src/vec/functions/array/function_array_zip.cpp @@ -91,7 +91,7 @@ class FunctionArrayZip : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { size_t num_element = arguments.size(); // all the columns must have the same size as the first column diff --git a/be/src/vec/functions/array/function_arrays_overlap.h b/be/src/vec/functions/array/function_arrays_overlap.h index 23c0ed1f8cc2615..7c851f5c16088b7 100644 --- a/be/src/vec/functions/array/function_arrays_overlap.h +++ 
b/be/src/vec/functions/array/function_arrays_overlap.h @@ -207,7 +207,7 @@ class FunctionArraysOverlap : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { DBUG_EXECUTE_IF("array_func.arrays_overlap", { auto req_id = DebugPoints::instance()->get_debug_param_or_default( "array_func.arrays_overlap", "req_id", 0); diff --git a/be/src/vec/functions/array/varray_match_function.cpp b/be/src/vec/functions/array/varray_match_function.cpp index c322fea75663031..e0081f3c39fc52b 100644 --- a/be/src/vec/functions/array/varray_match_function.cpp +++ b/be/src/vec/functions/array/varray_match_function.cpp @@ -59,7 +59,7 @@ class ArrayMatchFunction : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { // here is executed by array_map filtered and arg[0] is bool result column const auto& [src_column, src_const] = unpack_if_const(block.get_by_position(arguments[0]).column); diff --git a/be/src/vec/functions/comparison_equal_for_null.cpp b/be/src/vec/functions/comparison_equal_for_null.cpp index 24c669094a515e6..919f9ebed65a7c0 100644 --- a/be/src/vec/functions/comparison_equal_for_null.cpp +++ b/be/src/vec/functions/comparison_equal_for_null.cpp @@ -66,7 +66,7 @@ class FunctionEqForNull : public IFunction { bool use_default_implementation_for_nulls() const override { return false; } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { ColumnWithTypeAndName& col_left = block.get_by_position(arguments[0]); ColumnWithTypeAndName& col_right = 
block.get_by_position(arguments[1]); diff --git a/be/src/vec/functions/date_time_transforms.h b/be/src/vec/functions/date_time_transforms.h index 73155afae3a996e..d2c30d9d706332f 100644 --- a/be/src/vec/functions/date_time_transforms.h +++ b/be/src/vec/functions/date_time_transforms.h @@ -429,7 +429,7 @@ struct Transformer> { template struct DateTimeTransformImpl { - static Status execute(Block& block, const ColumnNumbers& arguments, size_t result, + static Status execute(Block& block, const ColumnNumbers& arguments, uint32_t result, size_t input_rows_count) { using Op = Transformer; diff --git a/be/src/vec/functions/function.cpp b/be/src/vec/functions/function.cpp index 1fea4c70fc1753e..5b935444710e25d 100644 --- a/be/src/vec/functions/function.cpp +++ b/be/src/vec/functions/function.cpp @@ -23,7 +23,6 @@ #include #include #include -#include #include "vec/aggregate_functions/aggregate_function.h" #include "vec/columns/column.h" @@ -42,7 +41,7 @@ namespace doris::vectorized { ColumnPtr wrap_in_nullable(const ColumnPtr& src, const Block& block, const ColumnNumbers& args, - size_t result, size_t input_rows_count) { + uint32_t result, size_t input_rows_count) { ColumnPtr result_null_map_column; /// If result is already nullable. 
ColumnPtr src_not_nullable = src; @@ -105,7 +104,7 @@ bool have_null_column(const ColumnsWithTypeAndName& args) { } inline Status PreparedFunctionImpl::_execute_skipped_constant_deal( - FunctionContext* context, Block& block, const ColumnNumbers& args, size_t result, + FunctionContext* context, Block& block, const ColumnNumbers& args, uint32_t result, size_t input_rows_count, bool dry_run) const { bool executed = false; RETURN_IF_ERROR(default_implementation_for_nulls(context, block, args, result, input_rows_count, @@ -122,7 +121,7 @@ inline Status PreparedFunctionImpl::_execute_skipped_constant_deal( } Status PreparedFunctionImpl::default_implementation_for_constant_arguments( - FunctionContext* context, Block& block, const ColumnNumbers& args, size_t result, + FunctionContext* context, Block& block, const ColumnNumbers& args, uint32_t result, size_t input_rows_count, bool dry_run, bool* executed) const { *executed = false; ColumnNumbers args_expect_const = get_arguments_that_are_always_constant(); @@ -186,7 +185,7 @@ Status PreparedFunctionImpl::default_implementation_for_constant_arguments( } Status PreparedFunctionImpl::default_implementation_for_nulls( - FunctionContext* context, Block& block, const ColumnNumbers& args, size_t result, + FunctionContext* context, Block& block, const ColumnNumbers& args, uint32_t result, size_t input_rows_count, bool dry_run, bool* executed) const { *executed = false; if (args.empty() || !use_default_implementation_for_nulls()) { @@ -232,7 +231,7 @@ Status PreparedFunctionImpl::default_implementation_for_nulls( } Status PreparedFunctionImpl::execute_without_low_cardinality_columns( - FunctionContext* context, Block& block, const ColumnNumbers& args, size_t result, + FunctionContext* context, Block& block, const ColumnNumbers& args, uint32_t result, size_t input_rows_count, bool dry_run) const { bool executed = false; @@ -246,7 +245,7 @@ Status PreparedFunctionImpl::execute_without_low_cardinality_columns( } Status 
PreparedFunctionImpl::execute(FunctionContext* context, Block& block, - const ColumnNumbers& args, size_t result, + const ColumnNumbers& args, uint32_t result, size_t input_rows_count, bool dry_run) const { return execute_without_low_cardinality_columns(context, block, args, result, input_rows_count, dry_run); diff --git a/be/src/vec/functions/function.h b/be/src/vec/functions/function.h index 4702a4b7af0bbf2..5dab13012a2784c 100644 --- a/be/src/vec/functions/function.h +++ b/be/src/vec/functions/function.h @@ -22,10 +22,9 @@ #include #include -#include +#include #include -#include #include #include @@ -95,7 +94,7 @@ class IPreparedFunction { virtual String get_name() const = 0; virtual Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count, bool dry_run) const = 0; + uint32_t result, size_t input_rows_count, bool dry_run) const = 0; }; using PreparedFunctionPtr = std::shared_ptr; @@ -103,7 +102,7 @@ using PreparedFunctionPtr = std::shared_ptr; class PreparedFunctionImpl : public IPreparedFunction { public: Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count, bool dry_run = false) const final; + uint32_t result, size_t input_rows_count, bool dry_run = false) const final; /** If the function have non-zero number of arguments, * and if all arguments are constant, that we could automatically provide default implementation: @@ -120,13 +119,13 @@ class PreparedFunctionImpl : public IPreparedFunction { protected: virtual Status execute_impl_dry_run(FunctionContext* context, Block& block, - const ColumnNumbers& arguments, size_t result, + const ColumnNumbers& arguments, uint32_t result, size_t input_rows_count) const { return execute_impl(context, block, arguments, result, input_rows_count); } virtual Status execute_impl(FunctionContext* context, Block& block, - const ColumnNumbers& arguments, size_t result, + const 
ColumnNumbers& arguments, uint32_t result, size_t input_rows_count) const = 0; /** Default implementation in presence of Nullable arguments or NULL constants as arguments is the following: @@ -150,18 +149,18 @@ class PreparedFunctionImpl : public IPreparedFunction { private: Status default_implementation_for_nulls(FunctionContext* context, Block& block, - const ColumnNumbers& args, size_t result, + const ColumnNumbers& args, uint32_t result, size_t input_rows_count, bool dry_run, bool* executed) const; Status default_implementation_for_constant_arguments(FunctionContext* context, Block& block, - const ColumnNumbers& args, size_t result, + const ColumnNumbers& args, uint32_t result, size_t input_rows_count, bool dry_run, bool* executed) const; Status execute_without_low_cardinality_columns(FunctionContext* context, Block& block, - const ColumnNumbers& arguments, size_t result, + const ColumnNumbers& arguments, uint32_t result, size_t input_rows_count, bool dry_run) const; Status _execute_skipped_constant_deal(FunctionContext* context, Block& block, - const ColumnNumbers& args, size_t result, + const ColumnNumbers& args, uint32_t result, size_t input_rows_count, bool dry_run) const; }; @@ -179,7 +178,7 @@ class IFunctionBase { /// Do preparations and return executable. /// sample_block should contain data types of arguments and values of constants, if relevant. virtual PreparedFunctionPtr prepare(FunctionContext* context, const Block& sample_block, - const ColumnNumbers& arguments, size_t result) const = 0; + const ColumnNumbers& arguments, uint32_t result) const = 0; /// Override this when function need to store state in the `FunctionContext`, or do some /// preparation work according to information from `FunctionContext`. 
@@ -189,7 +188,7 @@ class IFunctionBase { /// TODO: make const virtual Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count, bool dry_run = false) const { + uint32_t result, size_t input_rows_count, bool dry_run = false) const { return prepare(context, block, arguments, result) ->execute(context, block, arguments, result, input_rows_count, dry_run); } @@ -367,7 +366,7 @@ class FunctionBuilderImpl : public IFunctionBuilder { virtual FunctionBasePtr build_impl(const ColumnsWithTypeAndName& arguments, const DataTypePtr& return_type) const = 0; - virtual DataTypes get_variadic_argument_types_impl() const { return DataTypes(); } + virtual DataTypes get_variadic_argument_types_impl() const { return {}; } private: DataTypePtr get_return_type_without_low_cardinality( @@ -388,9 +387,8 @@ class IFunction : public std::enable_shared_from_this, String get_name() const override = 0; /// Notice: We should not change the column in the block, because the column may be shared by multiple expressions or exec nodes. - virtual Status execute_impl(FunctionContext* context, Block& block, - const ColumnNumbers& arguments, size_t result, - size_t input_rows_count) const override = 0; + Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, + uint32_t result, size_t input_rows_count) const override = 0; /// Override this functions to change default implementation behavior. See details in IMyFunction. 
bool use_default_implementation_for_nulls() const override { return true; } @@ -415,7 +413,7 @@ class IFunction : public std::enable_shared_from_this, [[noreturn]] PreparedFunctionPtr prepare(FunctionContext* context, const Block& /*sample_block*/, const ColumnNumbers& /*arguments*/, - size_t /*result*/) const final { + uint32_t /*result*/) const final { throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, "prepare is not implemented for IFunction {}", get_name()); __builtin_unreachable(); @@ -459,7 +457,7 @@ class DefaultExecutable final : public PreparedFunctionImpl { protected: Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const final { + uint32_t result, size_t input_rows_count) const final { return function->execute_impl(context, block, arguments, result, input_rows_count); } @@ -473,7 +471,7 @@ class DefaultExecutable final : public PreparedFunctionImpl { } Status execute_impl_dry_run(FunctionContext* context, Block& block, - const ColumnNumbers& arguments, size_t result, + const ColumnNumbers& arguments, uint32_t result, size_t input_rows_count) const final { return function->execute_impl_dry_run(context, block, arguments, result, input_rows_count); } @@ -517,7 +515,7 @@ class DefaultFunction final : public IFunctionBase { // return a default wrapper for IFunction. PreparedFunctionPtr prepare(FunctionContext* context, const Block& /*sample_block*/, const ColumnNumbers& /*arguments*/, - size_t /*result*/) const override { + uint32_t /*result*/) const override { return std::make_shared(function); } @@ -614,7 +612,7 @@ using FunctionPtr = std::shared_ptr; * Or ColumnConst(ColumnNullable) if the result is always NULL or if the result is constant and always not NULL. 
*/ ColumnPtr wrap_in_nullable(const ColumnPtr& src, const Block& block, const ColumnNumbers& args, - size_t result, size_t input_rows_count); + uint32_t result, size_t input_rows_count); #define NUMERIC_TYPE_TO_COLUMN_TYPE(M) \ M(UInt8, ColumnUInt8) \ diff --git a/be/src/vec/functions/function_agg_state.h b/be/src/vec/functions/function_agg_state.h index b17e8916d0e0a98..f4b7aef23af220d 100644 --- a/be/src/vec/functions/function_agg_state.h +++ b/be/src/vec/functions/function_agg_state.h @@ -61,7 +61,7 @@ class FunctionAggState : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { auto col = _agg_function->create_serialize_column(); std::vector agg_columns; std::vector save_columns; diff --git a/be/src/vec/functions/function_always_not_nullable.h b/be/src/vec/functions/function_always_not_nullable.h index b6f18490bb9ea12..618e799f4ae5032 100644 --- a/be/src/vec/functions/function_always_not_nullable.h +++ b/be/src/vec/functions/function_always_not_nullable.h @@ -76,7 +76,7 @@ class FunctionAlwaysNotNullable : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { const ColumnPtr& column = block.get_by_position(arguments[0]).column; const DataTypePtr& data_type = block.get_by_position(arguments[0]).type; WhichDataType which(data_type); diff --git a/be/src/vec/functions/function_assert_true.cpp b/be/src/vec/functions/function_assert_true.cpp index 71deeab32a50719..93b7b58b204a5d0 100644 --- a/be/src/vec/functions/function_assert_true.cpp +++ b/be/src/vec/functions/function_assert_true.cpp @@ -65,7 +65,7 @@ class FunctionAssertTrue : public IFunction { // column2 is const, so in default logic column1 is no way 
const. Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { std::string errmsg = assert_cast(*block.get_by_position(arguments[1]).column) .get_data_at(0) diff --git a/be/src/vec/functions/function_binary_arithmetic.h b/be/src/vec/functions/function_binary_arithmetic.h index 9f2af326f719015..31e1d50f45b03ce 100644 --- a/be/src/vec/functions/function_binary_arithmetic.h +++ b/be/src/vec/functions/function_binary_arithmetic.h @@ -1030,7 +1030,7 @@ class FunctionBinaryArithmetic : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { auto* left_generic = block.get_by_position(arguments[0]).type.get(); auto* right_generic = block.get_by_position(arguments[1]).type.get(); auto* result_generic = block.get_by_position(result).type.get(); diff --git a/be/src/vec/functions/function_bit_test.cpp b/be/src/vec/functions/function_bit_test.cpp index 8e010fd94464442..e863e2a4cf8e52b 100644 --- a/be/src/vec/functions/function_bit_test.cpp +++ b/be/src/vec/functions/function_bit_test.cpp @@ -46,7 +46,7 @@ class FunctionBitTest : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { bool valid = cast_type(block.get_by_position(arguments[0]).type.get(), [&](const auto& type) { using DataType = std::decay_t; @@ -75,7 +75,7 @@ class FunctionBitTest : public IFunction { } template - void execute_inner(Block& block, const ColumnNumbers& arguments, size_t result, + void execute_inner(Block& block, const ColumnNumbers& arguments, uint32_t result, size_t input_rows_count) const { 
size_t argument_size = arguments.size(); std::vector argument_columns(argument_size); diff --git a/be/src/vec/functions/function_bitmap.cpp b/be/src/vec/functions/function_bitmap.cpp index b6072c05cf0f664..64059472efcee9b 100644 --- a/be/src/vec/functions/function_bitmap.cpp +++ b/be/src/vec/functions/function_bitmap.cpp @@ -350,7 +350,7 @@ class FunctionBitmapAlwaysNull : public IFunction { size_t get_number_of_arguments() const override { return 1; } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { auto res_null_map = ColumnUInt8::create(input_rows_count, 0); auto res_data_column = ColumnBitmap::create(); auto& null_map = res_null_map->get_data(); @@ -497,7 +497,7 @@ class FunctionBitmapCount : public IFunction { bool use_default_implementation_for_nulls() const override { return false; } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { auto res_data_column = ColumnInt64::create(); auto& res = res_data_column->get_data(); auto data_null_map = ColumnUInt8::create(input_rows_count, 0); @@ -660,7 +660,7 @@ void update_bitmap_op_count(int64_t* __restrict count, const NullMap& null_map) // for bitmap_and_count, bitmap_xor_count and bitmap_and_not_count, // result is 0 for rows that if any column is null value ColumnPtr handle_bitmap_op_count_null_value(ColumnPtr& src, const Block& block, - const ColumnNumbers& args, size_t result, + const ColumnNumbers& args, uint32_t result, size_t input_rows_count) { auto* nullable = assert_cast(src.get()); ColumnPtr src_not_nullable = nullable->get_nested_column_ptr(); @@ -696,7 +696,7 @@ ColumnPtr handle_bitmap_op_count_null_value(ColumnPtr& src, const Block& block, } Status 
execute_bitmap_op_count_null_to_zero( - FunctionContext* context, Block& block, const ColumnNumbers& arguments, size_t result, + FunctionContext* context, Block& block, const ColumnNumbers& arguments, uint32_t result, size_t input_rows_count, const std::function& exec_impl_func) { @@ -745,10 +745,10 @@ class FunctionBitmapAndNotCount : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { DCHECK_EQ(arguments.size(), 2); auto impl_func = [&](FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) { + uint32_t result, size_t input_rows_count) { return execute_impl_internal(context, block, arguments, result, input_rows_count); }; return execute_bitmap_op_count_null_to_zero(context, block, arguments, result, @@ -756,7 +756,7 @@ class FunctionBitmapAndNotCount : public IFunction { } Status execute_impl_internal(FunctionContext* context, Block& block, - const ColumnNumbers& arguments, size_t result, + const ColumnNumbers& arguments, uint32_t result, size_t input_rows_count) const { using ResultType = typename ResultDataType::FieldType; using ColVecResult = ColumnVector; @@ -1147,7 +1147,7 @@ class FunctionBitmapSubs : public IFunction { size_t get_number_of_arguments() const override { return 3; } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { DCHECK_EQ(arguments.size(), 3); auto res_null_map = ColumnUInt8::create(input_rows_count, 0); auto res_data_column = ColumnBitmap::create(input_rows_count); @@ -1200,7 +1200,7 @@ class FunctionBitmapToArray : public IFunction { size_t get_number_of_arguments() const override { return 1; } Status execute_impl(FunctionContext* context, 
Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { auto return_nested_type = make_nullable(std::make_shared()); auto dest_array_column_ptr = ColumnArray::create(return_nested_type->create_column(), ColumnArray::ColumnOffsets::create()); diff --git a/be/src/vec/functions/function_bitmap_min_or_max.h b/be/src/vec/functions/function_bitmap_min_or_max.h index 85cf9fc3f08fdfc..44bb135335218c2 100644 --- a/be/src/vec/functions/function_bitmap_min_or_max.h +++ b/be/src/vec/functions/function_bitmap_min_or_max.h @@ -50,7 +50,7 @@ class FunctionBitmapSingle : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { auto result_column = ColumnInt64::create(); auto result_null_map_column = ColumnUInt8::create(input_rows_count, 0); diff --git a/be/src/vec/functions/function_bitmap_variadic.cpp b/be/src/vec/functions/function_bitmap_variadic.cpp index c1e044a2a4aa694..6e1a103fdbd83bc 100644 --- a/be/src/vec/functions/function_bitmap_variadic.cpp +++ b/be/src/vec/functions/function_bitmap_variadic.cpp @@ -158,7 +158,7 @@ BITMAP_FUNCTION_COUNT_VARIADIC(BitmapAndCount, bitmap_and_count, &=); BITMAP_FUNCTION_COUNT_VARIADIC(BitmapXorCount, bitmap_xor_count, ^=); Status execute_bitmap_op_count_null_to_zero( - FunctionContext* context, Block& block, const ColumnNumbers& arguments, size_t result, + FunctionContext* context, Block& block, const ColumnNumbers& arguments, uint32_t result, size_t input_rows_count, const std::function& exec_impl_func); @@ -202,10 +202,10 @@ class FunctionBitMapVariadic : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t 
input_rows_count) const override { if (std::is_same_v || std::is_same_v) { auto impl_func = [&](FunctionContext* context, Block& block, - const ColumnNumbers& arguments, size_t result, + const ColumnNumbers& arguments, uint32_t result, size_t input_rows_count) { return execute_impl_internal(context, block, arguments, result, input_rows_count); }; @@ -217,7 +217,7 @@ class FunctionBitMapVariadic : public IFunction { } Status execute_impl_internal(FunctionContext* context, Block& block, - const ColumnNumbers& arguments, size_t result, + const ColumnNumbers& arguments, uint32_t result, size_t input_rows_count) const { size_t argument_size = arguments.size(); std::vector argument_columns(argument_size); diff --git a/be/src/vec/functions/function_case.h b/be/src/vec/functions/function_case.h index f02b85aed456bf5..fdfecc211caed88 100644 --- a/be/src/vec/functions/function_case.h +++ b/be/src/vec/functions/function_case.h @@ -153,7 +153,7 @@ class FunctionCase : public IFunction { bool use_default_implementation_for_nulls() const override { return false; } template - Status execute_short_circuit(const DataTypePtr& data_type, Block& block, size_t result, + Status execute_short_circuit(const DataTypePtr& data_type, Block& block, uint32_t result, CaseWhenColumnHolder column_holder) const { auto case_column_ptr = column_holder.when_ptrs[0].value_or(nullptr); int rows_count = column_holder.rows_count; @@ -196,7 +196,7 @@ class FunctionCase : public IFunction { } template - Status execute_impl(const DataTypePtr& data_type, Block& block, size_t result, + Status execute_impl(const DataTypePtr& data_type, Block& block, uint32_t result, CaseWhenColumnHolder column_holder) const { if (column_holder.pair_count > UINT8_MAX) { return execute_short_circuit(data_type, block, result, @@ -250,7 +250,7 @@ class FunctionCase : public IFunction { } template - Status execute_update_result(const DataTypePtr& data_type, size_t result, Block& block, + Status execute_update_result(const 
DataTypePtr& data_type, uint32_t result, Block& block, const uint8* then_idx, CaseWhenColumnHolder& column_holder) const { auto result_column_ptr = data_type->create_column(); @@ -348,7 +348,7 @@ class FunctionCase : public IFunction { template Status execute_get_then_null(const DataTypePtr& data_type, Block& block, - const ColumnNumbers& arguments, size_t result, + const ColumnNumbers& arguments, uint32_t result, size_t input_rows_count) const { bool then_null = false; for (int i = 1 + has_case; i < arguments.size() - has_else; i += 2) { @@ -378,7 +378,7 @@ class FunctionCase : public IFunction { template Status execute_get_when_null(const DataTypePtr& data_type, Block& block, - const ColumnNumbers& arguments, size_t result, + const ColumnNumbers& arguments, uint32_t result, size_t input_rows_count) const { bool when_null = false; if constexpr (has_case) { @@ -404,7 +404,7 @@ class FunctionCase : public IFunction { } Status execute_get_type(const DataTypePtr& data_type, Block& block, - const ColumnNumbers& arguments, size_t result, + const ColumnNumbers& arguments, uint32_t result, size_t input_rows_count) const { WhichDataType which( data_type->is_nullable() @@ -420,7 +420,7 @@ class FunctionCase : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { return execute_get_type(block.get_by_position(result).type, block, arguments, result, input_rows_count); } diff --git a/be/src/vec/functions/function_cast.h b/be/src/vec/functions/function_cast.h index acf63a66229eb93..0e567a2b74327ad 100644 --- a/be/src/vec/functions/function_cast.h +++ b/be/src/vec/functions/function_cast.h @@ -258,7 +258,7 @@ struct ConvertImpl { template static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count, + uint32_t result, size_t 
input_rows_count, Additions additions = Additions()) { const ColumnWithTypeAndName& named_from = block.get_by_position(arguments[0]); @@ -472,7 +472,7 @@ template requires(!T::is_parametric) struct ConvertImpl { static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t /*input_rows_count*/) { + uint32_t result, size_t /*input_rows_count*/) { block.get_by_position(result).column = block.get_by_position(arguments[0]).column; return Status::OK(); } @@ -485,7 +485,7 @@ struct ConvertImplToTimeType { using FromFieldType = typename FromDataType::FieldType; using ToFieldType = typename ToDataType::FieldType; - static Status execute(Block& block, const ColumnNumbers& arguments, size_t result, + static Status execute(Block& block, const ColumnNumbers& arguments, uint32_t result, size_t /*input_rows_count*/) { const ColumnWithTypeAndName& named_from = block.get_by_position(arguments[0]); @@ -563,14 +563,14 @@ struct ConvertImplGenericToString { } static Status execute2(FunctionContext* /*ctx*/, Block& block, const ColumnNumbers& arguments, - const size_t result, size_t /*input_rows_count*/) { + const uint32_t result, size_t /*input_rows_count*/) { return execute(block, arguments, result); } }; //this is for data in compound type struct ConvertImplGenericFromString { static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - const size_t result, size_t input_rows_count) { + const uint32_t result, size_t input_rows_count) { const auto& col_with_type_and_name = block.get_by_position(arguments[0]); const IColumn& col_from = *col_with_type_and_name.column; // result column must set type @@ -624,7 +624,7 @@ struct ConvertImplGenericFromString { template struct ConvertImplNumberToJsonb { static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - const size_t result, size_t input_rows_count) { + const uint32_t result, size_t input_rows_count) { const 
auto& col_with_type_and_name = block.get_by_position(arguments[0]); auto column_string = ColumnString::create(); @@ -665,7 +665,7 @@ struct ConvertImplNumberToJsonb { struct ConvertImplStringToJsonbAsJsonbString { static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - const size_t result, size_t input_rows_count) { + const uint32_t result, size_t input_rows_count) { auto data_type_to = block.get_by_position(result).type; const auto& col_with_type_and_name = block.get_by_position(arguments[0]); const IColumn& col_from = *col_with_type_and_name.column; @@ -689,7 +689,7 @@ struct ConvertImplStringToJsonbAsJsonbString { struct ConvertImplGenericFromJsonb { static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - const size_t result, size_t input_rows_count) { + const uint32_t result, size_t input_rows_count) { auto data_type_to = block.get_by_position(result).type; const auto& col_with_type_and_name = block.get_by_position(arguments[0]); const IColumn& col_from = *col_with_type_and_name.column; @@ -770,7 +770,7 @@ struct ConvertImplGenericFromJsonb { // Generic conversion of any type to jsonb. 
struct ConvertImplGenericToJsonb { static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - const size_t result, size_t input_rows_count) { + const uint32_t result, size_t input_rows_count) { auto data_type_to = block.get_by_position(result).type; const auto& col_with_type_and_name = block.get_by_position(arguments[0]); const IDataType& type = *col_with_type_and_name.type; @@ -823,7 +823,7 @@ struct ConvertImplGenericToJsonb { struct ConvertNothingToJsonb { static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - const size_t result, size_t input_rows_count) { + const uint32_t result, size_t input_rows_count) { const auto& col_with_type_and_name = block.get_by_position(arguments[0]); const IColumn& col_from = *col_with_type_and_name.column; auto data_type_to = block.get_by_position(result).type; @@ -839,7 +839,7 @@ struct ConvertNothingToJsonb { template struct ConvertImplFromJsonb { static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - const size_t result, size_t input_rows_count) { + const uint32_t result, size_t input_rows_count) { const auto& col_with_type_and_name = block.get_by_position(arguments[0]); const IColumn& col_from = *col_with_type_and_name.column; // result column must set type @@ -957,7 +957,7 @@ struct ConvertImpl { template static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count, + uint32_t result, size_t input_rows_count, Additions additions [[maybe_unused]] = Additions()) { return Status::RuntimeError("not support convert from string"); } @@ -1292,7 +1292,7 @@ class FunctionConvert : public IFunction { ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t 
result, size_t input_rows_count) const override { if (!arguments.size()) { return Status::RuntimeError("Function {} expects at least 1 arguments", get_name()); } @@ -1495,7 +1495,7 @@ class PreparedFunctionCast : public PreparedFunctionImpl { protected: Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { return wrapper_function(context, block, arguments, result, input_rows_count); } @@ -1517,7 +1517,7 @@ struct StringParsing { template static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count, + uint32_t result, size_t input_rows_count, Additions additions [[maybe_unused]] = Additions()) { using ColVecTo = std::conditional_t, ColumnDecimal, ColumnVector>; @@ -1643,7 +1643,7 @@ class FunctionConvertFromString : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { const IDataType* from_type = block.get_by_position(arguments[0]).type.get(); if (check_and_get_data_type(from_type)) { @@ -1679,7 +1679,7 @@ class FunctionConvertToTimeType : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { Status ret_status = Status::OK(); const IDataType* from_type = block.get_by_position(arguments[0]).type.get(); auto call = [&](const auto& types) -> bool { @@ -1723,7 +1723,7 @@ class FunctionCast final : public IFunctionBase { PreparedFunctionPtr prepare(FunctionContext* context, const Block& /*sample_block*/, const ColumnNumbers& /*arguments*/, - size_t /*result*/) const override { + uint32_t 
/*result*/) const override { return std::make_shared( prepare_unpack_dictionaries(context, get_argument_types()[0], get_return_type()), name); @@ -1770,7 +1770,7 @@ class FunctionCast final : public IFunctionBase { { function->get_return_type(ColumnsWithTypeAndName(1, {nullptr, from_type, ""})); } return [function](FunctionContext* context, Block& block, const ColumnNumbers& arguments, - const size_t result, size_t input_rows_count) { + const uint32_t result, size_t input_rows_count) { return function->execute(context, block, arguments, result, input_rows_count); }; } @@ -1782,7 +1782,7 @@ class FunctionCast final : public IFunctionBase { { function->get_return_type(ColumnsWithTypeAndName(1, {nullptr, from_type, ""})); } return [function](FunctionContext* context, Block& block, const ColumnNumbers& arguments, - const size_t result, size_t input_rows_count) { + const uint32_t result, size_t input_rows_count) { return function->execute(context, block, arguments, result, input_rows_count); }; } @@ -1805,7 +1805,7 @@ class FunctionCast final : public IFunctionBase { } return [type_index, precision, scale](FunctionContext* context, Block& block, - const ColumnNumbers& arguments, const size_t result, + const ColumnNumbers& arguments, const uint32_t result, size_t input_rows_count) { auto res = call_on_index_and_data_type( type_index, [&](const auto& types) -> bool { @@ -1834,7 +1834,7 @@ class FunctionCast final : public IFunctionBase { WrapperType create_identity_wrapper(const DataTypePtr&) const { return [](FunctionContext* context, Block& block, const ColumnNumbers& arguments, - const size_t result, size_t /*input_rows_count*/) { + const uint32_t result, size_t /*input_rows_count*/) { block.get_by_position(result).column = block.get_by_position(arguments.front()).column; return Status::OK(); }; @@ -1843,7 +1843,7 @@ class FunctionCast final : public IFunctionBase { WrapperType create_nothing_wrapper(const IDataType* to_type) const { ColumnPtr res = 
to_type->create_column_const_with_default_value(1); return [res](FunctionContext* context, Block& block, const ColumnNumbers&, - const size_t result, size_t input_rows_count) { + const uint32_t result, size_t input_rows_count) { /// Column of Nothing type is trivially convertible to any other column block.get_by_position(result).column = res->clone_resized(input_rows_count)->convert_to_full_column_if_const(); @@ -1939,7 +1939,7 @@ class FunctionCast final : public IFunctionBase { return [nested_function, from_nested_type, to_nested_type]( FunctionContext* context, Block& block, const ColumnNumbers& arguments, - const size_t result, size_t /*input_rows_count*/) -> Status { + const uint32_t result, size_t /*input_rows_count*/) -> Status { ColumnPtr from_column = block.get_by_position(arguments.front()).column; const ColumnArray* from_col_array = @@ -2037,7 +2037,7 @@ class FunctionCast final : public IFunctionBase { struct ConvertImplGenericFromVariant { static Status execute(const FunctionCast* fn, FunctionContext* context, Block& block, - const ColumnNumbers& arguments, const size_t result, + const ColumnNumbers& arguments, const uint32_t result, size_t input_rows_count) { auto& data_type_to = block.get_by_position(result).type; const auto& col_with_type_and_name = block.get_by_position(arguments[0]); @@ -2111,7 +2111,7 @@ class FunctionCast final : public IFunctionBase { struct ConvertImplGenericToVariant { static Status execute(FunctionContext* context, Block& block, - const ColumnNumbers& arguments, const size_t result, + const ColumnNumbers& arguments, const uint32_t result, size_t input_rows_count) { // auto& data_type_to = block.get_by_position(result).type; const auto& col_with_type_and_name = block.get_by_position(arguments[0]); @@ -2135,7 +2135,7 @@ class FunctionCast final : public IFunctionBase { WrapperType create_variant_wrapper(const DataTypeObject& from_type, const DataTypePtr& to_type) const { return [this](FunctionContext* context, Block& block, 
const ColumnNumbers& arguments, - const size_t result, size_t input_rows_count) -> Status { + const uint32_t result, size_t input_rows_count) -> Status { return ConvertImplGenericFromVariant::execute(this, context, block, arguments, result, input_rows_count); }; @@ -2166,7 +2166,7 @@ class FunctionCast final : public IFunctionBase { auto kv_wrappers = get_element_wrappers(context, from_kv_types, to_kv_types); return [kv_wrappers, from_kv_types, to_kv_types]( FunctionContext* context, Block& block, const ColumnNumbers& arguments, - const size_t result, size_t /*input_rows_count*/) -> Status { + const uint32_t result, size_t /*input_rows_count*/) -> Status { auto& from_column = block.get_by_position(arguments.front()).column; auto from_col_map = check_and_get_column(from_column.get()); if (!from_col_map) { @@ -2241,7 +2241,7 @@ class FunctionCast final : public IFunctionBase { auto element_wrappers = get_element_wrappers(context, from_element_types, to_element_types); return [element_wrappers, from_element_types, to_element_types]( FunctionContext* context, Block& block, const ColumnNumbers& arguments, - const size_t result, size_t /*input_rows_count*/) -> Status { + const uint32_t result, size_t /*input_rows_count*/) -> Status { auto& from_column = block.get_by_position(arguments.front()).column; auto from_col_struct = check_and_get_column(from_column.get()); if (!from_col_struct) { @@ -2282,7 +2282,7 @@ class FunctionCast final : public IFunctionBase { } return [](FunctionContext* context, Block& block, const ColumnNumbers&, - const size_t result, size_t input_rows_count) { + const uint32_t result, size_t input_rows_count) { auto& res = block.get_by_position(result); res.column = res.type->create_column_const_with_default_value(input_rows_count) ->convert_to_full_column_if_const(); @@ -2391,7 +2391,7 @@ class FunctionCast final : public IFunctionBase { if (result_is_nullable) { return [this, from_type, to_type](FunctionContext* context, Block& block, - const 
ColumnNumbers& arguments, const size_t result, + const ColumnNumbers& arguments, const uint32_t result, size_t input_rows_count) { auto from_type_not_nullable = remove_nullable(from_type); auto to_type_not_nullable = remove_nullable(to_type); diff --git a/be/src/vec/functions/function_coalesce.cpp b/be/src/vec/functions/function_coalesce.cpp index d3450e97e988574..6e5db15d160c065 100644 --- a/be/src/vec/functions/function_coalesce.cpp +++ b/be/src/vec/functions/function_coalesce.cpp @@ -56,7 +56,6 @@ class FunctionCoalesce : public IFunction { public: static constexpr auto name = "coalesce"; - mutable DataTypePtr result_type; mutable FunctionBasePtr func_is_not_null; static FunctionPtr create() { return std::make_shared(); } @@ -70,26 +69,25 @@ class FunctionCoalesce : public IFunction { size_t get_number_of_arguments() const override { return 0; } DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { + DataTypePtr res; for (const auto& arg : arguments) { if (!arg->is_nullable()) { - result_type = arg; + res = arg; break; } } - result_type = result_type ? result_type : arguments[0]; - return result_type; + res = res ? 
res : arguments[0]; + + const ColumnsWithTypeAndName is_not_null_col {{nullptr, make_nullable(res), ""}}; + func_is_not_null = SimpleFunctionFactory::instance().get_function( + "is_not_null_pred", is_not_null_col, std::make_shared()); + + return res; } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { - if (!func_is_not_null) [[unlikely]] { - const ColumnsWithTypeAndName is_not_null_col { - {nullptr, make_nullable(result_type), ""}}; - func_is_not_null = SimpleFunctionFactory::instance().get_function( - "is_not_null_pred", is_not_null_col, std::make_shared(), - {.enable_decimal256 = context->state()->enable_decimal256()}); - } + uint32_t result, size_t input_rows_count) const override { DCHECK_GE(arguments.size(), 1); DataTypePtr result_type = block.get_by_position(result).type; ColumnNumbers filtered_args; diff --git a/be/src/vec/functions/function_collection_in.h b/be/src/vec/functions/function_collection_in.h index 33a4a2570800a97..ce58d63f44b6555 100644 --- a/be/src/vec/functions/function_collection_in.h +++ b/be/src/vec/functions/function_collection_in.h @@ -41,6 +41,7 @@ #include "vec/functions/function.h" namespace doris::vectorized { +#include "common/compile_check_begin.h" struct ColumnRowRef { ENABLE_FACTORY_CREATOR(ColumnRowRef); ColumnPtr column; @@ -128,7 +129,7 @@ class FunctionCollectionIn : public IFunction { } ColumnPtr column_ptr = std::move(args_column_ptr); // make collection ref into set - int col_size = column_ptr->size(); + auto col_size = column_ptr->size(); for (size_t i = 0; i < col_size; i++) { state->args_set.insert({column_ptr, i}); } @@ -137,7 +138,7 @@ class FunctionCollectionIn : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { auto in_state = 
reinterpret_cast( context->get_function_state(FunctionContext::FRAGMENT_LOCAL)); if (!in_state) { @@ -191,3 +192,5 @@ class FunctionCollectionIn : public IFunction { }; } // namespace doris::vectorized + +#include "common/compile_check_end.h" diff --git a/be/src/vec/functions/function_const.h b/be/src/vec/functions/function_const.h index 0ce24c85dcb0e82..1cc97170ac13920 100644 --- a/be/src/vec/functions/function_const.h +++ b/be/src/vec/functions/function_const.h @@ -44,7 +44,7 @@ class FunctionConst : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { block.get_by_position(result).column = block.get_by_position(result).type->create_column_const(input_rows_count, Impl::init_value()); @@ -66,7 +66,7 @@ class FunctionConst : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { auto column = Impl::ReturnColVec::create(); column->get_data().emplace_back(Impl::init_value()); block.replace_by_position(result, ColumnConst::create(std::move(column), input_rows_count)); @@ -90,7 +90,7 @@ class FunctionMathConstFloat64 : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { block.get_by_position(result).column = block.get_by_position(result).type->create_column_const( input_rows_count == 0 ? 
1 : input_rows_count, Impl::value); diff --git a/be/src/vec/functions/function_conv.cpp b/be/src/vec/functions/function_conv.cpp index 3dbfd81e8a2a53f..085d982a1cbf1ec 100644 --- a/be/src/vec/functions/function_conv.cpp +++ b/be/src/vec/functions/function_conv.cpp @@ -49,6 +49,7 @@ #include "vec/functions/simple_function_factory.h" namespace doris { +#include "common/compile_check_begin.h" class FunctionContext; } // namespace doris @@ -73,7 +74,7 @@ class FunctionConv : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { auto result_column = ColumnString::create(); auto result_null_map_column = ColumnUInt8::create(input_rows_count, 0); diff --git a/be/src/vec/functions/function_convert_tz.h b/be/src/vec/functions/function_convert_tz.h index d0a600a9e41a861..962d3f598824b4f 100644 --- a/be/src/vec/functions/function_convert_tz.h +++ b/be/src/vec/functions/function_convert_tz.h @@ -52,6 +52,7 @@ #include "vec/functions/function.h" #include "vec/runtime/vdatetime_value.h" namespace doris::vectorized { +#include "common/compile_check_begin.h" struct ConvertTzState { bool use_state = false; @@ -144,7 +145,7 @@ class FunctionConvertTZ : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { auto* convert_tz_state = reinterpret_cast( context->get_function_state(FunctionContext::FRAGMENT_LOCAL)); if (!convert_tz_state) { @@ -215,7 +216,7 @@ class FunctionConvertTZ : public IFunction { NullMap& result_null_map, size_t input_rows_count) { cctz::time_zone& from_tz = convert_tz_state->from_tz; cctz::time_zone& to_tz = convert_tz_state->to_tz; - auto push_null = [&](int row) { + auto push_null = [&](size_t row) { 
result_null_map[row] = true; result_column->insert_default(); }; @@ -310,3 +311,5 @@ class FunctionConvertTZ : public IFunction { }; } // namespace doris::vectorized + +#include "common/compile_check_end.h" diff --git a/be/src/vec/functions/function_date_or_datetime_computation.h b/be/src/vec/functions/function_date_or_datetime_computation.h index ac18965749eb8e8..2dd71ec31be81fa 100644 --- a/be/src/vec/functions/function_date_or_datetime_computation.h +++ b/be/src/vec/functions/function_date_or_datetime_computation.h @@ -27,6 +27,7 @@ #include #include +#include "common/cast_set.h" #include "common/compiler_util.h" #include "common/exception.h" #include "common/logging.h" @@ -535,7 +536,7 @@ struct DateTimeOp { template struct DateTimeAddIntervalImpl { - static Status execute(Block& block, const ColumnNumbers& arguments, size_t result, + static Status execute(Block& block, const ColumnNumbers& arguments, uint32_t result, size_t input_rows_count) { using ToType = typename Transform::ReturnType::FieldType; using Op = DateTimeOp; @@ -645,7 +646,7 @@ struct DateTimeAddIntervalImpl { col_to->get_data(), null_map->get_data(), delta_vec_column->get_data()); } else { - Op::constant_vector(sources_const->template get_value(), + Op::constant_vector(sources_const->template get_value(), col_to->get_data(), null_map->get_data(), *not_nullable_column_ptr_arg1); } @@ -675,7 +676,7 @@ struct DateTimeAddIntervalImpl { Op::constant_vector(sources_const->template get_value(), col_to->get_data(), delta_vec_column->get_data()); } else { - Op::constant_vector(sources_const->template get_value(), + Op::constant_vector(sources_const->template get_value(), col_to->get_data(), *block.get_by_position(arguments[1]).column); } @@ -744,7 +745,7 @@ class FunctionDateOrDateTimeComputation : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t 
input_rows_count) const override { const auto& first_arg_type = block.get_by_position(arguments[0]).type; const auto& second_arg_type = block.get_by_position(arguments[1]).type; WhichDataType which1(remove_nullable(first_arg_type)); @@ -822,7 +823,7 @@ class FunctionCurrentDateOrDateTime : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { return FunctionImpl::execute(context, block, arguments, result, input_rows_count); } }; @@ -841,7 +842,7 @@ struct CurrentDateTimeImpl { } static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) { + uint32_t result, size_t input_rows_count) { WhichDataType which(remove_nullable(block.get_by_position(result).type)); if constexpr (WithPrecision) { DCHECK(which.is_date_time_v2() || which.is_date_v2()); @@ -868,7 +869,7 @@ struct CurrentDateTimeImpl { template static Status executeImpl(FunctionContext* context, Block& block, - const ColumnNumbers& arguments, size_t result, + const ColumnNumbers& arguments, uint32_t result, size_t input_rows_count) { auto col_to = ColumnVector::create(); DateValueType dtv; @@ -876,7 +877,7 @@ struct CurrentDateTimeImpl { if constexpr (WithPrecision) { if (const auto* const_column = check_and_get_column( block.get_by_position(arguments[0]).column)) { - int scale = const_column->get_int(0); + int64_t scale = const_column->get_int(0); dtv.from_unixtime(context->state()->timestamp_ms() / 1000, context->state()->nano_seconds(), context->state()->timezone_obj(), scale); @@ -956,7 +957,7 @@ struct CurrentDateImpl { using ReturnType = DateType; static constexpr auto name = FunctionName::name; static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) { + uint32_t result, 
size_t input_rows_count) { auto col_to = ColumnVector::create(); if constexpr (std::is_same_v) { DateV2Value dtv; @@ -987,7 +988,7 @@ struct CurrentTimeImpl { using ReturnType = DataTypeTimeV2; static constexpr auto name = FunctionName::name; static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) { + uint32_t result, size_t input_rows_count) { auto col_to = ColumnFloat64::create(); VecDateTimeValue dtv; dtv.from_unixtime(context->state()->timestamp_ms() / 1000, @@ -1002,17 +1003,19 @@ struct CurrentTimeImpl { }; struct TimeToSecImpl { + // rethink the func should return int32 using ReturnType = DataTypeInt32; static constexpr auto name = "time_to_sec"; static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) { + uint32_t result, size_t input_rows_count) { auto res_col = ColumnInt32::create(input_rows_count); const auto& arg_col = block.get_by_position(arguments[0]).column; const auto& column_data = assert_cast(*arg_col); auto& res_data = res_col->get_data(); for (int i = 0; i < input_rows_count; ++i) { - res_data[i] = static_cast(column_data.get_element(i)) / (1000 * 1000); + res_data[i] = + cast_set(static_cast(column_data.get_element(i)) / (1000 * 1000)); } block.replace_by_position(result, std::move(res_col)); @@ -1024,7 +1027,7 @@ struct SecToTimeImpl { using ReturnType = DataTypeTimeV2; static constexpr auto name = "sec_to_time"; static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) { + uint32_t result, size_t input_rows_count) { const auto& arg_col = block.get_by_position(arguments[0]).column; const auto& column_data = assert_cast(*arg_col); @@ -1066,7 +1069,7 @@ struct TimestampToDateTime : IFunction { static FunctionPtr create() { return std::make_shared>(); } Status execute_impl(FunctionContext* context, Block& block, 
const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { const auto& arg_col = block.get_by_position(arguments[0]).column; const auto& column_data = assert_cast(*arg_col); auto res_col = ColumnUInt64::create(); @@ -1105,7 +1108,7 @@ struct UtcTimestampImpl { using ReturnType = DataTypeDateTime; static constexpr auto name = "utc_timestamp"; static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) { + uint32_t result, size_t input_rows_count) { WhichDataType which(remove_nullable(block.get_by_position(result).type)); if (which.is_date_time_v2()) { return executeImpl, UInt64>(context, block, result, @@ -1119,7 +1122,7 @@ struct UtcTimestampImpl { } template - static Status executeImpl(FunctionContext* context, Block& block, size_t result, + static Status executeImpl(FunctionContext* context, Block& block, uint32_t result, size_t input_rows_count) { auto col_to = ColumnVector::create(); DateValueType dtv; diff --git a/be/src/vec/functions/function_date_or_datetime_to_something.h b/be/src/vec/functions/function_date_or_datetime_to_something.h index 4bea968a62c53b2..2bc96cc7e937d76 100644 --- a/be/src/vec/functions/function_date_or_datetime_to_something.h +++ b/be/src/vec/functions/function_date_or_datetime_to_something.h @@ -94,7 +94,7 @@ class FunctionDateOrDateTimeToSomething : public IFunction { bool use_default_implementation_for_nulls() const override { return false; } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { return DateTimeTransformImpl::execute(block, arguments, result, input_rows_count); diff --git a/be/src/vec/functions/function_date_or_datetime_to_string.h b/be/src/vec/functions/function_date_or_datetime_to_string.h index 
6ffb67f9c349a8b..14e8335388b2dca 100644 --- a/be/src/vec/functions/function_date_or_datetime_to_string.h +++ b/be/src/vec/functions/function_date_or_datetime_to_string.h @@ -80,7 +80,7 @@ class FunctionDateOrDateTimeToString : public IFunction { ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { const ColumnPtr source_col = block.get_by_position(arguments[0]).column; const auto is_nullable = block.get_by_position(result).type->is_nullable(); const auto* sources = check_and_get_column>( diff --git a/be/src/vec/functions/function_datetime_floor_ceil.cpp b/be/src/vec/functions/function_datetime_floor_ceil.cpp index bf74deaed1daeb5..376cf83b0911100 100644 --- a/be/src/vec/functions/function_datetime_floor_ceil.cpp +++ b/be/src/vec/functions/function_datetime_floor_ceil.cpp @@ -51,6 +51,7 @@ #include "vec/runtime/vdatetime_value.h" namespace doris { +#include "common/compile_check_begin.h" class FunctionContext; namespace vectorized { @@ -73,7 +74,7 @@ struct YearFloor; namespace doris::vectorized { -template +template class FunctionDateTimeFloorCeil : public IFunction { public: using ReturnDataType = std::conditional_t< @@ -84,7 +85,7 @@ class FunctionDateTimeFloorCeil : public IFunction { std::is_same_v, Int64, std::conditional_t>, UInt32, UInt64>>; - using DeltaDataType = DataTypeNumber; // int32/64 + using DeltaDataType = DataTypeNumber; // int32/64 static constexpr auto name = Impl::name; static FunctionPtr create() { return std::make_shared(); } @@ -140,7 +141,7 @@ class FunctionDateTimeFloorCeil : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const 
override { const ColumnPtr source_col = block.get_by_position(arguments[0]).column->convert_to_full_column_if_const(); if (const auto* sources = @@ -166,7 +167,7 @@ class FunctionDateTimeFloorCeil : public IFunction { col_to->get_data(), null_map->get_data()); } else { // time_round(datetime,const(period)) - Impl::template vector_constant_delta( + Impl::template vector_constant_delta( sources->get_data(), delta_const_column->get_field().get(), col_to->get_data(), null_map->get_data()); } @@ -178,7 +179,7 @@ class FunctionDateTimeFloorCeil : public IFunction { col_to->get_data(), null_map->get_data()); } else { const auto* delta_vec_column1 = - check_and_get_column>(delta_column); + check_and_get_column>(delta_column); DCHECK(delta_vec_column1 != nullptr); // time_round(datetime, period) Impl::vector_vector(sources->get_data(), delta_vec_column1->get_data(), @@ -197,7 +198,7 @@ class FunctionDateTimeFloorCeil : public IFunction { arg1_col->get(0, arg1); arg2_col->get(0, arg2); // time_round(datetime,const(period) , const(origin)) - Impl::template vector_const_const( + Impl::template vector_const_const( sources->get_data(), arg1.get(), arg2.get(), col_to->get_data(), null_map->get_data()); @@ -207,27 +208,25 @@ class FunctionDateTimeFloorCeil : public IFunction { const auto arg2_column = check_and_get_column>(*arg2_col); // time_round(datetime,const(period) , origin) - Impl::template vector_const_vector( + Impl::template vector_const_vector( sources->get_data(), arg1.get(), arg2_column->get_data(), col_to->get_data(), null_map->get_data()); } else if (!arg1_const && arg2_const) { Field arg2; arg2_col->get(0, arg2); - const auto arg1_column = - check_and_get_column>(*arg1_col); + const auto arg1_column = check_and_get_column>(*arg1_col); // time_round(datetime, period , const(origin)) - Impl::template vector_vector_const( + Impl::template vector_vector_const( sources->get_data(), arg1_column->get_data(), arg2.get(), col_to->get_data(), null_map->get_data()); } else { 
- const auto arg1_column = - check_and_get_column>(*arg1_col); + const auto arg1_column = check_and_get_column>(*arg1_col); const auto arg2_column = check_and_get_column>(*arg2_col); DCHECK(arg1_column != nullptr); DCHECK(arg2_column != nullptr); // time_round(datetime, period, origin) - Impl::template vector_vector( + Impl::template vector_vector( sources->get_data(), arg1_column->get_data(), arg2_column->get_data(), col_to->get_data(), null_map->get_data()); } @@ -289,8 +288,8 @@ struct FloorCeilImpl { } } - template - static void vector_constant_delta(const PaddedPODArray& dates, DeltaType period, + template + static void vector_constant_delta(const PaddedPODArray& dates, Int32 period, PaddedPODArray& res, NullMap& null_map) { // time_round(datetime,const(period)) if (period < 1) { @@ -313,7 +312,7 @@ struct FloorCeilImpl { } } - template + template static void vector_const_const_with_constant_optimization( const PaddedPODArray& dates, NativeType origin_date, PaddedPODArray& res, NullMap& null_map) { @@ -333,8 +332,8 @@ struct FloorCeilImpl { } } } - template - static void vector_const_const(const PaddedPODArray& dates, const DeltaType period, + template + static void vector_const_const(const PaddedPODArray& dates, const Int32 period, NativeType origin_date, PaddedPODArray& res, NullMap& null_map) { if (period < 1) { @@ -343,63 +342,63 @@ struct FloorCeilImpl { } switch (period) { case 1: { - vector_const_const_with_constant_optimization( - dates, origin_date, res, null_map); + vector_const_const_with_constant_optimization(dates, origin_date, res, + null_map); break; } case 2: { - vector_const_const_with_constant_optimization( - dates, origin_date, res, null_map); + vector_const_const_with_constant_optimization(dates, origin_date, res, + null_map); break; } case 3: { - vector_const_const_with_constant_optimization( - dates, origin_date, res, null_map); + vector_const_const_with_constant_optimization(dates, origin_date, res, + null_map); break; } case 4: { - 
vector_const_const_with_constant_optimization( - dates, origin_date, res, null_map); + vector_const_const_with_constant_optimization(dates, origin_date, res, + null_map); break; } case 5: { - vector_const_const_with_constant_optimization( - dates, origin_date, res, null_map); + vector_const_const_with_constant_optimization(dates, origin_date, res, + null_map); break; } case 6: { - vector_const_const_with_constant_optimization( - dates, origin_date, res, null_map); + vector_const_const_with_constant_optimization(dates, origin_date, res, + null_map); break; } case 7: { - vector_const_const_with_constant_optimization( - dates, origin_date, res, null_map); + vector_const_const_with_constant_optimization(dates, origin_date, res, + null_map); break; } case 8: { - vector_const_const_with_constant_optimization( - dates, origin_date, res, null_map); + vector_const_const_with_constant_optimization(dates, origin_date, res, + null_map); break; } case 9: { - vector_const_const_with_constant_optimization( - dates, origin_date, res, null_map); + vector_const_const_with_constant_optimization(dates, origin_date, res, + null_map); break; } case 10: { - vector_const_const_with_constant_optimization( - dates, origin_date, res, null_map); + vector_const_const_with_constant_optimization(dates, origin_date, res, + null_map); break; } case 11: { - vector_const_const_with_constant_optimization( - dates, origin_date, res, null_map); + vector_const_const_with_constant_optimization(dates, origin_date, res, + null_map); break; } case 12: { - vector_const_const_with_constant_optimization( - dates, origin_date, res, null_map); + vector_const_const_with_constant_optimization(dates, origin_date, res, + null_map); break; } default: @@ -420,8 +419,8 @@ struct FloorCeilImpl { } } - template - static void vector_const_vector(const PaddedPODArray& dates, const DeltaType period, + template + static void vector_const_vector(const PaddedPODArray& dates, const Int32 period, const PaddedPODArray& 
origin_dates, PaddedPODArray& res, NullMap& null_map) { if (period < 1) { @@ -492,10 +491,10 @@ struct FloorCeilImpl { } } - template + template static void vector_vector(const PaddedPODArray& dates, - const PaddedPODArray& periods, - PaddedPODArray& res, NullMap& null_map) { + const PaddedPODArray& periods, PaddedPODArray& res, + NullMap& null_map) { // time_round(datetime, period) for (int i = 0; i < dates.size(); ++i) { if (periods[i] < 1) { @@ -517,9 +516,9 @@ struct FloorCeilImpl { } } - template + template static void vector_vector(const PaddedPODArray& dates, - const PaddedPODArray& periods, + const PaddedPODArray& periods, const PaddedPODArray& origin_dates, PaddedPODArray& res, NullMap& null_map) { // time_round(datetime, period, origin) @@ -931,53 +930,51 @@ struct TimeRound { } }; -#define TIME_ROUND_WITH_DELTA_TYPE(CLASS, NAME, UNIT, TYPE, DELTA) \ - using FunctionOneArg##CLASS##DELTA = \ - FunctionDateTimeFloorCeil>, VecDateTimeValue, DELTA, 1, \ - false>; \ - using FunctionTwoArg##CLASS##DELTA = \ - FunctionDateTimeFloorCeil>, VecDateTimeValue, DELTA, 2, \ - false>; \ - using FunctionThreeArg##CLASS##DELTA = \ - FunctionDateTimeFloorCeil>, VecDateTimeValue, DELTA, 3, \ - false>; \ - using FunctionDateV2OneArg##CLASS##DELTA = \ - FunctionDateTimeFloorCeil>, \ - DateV2Value, DELTA, 1, false>; \ - using FunctionDateV2TwoArg##CLASS##DELTA = \ - FunctionDateTimeFloorCeil>, \ - DateV2Value, DELTA, 2, false>; \ - using FunctionDateV2ThreeArg##CLASS##DELTA = \ - FunctionDateTimeFloorCeil>, \ - DateV2Value, DELTA, 3, false>; \ - using FunctionDateTimeV2OneArg##CLASS##DELTA = \ - FunctionDateTimeFloorCeil>, \ - DateV2Value, DELTA, 1, false>; \ - using FunctionDateTimeV2TwoArg##CLASS##DELTA = \ - FunctionDateTimeFloorCeil>, \ - DateV2Value, DELTA, 2, false>; \ - using FunctionDateTimeV2ThreeArg##CLASS##DELTA = \ - FunctionDateTimeFloorCeil>, \ - DateV2Value, DELTA, 3, false>; +#define TIME_ROUND_WITH_DELTA_TYPE(CLASS, NAME, UNIT, TYPE, DELTA) \ + using 
FunctionOneArg##CLASS##DELTA = \ + FunctionDateTimeFloorCeil>, VecDateTimeValue, 1, \ + false>; \ + using FunctionTwoArg##CLASS##DELTA = \ + FunctionDateTimeFloorCeil>, VecDateTimeValue, 2, \ + false>; \ + using FunctionThreeArg##CLASS##DELTA = \ + FunctionDateTimeFloorCeil>, VecDateTimeValue, 3, \ + false>; \ + using FunctionDateV2OneArg##CLASS##DELTA = \ + FunctionDateTimeFloorCeil>, \ + DateV2Value, 1, false>; \ + using FunctionDateV2TwoArg##CLASS##DELTA = \ + FunctionDateTimeFloorCeil>, \ + DateV2Value, 2, false>; \ + using FunctionDateV2ThreeArg##CLASS##DELTA = \ + FunctionDateTimeFloorCeil>, \ + DateV2Value, 3, false>; \ + using FunctionDateTimeV2OneArg##CLASS##DELTA = \ + FunctionDateTimeFloorCeil>, \ + DateV2Value, 1, false>; \ + using FunctionDateTimeV2TwoArg##CLASS##DELTA = \ + FunctionDateTimeFloorCeil>, \ + DateV2Value, 2, false>; \ + using FunctionDateTimeV2ThreeArg##CLASS##DELTA = \ + FunctionDateTimeFloorCeil>, \ + DateV2Value, 3, false>; -#define TIME_ROUND(CLASS, NAME, UNIT, TYPE) \ - struct CLASS { \ - static constexpr auto name = #NAME; \ - static constexpr TimeUnit Unit = UNIT; \ - static constexpr auto Type = TYPE; \ - }; \ - \ - TIME_ROUND_WITH_DELTA_TYPE(CLASS, NAME, UNIT, TYPE, Int32) \ - TIME_ROUND_WITH_DELTA_TYPE(CLASS, NAME, UNIT, TYPE, Int64) \ - using FunctionDateTimeV2TwoArg##CLASS = \ - FunctionDateTimeFloorCeil>, \ - DateV2Value, Int32, 2, true>; \ - using FunctionDateV2TwoArg##CLASS = \ - FunctionDateTimeFloorCeil>, \ - DateV2Value, Int32, 2, true>; \ - using FunctionDateTimeTwoArg##CLASS = \ - FunctionDateTimeFloorCeil>, VecDateTimeValue, Int32, 2, \ - true>; +#define TIME_ROUND(CLASS, NAME, UNIT, TYPE) \ + struct CLASS { \ + static constexpr auto name = #NAME; \ + static constexpr TimeUnit Unit = UNIT; \ + static constexpr auto Type = TYPE; \ + }; \ + \ + TIME_ROUND_WITH_DELTA_TYPE(CLASS, NAME, UNIT, TYPE, Int32) \ + using FunctionDateTimeV2TwoArg##CLASS = \ + FunctionDateTimeFloorCeil>, \ + DateV2Value, 2, true>; \ + using 
FunctionDateV2TwoArg##CLASS = \ + FunctionDateTimeFloorCeil>, \ + DateV2Value, 2, true>; \ + using FunctionDateTimeTwoArg##CLASS = \ + FunctionDateTimeFloorCeil>, VecDateTimeValue, 2, true>; TIME_ROUND(YearFloor, year_floor, YEAR, FLOOR); TIME_ROUND(MonthFloor, month_floor, MONTH, FLOOR); @@ -1010,9 +1007,7 @@ void register_function_datetime_floor_ceil(SimpleFunctionFactory& factory) { factory.register_function(); \ factory.register_function(); -#define REGISTER_FUNC(CLASS) \ - REGISTER_FUNC_WITH_DELTA_TYPE(CLASS, Int32) \ - REGISTER_FUNC_WITH_DELTA_TYPE(CLASS, Int64) +#define REGISTER_FUNC(CLASS) REGISTER_FUNC_WITH_DELTA_TYPE(CLASS, Int32) REGISTER_FUNC(YearFloor); REGISTER_FUNC(MonthFloor); diff --git a/be/src/vec/functions/function_datetime_string_to_string.h b/be/src/vec/functions/function_datetime_string_to_string.h index 80fe6cf1f4174bf..5dfa32e0c9fac39 100644 --- a/be/src/vec/functions/function_datetime_string_to_string.h +++ b/be/src/vec/functions/function_datetime_string_to_string.h @@ -23,6 +23,7 @@ #include #include +#include "common/cast_set.h" #include "common/status.h" #include "vec/aggregate_functions/aggregate_function.h" #include "vec/columns/column.h" @@ -46,6 +47,7 @@ #include "vec/runtime/vdatetime_value.h" namespace doris { +#include "common/compile_check_begin.h" class FunctionContext; } // namespace doris @@ -128,7 +130,7 @@ class FunctionDateTimeStringToString : public IFunction { ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { const ColumnPtr source_col = block.get_by_position(arguments[0]).column; const auto* nullable_column = check_and_get_column(source_col.get()); @@ -189,7 +191,7 @@ class FunctionDateTimeStringToString : public IFunction { for (int i = 0; i < len; ++i) { null_map[i] = 
Transform::template execute( ts[i], format, res_data, offset, context->state()->timezone_obj()); - res_offsets[i] = offset; + res_offsets[i] = cast_set(offset); } res_data.resize(offset); }, @@ -199,3 +201,5 @@ class FunctionDateTimeStringToString : public IFunction { }; } // namespace doris::vectorized + +#include "common/compile_check_end.h" diff --git a/be/src/vec/functions/function_decode_varchar.cpp b/be/src/vec/functions/function_decode_varchar.cpp index 59f7ecfac047739..be41df0e08232bc 100644 --- a/be/src/vec/functions/function_decode_varchar.cpp +++ b/be/src/vec/functions/function_decode_varchar.cpp @@ -74,7 +74,7 @@ class FunctionDecodeAsVarchar : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { const ColumnVector* col_source = assert_cast*>( block.get_by_position(arguments[0]).column.get()); diff --git a/be/src/vec/functions/function_encode_varchar.cpp b/be/src/vec/functions/function_encode_varchar.cpp index 2e53a511c5ba4d9..1edb80bfd216f8e 100644 --- a/be/src/vec/functions/function_encode_varchar.cpp +++ b/be/src/vec/functions/function_encode_varchar.cpp @@ -66,7 +66,7 @@ class FunctionEncodeVarchar : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { const ColumnString* col_str = assert_cast(block.get_by_position(arguments[0]).column.get()); diff --git a/be/src/vec/functions/function_encryption.cpp b/be/src/vec/functions/function_encryption.cpp index 9aaefc26a652ccb..8376058fea724f2 100644 --- a/be/src/vec/functions/function_encryption.cpp +++ b/be/src/vec/functions/function_encryption.cpp @@ -15,32 +15,25 @@ // specific language governing permissions and limitations // under the License. 
-#include -#include - #include +#include +#include #include #include #include #include #include +#include "common/cast_set.h" #include "common/status.h" #include "util/encryption_util.h" -#include "util/string_util.h" -#include "vec/aggregate_functions/aggregate_function.h" #include "vec/columns/column.h" #include "vec/columns/column_nullable.h" #include "vec/columns/column_string.h" #include "vec/columns/column_vector.h" -#include "vec/columns/columns_number.h" #include "vec/common/assert_cast.h" -#include "vec/common/pod_array.h" #include "vec/common/string_ref.h" #include "vec/core/block.h" -#include "vec/core/column_numbers.h" -#include "vec/core/column_with_type_and_name.h" -#include "vec/core/types.h" #include "vec/data_types/data_type.h" #include "vec/data_types/data_type_nullable.h" #include "vec/data_types/data_type_string.h" @@ -50,6 +43,7 @@ #include "vec/utils/util.hpp" namespace doris { +#include "common/compile_check_begin.h" class FunctionContext; } // namespace doris @@ -111,7 +105,7 @@ class FunctionEncryptionAndDecrypt : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { return Impl::execute_impl_inner(context, block, arguments, result, input_rows_count); } }; @@ -136,9 +130,9 @@ void execute_result_vector(std::vector& offsets_li template void execute_result_const(const ColumnString::Offsets* offsets_column, const ColumnString::Chars* chars_column, StringRef key_arg, size_t i, - EncryptionMode& encryption_mode, const char* iv_raw, int iv_length, + EncryptionMode& encryption_mode, const char* iv_raw, size_t iv_length, ColumnString::Chars& result_data, ColumnString::Offsets& result_offset, - NullMap& null_map, const char* aad, int aad_length) { + NullMap& null_map, const char* aad, size_t aad_length) { int src_size = (*offsets_column)[i] - (*offsets_column)[i - 1]; const 
auto* src_raw = reinterpret_cast(&(*chars_column)[(*offsets_column)[i - 1]]); execute_result(src_raw, src_size, key_arg.data, key_arg.size, i, @@ -147,15 +141,15 @@ void execute_result_const(const ColumnString::Offsets* offsets_column, } template -void execute_result(const char* src_raw, int src_size, const char* key_raw, int key_size, size_t i, - EncryptionMode& encryption_mode, const char* iv_raw, int iv_length, +void execute_result(const char* src_raw, size_t src_size, const char* key_raw, size_t key_size, + size_t i, EncryptionMode& encryption_mode, const char* iv_raw, size_t iv_length, ColumnString::Chars& result_data, ColumnString::Offsets& result_offset, - NullMap& null_map, const char* aad, int aad_length) { + NullMap& null_map, const char* aad, size_t aad_length) { if (src_size == 0) { StringOP::push_null_string(i, result_data, result_offset, null_map); return; } - int cipher_len = src_size; + auto cipher_len = src_size; if constexpr (is_encrypt) { cipher_len += 16; // for output AEAD tag @@ -187,7 +181,7 @@ struct EncryptionAndDecryptTwoImpl { } static Status execute_impl_inner(FunctionContext* context, Block& block, - const ColumnNumbers& arguments, size_t result, + const ColumnNumbers& arguments, uint32_t result, size_t input_rows_count) { auto result_column = ColumnString::create(); auto result_null_map_column = ColumnUInt8::create(input_rows_count, 0); @@ -298,7 +292,7 @@ struct EncryptionAndDecryptMultiImpl { } static Status execute_impl_inner(FunctionContext* context, Block& block, - const ColumnNumbers& arguments, size_t result, + const ColumnNumbers& arguments, uint32_t result, size_t input_rows_count) { auto result_column = ColumnString::create(); auto result_null_map_column = ColumnUInt8::create(input_rows_count, 0); @@ -438,22 +432,25 @@ struct EncryptionAndDecryptMultiImpl { }; struct EncryptImpl { - static int execute_impl(EncryptionMode mode, const unsigned char* source, - uint32_t source_length, const unsigned char* key, uint32_t 
key_length, - const char* iv, int iv_length, bool padding, unsigned char* encrypt, - const unsigned char* aad, int aad_length) { - return EncryptionUtil::encrypt(mode, source, source_length, key, key_length, iv, iv_length, - true, encrypt, aad, aad_length); + static int execute_impl(EncryptionMode mode, const unsigned char* source, size_t source_length, + const unsigned char* key, size_t key_length, const char* iv, + size_t iv_length, bool padding, unsigned char* encrypt, + const unsigned char* aad, size_t aad_length) { + // now the openssl only support int, so here we need to cast size_t to uint32_t + return EncryptionUtil::encrypt(mode, source, cast_set(source_length), key, + cast_set(key_length), iv, cast_set(iv_length), + true, encrypt, aad, cast_set(aad_length)); } }; struct DecryptImpl { - static int execute_impl(EncryptionMode mode, const unsigned char* source, - uint32_t source_length, const unsigned char* key, uint32_t key_length, - const char* iv, int iv_length, bool padding, unsigned char* encrypt, - const unsigned char* aad, int aad_length) { - return EncryptionUtil::decrypt(mode, source, source_length, key, key_length, iv, iv_length, - true, encrypt, aad, aad_length); + static int execute_impl(EncryptionMode mode, const unsigned char* source, size_t source_length, + const unsigned char* key, size_t key_length, const char* iv, + size_t iv_length, bool padding, unsigned char* encrypt, + const unsigned char* aad, size_t aad_length) { + return EncryptionUtil::decrypt(mode, source, cast_set(source_length), key, + cast_set(key_length), iv, cast_set(iv_length), + true, encrypt, aad, cast_set(aad_length)); } }; diff --git a/be/src/vec/functions/function_fake.cpp b/be/src/vec/functions/function_fake.cpp index 6a4f6275e1a89b8..646da600b50c13a 100644 --- a/be/src/vec/functions/function_fake.cpp +++ b/be/src/vec/functions/function_fake.cpp @@ -21,6 +21,7 @@ #include #include +#include #include #include @@ -83,6 +84,25 @@ struct FunctionExplodeMap { static 
std::string get_error_msg() { return "Fake function do not support execute"; } }; +template +struct FunctionPoseExplode { + static DataTypePtr get_return_type_impl(const DataTypes& arguments) { + DCHECK(is_array(arguments[0])) << arguments[0]->get_name() << " not supported"; + DataTypes fieldTypes(2); + fieldTypes[0] = make_nullable(std::make_shared()); + fieldTypes[1] = + check_and_get_data_type(arguments[0].get())->get_nested_type(); + auto struct_type = std::make_shared(fieldTypes); + if constexpr (AlwaysNullable) { + return make_nullable(struct_type); + } else { + return arguments[0]->is_nullable() ? make_nullable(struct_type) : struct_type; + } + } + static DataTypes get_variadic_argument_types() { return {}; } + static std::string get_error_msg() { return "Fake function do not support execute"; } +}; + // explode json-object: expands json-object to struct with a pair of key and value in column string struct FunctionExplodeJsonObject { static DataTypePtr get_return_type_impl(const DataTypes& arguments) { @@ -138,6 +158,12 @@ void register_table_function_expand_outer_default(SimpleFunctionFactory& factory COMBINATOR_SUFFIX_OUTER); }; +template +void register_table_function_with_impl(SimpleFunctionFactory& factory, const std::string& name, + const std::string& suffix = "") { + factory.register_function>(name + suffix); +}; + void register_function_fake(SimpleFunctionFactory& factory) { register_function(factory, "esquery"); @@ -158,6 +184,9 @@ void register_function_fake(SimpleFunctionFactory& factory) { register_table_function_expand_outer_default( factory, "explode_json_array_double"); register_table_function_expand_outer_default(factory, "explode_bitmap"); + register_table_function_with_impl>(factory, "posexplode"); + register_table_function_with_impl>(factory, "posexplode", + COMBINATOR_SUFFIX_OUTER); register_table_function_expand_outer_default(factory, "explode_variant_array"); } diff --git a/be/src/vec/functions/function_fake.h 
b/be/src/vec/functions/function_fake.h index d180cbf3270d4b0..dabb5eb039afb7d 100644 --- a/be/src/vec/functions/function_fake.h +++ b/be/src/vec/functions/function_fake.h @@ -79,7 +79,7 @@ class FunctionFake : public IFunction { bool use_default_implementation_for_constants() const override { return false; } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { return Status::NotSupported(Impl::get_error_msg()); } }; diff --git a/be/src/vec/functions/function_grouping.h b/be/src/vec/functions/function_grouping.h index 0dbd50c8b947f43..0917b4d1db89ecb 100644 --- a/be/src/vec/functions/function_grouping.h +++ b/be/src/vec/functions/function_grouping.h @@ -53,7 +53,7 @@ class FunctionGroupingBase : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { const ColumnWithTypeAndName& src_column = block.get_by_position(arguments[0]); DCHECK(src_column.column->size() == input_rows_count); // result of functions grouping and grouping_id is always not nullable, diff --git a/be/src/vec/functions/function_hash.cpp b/be/src/vec/functions/function_hash.cpp index 972d2eb0b9d8a19..a4648e54dfd5126 100644 --- a/be/src/vec/functions/function_hash.cpp +++ b/be/src/vec/functions/function_hash.cpp @@ -38,6 +38,7 @@ #include "vec/utils/template_helpers.hpp" namespace doris::vectorized { +#include "common/compile_check_begin.h" constexpr uint64_t emtpy_value = 0xe28dbde7fe22e41c; template diff --git a/be/src/vec/functions/function_helpers.cpp b/be/src/vec/functions/function_helpers.cpp index ea3d98511b0624d..03df6e3d99dd6dd 100644 --- a/be/src/vec/functions/function_helpers.cpp +++ b/be/src/vec/functions/function_helpers.cpp @@ -119,7 +119,7 @@ std::tuple 
create_block_with_nested_columns( } std::tuple create_block_with_nested_columns( - const Block& block, const ColumnNumbers& args, size_t result, + const Block& block, const ColumnNumbers& args, uint32_t result, bool need_replace_null_data_to_default) { auto [res, res_args] = create_block_with_nested_columns(block, args, true, need_replace_null_data_to_default); diff --git a/be/src/vec/functions/function_helpers.h b/be/src/vec/functions/function_helpers.h index 28f79a8d0fb1930..c439f601f936f9a 100644 --- a/be/src/vec/functions/function_helpers.h +++ b/be/src/vec/functions/function_helpers.h @@ -103,7 +103,7 @@ std::tuple create_block_with_nested_columns( // Same as above and return the new_res loc in tuple std::tuple create_block_with_nested_columns( - const Block& block, const ColumnNumbers& args, size_t result, + const Block& block, const ColumnNumbers& args, uint32_t result, bool need_replace_null_data_to_default = false); /// Checks argument type at specified index with predicate. diff --git a/be/src/vec/functions/function_hex.cpp b/be/src/vec/functions/function_hex.cpp index f66849b93363352..5ba46ad4d74803d 100644 --- a/be/src/vec/functions/function_hex.cpp +++ b/be/src/vec/functions/function_hex.cpp @@ -25,6 +25,7 @@ #include #include +#include "common/cast_set.h" #include "common/status.h" #include "olap/hll.h" #include "util/simd/vstring_function.h" //place this header file at last to compile @@ -46,6 +47,7 @@ #include "vec/functions/simple_function_factory.h" namespace doris { +#include "common/compile_check_begin.h" class FunctionContext; } // namespace doris @@ -70,7 +72,7 @@ class FunctionHexVariadic : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { ColumnPtr& argument_column = block.get_by_position(arguments[0]).column; auto result_data_column = ColumnString::create(); @@ 
-111,7 +113,7 @@ struct HexStringImpl { auto source = reinterpret_cast(&data[offsets[i - 1]]); size_t srclen = offsets[i] - offsets[i - 1]; hex_encode(source, srclen, dst_data_ptr, offset); - dst_offsets[i] = offset; + dst_offsets[i] = cast_set(offset); } return Status::OK(); } @@ -184,7 +186,7 @@ struct HexHLLImpl { dst_data_ptr = res_data.data() + offset; hex_encode(reinterpret_cast(hll_str.data()), hll_str.length(), dst_data_ptr, offset); - res_offsets[i] = offset; + res_offsets[i] = cast_set(offset); hll_str.clear(); } return Status::OK(); diff --git a/be/src/vec/functions/function_hll.cpp b/be/src/vec/functions/function_hll.cpp index a6b91e27c2dd1f7..1040d7ea33d71b1 100644 --- a/be/src/vec/functions/function_hll.cpp +++ b/be/src/vec/functions/function_hll.cpp @@ -22,6 +22,7 @@ #include #include +#include "common/cast_set.h" #include "common/status.h" #include "olap/hll.h" #include "util/hash_util.hpp" @@ -47,6 +48,7 @@ #include "vec/functions/simple_function_factory.h" namespace doris::vectorized { +#include "common/compile_check_begin.h" struct HLLCardinality { static constexpr auto name = "hll_cardinality"; @@ -97,7 +99,7 @@ class FunctionHLL : public IFunction { bool use_default_implementation_for_nulls() const override { return false; } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { auto column = block.get_by_position(arguments[0]).column; MutableColumnPtr column_result = get_return_type_impl({})->create_column(); @@ -153,7 +155,7 @@ class FunctionHllFromBase64 : public IFunction { bool use_default_implementation_for_nulls() const override { return true; } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { auto res_null_map = 
ColumnUInt8::create(input_rows_count, 0); auto res_data_column = ColumnHLL::create(); auto& null_map = res_null_map->get_data(); @@ -167,8 +169,8 @@ class FunctionHllFromBase64 : public IFunction { res.reserve(input_rows_count); std::string decode_buff; - int last_decode_buff_len = 0; - int curr_decode_buff_len = 0; + int64_t last_decode_buff_len = 0; + int64_t curr_decode_buff_len = 0; for (size_t i = 0; i < input_rows_count; ++i) { const char* src_str = reinterpret_cast(&data[offsets[i - 1]]); int64_t src_size = offsets[i] - offsets[i - 1]; @@ -302,7 +304,7 @@ struct HllToBase64 { DCHECK(outlen > 0); encoded_offset += outlen; - offsets[i] = encoded_offset; + offsets[i] = cast_set(encoded_offset); } return Status::OK(); } diff --git a/be/src/vec/functions/function_ifnull.h b/be/src/vec/functions/function_ifnull.h index 9deb7f8d71f7d5c..9cd1ef5b36e0ca7 100644 --- a/be/src/vec/functions/function_ifnull.h +++ b/be/src/vec/functions/function_ifnull.h @@ -78,7 +78,7 @@ class FunctionIfNull : public IFunction { // ifnull(col_left, col_right) == if(isnull(col_left), col_right, col_left) Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { ColumnWithTypeAndName& col_left = block.get_by_position(arguments[0]); if (col_left.column->only_null()) { block.get_by_position(result).column = block.get_by_position(arguments[1]).column; diff --git a/be/src/vec/functions/function_ignore.cpp b/be/src/vec/functions/function_ignore.cpp index 1769dd94fc79e72..5f2c0f7a4181330 100644 --- a/be/src/vec/functions/function_ignore.cpp +++ b/be/src/vec/functions/function_ignore.cpp @@ -37,7 +37,7 @@ class FunctionIgnore : public IFunction { bool use_default_implementation_for_nulls() const override { return false; } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t 
input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { ColumnPtr col = ColumnBool::create(1, false); block.replace_by_position(result, ColumnConst::create(col, input_rows_count)); return Status::OK(); diff --git a/be/src/vec/functions/function_ip.h b/be/src/vec/functions/function_ip.h index ddb99d80a1b10b0..69fd2a8914890e0 100644 --- a/be/src/vec/functions/function_ip.h +++ b/be/src/vec/functions/function_ip.h @@ -24,6 +24,7 @@ #include #include +#include "common/cast_set.h" #include "vec/columns/column.h" #include "vec/columns/column_const.h" #include "vec/columns/column_nullable.h" @@ -49,6 +50,7 @@ #include "vec/runtime/ip_address_cidr.h" namespace doris::vectorized { +#include "common/compile_check_begin.h" class FunctionIPv4NumToString : public IFunction { private: @@ -75,12 +77,11 @@ class FunctionIPv4NumToString : public IFunction { for (size_t i = 0; i < vec_in.size(); ++i) { auto value = vec_in[i]; if (value < IPV4_MIN_NUM_VALUE || value > IPV4_MAX_NUM_VALUE) { - offsets_res[i] = pos - begin; null_map->get_data()[i] = 1; } else { format_ipv4(reinterpret_cast(&vec_in[i]), src_size, pos); - offsets_res[i] = pos - begin; } + offsets_res[i] = cast_set(pos - begin); } vec_res.resize(pos - begin); @@ -102,7 +103,7 @@ class FunctionIPv4NumToString : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { ColumnWithTypeAndName& argument = block.get_by_position(arguments[0]); switch (argument.type->get_type_id()) { @@ -230,7 +231,7 @@ class FunctionIPv4StringToNum : public IFunction { bool use_default_implementation_for_nulls() const override { return false; } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) 
const override { ColumnPtr column = block.get_by_position(arguments[0]).column; ColumnPtr null_map_column; const NullMap* null_map = nullptr; @@ -283,7 +284,6 @@ void process_ipv6_column(const ColumnPtr& column, size_t input_rows_count, } if (is_empty) { - offsets_res[i] = pos - begin; null_map->get_data()[i] = 1; } else { if constexpr (std::is_same_v) { @@ -296,8 +296,8 @@ void process_ipv6_column(const ColumnPtr& column, size_t input_rows_count, std::reverse(ipv6_address_data, ipv6_address_data + IPV6_BINARY_LENGTH); format_ipv6(ipv6_address_data, pos); } - offsets_res[i] = pos - begin; } + offsets_res[i] = cast_set(pos - begin); } } @@ -315,7 +315,7 @@ class FunctionIPv6NumToString : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { const ColumnPtr& column = block.get_by_position(arguments[0]).column; auto col_res = ColumnString::create(); @@ -525,7 +525,7 @@ class FunctionIPv6StringToNum : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { ColumnPtr column = block.get_by_position(arguments[0]).column; ColumnPtr null_map_column; const NullMap* null_map = nullptr; @@ -566,7 +566,7 @@ class FunctionIsIPString : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { const auto& addr_column_with_type_and_name = block.get_by_position(arguments[0]); WhichDataType addr_type(addr_column_with_type_and_name.type); const ColumnPtr& addr_column = addr_column_with_type_and_name.column; @@ -736,7 +736,7 @@ class FunctionIsIPAddressInRange : 
public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { DBUG_EXECUTE_IF("ip.inverted_index_filtered", { auto req_id = DebugPoints::instance()->get_debug_param_or_default( "ip.inverted_index_filtered", "req_id", 0); @@ -800,7 +800,7 @@ class FunctionIPv4CIDRToRange : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { ColumnWithTypeAndName& ip_column = block.get_by_position(arguments[0]); ColumnWithTypeAndName& cidr_column = block.get_by_position(arguments[1]); @@ -829,7 +829,7 @@ class FunctionIPv4CIDRToRange : public IFunction { throw Exception(ErrorCode::INVALID_ARGUMENT, "Illegal cidr value '{}'", std::to_string(cidr)); } - auto range = apply_cidr_mask(ip, cidr); + auto range = apply_cidr_mask(ip, cast_set(cidr)); vec_lower_range_output[i] = range.first; vec_upper_range_output[i] = range.second; } @@ -841,7 +841,7 @@ class FunctionIPv4CIDRToRange : public IFunction { } for (size_t i = 0; i < input_rows_count; ++i) { auto ip = vec_ip_input[i]; - auto range = apply_cidr_mask(ip, cidr); + auto range = apply_cidr_mask(ip, cast_set(cidr)); vec_lower_range_output[i] = range.first; vec_upper_range_output[i] = range.second; } @@ -853,7 +853,7 @@ class FunctionIPv4CIDRToRange : public IFunction { throw Exception(ErrorCode::INVALID_ARGUMENT, "Illegal cidr value '{}'", std::to_string(cidr)); } - auto range = apply_cidr_mask(ip, cidr); + auto range = apply_cidr_mask(ip, cast_set(cidr)); vec_lower_range_output[i] = range.first; vec_upper_range_output[i] = range.second; } @@ -882,7 +882,7 @@ class FunctionIPv6CIDRToRange : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& 
arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { const auto& addr_column_with_type_and_name = block.get_by_position(arguments[0]); const auto& cidr_column_with_type_and_name = block.get_by_position(arguments[1]); WhichDataType addr_type(addr_column_with_type_and_name.type); @@ -937,11 +937,13 @@ class FunctionIPv6CIDRToRange : public IFunction { auto* src_data = const_cast(from_column.get_data_at(0).data); std::reverse(src_data, src_data + IPV6_BINARY_LENGTH); apply_cidr_mask(src_data, reinterpret_cast(&vec_res_lower_range[i]), - reinterpret_cast(&vec_res_upper_range[i]), cidr); + reinterpret_cast(&vec_res_upper_range[i]), + cast_set(cidr)); } else { apply_cidr_mask(from_column.get_data_at(0).data, reinterpret_cast(&vec_res_lower_range[i]), - reinterpret_cast(&vec_res_upper_range[i]), cidr); + reinterpret_cast(&vec_res_upper_range[i]), + cast_set(cidr)); } } } else if (is_cidr_const) { @@ -957,11 +959,13 @@ class FunctionIPv6CIDRToRange : public IFunction { auto* src_data = const_cast(from_column.get_data_at(i).data); std::reverse(src_data, src_data + IPV6_BINARY_LENGTH); apply_cidr_mask(src_data, reinterpret_cast(&vec_res_lower_range[i]), - reinterpret_cast(&vec_res_upper_range[i]), cidr); + reinterpret_cast(&vec_res_upper_range[i]), + cast_set(cidr)); } else { apply_cidr_mask(from_column.get_data_at(i).data, reinterpret_cast(&vec_res_lower_range[i]), - reinterpret_cast(&vec_res_upper_range[i]), cidr); + reinterpret_cast(&vec_res_upper_range[i]), + cast_set(cidr)); } } } else { @@ -977,11 +981,13 @@ class FunctionIPv6CIDRToRange : public IFunction { auto* src_data = const_cast(from_column.get_data_at(i).data); std::reverse(src_data, src_data + IPV6_BINARY_LENGTH); apply_cidr_mask(src_data, reinterpret_cast(&vec_res_lower_range[i]), - reinterpret_cast(&vec_res_upper_range[i]), cidr); + reinterpret_cast(&vec_res_upper_range[i]), + cast_set(cidr)); } else { 
apply_cidr_mask(from_column.get_data_at(i).data, reinterpret_cast(&vec_res_lower_range[i]), - reinterpret_cast(&vec_res_upper_range[i]), cidr); + reinterpret_cast(&vec_res_upper_range[i]), + cast_set(cidr)); } } } @@ -1004,7 +1010,7 @@ class FunctionIsIPv4Compat : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { const ColumnPtr& column = block.get_by_position(arguments[0]).column; const auto* col_in = assert_cast(column.get()); @@ -1045,7 +1051,7 @@ class FunctionIsIPv4Mapped : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { const ColumnPtr& column = block.get_by_position(arguments[0]).column; const auto* col_in = assert_cast(column.get()); @@ -1119,7 +1125,7 @@ class FunctionToIP : public IFunction { bool use_default_implementation_for_nulls() const override { return false; } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { const auto& addr_column_with_type_and_name = block.get_by_position(arguments[0]); WhichDataType addr_type(addr_column_with_type_and_name.type); const ColumnPtr& addr_column = addr_column_with_type_and_name.column; @@ -1214,7 +1220,7 @@ class FunctionIPv4ToIPv6 : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { const auto& ipv4_column_with_type_and_name = block.get_by_position(arguments[0]); const auto& [ipv4_column, ipv4_const] = 
unpack_if_const(ipv4_column_with_type_and_name.column); @@ -1254,7 +1260,7 @@ class FunctionCutIPv6 : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { const auto& ipv6_column_with_type_and_name = block.get_by_position(arguments[0]); const auto& bytes_to_cut_for_ipv6_column_with_type_and_name = block.get_by_position(arguments[1]); @@ -1315,7 +1321,7 @@ class FunctionCutIPv6 : public IFunction { UInt8 bytes_to_cut_count = is_ipv4_mapped(address) ? bytes_to_cut_for_ipv4_count : bytes_to_cut_for_ipv6_count; cut_address(address, pos, bytes_to_cut_count); - offsets_res[i] = pos - begin; + offsets_res[i] = cast_set(pos - begin); } block.replace_by_position(result, std::move(col_res)); @@ -1335,3 +1341,5 @@ class FunctionCutIPv6 : public IFunction { }; } // namespace doris::vectorized + +#include "common/compile_check_end.h" diff --git a/be/src/vec/functions/function_java_udf.cpp b/be/src/vec/functions/function_java_udf.cpp index 86daf5ebf3bb0a8..bea8543d6f062d2 100644 --- a/be/src/vec/functions/function_java_udf.cpp +++ b/be/src/vec/functions/function_java_udf.cpp @@ -42,9 +42,6 @@ JavaFunctionCall::JavaFunctionCall(const TFunction& fn, const DataTypes& argumen Status JavaFunctionCall::open(FunctionContext* context, FunctionContext::FunctionStateScope scope) { JNIEnv* env = nullptr; RETURN_IF_ERROR(JniUtil::GetJNIEnv(&env)); - if (env == nullptr) { - return Status::InternalError("Failed to get/create JVM"); - } if (scope == FunctionContext::FunctionStateScope::THREAD_LOCAL) { SCOPED_TIMER(context->get_udf_execute_timer()); @@ -94,7 +91,7 @@ Status JavaFunctionCall::open(FunctionContext* context, FunctionContext::Functio } Status JavaFunctionCall::execute_impl(FunctionContext* context, Block& block, - const ColumnNumbers& arguments, size_t result, + const ColumnNumbers& arguments, uint32_t 
result, size_t num_rows) const { JNIEnv* env = nullptr; RETURN_IF_ERROR(JniUtil::GetJNIEnv(&env)); diff --git a/be/src/vec/functions/function_java_udf.h b/be/src/vec/functions/function_java_udf.h index e35fc67881acb5c..dc2d9788e097c54 100644 --- a/be/src/vec/functions/function_java_udf.h +++ b/be/src/vec/functions/function_java_udf.h @@ -44,7 +44,7 @@ namespace doris::vectorized { class JavaUdfPreparedFunction : public PreparedFunctionImpl { public: using execute_call_back = std::function; explicit JavaUdfPreparedFunction(const execute_call_back& func, const std::string& name) @@ -54,7 +54,7 @@ class JavaUdfPreparedFunction : public PreparedFunctionImpl { protected: Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { return callback_function(context, block, arguments, result, input_rows_count); } @@ -87,7 +87,7 @@ class JavaFunctionCall : public IFunctionBase { const DataTypePtr& get_return_type() const override { return _return_type; } PreparedFunctionPtr prepare(FunctionContext* context, const Block& sample_block, - const ColumnNumbers& arguments, size_t result) const override { + const ColumnNumbers& arguments, uint32_t result) const override { return std::make_shared( [this](auto&& PH1, auto&& PH2, auto&& PH3, auto&& PH4, auto&& PH5) { return JavaFunctionCall::execute_impl( @@ -101,7 +101,7 @@ class JavaFunctionCall : public IFunctionBase { Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override; Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const; + uint32_t result, size_t input_rows_count) const; Status close(FunctionContext* context, FunctionContext::FunctionStateScope scope) override; diff --git a/be/src/vec/functions/function_json.cpp b/be/src/vec/functions/function_json.cpp 
index 05915db8ec10bf2..6ab03dbbb7b51ca 100644 --- a/be/src/vec/functions/function_json.cpp +++ b/be/src/vec/functions/function_json.cpp @@ -725,7 +725,7 @@ class FunctionJsonAlwaysNotNullable : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { auto result_column = ColumnString::create(); std::vector column_ptrs; // prevent converted column destruct @@ -977,7 +977,7 @@ class FunctionJson : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { auto result_column = ColumnString::create(); std::vector column_ptrs; // prevent converted column destruct @@ -1007,7 +1007,7 @@ class FunctionJsonNullable : public IFunction { return make_nullable(std::make_shared()); } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { auto result_column = ColumnString::create(); auto null_map = ColumnUInt8::create(input_rows_count, 0); std::vector data_columns; @@ -1049,7 +1049,7 @@ class FunctionJsonValid : public IFunction { bool use_default_implementation_for_nulls() const override { return false; } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { const IColumn& col_from = *(block.get_by_position(arguments[0]).column); auto null_map = ColumnUInt8::create(input_rows_count, 0); @@ -1168,7 +1168,7 @@ class FunctionJsonContains : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const 
ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { const IColumn& col_json = *(block.get_by_position(arguments[0]).column); const IColumn& col_search = *(block.get_by_position(arguments[1]).column); const IColumn& col_path = *(block.get_by_position(arguments[2]).column); @@ -1241,7 +1241,7 @@ class FunctionJsonUnquote : public IFunction { bool use_default_implementation_for_nulls() const override { return false; } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { const IColumn& col_from = *(block.get_by_position(arguments[0]).column); auto null_map = ColumnUInt8::create(input_rows_count, 0); @@ -1410,7 +1410,7 @@ class FunctionJsonModifyImpl : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { auto result_column = ColumnString::create(); bool is_nullable = false; ColumnUInt8::MutablePtr ret_null_map = nullptr; diff --git a/be/src/vec/functions/function_jsonb.cpp b/be/src/vec/functions/function_jsonb.cpp index 3da8d514402ba76..0e78eb894b20c55 100644 --- a/be/src/vec/functions/function_jsonb.cpp +++ b/be/src/vec/functions/function_jsonb.cpp @@ -189,7 +189,7 @@ class FunctionJsonbParseBase : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { const IColumn& col_from = *(block.get_by_position(arguments[0]).column); auto null_map = ColumnUInt8::create(0, 0); @@ -363,7 +363,7 @@ class FunctionJsonbExtract : public IFunction { } Status 
execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { DCHECK_GE(arguments.size(), 2); ColumnPtr jsonb_data_column; @@ -447,7 +447,7 @@ class FunctionJsonbKeys : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { DCHECK_GE(arguments.size(), 1); if (arguments.size() != 1 && arguments.size() != 2) { // here has argument param error @@ -619,7 +619,7 @@ class FunctionJsonbExtractPath : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { ColumnPtr jsonb_data_column; bool jsonb_data_const = false; // prepare jsonb data column @@ -1353,14 +1353,14 @@ class FunctionJsonbLength : public IFunction { bool use_default_implementation_for_nulls() const override { return false; } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { return Impl::execute_impl(context, block, arguments, result, input_rows_count); } }; struct JsonbLengthUtil { static Status jsonb_length_execute(FunctionContext* context, Block& block, - const ColumnNumbers& arguments, size_t result, + const ColumnNumbers& arguments, uint32_t result, size_t input_rows_count) { DCHECK_GE(arguments.size(), 2); ColumnPtr jsonb_data_column; @@ -1427,7 +1427,7 @@ struct JsonbLengthImpl { static DataTypes get_variadic_argument_types() { return {std::make_shared()}; } static Status execute_impl(FunctionContext* context, Block& block, - const ColumnNumbers& arguments, 
size_t result, + const ColumnNumbers& arguments, uint32_t result, size_t input_rows_count) { auto path = ColumnString::create(); std::string root_path = "$"; @@ -1451,7 +1451,7 @@ struct JsonbLengthAndPathImpl { } static Status execute_impl(FunctionContext* context, Block& block, - const ColumnNumbers& arguments, size_t result, + const ColumnNumbers& arguments, uint32_t result, size_t input_rows_count) { return JsonbLengthUtil::jsonb_length_execute(context, block, arguments, result, input_rows_count); @@ -1478,14 +1478,14 @@ class FunctionJsonbContains : public IFunction { bool use_default_implementation_for_nulls() const override { return false; } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { return Impl::execute_impl(context, block, arguments, result, input_rows_count); } }; struct JsonbContainsUtil { static Status jsonb_contains_execute(FunctionContext* context, Block& block, - const ColumnNumbers& arguments, size_t result, + const ColumnNumbers& arguments, uint32_t result, size_t input_rows_count) { DCHECK_GE(arguments.size(), 3); @@ -1569,7 +1569,7 @@ struct JsonbContainsImpl { } static Status execute_impl(FunctionContext* context, Block& block, - const ColumnNumbers& arguments, size_t result, + const ColumnNumbers& arguments, uint32_t result, size_t input_rows_count) { auto path = ColumnString::create(); std::string root_path = "$"; @@ -1594,7 +1594,7 @@ struct JsonbContainsAndPathImpl { } static Status execute_impl(FunctionContext* context, Block& block, - const ColumnNumbers& arguments, size_t result, + const ColumnNumbers& arguments, uint32_t result, size_t input_rows_count) { return JsonbContainsUtil::jsonb_contains_execute(context, block, arguments, result, input_rows_count); @@ -1828,7 +1828,7 @@ class FunctionJsonSearch : public IFunction { } Status execute_impl(FunctionContext* context, 
Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { // the json_doc, one_or_all, and search_str must be given. // and we require the positions are static. if (arguments.size() < 3) { diff --git a/be/src/vec/functions/function_map.cpp b/be/src/vec/functions/function_map.cpp index d781fc6cac402b2..5b4e4202b20de8c 100644 --- a/be/src/vec/functions/function_map.cpp +++ b/be/src/vec/functions/function_map.cpp @@ -80,7 +80,7 @@ class FunctionMap : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { DCHECK(arguments.size() % 2 == 0) << "function: " << get_name() << ", arguments should not be even number"; @@ -166,7 +166,7 @@ class FunctionMapContains : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { // backup original argument 0 auto orig_arg0 = block.get_by_position(arguments[0]); auto left_column = @@ -261,7 +261,7 @@ class FunctionMapEntries : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { auto left_column = block.get_by_position(arguments[0]).column->convert_to_full_column_if_const(); const ColumnMap* map_column = nullptr; diff --git a/be/src/vec/functions/function_math_log.h b/be/src/vec/functions/function_math_log.h index e8653e4220d460d..60780d0ff39a4fb 100644 --- a/be/src/vec/functions/function_math_log.h +++ b/be/src/vec/functions/function_math_log.h @@ -51,7 +51,7 @@ class FunctionMathLog : public IFunction { 
} Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { const auto* col = assert_cast(block.get_by_position(arguments[0]).column.get()); diff --git a/be/src/vec/functions/function_math_unary.h b/be/src/vec/functions/function_math_unary.h index e5101692b86d757..617b26169e246a6 100644 --- a/be/src/vec/functions/function_math_unary.h +++ b/be/src/vec/functions/function_math_unary.h @@ -54,7 +54,7 @@ class FunctionMathUnary : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { const auto* col = assert_cast(block.get_by_position(arguments[0]).column.get()); diff --git a/be/src/vec/functions/function_math_unary_alway_nullable.h b/be/src/vec/functions/function_math_unary_alway_nullable.h index 8d2cea1bc0db87d..1b6e86c395a192d 100644 --- a/be/src/vec/functions/function_math_unary_alway_nullable.h +++ b/be/src/vec/functions/function_math_unary_alway_nullable.h @@ -55,7 +55,7 @@ class FunctionMathUnaryAlwayNullable : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { const ColumnFloat64* col = assert_cast(block.get_by_position(arguments[0]).column.get()); auto dst = ColumnFloat64::create(); diff --git a/be/src/vec/functions/function_multi_match.cpp b/be/src/vec/functions/function_multi_match.cpp index 02697dd4b0aafe2..8ab0cb2f2e542c5 100644 --- a/be/src/vec/functions/function_multi_match.cpp +++ b/be/src/vec/functions/function_multi_match.cpp @@ -34,7 +34,7 @@ namespace doris::vectorized { Status FunctionMultiMatch::execute_impl(FunctionContext* /*context*/, 
Block& block, - const ColumnNumbers& arguments, size_t result, + const ColumnNumbers& arguments, uint32_t result, size_t /*input_rows_count*/) const { return Status::RuntimeError("only inverted index queries are supported"); } diff --git a/be/src/vec/functions/function_multi_match.h b/be/src/vec/functions/function_multi_match.h index 4aca75acc02123b..ba395450a30f6ff 100644 --- a/be/src/vec/functions/function_multi_match.h +++ b/be/src/vec/functions/function_multi_match.h @@ -54,7 +54,7 @@ class FunctionMultiMatch : public IFunction { } Status execute_impl(FunctionContext* /*context*/, Block& block, const ColumnNumbers& arguments, - size_t result, size_t /*input_rows_count*/) const override; + uint32_t result, size_t /*input_rows_count*/) const override; bool can_push_down_to_index() const override { return true; } diff --git a/be/src/vec/functions/function_multi_same_args.h b/be/src/vec/functions/function_multi_same_args.h index 0c45c7cd446ef7a..f22bae640aeda4a 100644 --- a/be/src/vec/functions/function_multi_same_args.h +++ b/be/src/vec/functions/function_multi_same_args.h @@ -45,7 +45,7 @@ class FunctionMultiSameArgs : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { DCHECK_GE(arguments.size(), 1); block.replace_by_position(result, Impl::execute(block, arguments, input_rows_count)); return Status::OK(); diff --git a/be/src/vec/functions/function_nullables.cpp b/be/src/vec/functions/function_nullables.cpp index 0fdcfbc21752717..88bb2b38fab65cb 100644 --- a/be/src/vec/functions/function_nullables.cpp +++ b/be/src/vec/functions/function_nullables.cpp @@ -52,7 +52,7 @@ class FunctionNullable : public IFunction { // trans nullable column to non-nullable column. If argument is already non-nullable, raise error. 
Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { ColumnPtr& col = block.get_by_position(arguments[0]).column; if (const auto* col_null = check_and_get_column(col); col_null == nullptr) { // not null @@ -83,7 +83,7 @@ class FunctionNonNullable : public IFunction { // trans nullable column to non-nullable column. If argument is already non-nullable, raise error. Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { auto& data = block.get_by_position(arguments[0]); if (const auto* col_null = check_and_get_column(data.column); col_null == nullptr) // raise error if input is not nullable. diff --git a/be/src/vec/functions/function_quantile_state.cpp b/be/src/vec/functions/function_quantile_state.cpp index 524909314dc5442..95afbf1db32d233 100644 --- a/be/src/vec/functions/function_quantile_state.cpp +++ b/be/src/vec/functions/function_quantile_state.cpp @@ -126,7 +126,7 @@ class FunctionToQuantileState : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { const ColumnPtr& column = block.get_by_position(arguments[0]).column; const DataTypePtr& data_type = block.get_by_position(arguments[0]).type; auto compression_arg = check_and_get_column_const( @@ -175,7 +175,7 @@ class FunctionQuantileStatePercent : public IFunction { bool use_default_implementation_for_nulls() const override { return false; } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t 
input_rows_count) const override { auto res_data_column = ColumnFloat64::create(); auto& res = res_data_column->get_data(); auto data_null_map = ColumnUInt8::create(input_rows_count, 0); diff --git a/be/src/vec/functions/function_regexp.cpp b/be/src/vec/functions/function_regexp.cpp index ae24831862a4434..ae508120cf9ee89 100644 --- a/be/src/vec/functions/function_regexp.cpp +++ b/be/src/vec/functions/function_regexp.cpp @@ -429,7 +429,7 @@ class FunctionRegexp : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { size_t argument_size = arguments.size(); auto result_null_map = ColumnUInt8::create(input_rows_count, 0); diff --git a/be/src/vec/functions/function_reverse.h b/be/src/vec/functions/function_reverse.h index 9a50c011b91dc29..ee0005a305d8ce9 100644 --- a/be/src/vec/functions/function_reverse.h +++ b/be/src/vec/functions/function_reverse.h @@ -40,7 +40,7 @@ class FunctionReverseCommon : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { ColumnPtr& src_column = block.get_by_position(arguments[0]).column; if (const ColumnString* col_string = check_and_get_column(src_column.get())) { auto col_res = ColumnString::create(); diff --git a/be/src/vec/functions/function_rpc.cpp b/be/src/vec/functions/function_rpc.cpp index c27383dac62b08d..6d3a64f78409c6e 100644 --- a/be/src/vec/functions/function_rpc.cpp +++ b/be/src/vec/functions/function_rpc.cpp @@ -43,7 +43,7 @@ RPCFnImpl::RPCFnImpl(const TFunction& fn) : _fn(fn) { } Status RPCFnImpl::vec_call(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) { + uint32_t result, size_t input_rows_count) { 
PFunctionCallRequest request; PFunctionCallResponse response; if (_client == nullptr) { @@ -108,8 +108,8 @@ Status FunctionRPC::open(FunctionContext* context, FunctionContext::FunctionStat } Status FunctionRPC::execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count, bool dry_run) const { - RPCFnImpl* fn = reinterpret_cast( + uint32_t result, size_t input_rows_count, bool dry_run) const { + auto* fn = reinterpret_cast( context->get_function_state(FunctionContext::FRAGMENT_LOCAL)); return fn->vec_call(context, block, arguments, result, input_rows_count); } diff --git a/be/src/vec/functions/function_rpc.h b/be/src/vec/functions/function_rpc.h index ae71632f9746e51..fd17577ddad4ffd 100644 --- a/be/src/vec/functions/function_rpc.h +++ b/be/src/vec/functions/function_rpc.h @@ -48,7 +48,7 @@ class RPCFnImpl { RPCFnImpl(const TFunction& fn); ~RPCFnImpl() = default; Status vec_call(FunctionContext* context, vectorized::Block& block, - const std::vector& arguments, size_t result, size_t input_rows_count); + const ColumnNumbers& arguments, uint32_t result, size_t input_rows_count); bool available() { return _client != nullptr; } private: @@ -88,14 +88,14 @@ class FunctionRPC : public IFunctionBase { const DataTypePtr& get_return_type() const override { return _return_type; } PreparedFunctionPtr prepare(FunctionContext* context, const Block& sample_block, - const ColumnNumbers& arguments, size_t result) const override { + const ColumnNumbers& arguments, uint32_t result) const override { return nullptr; } Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override; Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count, bool dry_run = false) const override; + uint32_t result, size_t input_rows_count, bool dry_run = false) const override; bool is_use_default_implementation_for_constants() const override { return 
true; } diff --git a/be/src/vec/functions/function_size.cpp b/be/src/vec/functions/function_size.cpp index 803d6d283d37ff1..68c873c8ea6c4b8 100644 --- a/be/src/vec/functions/function_size.cpp +++ b/be/src/vec/functions/function_size.cpp @@ -46,7 +46,7 @@ class FunctionSize : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { const auto& [left_column, left_const] = unpack_if_const(block.get_by_position(arguments[0]).column); const auto type = block.get_by_position(arguments[0]).type; diff --git a/be/src/vec/functions/function_split_by_regexp.cpp b/be/src/vec/functions/function_split_by_regexp.cpp index cee2e186b5770fb..a4c46b59ac1f759 100644 --- a/be/src/vec/functions/function_split_by_regexp.cpp +++ b/be/src/vec/functions/function_split_by_regexp.cpp @@ -178,7 +178,7 @@ class SplitByRegexp : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { return Impl::execute_impl(context, block, arguments, result, input_rows_count); } }; @@ -186,7 +186,7 @@ class SplitByRegexp : public IFunction { struct ExecuteImpl { using NullMapType = PaddedPODArray; static Status execute_impl(FunctionContext* context, Block& block, - const ColumnNumbers& arguments, size_t result, + const ColumnNumbers& arguments, uint32_t result, size_t input_rows_count) { const auto& [first_column, left_const] = unpack_if_const(block.get_by_position(arguments[0]).column); @@ -347,7 +347,7 @@ struct TwoArgumentImpl { } static Status execute_impl(FunctionContext* context, Block& block, - const ColumnNumbers& arguments, size_t result, + const ColumnNumbers& arguments, uint32_t result, size_t input_rows_count) { DCHECK_EQ(arguments.size(), 2); auto 
max_limit = ColumnConst::create(ColumnInt32::create(1, -1), input_rows_count); @@ -363,7 +363,7 @@ struct ThreeArgumentImpl { std::make_shared()}; } static Status execute_impl(FunctionContext* context, Block& block, - const ColumnNumbers& arguments, size_t result, + const ColumnNumbers& arguments, uint32_t result, size_t input_rows_count) { DCHECK_EQ(arguments.size(), 3); return ExecuteImpl::execute_impl(context, block, arguments, result, input_rows_count); diff --git a/be/src/vec/functions/function_string.cpp b/be/src/vec/functions/function_string.cpp index a7bcf9942a88d51..cee141db2a78209 100644 --- a/be/src/vec/functions/function_string.cpp +++ b/be/src/vec/functions/function_string.cpp @@ -682,7 +682,7 @@ struct Trim1Impl { static DataTypes get_variadic_argument_types() { return {std::make_shared()}; } static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) { + uint32_t result, size_t input_rows_count) { const ColumnPtr column = block.get_by_position(arguments[0]).column; if (const auto* col = assert_cast(column.get())) { auto col_res = ColumnString::create(); @@ -711,7 +711,7 @@ struct Trim2Impl { } static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) { + uint32_t result, size_t input_rows_count) { const ColumnPtr column = block.get_by_position(arguments[0]).column; const auto& rcol = assert_cast(block.get_by_position(arguments[1]).column.get()) @@ -783,7 +783,7 @@ class FunctionTrim : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { return impl::execute(context, block, arguments, result, input_rows_count); } }; diff --git a/be/src/vec/functions/function_string.h b/be/src/vec/functions/function_string.h index 
8c01b51f298b110..6143e61f86f0c66 100644 --- a/be/src/vec/functions/function_string.h +++ b/be/src/vec/functions/function_string.h @@ -165,7 +165,7 @@ struct StringOP { struct SubstringUtil { static constexpr auto name = "substring"; - static void substring_execute(Block& block, const ColumnNumbers& arguments, size_t result, + static void substring_execute(Block& block, const ColumnNumbers& arguments, uint32_t result, size_t input_rows_count) { DCHECK_EQ(arguments.size(), 3); auto res = ColumnString::create(); @@ -336,7 +336,7 @@ class FunctionStrcmp : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { const auto& [arg0_column, arg0_const] = unpack_if_const(block.get_by_position(arguments[0]).column); const auto& [arg1_column, arg1_const] = @@ -396,7 +396,7 @@ class FunctionAutoPartitionName : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { size_t argument_size = arguments.size(); auto const_null_map = ColumnUInt8::create(input_rows_count, 0); auto null_map = ColumnUInt8::create(input_rows_count, 0); @@ -488,7 +488,7 @@ class FunctionAutoPartitionName : public IFunction { std::vector& is_const_args, const std::vector& null_list, auto& res_data, auto& res_offset, size_t input_rows_count, - size_t argument_size, Block& block, size_t result, + size_t argument_size, Block& block, uint32_t result, auto& res) const { int curr_len = 0; for (int row = 0; row < input_rows_count; row++) { @@ -546,7 +546,7 @@ class FunctionAutoPartitionName : public IFunction { std::vector& offsets_list, std::vector& is_const_args, auto& res_data, auto& res_offset, size_t input_rows_count, - size_t argument_size, Block& block, size_t 
result, + size_t argument_size, Block& block, uint32_t result, auto& res) const { const char* range_type = chars_list[1]->raw_data(); @@ -631,7 +631,7 @@ class FunctionSubstring : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { return Impl::execute_impl(context, block, arguments, result, input_rows_count); } }; @@ -643,7 +643,7 @@ struct Substr3Impl { } static Status execute_impl(FunctionContext* context, Block& block, - const ColumnNumbers& arguments, size_t result, + const ColumnNumbers& arguments, uint32_t result, size_t input_rows_count) { SubstringUtil::substring_execute(block, arguments, result, input_rows_count); return Status::OK(); @@ -656,7 +656,7 @@ struct Substr2Impl { } static Status execute_impl(FunctionContext* context, Block& block, - const ColumnNumbers& arguments, size_t result, + const ColumnNumbers& arguments, uint32_t result, size_t input_rows_count) { auto col_len = ColumnInt32::create(input_rows_count); auto& strlen_data = col_len->get_data(); @@ -705,7 +705,7 @@ class FunctionMask : public IFunction { bool is_variadic() const override { return true; } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { DCHECK_GE(arguments.size(), 1); DCHECK_LE(arguments.size(), 4); @@ -802,7 +802,7 @@ class FunctionMaskPartial : public IFunction { bool is_variadic() const override { return true; } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { int n = -1; // means unassigned auto res = ColumnString::create(); @@ -879,7 +879,7 @@ class FunctionLeft : public 
IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { auto int_type = std::make_shared(); size_t num_columns_without_result = block.columns(); block.insert({int_type->create_column_const(input_rows_count, to_field(1)), int_type, @@ -905,7 +905,7 @@ class FunctionRight : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { auto int_type = std::make_shared(); auto params1 = ColumnInt32::create(input_rows_count); auto params2 = ColumnInt32::create(input_rows_count); @@ -947,7 +947,7 @@ struct NullOrEmptyImpl { static DataTypes get_variadic_argument_types() { return {std::make_shared()}; } static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count, bool reverse) { + uint32_t result, size_t input_rows_count, bool reverse) { auto res_map = ColumnUInt8::create(input_rows_count, 0); auto column = block.get_by_position(arguments[0]).column; @@ -988,7 +988,7 @@ class FunctionNullOrEmpty : public IFunction { bool use_default_implementation_for_nulls() const override { return false; } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { RETURN_IF_ERROR(NullOrEmptyImpl::execute(context, block, arguments, result, input_rows_count, false)); return Status::OK(); @@ -1009,7 +1009,7 @@ class FunctionNotNullOrEmpty : public IFunction { bool use_default_implementation_for_nulls() const override { return false; } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - 
size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { RETURN_IF_ERROR(NullOrEmptyImpl::execute(context, block, arguments, result, input_rows_count, true)); return Status::OK(); @@ -1068,7 +1068,7 @@ class FunctionStringConcat : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { DCHECK_GE(arguments.size(), 1); if (arguments.size() == 1) { @@ -1096,7 +1096,7 @@ class FunctionStringConcat : public IFunction { } } - Status execute_vecotr(Block& block, const ColumnNumbers& arguments, size_t result, + Status execute_vecotr(Block& block, const ColumnNumbers& arguments, uint32_t result, size_t input_rows_count) const { int argument_size = arguments.size(); std::vector argument_columns(argument_size); @@ -1158,7 +1158,7 @@ class FunctionStringConcat : public IFunction { template Status execute_const(ConcatState* concat_state, Block& block, const ColumnString* col_str, - size_t result, size_t input_rows_count) const { + uint32_t result, size_t input_rows_count) const { // using tail optimize auto res = ColumnString::create(); @@ -1211,7 +1211,7 @@ class FunctionStringElt : public IFunction { bool use_default_implementation_for_nulls() const override { return false; } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { int arguent_size = arguments.size(); int num_children = arguent_size - 1; auto res = ColumnString::create(); @@ -1332,7 +1332,7 @@ class FunctionStringConcatWs : public IFunction { bool use_default_implementation_for_nulls() const override { return false; } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t 
result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { DCHECK_GE(arguments.size(), 2); auto null_map = ColumnUInt8::create(input_rows_count, 0); // we create a zero column to simply implement @@ -1529,7 +1529,7 @@ class FunctionStringRepeat : public IFunction { return make_nullable(std::make_shared()); } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { DCHECK_EQ(arguments.size(), 2); auto res = ColumnString::create(); auto null_map = ColumnUInt8::create(); @@ -1635,7 +1635,7 @@ class FunctionStringPad : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { DCHECK_GE(arguments.size(), 3); auto null_map = ColumnUInt8::create(input_rows_count, 0); // we create a zero column to simply implement @@ -1781,7 +1781,7 @@ class FunctionSplitPart : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { DCHECK_EQ(arguments.size(), 3); auto null_map = ColumnUInt8::create(input_rows_count, 0); @@ -1952,7 +1952,7 @@ class FunctionSubstringIndex : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { DCHECK_EQ(arguments.size(), 3); // Create a zero column to simply implement @@ -2115,7 +2115,7 @@ class FunctionSplitByString : public IFunction { } Status execute_impl(FunctionContext* /*context*/, Block& block, const ColumnNumbers& arguments, 
- size_t result, size_t /*input_rows_count*/) const override { + uint32_t result, size_t /*input_rows_count*/) const override { DCHECK_EQ(arguments.size(), 2); const auto& [src_column, left_const] = @@ -2392,7 +2392,7 @@ class FunctionCountSubString : public IFunction { } Status execute_impl(FunctionContext* /*context*/, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { DCHECK_EQ(arguments.size(), 2); const auto& [src_column, left_const] = unpack_if_const(block.get_by_position(arguments[0]).column); @@ -2510,7 +2510,7 @@ class FunctionStringDigestOneArg : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { DCHECK_GE(arguments.size(), 1); int argument_size = arguments.size(); @@ -2574,7 +2574,7 @@ class FunctionStringDigestSHA1 : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { DCHECK_EQ(arguments.size(), 1); ColumnPtr str_col = block.get_by_position(arguments[0]).column; @@ -2613,7 +2613,7 @@ class FunctionStringDigestSHA2 : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { DCHECK(!is_column_const(*block.get_by_position(arguments[0]).column)); ColumnPtr str_col = block.get_by_position(arguments[0]).column; @@ -2674,7 +2674,7 @@ class FunctionExtractURLParameter : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t 
input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { auto col_url = block.get_by_position(arguments[0]).column->convert_to_full_column_if_const(); auto col_parameter = @@ -2718,7 +2718,7 @@ class FunctionStringParseUrl : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { auto null_map = ColumnUInt8::create(input_rows_count, 0); auto& null_map_data = null_map->get_data(); DCHECK_GE(3, arguments.size()); @@ -2839,7 +2839,7 @@ class FunctionUrlDecode : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { auto res = ColumnString::create(); res->get_offsets().reserve(input_rows_count); @@ -2872,7 +2872,7 @@ class FunctionUrlEncode : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { auto res = ColumnString::create(); res->get_offsets().reserve(input_rows_count); @@ -2907,7 +2907,7 @@ class FunctionRandomBytes : public IFunction { bool use_default_implementation_for_constants() const final { return false; } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { auto res = ColumnString::create(); auto& res_offsets = res->get_offsets(); auto& res_chars = res->get_chars(); @@ -2973,7 +2973,7 @@ class FunctionMoneyFormat : public IFunction { size_t get_number_of_arguments() const override { return 1; } Status execute_impl(FunctionContext* 
context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { auto res_column = ColumnString::create(); ColumnPtr argument_column = block.get_by_position(arguments[0]).column; @@ -3272,7 +3272,7 @@ class FunctionStringLocatePos : public IFunction { bool is_variadic() const override { return true; } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { DCHECK_EQ(arguments.size(), 3); bool col_const[3]; ColumnPtr argument_columns[3]; @@ -3423,7 +3423,7 @@ class FunctionReplace : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { // We need a local variable to hold a reference to the converted column. // So that the converted column will not be released before we use it. 
ColumnPtr col[3]; @@ -3556,13 +3556,13 @@ class FunctionSubReplace : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { return Impl::execute_impl(context, block, arguments, result, input_rows_count); } }; struct SubReplaceImpl { - static Status replace_execute(Block& block, const ColumnNumbers& arguments, size_t result, + static Status replace_execute(Block& block, const ColumnNumbers& arguments, uint32_t result, size_t input_rows_count) { auto res_column = ColumnString::create(); auto* result_column = assert_cast(res_column.get()); @@ -3684,7 +3684,7 @@ struct SubReplaceThreeImpl { } static Status execute_impl(FunctionContext* context, Block& block, - const ColumnNumbers& arguments, size_t result, + const ColumnNumbers& arguments, uint32_t result, size_t input_rows_count) { auto params = ColumnInt32::create(input_rows_count); auto& strlen_data = params->get_data(); @@ -3715,7 +3715,7 @@ struct SubReplaceFourImpl { } static Status execute_impl(FunctionContext* context, Block& block, - const ColumnNumbers& arguments, size_t result, + const ColumnNumbers& arguments, uint32_t result, size_t input_rows_count) { return SubReplaceImpl::replace_execute(block, arguments, result, input_rows_count); } @@ -3754,7 +3754,7 @@ class FunctionConvertTo : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { ColumnPtr argument_column = block.get_by_position(arguments[0]).column->convert_to_full_column_if_const(); const ColumnString* str_col = static_cast(argument_column.get()); @@ -3945,7 +3945,7 @@ class FunctionIntToChar : public IFunction { bool use_default_implementation_for_nulls() const override { return false; } Status 
execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { DCHECK_GE(arguments.size(), 2); int argument_size = arguments.size(); @@ -4143,7 +4143,7 @@ class FunctionOverlay : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { DCHECK_EQ(arguments.size(), 4); bool col_const[4]; @@ -4268,7 +4268,7 @@ class FunctionNgramSearch : public IFunction { // ngram_search(text,pattern,gram_num) Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { CHECK_EQ(arguments.size(), 3); auto col_res = ColumnFloat64::create(); bool col_const[3]; @@ -4398,7 +4398,7 @@ class FunctionTranslate : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { CHECK_EQ(arguments.size(), 3); auto col_res = ColumnString::create(); bool col_const[3]; diff --git a/be/src/vec/functions/function_string_to_string.h b/be/src/vec/functions/function_string_to_string.h index 3dac2cf94fd1a7d..ea8c654faa1d31c 100644 --- a/be/src/vec/functions/function_string_to_string.h +++ b/be/src/vec/functions/function_string_to_string.h @@ -60,7 +60,7 @@ class FunctionStringToString : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { const ColumnPtr column = 
block.get_by_position(arguments[0]).column; if (const auto* col = check_and_get_column(column.get())) { auto col_res = ColumnString::create(); diff --git a/be/src/vec/functions/function_struct.cpp b/be/src/vec/functions/function_struct.cpp index a709589a285bc5c..49348f56f9036bd 100644 --- a/be/src/vec/functions/function_struct.cpp +++ b/be/src/vec/functions/function_struct.cpp @@ -74,7 +74,7 @@ class FunctionStruct : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { auto result_col = block.get_by_position(result).type->create_column(); auto struct_column = assert_cast(result_col.get()); ColumnNumbers args_num; diff --git a/be/src/vec/functions/function_struct_element.cpp b/be/src/vec/functions/function_struct_element.cpp index 8c66386cd90c5ff..f547588dece6467 100644 --- a/be/src/vec/functions/function_struct_element.cpp +++ b/be/src/vec/functions/function_struct_element.cpp @@ -67,7 +67,7 @@ class FunctionStructElement : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { auto struct_type = check_and_get_data_type( block.get_by_position(arguments[0]).type.get()); auto struct_col = check_and_get_column( diff --git a/be/src/vec/functions/function_time_value_to_field.cpp b/be/src/vec/functions/function_time_value_to_field.cpp index da5fbca58cdbb39..8c8ec3cf9ab2199 100644 --- a/be/src/vec/functions/function_time_value_to_field.cpp +++ b/be/src/vec/functions/function_time_value_to_field.cpp @@ -50,7 +50,7 @@ class FunctionTimeValueToField : public IFunction { bool use_default_implementation_for_nulls() const override { return true; } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& 
arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { DCHECK_EQ(arguments.size(), 1); const auto* column_time = assert_cast( diff --git a/be/src/vec/functions/function_timestamp.cpp b/be/src/vec/functions/function_timestamp.cpp index 7c2e62e5723067f..cc812b8968124f7 100644 --- a/be/src/vec/functions/function_timestamp.cpp +++ b/be/src/vec/functions/function_timestamp.cpp @@ -107,7 +107,7 @@ struct StrToDate { } static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) { + uint32_t result, size_t input_rows_count) { auto null_map = ColumnUInt8::create(input_rows_count, 0); const auto& col0 = block.get_by_position(arguments[0]).column; @@ -253,7 +253,7 @@ struct MakeDateImpl { } static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) { + uint32_t result, size_t input_rows_count) { auto null_map = ColumnUInt8::create(input_rows_count, 0); DCHECK_EQ(arguments.size(), 2); @@ -458,7 +458,7 @@ struct DateTrunc { } static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) { + uint32_t result, size_t input_rows_count) { DCHECK_EQ(arguments.size(), 2); auto null_map = ColumnUInt8::create(input_rows_count, 0); @@ -502,7 +502,7 @@ class FromDays : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { auto null_map = ColumnUInt8::create(input_rows_count, 0); ColumnPtr& argument_column = block.get_by_position(arguments[0]).column; @@ -573,7 +573,7 @@ struct UnixTimeStampImpl { } static Status execute_impl(FunctionContext* context, Block& block, - const ColumnNumbers& arguments, size_t result, + 
const ColumnNumbers& arguments, uint32_t result, size_t input_rows_count) { auto col_result = ColumnVector::create(); col_result->resize(1); @@ -608,7 +608,7 @@ struct UnixTimeStampDateImpl { } static Status execute_impl(FunctionContext* context, Block& block, - const ColumnNumbers& arguments, size_t result, + const ColumnNumbers& arguments, uint32_t result, size_t input_rows_count) { const ColumnPtr& col = block.get_by_position(arguments[0]).column; DCHECK(!col->is_nullable()); @@ -692,7 +692,7 @@ struct UnixTimeStampStrImpl { } static Status execute_impl(FunctionContext* context, Block& block, - const ColumnNumbers& arguments, size_t result, + const ColumnNumbers& arguments, uint32_t result, size_t input_rows_count) { ColumnPtr col_left = nullptr, col_right = nullptr; bool source_const = false, format_const = false; @@ -763,7 +763,7 @@ class FunctionUnixTimestamp : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { return Impl::execute_impl(context, block, arguments, result, input_rows_count); } }; @@ -800,7 +800,7 @@ class DateTimeToTimestamp : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { const auto& arg_col = block.get_by_position(arguments[0]).column; const auto& column_data = assert_cast(*arg_col); auto res_col = ColumnInt64::create(); @@ -860,7 +860,7 @@ class FunctionDateOrDateTimeToDate : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { return Impl::execute_impl(context, block, arguments, result, 
input_rows_count); } }; @@ -878,7 +878,7 @@ struct LastDayImpl { using ResultNativeType = date_cast::ValueTypeOfColumnV; static Status execute_impl(FunctionContext* context, Block& block, - const ColumnNumbers& arguments, size_t result, + const ColumnNumbers& arguments, uint32_t result, size_t input_rows_count) { const auto is_nullable = block.get_by_position(result).type->is_nullable(); ColumnPtr res_column; @@ -982,7 +982,7 @@ struct MondayImpl { using ResultNativeType = date_cast::ValueTypeOfColumnV; static Status execute_impl(FunctionContext* context, Block& block, - const ColumnNumbers& arguments, size_t result, + const ColumnNumbers& arguments, uint32_t result, size_t input_rows_count) { const auto is_nullable = block.get_by_position(result).type->is_nullable(); ColumnPtr argument_column = remove_nullable(block.get_by_position(arguments[0]).column); @@ -1151,7 +1151,7 @@ class FunctionOtherTypesToDateType : public IFunction { //ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { return Impl::execute(context, block, arguments, result, input_rows_count); } }; @@ -1170,7 +1170,7 @@ struct FromIso8601DateV2 { } static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) { + uint32_t result, size_t input_rows_count) { const auto* src_column_ptr = block.get_by_position(arguments[0]).column.get(); auto null_map = ColumnUInt8::create(input_rows_count, 0); diff --git a/be/src/vec/functions/function_tokenize.cpp b/be/src/vec/functions/function_tokenize.cpp index be0eb5dddc960d3..b8e1565ebb48221 100644 --- a/be/src/vec/functions/function_tokenize.cpp +++ b/be/src/vec/functions/function_tokenize.cpp @@ -109,7 +109,7 @@ void 
FunctionTokenize::_do_tokenize(const ColumnString& src_column_string, } Status FunctionTokenize::execute_impl(FunctionContext* /*context*/, Block& block, - const ColumnNumbers& arguments, size_t result, + const ColumnNumbers& arguments, uint32_t result, size_t /*input_rows_count*/) const { DCHECK_EQ(arguments.size(), 2); const auto& [src_column, left_const] = diff --git a/be/src/vec/functions/function_tokenize.h b/be/src/vec/functions/function_tokenize.h index f632d8fd4cebc1e..4a7cb0dad26214a 100644 --- a/be/src/vec/functions/function_tokenize.h +++ b/be/src/vec/functions/function_tokenize.h @@ -69,7 +69,7 @@ class FunctionTokenize : public IFunction { IColumn& dest_nested_column, ColumnArray::Offsets64& dest_offsets, NullMapType* dest_nested_null_map) const; Status execute_impl(FunctionContext* /*context*/, Block& block, const ColumnNumbers& arguments, - size_t result, size_t /*input_rows_count*/) const override; + uint32_t result, size_t /*input_rows_count*/) const override; Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override { return Status::OK(); diff --git a/be/src/vec/functions/function_totype.h b/be/src/vec/functions/function_totype.h index 3309fbcacdad9d1..e5a2e6cfd7b8d34 100644 --- a/be/src/vec/functions/function_totype.h +++ b/be/src/vec/functions/function_totype.h @@ -52,7 +52,7 @@ class FunctionUnaryToType : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { return execute_impl(block, arguments, result, input_rows_count); } @@ -67,7 +67,7 @@ class FunctionUnaryToType : public IFunction { // handle result == DataTypeString template requires std::is_same_v - Status execute_impl(Block& block, const ColumnNumbers& arguments, size_t result, + Status execute_impl(Block& block, const ColumnNumbers& arguments, uint32_t result, size_t 
input_rows_count) const { const ColumnPtr column = block.get_by_position(arguments[0]).column; if constexpr (typeindex_is_int(Impl::TYPE_INDEX)) { @@ -95,7 +95,7 @@ class FunctionUnaryToType : public IFunction { } template requires(!std::is_same_v) - Status execute_impl(Block& block, const ColumnNumbers& arguments, size_t result, + Status execute_impl(Block& block, const ColumnNumbers& arguments, uint32_t result, size_t input_rows_count) const { const ColumnPtr column = block.get_by_position(arguments[0]).column; if constexpr (Impl::TYPE_INDEX == TypeIndex::String) { @@ -143,7 +143,7 @@ class FunctionBinaryToType : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t /*input_rows_count*/) const override { + uint32_t result, size_t /*input_rows_count*/) const override { DCHECK_EQ(arguments.size(), 2); const auto& [lcol, left_const] = unpack_if_const(block.get_by_position(arguments[0]).column); @@ -218,7 +218,7 @@ class FunctionBinaryToType : public bool is_variadic() const override { return true; } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t /*input_rows_count*/) const override { + uint32_t result, size_t /*input_rows_count*/) const override { const auto& left = block.get_by_position(arguments[0]); const auto& right = block.get_by_position(arguments[1]); return execute_inner_impl(left, right, block, arguments, result); @@ -313,7 +313,7 @@ class FunctionBinaryToNullType : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { auto null_map = ColumnUInt8::create(input_rows_count, 0); DCHECK_EQ(arguments.size(), 2); @@ -393,7 +393,7 @@ class FunctionBinaryStringOperateToNullType : public IFunction { } } Status execute_impl(FunctionContext* 
context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { auto null_map = ColumnUInt8::create(input_rows_count, 0); ColumnPtr argument_columns[2]; bool col_const[2]; @@ -465,7 +465,7 @@ class FunctionStringOperateToNullType : public IFunction { bool use_default_implementation_for_nulls() const override { return true; } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { auto null_map = ColumnUInt8::create(input_rows_count, 0); auto& col_ptr = block.get_by_position(arguments[0]).column; @@ -503,7 +503,7 @@ class FunctionStringEncode : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { auto& col_ptr = block.get_by_position(arguments[0]).column; auto res = Impl::ColumnType::create(); diff --git a/be/src/vec/functions/function_unary_arithmetic.h b/be/src/vec/functions/function_unary_arithmetic.h index 91e33a7b9d45e24..c0febedb812ebfe 100644 --- a/be/src/vec/functions/function_unary_arithmetic.h +++ b/be/src/vec/functions/function_unary_arithmetic.h @@ -107,7 +107,7 @@ class FunctionUnaryArithmetic : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { bool valid = cast_type(block.get_by_position(arguments[0]).type.get(), [&](const auto& type) { using DataType = std::decay_t; diff --git a/be/src/vec/functions/function_utility.cpp b/be/src/vec/functions/function_utility.cpp index b201434537625ed..40dd11677758ded 100644 --- 
a/be/src/vec/functions/function_utility.cpp +++ b/be/src/vec/functions/function_utility.cpp @@ -75,7 +75,7 @@ class FunctionSleep : public IFunction { bool use_default_implementation_for_constants() const override { return false; } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { const auto& argument_column = block.get_by_position(arguments[0]).column->convert_to_full_column_if_const(); @@ -133,7 +133,7 @@ class FunctionVersion : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { auto res_column = ColumnString::create(); res_column->insert_data(version.c_str(), version.length()); auto col_const = ColumnConst::create(std::move(res_column), input_rows_count); diff --git a/be/src/vec/functions/function_uuid.cpp b/be/src/vec/functions/function_uuid.cpp index cee5fd7a3635034..1fb2f855bbb6df8 100644 --- a/be/src/vec/functions/function_uuid.cpp +++ b/be/src/vec/functions/function_uuid.cpp @@ -63,7 +63,7 @@ class FunctionUuidtoInt : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { const auto& arg_column = assert_cast(*block.get_by_position(arguments[0]).column); @@ -164,7 +164,7 @@ class FunctionInttoUuid : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { const auto& arg_column = assert_cast(*block.get_by_position(arguments[0]).column); auto result_column = 
ColumnString::create(); diff --git a/be/src/vec/functions/function_variadic_arguments.h b/be/src/vec/functions/function_variadic_arguments.h index c8148fc90d078bf..530b204e8067a18 100644 --- a/be/src/vec/functions/function_variadic_arguments.h +++ b/be/src/vec/functions/function_variadic_arguments.h @@ -55,7 +55,7 @@ class FunctionVariadicArgumentsBase : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { ToDataType to_type; auto column = to_type.create_column(); column->reserve(input_rows_count); diff --git a/be/src/vec/functions/function_variant_element.cpp b/be/src/vec/functions/function_variant_element.cpp index 53340cd61ea8d86..e04ff54dbe9d7c2 100644 --- a/be/src/vec/functions/function_variant_element.cpp +++ b/be/src/vec/functions/function_variant_element.cpp @@ -95,7 +95,7 @@ class FunctionVariantElement : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { const auto* variant_col = check_and_get_column( remove_nullable(block.get_by_position(arguments[0]).column).get()); if (!variant_col) { diff --git a/be/src/vec/functions/function_width_bucket.cpp b/be/src/vec/functions/function_width_bucket.cpp index 7f9a3ae31e87653..83ba96118a0efd5 100644 --- a/be/src/vec/functions/function_width_bucket.cpp +++ b/be/src/vec/functions/function_width_bucket.cpp @@ -61,7 +61,7 @@ class FunctionWidthBucket : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { ColumnPtr expr_ptr = 
block.get_by_position(arguments[0]).column->convert_to_full_column_if_const(); ColumnPtr min_value_ptr = diff --git a/be/src/vec/functions/functions_comparison.h b/be/src/vec/functions/functions_comparison.h index bb1666ab8640701..78a89071c76efda 100644 --- a/be/src/vec/functions/functions_comparison.h +++ b/be/src/vec/functions/functions_comparison.h @@ -273,7 +273,7 @@ class FunctionComparison : public IFunction { private: template - bool execute_num_right_type(Block& block, size_t result, const ColumnVector* col_left, + bool execute_num_right_type(Block& block, uint32_t result, const ColumnVector* col_left, const IColumn* col_right_untyped) const { if (const ColumnVector* col_right = check_and_get_column>(col_right_untyped)) { @@ -303,7 +303,7 @@ class FunctionComparison : public IFunction { } template - bool execute_num_const_right_type(Block& block, size_t result, const ColumnConst* col_left, + bool execute_num_const_right_type(Block& block, uint32_t result, const ColumnConst* col_left, const IColumn* col_right_untyped) const { if (const ColumnVector* col_right = check_and_get_column>(col_right_untyped)) { @@ -332,7 +332,7 @@ class FunctionComparison : public IFunction { } template - bool execute_num_left_type(Block& block, size_t result, const IColumn* col_left_untyped, + bool execute_num_left_type(Block& block, uint32_t result, const IColumn* col_left_untyped, const IColumn* col_right_untyped) const { if (const ColumnVector* col_left = check_and_get_column>(col_left_untyped)) { @@ -392,7 +392,7 @@ class FunctionComparison : public IFunction { return false; } - Status execute_decimal(Block& block, size_t result, const ColumnWithTypeAndName& col_left, + Status execute_decimal(Block& block, uint32_t result, const ColumnWithTypeAndName& col_left, const ColumnWithTypeAndName& col_right) const { TypeIndex left_number = col_left.type->get_type_id(); TypeIndex right_number = col_right.type->get_type_id(); @@ -414,7 +414,8 @@ class FunctionComparison : public 
IFunction { return Status::OK(); } - Status execute_string(Block& block, size_t result, const IColumn* c0, const IColumn* c1) const { + Status execute_string(Block& block, uint32_t result, const IColumn* c0, + const IColumn* c1) const { const ColumnString* c0_string = check_and_get_column(c0); const ColumnString* c1_string = check_and_get_column(c1); const ColumnConst* c0_const = check_and_get_column_const_string_or_fixedstring(c0); @@ -485,7 +486,7 @@ class FunctionComparison : public IFunction { return Status::OK(); } - void execute_generic_identical_types(Block& block, size_t result, const IColumn* c0, + void execute_generic_identical_types(Block& block, uint32_t result, const IColumn* c0, const IColumn* c1) const { bool c0_const = is_column_const(*c0); bool c1_const = is_column_const(*c1); @@ -512,7 +513,7 @@ class FunctionComparison : public IFunction { } } - Status execute_generic(Block& block, size_t result, const ColumnWithTypeAndName& c0, + Status execute_generic(Block& block, uint32_t result, const ColumnWithTypeAndName& c0, const ColumnWithTypeAndName& c1) const { execute_generic_identical_types(block, result, c0.column.get(), c1.column.get()); return Status::OK(); @@ -600,7 +601,7 @@ class FunctionComparison : public IFunction { } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { const auto& col_with_type_and_name_left = block.get_by_position(arguments[0]); const auto& col_with_type_and_name_right = block.get_by_position(arguments[1]); const IColumn* col_left_untyped = col_with_type_and_name_left.column.get(); diff --git a/be/src/vec/functions/functions_geo.h b/be/src/vec/functions/functions_geo.h index ac0358d42f59f41..92d6bc1b1b0381d 100644 --- a/be/src/vec/functions/functions_geo.h +++ b/be/src/vec/functions/functions_geo.h @@ -71,7 +71,7 @@ class GeoFunction : public IFunction { } Status 
execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) const override { + uint32_t result, size_t input_rows_count) const override { if constexpr (Impl::NEED_CONTEXT) { return Impl::execute(context, block, arguments, result); } else { diff --git a/be/src/vec/functions/functions_logical.cpp b/be/src/vec/functions/functions_logical.cpp index 0643fc0a9057873..0f474851f032ee0 100644 --- a/be/src/vec/functions/functions_logical.cpp +++ b/be/src/vec/functions/functions_logical.cpp @@ -206,7 +206,7 @@ DataTypePtr FunctionAnyArityLogical::get_return_type_impl( template Status FunctionAnyArityLogical::execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result_index, + uint32_t result_index, size_t input_rows_count) const { ColumnRawPtrs args_in; for (const auto arg_index : arguments) @@ -268,7 +268,8 @@ bool functionUnaryExecuteType(Block& block, const ColumnNumbers& arguments, size template