From db28d0d651e8e427ab8e039d6d35ca4a77c81470 Mon Sep 17 00:00:00 2001 From: Jimmy Lu Date: Tue, 16 Apr 2024 13:10:57 -0700 Subject: [PATCH] Fix dataSourceWallNanos calculation Summary: We were not counting `DataSource::addSplit` time before, adding this as `dataSourceAddSplitWallNanos` and rename `dataSourceWallNanos` to `dataSourceNextWallNanos`. Differential Revision: D56201634 --- velox/exec/OperatorUtils.cpp | 3 ++- velox/exec/TableScan.cpp | 10 ++++++++-- velox/exec/tests/PrintPlanWithStatsTest.cpp | 6 ++++-- 3 files changed, 14 insertions(+), 5 deletions(-) diff --git a/velox/exec/OperatorUtils.cpp b/velox/exec/OperatorUtils.cpp index 8f3bab082be0c..e1935e6631ca0 100644 --- a/velox/exec/OperatorUtils.cpp +++ b/velox/exec/OperatorUtils.cpp @@ -120,7 +120,8 @@ void gatherCopy( // per event. This function returns true for such metrics. bool shouldAggregateRuntimeMetric(const std::string& name) { static const folly::F14FastSet metricNames{ - "dataSourceWallNanos", + "dataSourceAddSplitWallNanos", + "dataSourceNextWallNanos", "dataSourceLazyWallNanos", "queuedWallNanos", "flushTimes"}; diff --git a/velox/exec/TableScan.cpp b/velox/exec/TableScan.cpp index 56355bb650f28..6d2c51d87b19f 100644 --- a/velox/exec/TableScan.cpp +++ b/velox/exec/TableScan.cpp @@ -179,7 +179,13 @@ RowVectorPtr TableScan::getOutput() { dataSource_->setFromDataSource(std::move(preparedDataSource)); } else { curStatus_ = "getOutput: adding split"; + const auto addSplitStartMicros = getCurrentTimeMicro(); dataSource_->addSplit(connectorSplit); + stats_.wlock()->addRuntimeStat( + "dataSourceAddSplitWallNanos", + RuntimeCounter( + (getCurrentTimeMicro() - addSplitStartMicros) * 1'000, + RuntimeCounter::Unit::kNanos)); } curStatus_ = "getOutput: updating stats_.numSplits"; ++stats_.wlock()->numSplits; @@ -218,10 +224,10 @@ RowVectorPtr TableScan::getOutput() { checkPreload(); { - curStatus_ = "getOutput: updating stats_.dataSourceWallNanos"; + curStatus_ = "getOutput: updating stats_.dataSourceNextWallNanos"; auto lockedStats = stats_.wlock(); lockedStats->addRuntimeStat( - "dataSourceWallNanos", + "dataSourceNextWallNanos", RuntimeCounter( (getCurrentTimeMicro() - ioTimeStartMicros) * 1'000, RuntimeCounter::Unit::kNanos)); diff --git a/velox/exec/tests/PrintPlanWithStatsTest.cpp b/velox/exec/tests/PrintPlanWithStatsTest.cpp index 2c952f7ddea9d..8312644f45eb2 100644 --- a/velox/exec/tests/PrintPlanWithStatsTest.cpp +++ b/velox/exec/tests/PrintPlanWithStatsTest.cpp @@ -185,7 +185,8 @@ TEST_F(PrintPlanWithStatsTest, innerJoinWithTableScan) { {" runningGetOutputWallNanos\\s+sum: .+, count: 1, min: .+, max: .+"}, {" -- TableScan\\[table: hive_table\\] -> c0:INTEGER, c1:BIGINT"}, {" Input: 2000 rows \\(.+\\), Raw Input: 20480 rows \\(.+\\), Output: 2000 rows \\(.+\\), Cpu time: .+, Blocked wall time: .+, Peak memory: .+, Memory allocations: .+, Threads: 1, Splits: 20"}, - {" dataSourceWallNanos [ ]* sum: .+, count: 1, min: .+, max: .+"}, + {" dataSourceAddSplitWallNanos[ ]* sum: .+, count: 1, min: .+, max: .+"}, + {" dataSourceNextWallNanos[ ]* sum: .+, count: 1, min: .+, max: .+"}, {" dynamicFiltersAccepted[ ]* sum: 1, count: 1, min: 1, max: 1"}, {" flattenStringDictionaryValues [ ]* sum: 0, count: 1, min: 0, max: 0"}, {" ioWaitNanos [ ]* sum: .+, count: .+ min: .+, max: .+"}, @@ -280,7 +281,8 @@ TEST_F(PrintPlanWithStatsTest, partialAggregateWithTableScan) { {" runningGetOutputWallNanos\\s+sum: .+, count: 1, min: .+, max: .+"}, {" -- TableScan\\[table: hive_table\\] -> c0:BIGINT, c1:INTEGER, c2:SMALLINT, c3:REAL, c4:DOUBLE, c5:VARCHAR"}, {" Input: 10000 rows \\(.+\\), Output: 10000 rows \\(.+\\), Cpu time: .+, Blocked wall time: .+, Peak memory: .+, Memory allocations: .+, Threads: 1, Splits: 1"}, - {" dataSourceWallNanos[ ]* sum: .+, count: 1, min: .+, max: .+"}, + {" dataSourceAddSplitWallNanos[ ]* sum: .+, count: 1, min: .+, max: .+"}, + {" dataSourceNextWallNanos[ ]* sum: .+, count: 1, min: .+, max: .+"}, {" flattenStringDictionaryValues [ ]* sum: 0, count: 1, min: 0, max: 0"}, {" ioWaitNanos [ ]* sum: .+, count: .+ min: .+, max: .+"}, {" localReadBytes [ ]* sum: 0B, count: 1, min: 0B, max: 0B"},