Skip to content

Commit

Permalink
Add processedStrides and processedSplits metrics
Browse files Browse the repository at this point in the history
  • Loading branch information
rui-mo committed Mar 27, 2024
1 parent 7fc0966 commit 41a0f72
Show file tree
Hide file tree
Showing 6 changed files with 18 additions and 0 deletions.
1 change: 1 addition & 0 deletions velox/connectors/hive/SplitReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,7 @@ void SplitReader::prepareSplit(
runtimeStats.skippedSplitBytes += hiveSplit_->length;
return;
}
++runtimeStats.processedSplits;

auto& fileType = baseReader_->rowType();
auto columnTypes = adaptColumns(fileType, baseReaderOpts_.getFileSchema());
Expand Down
8 changes: 8 additions & 0 deletions velox/dwio/common/Statistics.h
Original file line number Diff line number Diff line change
Expand Up @@ -529,20 +529,28 @@ struct RuntimeStatistics {
// Number of splits skipped based on statistics.
int64_t skippedSplits{0};

// Number of splits processed based on statistics.
int64_t processedSplits{0};

// Total bytes in splits skipped based on statistics.
int64_t skippedSplitBytes{0};

// Number of strides (row groups) skipped based on statistics.
int64_t skippedStrides{0};

// Number of strides (row groups) processed based on statistics.
int64_t processedStrides{0};

ColumnReaderStatistics columnReaderStatistics;

std::unordered_map<std::string, RuntimeCounter> toMap() {
return {
{"skippedSplits", RuntimeCounter(skippedSplits)},
{"processedSplits", RuntimeCounter(processedSplits)},
{"skippedSplitBytes",
RuntimeCounter(skippedSplitBytes, RuntimeCounter::Unit::kBytes)},
{"skippedStrides", RuntimeCounter(skippedStrides)},
{"processedStrides", RuntimeCounter(processedStrides)},
{"flattenStringDictionaryValues",
RuntimeCounter(columnReaderStatistics.flattenStringDictionaryValues)}};
}
Expand Down
1 change: 1 addition & 0 deletions velox/dwio/dwrf/reader/DwrfReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,7 @@ void DwrfRowReader::checkSkipStrides(uint64_t strideSize) {
currentStride++;
skippedStrides_++;
}
processedStrides_++;
if (foundStridesToSkip && currentRowInStripe_ < rowsInCurrentStripe_) {
selectiveColumnReader_->seekToRowGroup(currentStride);
}
Expand Down
3 changes: 3 additions & 0 deletions velox/dwio/dwrf/reader/DwrfReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ class DwrfRowReader : public StrideIndexProvider,
void updateRuntimeStats(
dwio::common::RuntimeStatistics& stats) const override {
stats.skippedStrides += skippedStrides_;
stats.processedStrides += processedStrides_;
stats.columnReaderStatistics.flattenStringDictionaryValues +=
columnReaderStatistics_.flattenStringDictionaryValues;
}
Expand Down Expand Up @@ -190,6 +191,8 @@ class DwrfRowReader : public StrideIndexProvider,
std::unordered_map<uint32_t, std::vector<uint64_t>> stripeStridesToSkip_;
// Number of skipped strides.
int64_t skippedStrides_{0};
// Number of processed strides.
int64_t processedStrides_{0};

// Set to true after clearing filter caches, i.e. adding a dynamic
// filter. Causes filters to be re-evaluated against stride stats on
Expand Down
1 change: 1 addition & 0 deletions velox/dwio/parquet/reader/ParquetReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -757,6 +757,7 @@ class ParquetRowReader::Impl {

void updateRuntimeStats(dwio::common::RuntimeStatistics& stats) const {
stats.skippedStrides += rowGroups_.size() - rowGroupIds_.size();
stats.processedStrides += rowGroupIds_.size();
}

void resetFilterCaches() {
Expand Down
4 changes: 4 additions & 0 deletions velox/exec/tests/PrintPlanWithStatsTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,8 @@ TEST_F(PrintPlanWithStatsTest, innerJoinWithTableScan) {
{" prefetchBytes [ ]* sum: .+, count: 1, min: .+, max: .+"},
{" preloadedSplits[ ]+sum: .+, count: .+, min: .+, max: .+",
true},
{" processedSplits [ ]* sum: 1, count: 1, min: 1, max: 1"},
{" processedStrides [ ]* sum: 40, count: 1, min: 40, max: 40"},
{" queryThreadIoLatency[ ]* sum: .+, count: .+ min: .+, max: .+"},
{" ramReadBytes [ ]* sum: .+, count: 1, min: .+, max: .+"},
{" readyPreloadedSplits[ ]+sum: .+, count: .+, min: .+, max: .+",
Expand Down Expand Up @@ -292,6 +294,8 @@ TEST_F(PrintPlanWithStatsTest, partialAggregateWithTableScan) {
{" overreadBytes[ ]* sum: 0B, count: 1, min: 0B, max: 0B"},

{" prefetchBytes [ ]* sum: .+, count: 1, min: .+, max: .+"},
{" processedSplits [ ]* sum: 1, count: 1, min: 1, max: 1"},
{" processedStrides [ ]* sum: 3, count: 1, min: 3, max: 3"},
{" preloadedSplits[ ]+sum: .+, count: .+, min: .+, max: .+",
true},
{" queryThreadIoLatency[ ]* sum: .+, count: .+ min: .+, max: .+"},
Expand Down

0 comments on commit 41a0f72

Please sign in to comment.