Skip to content

Commit

Permalink
Add processedStrides and processedSplits metrics facebookincubator#6123
Browse files Browse the repository at this point in the history
  • Loading branch information
Binwei Yang authored and binwei committed Jul 1, 2024
1 parent 08a5c03 commit bf7b601
Show file tree
Hide file tree
Showing 6 changed files with 19 additions and 1 deletion.
1 change: 1 addition & 0 deletions velox/connectors/hive/SplitReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -302,6 +302,7 @@ bool SplitReader::checkIfSplitIsEmpty(
}
}

++runtimeStats.processedSplits;
return emptySplit_;
}

Expand Down
8 changes: 8 additions & 0 deletions velox/dwio/common/Statistics.h
Original file line number Diff line number Diff line change
Expand Up @@ -529,20 +529,28 @@ struct RuntimeStatistics {
// Number of splits skipped based on statistics.
int64_t skippedSplits{0};

// Number of splits processed based on statistics.
int64_t processedSplits{0};

// Total bytes in splits skipped based on statistics.
int64_t skippedSplitBytes{0};

// Number of strides (row groups) skipped based on statistics.
int64_t skippedStrides{0};

// Number of strides (row groups) processed based on statistics.
int64_t processedStrides{0};

ColumnReaderStatistics columnReaderStatistics;

std::unordered_map<std::string, RuntimeCounter> toMap() {
return {
{"skippedSplits", RuntimeCounter(skippedSplits)},
{"processedSplits", RuntimeCounter(processedSplits)},
{"skippedSplitBytes",
RuntimeCounter(skippedSplitBytes, RuntimeCounter::Unit::kBytes)},
{"skippedStrides", RuntimeCounter(skippedStrides)},
{"processedStrides", RuntimeCounter(processedStrides)},
{"flattenStringDictionaryValues",
RuntimeCounter(columnReaderStatistics.flattenStringDictionaryValues)}};
}
Expand Down
1 change: 1 addition & 0 deletions velox/dwio/dwrf/reader/DwrfReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -505,6 +505,7 @@ void DwrfRowReader::checkSkipStrides(uint64_t strideSize) {
currentStride++;
skippedStrides_++;
}
processedStrides_++;
if (foundStridesToSkip && currentRowInStripe_ < rowsInCurrentStripe_) {
getSelectiveColumnReader()->seekToRowGroup(currentStride);
}
Expand Down
5 changes: 4 additions & 1 deletion velox/dwio/dwrf/reader/DwrfReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ class DwrfRowReader : public StrideIndexProvider,
void updateRuntimeStats(
dwio::common::RuntimeStatistics& stats) const override {
stats.skippedStrides += skippedStrides_;
stats.processedStrides += processedStrides_;
stats.columnReaderStatistics.flattenStringDictionaryValues +=
columnReaderStatistics_.flattenStringDictionaryValues;
}
Expand Down Expand Up @@ -203,7 +204,9 @@ class DwrfRowReader : public StrideIndexProvider,
std::unordered_map<uint32_t, std::vector<uint64_t>> stripeStridesToSkip_;
// Number of skipped strides.
int64_t skippedStrides_{0};

// Number of processed strides.
int64_t processedStrides_{0};

// Set to true after clearing filter caches, i.e. adding a dynamic
// filter. Causes filters to be re-evaluated against stride stats on
// next stride instead of next stripe.
Expand Down
1 change: 1 addition & 0 deletions velox/dwio/parquet/reader/ParquetReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -910,6 +910,7 @@ class ParquetRowReader::Impl {

void updateRuntimeStats(dwio::common::RuntimeStatistics& stats) const {
stats.skippedStrides += rowGroups_.size() - rowGroupIds_.size();
stats.processedStrides += rowGroupIds_.size();
}

void resetFilterCaches() {
Expand Down
4 changes: 4 additions & 0 deletions velox/exec/tests/PrintPlanWithStatsTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,8 @@ TEST_F(PrintPlanWithStatsTest, innerJoinWithTableScan) {
{" prefetchBytes [ ]* sum: .+, count: 1, min: .+, max: .+"},
{" preloadedSplits[ ]+sum: .+, count: .+, min: .+, max: .+",
true},
{" processedSplits [ ]* sum: 1, count: 1, min: 1, max: 1"},
{" processedStrides [ ]* sum: 40, count: 1, min: 40, max: 40"},
{" ramReadBytes [ ]* sum: .+, count: 1, min: .+, max: .+"},
{" readyPreloadedSplits[ ]+sum: .+, count: .+, min: .+, max: .+",
true},
Expand Down Expand Up @@ -293,6 +295,8 @@ TEST_F(PrintPlanWithStatsTest, partialAggregateWithTableScan) {
{" overreadBytes[ ]* sum: 0B, count: 1, min: 0B, max: 0B"},

{" prefetchBytes [ ]* sum: .+, count: 1, min: .+, max: .+"},
{" processedSplits [ ]* sum: 1, count: 1, min: 1, max: 1"},
{" processedStrides [ ]* sum: 3, count: 1, min: 3, max: 3"},
{" preloadedSplits[ ]+sum: .+, count: .+, min: .+, max: .+",
true},
{" ramReadBytes [ ]* sum: .+, count: 1, min: .+, max: .+"},
Expand Down

0 comments on commit bf7b601

Please sign in to comment.