Skip to content

Commit

Permalink
Simplify ReaderBase::scheduleRowGroups() in ParquetReader (#7337)
Browse the repository at this point in the history
Summary: Pull Request resolved: #7337

Reviewed By: xiaoxmeng

Differential Revision: D51437465

Pulled By: Yuhta

fbshipit-source-id: 68f31a212b1efcee63a23d5f754931c46841a849
  • Loading branch information
yingsu00 authored and facebook-github-bot committed Nov 17, 2023
1 parent c9f2b2a commit f0c1158
Showing 1 changed file with 8 additions and 16 deletions.
24 changes: 8 additions & 16 deletions velox/dwio/parquet/reader/ParquetReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -568,24 +568,16 @@ void ReaderBase::scheduleRowGroups(
const std::vector<uint32_t>& rowGroupIds,
int32_t currentGroup,
StructColumnReader& reader) {
auto thisGroup = rowGroupIds[currentGroup];
auto nextGroup =
currentGroup + 1 < rowGroupIds.size() ? rowGroupIds[currentGroup + 1] : 0;
auto input = inputs_[thisGroup].get();
if (!input) {
inputs_[thisGroup] = reader.loadRowGroup(thisGroup, input_);
}
for (auto counter = 0; counter < options_.prefetchRowGroups(); ++counter) {
if (nextGroup) {
if (inputs_.count(nextGroup) == 0) {
inputs_[nextGroup] = reader.loadRowGroup(nextGroup, input_);
}
} else {
break;
auto numRowGroupsToLoad = std::min(
options_.prefetchRowGroups() + 1,
static_cast<int64_t>(rowGroupIds.size() - currentGroup));
for (auto i = 0; i < numRowGroupsToLoad; i++) {
auto thisGroup = rowGroupIds[currentGroup + i];
if (!inputs_[thisGroup]) {
inputs_[thisGroup] = reader.loadRowGroup(thisGroup, input_);
}
nextGroup =
nextGroup + 1 < rowGroupIds.size() ? rowGroupIds[nextGroup + 1] : 0;
}

if (currentGroup >= 1) {
inputs_.erase(rowGroupIds[currentGroup - 1]);
}
Expand Down

0 comments on commit f0c1158

Please sign in to comment.