Skip to content

Commit

Permalink
Remove the time-consuming tests in RowStreamingWindow (#10883)
Browse files Browse the repository at this point in the history
Summary:
[rowBasedStreamingWindowOOM](https://github.com/facebookincubator/velox/blob/main/velox/exec/tests/WindowTest.cpp#L84) already tests the OOM case for RowBasedStreamingWindow, so the rowBasedStreamingWindowMemoryUsage test is no longer needed to compare memory usage.

Pull Request resolved: #10883

Reviewed By: gggrace14

Differential Revision: D61956298

Pulled By: xiaoxmeng

fbshipit-source-id: 5233070235b96a14fc7d1be3c4095ebf7de191f4
  • Loading branch information
JkSelf authored and facebook-github-bot committed Aug 29, 2024
1 parent 00194ad commit 2ce8f71
Showing 1 changed file with 0 additions and 50 deletions.
50 changes: 0 additions & 50 deletions velox/exec/tests/WindowTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -141,56 +141,6 @@ TEST_F(WindowTest, rowBasedStreamingWindowOOM) {
testWindowBuild(false);
}

// Compares the peak memory of the sort-based Window operator against the
// row-based streaming Window operator at two input sizes. Per the inline
// comment below: the sort-based operator's peak grows with input volume,
// while the streaming operator's peak tracks only one batch of a single
// partition and therefore stays flat.
TEST_F(WindowTest, rowBasedStreamingWindowMemoryUsage) {
// Runs a row_number() window query over `size` rows using either the
// streaming or the regular window operator, and returns the window plan
// node's peak memory bytes taken from the finished task's stats.
auto memoryUsage = [&](bool useStreamingWindow, vector_size_t size) {
auto data = makeRowVector(
{"d", "p", "s"},
{
// Payload.
makeFlatVector<int64_t>(size, [](auto row) { return row; }),
// Partition key.
makeFlatVector<int16_t>(size, [](auto row) { return row % 11; }),
// Sorting key.
makeFlatVector<int32_t>(size, [](auto row) { return row; }),
});

createDuckDbTable({data});

// Abstract the common values vector split.
auto valuesSplit = split(data, 10);
core::PlanNodeId windowId;
auto builder = PlanBuilder().values(valuesSplit);
if (useStreamingWindow) {
// The streaming window requires input sorted on the partition and
// sorting keys, so an explicit orderBy precedes it.
builder.orderBy({"p", "s"}, false)
.streamingWindow({"row_number() over (partition by p order by s)"});
} else {
builder.window({"row_number() over (partition by p order by s)"});
}
auto plan = builder.capturePlanNodeId(windowId).planNode();
// NOTE(review): the small preferred-output-batch-bytes value presumably
// forces the operators to produce many small batches — confirm against
// QueryConfig semantics.
auto task =
AssertQueryBuilder(plan, duckDbQueryRunner_)
.config(core::QueryConfig::kPreferredOutputBatchBytes, "1024")
.assertResults(
"SELECT *, row_number() over (partition by p order by s) FROM tmp");

return exec::toPlanStats(task->taskStats()).at(windowId).peakMemoryBytes;
};

const vector_size_t smallSize = 100'000;
const vector_size_t largeSize = 1'000'000;
// As the volume of data increases, the peak memory usage of the sort-based
// window will increase (2418624 vs 17098688). Since the peak memory usage of
// the RowBased Window represents the one batch data in a single partition,
// the peak memory usage will not increase as the volume of data grows.
auto sortWindowSmallUsage = memoryUsage(false, smallSize);
auto sortWindowLargeUsage = memoryUsage(false, largeSize);
ASSERT_GT(sortWindowLargeUsage, sortWindowSmallUsage);

auto rowWindowSmallUsage = memoryUsage(true, smallSize);
auto rowWindowLargeUsage = memoryUsage(true, largeSize);
ASSERT_EQ(rowWindowSmallUsage, rowWindowLargeUsage);
}

DEBUG_ONLY_TEST_F(WindowTest, rankRowStreamingWindowBuild) {
auto data = makeRowVector(
{"c1"},
Expand Down

0 comments on commit 2ce8f71

Please sign in to comment.