From fdb309f5a60d19859c870606d116c63bd42964af Mon Sep 17 00:00:00 2001 From: Krishna Pai Date: Wed, 18 Dec 2024 13:10:39 -0800 Subject: [PATCH] misc(revert): Backout `Add toString() to OutputBuffer stats` (#11562) (#11906) Summary: Pull Request resolved: https://github.com/facebookincubator/velox/pull/11906 Backing out D67304118: Add toString() to OutputBuffer stats since it breaks certain tests : https://github.com/facebookincubator/velox/actions/runs/12305614280/job/34345592257 . Reviewed By: bikramSingh91 Differential Revision: D67403265 fbshipit-source-id: 8c0723b2748645e88a68ddf57c2b1cc5dbbb2095 --- velox/common/testutil/CMakeLists.txt | 4 -- velox/common/testutil/OutputMatcher.cpp | 67 -------------------- velox/common/testutil/OutputMatcher.h | 47 -------------- velox/exec/OutputBuffer.h | 36 +---------- velox/exec/tests/CMakeLists.txt | 1 - velox/exec/tests/OutputBufferManagerTest.cpp | 39 +----------- velox/exec/tests/PrintPlanWithStatsTest.cpp | 50 +++++++++++++-- 7 files changed, 45 insertions(+), 199 deletions(-) delete mode 100644 velox/common/testutil/OutputMatcher.cpp delete mode 100644 velox/common/testutil/OutputMatcher.h diff --git a/velox/common/testutil/CMakeLists.txt b/velox/common/testutil/CMakeLists.txt index 2a5a62c3dfeb..6e8e05b849ff 100644 --- a/velox/common/testutil/CMakeLists.txt +++ b/velox/common/testutil/CMakeLists.txt @@ -21,9 +21,5 @@ velox_link_libraries( PRIVATE glog::glog Folly::folly) if(${VELOX_BUILD_TESTING}) - velox_add_library(velox_test_output_matcher OutputMatcher.cpp) - velox_link_libraries(velox_test_output_matcher PUBLIC Folly::folly - GTest::gtest re2::re2) - add_subdirectory(tests) endif() diff --git a/velox/common/testutil/OutputMatcher.cpp b/velox/common/testutil/OutputMatcher.cpp deleted file mode 100644 index 1aaba67ff593..000000000000 --- a/velox/common/testutil/OutputMatcher.cpp +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright (c) Facebook, Inc. and its affiliates. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -/* - * Copyright (c) Facebook, Inc. and its affiliates. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "velox/common/testutil/OutputMatcher.h" - -#include - -#include -#include -#include - -void OutputMatcher::compareOutputs( - const std::string& testName, - const std::string& result, - const std::vector& expectedRegex) { - std::string line; - std::string eline; - std::istringstream iss(result); - int lineCount = 0; - int expectedLineIndex = 0; - for (; std::getline(iss, line);) { - lineCount++; - std::vector potentialLines; - auto expectedLine = expectedRegex.at(expectedLineIndex++); - while (!RE2::FullMatch(line, expectedLine.line)) { - potentialLines.push_back(expectedLine.line); - if (!expectedLine.optional) { - ASSERT_FALSE(true) << "Output did not match " << "Source:" << testName - << ", Line number:" << lineCount - << ", Line: " << line << ", Expected Line one of: " - << folly::join(",", potentialLines); - } - expectedLine = expectedRegex.at(expectedLineIndex++); - } - } - for (int i = expectedLineIndex; i < expectedRegex.size(); i++) { - ASSERT_TRUE(expectedRegex[expectedLineIndex].optional); - } -} diff --git a/velox/common/testutil/OutputMatcher.h b/velox/common/testutil/OutputMatcher.h deleted file mode 100644 index 792731c92133..000000000000 --- a/velox/common/testutil/OutputMatcher.h +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Copyright (c) Facebook, Inc. and its affiliates. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -/* - * Copyright (c) Facebook, Inc. and its affiliates. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#pragma once - -#include -#include - -struct ExpectedLine { - std::string line; - bool optional = false; -}; - -class OutputMatcher { - public: - static void compareOutputs( - const std::string& testName, - const std::string& result, - const std::vector& expectedRegex); -}; diff --git a/velox/exec/OutputBuffer.h b/velox/exec/OutputBuffer.h index fa28d743bd8c..640c1bfcd8e7 100644 --- a/velox/exec/OutputBuffer.h +++ b/velox/exec/OutputBuffer.h @@ -115,18 +115,6 @@ class DestinationBuffer { int64_t bytesSent{0}; int64_t rowsSent{0}; int64_t pagesSent{0}; - - std::string toString() const { - return fmt::format( - "[finished: {}, bytesBuffered: {}, rowsBuffered: {}, pagesBuffered: {}, bytesSent: {}, rowsSent: {}, pagesSent:{}]", - finished, - succinctBytes(bytesBuffered), - rowsBuffered, - pagesBuffered, - succinctBytes(bytesSent), - rowsSent, - pagesSent); - } }; void enqueue(std::shared_ptr data); @@ -266,29 +254,7 @@ class OutputBuffer { /// Stats of the OutputBuffer's destinations. std::vector buffersStats; - std::string toString() const { - std::string destinationBufferStats; - if (!buffersStats.empty()) { - for (int i = 0; i < buffersStats.size(); i++) { - auto& destinationBufferStat = buffersStats[i]; - destinationBufferStats += - fmt::format(" D{}: {}\n", i, destinationBufferStat.toString()); - } - } - - return fmt::format( - "[bufferedBytes: {}, bufferedPages: {}, " - "totalBytesSent: {}, totalRowsSent: {}, totalPagesSent: {}, " - "averageBufferTimeMs: {}, numTopBuffers: {}]\n{}", - succinctBytes(bufferedBytes), - bufferedPages, - succinctBytes(totalBytesSent), - totalRowsSent, - totalPagesSent, - averageBufferTimeMs, - numTopBuffers, - destinationBufferStats); - } + std::string toString() const; }; OutputBuffer( diff --git a/velox/exec/tests/CMakeLists.txt b/velox/exec/tests/CMakeLists.txt index 9683f5008376..2190fc5ae567 100644 --- a/velox/exec/tests/CMakeLists.txt +++ b/velox/exec/tests/CMakeLists.txt @@ -139,7 +139,6 @@ target_link_libraries( velox_hive_connector velox_memory velox_serialization - velox_test_output_matcher velox_test_util velox_type velox_type_test_lib diff --git a/velox/exec/tests/OutputBufferManagerTest.cpp b/velox/exec/tests/OutputBufferManagerTest.cpp index 6b6213d5934d..3902ead5ffc4 100644 --- a/velox/exec/tests/OutputBufferManagerTest.cpp +++ b/velox/exec/tests/OutputBufferManagerTest.cpp @@ -17,12 +17,12 @@ #include #include "folly/experimental/EventCount.h" #include "velox/common/base/tests/GTestUtils.h" -#include "velox/common/testutil/OutputMatcher.h" #include "velox/dwio/common/tests/utils/BatchMaker.h" #include "velox/exec/Task.h" #include "velox/exec/tests/utils/PlanBuilder.h" #include "velox/exec/tests/utils/SerializedPageUtil.h" #include "velox/serializers/CompactRowSerializer.h" +#include "velox/serializers/PrestoSerializer.h" #include "velox/serializers/UnsafeRowSerializer.h" using namespace facebook::velox; @@ -1323,42 +1323,6 @@ TEST_P(AllOutputBufferManagerTest, outputBufferUtilization) { verifyOutputBuffer(task, OutputBufferStatus::kFinished); } -TEST_P(AllOutputBufferManagerTest, printOutputBufferStats) { - const vector_size_t vectorSize = 100; - const std::string taskId = std::to_string(folly::Random::rand32()); - const int numDestinations = 4; - initializeTask( - taskId, - rowType_, - core::PartitionedOutputNode::Kind::kPartitioned, - numDestinations, - 1); - - const int numPages = numDestinations; - for (int pageId = 0; pageId < numPages; ++pageId) { - enqueue(taskId, pageId, rowType_, vectorSize); - fetchOneAndAck(taskId, pageId, 0); - } - - const auto statsEnqueue = getStats(taskId); - OutputMatcher::compareOutputs( - ::testing::UnitTest::GetInstance()->current_test_info()->name(), - statsEnqueue.toString(), - {{"\\[bufferedBytes: ([\\d.]+[KMGT]?[B]?), bufferedPages: (\\d+), totalBytesSent: ([\\d.]+[KMGT]?[B]?), totalRowsSent: (\\d+), totalPagesSent: (\\d+), averageBufferTimeMs: (\\d+), numTopBuffers: (\\d+)\\]"}, - {"\\s*D0: \\[finished: (true|false), bytesBuffered: ([\\d.]+[KMGT]?[B]?), rowsBuffered: (\\d+), pagesBuffered: (\\d+), bytesSent: ([\\d.]+[KMGT]?[B]?), rowsSent: (\\d+), pagesSent:(\\d+)\\]"}, - {"\\s*D1: \\[finished: (true|false), bytesBuffered: ([\\d.]+[KMGT]?[B]?), rowsBuffered: (\\d+), pagesBuffered: (\\d+), bytesSent: ([\\d.]+[KMGT]?[B]?), rowsSent: (\\d+), pagesSent:(\\d+)\\]"}, - {"\\s*D2: \\[finished: (true|false), bytesBuffered: ([\\d.]+[KMGT]?[B]?), rowsBuffered: (\\d+), pagesBuffered: (\\d+), bytesSent: ([\\d.]+[KMGT]?[B]?), rowsSent: (\\d+), pagesSent:(\\d+)\\]"}, - {"\\s*D3: \\[finished: (true|false), bytesBuffered: ([\\d.]+[KMGT]?[B]?), rowsBuffered: (\\d+), pagesBuffered: (\\d+), bytesSent: ([\\d.]+[KMGT]?[B]?), rowsSent: (\\d+), pagesSent:(\\d+)\\]"}}); - - bufferManager_->updateOutputBuffers(taskId, numDestinations, true); - noMoreData(taskId); - for (int pageId = 0; pageId < numPages; ++pageId) { - fetchEndMarker(taskId, pageId, 1); - deleteResults(taskId, pageId); - } - bufferManager_->removeTask(taskId); -} - TEST_P(AllOutputBufferManagerTest, outputBufferStats) { const vector_size_t vectorSize = 100; const std::string taskId = std::to_string(folly::Random::rand32()); @@ -1390,7 +1354,6 @@ TEST_P(AllOutputBufferManagerTest, outputBufferStats) { fetchOne(taskId, 0, pageId); } const auto statsEnqueue = getStats(taskId); - std::cout << statsEnqueue.toString(); ASSERT_EQ(statsEnqueue.buffersStats[0].pagesBuffered, 1); ASSERT_EQ(statsEnqueue.buffersStats[0].rowsBuffered, vectorSize); if (outputKind_ == core::PartitionedOutputNode::Kind::kBroadcast) { diff --git a/velox/exec/tests/PrintPlanWithStatsTest.cpp b/velox/exec/tests/PrintPlanWithStatsTest.cpp index f258e8c55c9a..eb9f9bff740c 100644 --- a/velox/exec/tests/PrintPlanWithStatsTest.cpp +++ b/velox/exec/tests/PrintPlanWithStatsTest.cpp @@ -14,13 +14,15 @@ * limitations under the License. */ -#include "velox/common/testutil/OutputMatcher.h" #include "velox/exec/PlanNodeStats.h" #include "velox/exec/tests/utils/AssertQueryBuilder.h" #include "velox/exec/tests/utils/HiveConnectorTestBase.h" #include "velox/exec/tests/utils/PlanBuilder.h" #include "velox/exec/tests/utils/TempDirectoryPath.h" +#include +#include + using namespace facebook::velox; using namespace facebook::velox::exec::test; @@ -28,6 +30,40 @@ using facebook::velox::exec::test::PlanBuilder; class PrintPlanWithStatsTest : public HiveConnectorTestBase {}; +struct ExpectedLine { + std::string line; + bool optional = false; +}; + +void compareOutputs( + const std::string& testName, + const std::string& result, + const std::vector& expectedRegex) { + std::string line; + std::string eline; + std::istringstream iss(result); + int lineCount = 0; + int expectedLineIndex = 0; + for (; std::getline(iss, line);) { + lineCount++; + std::vector potentialLines; + auto expectedLine = expectedRegex.at(expectedLineIndex++); + while (!RE2::FullMatch(line, expectedLine.line)) { + potentialLines.push_back(expectedLine.line); + if (!expectedLine.optional) { + ASSERT_FALSE(true) << "Output did not match " << "Source:" << testName + << ", Line number:" << lineCount + << ", Line: " << line << ", Expected Line one of: " + << folly::join(",", potentialLines); + } + expectedLine = expectedRegex.at(expectedLineIndex++); + } + } + for (int i = expectedLineIndex; i < expectedRegex.size(); i++) { + ASSERT_TRUE(expectedRegex[expectedLineIndex].optional); + } +} + void ensureTaskCompletion(exec::Task* task) { // ASSERT_TRUE requires a function with return type void. ASSERT_TRUE(waitForTaskCompletion(task)); @@ -93,7 +129,7 @@ TEST_F(PrintPlanWithStatsTest, innerJoinWithTableScan) { "SELECT t.c0, t.c1 + 1, t.c1 + u.c1 FROM t, u WHERE t.c0 = u.c0"); ensureTaskCompletion(task.get()); - OutputMatcher::compareOutputs( + compareOutputs( ::testing::UnitTest::GetInstance()->current_test_info()->name(), printPlanWithStats(*op, task->taskStats()), {{"-- Project\\[4\\]\\[expressions: \\(c0:INTEGER, ROW\\[\"c0\"\\]\\), \\(p1:BIGINT, plus\\(ROW\\[\"c1\"\\],1\\)\\), \\(p2:BIGINT, plus\\(ROW\\[\"c1\"\\],ROW\\[\"u_c1\"\\]\\)\\)\\] -> c0:INTEGER, p1:BIGINT, p2:BIGINT"}, @@ -110,7 +146,7 @@ TEST_F(PrintPlanWithStatsTest, innerJoinWithTableScan) { {" Input: 0 rows \\(.+\\), Output: 100 rows \\(.+\\), Cpu time: .+, Blocked wall time: .+, Peak memory: 0B, Memory allocations: .+, Threads: 1, CPU breakdown: B/I/O/F (.+/.+/.+/.+)"}}); // with custom stats - OutputMatcher::compareOutputs( + compareOutputs( ::testing::UnitTest::GetInstance()->current_test_info()->name(), printPlanWithStats(*op, task->taskStats(), true), {{"-- Project\\[4\\]\\[expressions: \\(c0:INTEGER, ROW\\[\"c0\"\\]\\), \\(p1:BIGINT, plus\\(ROW\\[\"c1\"\\],1\\)\\), \\(p2:BIGINT, plus\\(ROW\\[\"c1\"\\],ROW\\[\"u_c1\"\\]\\)\\)\\] -> c0:INTEGER, p1:BIGINT, p2:BIGINT"}, @@ -216,7 +252,7 @@ TEST_F(PrintPlanWithStatsTest, partialAggregateWithTableScan) { .assertResults( "SELECT c5, max(c0), sum(c1), sum(c2), sum(c3), sum(c4) FROM tmp group by c5"); ensureTaskCompletion(task.get()); - OutputMatcher::compareOutputs( + compareOutputs( ::testing::UnitTest::GetInstance()->current_test_info()->name(), printPlanWithStats(*op, task->taskStats()), {{"-- Aggregation\\[1\\]\\[PARTIAL \\[c5\\] a0 := max\\(ROW\\[\"c0\"\\]\\), a1 := sum\\(ROW\\[\"c1\"\\]\\), a2 := sum\\(ROW\\[\"c2\"\\]\\), a3 := sum\\(ROW\\[\"c3\"\\]\\), a4 := sum\\(ROW\\[\"c4\"\\]\\)\\] -> c5:VARCHAR, a0:BIGINT, a1:BIGINT, a2:BIGINT, a3:DOUBLE, a4:DOUBLE"}, @@ -224,7 +260,7 @@ TEST_F(PrintPlanWithStatsTest, partialAggregateWithTableScan) { {" -- TableScan\\[0\\]\\[table: hive_table\\] -> c0:BIGINT, c1:INTEGER, c2:SMALLINT, c3:REAL, c4:DOUBLE, c5:VARCHAR"}, {" Input: 10000 rows \\(.+\\), Output: 10000 rows \\(.+\\), Cpu time: .+, Blocked wall time: .+, Peak memory: .+, Memory allocations: .+, Threads: 1, Splits: 1, CPU breakdown: B/I/O/F (.+/.+/.+/.+)"}}); - OutputMatcher::compareOutputs( + compareOutputs( ::testing::UnitTest::GetInstance()->current_test_info()->name(), printPlanWithStats(*op, task->taskStats(), true), {{"-- Aggregation\\[1\\]\\[PARTIAL \\[c5\\] a0 := max\\(ROW\\[\"c0\"\\]\\), a1 := sum\\(ROW\\[\"c1\"\\]\\), a2 := sum\\(ROW\\[\"c2\"\\]\\), a3 := sum\\(ROW\\[\"c3\"\\]\\), a4 := sum\\(ROW\\[\"c4\"\\]\\)\\] -> c5:VARCHAR, a0:BIGINT, a1:BIGINT, a2:BIGINT, a3:DOUBLE, a4:DOUBLE"}, @@ -289,7 +325,7 @@ TEST_F(PrintPlanWithStatsTest, tableWriterWithTableScan) { .splits(makeHiveConnectorSplits({filePath})) .copyResults(pool(), task); ensureTaskCompletion(task.get()); - OutputMatcher::compareOutputs( + compareOutputs( ::testing::UnitTest::GetInstance()->current_test_info()->name(), printPlanWithStats(*writePlan, task->taskStats()), {{R"(-- TableWrite\[1\]\[.+InsertTableHandle .+)"}, @@ -297,7 +333,7 @@ TEST_F(PrintPlanWithStatsTest, tableWriterWithTableScan) { {R"( -- TableScan\[0\]\[table: hive_table\] -> c0:BIGINT, c1:INTEGER, c2:SMALLINT, c3:REAL, c4:DOUBLE, c5:VARCHAR)"}, {R"( Input: 100 rows \(.+\), Output: 100 rows \(.+\), Cpu time: .+, Blocked wall time: .+, Peak memory: .+, Memory allocations: .+, Threads: 1, Splits: 1, CPU breakdown: B/I/O/F (.+/.+/.+/.+))"}}); - OutputMatcher::compareOutputs( + compareOutputs( ::testing::UnitTest::GetInstance()->current_test_info()->name(), printPlanWithStats(*writePlan, task->taskStats(), true), {{R"(-- TableWrite\[1\]\[.+InsertTableHandle .+)"},