From 5629a0735ed5dab6dc25edc8d7bdc611a7e0ded9 Mon Sep 17 00:00:00 2001 From: Bikramjeet Vig Date: Thu, 10 Aug 2023 22:25:48 -0700 Subject: [PATCH] Log task Id when a fatal signal is received Test Plan: Modified existing tests --- CMakeLists.txt | 5 +++-- velox/common/process/ThreadDebugInfo.cpp | 7 +++++-- velox/common/process/ThreadDebugInfo.h | 1 + velox/exec/Driver.cpp | 2 +- velox/exec/Task.cpp | 2 +- velox/exec/tests/ThreadDebugInfoTest.cpp | 9 +++++---- 6 files changed, 16 insertions(+), 10 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index cd7753d893ef..b9fec7d858a6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -463,10 +463,11 @@ endif() # not provide new enough versions for us to use. if(CMAKE_HOST_SYSTEM_NAME MATCHES "Darwin") execute_process( - COMMAND brew --prefix bison + COMMAND /opt/homebrew/bin/brew --prefix bison RESULT_VARIABLE BREW_BISON OUTPUT_VARIABLE BREW_BISON_PREFIX OUTPUT_STRIP_TRAILING_WHITESPACE) + message(BREW_BISON="${BREW_BISON}") if(BREW_BISON EQUAL 0 AND EXISTS "${BREW_BISON_PREFIX}") message( STATUS "Found Bison keg installed by Homebrew at ${BREW_BISON_PREFIX}") @@ -474,7 +475,7 @@ if(CMAKE_HOST_SYSTEM_NAME MATCHES "Darwin") endif() execute_process( - COMMAND brew --prefix flex + COMMAND /opt/homebrew/bin/brew --prefix flex RESULT_VARIABLE BREW_FLEX OUTPUT_VARIABLE BREW_FLEX_PREFIX OUTPUT_STRIP_TRAILING_WHITESPACE) diff --git a/velox/common/process/ThreadDebugInfo.cpp b/velox/common/process/ThreadDebugInfo.cpp index d8bc751f86c0..20ad2d7cfb84 100644 --- a/velox/common/process/ThreadDebugInfo.cpp +++ b/velox/common/process/ThreadDebugInfo.cpp @@ -30,9 +30,12 @@ static void printCurrentQueryId() { "ThreadDebugInfo object not found."; write(STDERR_FILENO, msg, strlen(msg)); } else { - const char* msg = "Fatal signal handler. Query Id= "; - write(STDERR_FILENO, msg, strlen(msg)); + const char* msg1 = "Fatal signal handler. Query Id= "; + write(STDERR_FILENO, msg1, strlen(msg1)); write(STDERR_FILENO, info->queryId_.c_str(), info->queryId_.length()); + const char* msg2 = " Task Id= "; + write(STDERR_FILENO, msg2, strlen(msg2)); + write(STDERR_FILENO, info->taskId_.c_str(), info->taskId_.length()); } write(STDERR_FILENO, "\n", 1); } diff --git a/velox/common/process/ThreadDebugInfo.h b/velox/common/process/ThreadDebugInfo.h index 57a6e6cdf921..03965741ce97 100644 --- a/velox/common/process/ThreadDebugInfo.h +++ b/velox/common/process/ThreadDebugInfo.h @@ -24,6 +24,7 @@ namespace facebook::velox::process { // handler and logged. struct ThreadDebugInfo { std::string queryId_; + std::string taskId_; }; // A RAII class to store thread local debug information. diff --git a/velox/exec/Driver.cpp b/velox/exec/Driver.cpp index 72d765260da2..7c130a59f2bf 100644 --- a/velox/exec/Driver.cpp +++ b/velox/exec/Driver.cpp @@ -41,7 +41,7 @@ DriverCtx::DriverCtx( splitGroupId(_splitGroupId), partitionId(_partitionId), task(_task), - threadDebugInfo({.queryId_ = task->queryCtx()->queryId()}) {} + threadDebugInfo({task->queryCtx()->queryId(), task->taskId()}) {} const core::QueryConfig& DriverCtx::queryConfig() const { return task->queryCtx()->queryConfig(); diff --git a/velox/exec/Task.cpp b/velox/exec/Task.cpp index a611e50645df..ac8b3ce5c81f 100644 --- a/velox/exec/Task.cpp +++ b/velox/exec/Task.cpp @@ -540,7 +540,7 @@ void Task::start( uint32_t maxDrivers, uint32_t concurrentSplitGroups) { facebook::velox::process::ThreadDebugInfo threadDebugInfo{ - .queryId_ = self->queryCtx()->queryId()}; + self->queryCtx()->queryId(), self->taskId_}; facebook::velox::process::ScopedThreadDebugInfo scopedInfo(threadDebugInfo); try { VELOX_CHECK_GE( diff --git a/velox/exec/tests/ThreadDebugInfoTest.cpp b/velox/exec/tests/ThreadDebugInfoTest.cpp index 8721f5ee61ac..aedc34545df9 100644 --- a/velox/exec/tests/ThreadDebugInfoTest.cpp +++ b/velox/exec/tests/ThreadDebugInfoTest.cpp @@ -63,10 +63,10 @@ TEST_F(ThreadDebugInfoDeathTest, withinSeperateDriverThread) { auto vector = makeRowVector({makeFlatVector({1, 2, 3, 4, 5, 6})}); registerFunction({"segFault"}); auto op = PlanBuilder().values({vector}).project({"segFault(c0)"}).planNode(); - + assertQuery(op, vector); ASSERT_DEATH( (assertQuery(op, vector)), - ".*Fatal signal handler. Query Id= TaskCursorQuery_0.*"); + ".*Fatal signal handler. Query Id= TaskCursorQuery_0 Task Id= test_cursor 1.*"); } TEST_F(ThreadDebugInfoDeathTest, withinQueryCompilation) { @@ -78,7 +78,7 @@ TEST_F(ThreadDebugInfoDeathTest, withinQueryCompilation) { ASSERT_DEATH( (assertQuery(op, vector)), - ".*Fatal signal handler. Query Id= TaskCursorQuery_0.*"); + ".*Fatal signal handler. Query Id= TaskCursorQuery_0 Task Id= test_cursor 1.*"); } TEST_F(ThreadDebugInfoDeathTest, withinTheCallingThread) { @@ -99,7 +99,8 @@ TEST_F(ThreadDebugInfoDeathTest, withinTheCallingThread) { "single.execution.task.0", std::move(plan), 0, queryCtx); ASSERT_DEATH( - (task->next()), ".*Fatal signal handler. Query Id= TaskCursorQuery_0.*"); + (task->next()), + ".*Fatal signal handler. Query Id= TaskCursorQuery_0 Task Id= single.execution.task.0.*"); } #endif