From 1c17f162a794a5f799f321d124de491ae43f90a1 Mon Sep 17 00:00:00 2001 From: Igor Gaponenko Date: Tue, 15 Aug 2023 00:12:31 +0000 Subject: [PATCH] Refined states of the worker tasks Introduced an additional state STARTED to differentiate between two events - when a task was started by a scheduler from the actual start time of the corresponding MySQL query. The change is meant to improve the monitoring of the worker tasks. The corresponding monitoring page of the Web Dashboard was modified as well. --- src/wbase/Task.cc | 12 +++++++++-- src/wbase/Task.h | 16 +++++++++------ src/wbase/TaskState.h | 13 +++++++++++- src/wdb/QueryRunner.cc | 1 + src/www/qserv/js/QservWorkerTasks.js | 30 ++++++++++++++++++---------- 5 files changed, 52 insertions(+), 20 deletions(-) diff --git a/src/wbase/Task.cc b/src/wbase/Task.cc index de6640918..c33a78f57 100644 --- a/src/wbase/Task.cc +++ b/src/wbase/Task.cc @@ -383,6 +383,7 @@ void Task::queued(std::chrono::system_clock::time_point const& now) { bool Task::isRunning() const { std::lock_guard lock(_stateMtx); switch (_state) { + case TaskState::STARTED: case TaskState::EXECUTING_QUERY: case TaskState::READING_DATA: return true; @@ -391,13 +392,18 @@ bool Task::isRunning() const { } } -/// Set values associated with the Task being started. void Task::started(std::chrono::system_clock::time_point const& now) { std::lock_guard guard(_stateMtx); - _state = TaskState::EXECUTING_QUERY; + _state = TaskState::STARTED; _startTime = now; } +void Task::queryExecutionStarted() { + std::lock_guard guard(_stateMtx); + _state = TaskState::EXECUTING_QUERY; + _queryExecTime = std::chrono::system_clock::now(); +} + void Task::queried() { std::lock_guard guard(_stateMtx); _state = TaskState::READING_DATA; @@ -430,6 +436,7 @@ std::chrono::milliseconds Task::getRunTime() const { switch (_state) { case TaskState::FINISHED: return std::chrono::duration_cast(_finishTime - _startTime); + case TaskState::STARTED: case TaskState::EXECUTING_QUERY: case TaskState::READING_DATA: return std::chrono::duration_cast(std::chrono::system_clock::now() - @@ -483,6 +490,7 @@ nlohmann::json Task::getJson() const { js["createTime_msec"] = util::TimeUtils::tp2ms(_createTime); js["queueTime_msec"] = util::TimeUtils::tp2ms(_queueTime); js["startTime_msec"] = util::TimeUtils::tp2ms(_startTime); + js["queryExecTime_msec"] = util::TimeUtils::tp2ms(_queryExecTime); js["queryTime_msec"] = util::TimeUtils::tp2ms(_queryTime); js["finishTime_msec"] = util::TimeUtils::tp2ms(_finishTime); js["sizeSoFar"] = _totalSize; diff --git a/src/wbase/Task.h b/src/wbase/Task.h index 849714b2c..2365a9db2 100644 --- a/src/wbase/Task.h +++ b/src/wbase/Task.h @@ -252,6 +252,9 @@ class Task : public util::CommandForThreadPool { void queued(std::chrono::system_clock::time_point const& now); void started(std::chrono::system_clock::time_point const& now); + /// The actual execution of the corresponding MySQL query (or queries) started. + void queryExecutionStarted(); + /// MySQL finished executing queries. void queried(); @@ -335,12 +338,13 @@ class Task : public util::CommandForThreadPool { mutable std::mutex _stateMtx; ///< Mutex to protect state related members _state, _???Time. std::atomic _state{TaskState::CREATED}; std::chrono::system_clock::time_point _createTime = - std::chrono::system_clock::now(); ///< task was created - std::chrono::system_clock::time_point _queueTime; ///< task was queued - std::chrono::system_clock::time_point _startTime; ///< task processing started - std::chrono::system_clock::time_point _queryTime; ///< MySQL finished executing queries - std::chrono::system_clock::time_point _finishTime; ///< data transmission to Czar fiished - size_t _totalSize = 0; ///< Total size of the result so far. + std::chrono::system_clock::now(); ///< task was created + std::chrono::system_clock::time_point _queueTime; ///< task was queued + std::chrono::system_clock::time_point _startTime; ///< task processing started + std::chrono::system_clock::time_point _queryExecTime; ///< query execution at MySQL started + std::chrono::system_clock::time_point _queryTime; ///< MySQL finished executing queries + std::chrono::system_clock::time_point _finishTime; ///< data transmission to Czar fiished + size_t _totalSize = 0; ///< Total size of the result so far. /// Stores information on the query's resource usage. std::weak_ptr _queryStats; diff --git a/src/wbase/TaskState.h b/src/wbase/TaskState.h index 6dae81896..e53e1ab98 100644 --- a/src/wbase/TaskState.h +++ b/src/wbase/TaskState.h @@ -42,7 +42,14 @@ namespace lsst::qserv::wbase { * introducing an additional (Protobuf) representation for those, or converting * the values to strings and vs. */ -enum class TaskState : std::uint64_t { CREATED = 0, QUEUED, EXECUTING_QUERY, READING_DATA, FINISHED }; +enum class TaskState : std::uint64_t { + CREATED = 0, + QUEUED, + STARTED, + EXECUTING_QUERY, + READING_DATA, + FINISHED +}; /// @return The string representation of the input state. /// @throw std::invalid_argument If the string can't be parsed into a valid state. @@ -52,6 +59,8 @@ inline std::string taskState2str(TaskState state) { return "CREATED"; case TaskState::QUEUED: return "QUEUED"; + case TaskState::STARTED: + return "STARTED"; case TaskState::EXECUTING_QUERY: return "EXECUTING_QUERY"; case TaskState::READING_DATA: @@ -71,6 +80,8 @@ inline TaskState str2taskState(std::string const& state) { return TaskState::CREATED; else if (state == "QUEUED") return TaskState::QUEUED; + else if (state == "STARTED") + return TaskState::STARTED; else if (state == "EXECUTING_QUERY") return TaskState::EXECUTING_QUERY; else if (state == "READING_DATA") diff --git a/src/wdb/QueryRunner.cc b/src/wdb/QueryRunner.cc index 60cc11317..49fa1bc8e 100644 --- a/src/wdb/QueryRunner.cc +++ b/src/wdb/QueryRunner.cc @@ -282,6 +282,7 @@ bool QueryRunner::_dispatchChannel() { string const& query = _task->getQueryString(); util::Timer primeT; primeT.start(); + _task->queryExecutionStarted(); MYSQL_RES* res = _primeResult(query); // This runs the SQL query, throws SqlErrorObj on failure. primeT.stop(); needToFreeRes = true; diff --git a/src/www/qserv/js/QservWorkerTasks.js b/src/www/qserv/js/QservWorkerTasks.js index 5dd167afc..7f3d55293 100644 --- a/src/www/qserv/js/QservWorkerTasks.js +++ b/src/www/qserv/js/QservWorkerTasks.js @@ -51,7 +51,7 @@ function(CSSLoader, -
+
@@ -62,15 +62,17 @@ function(CSSLoader,
-
+
@@ -133,6 +135,8 @@ function(CSSLoader, started + query started + queried finished @@ -150,7 +154,7 @@ function(CSSLoader, }); cont.find("button#reset-tasks-form").click(() => { this._set_query(''); - this._set_state("EXECUTING_QUERY,READING_DATA"); + this._set_state("STARTED,EXECUTING_QUERY,READING_DATA"); this._set_max_tasks(200); this._set_update_interval_sec(10); this._load(); @@ -328,7 +332,9 @@ function(CSSLoader,
${task.queueTime_msec ? QservWorkerTasks._timestamp2hhmmss(task.queueTime_msec) : ""}
${QservWorkerTasks._timestamps_diff2str(task.queueTime_msec, task.startTime_msec, snapshotTime_msec)}
${task.startTime_msec ? QservWorkerTasks._timestamp2hhmmss(task.startTime_msec) : ""}
-
${QservWorkerTasks._timestamps_diff2str(task.startTime_msec, task.queryTime_msec, snapshotTime_msec)}
+
${QservWorkerTasks._timestamps_diff2str(task.startTime_msec, task.queryExecTime_msec, snapshotTime_msec)}
+
${task.queryExecTime_msec ? QservWorkerTasks._timestamp2hhmmss(task.queryExecTime_msec) : ""}
+
${QservWorkerTasks._timestamps_diff2str(task.queryExecTime_msec, task.queryTime_msec, snapshotTime_msec)}
${task.queryTime_msec ? QservWorkerTasks._timestamp2hhmmss(task.queryTime_msec) : ""}
${QservWorkerTasks._timestamps_diff2str(task.queryTime_msec, task.finishTime_msec, snapshotTime_msec)}
${task.finishTime_msec ? QservWorkerTasks._timestamp2hhmmss(task.finishTime_msec) : ""}
@@ -362,9 +368,10 @@ function(CSSLoader, switch (state) { case 0: return 'table-warning'; case 1: return 'table-light'; - case 2: return 'table-primary'; - case 3: return 'table-info'; - case 4: return 'table-secondary'; + case 2: return 'table-danger'; + case 3: return 'table-primary'; + case 4: return 'table-info'; + case 5: return 'table-secondary'; default: return 'table-warning'; } } @@ -372,9 +379,10 @@ function(CSSLoader, switch (state) { case 0: return 'CREATED'; case 1: return 'QUEUED'; - case 2: return 'EXECUTING_QUERY'; - case 3: return 'READING_DATA'; - case 4: return 'FINISHED'; + case 2: return 'STARTED'; + case 3: return 'EXECUTING_QUERY'; + case 4: return 'READING_DATA'; + case 5: return 'FINISHED'; default: return 'UNKNOWN'; } }