From cc60822f39967d6dc05e8ca61e78e00957ba9db1 Mon Sep 17 00:00:00 2001 From: Igor Gaponenko Date: Thu, 22 Jun 2023 19:04:02 -0700 Subject: [PATCH] Added a monitor for the results filesystem Also, extended the Web Dashboard to display the filesystem status and usage statistics. --- src/wbase/FileChannelShared.cc | 39 ++++ src/wbase/FileChannelShared.h | 4 + src/wpublish/GetStatusCommand.cc | 2 + src/www/dashboard.html | 2 +- .../css/QservWorkerResultsFilesystem.css | 24 +++ src/www/qserv/js/QservMonitoringDashboard.js | 3 + .../qserv/js/QservWorkerResultsFilesystem.js | 193 ++++++++++++++++++ 7 files changed, 266 insertions(+), 1 deletion(-) create mode 100644 src/www/qserv/css/QservWorkerResultsFilesystem.css create mode 100644 src/www/qserv/js/QservWorkerResultsFilesystem.js diff --git a/src/wbase/FileChannelShared.cc b/src/wbase/FileChannelShared.cc index 111394e094..1f4951d5b9 100644 --- a/src/wbase/FileChannelShared.cc +++ b/src/wbase/FileChannelShared.cc @@ -43,6 +43,7 @@ #include "lsst/log/Log.h" using namespace std; +using namespace nlohmann; namespace fs = boost::filesystem; namespace wconfig = lsst::qserv::wconfig; @@ -146,6 +147,44 @@ void FileChannelShared::cleanUpResults(QueryId queryId) { context << "removed " << numFilesRemoved << " result files from " << dirPath << "."); } +json FileChannelShared::statusToJson() { + string const context = "FileChannelShared::" + string(__func__) + " "; + auto const config = wconfig::WorkerConfig::instance(); + string const protocol = wconfig::WorkerConfig::protocol2str(config->resultDeliveryProtocol()); + fs::path const dirPath = config->resultsDirname(); + json result = json::object({{"protocol", protocol}, + {"folder", dirPath.string()}, + {"capacity_bytes", -1}, + {"free_bytes", -1}, + {"available_bytes", -1}, + {"num_result_files", -1}, + {"size_result_files_bytes", -1}}); + lock_guard const lock(_resultsDirCleanupMtx); + try { + auto const space = fs::space(dirPath); + result["capacity_bytes"] = space.capacity; 
+ result["free_bytes"] = space.free; + result["available_bytes"] = space.available; + uintmax_t sizeResultFilesBytes = 0; + uintmax_t numResultFiles = 0; + string const ext = ".proto"; + auto itr = fs::directory_iterator(dirPath); + for (auto&& entry : boost::make_iterator_range(itr, {})) { + auto const filePath = entry.path(); + if (filePath.has_filename() && filePath.has_extension() && (filePath.extension() == ext)) { + numResultFiles++; + sizeResultFilesBytes += fs::file_size(filePath); + } + } + result["num_result_files"] = numResultFiles; + result["size_result_files_bytes"] = sizeResultFilesBytes; + } catch (exception const& ex) { + LOGS(_log, LOG_LVL_WARN, + context << "failed to get folder stats for " << dirPath << ", ex: " << ex.what()); + } + return result; +} + FileChannelShared::Ptr FileChannelShared::create(shared_ptr const& sendChannel, shared_ptr const& transmitMgr, shared_ptr const& taskMsg) { diff --git a/src/wbase/FileChannelShared.h b/src/wbase/FileChannelShared.h index db6dbb98e0..c86e84a902 100644 --- a/src/wbase/FileChannelShared.h +++ b/src/wbase/FileChannelShared.h @@ -31,6 +31,7 @@ // Third-party headers #include +#include "nlohmann/json.hpp" // Qserv headers #include "global/intTypes.h" @@ -94,6 +95,9 @@ class FileChannelShared : public ChannelShared { */ static void cleanUpResults(QueryId queryId); + /// @return Status and statistics on the results folder (capacity, usage, etc.) + static nlohmann::json statusToJson(); + /// The factory method for the channel class. 
static Ptr create(std::shared_ptr const& sendChannel, std::shared_ptr const& transmitMgr, diff --git a/src/wpublish/GetStatusCommand.cc b/src/wpublish/GetStatusCommand.cc index 174602b86f..54ccec4c57 100644 --- a/src/wpublish/GetStatusCommand.cc +++ b/src/wpublish/GetStatusCommand.cc @@ -26,6 +26,7 @@ // Qserv headers #include "proto/worker.pb.h" +#include "wbase/FileChannelShared.h" #include "wbase/MsgProcessor.h" #include "wbase/SendChannel.h" #include "wpublish/ResourceMonitor.h" @@ -58,6 +59,7 @@ void GetStatusCommand::run() { nlohmann::json result; result["processor"] = _processor->statusToJson(_taskSelector); result["resources"] = _resourceMonitor->statusToJson(); + result["filesystem"] = wbase::FileChannelShared::statusToJson(); proto::WorkerCommandGetStatusR reply; reply.set_info(result.dump()); diff --git a/src/www/dashboard.html b/src/www/dashboard.html index ee076bf147..6e81101f74 100644 --- a/src/www/dashboard.html +++ b/src/www/dashboard.html @@ -4,7 +4,7 @@ Qserv monitoring dashboard - + diff --git a/src/www/qserv/css/QservWorkerResultsFilesystem.css b/src/www/qserv/css/QservWorkerResultsFilesystem.css new file mode 100644 index 0000000000..8f1a2669b6 --- /dev/null +++ b/src/www/qserv/css/QservWorkerResultsFilesystem.css @@ -0,0 +1,24 @@ +#fwk-qserv-results-filesystem-controls label { + font-weight: bold; +} +table#fwk-qserv-results-filesystem caption { + caption-side: top; + text-align: right; + padding-top: 0; +} +table#fwk-qserv-results-filesystem tbody th, +table#fwk-qserv-results-filesystem tbody td { + vertical-align:middle; +} +table#fwk-qserv-results-filesystem pre { + padding: 0; + margin: 0; +} +table#fwk-qserv-results-filesystem caption.updating { + background-color: #ffeeba; +} +table#fwk-qserv-results-filesystem > thead > tr > th.sticky { + position:sticky; + top:80px; + z-index:2; +} diff --git a/src/www/qserv/js/QservMonitoringDashboard.js b/src/www/qserv/js/QservMonitoringDashboard.js index c37147752e..2facd2a454 100644 --- 
a/src/www/qserv/js/QservMonitoringDashboard.js +++ b/src/www/qserv/js/QservMonitoringDashboard.js @@ -48,6 +48,7 @@ require([ 'qserv/QservWorkerSchedulerHist', 'qserv/QservWorkerTasks', 'qserv/QservWorkerTaskHist', + 'qserv/QservWorkerResultsFilesystem', 'qserv/ReplicationController', 'qserv/ReplicationTools', 'qserv/ReplicationConfigGeneral', @@ -85,6 +86,7 @@ function(CSSLoader, QservWorkerSchedulerHist, QservWorkerTasks, QservWorkerTaskHist, + QservWorkerResultsFilesystem, ReplicationController, ReplicationTools, ReplicationConfigGeneral, @@ -173,6 +175,7 @@ function(CSSLoader, new QservWorkerSchedulerHist('Scheduler Histograms'), new QservWorkerTasks('Tasks'), new QservWorkerTaskHist('Task Histograms'), + new QservWorkerResultsFilesystem('Results Filesystem'), new QservCss('CSS') ] } diff --git a/src/www/qserv/js/QservWorkerResultsFilesystem.js b/src/www/qserv/js/QservWorkerResultsFilesystem.js new file mode 100644 index 0000000000..9846e20fbe --- /dev/null +++ b/src/www/qserv/js/QservWorkerResultsFilesystem.js @@ -0,0 +1,193 @@ +define([ + 'webfwk/CSSLoader', + 'webfwk/Fwk', + 'webfwk/FwkApplication', + 'qserv/Common', + 'underscore'], + +function(CSSLoader, + Fwk, + FwkApplication, + Common, + _) { + + CSSLoader.load('qserv/css/QservWorkerResultsFilesystem.css'); + + class QservWorkerResultsFilesystem extends FwkApplication { + + constructor(name) { + super(name); + } + fwk_app_on_show() { + console.log('show: ' + this.fwk_app_name); + this.fwk_app_on_update(); + } + fwk_app_on_hide() { + console.log('hide: ' + this.fwk_app_name); + } + fwk_app_on_update() { + if (this.fwk_app_visible) { + this._init(); + if (this._prev_update_sec === undefined) { + this._prev_update_sec = 0; + } + let now_sec = Fwk.now().sec; + if (now_sec - this._prev_update_sec > this._update_interval_sec()) { + this._prev_update_sec = now_sec; + this._init(); + this._load(); + } + } + } + _init() { + if (this._initialized === undefined) this._initialized = false; + if (this._initialized) 
return; + this._initialized = true; + let html = ` +
+
+
+
+ + +
+
+ + +
+
+
+
+
+
+ + + + + + + + + + + + + + + + +
workerprotocolfoldercapacity [GB]free [GB]available [GB]used [%]#filessize [GB]
Loading...
+
+
+ `; + let cont = this.fwk_app_container.html(html); + cont.find(".form-control-selector").change(() => { + this._load(); + }); + cont.find("button#reset-controls-form").click(() => { + this._set_update_interval_sec(10); + this._load(); + }); + } + _form_control(elem_type, id) { + if (this._form_control_obj === undefined) this._form_control_obj = {}; + if (!_.has(this._form_control_obj, id)) { + this._form_control_obj[id] = this.fwk_app_container.find(elem_type + '#' + id); + } + return this._form_control_obj[id]; + } + _update_interval_sec() { return this._form_control('select', 'update-interval').val(); } + _set_update_interval_sec(val) { this._form_control('select', 'update-interval').val(val); } + + /** + * Lazily looked-up (and cached) table that displays the results filesystem status of the workers. + */ + _table() { + if (this._table_obj === undefined) { + this._table_obj = this.fwk_app_container.find('table#fwk-qserv-results-filesystem'); + } + return this._table_obj; + } + + /** + * Load data from a web service then render it to the application's page. 
+ */ + _load() { + if (this._loading === undefined) this._loading = false; + if (this._loading) return; + this._loading = true; + + this._table().children('caption').addClass('updating'); + + Fwk.web_service_GET( + "/replication/qserv/worker/status", + {timeout_sec: 2, version: Common.RestAPIVersion}, + (data) => { + this._display(data.status); + Fwk.setLastUpdate(this._table().children('caption')); + this._table().children('caption').removeClass('updating'); + this._loading = false; + }, + (msg) => { + console.log('request failed', this.fwk_app_name, msg); + this._table().children('caption').html('No Response'); + this._table().children('caption').removeClass('updating'); + this._loading = false; + } + ); + } + + /** + * Display MySQL connections + */ + _display(data) { + let html = ''; + for (let worker in data) { + if (!data[worker].success) { + html += ` + + ${worker} +   +   +   +   +   +   +   +   +`; + } else { + let filesystem = data[worker].info.filesystem; + const used = (filesystem.capacity_bytes > 0 && filesystem.available_bytes > 0) ? + (100.0 * (filesystem.capacity_bytes - filesystem.available_bytes) / filesystem.capacity_bytes).toFixed(1) : + -1; + html += ` + + ${worker} + ${filesystem.protocol} + ${filesystem.folder} +
${QservWorkerResultsFilesystem._bytes2gb(filesystem.capacity_bytes)}
+
${QservWorkerResultsFilesystem._bytes2gb(filesystem.free_bytes)}
+
${QservWorkerResultsFilesystem._bytes2gb(filesystem.available_bytes)}
+
${used}
+
${filesystem.num_result_files}
+
${QservWorkerResultsFilesystem._bytes2gb(filesystem.size_result_files_bytes)}
+`; + } + } + this._table().children('tbody').html(html); + } + static _GiB = 1024 * 1024 * 1024; + static _bytes2gb(bytes) { + return bytes < 0 ? bytes : (bytes / QservWorkerResultsFilesystem._GiB).toFixed(1); + } + } + return QservWorkerResultsFilesystem; +});