Skip to content

Commit

Permalink
Added a monitor for the results filesystem
Browse files Browse the repository at this point in the history
Also, extended the Web Dashboard to display the filesystem status
and usage statistics.
  • Loading branch information
iagaponenko committed Jun 23, 2023
1 parent 74e5187 commit cc60822
Show file tree
Hide file tree
Showing 7 changed files with 266 additions and 1 deletion.
39 changes: 39 additions & 0 deletions src/wbase/FileChannelShared.cc
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
#include "lsst/log/Log.h"

using namespace std;
using namespace nlohmann;
namespace fs = boost::filesystem;
namespace wconfig = lsst::qserv::wconfig;

Expand Down Expand Up @@ -146,6 +147,44 @@ void FileChannelShared::cleanUpResults(QueryId queryId) {
context << "removed " << numFilesRemoved << " result files from " << dirPath << ".");
}

/**
 * Collect the status and usage statistics of the folder where result files
 * are stored.
 *
 * All numeric attributes of the returned object are pre-set to -1 and are only
 * replaced by real values once those values have been obtained, so -1 in the
 * output means "not available". Failures to inspect the folder are logged and
 * never propagated to the caller.
 *
 * @return JSON object with keys "protocol", "folder", "capacity_bytes",
 *   "free_bytes", "available_bytes", "num_result_files" and
 *   "size_result_files_bytes".
 */
json FileChannelShared::statusToJson() {
    string const context = "FileChannelShared::" + string(__func__) + " ";
    auto const config = wconfig::WorkerConfig::instance();
    string const protocol = wconfig::WorkerConfig::protocol2str(config->resultDeliveryProtocol());
    fs::path const dirPath = config->resultsDirname();
    json result = json::object({{"protocol", protocol},
                                {"folder", dirPath.string()},
                                {"capacity_bytes", -1},
                                {"free_bytes", -1},
                                {"available_bytes", -1},
                                {"num_result_files", -1},
                                {"size_result_files_bytes", -1}});
    // Hold the same mutex as the cleanup operations while scanning the folder.
    lock_guard<mutex> const lock(_resultsDirCleanupMtx);
    try {
        auto const space = fs::space(dirPath);
        result["capacity_bytes"] = space.capacity;
        result["free_bytes"] = space.free;
        result["available_bytes"] = space.available;
        uintmax_t sizeResultFilesBytes = 0;
        uintmax_t numResultFiles = 0;
        string const ext = ".proto";
        auto itr = fs::directory_iterator(dirPath);
        for (auto&& entry : boost::make_iterator_range(itr, {})) {
            auto const& filePath = entry.path();
            // A non-empty extension implies that the path has a filename.
            if (filePath.extension() != ext) continue;
            // Use the non-throwing overload: an entry that disappeared after it was
            // listed (or that isn't a regular file) must not invalidate the statistics
            // collected for all other files.
            boost::system::error_code ec;
            uintmax_t const fileSizeBytes = fs::file_size(filePath, ec);
            if (ec) {
                LOGS(_log, LOG_LVL_WARN,
                     context << "failed to get the size of " << filePath << ", ec: " << ec.message());
                continue;
            }
            ++numResultFiles;
            sizeResultFilesBytes += fileSizeBytes;
        }
        result["num_result_files"] = numResultFiles;
        result["size_result_files_bytes"] = sizeResultFilesBytes;
    } catch (exception const& ex) {
        LOGS(_log, LOG_LVL_WARN,
             context << "failed to get folder stats for " << dirPath << ", ex: " << ex.what());
    }
    return result;
}

FileChannelShared::Ptr FileChannelShared::create(shared_ptr<wbase::SendChannel> const& sendChannel,
shared_ptr<wcontrol::TransmitMgr> const& transmitMgr,
shared_ptr<proto::TaskMsg> const& taskMsg) {
Expand Down
4 changes: 4 additions & 0 deletions src/wbase/FileChannelShared.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@

// Third-party headers
#include <mysql/mysql.h>
#include "nlohmann/json.hpp"

// Qserv headers
#include "global/intTypes.h"
Expand Down Expand Up @@ -94,6 +95,9 @@ class FileChannelShared : public ChannelShared {
*/
static void cleanUpResults(QueryId queryId);

/// @return Status and statistics on the results folder (capacity, usage, etc.)
static nlohmann::json statusToJson();

/// The factory method for the channel class.
static Ptr create(std::shared_ptr<wbase::SendChannel> const& sendChannel,
std::shared_ptr<wcontrol::TransmitMgr> const& transmitMgr,
Expand Down
2 changes: 2 additions & 0 deletions src/wpublish/GetStatusCommand.cc
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@

// Qserv headers
#include "proto/worker.pb.h"
#include "wbase/FileChannelShared.h"
#include "wbase/MsgProcessor.h"
#include "wbase/SendChannel.h"
#include "wpublish/ResourceMonitor.h"
Expand Down Expand Up @@ -58,6 +59,7 @@ void GetStatusCommand::run() {
nlohmann::json result;
result["processor"] = _processor->statusToJson(_taskSelector);
result["resources"] = _resourceMonitor->statusToJson();
result["filesystem"] = wbase::FileChannelShared::statusToJson();

proto::WorkerCommandGetStatusR reply;
reply.set_info(result.dump());
Expand Down
2 changes: 1 addition & 1 deletion src/www/dashboard.html
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
<title>Qserv monitoring dashboard</title>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<script data-main="qserv/js/QservMonitoringDashboard.js?bust=64" src="https://requirejs.org/docs/release/2.3.6/minified/require.js"></script>
<script data-main="qserv/js/QservMonitoringDashboard.js?bust=65" src="https://requirejs.org/docs/release/2.3.6/minified/require.js"></script>
</head>
<body></body>
</html>
24 changes: 24 additions & 0 deletions src/www/qserv/css/QservWorkerResultsFilesystem.css
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
/* Styles of the "Results Filesystem" dashboard page: the controls form and the table. */

/* Labels of the controls form are rendered in bold. */
#fwk-qserv-results-filesystem-controls label {
font-weight: bold;
}
/* The table caption is placed above the table and aligned to the right. */
table#fwk-qserv-results-filesystem caption {
caption-side: top;
text-align: right;
padding-top: 0;
}
/* Body cells are vertically centered. */
table#fwk-qserv-results-filesystem tbody th,
table#fwk-qserv-results-filesystem tbody td {
vertical-align:middle;
}
/* Remove the default spacing of <pre> elements placed inside table cells. */
table#fwk-qserv-results-filesystem pre {
padding: 0;
margin: 0;
}
/* Highlight the caption while it carries the "updating" class. */
table#fwk-qserv-results-filesystem caption.updating {
background-color: #ffeeba;
}
/* Header cells marked "sticky" stay pinned 80px below the top while scrolling
   (presumably to clear a fixed page header - confirm against the page layout). */
table#fwk-qserv-results-filesystem > thead > tr > th.sticky {
position:sticky;
top:80px;
z-index:2;
}
3 changes: 3 additions & 0 deletions src/www/qserv/js/QservMonitoringDashboard.js
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ require([
'qserv/QservWorkerSchedulerHist',
'qserv/QservWorkerTasks',
'qserv/QservWorkerTaskHist',
'qserv/QservWorkerResultsFilesystem',
'qserv/ReplicationController',
'qserv/ReplicationTools',
'qserv/ReplicationConfigGeneral',
Expand Down Expand Up @@ -85,6 +86,7 @@ function(CSSLoader,
QservWorkerSchedulerHist,
QservWorkerTasks,
QservWorkerTaskHist,
QservWorkerResultsFilesystem,
ReplicationController,
ReplicationTools,
ReplicationConfigGeneral,
Expand Down Expand Up @@ -173,6 +175,7 @@ function(CSSLoader,
new QservWorkerSchedulerHist('Scheduler Histograms'),
new QservWorkerTasks('Tasks'),
new QservWorkerTaskHist('Task Histograms'),
new QservWorkerResultsFilesystem('Results Filesystem'),
new QservCss('CSS')
]
}
Expand Down
193 changes: 193 additions & 0 deletions src/www/qserv/js/QservWorkerResultsFilesystem.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,193 @@
define([
'webfwk/CSSLoader',
'webfwk/Fwk',
'webfwk/FwkApplication',
'qserv/Common',
'underscore'],

function(CSSLoader,
Fwk,
FwkApplication,
Common,
_) {

CSSLoader.load('qserv/css/QservWorkerResultsFilesystem.css');

/**
 * The dashboard page displaying the status and the usage statistics of
 * the results filesystem reported by each worker.
 */
class QservWorkerResultsFilesystem extends FwkApplication {

    constructor(name) {
        super(name);
    }
    fwk_app_on_show() {
        console.log('show: ' + this.fwk_app_name);
        this.fwk_app_on_update();
    }
    fwk_app_on_hide() {
        console.log('hide: ' + this.fwk_app_name);
    }

    /**
     * Reload the page content if the page is visible and the selected update
     * interval has expired since the previous update.
     */
    fwk_app_on_update() {
        if (this.fwk_app_visible) {
            this._init();
            if (this._prev_update_sec === undefined) {
                this._prev_update_sec = 0;
            }
            let now_sec = Fwk.now().sec;
            // The interval is the string value of the selector. It gets coerced
            // to a number by the comparison operator.
            if (now_sec - this._prev_update_sec > this._update_interval_sec()) {
                this._prev_update_sec = now_sec;
                this._load();
            }
        }
    }

    /**
     * Build the static content of the page (the controls form and an empty table)
     * and register the event handlers. Only the first call has any effect.
     */
    _init() {
        if (this._initialized === undefined) this._initialized = false;
        if (this._initialized) return;
        this._initialized = true;
        let html = `
<div class="row" id="fwk-qserv-results-filesystem-controls">
<div class="col">
<div class="form-row">
<div class="form-group col-md-1">
<label for="update-interval"><i class="bi bi-arrow-repeat"></i> interval:</label>
<select id="update-interval" class="form-control form-control-selector">
<option value="5">5 sec</option>
<option value="10" selected>10 sec</option>
<option value="20">20 sec</option>
<option value="30">30 sec</option>
<option value="60">1 min</option>
<option value="120">2 min</option>
<option value="300">5 min</option>
</select>
</div>
<div class="form-group col-md-1">
<label for="reset-controls-form">&nbsp;</label>
<button id="reset-controls-form" class="btn btn-primary form-control">Reset</button>
</div>
</div>
</div>
</div>
<div class="row">
<div class="col">
<table class="table table-sm table-hover table-bordered" id="fwk-qserv-results-filesystem">
<thead class="thead-light">
<tr>
<th class="sticky">worker</th>
<th class="sticky">protocol</th>
<th class="sticky">folder</th>
<th class="sticky" style="text-align:right;">capacity [GB]</th>
<th class="sticky" style="text-align:right;">free [GB]</th>
<th class="sticky" style="text-align:right;">available [GB]</th>
<th class="sticky" style="text-align:right;">used [%]</th>
<th class="sticky" style="text-align:right;">#files</th>
<th class="sticky" style="text-align:right;">size [GB]</th>
</tr>
</thead>
<caption class="updating">Loading...</caption>
<tbody></tbody>
</table>
</div>
</div>`;
        let cont = this.fwk_app_container.html(html);
        // Any change of the selector triggers an immediate reload.
        cont.find(".form-control-selector").change(() => {
            this._load();
        });
        // The "Reset" button restores the default update interval (10 sec).
        cont.find("button#reset-controls-form").click(() => {
            this._set_update_interval_sec(10);
            this._load();
        });
    }

    /**
     * Locate a control of the form. Lookups are cached by the identifier of the element.
     * @param elem_type the tag name of the element (for example 'select')
     * @param id the identifier of the element
     * @return the element found in the application's container
     */
    _form_control(elem_type, id) {
        if (this._form_control_obj === undefined) this._form_control_obj = {};
        if (!_.has(this._form_control_obj, id)) {
            this._form_control_obj[id] = this.fwk_app_container.find(elem_type + '#' + id);
        }
        return this._form_control_obj[id];
    }
    _update_interval_sec() { return this._form_control('select', 'update-interval').val(); }
    _set_update_interval_sec(val) { this._form_control('select', 'update-interval').val(val); }

    /**
     * Table for displaying the status of the results filesystem at workers.
     */
    _table() {
        if (this._table_obj === undefined) {
            this._table_obj = this.fwk_app_container.find('table#fwk-qserv-results-filesystem');
        }
        return this._table_obj;
    }

    /**
     * Load data from a web service then render it to the application's page.
     * The call is ignored while a previous request is still in flight.
     */
    _load() {
        if (this._loading === undefined) this._loading = false;
        if (this._loading) return;
        this._loading = true;

        this._table().children('caption').addClass('updating');

        Fwk.web_service_GET(
            "/replication/qserv/worker/status",
            {timeout_sec: 2, version: Common.RestAPIVersion},
            (data) => {
                // The loading flag must be reset even if rendering fails. Otherwise
                // all subsequent updates of the page would be silently skipped.
                try {
                    this._display(data.status);
                    Fwk.setLastUpdate(this._table().children('caption'));
                } finally {
                    this._table().children('caption').removeClass('updating');
                    this._loading = false;
                }
            },
            (msg) => {
                console.log('request failed', this.fwk_app_name, msg);
                this._table().children('caption').html('<span style="color:maroon">No Response</span>');
                this._table().children('caption').removeClass('updating');
                this._loading = false;
            }
        );
    }

    /**
     * Display the status of the results filesystem, one table row per worker.
     * Workers that failed to respond, or whose response carries no filesystem
     * info, are shown as rows with empty cells.
     * @param data a dictionary of the worker status objects, where the key is the name of a worker
     */
    _display(data) {
        let html = '';
        for (let worker in data) {
            const status = data[worker];
            const filesystem = (status.success && status.info) ? status.info.filesystem : undefined;
            if (!filesystem) {
                html += `
<tr>
<th class="table-warning">${worker}</th>
<td class="table-secondary">&nbsp;</td>
<td class="table-secondary">&nbsp;</td>
<td class="table-secondary">&nbsp;</td>
<td class="table-secondary">&nbsp;</td>
<td class="table-secondary">&nbsp;</td>
<td class="table-secondary">&nbsp;</td>
<td class="table-secondary">&nbsp;</td>
<td class="table-secondary">&nbsp;</td>
</tr>`;
            } else {
                // Zero available bytes is a valid value (the filesystem is full) that
                // has to be reported as 100%. Negative values mean "not available".
                const used = (filesystem.capacity_bytes > 0 && filesystem.available_bytes >= 0) ?
                    (100.0 * (filesystem.capacity_bytes - filesystem.available_bytes) / filesystem.capacity_bytes).toFixed(1) :
                    -1;
                html += `
<tr>
<th>${worker}</th>
<td>${filesystem.protocol}</td>
<td>${filesystem.folder}</td>
<td style="text-align:right;"><pre>${QservWorkerResultsFilesystem._bytes2gb(filesystem.capacity_bytes)}</pre></td>
<td style="text-align:right;"><pre>${QservWorkerResultsFilesystem._bytes2gb(filesystem.free_bytes)}</pre></td>
<td style="text-align:right;"><pre>${QservWorkerResultsFilesystem._bytes2gb(filesystem.available_bytes)}</pre></td>
<td style="text-align:right;"><pre>${used}</pre></td>
<td style="text-align:right;"><pre>${filesystem.num_result_files}</pre></td>
<td style="text-align:right;"><pre>${QservWorkerResultsFilesystem._bytes2gb(filesystem.size_result_files_bytes)}</pre></td>
</tr>`;
            }
        }
        this._table().children('tbody').html(html);
    }

    /// The divisor used for reporting sizes (2^30 bytes).
    static _GiB = 1024 * 1024 * 1024;

    /**
     * Convert the number of bytes into a string with one digit after the decimal point.
     * @param bytes the number of bytes
     * @return the input divided by 2^30 and formatted with toFixed(1); negative inputs
     *   are returned unchanged
     */
    static _bytes2gb(bytes) {
        return bytes < 0 ? bytes : (bytes / QservWorkerResultsFilesystem._GiB).toFixed(1);
    }
}
return QservWorkerResultsFilesystem;
});

0 comments on commit cc60822

Please sign in to comment.