Skip to content

Commit

Permalink
Merge pull request noobaa#7562 from naveenpaul1/nsfs_metrics
Browse files Browse the repository at this point in the history
NSFS | NC |endpoint metrics should aggregated fork
  • Loading branch information
naveenpaul1 authored Dec 8, 2023
2 parents d08cf47 + 868ff1e commit 13878f4
Show file tree
Hide file tree
Showing 11 changed files with 258 additions and 40 deletions.
7 changes: 7 additions & 0 deletions docs/non_containerized_NSFS.md
Original file line number Diff line number Diff line change
Expand Up @@ -436,6 +436,13 @@ NSFS management CLI command will create both account and bucket dir if it's miss

The non-containerized NSFS certificates/ directory is located under the config_root path. The certificates/ directory should contain the SSL files tls.key and tls.crt. The system uses the certificate from this directory to create a valid HTTPS connection. If the certificate is missing from this directory, a self-signed SSL certificate is generated. Make sure the path to the certificates/ directory is valid before running the nsfs command; if the path is invalid, the cert flow will fail.

Non containerized NSFS restricts insecure HTTP connections when `allow_http` is set to false in config.json. This is not the default behaviour.

## Monitoring

Prometheus metrics port can be passed through the argument `--metrics_port` while executing the nsfs command.
NSFS state and output metrics can be fetched from the URL `http://{host}:{metrics_port}/metrics/nsfs_stats`.

## Log and Logrotate
Noobaa logs are configured using rsyslog and logrotate. RPM will configure rsyslog and logrotate if both are already running.

Expand Down
12 changes: 6 additions & 6 deletions src/endpoint/endpoint.js
Original file line number Diff line number Diff line change
Expand Up @@ -36,11 +36,11 @@ const server_rpc = require('../server/server_rpc');
const debug_config = require('../util/debug_config');
const auth_server = require('../server/common_services/auth_server');
const system_store = require('../server/system_services/system_store');
const prom_reporting = require('../server/analytic_services/prometheus_reporting');
const background_scheduler = require('../util/background_scheduler').get_instance();
const endpoint_stats_collector = require('../sdk/endpoint_stats_collector');
const { NamespaceMonitor } = require('../server/bg_services/namespace_monitor');
const { SemaphoreMonitor } = require('../server/bg_services/semaphore_monitor');
const prom_reporting = require('../server/analytic_services/prometheus_reporting');
const cluster = /** @type {import('node:cluster').Cluster} */ (
/** @type {unknown} */ (require('node:cluster'))
);
Expand Down Expand Up @@ -91,12 +91,12 @@ async function main(options = {}) {
try {
// the primary just forks and returns, workers will continue to serve
fork_count = options.forks ?? config.ENDPOINT_FORKS;
if (fork_utils.start_workers(fork_count)) return;
const metrics_port = options.metrics_port || config.EP_METRICS_SERVER_PORT;
if (fork_utils.start_workers(metrics_port, fork_count)) return;

const http_port = options.http_port || Number(process.env.ENDPOINT_PORT) || 6001;
const https_port = options.https_port || Number(process.env.ENDPOINT_SSL_PORT) || 6443;
const https_port_sts = options.https_port_sts || Number(process.env.ENDPOINT_SSL_STS_PORT) || 7443;
const metrics_port = options.metrics_port || config.EP_METRICS_SERVER_PORT;
const https_port_sts = options.https_port_sts || Number(process.env.ENDPOINT_SSL_PORT_STS) || 7443;
const endpoint_group_id = process.env.ENDPOINT_GROUP_ID || 'default-endpoint-group';

const virtual_hosts = Object.freeze(
Expand Down Expand Up @@ -182,9 +182,9 @@ async function main(options = {}) {
await listen_http(https_port_sts, https_server_sts);
dbg.log0('Started STS HTTPS successfully');
}
if (metrics_port > 0) {
if (metrics_port > 0 && cluster.isPrimary) {
dbg.log0('Starting metrics server', metrics_port);
await prom_reporting.start_server(metrics_port);
await prom_reporting.start_server(metrics_port, false);
dbg.log0('Started metrics server successfully');
}

Expand Down
15 changes: 14 additions & 1 deletion src/sdk/endpoint_stats_collector.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,12 @@ const mime = require('mime');

const dbg = require('../util/debug_module')(__filename);
const prom_report = require('../server/analytic_services/prometheus_reporting');
const stats_aggregator = require('../server/system_services/stats_aggregator');
const DelayedCollector = require('../util/delayed_collector');
const config = require('../../config');
const cluster = /** @type {import('node:cluster').Cluster} */ (
/** @type {unknown} */ (require('node:cluster'))
);

/**
* @typedef {{
Expand Down Expand Up @@ -154,13 +158,14 @@ class EndpointStatsCollector {
for (const [k, v] of Object.entries(data.fs_workers_stats ?? {})) {
dbg.log0(`nsfs stats - FS op=${k} :`, v);
}

if (this.rpc_client) {
await this.rpc_client.stats.update_nsfs_stats({
nsfs_stats: data
}, {
timeout: SEND_STATS_TIMEOUT
});
} else {
await stats_aggregator.standalon_update_nsfs_stats(data);
}
}

Expand Down Expand Up @@ -350,6 +355,14 @@ class EndpointStatsCollector {
this.prom_metrics_report.observe('hub_write_latency', { bucket_name }, hub_write_latency);
}
}
update_fork_counter() {
// add fork related metrics to prometheus
const code = `worker_${cluster.worker.id}`;
this.prom_metrics_report.inc('fork_counter', {code});
}
}
// At module load time, and only inside a cluster worker process, record this
// worker in the `fork_counter` metric.
// NOTE(review): this call runs before the `EndpointStatsCollector._instance = null`
// assignment that follows it — presumably `instance()` lazily creates the singleton,
// in which case that later assignment would discard it; confirm the ordering is intended.
if (cluster.isWorker) {
EndpointStatsCollector.instance().update_fork_counter();
}

EndpointStatsCollector._instance = null;
Expand Down
123 changes: 109 additions & 14 deletions src/server/analytic_services/prometheus_reporting.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@ const config = require('../../../config');
const { NodeJsReport } = require('./prometheus_reports/nodejs_report');
const { NooBaaCoreReport } = require('./prometheus_reports/noobaa_core_report');
const { NooBaaEndpointReport } = require('./prometheus_reports/noobaa_endpoint_report');
const stats_aggregator = require('../system_services/stats_aggregator');
const AggregatorRegistry = require('prom-client').AggregatorRegistry;
const aggregatorRegistry = new AggregatorRegistry();

// Currently supported reports
const reports = Object.seal({
Expand All @@ -18,6 +21,9 @@ const reports = Object.seal({
endpoint: null // optional
});

let io_stats_complete = {};
let ops_stats_complete = {};

function get_nodejs_report() {
return reports.nodejs;
}
Expand All @@ -43,6 +49,7 @@ async function export_all_metrics() {

async function start_server(
port,
fork_enabled,
retry_count = config.PROMETHEUS_SERVER_RETRY_COUNT,
delay = config.PROMETHEUS_SERVER_RETRY_DELAY
) {
Expand All @@ -51,20 +58,50 @@ async function start_server(
}

const server = http.createServer(async (req, res) => {
// Serve all metrics on the root path.
if (req.url === '' || req.url === '/') {
res.writeHead(200, { 'Content-Type': 'text/plain' });
res.end(await export_all_metrics());
return;
}

// Serve report's metrics on the report name path
const report_name = req.url.substr(1);
const report = reports[report_name];
if (report) {
res.writeHead(200, { 'Content-Type': 'text/plain' });
res.end(await report.export_metrics(report_name));
return;
// Serve all metrics on the root path for system that do have one or more fork running.
if (fork_enabled) {
const metrics = await aggregatorRegistry.clusterMetrics();
if (req.url === '' || req.url === '/') {
res.writeHead(200, { 'Content-Type': aggregatorRegistry.contentType });
res.end(metrics);
return;
}
if (req.url === '/metrics/nsfs_stats') {
res.writeHead(200, { 'Content-Type': 'text/plain' });
const nsfs_report = {
nsfs_counters: io_stats_complete,
op_stats_counters: ops_stats_complete,
};
res.end(JSON.stringify(nsfs_report));
return;
}
// Serve report's metrics on the report name path
const report_name = req.url.substr(1);
const single_metrics = export_single_metrics(metrics, report_name);
if (single_metrics !== "") {
res.writeHead(200, { 'Content-Type': 'text/plain' });
res.end(single_metrics);
return;
}
} else {
// Serve all metrics on the root path for system that do not have any fork running.
if (req.url === '' || req.url === '/') {
res.writeHead(200, { 'Content-Type': 'text/plain' });
res.end(await export_all_metrics());
return;
}
if (req.url === '/metrics/nsfs_stats') {
res.writeHead(200, { 'Content-Type': 'text/plain' });
res.end(await metrics_nsfs_stats_handler());
return;
}
const report_name = req.url.substr(1);
const report = reports[report_name];
if (report) {
res.writeHead(200, { 'Content-Type': 'text/plain' });
res.end(await report.export_metrics(report_name));
return;
}
}

res.writeHead(404, { 'Content-Type': 'text/plain' });
Expand Down Expand Up @@ -94,6 +131,62 @@ async function start_server(
}
}

/**
 * Build the NSFS stats report as a JSON string.
 *
 * Reads the IO counters and per-operation stats from `stats_aggregator` and
 * flattens them into two maps with prefixed, lower-cased keys:
 *  - `nsfs_counters`:     `noobaa_nsfs_io_<key>`            -> value
 *  - `op_stats_counters`: `noobaa_nsfs_op_<op_name>_<key>`  -> value
 *
 * NOTE(review): both aggregator getters are called with `false`; the meaning of
 * that flag is defined in stats_aggregator and is not visible here — confirm.
 *
 * @returns {Promise<string>} JSON serialization of `{ nsfs_counters, op_stats_counters }`
 */
async function metrics_nsfs_stats_handler() {
    const nsfs_io_stats = {};
    const nsfs_counters = stats_aggregator.get_nsfs_io_stats(false);
    // Building the report per io and value
    for (const [key, value] of Object.entries(nsfs_counters)) {
        nsfs_io_stats[`noobaa_nsfs_io_${key}`.toLowerCase()] = value;
    }

    const op_stats_counters = {};
    const op_stats = stats_aggregator.get_op_stats(false);
    // Building the report per op name key and value
    for (const [op_name, obj] of Object.entries(op_stats)) {
        for (const [key, value] of Object.entries(obj)) {
            op_stats_counters[`noobaa_nsfs_op_${op_name}_${key}`.toLowerCase()] = value;
        }
    }

    const nsfs_report = {
        nsfs_counters: nsfs_io_stats,
        op_stats_counters: op_stats_counters,
    };
    // Pass the report as a separate argument: interpolating an object into a
    // template literal would only ever log "[object Object]".
    dbg.log1('_create_nsfs_report: nsfs_report', nsfs_report);
    return JSON.stringify(nsfs_report);
}

/**
 * Extract from an exported metrics text only the lines that mention a report.
 *
 * The text is split on '\n' and a line is kept when it contains `report_name`
 * as a substring; each kept line is emitted followed by a newline.
 *
 * @param {string} all_metrics - full metrics text, one entry per line
 * @param {string} report_name - substring that selects the lines to keep
 * @returns {string} the selected lines, each terminated by '\n' ("" when none match)
 */
function export_single_metrics(all_metrics, report_name) {
    return all_metrics
        .split('\n')
        .filter(line => line.includes(report_name))
        .map(line => `${line}\n`)
        .join('');
}

/**
 * Replace the module-level `io_stats_complete` map with the given IO stats,
 * renaming every key to `noobaa_nsfs_io_<key>` in lower case.
 *
 * @param {Object} io_stats - map of IO counter name -> value
 */
function set_io_stats(io_stats) {
    io_stats_complete = Object.fromEntries(
        Object.entries(io_stats).map(
            ([name, count]) => [`noobaa_nsfs_io_${name}`.toLowerCase(), count]
        )
    );
}

/**
 * Replace the module-level `ops_stats_complete` map with a flattened view of
 * the given per-operation stats.
 *
 * Every `(op_name, key)` pair becomes one entry named
 * `noobaa_nsfs_op_<op_name>_<key>` in lower case.
 *
 * @param {Object} ops_stats - map of op name -> map of stat key -> value
 */
function set_ops_stats(ops_stats) {
    // Building the report per op name key and value
    ops_stats_complete = Object.fromEntries(
        Object.entries(ops_stats).flatMap(([op, fields]) =>
            Object.entries(fields).map(
                ([field, val]) => [`noobaa_nsfs_op_${op}_${field}`.toLowerCase(), val]
            )
        )
    );
}

// -----------------------------------------
// exports
// -----------------------------------------
Expand All @@ -102,3 +195,5 @@ exports.get_core_report = get_core_report;
exports.get_endpoint_report = get_endpoint_report;
exports.export_all_metrics = export_all_metrics;
exports.start_server = start_server;
exports.set_io_stats = set_io_stats;
exports.set_ops_stats = set_ops_stats;
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,15 @@ const config = require('../../../../config.js');

class BasePrometheusReport {
constructor() {
this._registry = new this.prom_client.Registry();
this._register = this.prom_client.register;
}

get prom_client() {
return prom_client;
}

get registry() {
return this._registry;
get register() {
return this._register;
}

get metric_prefix() {
Expand All @@ -30,7 +30,7 @@ class BasePrometheusReport {
}

export_metrics() {
return this.registry.metrics();
return this.register.metrics();
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ class NodeJsReport extends BasePrometheusReport {

if (this.enabled) {
this.prom_client.collectDefaultMetrics({
register: this.registry,
register: this.register,
prefix: this.metric_prefix
});
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -407,7 +407,7 @@ class NooBaaCoreReport extends BasePrometheusReport {
}
this._metrics[m.name] = new this.prom_client[m.type]({
name: this.get_prefixed_name(m.name),
registers: [this.registry],
registers: [this.register],
...m.configuration,
});
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,15 @@ const NOOBAA_ENDPOINT_METRICS = js_utils.deep_freeze([{
total_values = 0;
},
},
{
type: 'Counter',
name: 'fork_counter',
configuration: {
help: 'Counter on number of fork hit',
labelNames: ['code']
},
aggregator: 'average',
}
]);

class NooBaaEndpointReport extends BasePrometheusReport {
Expand All @@ -241,7 +250,7 @@ class NooBaaEndpointReport extends BasePrometheusReport {
collect: m.collect,
prom_instance: new this.prom_client[m.type]({
name: this.get_prefixed_name(m.name),
registers: [this.registry],
registers: [this.register],
...m.configuration,
collect() {
if (m.collect && this.average_intervals) {
Expand Down
Loading

0 comments on commit 13878f4

Please sign in to comment.