diff --git a/CHANGELOG.md b/CHANGELOG.md
index c8896fe8..b2d5484e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -13,6 +13,8 @@ project adheres to [Semantic Versioning](http://semver.org/).
 
 ### Added
 
+- Added: `nodejs_eventloop_utilization_summary` and `nodejs_eventloop_utilization_histogram` metrics to `collectDefaultMetrics()`.
+
 ## [15.1.0] - 2023-12-15
 
 ### Changed
diff --git a/README.md b/README.md
index 1487a50e..dcc83f8b 100644
--- a/README.md
+++ b/README.md
@@ -54,6 +54,11 @@ available on Linux.
 
 - `register` to which registry the metrics should be registered. Default: the global default registry.
 - `gcDurationBuckets` with custom buckets for GC duration histogram. Default buckets of GC duration histogram are `[0.001, 0.01, 0.1, 1, 2, 5]` (in seconds).
 - `eventLoopMonitoringPrecision` with sampling rate in milliseconds. Must be greater than zero. Default: 10.
+- `eventLoopUtilizationTimeout` sampling interval in milliseconds over which event loop utilization is calculated. Must be greater than zero. Default: 100.
+- `eventLoopUtilizationBuckets` with custom buckets for event loop utilization histogram. Default buckets of event loop utilization histogram are `[0.01, 0.05, 0.1, 0.25, 0.5, 0.6, 0.7, 0.75, 0.8, 0.9, 0.95, 0.99, 1]` (utilization ratio between 0 and 1).
+- `eventLoopUtilizationPercentiles` with custom percentiles for event loop utilization summary. Default percentiles of event loop utilization summary are `[0.01, 0.05, 0.5, 0.9, 0.95, 0.99, 0.999]`.
+- `eventLoopUtilizationMaxAgeSeconds` length of the summary sliding window in seconds. Must be greater than zero. Default: 60.
+- `eventLoopUtilizationAgeBuckets` number of buckets in the summary sliding window. Must be greater than zero. Default: 5.
 
To register metrics to another registry, pass it in as `register`:
diff --git a/index.d.ts b/index.d.ts
index 13e0051e..544bfab2 100644
--- a/index.d.ts
+++ b/index.d.ts
@@ -760,6 +760,16 @@ export interface DefaultMetricsCollectorConfiguration<
 	prefix?: string;
 	gcDurationBuckets?: number[];
 	eventLoopMonitoringPrecision?: number;
+	/** Sampling interval for event loop utilization, in milliseconds. */
+	eventLoopUtilizationTimeout?: number;
+	/** Custom buckets for the event loop utilization histogram. */
+	eventLoopUtilizationBuckets?: number[];
+	/** Custom percentiles for the event loop utilization summary. */
+	eventLoopUtilizationPercentiles?: number[];
+	/** Number of sliding-window buckets of the utilization summary. */
+	eventLoopUtilizationAgeBuckets?: number;
+	/** Sliding-window length of the utilization summary, in seconds. */
+	eventLoopUtilizationMaxAgeSeconds?: number;
 	labels?: Object;
 }
 
diff --git a/lib/defaultMetrics.js b/lib/defaultMetrics.js
index f285981a..c4421ac9 100644
--- a/lib/defaultMetrics.js
+++ b/lib/defaultMetrics.js
@@ -9,6 +9,7 @@ const osMemoryHeap = require('./metrics/osMemoryHeap');
 const processOpenFileDescriptors = require('./metrics/processOpenFileDescriptors');
 const processMaxFileDescriptors = require('./metrics/processMaxFileDescriptors');
 const eventLoopLag = require('./metrics/eventLoopLag');
+const eventLoopUtilization = require('./metrics/eventLoopUtilization');
 const processHandles = require('./metrics/processHandles');
 const processRequests = require('./metrics/processRequests');
 const processResources = require('./metrics/processResources');
@@ -24,6 +25,7 @@ const metrics = {
 	processOpenFileDescriptors,
 	processMaxFileDescriptors,
 	eventLoopLag,
+	eventLoopUtilization,
 	...(typeof process.getActiveResourcesInfo === 'function'
 		?
{ processResources }
 		: {}),
diff --git a/lib/metrics/eventLoopUtilization.js b/lib/metrics/eventLoopUtilization.js
new file mode 100644
index 00000000..e17a8cad
--- /dev/null
+++ b/lib/metrics/eventLoopUtilization.js
@@ -0,0 +1,126 @@
+'use strict';
+
+const Summary = require('../summary');
+const Histogram = require('../histogram');
+
+// Check if perf_hooks module is available
+let perf_hooks;
+try {
+	/* eslint-disable node/no-unsupported-features/node-builtins */
+	perf_hooks = require('perf_hooks');
+} catch {
+	// node version is too old
+}
+
+// Registered only when perf_hooks.performance.eventLoopUtilization exists.
+const NODEJS_EVENTLOOP_UTILIZATION_SUMMARY =
+	'nodejs_eventloop_utilization_summary';
+
+const NODEJS_EVENTLOOP_UTILIZATION_HISTOGRAM =
+	'nodejs_eventloop_utilization_histogram';
+
+const DEFAULT_ELU_HISTOGRAM_BUCKETS = [
+	0.01, 0.05, 0.1, 0.25, 0.5, 0.6, 0.7, 0.75, 0.8, 0.9, 0.95, 0.99, 1,
+];
+
+const DEFAULT_ELU_SUMMARY_PERCENTILES = [
+	0.01, 0.05, 0.5, 0.9, 0.95, 0.99, 0.999,
+];
+
+/**
+ * Registers a summary and a histogram of event loop utilization and samples
+ * them on an unref'd interval.
+ *
+ * @param {Object} [registry] Registry to register both metrics in; when
+ *   omitted, `registers` is left undefined for the metric constructors.
+ * @param {Object} [config] Collector options: `prefix`, `labels`,
+ *   `eventLoopUtilizationTimeout` (ms, default 100),
+ *   `eventLoopUtilizationBuckets`, `eventLoopUtilizationPercentiles`,
+ *   `eventLoopUtilizationMaxAgeSeconds` (default 60) and
+ *   `eventLoopUtilizationAgeBuckets` (default 5).
+ * @returns {void} Nothing; a no-op when `performance.eventLoopUtilization`
+ *   is unavailable.
+ */
+module.exports = (registry, config = {}) => {
+	if (
+		!perf_hooks ||
+		!perf_hooks.performance ||
+		!perf_hooks.performance.eventLoopUtilization
+	) {
+		return;
+	}
+
+	const eventLoopUtilization = perf_hooks.performance.eventLoopUtilization;
+
+	const namePrefix = config.prefix ? config.prefix : '';
+	const labels = config.labels ? config.labels : {};
+	const labelNames = Object.keys(labels);
+	const registers = registry ? [registry] : undefined;
+
+	const ageBuckets = config.eventLoopUtilizationAgeBuckets
+		? config.eventLoopUtilizationAgeBuckets
+		: 5;
+
+	const maxAgeSeconds = config.eventLoopUtilizationMaxAgeSeconds
+		? config.eventLoopUtilizationMaxAgeSeconds
+		: 60;
+
+	// `eventLoopUtilizationPercentiles` is the name used by the README and the
+	// typings; `eventLoopUtilizationSummaryPercentiles` is kept as a fallback.
+	const percentiles =
+		config.eventLoopUtilizationPercentiles ||
+		config.eventLoopUtilizationSummaryPercentiles ||
+		DEFAULT_ELU_SUMMARY_PERCENTILES;
+
+	const summary = new Summary({
+		name: namePrefix + NODEJS_EVENTLOOP_UTILIZATION_SUMMARY,
+		help: 'Ratio of time the event loop is not idling in the event provider to the total time the event loop is running.',
+		maxAgeSeconds,
+		ageBuckets,
+		percentiles,
+		registers,
+		labelNames,
+	});
+
+	const buckets = config.eventLoopUtilizationBuckets
+		? config.eventLoopUtilizationBuckets
+		: DEFAULT_ELU_HISTOGRAM_BUCKETS;
+
+	const histogram = new Histogram({
+		name: namePrefix + NODEJS_EVENTLOOP_UTILIZATION_HISTOGRAM,
+		help: 'Ratio of time the event loop is not idling in the event provider to the total time the event loop is running.',
+		buckets,
+		registers,
+		labelNames,
+	});
+
+	const intervalTimeout = config.eventLoopUtilizationTimeout || 100;
+
+	let elu1 = eventLoopUtilization();
+	let start = process.hrtime();
+
+	setInterval(() => {
+		const elu2 = eventLoopUtilization();
+		const end = process.hrtime();
+
+		const timeMs = (end[0] - start[0]) * 1000 + (end[1] - start[1]) / 1e6;
+		const value = eventLoopUtilization(elu2, elu1).utilization;
+
+		// The callback may fire late, so the sample is recorded once per
+		// interval that elapsed to keep it weighted by wall-clock time.
+		const blockedIntervalsNumber = Math.round(timeMs / intervalTimeout);
+		for (let i = 0; i < blockedIntervalsNumber; i++) {
+			// Pass the configured labels; `labelNames` alone does not attach them.
+			summary.observe(labels, value);
+			histogram.observe(labels, value);
+		}
+
+		elu1 = elu2;
+		start = end;
+	}, intervalTimeout).unref();
+};
+
+module.exports.metricNames = [
+	NODEJS_EVENTLOOP_UTILIZATION_SUMMARY,
+	NODEJS_EVENTLOOP_UTILIZATION_HISTOGRAM,
+];
diff --git a/test/metrics/eventLoopUtilizationTest.js b/test/metrics/eventLoopUtilizationTest.js
new file mode 100644
index 00000000..96e5da60
--- /dev/null
+++ b/test/metrics/eventLoopUtilizationTest.js
@@ -0,0 +1,108 @@
+'use strict';
+
+const { setTimeout: sleep } = require('timers/promises');
+const register = require('../../index').register;
+const elu = require('../../lib/metrics/eventLoopUtilization');
+const { eventLoopUtilization } = require('perf_hooks').performance;
+
+describe('eventLoopUtilization', () => {
+	beforeAll(() => {
+		register.clear();
+	});
+
+	afterEach(() => {
+		register.clear();
+	});
+
+	it('should add metric to the registry', async () => {
+		if (!eventLoopUtilization) return;
+
+		expect(await register.getMetricsAsJSON()).toHaveLength(0);
+
+		elu(register, { eventLoopUtilizationTimeout: 50 });
+		// Keep the loop busy for a random share of each 1000 ms frame, 3000 ms total.
+		const expectedELU = Math.random();
+		await blockEventLoop(expectedELU, 3000);
+
+		const metrics = await register.getMetricsAsJSON();
+		expect(metrics).toHaveLength(2);
+		// Summary: percentile values first, then the _sum and _count samples.
+		{
+			const percentilesCount = 7;
+
+			const eluSummaryMetric = metrics[0];
+			expect(eluSummaryMetric.type).toEqual('summary');
+			expect(eluSummaryMetric.name).toEqual(
+				'nodejs_eventloop_utilization_summary',
+			);
+			expect(eluSummaryMetric.help).toEqual(
+				'Ratio of time the event loop is not idling in the event provider to the total time the event loop is running.',
+			);
+			expect(eluSummaryMetric.values).toHaveLength(percentilesCount + 2);
+
+			const sum = eluSummaryMetric.values[percentilesCount];
+			const count = eluSummaryMetric.values[percentilesCount + 1];
+
+			expect(sum.metricName).toEqual(
+				'nodejs_eventloop_utilization_summary_sum',
+			);
+			expect(count.metricName).toEqual(
+				'nodejs_eventloop_utilization_summary_count',
+			);
+			const calculatedELU = sum.value / count.value;
+			const delta = Math.abs(calculatedELU - expectedELU);
+			expect(delta).toBeLessThanOrEqual(0.05);
+		}
+		// Histogram: bucket values first, then the _sum and _count samples.
+		{
+			const bucketsCount = 14;
+
+			const eluHistogramMetric = metrics[1];
+			expect(eluHistogramMetric.type).toEqual('histogram');
+			expect(eluHistogramMetric.name).toEqual(
+				'nodejs_eventloop_utilization_histogram',
+			);
+			expect(eluHistogramMetric.help).toEqual(
+				'Ratio of time the event loop is not idling in the event provider to the total time the event loop is running.',
+			);
+			expect(eluHistogramMetric.values).toHaveLength(bucketsCount + 2);
+
+			const sum = eluHistogramMetric.values[bucketsCount];
+			const count = eluHistogramMetric.values[bucketsCount + 1];
+
+			expect(sum.metricName).toEqual(
+				'nodejs_eventloop_utilization_histogram_sum',
+			);
+			expect(count.metricName).toEqual(
+				'nodejs_eventloop_utilization_histogram_count',
+			);
+			const calculatedELU = sum.value / count.value;
+			const delta = Math.abs(calculatedELU - expectedELU);
+			expect(delta).toBeLessThanOrEqual(0.05);
+			// Both the +Inf and le=1 buckets are expected to hold every sample.
+			const infBucket = eluHistogramMetric.values[bucketsCount - 1];
+			expect(infBucket.labels.le).toEqual('+Inf');
+			expect(infBucket.value).toEqual(count.value);
+
+			const le1Bucket = eluHistogramMetric.values[bucketsCount - 2];
+			expect(le1Bucket.labels.le).toEqual(1);
+			expect(le1Bucket.value).toEqual(count.value);
+		}
+	});
+});
+// Busy-waits for `ratio` of each 1000 ms frame and sleeps for the remainder.
+async function blockEventLoop(ratio, ms) {
+	const frameMs = 1000;
+	const framesNumber = Math.round(ms / frameMs);
+
+	const blockedFrameTime = ratio * frameMs;
+	const freeFrameTime = frameMs - blockedFrameTime;
+
+	for (let i = 0; i < framesNumber; i++) {
+		const endBlockedTime = Date.now() + blockedFrameTime;
+		while (Date.now() < endBlockedTime) {
+			// spin synchronously so the event loop cannot idle
+		}
+		await sleep(freeFrameTime);
+	}
+}