From a949ece8cc77e7d75bf20c64e7f3b9a0ceb706cb Mon Sep 17 00:00:00 2001 From: Pavel Karpy Date: Tue, 1 Aug 2023 13:54:00 +0300 Subject: [PATCH] metrics: Add histograms for duration metrics Deprecate `prometheus.Counter` analogs. Closes #2351. Signed-off-by: Pavel Karpy --- CHANGELOG.md | 6 ++ pkg/metrics/engine.go | 221 +++++++++++++++++++++++++++++++++--------- pkg/metrics/object.go | 138 ++++++++++++++++++++------ 3 files changed, 291 insertions(+), 74 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a23f8bef6a..e688cb7141 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ Changelog for NeoFS Node - `renew-domain` command for adm - Stored payload metric per container (#2116) - Stored payload metric per shard (#2023) +- Histogram metrics for RPC and engine operations (#2351) ### Fixed - `neo-go` RPC connection loss handling (#1337) @@ -41,6 +42,11 @@ on timeout, try increasing the value, for example, twice. Also note that the execution of commands with the `--await` flag and without an explicitly specified time period is now limited to 1 minute. This value can be changed with `--timeout` flag. +Histogram (not counter) metrics for RPC/engine operation handling time were added. For +each old engine `*operation_name*_duration` counter, a new `*operation_name*_time` histogram is available. +For each old RPC `*operation_name*_req_duration` counter, a new `rpc_*operation_name*_time` histogram is +available. The old ones (the counters) have been deprecated and will be removed in +the next minor release. Deprecated `morph.rpc_endpoint` SN and `morph.endpoint.client` IR configurations have been removed. Use `morph.endpoints` for both instead. 
diff --git a/pkg/metrics/engine.go b/pkg/metrics/engine.go index 533a2b98cb..bce8af2817 100644 --- a/pkg/metrics/engine.go +++ b/pkg/metrics/engine.go @@ -8,19 +8,32 @@ import ( type ( engineMetrics struct { - listContainersDuration prometheus.Counter - estimateContainerSizeDuration prometheus.Counter - deleteDuration prometheus.Counter - existsDuration prometheus.Counter - getDuration prometheus.Counter - headDuration prometheus.Counter - inhumeDuration prometheus.Counter - putDuration prometheus.Counter - rangeDuration prometheus.Counter - searchDuration prometheus.Counter - listObjectsDuration prometheus.Counter - containerSize prometheus.GaugeVec - payloadSize prometheus.GaugeVec + listContainersDurationCounter prometheus.Counter + estimateContainerSizeDurationCounter prometheus.Counter + deleteDurationCounter prometheus.Counter + existsDurationCounter prometheus.Counter + getDurationCounter prometheus.Counter + headDurationCounter prometheus.Counter + inhumeDurationCounter prometheus.Counter + putDurationCounter prometheus.Counter + rangeDurationCounter prometheus.Counter + searchDurationCounter prometheus.Counter + listObjectsDurationCounter prometheus.Counter + + listContainersDuration prometheus.Histogram + estimateContainerSizeDuration prometheus.Histogram + deleteDuration prometheus.Histogram + existsDuration prometheus.Histogram + getDuration prometheus.Histogram + headDuration prometheus.Histogram + inhumeDuration prometheus.Histogram + putDuration prometheus.Histogram + rangeDuration prometheus.Histogram + searchDuration prometheus.Histogram + listObjectsDuration prometheus.Histogram + + containerSize prometheus.GaugeVec + payloadSize prometheus.GaugeVec } ) @@ -28,83 +41,164 @@ const engineSubsystem = "engine" func newEngineMetrics() engineMetrics { var ( - listContainersDuration = prometheus.NewCounter(prometheus.CounterOpts{ + listContainersDuration = prometheus.NewHistogram(prometheus.HistogramOpts{ + Namespace: storageNodeNameSpace, + Subsystem: 
engineSubsystem, + Name: "list_containers_time", + Help: "Engine 'list containers' operations handling time", + }) + + estimateContainerSizeDuration = prometheus.NewHistogram(prometheus.HistogramOpts{ + Namespace: storageNodeNameSpace, + Subsystem: engineSubsystem, + Name: "estimate_container_size_time", + Help: "Engine 'container size' operations handling time", + }) + + deleteDuration = prometheus.NewHistogram(prometheus.HistogramOpts{ + Namespace: storageNodeNameSpace, + Subsystem: engineSubsystem, + Name: "delete_time", + Help: "Engine 'delete' operations handling time", + }) + + existsDuration = prometheus.NewHistogram(prometheus.HistogramOpts{ + Namespace: storageNodeNameSpace, + Subsystem: engineSubsystem, + Name: "exists_time", + Help: "Engine 'exists' operations handling time", + }) + + getDuration = prometheus.NewHistogram(prometheus.HistogramOpts{ + Namespace: storageNodeNameSpace, + Subsystem: engineSubsystem, + Name: "get_time", + Help: "Engine 'get' operations handling time", + }) + + headDuration = prometheus.NewHistogram(prometheus.HistogramOpts{ + Namespace: storageNodeNameSpace, + Subsystem: engineSubsystem, + Name: "head_time", + Help: "Engine 'head' operations handling time", + }) + + inhumeDuration = prometheus.NewHistogram(prometheus.HistogramOpts{ + Namespace: storageNodeNameSpace, + Subsystem: engineSubsystem, + Name: "inhume_time", + Help: "Engine 'inhume' operations handling time", + }) + + putDuration = prometheus.NewHistogram(prometheus.HistogramOpts{ + Namespace: storageNodeNameSpace, + Subsystem: engineSubsystem, + Name: "put_time", + Help: "Engine 'put' operations handling time", + }) + + rangeDuration = prometheus.NewHistogram(prometheus.HistogramOpts{ + Namespace: storageNodeNameSpace, + Subsystem: engineSubsystem, + Name: "range_time", + Help: "Engine 'range' operations handling time", + }) + + searchDuration = prometheus.NewHistogram(prometheus.HistogramOpts{ + Namespace: storageNodeNameSpace, + Subsystem: engineSubsystem, + Name: 
"search_time", + Help: "Engine 'search' operations handling time", + }) + + listObjectsDuration = prometheus.NewHistogram(prometheus.HistogramOpts{ + Namespace: storageNodeNameSpace, + Subsystem: engineSubsystem, + Name: "list_objects_time", + Help: "Engine 'list objects' operations handling time", + }) + ) + + var ( + listContainersDurationCounter = prometheus.NewCounter(prometheus.CounterOpts{ Namespace: storageNodeNameSpace, Subsystem: engineSubsystem, Name: "list_containers_duration", - Help: "Accumulated duration of engine list containers operations", + Help: "Accumulated duration of engine 'list containers' operations [DEPRECATED]", }) - estimateContainerSizeDuration = prometheus.NewCounter(prometheus.CounterOpts{ + estimateContainerSizeDurationCounter = prometheus.NewCounter(prometheus.CounterOpts{ Namespace: storageNodeNameSpace, Subsystem: engineSubsystem, Name: "estimate_container_size_duration", - Help: "Accumulated duration of engine container size estimate operations", + Help: "Accumulated duration of engine 'container size estimate' operations [DEPRECATED]", }) - deleteDuration = prometheus.NewCounter(prometheus.CounterOpts{ + deleteDurationCounter = prometheus.NewCounter(prometheus.CounterOpts{ Namespace: storageNodeNameSpace, Subsystem: engineSubsystem, Name: "delete_duration", - Help: "Accumulated duration of engine delete operations", + Help: "Accumulated duration of engine 'delete' operations [DEPRECATED]", }) - existsDuration = prometheus.NewCounter(prometheus.CounterOpts{ + existsDurationCounter = prometheus.NewCounter(prometheus.CounterOpts{ Namespace: storageNodeNameSpace, Subsystem: engineSubsystem, Name: "exists_duration", - Help: "Accumulated duration of engine exists operations", + Help: "Accumulated duration of engine 'exists' operations [DEPRECATED]", }) - getDuration = prometheus.NewCounter(prometheus.CounterOpts{ + getDurationCounter = prometheus.NewCounter(prometheus.CounterOpts{ Namespace: storageNodeNameSpace, Subsystem: 
engineSubsystem, Name: "get_duration", - Help: "Accumulated duration of engine get operations", + Help: "Accumulated duration of engine 'get' operations [DEPRECATED]", }) - headDuration = prometheus.NewCounter(prometheus.CounterOpts{ + headDurationCounter = prometheus.NewCounter(prometheus.CounterOpts{ Namespace: storageNodeNameSpace, Subsystem: engineSubsystem, Name: "head_duration", - Help: "Accumulated duration of engine head operations", + Help: "Accumulated duration of engine 'head' operations [DEPRECATED]", }) - inhumeDuration = prometheus.NewCounter(prometheus.CounterOpts{ + inhumeDurationCounter = prometheus.NewCounter(prometheus.CounterOpts{ Namespace: storageNodeNameSpace, Subsystem: engineSubsystem, Name: "inhume_duration", - Help: "Accumulated duration of engine inhume operations", + Help: "Accumulated duration of engine 'inhume' operations [DEPRECATED]", }) - putDuration = prometheus.NewCounter(prometheus.CounterOpts{ + putDurationCounter = prometheus.NewCounter(prometheus.CounterOpts{ Namespace: storageNodeNameSpace, Subsystem: engineSubsystem, Name: "put_duration", - Help: "Accumulated duration of engine put operations", + Help: "Accumulated duration of engine 'put' operations [DEPRECATED]", }) - rangeDuration = prometheus.NewCounter(prometheus.CounterOpts{ + rangeDurationCounter = prometheus.NewCounter(prometheus.CounterOpts{ Namespace: storageNodeNameSpace, Subsystem: engineSubsystem, Name: "range_duration", - Help: "Accumulated duration of engine range operations", + Help: "Accumulated duration of engine 'range' operations [DEPRECATED]", }) - searchDuration = prometheus.NewCounter(prometheus.CounterOpts{ + searchDurationCounter = prometheus.NewCounter(prometheus.CounterOpts{ Namespace: storageNodeNameSpace, Subsystem: engineSubsystem, Name: "search_duration", - Help: "Accumulated duration of engine search operations", + Help: "Accumulated duration of engine 'search' operations [DEPRECATED]", }) - listObjectsDuration = 
prometheus.NewCounter(prometheus.CounterOpts{ + listObjectsDurationCounter = prometheus.NewCounter(prometheus.CounterOpts{ Namespace: storageNodeNameSpace, Subsystem: engineSubsystem, Name: "list_objects_duration", - Help: "Accumulated duration of engine list objects operations", + Help: "Accumulated duration of engine 'list objects' operations [DEPRECATED]", }) + ) + var ( containerSize = prometheus.NewGaugeVec(prometheus.GaugeOpts{ Namespace: storageNodeNameSpace, Subsystem: engineSubsystem, @@ -134,6 +228,18 @@ func newEngineMetrics() engineMetrics { listObjectsDuration: listObjectsDuration, containerSize: *containerSize, payloadSize: *payloadSize, + + listContainersDurationCounter: listContainersDurationCounter, + estimateContainerSizeDurationCounter: estimateContainerSizeDurationCounter, + deleteDurationCounter: deleteDurationCounter, + existsDurationCounter: existsDurationCounter, + getDurationCounter: getDurationCounter, + headDurationCounter: headDurationCounter, + inhumeDurationCounter: inhumeDurationCounter, + putDurationCounter: putDurationCounter, + rangeDurationCounter: rangeDurationCounter, + searchDurationCounter: searchDurationCounter, + listObjectsDurationCounter: listObjectsDurationCounter, } } @@ -151,50 +257,73 @@ func (m engineMetrics) register() { prometheus.MustRegister(m.listObjectsDuration) prometheus.MustRegister(m.containerSize) prometheus.MustRegister(m.payloadSize) + + prometheus.MustRegister(m.listContainersDurationCounter) + prometheus.MustRegister(m.estimateContainerSizeDurationCounter) + prometheus.MustRegister(m.deleteDurationCounter) + prometheus.MustRegister(m.existsDurationCounter) + prometheus.MustRegister(m.getDurationCounter) + prometheus.MustRegister(m.headDurationCounter) + prometheus.MustRegister(m.inhumeDurationCounter) + prometheus.MustRegister(m.putDurationCounter) + prometheus.MustRegister(m.rangeDurationCounter) + prometheus.MustRegister(m.searchDurationCounter) + prometheus.MustRegister(m.listObjectsDurationCounter) 
} func (m engineMetrics) AddListContainersDuration(d time.Duration) { - m.listObjectsDuration.Add(float64(d)) + m.listContainersDurationCounter.Add(float64(d)) + m.listContainersDuration.Observe(d.Seconds()) } func (m engineMetrics) AddEstimateContainerSizeDuration(d time.Duration) { - m.estimateContainerSizeDuration.Add(float64(d)) + m.estimateContainerSizeDurationCounter.Add(float64(d)) + m.estimateContainerSizeDuration.Observe(d.Seconds()) } func (m engineMetrics) AddDeleteDuration(d time.Duration) { - m.deleteDuration.Add(float64(d)) + m.deleteDurationCounter.Add(float64(d)) + m.deleteDuration.Observe(d.Seconds()) } func (m engineMetrics) AddExistsDuration(d time.Duration) { - m.existsDuration.Add(float64(d)) + m.existsDurationCounter.Add(float64(d)) + m.existsDuration.Observe(d.Seconds()) } func (m engineMetrics) AddGetDuration(d time.Duration) { - m.getDuration.Add(float64(d)) + m.getDurationCounter.Add(float64(d)) + m.getDuration.Observe(d.Seconds()) } func (m engineMetrics) AddHeadDuration(d time.Duration) { - m.headDuration.Add(float64(d)) + m.headDurationCounter.Add(float64(d)) + m.headDuration.Observe(d.Seconds()) } func (m engineMetrics) AddInhumeDuration(d time.Duration) { - m.inhumeDuration.Add(float64(d)) + m.inhumeDurationCounter.Add(float64(d)) + m.inhumeDuration.Observe(d.Seconds()) } func (m engineMetrics) AddPutDuration(d time.Duration) { - m.putDuration.Add(float64(d)) + m.putDurationCounter.Add(float64(d)) + m.putDuration.Observe(d.Seconds()) } func (m engineMetrics) AddRangeDuration(d time.Duration) { - m.rangeDuration.Add(float64(d)) + m.rangeDurationCounter.Add(float64(d)) + m.rangeDuration.Observe(d.Seconds()) } func (m engineMetrics) AddSearchDuration(d time.Duration) { - m.searchDuration.Add(float64(d)) + m.searchDurationCounter.Add(float64(d)) + m.searchDuration.Observe(d.Seconds()) } func (m engineMetrics) AddListObjectsDuration(d time.Duration) { - m.listObjectsDuration.Add(float64(d)) + m.listObjectsDurationCounter.Add(float64(d)) + 
m.listObjectsDuration.Observe(d.Seconds()) } func (m engineMetrics) AddToContainerSize(cnrID string, size int64) { diff --git a/pkg/metrics/object.go b/pkg/metrics/object.go index 3ea7ac2e41..ac72f5ba0f 100644 --- a/pkg/metrics/object.go +++ b/pkg/metrics/object.go @@ -24,19 +24,27 @@ type ( rangeCounter methodCount rangeHashCounter methodCount - getDuration prometheus.Counter - putDuration prometheus.Counter - headDuration prometheus.Counter - searchDuration prometheus.Counter - deleteDuration prometheus.Counter - rangeDuration prometheus.Counter - rangeHashDuration prometheus.Counter + getDuration prometheus.Histogram + putDuration prometheus.Histogram + headDuration prometheus.Histogram + searchDuration prometheus.Histogram + deleteDuration prometheus.Histogram + rangeDuration prometheus.Histogram + rangeHashDuration prometheus.Histogram putPayload prometheus.Counter getPayload prometheus.Counter shardMetrics *prometheus.GaugeVec shardsReadonly *prometheus.GaugeVec + + getDurationCounter prometheus.Counter + putDurationCounter prometheus.Counter + headDurationCounter prometheus.Counter + searchDurationCounter prometheus.Counter + deleteDurationCounter prometheus.Counter + rangeDurationCounter prometheus.Counter + rangeHashDurationCounter prometheus.Counter } ) @@ -87,53 +95,104 @@ func newObjectServiceMetrics() objectServiceMetrics { ) var ( // Request duration metrics. 
- getDuration = prometheus.NewCounter(prometheus.CounterOpts{ + getDuration = prometheus.NewHistogram(prometheus.HistogramOpts{ + Namespace: storageNodeNameSpace, + Subsystem: objectSubsystem, + Name: "rpc_get_time", + Help: "RPC 'get' request handling time", + }) + + putDuration = prometheus.NewHistogram(prometheus.HistogramOpts{ + Namespace: storageNodeNameSpace, + Subsystem: objectSubsystem, + Name: "rpc_put_time", + Help: "RPC 'put' request handling time", + }) + + headDuration = prometheus.NewHistogram(prometheus.HistogramOpts{ + Namespace: storageNodeNameSpace, + Subsystem: objectSubsystem, + Name: "rpc_head_time", + Help: "RPC 'head' request handling time", + }) + + searchDuration = prometheus.NewHistogram(prometheus.HistogramOpts{ + Namespace: storageNodeNameSpace, + Subsystem: objectSubsystem, + Name: "rpc_search_time", + Help: "RPC 'search' request handling time", + }) + + deleteDuration = prometheus.NewHistogram(prometheus.HistogramOpts{ + Namespace: storageNodeNameSpace, + Subsystem: objectSubsystem, + Name: "rpc_delete_time", + Help: "RPC 'delete' request handling time", + }) + + rangeDuration = prometheus.NewHistogram(prometheus.HistogramOpts{ + Namespace: storageNodeNameSpace, + Subsystem: objectSubsystem, + Name: "rpc_range_time", + Help: "RPC 'range' request handling time", + }) + + rangeHashDuration = prometheus.NewHistogram(prometheus.HistogramOpts{ + Namespace: storageNodeNameSpace, + Subsystem: objectSubsystem, + Name: "rpc_range_hash_time", + Help: "RPC 'range hash' request handling time", + }) + ) + + var ( // Request duration metrics (deprecated). 
+ getDurationCounter = prometheus.NewCounter(prometheus.CounterOpts{ Namespace: storageNodeNameSpace, Subsystem: objectSubsystem, Name: "get_req_duration", - Help: "Accumulated get request process duration", + Help: "Accumulated 'get' request process duration [DEPRECATED]", }) - putDuration = prometheus.NewCounter(prometheus.CounterOpts{ + putDurationCounter = prometheus.NewCounter(prometheus.CounterOpts{ Namespace: storageNodeNameSpace, Subsystem: objectSubsystem, Name: "put_req_duration", - Help: "Accumulated put request process duration", + Help: "Accumulated 'put' request process duration [DEPRECATED]", }) - headDuration = prometheus.NewCounter(prometheus.CounterOpts{ + headDurationCounter = prometheus.NewCounter(prometheus.CounterOpts{ Namespace: storageNodeNameSpace, Subsystem: objectSubsystem, Name: "head_req_duration", - Help: "Accumulated head request process duration", + Help: "Accumulated 'head' request process duration [DEPRECATED]", }) - searchDuration = prometheus.NewCounter(prometheus.CounterOpts{ + searchDurationCounter = prometheus.NewCounter(prometheus.CounterOpts{ Namespace: storageNodeNameSpace, Subsystem: objectSubsystem, Name: "search_req_duration", - Help: "Accumulated search request process duration", + Help: "Accumulated 'search' request process duration [DEPRECATED]", }) - deleteDuration = prometheus.NewCounter(prometheus.CounterOpts{ + deleteDurationCounter = prometheus.NewCounter(prometheus.CounterOpts{ Namespace: storageNodeNameSpace, Subsystem: objectSubsystem, Name: "delete_req_duration", - Help: "Accumulated delete request process duration", + Help: "Accumulated 'delete' request process duration [DEPRECATED]", }) - rangeDuration = prometheus.NewCounter(prometheus.CounterOpts{ + rangeDurationCounter = prometheus.NewCounter(prometheus.CounterOpts{ Namespace: storageNodeNameSpace, Subsystem: objectSubsystem, Name: "range_req_duration", - Help: "Accumulated range request process duration", + Help: "Accumulated 'range' request process 
duration [DEPRECATED]", }) - rangeHashDuration = prometheus.NewCounter(prometheus.CounterOpts{ + rangeHashDurationCounter = prometheus.NewCounter(prometheus.CounterOpts{ Namespace: storageNodeNameSpace, Subsystem: objectSubsystem, Name: "range_hash_req_duration", - Help: "Accumulated range hash request process duration", + Help: "Accumulated 'range hash' request process duration [DEPRECATED]", }) ) @@ -190,6 +249,14 @@ func newObjectServiceMetrics() objectServiceMetrics { getPayload: getPayload, shardMetrics: shardsMetrics, shardsReadonly: shardsReadonly, + + getDurationCounter: getDurationCounter, + putDurationCounter: putDurationCounter, + headDurationCounter: headDurationCounter, + searchDurationCounter: searchDurationCounter, + deleteDurationCounter: deleteDurationCounter, + rangeDurationCounter: rangeDurationCounter, + rangeHashDurationCounter: rangeHashDurationCounter, } } @@ -215,6 +282,14 @@ func (m objectServiceMetrics) register() { prometheus.MustRegister(m.shardMetrics) prometheus.MustRegister(m.shardsReadonly) + + prometheus.MustRegister(m.getDurationCounter) + prometheus.MustRegister(m.putDurationCounter) + prometheus.MustRegister(m.headDurationCounter) + prometheus.MustRegister(m.searchDurationCounter) + prometheus.MustRegister(m.deleteDurationCounter) + prometheus.MustRegister(m.rangeDurationCounter) + prometheus.MustRegister(m.rangeHashDurationCounter) } func (m objectServiceMetrics) IncGetReqCounter(success bool) { @@ -246,31 +321,38 @@ func (m objectServiceMetrics) IncRangeHashReqCounter(success bool) { } func (m objectServiceMetrics) AddGetReqDuration(d time.Duration) { - m.getDuration.Add(float64(d)) + m.getDurationCounter.Add(float64(d)) + m.getDuration.Observe(d.Seconds()) } func (m objectServiceMetrics) AddPutReqDuration(d time.Duration) { - m.putDuration.Add(float64(d)) + m.putDurationCounter.Add(float64(d)) + m.putDuration.Observe(d.Seconds()) } func (m objectServiceMetrics) AddHeadReqDuration(d time.Duration) { - 
m.headDuration.Add(float64(d)) + m.headDurationCounter.Add(float64(d)) + m.headDuration.Observe(d.Seconds()) } func (m objectServiceMetrics) AddSearchReqDuration(d time.Duration) { - m.searchDuration.Add(float64(d)) + m.searchDurationCounter.Add(float64(d)) + m.searchDuration.Observe(d.Seconds()) } func (m objectServiceMetrics) AddDeleteReqDuration(d time.Duration) { - m.deleteDuration.Add(float64(d)) + m.deleteDurationCounter.Add(float64(d)) + m.deleteDuration.Observe(d.Seconds()) } func (m objectServiceMetrics) AddRangeReqDuration(d time.Duration) { - m.rangeDuration.Add(float64(d)) + m.rangeDurationCounter.Add(float64(d)) + m.rangeDuration.Observe(d.Seconds()) } func (m objectServiceMetrics) AddRangeHashReqDuration(d time.Duration) { - m.rangeHashDuration.Add(float64(d)) + m.rangeHashDurationCounter.Add(float64(d)) + m.rangeHashDuration.Observe(d.Seconds()) } func (m objectServiceMetrics) AddPutPayload(ln int) {