diff --git a/pkg/ccl/changefeedccl/changefeed_processors.go b/pkg/ccl/changefeedccl/changefeed_processors.go index 9447043021c5..5cef0e4cca9e 100644 --- a/pkg/ccl/changefeedccl/changefeed_processors.go +++ b/pkg/ccl/changefeedccl/changefeed_processors.go @@ -817,7 +817,7 @@ func (j *jobState) checkpointCompleted(ctx context.Context, checkpointDuration t j.metrics.CheckpointHistNanos.RecordValue(checkpointDuration.Nanoseconds()) j.lastProgressUpdate = j.ts.Now() - j.checkpointDuration = time.Duration(j.metrics.CheckpointHistNanos.Snapshot().Mean()) + j.checkpointDuration = time.Duration(j.metrics.CheckpointHistNanos.Mean()) j.progressUpdatesSkipped = false } diff --git a/pkg/ccl/changefeedccl/metrics.go b/pkg/ccl/changefeedccl/metrics.go index 302babbde4ab..1cd2c7805d4b 100644 --- a/pkg/ccl/changefeedccl/metrics.go +++ b/pkg/ccl/changefeedccl/metrics.go @@ -417,20 +417,15 @@ func newAggregateMetrics(histogramWindow time.Duration) *AggMetrics { a := &AggMetrics{ ErrorRetries: b.Counter(metaChangefeedErrorRetries), EmittedMessages: b.Counter(metaChangefeedEmittedMessages), - MessageSize: b.Histogram(metaMessageSize, - histogramWindow, 10<<20 /* 10MB max message size */, 1), - EmittedBytes: b.Counter(metaChangefeedEmittedBytes), - FlushedBytes: b.Counter(metaChangefeedFlushedBytes), - Flushes: b.Counter(metaChangefeedFlushes), - - BatchHistNanos: b.Histogram(metaChangefeedBatchHistNanos, - histogramWindow, changefeedBatchHistMaxLatency.Nanoseconds(), 1), - FlushHistNanos: b.Histogram(metaChangefeedFlushHistNanos, - histogramWindow, changefeedFlushHistMaxLatency.Nanoseconds(), 2), - CommitLatency: b.Histogram(metaCommitLatency, - histogramWindow, commitLatencyMaxValue.Nanoseconds(), 1), - AdmitLatency: b.Histogram(metaAdmitLatency, histogramWindow, - admitLatencyMaxValue.Nanoseconds(), 1), + MessageSize: b.Histogram(metaMessageSize, histogramWindow, metric.DataSizeBuckets), + EmittedBytes: b.Counter(metaChangefeedEmittedBytes), + FlushedBytes: b.Counter(metaChangefeedFlushedBytes), + Flushes: b.Counter(metaChangefeedFlushes), + + BatchHistNanos: b.Histogram(metaChangefeedBatchHistNanos, histogramWindow, metric.BatchProcessLatencyBuckets), + FlushHistNanos: b.Histogram(metaChangefeedFlushHistNanos, histogramWindow, metric.BatchProcessLatencyBuckets), + CommitLatency: b.Histogram(metaCommitLatency, histogramWindow, metric.BatchProcessLatencyBuckets), + AdmitLatency: b.Histogram(metaAdmitLatency, histogramWindow, metric.BatchProcessLatencyBuckets), BackfillCount: b.Gauge(metaChangefeedBackfillCount), BackfillPendingRanges: b.Gauge(metaChangefeedBackfillPendingRanges), RunningCount: b.Gauge(metaChangefeedRunning), @@ -529,17 +524,16 @@ func (m *Metrics) getSLIMetrics(scope string) (*sliMetrics, error) { // MakeMetrics makes the metrics for changefeed monitoring. 
func MakeMetrics(histogramWindow time.Duration) metric.Struct { m := &Metrics{ - AggMetrics: newAggregateMetrics(histogramWindow), - KVFeedMetrics: kvevent.MakeMetrics(histogramWindow), - SchemaFeedMetrics: schemafeed.MakeMetrics(histogramWindow), - ResolvedMessages: metric.NewCounter(metaChangefeedForwardedResolvedMessages), - Failures: metric.NewCounter(metaChangefeedFailures), - QueueTimeNanos: metric.NewCounter(metaEventQueueTime), - CheckpointHistNanos: metric.NewHistogram(metaChangefeedCheckpointHistNanos, histogramWindow, - changefeedCheckpointHistMaxLatency.Nanoseconds(), 2), - FrontierUpdates: metric.NewCounter(metaChangefeedFrontierUpdates), - ThrottleMetrics: cdcutils.MakeMetrics(histogramWindow), - ReplanCount: metric.NewCounter(metaChangefeedReplanCount), + AggMetrics: newAggregateMetrics(histogramWindow), + KVFeedMetrics: kvevent.MakeMetrics(histogramWindow), + SchemaFeedMetrics: schemafeed.MakeMetrics(histogramWindow), + ResolvedMessages: metric.NewCounter(metaChangefeedForwardedResolvedMessages), + Failures: metric.NewCounter(metaChangefeedFailures), + QueueTimeNanos: metric.NewCounter(metaEventQueueTime), + CheckpointHistNanos: metric.NewHistogram(metaChangefeedCheckpointHistNanos, histogramWindow, metric.IOLatencyBuckets), + FrontierUpdates: metric.NewCounter(metaChangefeedFrontierUpdates), + ThrottleMetrics: cdcutils.MakeMetrics(histogramWindow), + ReplanCount: metric.NewCounter(metaChangefeedReplanCount), } m.mu.resolved = make(map[int]hlc.Timestamp) diff --git a/pkg/ccl/sqlproxyccl/connector_test.go b/pkg/ccl/sqlproxyccl/connector_test.go index 0d8eb941cdef..a93296da853b 100644 --- a/pkg/ccl/sqlproxyccl/connector_test.go +++ b/pkg/ccl/sqlproxyccl/connector_test.go @@ -375,7 +375,9 @@ func TestConnector_dialTenantCluster(t *testing.T) { defer cancel() c := &connector{ - DialTenantLatency: metric.NewLatency(metaDialTenantLatency, time.Millisecond), + DialTenantLatency: metric.NewHistogram( + metaDialTenantLatency, time.Millisecond, metric.IOLatencyBuckets, + ), DialTenantRetries: metric.NewCounter(metaDialTenantRetries), } c.testingKnobs.lookupAddr = func(ctx context.Context) (string, error) { @@ -403,8 +405,10 @@ func TestConnector_dialTenantCluster(t *testing.T) { var reportFailureFnCount int c := &connector{ - TenantID: roachpb.MakeTenantID(42), - DialTenantLatency: metric.NewLatency(metaDialTenantLatency, time.Millisecond), + TenantID: roachpb.MakeTenantID(42), + DialTenantLatency: metric.NewHistogram( + metaDialTenantLatency, time.Millisecond, metric.IOLatencyBuckets, + ), DialTenantRetries: metric.NewCounter(metaDialTenantRetries), } c.DirectoryCache = &testTenantDirectoryCache{ diff --git a/pkg/ccl/sqlproxyccl/metrics.go b/pkg/ccl/sqlproxyccl/metrics.go index cf7abe31f044..daf88d378c5d 100644 --- a/pkg/ccl/sqlproxyccl/metrics.go +++ b/pkg/ccl/sqlproxyccl/metrics.go @@ -224,16 +224,18 @@ func makeProxyMetrics() metrics { RoutingErrCount: metric.NewCounter(metaRoutingErrCount), RefusedConnCount: metric.NewCounter(metaRefusedConnCount), SuccessfulConnCount: metric.NewCounter(metaSuccessfulConnCount), - ConnectionLatency: metric.NewLatency( + ConnectionLatency: metric.NewHistogram( metaConnMigrationAttemptedCount, base.DefaultHistogramWindowInterval(), + metric.IOLatencyBuckets, ), AuthFailedCount: metric.NewCounter(metaAuthFailedCount), ExpiredClientConnCount: metric.NewCounter(metaExpiredClientConnCount), // Connector metrics. 
- DialTenantLatency: metric.NewLatency( + DialTenantLatency: metric.NewHistogram( metaDialTenantLatency, base.DefaultHistogramWindowInterval(), + metric.NetworkLatencyBuckets, ), DialTenantRetries: metric.NewCounter(metaDialTenantRetries), // Connection migration metrics. @@ -241,15 +243,15 @@ func makeProxyMetrics() metrics { ConnMigrationErrorFatalCount: metric.NewCounter(metaConnMigrationErrorFatalCount), ConnMigrationErrorRecoverableCount: metric.NewCounter(metaConnMigrationErrorRecoverableCount), ConnMigrationAttemptedCount: metric.NewCounter(metaConnMigrationAttemptedCount), - ConnMigrationAttemptedLatency: metric.NewLatency( + ConnMigrationAttemptedLatency: metric.NewHistogram( metaConnMigrationAttemptedLatency, base.DefaultHistogramWindowInterval(), + metric.NetworkLatencyBuckets, ), ConnMigrationTransferResponseMessageSize: metric.NewHistogram( metaConnMigrationTransferResponseMessageSize, base.DefaultHistogramWindowInterval(), - maxExpectedTransferResponseMessageSize, - 1, + metric.DataSizeBuckets, ), QueryCancelReceivedPGWire: metric.NewCounter(metaQueryCancelReceivedPGWire), QueryCancelReceivedHTTP: metric.NewCounter(metaQueryCancelReceivedHTTP), diff --git a/pkg/ccl/streamingccl/streamingest/metrics.go b/pkg/ccl/streamingccl/streamingest/metrics.go index 79b98a6667fb..f2f95069378b 100644 --- a/pkg/ccl/streamingccl/streamingest/metrics.go +++ b/pkg/ccl/streamingccl/streamingest/metrics.go @@ -135,11 +135,11 @@ func MakeMetrics(histogramWindow time.Duration) metric.Struct { ResolvedEvents: metric.NewCounter(metaStreamingResolvedEventsIngested), JobProgressUpdates: metric.NewCounter(metaJobProgressUpdates), FlushHistNanos: metric.NewHistogram(metaStreamingFlushHistNanos, - histogramWindow, streamingFlushHistMaxLatency.Nanoseconds(), 1), + histogramWindow, metric.BatchProcessLatencyBuckets), CommitLatency: metric.NewHistogram(metaStreamingCommitLatency, - histogramWindow, streamingCommitLatencyMaxValue.Nanoseconds(), 1), + histogramWindow, metric.BatchProcessLatencyBuckets), AdmitLatency: metric.NewHistogram(metaStreamingAdmitLatency, - histogramWindow, streamingAdmitLatencyMaxValue.Nanoseconds(), 1), + histogramWindow, metric.BatchProcessLatencyBuckets), RunningCount: metric.NewGauge(metaStreamsRunning), EarliestDataCheckpointSpan: metric.NewGauge(metaEarliestDataCheckpointSpan), LatestDataCheckpointSpan: metric.NewGauge(metaLatestDataCheckpointSpan), diff --git a/pkg/kv/bulk/bulk_metrics.go b/pkg/kv/bulk/bulk_metrics.go index d85d5883e103..cc6a8645d47a 100644 --- a/pkg/kv/bulk/bulk_metrics.go +++ b/pkg/kv/bulk/bulk_metrics.go @@ -50,7 +50,7 @@ const log10int64times1000 = 19 * 1000 // MakeBulkMetrics instantiates the metrics holder for bulk operation monitoring. 
func MakeBulkMetrics(histogramWindow time.Duration) Metrics { return Metrics{ - MaxBytesHist: metric.NewHistogram(metaMemMaxBytes, histogramWindow, log10int64times1000, 3), + MaxBytesHist: metric.NewHistogram(metaMemMaxBytes, histogramWindow, metric.MemoryUsageBuckets), CurBytesCount: metric.NewGauge(metaMemCurBytes), } } diff --git a/pkg/kv/kvclient/kvcoord/txn_metrics.go b/pkg/kv/kvclient/kvcoord/txn_metrics.go index 372361a64699..b9a44f9e760d 100644 --- a/pkg/kv/kvclient/kvcoord/txn_metrics.go +++ b/pkg/kv/kvclient/kvcoord/txn_metrics.go @@ -274,11 +274,11 @@ func MakeTxnMetrics(histogramWindow time.Duration) TxnMetrics { RefreshFailWithCondensedSpans: metric.NewCounter(metaRefreshFailWithCondensedSpans), RefreshMemoryLimitExceeded: metric.NewCounter(metaRefreshMemoryLimitExceeded), RefreshAutoRetries: metric.NewCounter(metaRefreshAutoRetries), - Durations: metric.NewLatency(metaDurationsHistograms, histogramWindow), + Durations: metric.NewHistogram(metaDurationsHistograms, histogramWindow, metric.IOLatencyBuckets), TxnsWithCondensedIntents: metric.NewCounter(metaTxnsWithCondensedIntentSpans), TxnsWithCondensedIntentsGauge: metric.NewGauge(metaTxnsWithCondensedIntentSpansGauge), TxnsRejectedByLockSpanBudget: metric.NewCounter(metaTxnsRejectedByLockSpanBudget), - Restarts: metric.NewHistogram(metaRestartsHistogram, histogramWindow, 100, 3), + Restarts: metric.NewHistogram(metaRestartsHistogram, histogramWindow, metric.CountBuckets), RestartsWriteTooOld: telemetry.NewCounterWithMetric(metaRestartsWriteTooOld), RestartsWriteTooOldMulti: telemetry.NewCounterWithMetric(metaRestartsWriteTooOldMulti), RestartsSerializable: telemetry.NewCounterWithMetric(metaRestartsSerializable), diff --git a/pkg/kv/kvprober/kvprober.go b/pkg/kv/kvprober/kvprober.go index e2566a89bc30..8d985a089aad 100644 --- a/pkg/kv/kvprober/kvprober.go +++ b/pkg/kv/kvprober/kvprober.go @@ -214,14 +214,18 @@ func NewProber(opts Opts) *Prober { writePlanner: newMeta2Planner(opts.DB, opts.Settings, func() time.Duration { return writeInterval.Get(&opts.Settings.SV) }), metrics: Metrics{ - ReadProbeAttempts: metric.NewCounter(metaReadProbeAttempts), - ReadProbeFailures: metric.NewCounter(metaReadProbeFailures), - ReadProbeLatency: metric.NewLatency(metaReadProbeLatency, opts.HistogramWindowInterval), + ReadProbeAttempts: metric.NewCounter(metaReadProbeAttempts), + ReadProbeFailures: metric.NewCounter(metaReadProbeFailures), + ReadProbeLatency: metric.NewHistogram( + metaReadProbeLatency, opts.HistogramWindowInterval, metric.IOLatencyBuckets, + ), WriteProbeAttempts: metric.NewCounter(metaWriteProbeAttempts), WriteProbeFailures: metric.NewCounter(metaWriteProbeFailures), - WriteProbeLatency: metric.NewLatency(metaWriteProbeLatency, opts.HistogramWindowInterval), - ProbePlanAttempts: metric.NewCounter(metaProbePlanAttempts), - ProbePlanFailures: metric.NewCounter(metaProbePlanFailures), + WriteProbeLatency: metric.NewHistogram( + metaWriteProbeLatency, opts.HistogramWindowInterval, metric.IOLatencyBuckets, + ), + ProbePlanAttempts: metric.NewCounter(metaProbePlanAttempts), + ProbePlanFailures: metric.NewCounter(metaProbePlanFailures), }, tracer: opts.Tracer, } diff --git a/pkg/kv/kvserver/liveness/liveness.go b/pkg/kv/kvserver/liveness/liveness.go index 215a45e6ad22..4e52dc1d748e 100644 --- a/pkg/kv/kvserver/liveness/liveness.go +++ b/pkg/kv/kvserver/liveness/liveness.go @@ -309,7 +309,9 @@ func NewNodeLiveness(opts NodeLivenessOptions) *NodeLiveness { HeartbeatSuccesses: metric.NewCounter(metaHeartbeatSuccesses), 
HeartbeatFailures: telemetry.NewCounterWithMetric(metaHeartbeatFailures), EpochIncrements: telemetry.NewCounterWithMetric(metaEpochIncrements), - HeartbeatLatency: metric.NewLatency(metaHeartbeatLatency, opts.HistogramWindowInterval), + HeartbeatLatency: metric.NewHistogram( + metaHeartbeatLatency, opts.HistogramWindowInterval, metric.IOLatencyBuckets, + ), } nl.mu.nodes = make(map[roachpb.NodeID]Record) nl.heartbeatToken <- struct{}{} diff --git a/pkg/kv/kvserver/metrics.go b/pkg/kv/kvserver/metrics.go index 8a47f9e2e975..1a22a928fab9 100644 --- a/pkg/kv/kvserver/metrics.go +++ b/pkg/kv/kvserver/metrics.go @@ -2245,18 +2245,27 @@ func newStoreMetrics(histogramWindow time.Duration) *StoreMetrics { // Raft processing metrics. RaftTicks: metric.NewCounter(metaRaftTicks), RaftQuotaPoolPercentUsed: metric.NewHistogram( - // NB: this results in 64 buckets (i.e. 64 timeseries in prometheus). - metaRaftQuotaPoolPercentUsed, histogramWindow, 100 /* maxVal */, 1, /* sigFigs */ + metaRaftQuotaPoolPercentUsed, histogramWindow, metric.PercentBuckets, ), - RaftWorkingDurationNanos: metric.NewCounter(metaRaftWorkingDurationNanos), - RaftTickingDurationNanos: metric.NewCounter(metaRaftTickingDurationNanos), - RaftCommandsApplied: metric.NewCounter(metaRaftCommandsApplied), - RaftLogCommitLatency: metric.NewLatency(metaRaftLogCommitLatency, histogramWindow), - RaftCommandCommitLatency: metric.NewLatency(metaRaftCommandCommitLatency, histogramWindow), - RaftHandleReadyLatency: metric.NewLatency(metaRaftHandleReadyLatency, histogramWindow), - RaftApplyCommittedLatency: metric.NewLatency(metaRaftApplyCommittedLatency, histogramWindow), - RaftSchedulerLatency: metric.NewLatency(metaRaftSchedulerLatency, histogramWindow), - RaftTimeoutCampaign: metric.NewCounter(metaRaftTimeoutCampaign), + RaftWorkingDurationNanos: metric.NewCounter(metaRaftWorkingDurationNanos), + RaftTickingDurationNanos: metric.NewCounter(metaRaftTickingDurationNanos), + RaftCommandsApplied: metric.NewCounter(metaRaftCommandsApplied), + RaftLogCommitLatency: metric.NewHistogram( + metaRaftLogCommitLatency, histogramWindow, metric.IOLatencyBuckets, + ), + RaftCommandCommitLatency: metric.NewHistogram( + metaRaftCommandCommitLatency, histogramWindow, metric.IOLatencyBuckets, + ), + RaftHandleReadyLatency: metric.NewHistogram( + metaRaftHandleReadyLatency, histogramWindow, metric.IOLatencyBuckets, + ), + RaftApplyCommittedLatency: metric.NewHistogram( + metaRaftApplyCommittedLatency, histogramWindow, metric.IOLatencyBuckets, + ), + RaftSchedulerLatency: metric.NewHistogram( + metaRaftSchedulerLatency, histogramWindow, metric.IOLatencyBuckets, + ), + RaftTimeoutCampaign: metric.NewCounter(metaRaftTimeoutCampaign), // Raft message metrics. RaftRcvdMessages: [...]*metric.Counter{ @@ -2394,8 +2403,12 @@ func newStoreMetrics(histogramWindow time.Duration) *StoreMetrics { ReplicaCircuitBreakerCumTripped: metric.NewCounter(metaReplicaCircuitBreakerCumTripped), // Replica batch evaluation. 
- ReplicaReadBatchEvaluationLatency: metric.NewLatency(metaReplicaReadBatchEvaluationLatency, histogramWindow), - ReplicaWriteBatchEvaluationLatency: metric.NewLatency(metaReplicaWriteBatchEvaluationLatency, histogramWindow), + ReplicaReadBatchEvaluationLatency: metric.NewHistogram( + metaReplicaReadBatchEvaluationLatency, histogramWindow, metric.IOLatencyBuckets, + ), + ReplicaWriteBatchEvaluationLatency: metric.NewHistogram( + metaReplicaWriteBatchEvaluationLatency, histogramWindow, metric.IOLatencyBuckets, + ), } { diff --git a/pkg/kv/kvserver/txnwait/metrics.go b/pkg/kv/kvserver/txnwait/metrics.go index 1d4c6f2b0653..20f0874dc11a 100644 --- a/pkg/kv/kvserver/txnwait/metrics.go +++ b/pkg/kv/kvserver/txnwait/metrics.go @@ -74,8 +74,7 @@ func NewMetrics(histogramWindowInterval time.Duration) *Metrics { Unit: metric.Unit_NANOSECONDS, }, histogramWindowInterval, - time.Hour.Nanoseconds(), - 1, + metric.LongRunningProcessLatencyBuckets, ), QueryWaitTime: metric.NewHistogram( @@ -86,8 +85,7 @@ func NewMetrics(histogramWindowInterval time.Duration) *Metrics { Unit: metric.Unit_NANOSECONDS, }, histogramWindowInterval, - time.Hour.Nanoseconds(), - 1, + metric.LongRunningProcessLatencyBuckets, ), DeadlocksTotal: metric.NewCounter( diff --git a/pkg/rpc/clock_offset.go b/pkg/rpc/clock_offset.go index 90220dd288d3..48f290dd517c 100644 --- a/pkg/rpc/clock_offset.go +++ b/pkg/rpc/clock_offset.go @@ -122,7 +122,9 @@ func newRemoteClockMonitor( r.metrics = RemoteClockMetrics{ ClockOffsetMeanNanos: metric.NewGauge(metaClockOffsetMeanNanos), ClockOffsetStdDevNanos: metric.NewGauge(metaClockOffsetStdDevNanos), - LatencyHistogramNanos: metric.NewLatency(metaLatencyHistogramNanos, histogramWindowInterval), + LatencyHistogramNanos: metric.NewHistogram( + metaLatencyHistogramNanos, histogramWindowInterval, metric.IOLatencyBuckets, + ), } return &r } diff --git a/pkg/server/node.go b/pkg/server/node.go index 8304711dc008..2c469d91ad25 100644 --- a/pkg/server/node.go +++ b/pkg/server/node.go @@ -173,7 +173,9 @@ type nodeMetrics struct { func makeNodeMetrics(reg *metric.Registry, histogramWindow time.Duration) nodeMetrics { nm := nodeMetrics{ - Latency: metric.NewLatency(metaExecLatency, histogramWindow), + Latency: metric.NewHistogram( + metaExecLatency, histogramWindow, metric.IOLatencyBuckets, + ), Success: metric.NewCounter(metaExecSuccess), Err: metric.NewCounter(metaExecError), DiskStalls: metric.NewCounter(metaDiskStalls), diff --git a/pkg/server/status/BUILD.bazel b/pkg/server/status/BUILD.bazel index 9c846de34936..16aa8e0d0588 100644 --- a/pkg/server/status/BUILD.bazel +++ b/pkg/server/status/BUILD.bazel @@ -147,7 +147,6 @@ go_test( "//pkg/util/system", "//pkg/util/timeutil", "@com_github_kr_pretty//:pretty", - "@com_github_prometheus_client_golang//prometheus", "@com_github_shirou_gopsutil_v3//net", ], ) diff --git a/pkg/server/status/recorder.go b/pkg/server/status/recorder.go index 3d6fe5ad18ce..2aee2ee6001e 100644 --- a/pkg/server/status/recorder.go +++ b/pkg/server/status/recorder.go @@ -529,26 +529,6 @@ type registryRecorder struct { func extractValue(name string, mtr interface{}, fn func(string, float64)) error { switch mtr := mtr.(type) { case *metric.Histogram: - // TODO(mrtracy): Where should this comment go for better - // visibility? - // - // Proper support of Histograms for time series is difficult and - // likely not worth the trouble. Instead, we aggregate a windowed - // histogram at fixed quantiles. 
If the scraping window and the - // histogram's eviction duration are similar, this should give - // good results; if the two durations are very different, we either - // report stale results or report only the more recent data. - // - // Additionally, we can only aggregate max/min of the quantiles; - // roll-ups don't know that and so they will return mathematically - // nonsensical values, but that seems acceptable for the time - // being. - curr, _ := mtr.Windowed() - for _, pt := range recordHistogramQuantiles { - fn(name+pt.suffix, float64(curr.ValueAtQuantile(pt.quantile))) - } - fn(name+"-count", float64(curr.TotalCount())) - case *metric.HistogramV2: n := float64(mtr.TotalCountWindowed()) fn(name+"-count", n) avg := mtr.TotalSumWindowed() / n diff --git a/pkg/server/status/recorder_test.go b/pkg/server/status/recorder_test.go index 3628c975643a..a401751d91ae 100644 --- a/pkg/server/status/recorder_test.go +++ b/pkg/server/status/recorder_test.go @@ -33,7 +33,6 @@ import ( "github.com/cockroachdb/cockroach/pkg/util/system", "github.com/cockroachdb/cockroach/pkg/util/timeutil", "github.com/kr/pretty" - "github.com/prometheus/client_golang/prometheus" ) // byTimeAndName is a slice of tspb.TimeSeriesData. @@ -202,8 +201,6 @@ func TestMetricsRecorder(t *testing.T) { {"testGauge", "gauge", 20}, {"testGaugeFloat64", "floatgauge", 20}, {"testCounter", "counter", 5}, - {"testHistogram", "histogram", 10}, - {"testHistogramV2", "histogramV2", 9}, - {"testLatency", "latency", 10}, + {"testHistogram", "histogram", 9}, {"testAggGauge", "agggauge", 4}, {"testAggCounter", "aggcounter", 7}, @@ -289,16 +287,7 @@ func TestMetricsRecorder(t *testing.T) { c.Inc((data.val)) addExpected(reg.prefix, data.name, reg.source, 100, data.val, reg.isNode) case "histogram": - h := metric.NewHistogram(metric.Metadata{Name: reg.prefix + data.name}, time.Second, 1000, 2) - reg.reg.AddMetric(h) - h.RecordValue(data.val) - for _, q := range recordHistogramQuantiles { - addExpected(reg.prefix, data.name+q.suffix, reg.source, 100, data.val, reg.isNode) - } - addExpected(reg.prefix, data.name+"-count", reg.source, 100, 1, reg.isNode) - case "histogramV2": - h := metric.NewHistogramV2(metric.Metadata{Name: reg.prefix + data.name}, time.Second, - prometheus.HistogramOpts{Buckets: []float64{1.0, 10.0, 100.0, 1000.0}}) + h := metric.NewHistogram(metric.Metadata{Name: reg.prefix + data.name}, time.Second, []float64{1.0, 10.0, 100.0, 1000.0}) reg.reg.AddMetric(h) h.RecordValue(data.val) for _, q := range recordHistogramQuantiles { @@ -306,16 +295,6 @@ func TestMetricsRecorder(t *testing.T) { } addExpected(reg.prefix, data.name+"-count", reg.source, 100, 1, reg.isNode) addExpected(reg.prefix, data.name+"-avg", reg.source, 100, 9, reg.isNode) - case "latency": - l := metric.NewLatency(metric.Metadata{Name: reg.prefix + data.name}, time.Hour) - reg.reg.AddMetric(l) - l.RecordValue(data.val) - // Latency is simply three histograms (at different resolution - time scales).
- for _, q := range recordHistogramQuantiles { - addExpected(reg.prefix, data.name+q.suffix, reg.source, 100, data.val, reg.isNode) - } - addExpected(reg.prefix, data.name+"-count", reg.source, 100, 1, reg.isNode) default: t.Fatalf("unexpected: %+v", data) } diff --git a/pkg/sql/conn_executor.go b/pkg/sql/conn_executor.go index 8d3b6437d2ea..cb9d996ecbb7 100644 --- a/pkg/sql/conn_executor.go +++ b/pkg/sql/conn_executor.go @@ -444,16 +444,21 @@ func makeMetrics(internal bool) Metrics { SQLOptPlanCacheHits: metric.NewCounter(getMetricMeta(MetaSQLOptPlanCacheHits, internal)), SQLOptPlanCacheMisses: metric.NewCounter(getMetricMeta(MetaSQLOptPlanCacheMisses, internal)), // TODO(mrtracy): See HistogramWindowInterval in server/config.go for the 6x factor. - DistSQLExecLatency: metric.NewLatency(getMetricMeta(MetaDistSQLExecLatency, internal), - 6*metricsSampleInterval), - SQLExecLatency: metric.NewLatency(getMetricMeta(MetaSQLExecLatency, internal), - 6*metricsSampleInterval), - DistSQLServiceLatency: metric.NewLatency(getMetricMeta(MetaDistSQLServiceLatency, internal), - 6*metricsSampleInterval), - SQLServiceLatency: metric.NewLatency(getMetricMeta(MetaSQLServiceLatency, internal), - 6*metricsSampleInterval), - SQLTxnLatency: metric.NewLatency(getMetricMeta(MetaSQLTxnLatency, internal), - 6*metricsSampleInterval), + DistSQLExecLatency: metric.NewHistogram( + getMetricMeta(MetaDistSQLExecLatency, internal), 6*metricsSampleInterval, metric.IOLatencyBuckets, + ), + SQLExecLatency: metric.NewHistogram( + getMetricMeta(MetaSQLExecLatency, internal), 6*metricsSampleInterval, metric.IOLatencyBuckets, + ), + DistSQLServiceLatency: metric.NewHistogram( + getMetricMeta(MetaDistSQLServiceLatency, internal), 6*metricsSampleInterval, metric.IOLatencyBuckets, + ), + SQLServiceLatency: metric.NewHistogram( + getMetricMeta(MetaSQLServiceLatency, internal), 6*metricsSampleInterval, metric.IOLatencyBuckets, + ), + SQLTxnLatency: metric.NewHistogram( + getMetricMeta(MetaSQLTxnLatency, internal), 6*metricsSampleInterval, metric.IOLatencyBuckets, + ), SQLTxnsOpen: metric.NewGauge(getMetricMeta(MetaSQLTxnsOpen, internal)), SQLActiveStatements: metric.NewGauge(getMetricMeta(MetaSQLActiveQueries, internal)), SQLContendedTxns: metric.NewCounter(getMetricMeta(MetaSQLTxnContended, internal)), @@ -480,26 +485,24 @@ func makeServerMetrics(cfg *ExecutorConfig) ServerMetrics { SQLStatsMemoryMaxBytesHist: metric.NewHistogram( MetaSQLStatsMemMaxBytes, cfg.HistogramWindowInterval, - log10int64times1000, - 3, /* sigFigs */ + metric.MemoryUsageBuckets, ), SQLStatsMemoryCurBytesCount: metric.NewGauge(MetaSQLStatsMemCurBytes), ReportedSQLStatsMemoryMaxBytesHist: metric.NewHistogram( MetaReportedSQLStatsMemMaxBytes, cfg.HistogramWindowInterval, - log10int64times1000, - 3, /* sigFigs */ + metric.MemoryUsageBuckets, ), ReportedSQLStatsMemoryCurBytesCount: metric.NewGauge(MetaReportedSQLStatsMemCurBytes), DiscardedStatsCount: metric.NewCounter(MetaDiscardedSQLStats), SQLStatsFlushStarted: metric.NewCounter(MetaSQLStatsFlushStarted), SQLStatsFlushFailure: metric.NewCounter(MetaSQLStatsFlushFailure), - SQLStatsFlushDuration: metric.NewLatency( - MetaSQLStatsFlushDuration, 6*metricsSampleInterval, + SQLStatsFlushDuration: metric.NewHistogram( + MetaSQLStatsFlushDuration, 6*metricsSampleInterval, metric.IOLatencyBuckets, ), SQLStatsRemovedRows: metric.NewCounter(MetaSQLStatsRemovedRows), - SQLTxnStatsCollectionOverhead: metric.NewLatency( - MetaSQLTxnStatsCollectionOverhead, 6*metricsSampleInterval, + SQLTxnStatsCollectionOverhead: 
metric.NewHistogram( + MetaSQLTxnStatsCollectionOverhead, 6*metricsSampleInterval, metric.IOLatencyBuckets, ), }, ContentionSubsystemMetrics: txnidcache.NewMetrics(), diff --git a/pkg/sql/execinfra/metrics.go b/pkg/sql/execinfra/metrics.go index b827c8936519..8e8ce257d56b 100644 --- a/pkg/sql/execinfra/metrics.go +++ b/pkg/sql/execinfra/metrics.go @@ -155,12 +155,12 @@ func MakeDistSQLMetrics(histogramWindow time.Duration) DistSQLMetrics { FlowsTotal: metric.NewCounter(metaFlowsTotal), FlowsQueued: metric.NewGauge(metaFlowsQueued), FlowsScheduled: metric.NewCounter(metaFlowsScheduled), - QueueWaitHist: metric.NewLatency(metaQueueWaitHist, histogramWindow), - MaxBytesHist: metric.NewHistogram(metaMemMaxBytes, histogramWindow, log10int64times1000, 3), + QueueWaitHist: metric.NewHistogram(metaQueueWaitHist, histogramWindow, metric.IOLatencyBuckets), + MaxBytesHist: metric.NewHistogram(metaMemMaxBytes, histogramWindow, metric.MemoryUsageBuckets), CurBytesCount: metric.NewGauge(metaMemCurBytes), VecOpenFDs: metric.NewGauge(metaVecOpenFDs), CurDiskBytesCount: metric.NewGauge(metaDiskCurBytes), - MaxDiskBytesHist: metric.NewHistogram(metaDiskMaxBytes, histogramWindow, log10int64times1000, 3), + MaxDiskBytesHist: metric.NewHistogram(metaDiskMaxBytes, histogramWindow, metric.MemoryUsageBuckets), QueriesSpilled: metric.NewCounter(metaQueriesSpilled), SpilledBytesWritten: metric.NewCounter(metaSpilledBytesWritten), SpilledBytesRead: metric.NewCounter(metaSpilledBytesRead), diff --git a/pkg/sql/mem_metrics.go b/pkg/sql/mem_metrics.go index aa3ce8a6416d..ae2ac014c2ae 100644 --- a/pkg/sql/mem_metrics.go +++ b/pkg/sql/mem_metrics.go @@ -73,7 +73,7 @@ func MakeBaseMemMetrics(endpoint string, histogramWindow time.Duration) BaseMemo MetaMemMaxBytes := makeMemMetricMetadata(prefix+".max", "Memory usage per sql statement for "+endpoint) MetaMemCurBytes := makeMemMetricMetadata(prefix+".current", "Current sql statement memory usage for "+endpoint) return BaseMemoryMetrics{ - MaxBytesHist: metric.NewHistogram(MetaMemMaxBytes, histogramWindow, log10int64times1000, 3), + MaxBytesHist: metric.NewHistogram(MetaMemMaxBytes, histogramWindow, metric.MemoryUsageBuckets), CurBytesCount: metric.NewGauge(MetaMemCurBytes), } } @@ -88,9 +88,9 @@ func MakeMemMetrics(endpoint string, histogramWindow time.Duration) MemoryMetric MetaMemSessionCurBytes := makeMemMetricMetadata(prefix+".session.current", "Current sql session memory usage for "+endpoint) return MemoryMetrics{ BaseMemoryMetrics: base, - TxnMaxBytesHist: metric.NewHistogram(MetaMemMaxTxnBytes, histogramWindow, log10int64times1000, 3), + TxnMaxBytesHist: metric.NewHistogram(MetaMemMaxTxnBytes, histogramWindow, metric.MemoryUsageBuckets), TxnCurBytesCount: metric.NewGauge(MetaMemTxnCurBytes), - SessionMaxBytesHist: metric.NewHistogram(MetaMemMaxSessionBytes, histogramWindow, log10int64times1000, 3), + SessionMaxBytesHist: metric.NewHistogram(MetaMemMaxSessionBytes, histogramWindow, metric.MemoryUsageBuckets), SessionCurBytesCount: metric.NewGauge(MetaMemSessionCurBytes), } diff --git a/pkg/sql/pgwire/server.go b/pkg/sql/pgwire/server.go index 5757f49777e4..7e6993d1ca5a 100644 --- a/pkg/sql/pgwire/server.go +++ b/pkg/sql/pgwire/server.go @@ -292,11 +292,13 @@ func makeServerMetrics( sqlMemMetrics sql.MemoryMetrics, histogramWindow time.Duration, ) ServerMetrics { return ServerMetrics{ - BytesInCount: metric.NewCounter(MetaBytesIn), - BytesOutCount: metric.NewCounter(MetaBytesOut), - Conns: metric.NewGauge(MetaConns), - NewConns: metric.NewCounter(MetaNewConns), - 
ConnLatency: metric.NewLatency(MetaConnLatency, histogramWindow), + BytesInCount: metric.NewCounter(MetaBytesIn), + BytesOutCount: metric.NewCounter(MetaBytesOut), + Conns: metric.NewGauge(MetaConns), + NewConns: metric.NewCounter(MetaNewConns), + ConnLatency: metric.NewHistogram( + MetaConnLatency, histogramWindow, metric.IOLatencyBuckets, + ), ConnFailures: metric.NewCounter(MetaConnFailures), PGWireCancelTotalCount: metric.NewCounter(MetaPGWireCancelTotal), PGWireCancelIgnoredCount: metric.NewCounter(MetaPGWireCancelIgnored), diff --git a/pkg/sql/ttl/ttljob/ttljob_metrics.go b/pkg/sql/ttl/ttljob/ttljob_metrics.go index ee23a7d70e26..d970d4f6bf38 100644 --- a/pkg/sql/ttl/ttljob/ttljob_metrics.go +++ b/pkg/sql/ttl/ttljob/ttljob_metrics.go @@ -96,7 +96,6 @@ func (m *RowLevelTTLAggMetrics) loadMetrics(labelMetrics bool, relation string) } func makeRowLevelTTLAggMetrics(histogramWindowInterval time.Duration) metric.Struct { - sigFigs := 2 b := aggmetric.MakeBuilder("relation") ret := &RowLevelTTLAggMetrics{ RangeTotalDuration: b.Histogram( @@ -108,8 +107,7 @@ func makeRowLevelTTLAggMetrics(histogramWindowInterval time.Duration) metric.Str MetricType: io_prometheus_client.MetricType_HISTOGRAM, }, histogramWindowInterval, - time.Hour.Nanoseconds(), - sigFigs, + metric.LongRunningProcessLatencyBuckets, ), SelectDuration: b.Histogram( metric.Metadata{ @@ -120,8 +118,7 @@ func makeRowLevelTTLAggMetrics(histogramWindowInterval time.Duration) metric.Str MetricType: io_prometheus_client.MetricType_HISTOGRAM, }, histogramWindowInterval, - time.Minute.Nanoseconds(), - sigFigs, + metric.BatchProcessLatencyBuckets, ), DeleteDuration: b.Histogram( metric.Metadata{ @@ -132,8 +129,7 @@ func makeRowLevelTTLAggMetrics(histogramWindowInterval time.Duration) metric.Str MetricType: io_prometheus_client.MetricType_HISTOGRAM, }, histogramWindowInterval, - time.Minute.Nanoseconds(), - sigFigs, + metric.BatchProcessLatencyBuckets, ), RowSelections: b.Counter( metric.Metadata{ diff --git a/pkg/util/admission/work_queue.go b/pkg/util/admission/work_queue.go index c51e42167bc4..60a5e5133be8 100644 --- a/pkg/util/admission/work_queue.go +++ b/pkg/util/admission/work_queue.go @@ -1538,8 +1538,9 @@ func makeWorkQueueMetrics(name string) WorkQueueMetrics { Admitted: metric.NewCounter(addName(name, admittedMeta)), Errored: metric.NewCounter(addName(name, erroredMeta)), WaitDurationSum: metric.NewCounter(addName(name, waitDurationSumMeta)), - WaitDurations: metric.NewLatency( - addName(name, waitDurationsMeta), base.DefaultHistogramWindowInterval()), + WaitDurations: metric.NewHistogram( + addName(name, waitDurationsMeta), base.DefaultHistogramWindowInterval(), metric.IOLatencyBuckets, + ), WaitQueueLength: metric.NewGauge(addName(name, waitQueueLengthMeta)), } } diff --git a/pkg/util/metric/BUILD.bazel b/pkg/util/metric/BUILD.bazel index 2a74e5daf558..464e2f0053e8 100644 --- a/pkg/util/metric/BUILD.bazel +++ b/pkg/util/metric/BUILD.bazel @@ -8,6 +8,7 @@ go_library( srcs = [ "doc.go", "graphite_exporter.go", + "histogram_buckets.go", "metric.go", "prometheus_exporter.go", "prometheus_rule_exporter.go", @@ -25,7 +26,6 @@ go_library( "//pkg/util/syncutil", "//pkg/util/timeutil", "@com_github_cockroachdb_errors//:errors", - "@com_github_codahale_hdrhistogram//:hdrhistogram", "@com_github_gogo_protobuf//proto", "@com_github_prometheus_client_golang//prometheus", "@com_github_prometheus_client_golang//prometheus/graphite", @@ -41,6 +41,7 @@ go_test( name = "metric_test", size = "small", srcs = [ + "histogram_buckets_test.go", 
"metric_test.go", "prometheus_exporter_test.go", "prometheus_rule_exporter_test.go", @@ -50,6 +51,7 @@ go_test( embed = [":metric"], deps = [ "//pkg/util/log", + "@com_github_dustin_go_humanize//:go-humanize", "@com_github_kr_pretty//:pretty", "@com_github_prometheus_client_golang//prometheus", "@com_github_prometheus_client_model//go", diff --git a/pkg/util/metric/aggmetric/BUILD.bazel b/pkg/util/metric/aggmetric/BUILD.bazel index 07840e5ffa84..a6d1628ffaae 100644 --- a/pkg/util/metric/aggmetric/BUILD.bazel +++ b/pkg/util/metric/aggmetric/BUILD.bazel @@ -15,7 +15,6 @@ go_library( "//pkg/util/metric", "//pkg/util/syncutil", "@com_github_cockroachdb_errors//:errors", - "@com_github_codahale_hdrhistogram//:hdrhistogram", "@com_github_gogo_protobuf//proto", "@com_github_google_btree//:btree", "@com_github_prometheus_client_model//go", @@ -32,6 +31,7 @@ go_test( "//pkg/roachpb", "//pkg/util/leaktest", "//pkg/util/metric", + "@com_github_prometheus_client_golang//prometheus", "@com_github_prometheus_client_model//go", "@com_github_stretchr_testify//require", ], diff --git a/pkg/util/metric/aggmetric/agg_metric.go b/pkg/util/metric/aggmetric/agg_metric.go index d3fbb0400f65..c9afb965d64f 100644 --- a/pkg/util/metric/aggmetric/agg_metric.go +++ b/pkg/util/metric/aggmetric/agg_metric.go @@ -51,9 +51,9 @@ func (b Builder) Counter(metadata metric.Metadata) *AggCounter { // Histogram constructs a new AggHistogram with the Builder's labels. func (b Builder) Histogram( - metadata metric.Metadata, duration time.Duration, maxVal int64, sigFigs int, + metadata metric.Metadata, duration time.Duration, buckets []float64, ) *AggHistogram { - return NewHistogram(metadata, duration, maxVal, sigFigs, b.labels...) + return NewHistogram(metadata, duration, buckets, b.labels...) 
} type childSet struct { diff --git a/pkg/util/metric/aggmetric/agg_metric_test.go b/pkg/util/metric/aggmetric/agg_metric_test.go index 68574822cd5b..aedd611606cc 100644 --- a/pkg/util/metric/aggmetric/agg_metric_test.go +++ b/pkg/util/metric/aggmetric/agg_metric_test.go @@ -22,6 +22,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/util/leaktest" "github.com/cockroachdb/cockroach/pkg/util/metric" "github.com/cockroachdb/cockroach/pkg/util/metric/aggmetric" + "github.com/prometheus/client_golang/prometheus" prometheusgo "github.com/prometheus/client_model/go" "github.com/stretchr/testify/require" ) @@ -64,7 +65,7 @@ func TestAggMetric(t *testing.T) { h := aggmetric.NewHistogram(metric.Metadata{ Name: "histo_gram", - }, base.DefaultHistogramWindowInterval(), 100, 1, "tenant_id") + }, base.DefaultHistogramWindowInterval(), prometheus.DefBuckets, "tenant_id") r.AddMetric(h) tenant2 := roachpb.MakeTenantID(2) @@ -195,7 +196,7 @@ func TestAggMetricBuilder(t *testing.T) { g := b.Gauge(metric.Metadata{Name: "bar_gauge"}) f := b.GaugeFloat64(metric.Metadata{Name: "baz_gauge"}) h := b.Histogram(metric.Metadata{Name: "histo_gram"}, - base.DefaultHistogramWindowInterval(), 100, 1) + base.DefaultHistogramWindowInterval(), prometheus.DefBuckets) for i := 5; i < 10; i++ { tenantLabel := roachpb.MakeTenantID(uint64(i)).String() diff --git a/pkg/util/metric/aggmetric/histogram.go b/pkg/util/metric/aggmetric/histogram.go index e3b710a30c5a..ea6ccc2d14c2 100644 --- a/pkg/util/metric/aggmetric/histogram.go +++ b/pkg/util/metric/aggmetric/histogram.go @@ -14,7 +14,6 @@ import ( "time" "github.com/cockroachdb/cockroach/pkg/util/metric" - "github.com/codahale/hdrhistogram" io_prometheus_client "github.com/prometheus/client_model/go" ) @@ -34,14 +33,10 @@ var _ metric.PrometheusExportable = (*AggHistogram)(nil) // NewHistogram constructs a new AggHistogram. func NewHistogram( - metadata metric.Metadata, - duration time.Duration, - maxVal int64, - sigFigs int, - childLabels ...string, + metadata metric.Metadata, duration time.Duration, buckets []float64, childLabels ...string, ) *AggHistogram { create := func() *metric.Histogram { - return metric.NewHistogram(metadata, duration, maxVal, sigFigs) + return metric.NewHistogram(metadata, duration, buckets) } a := &AggHistogram{ h: *create(), @@ -84,11 +79,6 @@ func (a *AggHistogram) ToPrometheusMetric() *io_prometheus_client.Metric { return a.h.ToPrometheusMetric() } -// Windowed returns a copy of the current windowed histogram data and its -// rotation interval. -func (a *AggHistogram) Windowed() (*hdrhistogram.Histogram, time.Duration) { - return a.h.Windowed() -} -// AddChild adds a Counter to this AggCounter. This method panics if a Counter -// already exists for this set of labelVals. +// AddChild adds a child histogram to this AggHistogram. This method panics if +// a child already exists for this set of labelVals. diff --git a/pkg/util/metric/histogram_buckets.go b/pkg/util/metric/histogram_buckets.go new file mode 100644 index 000000000000..e1b0994b0881 --- /dev/null +++ b/pkg/util/metric/histogram_buckets.go @@ -0,0 +1,178 @@ +// Copyright 2022 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt.
+// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package metric + +// IOLatencyBuckets are prometheus histogram buckets suitable for a histogram +// that records a quantity (nanosecond-denominated) in which most measurements +// resemble those of typical disk latencies, i.e. which are in the micro- and +// millisecond range during normal operation. +var IOLatencyBuckets = []float64{ + // Generated via TestHistogramBuckets/IOLatencyBuckets. + 10000.000000, // 10µs + 26826.957953, // 26.826µs + 71968.567300, // 71.968µs + 193069.772888, // 193.069µs + 517947.467923, // 517.947µs + 1389495.494373, // 1.389495ms + 3727593.720315, // 3.727593ms + 10000000.000000, // 9.999999ms + 26826957.952797, // 26.826957ms + 71968567.300115, // 71.968567ms + 193069772.888325, // 193.069772ms + 517947467.923120, // 517.947467ms + 1389495494.373135, // 1.389495494s + 3727593720.314933, // 3.72759372s + 9999999999.999981, // 9.999999999s +} + +// NetworkLatencyBuckets are prometheus histogram buckets suitable for a histogram +// that records a quantity (nanosecond-denominated) in which most measurements +// behave like network latencies, i.e. most measurements are in the ms to sub-second +// range during normal operation. +var NetworkLatencyBuckets = []float64{ + // Generated via TestHistogramBuckets/NetworkLatencyBuckets. + 500000.000000, // 500µs + 860513.842995, // 860.513µs + 1480968.147973, // 1.480968ms + 2548787.184731, // 2.548787ms + 4386533.310619, // 4.386533ms + 7549345.273094, // 7.549345ms + 12992632.226094, // 12.992632ms + 22360679.774998, // 22.360679ms + 38483348.970335, // 38.483348ms + 66230909.027573, // 66.230909ms + 113985228.104760, // 113.985228ms + 196171733.362212, // 196.171733ms + 337616984.325077, // 337.616984ms + 581048177.284016, // 581.048177ms + 999999999.999999, // 999.999999ms +} + +// BatchProcessLatencyBuckets are prometheus histogram buckets suitable for a +// histogram that records a quantity (nanosecond-denominated) in which most +// measurements are in the seconds to minutes range during normal operation. +var BatchProcessLatencyBuckets = []float64{ + // Generated via TestHistogramBuckets/BatchProcessLatencyBuckets. + 500000000.000000, // 500ms + 789604072.059876, // 789.604072ms + 1246949181.227077, // 1.246949181s + 1969192302.297256, // 1.969192302s + 3109764521.125753, // 3.109764521s + 4910965458.056452, // 4.910965458s + 7755436646.853539, // 7.755436646s + 12247448713.915894, // 12.247448713s + 19341270753.704967, // 19.341270753s + 30543892291.876068, // 30.543892291s + 48235163460.447227, // 48.23516346s + 76173362969.685760, // 1m16.173362969s + 120293595166.717728, // 2m0.293595166s + 189968625172.725128, // 3m9.968625172s + 300000000000.000183, // 5m0s +} + +// LongRunningProcessLatencyBuckets are prometheus histogram buckets suitable +// for a histogram that records a quantity (nanosecond-denominated) for +// long-running processes (multiple minutes). +var LongRunningProcessLatencyBuckets = []float64{ + // Generated via TestHistogramBuckets/LongRunningProcessLatencyBuckets. 
+ 500000000.000000, // 500ms + 942961049.923126, // 942.961049ms + 1778351083.344248, // 1.778351083s + 3353831609.364442, // 3.353831609s + 6325065151.263324, // 6.325065151s + 11928580151.734879, // 11.928580151s + 22496372927.944168, // 22.496372927s + 42426406871.192848, // 42.426406871s + 80012898335.451462, // 1m20.012898335s + 150898093243.579315, // 2m30.898093243s + 284582048872.726685, // 4m44.582048872s + 536699575188.601318, // 8m56.699575188s + 1012173589826.278687, // 16m52.173589826s + 1908880541934.094238, // 31m48.880541934s + 3599999999999.998535, // 59m59.999999999s +} + +// CountBuckets are prometheus histogram buckets suitable for a histogram that +// records a quantity that is a count (unit-less) in which most measurements are +// in the 1 to ~1000 range during normal operation. +var CountBuckets = []float64{ + // Generated via TestHistogramBuckets/CountBuckets. + 1.000000, + 2.000000, + 4.000000, + 8.000000, + 16.000000, + 32.000000, + 64.000000, + 128.000000, + 256.000000, + 512.000000, + 1024.000000, +} + +// PercentBuckets are prometheus histogram buckets suitable for a histogram that +// records a percent quantity [0,100] +var PercentBuckets = []float64{ + // Generated via TestHistogramBuckets/PercentBuckets. + 10.000000, + 20.000000, + 30.000000, + 40.000000, + 50.000000, + 60.000000, + 70.000000, + 80.000000, + 90.000000, + 100.000000, +} + +// DataSizeBuckets are prometheus histogram buckets suitable for a histogram that +// records a quantity that is a size (byte-denominated) in which most measurements are +// in the kB to MB range during normal operation. +var DataSizeBuckets = []float64{ + // Generated via TestHistogramBuckets/DataSizeBuckets. + 1000.000000, // 1.0 kB + 2000.000000, // 2.0 kB + 4000.000000, // 4.0 kB + 8000.000000, // 8.0 kB + 16000.000000, // 16 kB + 32000.000000, // 32 kB + 64000.000000, // 64 kB + 128000.000000, // 128 kB + 256000.000000, // 256 kB + 512000.000000, // 512 kB + 1024000.000000, // 1.0 MB + 2048000.000000, // 2.0 MB + 4096000.000000, // 4.1 MB + 8192000.000000, // 8.2 MB + 16384000.000000, // 16 MB +} + +// MemoryUsageBuckets are prometheus histogram buckets suitable for a histogram that +// records memory usage (in Bytes) +var MemoryUsageBuckets = []float64{ + // Generated via TestHistogramBuckets/MemoryUsageBuckets. + 1.000000, // 1 B + 2.021274, // 2 B + 4.085550, // 4 B + 8.258017, // 8 B + 16.691718, // 16 B + 33.738540, // 33 B + 68.194844, // 68 B + 137.840488, // 137 B + 278.613437, // 278 B + 563.154184, // 563 B + 1138.289087, // 1.1 kB + 2300.794494, // 2.3 kB + 4650.536813, // 4.7 kB + 9400.010609, // 9.4 kB + 19000.000000, // 19 kB +} diff --git a/pkg/util/metric/histogram_buckets_test.go b/pkg/util/metric/histogram_buckets_test.go new file mode 100644 index 000000000000..9ac375c6b14a --- /dev/null +++ b/pkg/util/metric/histogram_buckets_test.go @@ -0,0 +1,91 @@ +// Copyright 2022 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. 
+ +package metric + +import ( + "fmt" + "strings" + "testing" + "time" + + "github.com/dustin/go-humanize" + "github.com/prometheus/client_golang/prometheus" + "github.com/stretchr/testify/require" +) + +const LATENCY = "LATENCY" +const SIZE = "SIZE" + +const log10int64times1000 = 19 * 1000 + +// TestHistogramBuckets is used to generate additional prometheus buckets to be +// used with Histogram. Please include obs-inf in the review process of new +// buckets. +func TestHistogramBuckets(t *testing.T) { + verifyAndPrint := func(t *testing.T, exp, act []float64, histType string) { + t.Helper() + var buf strings.Builder + for idx, f := range exp { + if idx == 0 { + fmt.Fprintf(&buf, "// Generated via %s.", t.Name()) + } + switch histType { + case LATENCY: + fmt.Fprintf(&buf, "\n%f, // %s", f, time.Duration(f)) + case SIZE: + fmt.Fprintf(&buf, "\n%f, // %s", f, humanize.Bytes(uint64(f))) + default: + fmt.Fprintf(&buf, "\n%f,", f) + } + } + t.Logf("%s", &buf) + require.InDeltaSlice(t, exp, act, 1 /* delta */, "Please update the bucket boundaries for %s", t.Name()) + } + t.Run("IOLatencyBuckets", func(t *testing.T) { + exp := prometheus.ExponentialBucketsRange(10e3, 10e9, 15) + verifyAndPrint(t, exp, IOLatencyBuckets, LATENCY) + }) + + t.Run("NetworkLatencyBuckets", func(t *testing.T) { + exp := prometheus.ExponentialBucketsRange(500e3, 1e9, 15) + verifyAndPrint(t, exp, NetworkLatencyBuckets, LATENCY) + }) + + t.Run("BatchProcessLatencyBuckets", func(t *testing.T) { + exp := prometheus.ExponentialBucketsRange(500e6, 300e9, 15) + verifyAndPrint(t, exp, BatchProcessLatencyBuckets, LATENCY) + }) + + t.Run("LongRunningProcessLatencyBuckets", func(t *testing.T) { + exp := prometheus.ExponentialBucketsRange(500e6, 3600e9, 15) + verifyAndPrint(t, exp, LongRunningProcessLatencyBuckets, LATENCY) + }) + + t.Run("CountBuckets", func(t *testing.T) { + exp := prometheus.ExponentialBuckets(1, 2, 11) + verifyAndPrint(t, exp, CountBuckets, "") + }) + + t.Run("PercentBuckets", func(t *testing.T) { + exp := prometheus.LinearBuckets(10, 10, 10) + verifyAndPrint(t, exp, PercentBuckets, "") + }) + + t.Run("DataSizeBuckets", func(t *testing.T) { + exp := prometheus.ExponentialBuckets(1e3, 2, 15) + verifyAndPrint(t, exp, DataSizeBuckets, SIZE) + }) + + t.Run("MemoryUsageBuckets", func(t *testing.T) { + exp := prometheus.ExponentialBucketsRange(1, log10int64times1000, 15) + verifyAndPrint(t, exp, MemoryUsageBuckets, SIZE) + }) +} diff --git a/pkg/util/metric/metric.go b/pkg/util/metric/metric.go index 95bea5f035b8..f011a2f265ba 100644 --- a/pkg/util/metric/metric.go +++ b/pkg/util/metric/metric.go @@ -19,7 +19,6 @@ import ( "github.com/cockroachdb/cockroach/pkg/util/syncutil" "github.com/cockroachdb/cockroach/pkg/util/timeutil" - "github.com/codahale/hdrhistogram" "github.com/gogo/protobuf/proto" "github.com/prometheus/client_golang/prometheus" prometheusgo "github.com/prometheus/client_model/go" @@ -135,7 +134,6 @@ func (m *Metadata) AddLabel(name, value string) { var _ Iterable = &Gauge{} var _ Iterable = &GaugeFloat64{} var _ Iterable = &Counter{} -var _ Iterable = &Histogram{} var _ json.Marshaler = &Gauge{} var _ json.Marshaler = &GaugeFloat64{} @@ -145,7 +143,6 @@ var _ json.Marshaler = &Registry{} var _ PrometheusExportable = &Gauge{} var _ PrometheusExportable = &GaugeFloat64{} var _ PrometheusExportable = &Counter{} -var _ PrometheusExportable = &Histogram{} type periodic interface { nextTick() time.Time @@ -164,230 +161,23 @@ func TestingSetNow(f func() time.Time) func() { } } -func cloneHistogram(in 
*hdrhistogram.Histogram) *hdrhistogram.Histogram { - return hdrhistogram.Import(in.Export()) -} - func maybeTick(m periodic) { for m.nextTick().Before(now()) { m.tick() } } -// A Histogram collects observed values by keeping bucketed counts. For -// convenience, internally two sets of buckets are kept: A cumulative set (i.e. -// data is never evicted) and a windowed set (which keeps only recently -// collected samples). -// -// Top-level methods generally apply to the cumulative buckets; the windowed -// variant is exposed through the Windowed method. -type Histogram struct { - Metadata - maxVal int64 - mu struct { - syncutil.Mutex - cumulative *hdrhistogram.Histogram - *tickHelper - sliding *hdrhistogram.WindowedHistogram - } -} - -// NewHistogram initializes a given Histogram. The contained windowed histogram -// rotates every 'duration'; both the windowed and the cumulative histogram -// track nonnegative values up to 'maxVal' with 'sigFigs' decimal points of -// precision. -func NewHistogram(metadata Metadata, duration time.Duration, maxVal int64, sigFigs int) *Histogram { - h := &Histogram{ - Metadata: metadata, - maxVal: maxVal, - } - wHist := hdrhistogram.NewWindowed(histWrapNum, 0, maxVal, sigFigs) - h.mu.cumulative = hdrhistogram.New(0, maxVal, sigFigs) - h.mu.sliding = wHist - h.mu.tickHelper = &tickHelper{ - nextT: now(), - tickInterval: duration / histWrapNum, - onTick: func() { - wHist.Rotate() - }, - } - return h -} - -// NewLatency is a convenience function which returns a histogram with -// suitable defaults for latency tracking. Values are expressed in ns, -// are truncated into the interval [0, MaxLatency] and are recorded -// with one digit of precision (i.e. errors of <10ms at 100ms, <6s at 60s). -// -// The windowed portion of the Histogram retains values for approximately -// histogramWindow. -func NewLatency(metadata Metadata, histogramWindow time.Duration) *Histogram { - return NewHistogram( - metadata, histogramWindow, MaxLatency.Nanoseconds(), 1, - ) -} - -// Windowed returns a copy of the current windowed histogram data and its -// rotation interval. -func (h *Histogram) Windowed() (*hdrhistogram.Histogram, time.Duration) { - h.mu.Lock() - defer h.mu.Unlock() - maybeTick(h.mu.tickHelper) - // TODO(obs-inf): not sure we should multiply by histWrapNum here, but it - // has been the behavior for a long time. - return cloneHistogram(h.mu.sliding.Merge()), histWrapNum * h.mu.tickInterval -} - -// Snapshot returns a copy of the cumulative (i.e. all-time samples) histogram -// data. -func (h *Histogram) Snapshot() *hdrhistogram.Histogram { - h.mu.Lock() - defer h.mu.Unlock() - return cloneHistogram(h.mu.cumulative) -} - -// RecordValue adds the given value to the histogram. Recording a value in -// excess of the configured maximum value for that histogram results in -// recording the maximum value instead. -func (h *Histogram) RecordValue(v int64) { - h.mu.Lock() - defer h.mu.Unlock() - - if h.mu.sliding.Current.RecordValue(v) != nil { - _ = h.mu.sliding.Current.RecordValue(h.maxVal) - } - if h.mu.cumulative.RecordValue(v) != nil { - _ = h.mu.cumulative.RecordValue(h.maxVal) - } -} - -// TotalCount returns the (cumulative) number of samples. -func (h *Histogram) TotalCount() int64 { - h.mu.Lock() - defer h.mu.Unlock() - return h.mu.cumulative.TotalCount() -} - -// Min returns the minimum. -func (h *Histogram) Min() int64 { - h.mu.Lock() - defer h.mu.Unlock() - return h.mu.cumulative.Min() -} - -// Inspect calls the closure with the empty string and the receiver. 
-func (h *Histogram) Inspect(f func(interface{})) { - h.mu.Lock() - maybeTick(h.mu.tickHelper) - h.mu.Unlock() - f(h) -} - -// GetType returns the prometheus type enum for this metric. -func (h *Histogram) GetType() *prometheusgo.MetricType { - return prometheusgo.MetricType_HISTOGRAM.Enum() -} - -// ToPrometheusMetric returns a filled-in prometheus metric of the right type. -func (h *Histogram) ToPrometheusMetric() *prometheusgo.Metric { - hist := &prometheusgo.Histogram{} - - h.mu.Lock() - maybeTick(h.mu.tickHelper) - bars := h.mu.cumulative.Distribution() - hist.Bucket = make([]*prometheusgo.Bucket, 0, len(bars)) - - var cumCount uint64 - var sum float64 - for _, bar := range bars { - if bar.Count == 0 { - // No need to expose trivial buckets. - continue - } - upperBound := float64(bar.To) - sum += upperBound * float64(bar.Count) - - cumCount += uint64(bar.Count) - curCumCount := cumCount // need a new alloc thanks to bad proto code - - hist.Bucket = append(hist.Bucket, &prometheusgo.Bucket{ - CumulativeCount: &curCumCount, - UpperBound: &upperBound, - }) - } - hist.SampleCount = &cumCount - hist.SampleSum = &sum // can do better here; we approximate in the loop - h.mu.Unlock() - - return &prometheusgo.Metric{ - Histogram: hist, - } -} - -// GetMetadata returns the metric's metadata including the Prometheus -// MetricType. -func (h *Histogram) GetMetadata() Metadata { - baseMetadata := h.Metadata - baseMetadata.MetricType = prometheusgo.MetricType_HISTOGRAM - return baseMetadata -} - -// IOLatencyBuckets are prometheus histogram buckets suitable for a histogram -// that records a quantity (nanosecond-denominated) in which most measurements -// resemble those of typical disk latencies, i.e. which are in the micro- and -// millisecond range during normal operation. -var IOLatencyBuckets = []float64{ - // Generated via TestHistogramBuckets/IOLatencyBuckets. - 10000.000000, // 10µs - 26826.957953, // 26.826µs - 71968.567300, // 71.968µs - 193069.772888, // 193.069µs - 517947.467923, // 517.947µs - 1389495.494373, // 1.389495ms - 3727593.720315, // 3.727593ms - 10000000.000000, // 9.999999ms - 26826957.952797, // 26.826957ms - 71968567.300115, // 71.968567ms - 193069772.888325, // 193.069772ms - 517947467.923120, // 517.947467ms - 1389495494.373135, // 1.389495494s - 3727593720.314933, // 3.72759372s - 9999999999.999981, // 9.999999999s -} - -// NetworkLatencyBuckets are prometheus histogram buckets suitable for a histogram -// that records a quantity (nanosecond-denominated) in which most measurements -// behave like network latencies, i.e. most measurements are in the ms to sub-second -// range during normal operation. -var NetworkLatencyBuckets = []float64{ - // Generated via TestHistogramBuckets/NetworkLatencyBuckets. - 500000.000000, // 500µs - 860513.842995, // 860.513µs - 1480968.147973, // 1.480968ms - 2548787.184731, // 2.548787ms - 4386533.310619, // 4.386533ms - 7549345.273094, // 7.549345ms - 12992632.226094, // 12.992632ms - 22360679.774998, // 22.360679ms - 38483348.970335, // 38.483348ms - 66230909.027573, // 66.230909ms - 113985228.104760, // 113.985228ms - 196171733.362212, // 196.171733ms - 337616984.325077, // 337.616984ms - 581048177.284016, // 581.048177ms - 999999999.999999, // 999.999999ms, -} - -// NewHistogramV2 is a prometheus-backed histogram. Depending on the value of -// opts.Buckets, this is suitable for recording any kind of quantity. Common -// sensible choices are {IO,Network}LatencyBuckets. +// NewHistogram constructs a prometheus-backed histogram. Depending on the +// value of buckets, it is suitable for recording any kind of quantity. Common +// sensible choices are {IO,Network}LatencyBuckets.
-func NewHistogramV2( - meta Metadata, windowDuration time.Duration, opts prometheus.HistogramOpts, -) *HistogramV2 { +func NewHistogram(meta Metadata, windowDuration time.Duration, buckets []float64) *Histogram { // TODO(obs-inf): prometheus supports labeled histograms but they require more // plumbing and don't fit into the PrometheusObservable interface any more. + opts := prometheus.HistogramOpts{ + Buckets: buckets, + } cum := prometheus.NewHistogram(opts) - h := &HistogramV2{ + h := &Histogram{ Metadata: meta, cum: cum, } @@ -403,16 +193,16 @@ func NewHistogramV2( return h } -var _ periodic = (*HistogramV2)(nil) -var _ PrometheusExportable = (*HistogramV2)(nil) +var _ periodic = (*Histogram)(nil) +var _ PrometheusExportable = (*Histogram)(nil) -// HistogramV2 is a prometheus-backed histogram. It collects observed values by +// Histogram is a prometheus-backed histogram. It collects observed values by // keeping bucketed counts. For convenience, internally two sets of buckets are // kept: A cumulative set (i.e. data is never evicted) and a windowed set (which // keeps only recently collected samples). // // New buckets are created using TestHistogramBuckets. -type HistogramV2 struct { +type Histogram struct { Metadata cum prometheus.Histogram @@ -436,27 +226,27 @@ type HistogramV2 struct { } } -func (h *HistogramV2) nextTick() time.Time { +func (h *Histogram) nextTick() time.Time { h.windowed.RLock() defer h.windowed.RUnlock() return h.windowed.nextTick() } -func (h *HistogramV2) tick() { +func (h *Histogram) tick() { h.windowed.Lock() defer h.windowed.Unlock() h.windowed.tick() } // Windowed returns a copy of the current windowed histogram. -func (h *HistogramV2) Windowed() prometheus.Histogram { +func (h *Histogram) Windowed() prometheus.Histogram { h.windowed.RLock() defer h.windowed.RUnlock() return h.windowed.cur } // RecordValue adds the given value to the histogram. -func (h *HistogramV2) RecordValue(n int64) { +func (h *Histogram) RecordValue(n int64) { v := float64(n) h.cum.Observe(v) @@ -466,12 +256,12 @@ func (h *HistogramV2) RecordValue(n int64) { } // GetType returns the prometheus type enum for this metric. -func (h *HistogramV2) GetType() *prometheusgo.MetricType { +func (h *Histogram) GetType() *prometheusgo.MetricType { return prometheusgo.MetricType_HISTOGRAM.Enum() } // ToPrometheusMetric returns a filled-in prometheus metric of the right type. -func (h *HistogramV2) ToPrometheusMetric() *prometheusgo.Metric { +func (h *Histogram) ToPrometheusMetric() *prometheusgo.Metric { m := &prometheusgo.Metric{} if err := h.cum.Write(m); err != nil { panic(err) @@ -480,24 +270,24 @@ func (h *HistogramV2) ToPrometheusMetric() *prometheusgo.Metric { } // ToPrometheusMetricWindowed returns a filled-in prometheus metric of the right type. -func (h *HistogramV2) ToPrometheusMetricWindowed() *prometheusgo.Metric { +func (h *Histogram) ToPrometheusMetricWindowed() *prometheusgo.Metric { h.windowed.Lock() defer h.windowed.Unlock() m := &prometheusgo.Metric{} if err := h.windowed.cur.Write(m); err != nil { panic(err) } return m } // GetMetadata returns the metric's metadata including the Prometheus // MetricType. -func (h *HistogramV2) GetMetadata() Metadata { +func (h *Histogram) GetMetadata() Metadata { return h.Metadata }
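// A minimal usage sketch for the prometheus-backed Histogram above; the
// metric name, window, and bucket preset here are illustrative assumptions,
// not part of this change:
//
//	h := metric.NewHistogram(
//		metric.Metadata{Name: "example.op.latency"}, // hypothetical metadata
//		10*time.Second,          // rotation interval for the windowed histogram
//		metric.IOLatencyBuckets, // preset defined in histogram_buckets.go
//	)
//	h.RecordValue(opLatency.Nanoseconds()) // observed by both the cumulative and windowed sets
//	p99 := h.ValueAtQuantileWindowed(99)   // interpolated from the current window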
@@ -466,12 +256,12 @@
 }
 
 // GetType returns the prometheus type enum for this metric.
-func (h *HistogramV2) GetType() *prometheusgo.MetricType {
+func (h *Histogram) GetType() *prometheusgo.MetricType {
 	return prometheusgo.MetricType_HISTOGRAM.Enum()
 }
 
 // ToPrometheusMetric returns a filled-in prometheus metric of the right type.
-func (h *HistogramV2) ToPrometheusMetric() *prometheusgo.Metric {
+func (h *Histogram) ToPrometheusMetric() *prometheusgo.Metric {
 	m := &prometheusgo.Metric{}
 	if err := h.cum.Write(m); err != nil {
 		panic(err)
@@ -480,28 +270,24 @@
 	}
 	return m
 }
 
 // ToPrometheusMetricWindowed returns a filled-in prometheus metric of the right type.
-func (h *HistogramV2) ToPrometheusMetricWindowed() *prometheusgo.Metric {
+func (h *Histogram) ToPrometheusMetricWindowed() *prometheusgo.Metric {
 	h.windowed.Lock()
 	defer h.windowed.Unlock()
 	m := &prometheusgo.Metric{}
 	if err := h.windowed.cur.Write(m); err != nil {
 		panic(err)
 	}
 	return m
 }
 
 // GetMetadata returns the metric's metadata including the Prometheus
 // MetricType.
-func (h *HistogramV2) GetMetadata() Metadata {
+func (h *Histogram) GetMetadata() Metadata {
 	return h.Metadata
 }
 
 // Inspect calls the closure.
-func (h *HistogramV2) Inspect(f func(interface{})) {
+func (h *Histogram) Inspect(f func(interface{})) {
 	h.windowed.Lock()
 	maybeTick(&h.windowed)
 	h.windowed.Unlock()
@@ -509,25 +295,30 @@
 	f(h)
 }
 
 // TotalCount returns the (cumulative) number of samples.
-func (h *HistogramV2) TotalCount() int64 {
+func (h *Histogram) TotalCount() int64 {
 	return int64(h.ToPrometheusMetric().Histogram.GetSampleCount())
 }
 
 // TotalCountWindowed returns the number of samples in the current window.
-func (h *HistogramV2) TotalCountWindowed() int64 {
+func (h *Histogram) TotalCountWindowed() int64 {
 	return int64(h.ToPrometheusMetricWindowed().Histogram.GetSampleCount())
 }
 
-// TotalSum returns the (cumulative) number of samples.
-func (h *HistogramV2) TotalSum() float64 {
+// TotalSum returns the (cumulative) sum of samples.
+func (h *Histogram) TotalSum() float64 {
 	return h.ToPrometheusMetric().Histogram.GetSampleSum()
 }
 
-// TotalSumWindowed returns the number of samples in the current window.
-func (h *HistogramV2) TotalSumWindowed() float64 {
+// TotalSumWindowed returns the sum of samples in the current window.
+func (h *Histogram) TotalSumWindowed() float64 {
 	return h.ToPrometheusMetricWindowed().Histogram.GetSampleSum()
 }
 
+// Mean returns the (cumulative) mean of samples. Note that it returns NaN
+// when no samples have been recorded yet.
+func (h *Histogram) Mean() float64 {
+	return h.TotalSum() / float64(h.TotalCount())
+}
+
 // ValueAtQuantileWindowed takes a quantile value [0,100] and returns the
 // interpolated value at that quantile for the windowed histogram.
 //
@@ -539,7 +330,7 @@
 // with the quantiles that include the +Inf bucket.
 // 2. Since the prometheus client library ensures buckets are in a strictly
 // increasing order at creation, we do not sort them.
-func (h *HistogramV2) ValueAtQuantileWindowed(q float64) float64 {
+func (h *Histogram) ValueAtQuantileWindowed(q float64) float64 {
 	m := h.ToPrometheusMetricWindowed()
 
 	buckets := m.Histogram.Bucket
@@ -567,13 +358,12 @@
 	if math.IsNaN(val) || math.IsInf(val, -1) {
 		return 0
 	}
+
 	// should not extrapolate past the upper bound of the largest bucket
 	if val > *buckets[len(buckets)-1].UpperBound {
 		return *buckets[len(buckets)-1].UpperBound
 	}
+
 	return val
 }
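Reviewer note: ValueAtQuantileWindowed derives a rank from the windowed sample count, finds the first bucket whose cumulative count covers that rank, and interpolates linearly inside the bucket, clamping to the last bucket's upper bound. A self-contained sketch of the same math; quantileSketch is a hypothetical helper, not part of this diff (the real method reads bounds and counts from a prometheusgo.Metric):

```go
package main

import (
	"fmt"
	"math"
	"sort"
)

// quantileSketch returns the value at quantile q (in [0,100]) given bucket
// upper bounds and cumulative counts, mirroring the interpolation above.
func quantileSketch(upper []float64, cum []uint64, q float64) float64 {
	n := float64(cum[len(cum)-1]) // total samples = last cumulative count
	if n == 0 {
		return 0
	}
	rank := uint64(q/100*n + 0.5)
	// Buckets are strictly increasing, so a binary search suffices; no +Inf
	// bucket is exposed by the client library, hence len(cum)-1.
	b := sort.Search(len(cum)-1, func(i int) bool { return cum[i] >= rank })

	start, end, count := 0.0, upper[b], cum[b]
	if b > 0 {
		start = upper[b-1]
		count -= cum[b-1]
		rank -= cum[b-1]
	}
	val := start + (end-start)*(float64(rank)/float64(count))
	if math.IsNaN(val) || math.IsInf(val, -1) {
		return 0
	}
	// Never extrapolate past the upper bound of the largest bucket.
	return math.Min(val, upper[len(upper)-1])
}

func main() {
	// Buckets ≤1, ≤5, ≤10 with cumulative counts 1, 3, 5 (the data set from
	// the deleted TestHistogramPrometheus): the median interpolates to 5.
	fmt.Println(quantileSketch([]float64{1, 5, 10}, []uint64{1, 3, 5}, 50))
}
```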
diff --git a/pkg/util/metric/metric_test.go b/pkg/util/metric/metric_test.go
index f65e31aea098..13f3dbe7fc91 100644
--- a/pkg/util/metric/metric_test.go
+++ b/pkg/util/metric/metric_test.go
@@ -13,17 +13,14 @@ package metric
 import (
 	"bytes"
 	"encoding/json"
-	"fmt"
 	"math"
 	"reflect"
-	"strings"
 	"sync"
 	"testing"
 	"time"
 
 	_ "github.com/cockroachdb/cockroach/pkg/util/log" // for flags
 	"github.com/kr/pretty"
-	"github.com/prometheus/client_golang/prometheus"
 	prometheusgo "github.com/prometheus/client_model/go"
 	"github.com/stretchr/testify/require"
 )
@@ -103,7 +100,7 @@ func setNow(d time.Duration) {
 	}
 }
 
-func TestHistogramPrometheus(t *testing.T) {
+func TestHistogram(t *testing.T) {
 	u := func(v int) *uint64 {
 		n := uint64(v)
 		return &n
@@ -114,58 +111,15 @@ func TestHistogramPrometheus(t *testing.T) {
 		return &n
 	}
 
-	h := NewHistogram(Metadata{}, time.Hour, 10, 1)
-	h.RecordValue(1)
-	h.RecordValue(5)
-	h.RecordValue(5)
-	h.RecordValue(10)
-	h.RecordValue(15000) // counts as 10
-	act := *h.ToPrometheusMetric().Histogram
-
-	expSum := float64(1*1 + 2*5 + 2*10)
-
-	exp := prometheusgo.Histogram{
-		SampleCount: u(5),
-		SampleSum:   &expSum,
-		Bucket: []*prometheusgo.Bucket{
-			{CumulativeCount: u(1), UpperBound: f(1)},
-			{CumulativeCount: u(3), UpperBound: f(5)},
-			{CumulativeCount: u(5), UpperBound: f(10)},
-		},
-	}
-
-	if !reflect.DeepEqual(act, exp) {
-		t.Fatalf("expected differs from actual: %s", pretty.Diff(exp, act))
-	}
-}
-
-func TestHistogramV2(t *testing.T) {
-	u := func(v int) *uint64 {
-		n := uint64(v)
-		return &n
-	}
-
-	f := func(v int) *float64 {
-		n := float64(v)
-		return &n
-	}
-
-	h := NewHistogramV2(
+	h := NewHistogram(
 		Metadata{},
 		time.Hour,
-		prometheus.HistogramOpts{
-			Namespace:   "",
-			Subsystem:   "",
-			Name:        "",
-			Help:        "",
-			ConstLabels: nil,
-			Buckets: []float64{
-				1.0,
-				5.0,
-				10.0,
-				25.0,
-				100.0,
-			},
+		[]float64{
+			1.0,
+			5.0,
+			10.0,
+			25.0,
+			100.0,
 		},
 	)
@@ -211,38 +165,11 @@
 	require.Equal(t, 100.0, h.ValueAtQuantileWindowed(99.99))
 }
 
-// TestHistogramBuckets is used to generate additional prometheus buckets to be
-// used with HistogramV2. Please include obs-inf in the review process of new
-// buckets.
-func TestHistogramBuckets(t *testing.T) {
-	verifyAndPrint := func(t *testing.T, exp, act []float64) {
-		t.Helper()
-		var buf strings.Builder
-		for idx, f := range exp {
-			if idx == 0 {
-				fmt.Fprintf(&buf, "// Generated via %s.", t.Name())
-			}
-			fmt.Fprintf(&buf, "\n%f, // %s", f, time.Duration(f))
-		}
-		t.Logf("%s", &buf)
-		require.InDeltaSlice(t, exp, act, 1 /* delta */, "Please update the bucket boundaries for %s", t.Name())
-	}
-	t.Run("IOLatencyBuckets", func(t *testing.T) {
-		exp := prometheus.ExponentialBucketsRange(10e3, 10e9, 15)
-		verifyAndPrint(t, exp, IOLatencyBuckets)
-	})
-
-	t.Run("NetworkLatencyBuckets", func(t *testing.T) {
-		exp := prometheus.ExponentialBucketsRange(500e3, 1e9, 15)
-		verifyAndPrint(t, exp, NetworkLatencyBuckets)
-	})
-}
-
-func TestNewHistogramV2Rotate(t *testing.T) {
+func TestNewHistogramRotate(t *testing.T) {
 	defer TestingSetNow(nil)()
 	setNow(0)
-	h := NewHistogramV2(emptyMetadata, 10*time.Second, prometheus.HistogramOpts{Buckets: nil})
+	h := NewHistogram(emptyMetadata, 10*time.Second, nil)
 	for i := 0; i < 4; i++ {
 		// Windowed histogram is initially empty.
 		h.Inspect(func(interface{}) {}) // triggers ticking
@@ -265,36 +192,3 @@ func TestNewHistogramRotate(t *testing.T) {
 		// Go to beginning.
 	}
 }
-
-func TestHistogramRotate(t *testing.T) {
-	defer TestingSetNow(nil)()
-	setNow(0)
-	duration := histWrapNum * time.Second
-	h := NewHistogram(emptyMetadata, duration, 1000+10*histWrapNum, 3)
-	var cur time.Duration
-	for i := 0; i < 3*histWrapNum; i++ {
-		v := int64(10 * i)
-		h.RecordValue(v)
-		cur += time.Second
-		setNow(cur)
-		cur, windowDuration := h.Windowed()
-		if windowDuration != duration {
-			t.Fatalf("window changed: is %s, should be %s", windowDuration, duration)
-		}
-
-		// When i == histWrapNum-1, we expect the entry from i==0 to move out
-		// of the window (since we rotated for the histWrapNum'th time).
-		expMin := int64((1 + i - (histWrapNum - 1)) * 10)
-		if expMin < 0 {
-			expMin = 0
-		}
-
-		if min := cur.Min(); min != expMin {
-			t.Fatalf("%d: unexpected minimum %d, expected %d", i, min, expMin)
-		}
-
-		if max, expMax := cur.Max(), v; max != expMax {
-			t.Fatalf("%d: unexpected maximum %d, expected %d", i, max, expMax)
-		}
-	}
-}
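Reviewer note: with TestHistogramBuckets deleted from this file, fixed bucket lists such as IOLatencyBuckets would presumably keep being generated with the upstream helper the test wrapped. A sketch of that generation step, assuming client_golang v1.12+ for ExponentialBucketsRange (which the deleted test already used):

```go
package main

import (
	"fmt"
	"time"

	"github.com/prometheus/client_golang/prometheus"
)

func main() {
	// 15 exponentially spaced boundaries from 10µs to 10s, in nanoseconds —
	// the same shape the deleted TestHistogramBuckets used for
	// IOLatencyBuckets.
	for i, b := range prometheus.ExponentialBucketsRange(10e3, 10e9, 15) {
		if i == 0 {
			fmt.Println("// Generated via prometheus.ExponentialBucketsRange.")
		}
		fmt.Printf("%f, // %s\n", b, time.Duration(b))
	}
}
```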
diff --git a/pkg/util/metric/registry_test.go b/pkg/util/metric/registry_test.go
index 52e38069109c..adbf36ca25ad 100644
--- a/pkg/util/metric/registry_test.go
+++ b/pkg/util/metric/registry_test.go
@@ -76,7 +76,7 @@ func TestRegistry(t *testing.T) {
 	topCounter := NewCounter(Metadata{Name: "top.counter"})
 	r.AddMetric(topCounter)
 
-	r.AddMetric(NewHistogram(Metadata{Name: "top.histogram"}, time.Minute, 1000, 3))
+	r.AddMetric(NewHistogram(Metadata{Name: "top.histogram"}, time.Minute, CountBuckets))
 
 	r.AddMetric(NewGauge(Metadata{Name: "bottom.gauge"}))
 	ms := &struct {
@@ -103,7 +103,7 @@
 		StructGauge:     NewGauge(Metadata{Name: "struct.gauge"}),
 		StructGauge64:   NewGaugeFloat64(Metadata{Name: "struct.gauge64"}),
 		StructCounter:   NewCounter(Metadata{Name: "struct.counter"}),
-		StructHistogram: NewHistogram(Metadata{Name: "struct.histogram"}, time.Minute, 1000, 3),
+		StructHistogram: NewHistogram(Metadata{Name: "struct.histogram"}, time.Minute, CountBuckets),
 		NestedStructGauge: NestedStruct{
 			NestedStructGauge: NewGauge(Metadata{Name: "nested.struct.gauge"}),
 		},
@@ -122,7 +122,7 @@
 		privateStructGauge:     NewGauge(Metadata{Name: "private.struct.gauge"}),
 		privateStructGauge64:   NewGaugeFloat64(Metadata{Name: "private.struct.gauge64"}),
 		privateStructCounter:   NewCounter(Metadata{Name: "private.struct.counter"}),
-		privateStructHistogram: NewHistogram(Metadata{Name: "private.struct.histogram"}, time.Minute, 1000, 3),
+		privateStructHistogram: NewHistogram(Metadata{Name: "private.struct.histogram"}, time.Minute, CountBuckets),
 		privateNestedStructGauge: NestedStruct{
 			NestedStructGauge: NewGauge(Metadata{Name: "private.nested.struct.gauge"}),
 		},
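Reviewer note: for callers, the migration is mechanical — the hdr-era (maxVal, sigFigs) pair becomes an explicit bucket slice. A hedged before/after sketch, assuming the {IO,Network}LatencyBuckets lists stay exported from pkg/util/metric; the metric name below is hypothetical:

```go
package main

import (
	"time"

	"github.com/cockroachdb/cockroach/pkg/util/metric"
)

func main() {
	meta := metric.Metadata{Name: "demo.latency"} // hypothetical metric
	// Before: metric.NewHistogram(meta, time.Minute, 1000 /* maxVal */, 3 /* sigFigs */)
	// After: pass bucket boundaries explicitly.
	h := metric.NewHistogram(meta, time.Minute, metric.IOLatencyBuckets)
	h.RecordValue(time.Millisecond.Nanoseconds())
	_ = h.ValueAtQuantileWindowed(99) // windowed helpers remain available
}
```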