From 5c2ef545344428b5e0d9712a91fc8aa1a048946f Mon Sep 17 00:00:00 2001 From: Pramod Bindal Date: Thu, 19 Sep 2024 11:48:41 +0530 Subject: [PATCH] Adding metrics for Running PipelineRuns --- config/config-observability.yaml | 1 + pkg/apis/config/metrics.go | 11 +++++ pkg/apis/config/metrics_test.go | 5 ++ .../config-observability-namespacelevel.yaml | 1 + .../config-observability-throttle.yaml | 1 + pkg/pipelinerunmetrics/metrics.go | 46 ++++++++++++++++++- pkg/pipelinerunmetrics/metrics_test.go | 13 +++--- 7 files changed, 70 insertions(+), 8 deletions(-) diff --git a/config/config-observability.yaml b/config/config-observability.yaml index f1f800beb06..692232304e0 100644 --- a/config/config-observability.yaml +++ b/config/config-observability.yaml @@ -59,3 +59,4 @@ data: metrics.pipelinerun.level: "pipeline" metrics.pipelinerun.duration-type: "histogram" metrics.count.enable-reason: "false" + metrics.running-pipelinerun.level: "pipeline" diff --git a/pkg/apis/config/metrics.go b/pkg/apis/config/metrics.go index 0df91805db0..47293c0440a 100644 --- a/pkg/apis/config/metrics.go +++ b/pkg/apis/config/metrics.go @@ -29,6 +29,9 @@ const ( // metricsPipelinerunLevel determines to what level to aggregate metrics // for pipelinerun metricsPipelinerunLevelKey = "metrics.pipelinerun.level" + // metricsRunningPipelinerunLevelKey determines to what level to aggregate metrics + // for running pipelineruns + metricsRunningPipelinerunLevelKey = "metrics.running-pipelinerun.level" // metricsDurationTaskrunType determines what type of // metrics to use for aggregating duration for taskrun metricsDurationTaskrunType = "metrics.taskrun.duration-type" @@ -55,6 +58,9 @@ const ( // DefaultPipelinerunLevel determines to what level to aggregate metrics // when it isn't specified in configmap DefaultPipelinerunLevel = PipelinerunLevelAtPipeline + // DefaultRunningPipelinerunLevel determines to what level to aggregate metrics + // when it isn't specified in configmap + DefaultRunningPipelinerunLevel = PipelinerunLevelAtPipeline // PipelinerunLevelAtPipelinerun specify that aggregation will be done at // pipelinerun level PipelinerunLevelAtPipelinerun = "pipelinerun" @@ -96,6 +102,7 @@ var DefaultMetrics, _ = newMetricsFromMap(map[string]string{}) type Metrics struct { TaskrunLevel string PipelinerunLevel string + RunningPipelinerunLevel string DurationTaskrunType string DurationPipelinerunType string CountWithReason bool @@ -130,6 +137,7 @@ func newMetricsFromMap(cfgMap map[string]string) (*Metrics, error) { tc := Metrics{ TaskrunLevel: DefaultTaskrunLevel, PipelinerunLevel: DefaultPipelinerunLevel, + RunningPipelinerunLevel: DefaultRunningPipelinerunLevel, DurationTaskrunType: DefaultDurationTaskrunType, DurationPipelinerunType: DefaultDurationPipelinerunType, CountWithReason: false, @@ -143,6 +151,9 @@ func newMetricsFromMap(cfgMap map[string]string) (*Metrics, error) { if pipelinerunLevel, ok := cfgMap[metricsPipelinerunLevelKey]; ok { tc.PipelinerunLevel = pipelinerunLevel } + if runningPipelinerunLevel, ok := cfgMap[metricsRunningPipelinerunLevelKey]; ok { + tc.RunningPipelinerunLevel = runningPipelinerunLevel + } if durationTaskrun, ok := cfgMap[metricsDurationTaskrunType]; ok { tc.DurationTaskrunType = durationTaskrun } diff --git a/pkg/apis/config/metrics_test.go b/pkg/apis/config/metrics_test.go index c273ed6b76e..121dc3bfa0a 100644 --- a/pkg/apis/config/metrics_test.go +++ b/pkg/apis/config/metrics_test.go @@ -36,6 +36,7 @@ func TestNewMetricsFromConfigMap(t *testing.T) { expectedConfig: &config.Metrics{ TaskrunLevel: config.TaskrunLevelAtTaskrun, PipelinerunLevel: config.PipelinerunLevelAtPipelinerun, + RunningPipelinerunLevel: config.DefaultRunningPipelinerunLevel, DurationTaskrunType: config.DurationPipelinerunTypeHistogram, DurationPipelinerunType: config.DurationPipelinerunTypeHistogram, CountWithReason: false, @@ -47,6 +48,7 @@ func TestNewMetricsFromConfigMap(t *testing.T) { expectedConfig: &config.Metrics{ TaskrunLevel: config.TaskrunLevelAtNS, PipelinerunLevel: config.PipelinerunLevelAtNS, + RunningPipelinerunLevel: config.PipelinerunLevelAtNS, DurationTaskrunType: config.DurationTaskrunTypeHistogram, DurationPipelinerunType: config.DurationPipelinerunTypeLastValue, CountWithReason: false, @@ -58,6 +60,7 @@ func TestNewMetricsFromConfigMap(t *testing.T) { expectedConfig: &config.Metrics{ TaskrunLevel: config.TaskrunLevelAtNS, PipelinerunLevel: config.PipelinerunLevelAtNS, + RunningPipelinerunLevel: config.DefaultRunningPipelinerunLevel, DurationTaskrunType: config.DurationTaskrunTypeHistogram, DurationPipelinerunType: config.DurationPipelinerunTypeLastValue, CountWithReason: true, @@ -69,6 +72,7 @@ func TestNewMetricsFromConfigMap(t *testing.T) { expectedConfig: &config.Metrics{ TaskrunLevel: config.TaskrunLevelAtNS, PipelinerunLevel: config.PipelinerunLevelAtNS, + RunningPipelinerunLevel: config.DefaultRunningPipelinerunLevel, DurationTaskrunType: config.DurationTaskrunTypeHistogram, DurationPipelinerunType: config.DurationPipelinerunTypeLastValue, CountWithReason: true, @@ -88,6 +92,7 @@ func TestNewMetricsFromEmptyConfigMap(t *testing.T) { expectedConfig := &config.Metrics{ TaskrunLevel: config.TaskrunLevelAtTask, PipelinerunLevel: config.PipelinerunLevelAtPipeline, + RunningPipelinerunLevel: config.PipelinerunLevelAtPipeline, DurationTaskrunType: config.DurationPipelinerunTypeHistogram, DurationPipelinerunType: config.DurationPipelinerunTypeHistogram, CountWithReason: false, diff --git a/pkg/apis/config/testdata/config-observability-namespacelevel.yaml b/pkg/apis/config/testdata/config-observability-namespacelevel.yaml index 5029ee0099f..65a72ede515 100644 --- a/pkg/apis/config/testdata/config-observability-namespacelevel.yaml +++ b/pkg/apis/config/testdata/config-observability-namespacelevel.yaml @@ -27,4 +27,5 @@ data: metrics.taskrun.level: "namespace" metrics.taskrun.duration-type: "histogram" metrics.pipelinerun.level: "namespace" + metrics.running-pipelinerun.level: "namespace" metrics.pipelinerun.duration-type: "lastvalue" diff --git a/pkg/apis/config/testdata/config-observability-throttle.yaml b/pkg/apis/config/testdata/config-observability-throttle.yaml index 2b418e176cd..08fe6ac9d5a 100644 --- a/pkg/apis/config/testdata/config-observability-throttle.yaml +++ b/pkg/apis/config/testdata/config-observability-throttle.yaml @@ -27,6 +27,7 @@ data: metrics.taskrun.level: "namespace" metrics.taskrun.duration-type: "histogram" metrics.pipelinerun.level: "namespace" + metrics.running-pipelinerun.level: "pipeline" metrics.pipelinerun.duration-type: "lastvalue" metrics.count.enable-reason: "true" metrics.taskrun.throttle.enable-namespace: "true" diff --git a/pkg/pipelinerunmetrics/metrics.go b/pkg/pipelinerunmetrics/metrics.go index d528681db9f..a743ad2e1e9 100644 --- a/pkg/pipelinerunmetrics/metrics.go +++ b/pkg/pipelinerunmetrics/metrics.go @@ -163,6 +163,10 @@ func viewRegister(cfg *config.Metrics) error { default: return errors.New("invalid config for PipelinerunLevel: " + cfg.PipelinerunLevel) } + runningPRTag, err := getRunningPipelineRunTags(cfg) + if err != nil { + return err + } distribution := view.Distribution(10, 30, 60, 300, 900, 1800, 3600, 5400, 10800, 21600, 43200, 86400) @@ -213,6 +217,7 @@ func viewRegister(cfg *config.Metrics) error { Description: runningPRs.Description(), Measure: runningPRs, Aggregation: view.LastValue(), + TagKeys: runningPRTag, } runningPRsWaitingOnPipelineResolutionCountView = &view.View{ @@ -396,9 +401,25 @@ func (r *Recorder) RunningPipelineRuns(lister listers.PipelineRunLister) error { var runningPipelineRuns int var trsWaitResolvingTaskRef int var prsWaitResolvingPipelineRef int + countMap := map[string]int{} for _, pr := range prs { + pipelineName := getPipelineTagName(pr) + pipelineRunKey := pr.Namespace + "#" + pipelineName + mutators := []tag.Mutator{ + tag.Insert(namespaceTag, pr.Namespace), + tag.Insert(pipelineTag, pipelineName), + } + if r.cfg != nil && r.cfg.RunningPipelinerunLevel == "pipelinerun" { + pipelineRunKey = pipelineRunKey + "#" + pr.Name + } + ctx_, err_ := tag.New(context.Background(), mutators...) + if err_ != nil { + return err + } if !pr.IsDone() { + countMap[pipelineRunKey]++ + metrics.Record(ctx_, runningPRs.M(float64(countMap[pipelineRunKey]))) runningPipelineRuns++ succeedCondition := pr.Status.GetCondition(apis.ConditionSucceeded) if succeedCondition != nil && succeedCondition.Status == corev1.ConditionUnknown { @@ -409,6 +430,13 @@ func (r *Recorder) RunningPipelineRuns(lister listers.PipelineRunLister) error { prsWaitResolvingPipelineRef++ } } + } else { + // If all PipelineRuns for a Pipeline are completed then record it with 0 + // set the key to 0 so that we do not record further completed PipelineRuns for same Pipeline + if _, exists := countMap[pipelineRunKey]; !exists { + countMap[pipelineRunKey] = 0 + metrics.Record(ctx_, runningPRs.M(0)) + } } } @@ -421,8 +449,6 @@ func (r *Recorder) RunningPipelineRuns(lister listers.PipelineRunLister) error { metrics.Record(ctx, runningPRsWaitingOnTaskResolutionCount.M(float64(trsWaitResolvingTaskRef))) metrics.Record(ctx, runningPRsWaitingOnTaskResolution.M(float64(trsWaitResolvingTaskRef))) metrics.Record(ctx, runningPRsCount.M(float64(runningPipelineRuns))) - metrics.Record(ctx, runningPRs.M(float64(runningPipelineRuns))) - return nil } @@ -449,3 +475,19 @@ func (r *Recorder) ReportRunningPipelineRuns(ctx context.Context, lister listers } } } + +func getRunningPipelineRunTags(cfg *config.Metrics) ([]tag.Key, error) { + var tags []tag.Key + + switch cfg.RunningPipelinerunLevel { + case config.PipelinerunLevelAtPipelinerun: + tags = []tag.Key{pipelinerunTag, pipelineTag, namespaceTag} + case config.PipelinerunLevelAtPipeline: + tags = []tag.Key{pipelineTag, namespaceTag} + case config.PipelinerunLevelAtNS: + tags = []tag.Key{} + default: + return nil, errors.New("invalid config for RunnignPipelinerunLevel: " + cfg.PipelinerunLevel) + } + return tags, nil +} diff --git a/pkg/pipelinerunmetrics/metrics_test.go b/pkg/pipelinerunmetrics/metrics_test.go index 23703e64539..832b82e1119 100644 --- a/pkg/pipelinerunmetrics/metrics_test.go +++ b/pkg/pipelinerunmetrics/metrics_test.go @@ -50,6 +50,7 @@ func getConfigContext(countWithReason bool) context.Context { Metrics: &config.Metrics{ TaskrunLevel: config.TaskrunLevelAtTaskrun, PipelinerunLevel: config.PipelinerunLevelAtPipelinerun, + RunningPipelinerunLevel: config.DefaultRunningPipelinerunLevel, DurationTaskrunType: config.DefaultDurationTaskrunType, DurationPipelinerunType: config.DefaultDurationPipelinerunType, CountWithReason: countWithReason, @@ -464,9 +465,9 @@ func TestRecordPipelineRunDurationCount(t *testing.T) { func TestRecordRunningPipelineRunsCount(t *testing.T) { unregisterMetrics() - newPipelineRun := func(status corev1.ConditionStatus) *v1.PipelineRun { + newPipelineRun := func(status corev1.ConditionStatus, namespace string) *v1.PipelineRun { return &v1.PipelineRun{ - ObjectMeta: metav1.ObjectMeta{Name: names.SimpleNameGenerator.RestrictLengthWithRandomSuffix("pipelinerun-")}, + ObjectMeta: metav1.ObjectMeta{Name: names.SimpleNameGenerator.RestrictLengthWithRandomSuffix("pipelinerun-"), Namespace: namespace}, Status: v1.PipelineRunStatus{ Status: duckv1.Status{ Conditions: duckv1.Conditions{{ @@ -482,9 +483,9 @@ func TestRecordRunningPipelineRunsCount(t *testing.T) { informer := fakepipelineruninformer.Get(ctx) // Add N randomly-named PipelineRuns with differently-succeeded statuses. for _, tr := range []*v1.PipelineRun{ - newPipelineRun(corev1.ConditionTrue), - newPipelineRun(corev1.ConditionUnknown), - newPipelineRun(corev1.ConditionFalse), + newPipelineRun(corev1.ConditionTrue, ""), + newPipelineRun(corev1.ConditionUnknown, "test"), + newPipelineRun(corev1.ConditionFalse, ""), } { if err := informer.Informer().GetIndexer().Add(tr); err != nil { t.Fatalf("Adding TaskRun to informer: %v", err) @@ -501,7 +502,7 @@ func TestRecordRunningPipelineRunsCount(t *testing.T) { t.Errorf("RunningPipelineRuns: %v", err) } metricstest.CheckLastValueData(t, "running_pipelineruns_count", map[string]string{}, 1) - metricstest.CheckLastValueData(t, "running_pipelineruns", map[string]string{}, 1) + metricstest.CheckLastValueData(t, "running_pipelineruns", map[string]string{"pipeline": "anonymous", "namespace": "test"}, 1) } func TestRecordRunningPipelineRunsResolutionWaitCounts(t *testing.T) {