Skip to content

Commit

Permalink
Adding metrics for Running PipelineRuns
Browse files Browse the repository at this point in the history
Currently metrices shown for Running Pipeline Count is at cluster level. There is no way we can get that metric at namespace or pipeline level.
We have added the pipelinerun metric at pipeline and namespace level (Can be configured via ConfigMap).
  • Loading branch information
pramodbindal committed Sep 26, 2024
1 parent 0649270 commit 4a05727
Show file tree
Hide file tree
Showing 7 changed files with 70 additions and 8 deletions.
1 change: 1 addition & 0 deletions config/config-observability.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -59,3 +59,4 @@ data:
metrics.pipelinerun.level: "pipeline"
metrics.pipelinerun.duration-type: "histogram"
metrics.count.enable-reason: "false"
metrics.running-pipelinerun.level: "pipeline"
11 changes: 11 additions & 0 deletions pkg/apis/config/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@ const (
// metricsPipelinerunLevel determines to what level to aggregate metrics
// for pipelinerun
metricsPipelinerunLevelKey = "metrics.pipelinerun.level"
// metricsRunningPipelinerunLevelKey determines to what level to aggregate metrics
// for running pipelineruns
metricsRunningPipelinerunLevelKey = "metrics.running-pipelinerun.level"
// metricsDurationTaskrunType determines what type of
// metrics to use for aggregating duration for taskrun
metricsDurationTaskrunType = "metrics.taskrun.duration-type"
Expand All @@ -55,6 +58,9 @@ const (
// DefaultPipelinerunLevel determines to what level to aggregate metrics
// when it isn't specified in configmap
DefaultPipelinerunLevel = PipelinerunLevelAtPipeline
// DefaultRunningPipelinerunLevel determines to what level to aggregate metrics
// when it isn't specified in configmap
DefaultRunningPipelinerunLevel = PipelinerunLevelAtPipeline
// PipelinerunLevelAtPipelinerun specify that aggregation will be done at
// pipelinerun level
PipelinerunLevelAtPipelinerun = "pipelinerun"
Expand Down Expand Up @@ -96,6 +102,7 @@ var DefaultMetrics, _ = newMetricsFromMap(map[string]string{})
type Metrics struct {
TaskrunLevel string
PipelinerunLevel string
RunningPipelinerunLevel string
DurationTaskrunType string
DurationPipelinerunType string
CountWithReason bool
Expand Down Expand Up @@ -130,6 +137,7 @@ func newMetricsFromMap(cfgMap map[string]string) (*Metrics, error) {
tc := Metrics{
TaskrunLevel: DefaultTaskrunLevel,
PipelinerunLevel: DefaultPipelinerunLevel,
RunningPipelinerunLevel: DefaultRunningPipelinerunLevel,
DurationTaskrunType: DefaultDurationTaskrunType,
DurationPipelinerunType: DefaultDurationPipelinerunType,
CountWithReason: false,
Expand All @@ -143,6 +151,9 @@ func newMetricsFromMap(cfgMap map[string]string) (*Metrics, error) {
if pipelinerunLevel, ok := cfgMap[metricsPipelinerunLevelKey]; ok {
tc.PipelinerunLevel = pipelinerunLevel
}
if runningPipelinerunLevel, ok := cfgMap[metricsRunningPipelinerunLevelKey]; ok {
tc.RunningPipelinerunLevel = runningPipelinerunLevel
}
if durationTaskrun, ok := cfgMap[metricsDurationTaskrunType]; ok {
tc.DurationTaskrunType = durationTaskrun
}
Expand Down
5 changes: 5 additions & 0 deletions pkg/apis/config/metrics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ func TestNewMetricsFromConfigMap(t *testing.T) {
expectedConfig: &config.Metrics{
TaskrunLevel: config.TaskrunLevelAtTaskrun,
PipelinerunLevel: config.PipelinerunLevelAtPipelinerun,
RunningPipelinerunLevel: config.DefaultRunningPipelinerunLevel,
DurationTaskrunType: config.DurationPipelinerunTypeHistogram,
DurationPipelinerunType: config.DurationPipelinerunTypeHistogram,
CountWithReason: false,
Expand All @@ -47,6 +48,7 @@ func TestNewMetricsFromConfigMap(t *testing.T) {
expectedConfig: &config.Metrics{
TaskrunLevel: config.TaskrunLevelAtNS,
PipelinerunLevel: config.PipelinerunLevelAtNS,
RunningPipelinerunLevel: config.PipelinerunLevelAtNS,
DurationTaskrunType: config.DurationTaskrunTypeHistogram,
DurationPipelinerunType: config.DurationPipelinerunTypeLastValue,
CountWithReason: false,
Expand All @@ -58,6 +60,7 @@ func TestNewMetricsFromConfigMap(t *testing.T) {
expectedConfig: &config.Metrics{
TaskrunLevel: config.TaskrunLevelAtNS,
PipelinerunLevel: config.PipelinerunLevelAtNS,
RunningPipelinerunLevel: config.DefaultRunningPipelinerunLevel,
DurationTaskrunType: config.DurationTaskrunTypeHistogram,
DurationPipelinerunType: config.DurationPipelinerunTypeLastValue,
CountWithReason: true,
Expand All @@ -69,6 +72,7 @@ func TestNewMetricsFromConfigMap(t *testing.T) {
expectedConfig: &config.Metrics{
TaskrunLevel: config.TaskrunLevelAtNS,
PipelinerunLevel: config.PipelinerunLevelAtNS,
RunningPipelinerunLevel: config.DefaultRunningPipelinerunLevel,
DurationTaskrunType: config.DurationTaskrunTypeHistogram,
DurationPipelinerunType: config.DurationPipelinerunTypeLastValue,
CountWithReason: true,
Expand All @@ -88,6 +92,7 @@ func TestNewMetricsFromEmptyConfigMap(t *testing.T) {
expectedConfig := &config.Metrics{
TaskrunLevel: config.TaskrunLevelAtTask,
PipelinerunLevel: config.PipelinerunLevelAtPipeline,
RunningPipelinerunLevel: config.PipelinerunLevelAtPipeline,
DurationTaskrunType: config.DurationPipelinerunTypeHistogram,
DurationPipelinerunType: config.DurationPipelinerunTypeHistogram,
CountWithReason: false,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,4 +27,5 @@ data:
metrics.taskrun.level: "namespace"
metrics.taskrun.duration-type: "histogram"
metrics.pipelinerun.level: "namespace"
metrics.running-pipelinerun.level: "namespace"
metrics.pipelinerun.duration-type: "lastvalue"
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ data:
metrics.taskrun.level: "namespace"
metrics.taskrun.duration-type: "histogram"
metrics.pipelinerun.level: "namespace"
metrics.running-pipelinerun.level: "pipeline"
metrics.pipelinerun.duration-type: "lastvalue"
metrics.count.enable-reason: "true"
metrics.taskrun.throttle.enable-namespace: "true"
46 changes: 44 additions & 2 deletions pkg/pipelinerunmetrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,10 @@ func viewRegister(cfg *config.Metrics) error {
default:
return errors.New("invalid config for PipelinerunLevel: " + cfg.PipelinerunLevel)
}
runningPRTag, err := getRunningPipelineRunTags(cfg)
if err != nil {
return err
}

distribution := view.Distribution(10, 30, 60, 300, 900, 1800, 3600, 5400, 10800, 21600, 43200, 86400)

Expand Down Expand Up @@ -213,6 +217,7 @@ func viewRegister(cfg *config.Metrics) error {
Description: runningPRs.Description(),
Measure: runningPRs,
Aggregation: view.LastValue(),
TagKeys: runningPRTag,
}

runningPRsWaitingOnPipelineResolutionCountView = &view.View{
Expand Down Expand Up @@ -396,9 +401,25 @@ func (r *Recorder) RunningPipelineRuns(lister listers.PipelineRunLister) error {
var runningPipelineRuns int
var trsWaitResolvingTaskRef int
var prsWaitResolvingPipelineRef int
countMap := map[string]int{}

for _, pr := range prs {
pipelineName := getPipelineTagName(pr)
pipelineRunKey := pr.Namespace + "#" + pipelineName
mutators := []tag.Mutator{
tag.Insert(namespaceTag, pr.Namespace),
tag.Insert(pipelineTag, pipelineName),
}
if r.cfg != nil && r.cfg.RunningPipelinerunLevel == "pipelinerun" {
pipelineRunKey = pipelineRunKey + "#" + pr.Name
}
ctx_, err_ := tag.New(context.Background(), mutators...)
if err_ != nil {
return err
}
if !pr.IsDone() {
countMap[pipelineRunKey]++
metrics.Record(ctx_, runningPRs.M(float64(countMap[pipelineRunKey])))
runningPipelineRuns++
succeedCondition := pr.Status.GetCondition(apis.ConditionSucceeded)
if succeedCondition != nil && succeedCondition.Status == corev1.ConditionUnknown {
Expand All @@ -409,6 +430,13 @@ func (r *Recorder) RunningPipelineRuns(lister listers.PipelineRunLister) error {
prsWaitResolvingPipelineRef++
}
}
} else {
// If all PipelineRuns for a Pipeline are completed then record it with 0
// set the key to 0 so that we do not record further completed PipelineRuns for same Pipeline
if _, exists := countMap[pipelineRunKey]; !exists {
countMap[pipelineRunKey] = 0
metrics.Record(ctx_, runningPRs.M(0))
}
}
}

Expand All @@ -421,8 +449,6 @@ func (r *Recorder) RunningPipelineRuns(lister listers.PipelineRunLister) error {
metrics.Record(ctx, runningPRsWaitingOnTaskResolutionCount.M(float64(trsWaitResolvingTaskRef)))
metrics.Record(ctx, runningPRsWaitingOnTaskResolution.M(float64(trsWaitResolvingTaskRef)))
metrics.Record(ctx, runningPRsCount.M(float64(runningPipelineRuns)))
metrics.Record(ctx, runningPRs.M(float64(runningPipelineRuns)))

return nil
}

Expand All @@ -449,3 +475,19 @@ func (r *Recorder) ReportRunningPipelineRuns(ctx context.Context, lister listers
}
}
}

func getRunningPipelineRunTags(cfg *config.Metrics) ([]tag.Key, error) {
var tags []tag.Key

switch cfg.RunningPipelinerunLevel {
case config.PipelinerunLevelAtPipelinerun:
tags = []tag.Key{pipelinerunTag, pipelineTag, namespaceTag}
case config.PipelinerunLevelAtPipeline:
tags = []tag.Key{pipelineTag, namespaceTag}
case config.PipelinerunLevelAtNS:
tags = []tag.Key{}
default:
return nil, errors.New("invalid config for RunnignPipelinerunLevel: " + cfg.PipelinerunLevel)
}
return tags, nil
}
13 changes: 7 additions & 6 deletions pkg/pipelinerunmetrics/metrics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ func getConfigContext(countWithReason bool) context.Context {
Metrics: &config.Metrics{
TaskrunLevel: config.TaskrunLevelAtTaskrun,
PipelinerunLevel: config.PipelinerunLevelAtPipelinerun,
RunningPipelinerunLevel: config.DefaultRunningPipelinerunLevel,
DurationTaskrunType: config.DefaultDurationTaskrunType,
DurationPipelinerunType: config.DefaultDurationPipelinerunType,
CountWithReason: countWithReason,
Expand Down Expand Up @@ -464,9 +465,9 @@ func TestRecordPipelineRunDurationCount(t *testing.T) {
func TestRecordRunningPipelineRunsCount(t *testing.T) {
unregisterMetrics()

newPipelineRun := func(status corev1.ConditionStatus) *v1.PipelineRun {
newPipelineRun := func(status corev1.ConditionStatus, namespace string) *v1.PipelineRun {
return &v1.PipelineRun{
ObjectMeta: metav1.ObjectMeta{Name: names.SimpleNameGenerator.RestrictLengthWithRandomSuffix("pipelinerun-")},
ObjectMeta: metav1.ObjectMeta{Name: names.SimpleNameGenerator.RestrictLengthWithRandomSuffix("pipelinerun-"), Namespace: namespace},
Status: v1.PipelineRunStatus{
Status: duckv1.Status{
Conditions: duckv1.Conditions{{
Expand All @@ -482,9 +483,9 @@ func TestRecordRunningPipelineRunsCount(t *testing.T) {
informer := fakepipelineruninformer.Get(ctx)
// Add N randomly-named PipelineRuns with differently-succeeded statuses.
for _, tr := range []*v1.PipelineRun{
newPipelineRun(corev1.ConditionTrue),
newPipelineRun(corev1.ConditionUnknown),
newPipelineRun(corev1.ConditionFalse),
newPipelineRun(corev1.ConditionTrue, ""),
newPipelineRun(corev1.ConditionUnknown, "test"),
newPipelineRun(corev1.ConditionFalse, ""),
} {
if err := informer.Informer().GetIndexer().Add(tr); err != nil {
t.Fatalf("Adding TaskRun to informer: %v", err)
Expand All @@ -501,7 +502,7 @@ func TestRecordRunningPipelineRunsCount(t *testing.T) {
t.Errorf("RunningPipelineRuns: %v", err)
}
metricstest.CheckLastValueData(t, "running_pipelineruns_count", map[string]string{}, 1)
metricstest.CheckLastValueData(t, "running_pipelineruns", map[string]string{}, 1)
metricstest.CheckLastValueData(t, "running_pipelineruns", map[string]string{"pipeline": "anonymous", "namespace": "test"}, 1)
}

func TestRecordRunningPipelineRunsResolutionWaitCounts(t *testing.T) {
Expand Down

0 comments on commit 4a05727

Please sign in to comment.