diff --git a/pkg/controllers/metrics/pod/controller.go b/pkg/controllers/metrics/pod/controller.go
index 9547fc56b6..d6fab75116 100644
--- a/pkg/controllers/metrics/pod/controller.go
+++ b/pkg/controllers/metrics/pod/controller.go
@@ -71,6 +71,15 @@ var (
 			Objectives: metrics.SummaryObjectives(),
 		},
 	)
+	podBoundDurationSeconds = prometheus.NewHistogramVec(
+		prometheus.HistogramOpts{
+			Namespace: "karpenter",
+			Subsystem: metrics.PodSubsystem,
+			Name:      "bound_duration_seconds",
+			Help:      "The time from pod creation until the pod is bound.",
+		},
+		labelNames(),
+	)
 )
 
 // Controller for the resource
@@ -82,7 +91,7 @@ type Controller struct {
 }
 
 func init() {
-	crmetrics.Registry.MustRegister(podState, podStartupDurationSeconds)
+	crmetrics.Registry.MustRegister(podState, podStartupDurationSeconds, podBoundDurationSeconds)
 }
 
 func labelNames() []string {
@@ -132,13 +141,19 @@ func (c *Controller) Reconcile(ctx context.Context, req reconcile.Request) (reco
 			Labels: labels,
 		},
 	})
-	c.recordPodStartupMetric(pod)
+	c.recordPodStartupMetric(pod, labels)
 	return reconcile.Result{}, nil
 }
 
-func (c *Controller) recordPodStartupMetric(pod *corev1.Pod) {
+func (c *Controller) recordPodStartupMetric(pod *corev1.Pod, labels prometheus.Labels) {
 	key := client.ObjectKeyFromObject(pod).String()
 	if pod.Status.Phase == phasePending {
+		cond, ok := lo.Find(pod.Status.Conditions, func(c corev1.PodCondition) bool {
+			return c.Type == corev1.PodScheduled
+		})
+		if ok && cond.Status == corev1.ConditionTrue {
+			podBoundDurationSeconds.With(labels).Observe(cond.LastTransitionTime.Sub(pod.CreationTimestamp.Time).Seconds())
+		}
 		c.pendingPods.Insert(key)
 		return
 	}
diff --git a/pkg/controllers/metrics/pod/suite_test.go b/pkg/controllers/metrics/pod/suite_test.go
index 940bb6f2df..5183f5b8bc 100644
--- a/pkg/controllers/metrics/pod/suite_test.go
+++ b/pkg/controllers/metrics/pod/suite_test.go
@@ -20,6 +20,8 @@ import (
 	"context"
 	"testing"
 
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+
 	. "github.com/onsi/ginkgo/v2"
 	. "github.com/onsi/gomega"
 	corev1 "k8s.io/api/core/v1"
@@ -84,6 +86,18 @@ var _ = Describe("Pod Metrics", func() {
 		})
 		Expect(found).To(BeTrue())
 	})
+	It("should update the pod bound_duration_seconds metric", func() {
+		p := test.Pod()
+		p.Status.Phase = corev1.PodPending
+		p.Status.Conditions = []corev1.PodCondition{{Type: corev1.PodScheduled, Status: corev1.ConditionTrue, LastTransitionTime: metav1.Now()}}
+		ExpectApplied(ctx, env.Client, p)
+		ExpectReconcileSucceeded(ctx, podController, client.ObjectKeyFromObject(p))
+		_, found := FindMetricWithLabelValues("karpenter_pods_bound_duration_seconds", map[string]string{
+			"name":      p.GetName(),
+			"namespace": p.GetNamespace(),
+		})
+		Expect(found).To(BeTrue())
+	})
 	It("should delete the pod state metric on pod delete", func() {
 		p := test.Pod()
 		ExpectApplied(ctx, env.Client, p)
diff --git a/pkg/controllers/node/termination/controller.go b/pkg/controllers/node/termination/controller.go
index b8e49a3931..bd2d5cf3ff 100644
--- a/pkg/controllers/node/termination/controller.go
+++ b/pkg/controllers/node/termination/controller.go
@@ -129,6 +129,9 @@ func (c *Controller) finalize(ctx context.Context, node *corev1.Node) (reconcile
 		return reconcile.Result{RequeueAfter: 1 * time.Second}, nil
 	}
 
+	NodesDrainedTotal.With(prometheus.Labels{
+		metrics.NodePoolLabel: node.Labels[v1.NodePoolLabelKey],
+	}).Inc()
 	// In order for Pods associated with PersistentVolumes to smoothly migrate from the terminating Node, we wait
 	// for VolumeAttachments of drain-able Pods to be cleaned up before terminating Node and removing its finalizer.
 	// However, if TerminationGracePeriod is configured for Node, and we are past that period, we will skip waiting.
diff --git a/pkg/controllers/node/termination/metrics.go b/pkg/controllers/node/termination/metrics.go
index b31c558788..13808c9e38 100644
--- a/pkg/controllers/node/termination/metrics.go
+++ b/pkg/controllers/node/termination/metrics.go
@@ -28,7 +28,8 @@ import (
 func init() {
 	crmetrics.Registry.MustRegister(
 		TerminationDurationSeconds,
-		NodeLifetimeDurationSeconds)
+		NodeLifetimeDurationSeconds,
+		NodesDrainedTotal)
 }
 
 const dayDuration = time.Hour * 24
@@ -44,6 +45,15 @@ var (
 		},
 		[]string{metrics.NodePoolLabel},
 	)
+	NodesDrainedTotal = prometheus.NewCounterVec(
+		prometheus.CounterOpts{
+			Namespace: metrics.Namespace,
+			Subsystem: metrics.NodeSubsystem,
+			Name:      "drained_total",
+			Help:      "The total number of nodes drained by Karpenter",
+		},
+		[]string{metrics.NodePoolLabel},
+	)
 	NodeLifetimeDurationSeconds = prometheus.NewHistogramVec(
 		prometheus.HistogramOpts{
 			Namespace: metrics.Namespace,
diff --git a/pkg/controllers/node/termination/suite_test.go b/pkg/controllers/node/termination/suite_test.go
index 32151e311f..da6daf60ad 100644
--- a/pkg/controllers/node/termination/suite_test.go
+++ b/pkg/controllers/node/termination/suite_test.go
@@ -95,6 +95,7 @@ var _ = Describe("Termination", func() {
 		metrics.NodesTerminatedTotal.Reset()
 		termination.TerminationDurationSeconds.Reset()
 		termination.NodeLifetimeDurationSeconds.Reset()
+		termination.NodesDrainedTotal.Reset()
 	})
 
 	Context("Reconciliation", func() {
@@ -841,6 +842,7 @@ var _ = Describe("Termination", func() {
 			node = ExpectNodeExists(ctx, env.Client, node.Name)
 			// Reconcile twice, once to set the NodeClaim to terminating, another to check the instance termination status (and delete the node).
 			ExpectObjectReconciled(ctx, env.Client, terminationController, node)
+			ExpectMetricCounterValue(termination.NodesDrainedTotal, 1, map[string]string{"nodepool": node.Labels[v1.NodePoolLabelKey]})
 			ExpectObjectReconciled(ctx, env.Client, terminationController, node)
 
 			m, ok := FindMetricWithLabelValues("karpenter_nodes_terminated_total", map[string]string{"nodepool": node.Labels[v1.NodePoolLabelKey]})
diff --git a/pkg/controllers/provisioning/provisioner.go b/pkg/controllers/provisioning/provisioner.go
index aafd3bcf68..1156e1d9f1 100644
--- a/pkg/controllers/provisioning/provisioner.go
+++ b/pkg/controllers/provisioning/provisioner.go
@@ -159,13 +159,14 @@ func (p *Provisioner) GetPendingPods(ctx context.Context) ([]*corev1.Pod, error)
 	if err != nil {
 		return nil, fmt.Errorf("listing pods, %w", err)
 	}
-	pods = lo.Reject(pods, func(po *corev1.Pod, _ int) bool {
+	rejectedPods, pods := lo.FilterReject(pods, func(po *corev1.Pod, _ int) bool {
 		if err := p.Validate(ctx, po); err != nil {
 			log.FromContext(ctx).WithValues("Pod", klog.KRef(po.Namespace, po.Name)).V(1).Info(fmt.Sprintf("ignoring pod, %s", err))
 			return true
 		}
 		return false
 	})
+	metrics.IgnoredPodCount.Set(float64(len(rejectedPods)))
 	p.consolidationWarnings(ctx, pods)
 	return pods, nil
 }
diff --git a/pkg/controllers/provisioning/suite_test.go b/pkg/controllers/provisioning/suite_test.go
index 80c7e3d81e..c27c1effb3 100644
--- a/pkg/controllers/provisioning/suite_test.go
+++ b/pkg/controllers/provisioning/suite_test.go
@@ -19,6 +19,7 @@ package provisioning_test
 import (
 	"context"
 	"fmt"
+	"sigs.k8s.io/karpenter/pkg/metrics"
 	"testing"
 	"time"
 
@@ -97,6 +98,7 @@ var _ = AfterEach(func() {
 	ExpectCleanedUp(ctx, env.Client)
 	cloudProvider.Reset()
 	cluster.Reset()
+	metrics.IgnoredPodCount.Set(0)
 })
 
 var _ = Describe("Provisioning", func() {
@@ -1338,6 +1340,7 @@ var _ = Describe("Provisioning", func() {
 				PersistentVolumeClaims: []string{"invalid"},
 			})
 			ExpectProvisioned(ctx, env.Client, cluster, cloudProvider, prov, pod)
+			ExpectMetricGaugeValue(metrics.IgnoredPodCount, 1, nil)
 			ExpectNotScheduled(ctx, env.Client, pod)
 		})
 		It("should schedule with an empty storage class if the pvc is bound", func() {
diff --git a/pkg/metrics/metrics.go b/pkg/metrics/metrics.go
index fd21137cba..647e75200a 100644
--- a/pkg/metrics/metrics.go
+++ b/pkg/metrics/metrics.go
@@ -89,9 +89,16 @@ var (
 			NodePoolLabel,
 		},
 	)
+	IgnoredPodCount = prometheus.NewGauge(
+		prometheus.GaugeOpts{
+			Namespace: Namespace,
+			Name:      "ignored_pod_count",
+			Help:      "Number of pods ignored during scheduling by Karpenter",
+		},
+	)
 )
 
 func init() {
 	crmetrics.Registry.MustRegister(NodeClaimsCreatedTotal, NodeClaimsTerminatedTotal, NodeClaimsDisruptedTotal,
-		NodesCreatedTotal, NodesTerminatedTotal)
+		NodesCreatedTotal, NodesTerminatedTotal, IgnoredPodCount)
 }
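
For context on how the new collectors behave, the sketch below is a minimal, standalone example (not part of the diff) that builds look-alike metrics and reads them back with prometheus/testutil, which is roughly what the ExpectMetricCounterValue and ExpectMetricGaugeValue assertions above verify. The literal "karpenter", "nodes", and "nodepool" strings are assumptions standing in for the metrics.Namespace, metrics.NodeSubsystem, and metrics.NodePoolLabel constants used in the PR.

```go
package main

import (
	"fmt"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/testutil"
)

// Stand-ins for the collectors added in the diff; the namespace, subsystem,
// and label name literals are assumptions replacing the metrics package
// constants (metrics.Namespace, metrics.NodeSubsystem, metrics.NodePoolLabel).
var (
	nodesDrainedTotal = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Namespace: "karpenter",
			Subsystem: "nodes",
			Name:      "drained_total",
			Help:      "The total number of nodes drained by Karpenter",
		},
		[]string{"nodepool"},
	)
	ignoredPodCount = prometheus.NewGauge(
		prometheus.GaugeOpts{
			Namespace: "karpenter",
			Name:      "ignored_pod_count",
			Help:      "Number of pods ignored during scheduling by Karpenter",
		},
	)
)

func main() {
	// Register with a private registry; the controllers register against
	// controller-runtime's crmetrics.Registry in init(), but this sketch runs
	// outside a controller manager.
	reg := prometheus.NewRegistry()
	reg.MustRegister(nodesDrainedTotal, ignoredPodCount)

	// Simulate one drained node in the "default" NodePool and three ignored pods.
	nodesDrainedTotal.With(prometheus.Labels{"nodepool": "default"}).Inc()
	ignoredPodCount.Set(3)

	// testutil.ToFloat64 reads a single series back, similar in spirit to the
	// ExpectMetricCounterValue / ExpectMetricGaugeValue helpers in the tests.
	fmt.Println(testutil.ToFloat64(nodesDrainedTotal.WithLabelValues("default"))) // 1
	fmt.Println(testutil.ToFloat64(ignoredPodCount))                              // 3
}
```

The private registry only keeps the sketch self-contained; in the diff itself the collectors are registered once in each package's init() so they are exported on the shared controller-runtime metrics endpoint.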