From 979c06cdf95d850cb86357999388725ad3171213 Mon Sep 17 00:00:00 2001
From: jigisha620 <patixjig@amazon.com>
Date: Tue, 24 Sep 2024 19:41:39 -0700
Subject: [PATCH] chore: Additional upstream metrics

---
 .../node/termination/controller.go            |  3 ++
 pkg/controllers/node/termination/metrics.go   | 12 +++++-
 .../node/termination/suite_test.go            |  2 +
 .../node/termination/terminator/eviction.go   |  6 +++
 .../node/termination/terminator/metrics.go    | 43 +++++++++++++++++++
 .../node/termination/terminator/suite_test.go |  4 ++
 pkg/controllers/provisioning/provisioner.go   |  3 +-
 .../provisioning/scheduling/metrics.go        |  9 +++-
 pkg/controllers/provisioning/suite_test.go    |  4 ++
 pkg/controllers/state/cluster.go              | 17 +++++++-
 pkg/controllers/state/metrics.go              | 11 ++++-
 pkg/controllers/state/suite_test.go           | 22 ++++++++++
 12 files changed, 130 insertions(+), 6 deletions(-)
 create mode 100644 pkg/controllers/node/termination/terminator/metrics.go

diff --git a/pkg/controllers/node/termination/controller.go b/pkg/controllers/node/termination/controller.go
index b8e49a3931..bd2d5cf3ff 100644
--- a/pkg/controllers/node/termination/controller.go
+++ b/pkg/controllers/node/termination/controller.go
@@ -129,6 +129,9 @@ func (c *Controller) finalize(ctx context.Context, node *corev1.Node) (reconcile
 
 		return reconcile.Result{RequeueAfter: 1 * time.Second}, nil
 	}
+	NodesDrainedTotal.With(prometheus.Labels{
+		metrics.NodePoolLabel: node.Labels[v1.NodePoolLabelKey],
+	}).Inc()
 	// In order for Pods associated with PersistentVolumes to smoothly migrate from the terminating Node, we wait
 	// for VolumeAttachments of drain-able Pods to be cleaned up before terminating Node and removing its finalizer.
 	// However, if TerminationGracePeriod is configured for Node, and we are past that period, we will skip waiting.
diff --git a/pkg/controllers/node/termination/metrics.go b/pkg/controllers/node/termination/metrics.go
index b31c558788..13808c9e38 100644
--- a/pkg/controllers/node/termination/metrics.go
+++ b/pkg/controllers/node/termination/metrics.go
@@ -28,7 +28,8 @@ import (
 func init() {
 	crmetrics.Registry.MustRegister(
 		TerminationDurationSeconds,
-		NodeLifetimeDurationSeconds)
+		NodeLifetimeDurationSeconds,
+		NodesDrainedTotal)
 }
 
 const dayDuration = time.Hour * 24
@@ -44,6 +45,15 @@ var (
 		},
 		[]string{metrics.NodePoolLabel},
 	)
+	NodesDrainedTotal = prometheus.NewCounterVec(
+		prometheus.CounterOpts{
+			Namespace: metrics.Namespace,
+			Subsystem: metrics.NodeSubsystem,
+			Name:      "drained_total",
+			Help:      "The total number of nodes drained by Karpenter",
+		},
+		[]string{metrics.NodePoolLabel},
+	)
 	NodeLifetimeDurationSeconds = prometheus.NewHistogramVec(
 		prometheus.HistogramOpts{
 			Namespace: metrics.Namespace,
diff --git a/pkg/controllers/node/termination/suite_test.go b/pkg/controllers/node/termination/suite_test.go
index 32151e311f..da6daf60ad 100644
--- a/pkg/controllers/node/termination/suite_test.go
+++ b/pkg/controllers/node/termination/suite_test.go
@@ -95,6 +95,7 @@ var _ = Describe("Termination", func() {
 		metrics.NodesTerminatedTotal.Reset()
 		termination.TerminationDurationSeconds.Reset()
 		termination.NodeLifetimeDurationSeconds.Reset()
+		termination.NodesDrainedTotal.Reset()
 	})
 
 	Context("Reconciliation", func() {
@@ -841,6 +842,7 @@ var _ = Describe("Termination", func() {
 			node = ExpectNodeExists(ctx, env.Client, node.Name)
 			// Reconcile twice, once to set the NodeClaim to terminating, another to check the instance termination status (and delete the node).
 			ExpectObjectReconciled(ctx, env.Client, terminationController, node)
+			ExpectMetricCounterValue(termination.NodesDrainedTotal, 1, map[string]string{"nodepool": node.Labels[v1.NodePoolLabelKey]})
 			ExpectObjectReconciled(ctx, env.Client, terminationController, node)
 
 			m, ok := FindMetricWithLabelValues("karpenter_nodes_terminated_total", map[string]string{"nodepool": node.Labels[v1.NodePoolLabelKey]})
diff --git a/pkg/controllers/node/termination/terminator/eviction.go b/pkg/controllers/node/termination/terminator/eviction.go
index 99bbadf81d..6df9f656e1 100644
--- a/pkg/controllers/node/termination/terminator/eviction.go
+++ b/pkg/controllers/node/termination/terminator/eviction.go
@@ -180,6 +180,11 @@ func (q *Queue) Evict(ctx context.Context, key QueueKey) bool {
 				},
 			},
 		}); err != nil {
+		var apiStatus apierrors.APIStatus
+		if errors.As(err, &apiStatus) {
+			code := apiStatus.Status().Code
+			NodesEvictionRequestsTotal.With(map[string]string{CodeLabel: fmt.Sprint(code)}).Inc()
+		}
 		// status codes for the eviction API are defined here:
 		// https://kubernetes.io/docs/concepts/scheduling-eviction/api-eviction/#how-api-initiated-eviction-works
 		if apierrors.IsNotFound(err) || apierrors.IsConflict(err) {
@@ -199,6 +204,7 @@ func (q *Queue) Evict(ctx context.Context, key QueueKey) bool {
 		log.FromContext(ctx).Error(err, "failed evicting pod")
 		return false
 	}
+	NodesEvictionRequestsTotal.With(map[string]string{CodeLabel: "200"}).Inc()
 	q.recorder.Publish(terminatorevents.EvictPod(&corev1.Pod{ObjectMeta: metav1.ObjectMeta{Name: key.Name, Namespace: key.Namespace}}))
 	return true
 }
diff --git a/pkg/controllers/node/termination/terminator/metrics.go b/pkg/controllers/node/termination/terminator/metrics.go
new file mode 100644
index 0000000000..d7c591d9bb
--- /dev/null
+++ b/pkg/controllers/node/termination/terminator/metrics.go
@@ -0,0 +1,43 @@
+/*
+Copyright The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package terminator
+
+import (
+	"github.com/prometheus/client_golang/prometheus"
+	crmetrics "sigs.k8s.io/controller-runtime/pkg/metrics"
+
+	"sigs.k8s.io/karpenter/pkg/metrics"
+)
+
+const (
+	// CodeLabel for eviction request
+	CodeLabel = "code"
+)
+
+var NodesEvictionRequestsTotal = prometheus.NewCounterVec(
+	prometheus.CounterOpts{
+		Namespace: metrics.Namespace,
+		Subsystem: metrics.NodeSubsystem,
+		Name:      "eviction_requests_total",
+		Help:      "The total number of eviction requests made by Karpenter",
+	},
+	[]string{CodeLabel},
+)
+
+func init() {
+	crmetrics.Registry.MustRegister(NodesEvictionRequestsTotal)
+}
diff --git a/pkg/controllers/node/termination/terminator/suite_test.go b/pkg/controllers/node/termination/terminator/suite_test.go
index 47fb2f5ed5..2b0e3a72d4 100644
--- a/pkg/controllers/node/termination/terminator/suite_test.go
+++ b/pkg/controllers/node/termination/terminator/suite_test.go
@@ -92,6 +92,7 @@ var _ = Describe("Eviction/Queue", func() {
 				Labels: testLabels,
 			},
 		})
+		terminator.NodesEvictionRequestsTotal.Reset()
 	})
 
 	Context("Eviction API", func() {
@@ -102,11 +103,13 @@ var _ = Describe("Eviction/Queue", func() {
 		It("should succeed with no event when the pod UID conflicts", func() {
 			ExpectApplied(ctx, env.Client, pod)
 			Expect(queue.Evict(ctx, terminator.QueueKey{NamespacedName: client.ObjectKeyFromObject(pod), UID: uuid.NewUUID()})).To(BeTrue())
+			ExpectMetricCounterValue(terminator.NodesEvictionRequestsTotal, 1, map[string]string{terminator.CodeLabel: "409"})
 			Expect(recorder.Events()).To(HaveLen(0))
 		})
 		It("should succeed with an evicted event when there are no PDBs", func() {
 			ExpectApplied(ctx, env.Client, pod)
 			Expect(queue.Evict(ctx, terminator.NewQueueKey(pod))).To(BeTrue())
+			ExpectMetricCounterValue(terminator.NodesEvictionRequestsTotal, 1, map[string]string{terminator.CodeLabel: "200"})
 			Expect(recorder.Calls("Evicted")).To(Equal(1))
 		})
 		It("should succeed with no event when there are PDBs that allow an eviction", func() {
@@ -130,6 +133,7 @@ var _ = Describe("Eviction/Queue", func() {
 			})
 			ExpectApplied(ctx, env.Client, pdb, pdb2, pod)
 			Expect(queue.Evict(ctx, terminator.NewQueueKey(pod))).To(BeFalse())
+			ExpectMetricCounterValue(terminator.NodesEvictionRequestsTotal, 1, map[string]string{terminator.CodeLabel: "500"})
 		})
 		It("should ensure that calling Evict() is valid while making Add() calls", func() {
 			cancelCtx, cancel := context.WithCancel(ctx)
diff --git a/pkg/controllers/provisioning/provisioner.go b/pkg/controllers/provisioning/provisioner.go
index ab56bdc13f..a51b04a4fd 100644
--- a/pkg/controllers/provisioning/provisioner.go
+++ b/pkg/controllers/provisioning/provisioner.go
@@ -159,13 +159,14 @@ func (p *Provisioner) GetPendingPods(ctx context.Context) ([]*corev1.Pod, error)
 	if err != nil {
 		return nil, fmt.Errorf("listing pods, %w", err)
 	}
-	pods = lo.Reject(pods, func(po *corev1.Pod, _ int) bool {
+	rejectedPods, pods := lo.FilterReject(pods, func(po *corev1.Pod, _ int) bool {
 		if err := p.Validate(ctx, po); err != nil {
 			log.FromContext(ctx).WithValues("Pod", klog.KRef(po.Namespace, po.Name)).V(1).Info(fmt.Sprintf("ignoring pod, %s", err))
 			return true
 		}
 		return false
 	})
+	scheduler.IgnoredPodCount.Set(float64(len(rejectedPods)))
 	p.consolidationWarnings(ctx, pods)
 	return pods, nil
 }
diff --git a/pkg/controllers/provisioning/scheduling/metrics.go b/pkg/controllers/provisioning/scheduling/metrics.go
index 05fdbdc4ab..1c3328761c 100644
--- a/pkg/controllers/provisioning/scheduling/metrics.go
+++ b/pkg/controllers/provisioning/scheduling/metrics.go
@@ -24,7 +24,7 @@ import (
 )
 
 func init() {
-	crmetrics.Registry.MustRegister(SchedulingDurationSeconds, QueueDepth)
+	crmetrics.Registry.MustRegister(SchedulingDurationSeconds, QueueDepth, IgnoredPodCount)
 }
 
 const (
@@ -58,4 +58,11 @@ var (
 			schedulingIDLabel,
 		},
 	)
+	IgnoredPodCount = prometheus.NewGauge(
+		prometheus.GaugeOpts{
+			Namespace: metrics.Namespace,
+			Name:      "ignored_pod_count",
+			Help:      "Number of pods ignored during scheduling by Karpenter",
+		},
+	)
 )
diff --git a/pkg/controllers/provisioning/suite_test.go b/pkg/controllers/provisioning/suite_test.go
index dec8bbb662..8e497a542c 100644
--- a/pkg/controllers/provisioning/suite_test.go
+++ b/pkg/controllers/provisioning/suite_test.go
@@ -22,6 +22,8 @@ import (
 	"testing"
 	"time"
 
+	pscheduling "sigs.k8s.io/karpenter/pkg/controllers/provisioning/scheduling"
+
 	. "github.com/onsi/ginkgo/v2"
 	. "github.com/onsi/gomega"
 	"github.com/samber/lo"
@@ -98,6 +100,7 @@ var _ = AfterEach(func() {
 	ExpectCleanedUp(ctx, env.Client)
 	cloudProvider.Reset()
 	cluster.Reset()
+	pscheduling.IgnoredPodCount.Set(0)
 })
 
 var _ = Describe("Provisioning", func() {
@@ -1370,6 +1373,7 @@ var _ = Describe("Provisioning", func() {
 				PersistentVolumeClaims: []string{"invalid"},
 			})
 			ExpectProvisioned(ctx, env.Client, cluster, cloudProvider, prov, pod)
+			ExpectMetricGaugeValue(pscheduling.IgnoredPodCount, 1, nil)
 			ExpectNotScheduled(ctx, env.Client, pod)
 		})
 		It("should schedule with an empty storage class if the pvc is bound", func() {
diff --git a/pkg/controllers/state/cluster.go b/pkg/controllers/state/cluster.go
index ad59298b16..aeec1ee06b 100644
--- a/pkg/controllers/state/cluster.go
+++ b/pkg/controllers/state/cluster.go
@@ -59,8 +59,9 @@ type Cluster struct {
 	// changed about the cluster that might make consolidation possible. By recording
 	// the state, interested disruption methods can check to see if this has changed to
 	// optimize and not try to disrupt if nothing about the cluster has changed.
-	clusterState     time.Time
-	antiAffinityPods sync.Map // pod namespaced name -> *corev1.Pod of pods that have required anti affinities
+	clusterState      time.Time
+	unsyncedStartTime time.Time
+	antiAffinityPods  sync.Map // pod namespaced name -> *corev1.Pod of pods that have required anti affinities
 }
 
 func NewCluster(clk clock.Clock, client client.Client) *Cluster {
@@ -82,6 +83,18 @@ func NewCluster(clk clock.Clock, client client.Client) *Cluster {
 //
 //nolint:gocyclo
 func (c *Cluster) Synced(ctx context.Context) (synced bool) {
+	// Set the metric depending on the result of the Synced() call
+	defer func() {
+		if synced {
+			c.unsyncedStartTime = time.Time{}
+			ClusterStateUnsyncedTimeSeconds.With(map[string]string{}).Set(0)
+		} else {
+			if c.unsyncedStartTime.IsZero() {
+				c.unsyncedStartTime = c.clock.Now()
+			}
+			ClusterStateUnsyncedTimeSeconds.With(map[string]string{}).Set(c.clock.Since(c.unsyncedStartTime).Seconds())
+		}
+	}()
 	// Set the metric to whatever the result of the Synced() call is
 	defer func() {
 		ClusterStateSynced.Set(lo.Ternary[float64](synced, 1, 0))
diff --git a/pkg/controllers/state/metrics.go b/pkg/controllers/state/metrics.go
index e4fa3880e3..a344c59de8 100644
--- a/pkg/controllers/state/metrics.go
+++ b/pkg/controllers/state/metrics.go
@@ -45,8 +45,17 @@ var (
 			Help:      "Returns 1 if cluster state is synced and 0 otherwise. Synced checks that nodeclaims and nodes that are stored in the APIServer have the same representation as Karpenter's cluster state",
 		},
 	)
+	ClusterStateUnsyncedTimeSeconds = prometheus.NewGaugeVec(
+		prometheus.GaugeOpts{
+			Namespace: metrics.Namespace,
+			Subsystem: stateSubsystem,
+			Name:      "unsynced_time_seconds",
+			Help:      "The time for which cluster state is not synced",
+		},
+		[]string{},
+	)
 )
 
 func init() {
-	crmetrics.Registry.MustRegister(ClusterStateNodesCount, ClusterStateSynced)
+	crmetrics.Registry.MustRegister(ClusterStateNodesCount, ClusterStateSynced, ClusterStateUnsyncedTimeSeconds)
 }
diff --git a/pkg/controllers/state/suite_test.go b/pkg/controllers/state/suite_test.go
index c2084a1bb0..7e3612441b 100644
--- a/pkg/controllers/state/suite_test.go
+++ b/pkg/controllers/state/suite_test.go
@@ -88,6 +88,7 @@ var _ = AfterSuite(func() {
 
 var _ = BeforeEach(func() {
 	fakeClock.SetTime(time.Now())
+	state.ClusterStateUnsyncedTimeSeconds.Reset()
 	cloudProvider.InstanceTypes = fake.InstanceTypesAssorted()
 	nodePool = test.NodePool(v1.NodePool{ObjectMeta: metav1.ObjectMeta{Name: "default"}})
 	ExpectApplied(ctx, env.Client, nodePool)
@@ -1126,6 +1127,27 @@ var _ = Describe("Cluster State Sync", func() {
 		Expect(cluster.Synced(ctx)).To(BeTrue())
 		ExpectMetricGaugeValue(state.ClusterStateSynced, 1.0, nil)
 		ExpectMetricGaugeValue(state.ClusterStateNodesCount, 1000.0, nil)
+		metric, found := FindMetricWithLabelValues("karpenter_cluster_state_unsynced_time_seconds", map[string]string{})
+		Expect(found).To(BeTrue())
+		Expect(metric.GetGauge().GetValue()).To(BeEquivalentTo(0))
+	})
+	It("should emit cluster_state_unsynced_time_seconds metric when cluster state is unsynced", func() {
+		nodeClaim := test.NodeClaim(v1.NodeClaim{
+			Status: v1.NodeClaimStatus{
+				ProviderID: "",
+			},
+		})
+		nodeClaim.Status.ProviderID = ""
+		ExpectApplied(ctx, env.Client, nodeClaim)
+		ExpectReconcileSucceeded(ctx, nodeClaimController, client.ObjectKeyFromObject(nodeClaim))
+		Expect(cluster.Synced(ctx)).To(BeFalse())
+
+		fakeClock.Step(2 * time.Minute)
+		ExpectReconcileSucceeded(ctx, nodeClaimController, client.ObjectKeyFromObject(nodeClaim))
+		Expect(cluster.Synced(ctx)).To(BeFalse())
+		metric, found := FindMetricWithLabelValues("karpenter_cluster_state_unsynced_time_seconds", map[string]string{})
+		Expect(found).To(BeTrue())
+		Expect(metric.GetGauge().GetValue()).To(BeNumerically(">=", 120))
 	})
 	It("should consider the cluster state synced when nodes don't have provider id", func() {
 		// Deploy 1000 nodes and sync them all with the cluster