From 824c10885318fdc7fcc6912d8645bf055af91876 Mon Sep 17 00:00:00 2001
From: Michael Weibel
Date: Fri, 19 Apr 2024 17:56:33 +0200
Subject: [PATCH 01/30] feat(clusterapi): per nodeGroup autoscaling options

---
 .../cloudprovider/clusterapi/README.md        |  21 +++
 .../clusterapi/clusterapi_nodegroup.go        |  55 +++++++-
 .../clusterapi/clusterapi_nodegroup_test.go   | 130 +++++++++++++++++-
 .../clusterapi/clusterapi_unstructured.go     |  21 +--
 .../clusterapi/clusterapi_utils.go            |  24 ++++
 5 files changed, 238 insertions(+), 13 deletions(-)

diff --git a/cluster-autoscaler/cloudprovider/clusterapi/README.md b/cluster-autoscaler/cloudprovider/clusterapi/README.md
index d6a73da14949..adf22919d34f 100644
--- a/cluster-autoscaler/cloudprovider/clusterapi/README.md
+++ b/cluster-autoscaler/cloudprovider/clusterapi/README.md
@@ -275,6 +275,27 @@ metadata:
     capacity.cluster-autoscaler.kubernetes.io/taints: "key1=value1:NoSchedule,key2=value2:NoExecute"
 ```

+#### Per-NodeGroup autoscaling options
+
+Custom autoscaling options per node group (MachineDeployment/MachinePool/MachineSet) can be specified as annotations with a common prefix:
+
+```yaml
+apiVersion: cluster.x-k8s.io/v1beta1
+kind: MachineDeployment
+metadata:
+  annotations:
+    # overrides --scale-down-utilization-threshold global value for that specific MachineDeployment
+    cluster.x-k8s.io/autoscaling-options-scaledownutilizationthreshold: "0.5"
+    # overrides --scale-down-gpu-utilization-threshold global value for that specific MachineDeployment
+    cluster.x-k8s.io/autoscaling-options-scaledowngpuutilizationthreshold: "0.5"
+    # overrides --scale-down-unneeded-time global value for that specific MachineDeployment
+    cluster.x-k8s.io/autoscaling-options-scaledownunneededtime: "10m0s"
+    # overrides --scale-down-unready-time global value for that specific MachineDeployment
+    cluster.x-k8s.io/autoscaling-options-scaledownunreadytime: "20m0s"
+    # overrides --max-node-provision-time global value for that specific MachineDeployment
+    cluster.x-k8s.io/autoscaling-options-maxnodeprovisiontime: "20m0s"
+```
+
 #### CPU Architecture awareness for single-arch clusters

 Users of single-arch non-amd64 clusters who are using scale from zero
diff --git a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_nodegroup.go b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_nodegroup.go
index eb66ff8ee8ed..009b9abfca30 100644
--- a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_nodegroup.go
+++ b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_nodegroup.go
@@ -20,6 +20,8 @@ import (
 	"fmt"
 	"k8s.io/klog/v2"
 	"math/rand"
+	"strconv"
+	"time"

 	"github.com/pkg/errors"

@@ -335,7 +337,28 @@ func (ng *nodegroup) Autoprovisioned() bool {
 // GetOptions returns NodeGroupAutoscalingOptions that should be used for this particular
 // NodeGroup. Returning a nil will result in using default options.
func (ng *nodegroup) GetOptions(defaults config.NodeGroupAutoscalingOptions) (*config.NodeGroupAutoscalingOptions, error) { - return nil, cloudprovider.ErrNotImplemented + options := ng.scalableResource.autoscalingOptions + if options == nil || len(options) == 0 { + return &defaults, nil + } + + if opt, ok := getFloat64Option(options, ng.Id(), config.DefaultScaleDownUtilizationThresholdKey); ok { + defaults.ScaleDownUtilizationThreshold = opt + } + if opt, ok := getFloat64Option(options, ng.Id(), config.DefaultScaleDownGpuUtilizationThresholdKey); ok { + defaults.ScaleDownGpuUtilizationThreshold = opt + } + if opt, ok := getDurationOption(options, ng.Id(), config.DefaultScaleDownUnneededTimeKey); ok { + defaults.ScaleDownUnneededTime = opt + } + if opt, ok := getDurationOption(options, ng.Id(), config.DefaultScaleDownUnreadyTimeKey); ok { + defaults.ScaleDownUnreadyTime = opt + } + if opt, ok := getDurationOption(options, ng.Id(), config.DefaultMaxNodeProvisionTimeKey); ok { + defaults.MaxNodeProvisionTime = opt + } + + return &defaults, nil } func newNodeGroupFromScalableResource(controller *machineController, unstructuredScalableResource *unstructured.Unstructured) (*nodegroup, error) { @@ -415,3 +438,33 @@ func setLabelIfNotEmpty(to, from map[string]string, key string) { to[key] = value } } + +func getFloat64Option(options map[string]string, templateName, name string) (float64, bool) { + raw, ok := options[name] + if !ok { + return 0, false + } + + option, err := strconv.ParseFloat(raw, 64) + if err != nil { + klog.Warningf("failed to convert autoscaling_options option %q (value %q) for scalable resource %q to float: %v", name, raw, templateName, err) + return 0, false + } + + return option, true +} + +func getDurationOption(options map[string]string, templateName, name string) (time.Duration, bool) { + raw, ok := options[name] + if !ok { + return 0, false + } + + option, err := time.ParseDuration(raw) + if err != nil { + klog.Warningf("failed to convert autoscaling_options option %q (value %q) for scalable resource %q to duration: %v", name, raw, templateName, err) + return 0, false + } + + return option, true +} diff --git a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_nodegroup_test.go b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_nodegroup_test.go index 079291552245..1dc7e035b723 100644 --- a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_nodegroup_test.go +++ b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_nodegroup_test.go @@ -25,15 +25,15 @@ import ( "testing" "time" - "k8s.io/client-go/tools/cache" - + "github.com/stretchr/testify/assert" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/util/wait" - "k8s.io/autoscaler/cluster-autoscaler/cloudprovider" + "k8s.io/autoscaler/cluster-autoscaler/config" gpuapis "k8s.io/autoscaler/cluster-autoscaler/utils/gpu" + "k8s.io/client-go/tools/cache" ) const ( @@ -1507,3 +1507,127 @@ func TestNodeGroupTemplateNodeInfo(t *testing.T) { } } + +func TestNodeGroupGetOptions(t *testing.T) { + enableScaleAnnotations := map[string]string{ + nodeGroupMinSizeAnnotationKey: "1", + nodeGroupMaxSizeAnnotationKey: "10", + } + + defaultOptions := config.NodeGroupAutoscalingOptions{ + ScaleDownUtilizationThreshold: 0.1, + ScaleDownGpuUtilizationThreshold: 0.2, + ScaleDownUnneededTime: time.Second, + ScaleDownUnreadyTime: time.Minute, + MaxNodeProvisionTime: 15 * time.Minute, + } + + cases := []struct { + desc string + 
opts map[string]string + expected *config.NodeGroupAutoscalingOptions + }{ + { + desc: "return provided defaults on empty metadata", + opts: map[string]string{}, + expected: &defaultOptions, + }, + { + desc: "return specified options", + opts: map[string]string{ + config.DefaultScaleDownGpuUtilizationThresholdKey: "0.6", + config.DefaultScaleDownUtilizationThresholdKey: "0.7", + config.DefaultScaleDownUnneededTimeKey: "1h", + config.DefaultScaleDownUnreadyTimeKey: "30m", + config.DefaultMaxNodeProvisionTimeKey: "60m", + }, + expected: &config.NodeGroupAutoscalingOptions{ + ScaleDownGpuUtilizationThreshold: 0.6, + ScaleDownUtilizationThreshold: 0.7, + ScaleDownUnneededTime: time.Hour, + ScaleDownUnreadyTime: 30 * time.Minute, + MaxNodeProvisionTime: 60 * time.Minute, + }, + }, + { + desc: "complete partial options specs with defaults", + opts: map[string]string{ + config.DefaultScaleDownGpuUtilizationThresholdKey: "0.1", + config.DefaultScaleDownUnneededTimeKey: "1m", + }, + expected: &config.NodeGroupAutoscalingOptions{ + ScaleDownGpuUtilizationThreshold: 0.1, + ScaleDownUtilizationThreshold: defaultOptions.ScaleDownUtilizationThreshold, + ScaleDownUnneededTime: time.Minute, + ScaleDownUnreadyTime: defaultOptions.ScaleDownUnreadyTime, + MaxNodeProvisionTime: 15 * time.Minute, + }, + }, + { + desc: "keep defaults on unparsable options values", + opts: map[string]string{ + config.DefaultScaleDownGpuUtilizationThresholdKey: "foo", + config.DefaultScaleDownUnneededTimeKey: "bar", + }, + expected: &defaultOptions, + }, + } + + test := func(t *testing.T, testConfig *testConfig, expectedOptions *config.NodeGroupAutoscalingOptions) { + controller, stop := mustCreateTestController(t, testConfig) + defer stop() + + nodegroups, err := controller.nodeGroups() + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if l := len(nodegroups); l != 1 { + t.Fatalf("expected 1 nodegroup, got %d", l) + } + + ng := nodegroups[0] + opts, err := ng.GetOptions(defaultOptions) + assert.NoError(t, err) + assert.Equal(t, expectedOptions, opts) + } + + for _, c := range cases { + t.Run(c.desc, func(t *testing.T) { + annotations := map[string]string{} + for k, v := range c.opts { + annotations[nodeGroupAutoscalingOptionsKeyPrefix+k] = v + } + + t.Run("MachineSet", func(t *testing.T) { + test( + t, + createMachineSetTestConfig( + testNamespace, + RandomString(6), + RandomString(6), + 10, + cloudprovider.JoinStringMaps(enableScaleAnnotations, annotations), + nil, + ), + c.expected, + ) + }) + + t.Run("MachineDeployment", func(t *testing.T) { + test( + t, + createMachineDeploymentTestConfig( + testNamespace, + RandomString(6), + RandomString(6), + 10, + cloudprovider.JoinStringMaps(enableScaleAnnotations, annotations), + nil, + ), + c.expected, + ) + }) + }) + } +} diff --git a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_unstructured.go b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_unstructured.go index 4eec0e4bf7ec..f374dc7789f4 100644 --- a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_unstructured.go +++ b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_unstructured.go @@ -35,10 +35,11 @@ import ( ) type unstructuredScalableResource struct { - controller *machineController - unstructured *unstructured.Unstructured - maxSize int - minSize int + controller *machineController + unstructured *unstructured.Unstructured + maxSize int + minSize int + autoscalingOptions map[string]string } func (r unstructuredScalableResource) ID() string { @@ -353,16 +354,18 @@ func (r 
unstructuredScalableResource) readInfrastructureReferenceResource() (*un } func newUnstructuredScalableResource(controller *machineController, u *unstructured.Unstructured) (*unstructuredScalableResource, error) { - minSize, maxSize, err := parseScalingBounds(u.GetAnnotations()) + annotations := u.GetAnnotations() + minSize, maxSize, err := parseScalingBounds(annotations) if err != nil { return nil, errors.Wrap(err, "error validating min/max annotations") } return &unstructuredScalableResource{ - controller: controller, - unstructured: u, - maxSize: maxSize, - minSize: minSize, + controller: controller, + unstructured: u, + maxSize: maxSize, + minSize: minSize, + autoscalingOptions: autoscalingOptions(annotations), }, nil } diff --git a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_utils.go b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_utils.go index 68c2be164436..a154a0d1128c 100644 --- a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_utils.go +++ b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_utils.go @@ -98,6 +98,8 @@ var ( nodeGroupMaxSizeAnnotationKey = getNodeGroupMaxSizeAnnotationKey() zeroQuantity = resource.MustParse("0") + nodeGroupAutoscalingOptionsKeyPrefix = getNodeGroupAutoscalingOptionsKeyPrefix() + systemArchitecture *SystemArchitecture once sync.Once ) @@ -132,6 +134,21 @@ func minSize(annotations map[string]string) (int, error) { return i, nil } +func autoscalingOptions(annotations map[string]string) map[string]string { + options := map[string]string{} + for k, v := range annotations { + if !strings.HasPrefix(k, nodeGroupAutoscalingOptionsKeyPrefix) { + continue + } + resourceName := strings.Split(k, nodeGroupAutoscalingOptionsKeyPrefix) + if len(resourceName) < 2 || resourceName[1] == "" || v == "" { + continue + } + options[resourceName[1]] = strings.ToLower(v) + } + return options +} + // maxSize returns the maximum value encoded in the annotations keyed // by nodeGroupMaxSizeAnnotationKey. Returns errMissingMaxAnnotation // if the annotation doesn't exist or errInvalidMaxAnnotation if the @@ -292,6 +309,13 @@ func getNodeGroupMaxSizeAnnotationKey() string { return key } +// getNodeGroupAutoscalingOptionsKeyPrefix returns the key that is used for autoscaling options +// per node group which override autoscaler default options. +func getNodeGroupAutoscalingOptionsKeyPrefix() string { + key := fmt.Sprintf("%s/autoscaling-options-", getCAPIGroup()) + return key +} + // getMachineDeleteAnnotationKey returns the key that is used by cluster-api for marking // machines to be deleted. This function is needed because the user can change the default // group name by using the CAPI_GROUP environment variable. 
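To see the annotation flow in this patch end to end, here is a minimal, self-contained sketch of how `autoscalingOptions` reduces prefixed annotations to an options map and how `getDurationOption` then parses a value inside `GetOptions`. It assumes the default `cluster.x-k8s.io` CAPI group and re-implements the prefix handling locally rather than importing the cluster-autoscaler packages; the annotation values and the local `optionsKeyPrefix` constant are illustrative only:

```go
package main

import (
	"fmt"
	"strings"
	"time"
)

// Mirrors getNodeGroupAutoscalingOptionsKeyPrefix() for the default CAPI group.
const optionsKeyPrefix = "cluster.x-k8s.io/autoscaling-options-"

func main() {
	annotations := map[string]string{
		"cluster.x-k8s.io/autoscaling-options-scaledownunneededtime": "10m0s",
		"some.other/annotation": "ignored", // no matching prefix, so it is skipped
	}

	// Reduce prefixed annotations to an options map, as autoscalingOptions does.
	options := map[string]string{}
	for k, v := range annotations {
		if !strings.HasPrefix(k, optionsKeyPrefix) || v == "" {
			continue
		}
		options[strings.TrimPrefix(k, optionsKeyPrefix)] = strings.ToLower(v)
	}

	// "scaledownunneededtime" is the suffix GetOptions matches against
	// config.DefaultScaleDownUnneededTimeKey; parse it as getDurationOption does.
	if raw, ok := options["scaledownunneededtime"]; ok {
		if d, err := time.ParseDuration(raw); err == nil {
			fmt.Println("per-nodegroup ScaleDownUnneededTime:", d) // prints 10m0s
		}
	}
}
```

Under a custom `CAPI_GROUP`, the prefix changes accordingly, which is why the real code derives it from `getCAPIGroup()` rather than hard-coding it.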
From 8d19474f9c4b96aeb52dbdb4388cec2a4151a9f2 Mon Sep 17 00:00:00 2001 From: Ismail Alidzhikov Date: Mon, 10 Jun 2024 15:25:59 +0300 Subject: [PATCH 02/30] vpa-admission-controller: Log object's namespace --- .../pkg/admission-controller/resource/pod/handler.go | 4 ++-- .../resource/pod/patch/resource_updates.go | 2 +- .../resource/pod/recommendation/recommendation_provider.go | 4 ++-- .../pkg/admission-controller/resource/vpa/matcher.go | 4 ++-- vertical-pod-autoscaler/pkg/utils/vpa/api.go | 4 ++-- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/vertical-pod-autoscaler/pkg/admission-controller/resource/pod/handler.go b/vertical-pod-autoscaler/pkg/admission-controller/resource/pod/handler.go index d43fd08edd42..20d7549e5c34 100644 --- a/vertical-pod-autoscaler/pkg/admission-controller/resource/pod/handler.go +++ b/vertical-pod-autoscaler/pkg/admission-controller/resource/pod/handler.go @@ -76,10 +76,10 @@ func (h *resourceHandler) GetPatches(ar *admissionv1.AdmissionRequest) ([]resour pod.Name = pod.GenerateName + "%" pod.Namespace = namespace } - klog.V(4).Infof("Admitting pod %v", pod.ObjectMeta) + klog.V(4).Infof("Admitting pod %s", klog.KObj(&pod)) controllingVpa := h.vpaMatcher.GetMatchingVPA(&pod) if controllingVpa == nil { - klog.V(4).Infof("No matching VPA found for pod %s/%s", pod.Namespace, pod.Name) + klog.V(4).Infof("No matching VPA found for pod %s", klog.KObj(&pod)) return []resource_admission.PatchRecord{}, nil } pod, err := h.preProcessor.Process(pod) diff --git a/vertical-pod-autoscaler/pkg/admission-controller/resource/pod/patch/resource_updates.go b/vertical-pod-autoscaler/pkg/admission-controller/resource/pod/patch/resource_updates.go index bed6119900ba..cddec1aa83fb 100644 --- a/vertical-pod-autoscaler/pkg/admission-controller/resource/pod/patch/resource_updates.go +++ b/vertical-pod-autoscaler/pkg/admission-controller/resource/pod/patch/resource_updates.go @@ -51,7 +51,7 @@ func (c *resourcesUpdatesPatchCalculator) CalculatePatches(pod *core.Pod, vpa *v containersResources, annotationsPerContainer, err := c.recommendationProvider.GetContainersResourcesForPod(pod, vpa) if err != nil { - return []resource_admission.PatchRecord{}, fmt.Errorf("Failed to calculate resource patch for pod %v/%v: %v", pod.Namespace, pod.Name, err) + return []resource_admission.PatchRecord{}, fmt.Errorf("Failed to calculate resource patch for pod %s/%s: %v", pod.Namespace, pod.Name, err) } if annotationsPerContainer == nil { diff --git a/vertical-pod-autoscaler/pkg/admission-controller/resource/pod/recommendation/recommendation_provider.go b/vertical-pod-autoscaler/pkg/admission-controller/resource/pod/recommendation/recommendation_provider.go index 3522f1b5ae91..3272058e3e5d 100644 --- a/vertical-pod-autoscaler/pkg/admission-controller/resource/pod/recommendation/recommendation_provider.go +++ b/vertical-pod-autoscaler/pkg/admission-controller/resource/pod/recommendation/recommendation_provider.go @@ -97,7 +97,7 @@ func (p *recommendationProvider) GetContainersResourcesForPod(pod *core.Pod, vpa var err error recommendedPodResources, annotations, err = p.recommendationProcessor.Apply(vpa.Status.Recommendation, vpa.Spec.ResourcePolicy, vpa.Status.Conditions, pod) if err != nil { - klog.V(2).Infof("cannot process recommendation for pod %s", pod.Name) + klog.V(2).Infof("cannot process recommendation for pod %s", klog.KObj(pod)) return nil, annotations, err } } @@ -114,7 +114,7 @@ func (p *recommendationProvider) GetContainersResourcesForPod(pod *core.Pod, vpa // Ensure that we are 
not propagating empty resource key if any. for _, resource := range containerResources { if resource.RemoveEmptyResourceKeyIfAny() { - klog.Infof("An empty resource key was found and purged for pod=%s/%s with vpa=", pod.Namespace, pod.Name, vpa.Name) + klog.Infof("An empty resource key was found and purged for pod=%s with vpa=%s", klog.KObj(pod), klog.KObj(vpa)) } } diff --git a/vertical-pod-autoscaler/pkg/admission-controller/resource/vpa/matcher.go b/vertical-pod-autoscaler/pkg/admission-controller/resource/vpa/matcher.go index 8c74638333d6..cee7c97859ff 100644 --- a/vertical-pod-autoscaler/pkg/admission-controller/resource/vpa/matcher.go +++ b/vertical-pod-autoscaler/pkg/admission-controller/resource/vpa/matcher.go @@ -62,7 +62,7 @@ func (m *matcher) GetMatchingVPA(pod *core.Pod) *vpa_types.VerticalPodAutoscaler } selector, err := m.selectorFetcher.Fetch(vpaConfig) if err != nil { - klog.V(3).Infof("skipping VPA object %v because we cannot fetch selector: %s", vpaConfig.Name, err) + klog.V(3).Infof("skipping VPA object %s because we cannot fetch selector: %s", klog.KObj(vpaConfig), err) continue } onConfigs = append(onConfigs, &vpa_api_util.VpaWithSelector{ @@ -70,7 +70,7 @@ func (m *matcher) GetMatchingVPA(pod *core.Pod) *vpa_types.VerticalPodAutoscaler Selector: selector, }) } - klog.V(2).Infof("Let's choose from %d configs for pod %s/%s", len(onConfigs), pod.Namespace, pod.Name) + klog.V(2).Infof("Let's choose from %d configs for pod %s", len(onConfigs), klog.KObj(pod)) result := vpa_api_util.GetControllingVPAForPod(pod, onConfigs, m.controllerFetcher) if result != nil { return result.Vpa diff --git a/vertical-pod-autoscaler/pkg/utils/vpa/api.go b/vertical-pod-autoscaler/pkg/utils/vpa/api.go index 529d76961727..42fa0bb54d27 100644 --- a/vertical-pod-autoscaler/pkg/utils/vpa/api.go +++ b/vertical-pod-autoscaler/pkg/utils/vpa/api.go @@ -150,7 +150,7 @@ func GetControllingVPAForPod(pod *core.Pod, vpas []*VpaWithSelector, ctrlFetcher } parentController, err := ctrlFetcher.FindTopMostWellKnownOrScalable(k) if err != nil { - klog.Errorf("fail to get pod controller: pod=%s err=%s", pod.Name, err.Error()) + klog.Errorf("fail to get pod controller: pod=%s err=%s", klog.KObj(pod), err.Error()) return nil } if parentController == nil { @@ -231,7 +231,7 @@ func CreateOrUpdateVpaCheckpoint(vpaCheckpointClient vpa_api.VerticalPodAutoscal _, err = vpaCheckpointClient.Create(context.TODO(), vpaCheckpoint, meta.CreateOptions{}) } if err != nil { - return fmt.Errorf("Cannot save checkpoint for vpa %v container %v. Reason: %+v", vpaCheckpoint.ObjectMeta.Name, vpaCheckpoint.Spec.ContainerName, err) + return fmt.Errorf("Cannot save checkpoint for vpa %s/%s container %s. 
Reason: %+v", vpaCheckpoint.Namespace, vpaCheckpoint.Name, vpaCheckpoint.Spec.ContainerName, err) } return nil } From 628fc39443098c7e6683da739c62accf39100c51 Mon Sep 17 00:00:00 2001 From: Ismail Alidzhikov Date: Mon, 10 Jun 2024 16:18:48 +0300 Subject: [PATCH 03/30] vpa-recommender: Log object's namespace --- .../checkpoint/checkpoint_writer.go | 2 +- .../pkg/recommender/input/cluster_feeder.go | 22 +++++++++---------- .../input/metrics/metrics_source.go | 7 +++--- .../pkg/recommender/model/cluster.go | 6 ++--- .../routines/capping_post_processor.go | 2 +- .../pkg/recommender/routines/recommender.go | 6 ++--- 6 files changed, 23 insertions(+), 22 deletions(-) diff --git a/vertical-pod-autoscaler/pkg/recommender/checkpoint/checkpoint_writer.go b/vertical-pod-autoscaler/pkg/recommender/checkpoint/checkpoint_writer.go index c738c95e4df1..f9bc6dbf646a 100644 --- a/vertical-pod-autoscaler/pkg/recommender/checkpoint/checkpoint_writer.go +++ b/vertical-pod-autoscaler/pkg/recommender/checkpoint/checkpoint_writer.go @@ -94,7 +94,7 @@ func (writer *checkpointWriter) StoreCheckpoints(ctx context.Context, now time.T for container, aggregatedContainerState := range aggregateContainerStateMap { containerCheckpoint, err := aggregatedContainerState.SaveToCheckpoint() if err != nil { - klog.Errorf("Cannot serialize checkpoint for vpa %v container %v. Reason: %+v", vpa.ID.VpaName, container, err) + klog.Errorf("Cannot serialize checkpoint for vpa %s/%s container %v. Reason: %+v", vpa.ID.Namespace, vpa.ID.VpaName, container, err) continue } checkpointName := fmt.Sprintf("%s-%s", vpa.ID.VpaName, container) diff --git a/vertical-pod-autoscaler/pkg/recommender/input/cluster_feeder.go b/vertical-pod-autoscaler/pkg/recommender/input/cluster_feeder.go index 6da25fea5eb4..67f60397e81f 100644 --- a/vertical-pod-autoscaler/pkg/recommender/input/cluster_feeder.go +++ b/vertical-pod-autoscaler/pkg/recommender/input/cluster_feeder.go @@ -42,7 +42,7 @@ import ( "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/recommender/input/spec" "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/recommender/model" "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/target" - "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/target/controller_fetcher" + controllerfetcher "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/target/controller_fetcher" metrics_recommender "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/metrics/recommender" ) @@ -229,13 +229,13 @@ func (feeder *clusterStateFeeder) setVpaCheckpoint(checkpoint *vpa_types.Vertica vpaID := model.VpaID{Namespace: checkpoint.Namespace, VpaName: checkpoint.Spec.VPAObjectName} vpa, exists := feeder.clusterState.Vpas[vpaID] if !exists { - return fmt.Errorf("cannot load checkpoint to missing VPA object %+v", vpaID) + return fmt.Errorf("cannot load checkpoint to missing VPA object %s/%s", vpa.ID.Namespace, vpa.ID.VpaName) } cs := model.NewAggregateContainerState() err := cs.LoadFromCheckpoint(&checkpoint.Status) if err != nil { - return fmt.Errorf("cannot load checkpoint for VPA %+v. Reason: %v", vpa.ID, err) + return fmt.Errorf("cannot load checkpoint for VPA %s/%s. 
Reason: %v", vpa.ID.Namespace, vpa.ID.VpaName, err) } vpa.ContainersInitialAggregateState[checkpoint.Spec.ContainerName] = cs return nil @@ -254,7 +254,7 @@ func (feeder *clusterStateFeeder) InitFromCheckpoints() { klog.V(3).Infof("Fetching checkpoints from namespace %s", namespace) checkpointList, err := feeder.vpaCheckpointClient.VerticalPodAutoscalerCheckpoints(namespace).List(context.TODO(), metav1.ListOptions{}) if err != nil { - klog.Errorf("Cannot list VPA checkpoints from namespace %v. Reason: %+v", namespace, err) + klog.Errorf("Cannot list VPA checkpoints from namespace %s. Reason: %+v", namespace, err) } for _, checkpoint := range checkpointList.Items { @@ -319,16 +319,16 @@ func filterVPAs(feeder *clusterStateFeeder, allVpaCRDs []*vpa_types.VerticalPodA for _, vpaCRD := range allVpaCRDs { if feeder.recommenderName == DefaultRecommenderName { if !implicitDefaultRecommender(vpaCRD.Spec.Recommenders) && !selectsRecommender(vpaCRD.Spec.Recommenders, &feeder.recommenderName) { - klog.V(6).Infof("Ignoring vpaCRD %s in namespace %s as current recommender's name %v doesn't appear among its recommenders", vpaCRD.Name, vpaCRD.Namespace, feeder.recommenderName) + klog.V(6).Infof("Ignoring vpaCRD %s as current recommender's name %v doesn't appear among its recommenders", klog.KObj(vpaCRD), feeder.recommenderName) continue } } else { if implicitDefaultRecommender(vpaCRD.Spec.Recommenders) { - klog.V(6).Infof("Ignoring vpaCRD %s in namespace %s as %v recommender doesn't process CRDs implicitly destined to %v recommender", vpaCRD.Name, vpaCRD.Namespace, feeder.recommenderName, DefaultRecommenderName) + klog.V(6).Infof("Ignoring vpaCRD %s as %v recommender doesn't process CRDs implicitly destined to %v recommender", klog.KObj(vpaCRD), feeder.recommenderName, DefaultRecommenderName) continue } if !selectsRecommender(vpaCRD.Spec.Recommenders, &feeder.recommenderName) { - klog.V(6).Infof("Ignoring vpaCRD %s in namespace %s as current recommender's name %v doesn't appear among its recommenders", vpaCRD.Name, vpaCRD.Namespace, feeder.recommenderName) + klog.V(6).Infof("Ignoring vpaCRD %s as current recommender's name %v doesn't appear among its recommenders", klog.KObj(vpaCRD), feeder.recommenderName) continue } } @@ -359,7 +359,7 @@ func (feeder *clusterStateFeeder) LoadVPAs() { } selector, conditions := feeder.getSelector(vpaCRD) - klog.V(4).Infof("Using selector %s for VPA %s/%s", selector.String(), vpaCRD.Namespace, vpaCRD.Name) + klog.V(4).Infof("Using selector %s for VPA %s", selector.String(), klog.KObj(vpaCRD)) if feeder.clusterState.AddOrUpdateVpa(vpaCRD, selector) == nil { // Successfully added VPA to the model. @@ -377,9 +377,9 @@ func (feeder *clusterStateFeeder) LoadVPAs() { // Delete non-existent VPAs from the model. 
for vpaID := range feeder.clusterState.Vpas { if _, exists := vpaKeys[vpaID]; !exists { - klog.V(3).Infof("Deleting VPA %v", vpaID) + klog.V(3).Infof("Deleting VPA %s/%s", vpaID.Namespace, vpaID.VpaName) if err := feeder.clusterState.DeleteVpa(vpaID); err != nil { - klog.Errorf("Deleting VPA %v failed: %v", vpaID, err) + klog.Errorf("Deleting VPA %s/%s failed: %v", vpaID.Namespace, vpaID.VpaName, err) } } } @@ -398,7 +398,7 @@ func (feeder *clusterStateFeeder) LoadPods() { } for key := range feeder.clusterState.Pods { if _, exists := pods[key]; !exists { - klog.V(3).Infof("Deleting Pod %v", key) + klog.V(3).Infof("Deleting Pod %s/%s", key.Namespace, key.PodName) feeder.clusterState.DeletePod(key) } } diff --git a/vertical-pod-autoscaler/pkg/recommender/input/metrics/metrics_source.go b/vertical-pod-autoscaler/pkg/recommender/input/metrics/metrics_source.go index 04978268baf5..807c4f017a22 100644 --- a/vertical-pod-autoscaler/pkg/recommender/input/metrics/metrics_source.go +++ b/vertical-pod-autoscaler/pkg/recommender/input/metrics/metrics_source.go @@ -18,6 +18,8 @@ package metrics import ( "context" + "time" + k8sapiv1 "k8s.io/api/core/v1" v1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" @@ -28,7 +30,6 @@ import ( "k8s.io/metrics/pkg/apis/metrics/v1beta1" resourceclient "k8s.io/metrics/pkg/client/clientset/versioned/typed/metrics/v1beta1" "k8s.io/metrics/pkg/client/external_metrics" - "time" ) // PodMetricsLister wraps both metrics-client and External Metrics @@ -113,10 +114,10 @@ func (s *externalMetricsClient) List(ctx context.Context, namespace string, opts return nil, err } if m == nil || len(m.Items) == 0 { - klog.V(4).Infof("External Metrics Query for VPA %+v: resource %+v, metric %+v, No items,", vpa.ID, resourceName, metricName) + klog.V(4).Infof("External Metrics Query for VPA %s/%s: resource %+v, metric %+v, No items,", vpa.ID.Namespace, vpa.ID.VpaName, resourceName, metricName) continue } - klog.V(4).Infof("External Metrics Query for VPA %+v: resource %+v, metric %+v, %d items, item[0]: %+v", vpa.ID, resourceName, metricName, len(m.Items), m.Items[0]) + klog.V(4).Infof("External Metrics Query for VPA %s/%s: resource %+v, metric %+v, %d items, item[0]: %+v", vpa.ID.Namespace, vpa.ID.VpaName, resourceName, metricName, len(m.Items), m.Items[0]) podMets.Timestamp = m.Items[0].Timestamp if m.Items[0].WindowSeconds != nil { podMets.Window = v1.Duration{Duration: time.Duration(*m.Items[0].WindowSeconds) * time.Second} diff --git a/vertical-pod-autoscaler/pkg/recommender/model/cluster.go b/vertical-pod-autoscaler/pkg/recommender/model/cluster.go index be3c61d39451..b9912b946ed5 100644 --- a/vertical-pod-autoscaler/pkg/recommender/model/cluster.go +++ b/vertical-pod-autoscaler/pkg/recommender/model/cluster.go @@ -25,7 +25,7 @@ import ( "k8s.io/klog/v2" vpa_types "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1" - "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/target/controller_fetcher" + controllerfetcher "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/target/controller_fetcher" vpa_utils "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/vpa" ) @@ -329,7 +329,7 @@ func (cluster *ClusterState) MakeAggregateStateKey(pod *PodState, containerName func (cluster *ClusterState) aggregateStateKeyForContainerID(containerID ContainerID) AggregateStateKey { pod, podExists := cluster.Pods[containerID.PodID] if !podExists { - panic(fmt.Sprintf("Pod not present in the ClusterState: %v", containerID.PodID)) + panic(fmt.Sprintf("Pod not present in 
the ClusterState: %s/%s", containerID.PodID.Namespace, containerID.PodID.PodName)) } return cluster.MakeAggregateStateKey(pod, containerID.ContainerName) } @@ -433,7 +433,7 @@ func (cluster *ClusterState) RecordRecommendation(vpa *Vpa, now time.Time) error } else { if lastLogged.Add(RecommendationMissingMaxDuration).Before(now) { cluster.EmptyVPAs[vpa.ID] = now - return fmt.Errorf("VPA %v/%v is missing recommendation for more than %v", vpa.ID.Namespace, vpa.ID.VpaName, RecommendationMissingMaxDuration) + return fmt.Errorf("VPA %s/%s is missing recommendation for more than %v", vpa.ID.Namespace, vpa.ID.VpaName, RecommendationMissingMaxDuration) } } return nil diff --git a/vertical-pod-autoscaler/pkg/recommender/routines/capping_post_processor.go b/vertical-pod-autoscaler/pkg/recommender/routines/capping_post_processor.go index e82322802f98..684028d9c187 100644 --- a/vertical-pod-autoscaler/pkg/recommender/routines/capping_post_processor.go +++ b/vertical-pod-autoscaler/pkg/recommender/routines/capping_post_processor.go @@ -34,7 +34,7 @@ func (c CappingPostProcessor) Process(vpa *vpa_types.VerticalPodAutoscaler, reco // TODO: maybe rename the vpa_utils.ApplyVPAPolicy to something that mention that it is doing capping only cappedRecommendation, err := vpa_utils.ApplyVPAPolicy(recommendation, vpa.Spec.ResourcePolicy) if err != nil { - klog.Errorf("Failed to apply policy for VPA %v/%v: %v", vpa.GetNamespace(), vpa.GetName(), err) + klog.Errorf("Failed to apply policy for VPA %s: %v", klog.KObj(vpa), err) return recommendation } return cappedRecommendation diff --git a/vertical-pod-autoscaler/pkg/recommender/routines/recommender.go b/vertical-pod-autoscaler/pkg/recommender/routines/recommender.go index 5dc5d1df0d90..139e20425bf1 100644 --- a/vertical-pod-autoscaler/pkg/recommender/routines/recommender.go +++ b/vertical-pod-autoscaler/pkg/recommender/routines/recommender.go @@ -28,7 +28,7 @@ import ( "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/recommender/input" "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/recommender/logic" "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/recommender/model" - "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/target/controller_fetcher" + controllerfetcher "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/target/controller_fetcher" metrics_recommender "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/metrics/recommender" vpa_utils "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/vpa" ) @@ -116,7 +116,7 @@ func (r *recommender) UpdateVPAs() { pods := r.clusterState.GetMatchingPods(vpa) klog.Infof("MatchingPods: %+v", pods) if len(pods) != vpa.PodCount { - klog.Errorf("ClusterState pod count and matching pods disagree for vpa %v/%v", vpa.ID.Namespace, vpa.ID.VpaName) + klog.Errorf("ClusterState pod count and matching pods disagree for VPA %s/%s", vpa.ID.Namespace, vpa.ID.VpaName) } } } @@ -126,7 +126,7 @@ func (r *recommender) UpdateVPAs() { r.vpaClient.VerticalPodAutoscalers(vpa.ID.Namespace), vpa.ID.VpaName, vpa.AsStatus(), &observedVpa.Status) if err != nil { klog.Errorf( - "Cannot update VPA %v/%v object. Reason: %+v", vpa.ID.Namespace, vpa.ID.VpaName, err) + "Cannot update VPA %s/%s object. 
Reason: %+v", vpa.ID.Namespace, vpa.ID.VpaName, err) } } } From 333d438dbf9d604267ede55007f0ab4ff87c9a16 Mon Sep 17 00:00:00 2001 From: Rahul Rangith Date: Wed, 22 May 2024 16:16:04 -0400 Subject: [PATCH 04/30] Default min/max sizes for Azure VMSSs return a struct --- cluster-autoscaler/FAQ.md | 2 +- .../azure/azure_autodiscovery.go | 64 ++++++++++++++--- .../azure/azure_autodiscovery_test.go | 71 ++++++++++++++++++- .../cloudprovider/azure/azure_manager.go | 9 ++- .../cloudprovider/azure/azure_manager_test.go | 47 ++++++++++++ cluster-autoscaler/main.go | 3 +- 6 files changed, 181 insertions(+), 15 deletions(-) diff --git a/cluster-autoscaler/FAQ.md b/cluster-autoscaler/FAQ.md index 1e0ee7ae02de..a7a214c2709c 100644 --- a/cluster-autoscaler/FAQ.md +++ b/cluster-autoscaler/FAQ.md @@ -793,7 +793,7 @@ The following startup parameters are supported for cluster autoscaler: | `ok-total-unready-count` | Number of allowed unready nodes, irrespective of max-total-unready-percentage | 3 | `max-node-provision-time` | Maximum time CA waits for node to be provisioned | 15 minutes | `nodes` | sets min,max size and other configuration data for a node group in a format accepted by cloud provider. Can be used multiple times. Format: \:\: | "" -| `node-group-auto-discovery` | One or more definition(s) of node group auto-discovery.
A definition is expressed `<name>:[<key>[=<value>]]`
The `aws`, `gce`, and `azure` cloud providers are currently supported. AWS matches by ASG tags, e.g. `asg:tag=tagKey,anotherTagKey`
GCE matches by IG name prefix, and requires you to specify min and max nodes per IG, e.g. `mig:namePrefix=pfx,min=0,max=10`
Azure matches by tags on VMSS, e.g. `label:foo=bar`, and will auto-detect `min` and `max` tags on the VMSS to set scaling limits.
Can be used multiple times | "" +| `node-group-auto-discovery` | One or more definition(s) of node group auto-discovery.
A definition is expressed `<name>:[<key>[=<value>]]`
The `aws`, `gce`, and `azure` cloud providers are currently supported. AWS matches by ASG tags, e.g. `asg:tag=tagKey,anotherTagKey`
GCE matches by IG name prefix, and requires you to specify min and max nodes per IG, e.g. `mig:namePrefix=pfx,min=0,max=10`
Azure matches by VMSS tags, similar to AWS, and you can optionally specify a default min and max size for VMSSs, e.g. `label:tag=tagKey,anotherTagKey=bar,min=0,max=600`.
Can be used multiple times | ""
| `emit-per-nodegroup-metrics` | If true, emit per node group metrics. | false
| `estimator` | Type of resource estimator to be used in scale up | binpacking
| `expander` | Type of node group expander to be used in scale up. | random
diff --git a/cluster-autoscaler/cloudprovider/azure/azure_autodiscovery.go b/cluster-autoscaler/cloudprovider/azure/azure_autodiscovery.go
index 51112ace97cc..a218dfe637e9 100644
--- a/cluster-autoscaler/cloudprovider/azure/azure_autodiscovery.go
+++ b/cluster-autoscaler/cloudprovider/azure/azure_autodiscovery.go
@@ -18,18 +18,31 @@ package azure

 import (
 	"fmt"
-	"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
+	"strconv"
 	"strings"
+
+	"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
 )

 const (
-	autoDiscovererTypeLabel = "label"
+	autoDiscovererTypeLabel       = "label"
+	vmssAutoDiscovererKeyMinNodes = "min"
+	vmssAutoDiscovererKeyMaxNodes = "max"
 )

 // A labelAutoDiscoveryConfig specifies how to auto-discover Azure node groups.
 type labelAutoDiscoveryConfig struct {
 	// Key-values to match on.
 	Selector map[string]string
+	// MinSize specifies the minimum size for all VMSSs that match Selector.
+	MinSize *int
+	// MaxSize specifies the maximum size for all VMSSs that match Selector.
+	MaxSize *int
+}
+
+type autoDiscoveryConfigSizes struct {
+	Min int
+	Max int
 }

 // ParseLabelAutoDiscoverySpecs returns any provided NodeGroupAutoDiscoverySpecs
@@ -70,34 +83,67 @@ func parseLabelAutoDiscoverySpec(spec string) (labelAutoDiscoveryConfig, error)
 		if k == "" || v == "" {
 			return cfg, fmt.Errorf("empty value not allowed in key=value tag pairs")
 		}
-		cfg.Selector[k] = v
+
+		switch k {
+		case vmssAutoDiscovererKeyMinNodes:
+			minSize, err := strconv.Atoi(v)
+			if err != nil || minSize < 0 {
+				return cfg, fmt.Errorf("invalid minimum nodes: %s", v)
+			}
+			cfg.MinSize = &minSize
+		case vmssAutoDiscovererKeyMaxNodes:
+			maxSize, err := strconv.Atoi(v)
+			if err != nil || maxSize < 0 {
+				return cfg, fmt.Errorf("invalid maximum nodes: %s", v)
+			}
+			cfg.MaxSize = &maxSize
+		default:
+			cfg.Selector[k] = v
+		}
+	}
+	if cfg.MaxSize != nil && cfg.MinSize != nil && *cfg.MaxSize < *cfg.MinSize {
+		return cfg, fmt.Errorf("maximum size %d must be greater than or equal to minimum size %d", *cfg.MaxSize, *cfg.MinSize)
 	}
 	return cfg, nil
 }

-func matchDiscoveryConfig(labels map[string]*string, configs []labelAutoDiscoveryConfig) bool {
+// returns an autoDiscoveryConfigSizes struct if the VMSS's tags match the autodiscovery configs
+// if the VMSS's tags do not match then return nil
+// if there are multiple min/max sizes defined, return the highest min value and the lowest max value
+func matchDiscoveryConfig(labels map[string]*string, configs []labelAutoDiscoveryConfig) *autoDiscoveryConfigSizes {
 	if len(configs) == 0 {
-		return false
+		return nil
 	}
+	minSize := -1
+	maxSize := -1

 	for _, c := range configs {
 		if len(c.Selector) == 0 {
-			return false
+			return nil
 		}

 		for k, v := range c.Selector {
 			value, ok := labels[k]
 			if !ok {
-				return false
+				return nil
 			}
 			if len(v) > 0 {
 				if value == nil || *value != v {
-					return false
+					return nil
 				}
 			}
 		}
+		if c.MinSize != nil && minSize < *c.MinSize {
+			minSize = *c.MinSize
+		}
+		if c.MaxSize != nil && (maxSize == -1 || maxSize > *c.MaxSize) {
+			maxSize = *c.MaxSize
+		}
 	}
-	return true
+	return &autoDiscoveryConfigSizes{
+		Min: minSize,
+		Max: maxSize,
+	}
 }
diff --git a/cluster-autoscaler/cloudprovider/azure/azure_autodiscovery_test.go b/cluster-autoscaler/cloudprovider/azure/azure_autodiscovery_test.go
index
f119ed917243..dcecc4d79e95 100644 --- a/cluster-autoscaler/cloudprovider/azure/azure_autodiscovery_test.go +++ b/cluster-autoscaler/cloudprovider/azure/azure_autodiscovery_test.go @@ -17,12 +17,15 @@ limitations under the License. package azure import ( + "testing" + "github.com/stretchr/testify/assert" "k8s.io/autoscaler/cluster-autoscaler/cloudprovider" - "testing" ) func TestParseLabelAutoDiscoverySpecs(t *testing.T) { + minVal := 1 + maxVal := 2 testCases := []struct { name string specs []string @@ -46,7 +49,7 @@ func TestParseLabelAutoDiscoverySpecs(t *testing.T) { expectedErr: true, }, { - name: "InvalidAutoDiscoerLabel", + name: "InvalidAutoDiscoverLabel", specs: []string{"invalid:test-tag=test-value,another-test-tag"}, expectedErr: true, }, @@ -60,6 +63,70 @@ func TestParseLabelAutoDiscoverySpecs(t *testing.T) { specs: []string{"label:=test-val"}, expectedErr: true, }, + { + name: "ValidSpecWithSizes", + specs: []string{ + "label:cluster-autoscaler-enabled=true,cluster-autoscaler-name=fake-cluster,min=1,max=2", + "label:test-tag=test-value,another-test-tag=another-test-value,min=1,max=2", + }, + expected: []labelAutoDiscoveryConfig{ + {Selector: map[string]string{"cluster-autoscaler-enabled": "true", "cluster-autoscaler-name": "fake-cluster"}, MinSize: &minVal, MaxSize: &maxVal}, + {Selector: map[string]string{"test-tag": "test-value", "another-test-tag": "another-test-value"}, MinSize: &minVal, MaxSize: &maxVal}, + }, + }, + { + name: "ValidSpecWithSizesOnlyMax", + specs: []string{ + "label:cluster-autoscaler-enabled=true,max=2", + }, + expected: []labelAutoDiscoveryConfig{ + {Selector: map[string]string{"cluster-autoscaler-enabled": "true"}, MaxSize: &maxVal}, + }, + }, + { + name: "ValidSpecWithSizesOnlyMin", + specs: []string{ + "label:cluster-autoscaler-enabled=true,min=1", + }, + expected: []labelAutoDiscoveryConfig{ + {Selector: map[string]string{"cluster-autoscaler-enabled": "true"}, MinSize: &minVal}, + }, + }, + { + name: "NonIntegerMin", + specs: []string{ + "label:cluster-autoscaler-enabled=true,min=random,max=2", + }, + expectedErr: true, + }, + { + name: "NegativeMin", + specs: []string{ + "label:cluster-autoscaler-enabled=true,min=-5,max=2", + }, + expectedErr: true, + }, + { + name: "NonIntegerMax", + specs: []string{ + "label:cluster-autoscaler-enabled=true,min=1,max=random", + }, + expectedErr: true, + }, + { + name: "NegativeMax", + specs: []string{ + "label:cluster-autoscaler-enabled=true,min=1,max=-5", + }, + expectedErr: true, + }, + { + name: "LowerMaxThanMin", + specs: []string{ + "label:cluster-autoscaler-enabled=true,min=5,max=1", + }, + expectedErr: true, + }, } for _, tc := range testCases { diff --git a/cluster-autoscaler/cloudprovider/azure/azure_manager.go b/cluster-autoscaler/cloudprovider/azure/azure_manager.go index daa449f3cf09..b8fe47a04f5b 100644 --- a/cluster-autoscaler/cloudprovider/azure/azure_manager.go +++ b/cluster-autoscaler/cloudprovider/azure/azure_manager.go @@ -304,12 +304,13 @@ func (m *AzureManager) getFilteredScaleSets(filter []labelAutoDiscoveryConfig) ( var nodeGroups []cloudprovider.NodeGroup for _, scaleSet := range vmssList { + var cfgSizes *autoDiscoveryConfigSizes if len(filter) > 0 { if scaleSet.Tags == nil || len(scaleSet.Tags) == 0 { continue } - if !matchDiscoveryConfig(scaleSet.Tags, filter) { + if cfgSizes = matchDiscoveryConfig(scaleSet.Tags, filter); cfgSizes == nil { continue } } @@ -327,6 +328,8 @@ func (m *AzureManager) getFilteredScaleSets(filter []labelAutoDiscoveryConfig) ( klog.Warningf("ignoring vmss %q 
because of invalid minimum size specified for vmss: %s", *scaleSet.Name, err) continue } + } else if cfgSizes.Min >= 0 { + spec.MinSize = cfgSizes.Min } else { klog.Warningf("ignoring vmss %q because of no minimum size specified for vmss", *scaleSet.Name) continue @@ -342,12 +345,14 @@ func (m *AzureManager) getFilteredScaleSets(filter []labelAutoDiscoveryConfig) ( klog.Warningf("ignoring vmss %q because of invalid maximum size specified for vmss: %s", *scaleSet.Name, err) continue } + } else if cfgSizes.Max >= 0 { + spec.MaxSize = cfgSizes.Max } else { klog.Warningf("ignoring vmss %q because of no maximum size specified for vmss", *scaleSet.Name) continue } if spec.MaxSize < spec.MinSize { - klog.Warningf("ignoring vmss %q because of maximum size must be greater than minimum size: max=%d < min=%d", *scaleSet.Name, spec.MaxSize, spec.MinSize) + klog.Warningf("ignoring vmss %q because of maximum size must be greater than or equal to minimum size: max=%d < min=%d", *scaleSet.Name, spec.MaxSize, spec.MinSize) continue } diff --git a/cluster-autoscaler/cloudprovider/azure/azure_manager_test.go b/cluster-autoscaler/cloudprovider/azure/azure_manager_test.go index 3bad46343f50..f72b512ccc50 100644 --- a/cluster-autoscaler/cloudprovider/azure/azure_manager_test.go +++ b/cluster-autoscaler/cloudprovider/azure/azure_manager_test.go @@ -687,6 +687,53 @@ func TestGetFilteredAutoscalingGroupsVmss(t *testing.T) { assert.True(t, assert.ObjectsAreEqualValues(expectedAsgs, asgs), "expected %#v, but found: %#v", expectedAsgs, asgs) } +func TestGetFilteredAutoscalingGroupsVmssWithConfiguredSizes(t *testing.T) { + ctrl := gomock.NewController(t) + defer ctrl.Finish() + + vmssName := "test-vmss" + vmssTag := "fake-tag" + vmssTagValue := "fake-value" + vmssTag2 := "fake-tag2" + vmssTagValue2 := "fake-value2" + minVal := 2 + maxVal := 4 + + ngdo := cloudprovider.NodeGroupDiscoveryOptions{ + NodeGroupAutoDiscoverySpecs: []string{ + fmt.Sprintf("label:%s=%s,min=2,max=5", vmssTag, vmssTagValue), + fmt.Sprintf("label:%s=%s,min=1,max=4", vmssTag2, vmssTagValue2), + }, + } + + manager := newTestAzureManager(t) + expectedScaleSets := []compute.VirtualMachineScaleSet{fakeVMSSWithTags(vmssName, map[string]*string{vmssTag: &vmssTagValue, vmssTag2: &vmssTagValue2})} + mockVMSSClient := mockvmssclient.NewMockInterface(ctrl) + mockVMSSClient.EXPECT().List(gomock.Any(), manager.config.ResourceGroup).Return(expectedScaleSets, nil).AnyTimes() + manager.azClient.virtualMachineScaleSetsClient = mockVMSSClient + err := manager.forceRefresh() + assert.NoError(t, err) + + specs, err := ParseLabelAutoDiscoverySpecs(ngdo) + assert.NoError(t, err) + + asgs, err := manager.getFilteredNodeGroups(specs) + assert.NoError(t, err) + expectedAsgs := []cloudprovider.NodeGroup{&ScaleSet{ + azureRef: azureRef{ + Name: vmssName, + }, + minSize: minVal, + maxSize: maxVal, + manager: manager, + enableForceDelete: manager.config.EnableForceDelete, + curSize: 3, + sizeRefreshPeriod: manager.azureCache.refreshInterval, + instancesRefreshPeriod: defaultVmssInstancesRefreshPeriod, + }} + assert.True(t, assert.ObjectsAreEqualValues(expectedAsgs, asgs), "expected %#v, but found: %#v", expectedAsgs, asgs) +} + func TestGetFilteredAutoscalingGroupsWithInvalidVMType(t *testing.T) { ctrl := gomock.NewController(t) defer ctrl.Finish() diff --git a/cluster-autoscaler/main.go b/cluster-autoscaler/main.go index 0402d5cd91c5..bf891281b1df 100644 --- a/cluster-autoscaler/main.go +++ b/cluster-autoscaler/main.go @@ -179,8 +179,9 @@ var ( "node-group-auto-discovery", 
"One or more definition(s) of node group auto-discovery. "+ "A definition is expressed `:[[=]]`. "+ - "The `aws` and `gce` cloud providers are currently supported. AWS matches by ASG tags, e.g. `asg:tag=tagKey,anotherTagKey`. "+ + "The `aws`, `gce`, and `azure` cloud providers are currently supported. AWS matches by ASG tags, e.g. `asg:tag=tagKey,anotherTagKey`. "+ "GCE matches by IG name prefix, and requires you to specify min and max nodes per IG, e.g. `mig:namePrefix=pfx,min=0,max=10` "+ + "Azure matches by VMSS tags, similar to AWS. And you can optionally specify a default min and max size, e.g. `label:tag=tagKey,anotherTagKey=bar,min=0,max=600`. "+ "Can be used multiple times.") estimatorFlag = flag.String("estimator", estimator.BinpackingEstimatorName, From 4a5d2813f49722f9ac598f57634e00a5fde67f20 Mon Sep 17 00:00:00 2001 From: Ravi Sinha Date: Mon, 10 Jun 2024 09:08:18 -0700 Subject: [PATCH 05/30] This merge resolves an issue in the Kubernetes Cluster Autoscaler where actual instances within AWS Auto Scaling Groups (ASGs) were incorrectly decommissioned instead of placeholders. The updates ensure that placeholders are exclusively targeted for scaling down under conditions where recent scaling activities have failed. This prevents the accidental termination of active nodes and enhances the reliability of the autoscaler in AWS environments. --- .../cloudprovider/aws/auto_scaling_groups.go | 127 ++++++++++++++++++ .../aws/aws_cloud_provider_test.go | 98 ++++++++++++++ 2 files changed, 225 insertions(+) diff --git a/cluster-autoscaler/cloudprovider/aws/auto_scaling_groups.go b/cluster-autoscaler/cloudprovider/aws/auto_scaling_groups.go index 4667e285b153..a8d3fc48c295 100644 --- a/cluster-autoscaler/cloudprovider/aws/auto_scaling_groups.go +++ b/cluster-autoscaler/cloudprovider/aws/auto_scaling_groups.go @@ -308,6 +308,54 @@ func (m *asgCache) DeleteInstances(instances []*AwsInstanceRef) error { } } + var isRecentScalingActivitySuccess = false + var err error + + placeHolderInstancesCount := m.GetPlaceHolderInstancesCount(instances) + // Check if there are any placeholder instances in the list. + if placeHolderInstancesCount > 0 { + // Log the check for placeholders in the ASG. + klog.V(4).Infof("Detected %d placeholder instance(s), checking recent scaling activity for ASG %s", + placeHolderInstancesCount, commonAsg.Name) + + // Retrieve the most recent scaling activity to determine its success state. + isRecentScalingActivitySuccess, err = m.getMostRecentScalingActivity(commonAsg) + + // Handle errors from retrieving scaling activity. + if err != nil { + // Log the error if the scaling activity check fails and return the error. + klog.Errorf("Error retrieving scaling activity for ASG %s: %v", commonAsg.Name, err) + return err // Return error to prevent further processing with uncertain state information. 
+ } + + if !isRecentScalingActivitySuccess { + asgDetail, err := m.getDescribeAutoScalingGroupResults(commonAsg) + + if err != nil { + klog.Errorf("Error retrieving ASG details %s: %v", commonAsg.Name, err) + return err + } + + activeInstancesInAsg := len(asgDetail.Instances) + desiredCapacityInAsg := int(*asgDetail.DesiredCapacity) + klog.V(4).Infof("asg %s has placeholders instances with desired capacity = %d and active instances = %d ", + commonAsg.Name, desiredCapacityInAsg, activeInstancesInAsg) + + // If the difference between the active instances and the desired capacity is greater than 1, + // it means that the ASG is under-provisioned and the desired capacity is not being reached. + // In this case, we would reduce the size of ASG by the count of unprovisioned instances + // which is equal to the total count of active instances in ASG + + err = m.setAsgSizeNoLock(commonAsg, activeInstancesInAsg) + + if err != nil { + klog.Errorf("Error reducing ASG %s size to %d: %v", commonAsg.Name, activeInstancesInAsg, err) + return err + } + return nil + } + } + for _, instance := range instances { // check if the instance is a placeholder - a requested instance that was never created by the node group // if it is, just decrease the size of the node group, as there's no specific instance we can remove @@ -352,6 +400,33 @@ func (m *asgCache) DeleteInstances(instances []*AwsInstanceRef) error { return nil } +func (m *asgCache) getDescribeAutoScalingGroupResults(commonAsg *asg) (*autoscaling.Group, error) { + asgs := make([]*autoscaling.Group, 0) + commonAsgNames := []string{commonAsg.Name} + input := &autoscaling.DescribeAutoScalingGroupsInput{ + AutoScalingGroupNames: aws.StringSlice(commonAsgNames), + MaxRecords: aws.Int64(100), + } + + err := m.awsService.DescribeAutoScalingGroupsPages(input, func(output *autoscaling.DescribeAutoScalingGroupsOutput, _ bool) bool { + asgs = append(asgs, output.AutoScalingGroups...) + // We return true while we want to be called with the next page of + // results, if any. 
+ return false + }) + + if err != nil { + klog.Errorf("Failed while performing DescribeAutoScalingGroupsPages: %v", err) + return nil, err + } + + if len(asgs) == 0 { + return nil, fmt.Errorf("no ASGs found for %s", commonAsgNames) + } + + return asgs[0], nil +} + // isPlaceholderInstance checks if the given instance is only a placeholder func (m *asgCache) isPlaceholderInstance(instance *AwsInstanceRef) bool { return strings.HasPrefix(instance.Name, placeholderInstanceNamePrefix) @@ -624,3 +699,55 @@ func (m *asgCache) buildInstanceRefFromAWS(instance *autoscaling.Instance) AwsIn func (m *asgCache) Cleanup() { close(m.interrupt) } + +func (m *asgCache) getMostRecentScalingActivity(asg *asg) (bool, error) { + input := &autoscaling.DescribeScalingActivitiesInput{ + AutoScalingGroupName: aws.String(asg.Name), + MaxRecords: aws.Int64(1), + } + + var response *autoscaling.DescribeScalingActivitiesOutput + var err error + attempts := 3 + + for i := 0; i < attempts; i++ { + response, err = m.awsService.DescribeScalingActivities(input) + if err == nil { + break + } + klog.V(2).Infof("Failed to describe scaling activities, attempt %d/%d: %v", i+1, attempts, err) + time.Sleep(time.Second * 2) + } + + if err != nil { + klog.Errorf("All attempts failed for DescribeScalingActivities: %v", err) + return false, err + } + + if len(response.Activities) == 0 { + klog.Info("No scaling activities found for ASG:", asg.Name) + return false, nil + } + + lastActivity := response.Activities[0] + if *lastActivity.StatusCode == "Successful" { + klog.Infof("Most recent scaling activity for ASG %s was successful", asg.Name) + return true, nil + } else { + klog.Infof("Most recent scaling activity for ASG %s was not successful: %s", asg.Name, *lastActivity.StatusMessage) + return false, nil + } +} + +// GetPlaceHolderInstancesCount returns count of placeholder instances in the cache +func (m *asgCache) GetPlaceHolderInstancesCount(instances []*AwsInstanceRef) int { + + placeholderInstancesCount := 0 + for _, instance := range instances { + if strings.HasPrefix(instance.Name, placeholderInstanceNamePrefix) { + placeholderInstancesCount++ + + } + } + return placeholderInstancesCount +} diff --git a/cluster-autoscaler/cloudprovider/aws/aws_cloud_provider_test.go b/cluster-autoscaler/cloudprovider/aws/aws_cloud_provider_test.go index 0033d27c68ea..e2e425b068e3 100644 --- a/cluster-autoscaler/cloudprovider/aws/aws_cloud_provider_test.go +++ b/cluster-autoscaler/cloudprovider/aws/aws_cloud_provider_test.go @@ -18,6 +18,7 @@ package aws import ( "testing" + "time" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/mock" @@ -568,6 +569,22 @@ func TestDeleteNodesWithPlaceholder(t *testing.T) { HonorCooldown: aws.Bool(false), }).Return(&autoscaling.SetDesiredCapacityOutput{}) + a.On("DescribeScalingActivities", + &autoscaling.DescribeScalingActivitiesInput{ + AutoScalingGroupName: aws.String("test-asg"), + MaxRecords: aws.Int64(1), + }, + ).Return( + &autoscaling.DescribeScalingActivitiesOutput{ + Activities: []*autoscaling.Activity{ + { + StatusCode: aws.String("Successful"), + StatusMessage: aws.String("Successful"), + StartTime: aws.Time(time.Now().Add(-30 * time.Minute)), + }, + }, + }, nil) + // Look up the current number of instances... 
var expectedInstancesCount int64 = 2 a.On("DescribeAutoScalingGroupsPages", @@ -739,3 +756,84 @@ func TestHasInstance(t *testing.T) { assert.NoError(t, err) assert.False(t, present) } + +// write unit test for DeleteInstances function +func TestDeleteInstances_scalingActivityFailure(t *testing.T) { + + a := &autoScalingMock{} + provider := testProvider(t, newTestAwsManagerWithAsgs(t, a, nil, []string{"1:5:test-asg"})) + + asgs := provider.NodeGroups() + a.On("SetDesiredCapacity", &autoscaling.SetDesiredCapacityInput{ + AutoScalingGroupName: aws.String(asgs[0].Id()), + DesiredCapacity: aws.Int64(1), + HonorCooldown: aws.Bool(false), + }).Return(&autoscaling.SetDesiredCapacityOutput{}) + var expectedInstancesCount int64 = 5 + a.On("DescribeAutoScalingGroupsPages", + &autoscaling.DescribeAutoScalingGroupsInput{ + AutoScalingGroupNames: aws.StringSlice([]string{"test-asg"}), + MaxRecords: aws.Int64(100), + }, + mock.AnythingOfType("func(*autoscaling.DescribeAutoScalingGroupsOutput, bool) bool"), + ).Run(func(args mock.Arguments) { + fn := args.Get(1).(func(*autoscaling.DescribeAutoScalingGroupsOutput, bool) bool) + fn(testNamedDescribeAutoScalingGroupsOutput("test-asg", expectedInstancesCount, "i-0c257f8f05fd1c64b", "i-0c257f8f05fd1c64c", "i-0c257f8f05fd1c64d"), false) + // we expect the instance count to be 1 after the call to DeleteNodes + //expectedInstancesCount = + }).Return(nil) + + a.On("DescribeScalingActivities", + &autoscaling.DescribeScalingActivitiesInput{ + AutoScalingGroupName: aws.String("test-asg"), + MaxRecords: aws.Int64(1), + }, + ).Return( + &autoscaling.DescribeScalingActivitiesOutput{ + Activities: []*autoscaling.Activity{ + { + StatusCode: aws.String("Failed"), + StatusMessage: aws.String("Launching a new EC2 instance. Status Reason: We currently do not have sufficient p5.48xlarge capacity in zones with support for 'gp2' volumes. Our system will be working on provisioning additional capacity. 
Launching EC2 instance failed.\t"), + StartTime: aws.Time(time.Now().Add(-30 * time.Minute)), + }, + }, + }, nil) + + a.On("DescribeScalingActivities", + &autoscaling.DescribeScalingActivitiesInput{ + AutoScalingGroupName: aws.String("test-asg"), + }, + ).Return(&autoscaling.DescribeScalingActivitiesOutput{}, nil) + + a.On("SetDesiredCapacity", &autoscaling.SetDesiredCapacityInput{ + AutoScalingGroupName: aws.String(asgs[0].Id()), + DesiredCapacity: aws.Int64(3), + HonorCooldown: aws.Bool(false), + }).Return(&autoscaling.SetDesiredCapacityOutput{}) + + provider.Refresh() + + initialSize, err := asgs[0].TargetSize() + assert.NoError(t, err) + assert.Equal(t, 5, initialSize) + + nodes := []*apiv1.Node{} + asgToInstances := provider.awsManager.asgCache.asgToInstances[AwsRef{Name: "test-asg"}] + for _, instance := range asgToInstances { + nodes = append(nodes, &apiv1.Node{ + Spec: apiv1.NodeSpec{ + ProviderID: instance.ProviderID, + }, + }) + } + + err = asgs[0].DeleteNodes(nodes) + assert.NoError(t, err) + a.AssertNumberOfCalls(t, "SetDesiredCapacity", 1) + a.AssertNumberOfCalls(t, "DescribeAutoScalingGroupsPages", 2) + + newSize, err := asgs[0].TargetSize() + assert.NoError(t, err) + assert.Equal(t, 3, newSize) + +} From 9a0830eb01915a01bbcd6c74e95ccbb0a54e5be4 Mon Sep 17 00:00:00 2001 From: Ravi Sinha Date: Mon, 10 Jun 2024 11:36:20 -0700 Subject: [PATCH 06/30] handling placeholder instances with no scaling activities error --- .../cloudprovider/aws/auto_scaling_groups.go | 176 +++++------------- .../aws/aws_cloud_provider_test.go | 103 +--------- 2 files changed, 47 insertions(+), 232 deletions(-) diff --git a/cluster-autoscaler/cloudprovider/aws/auto_scaling_groups.go b/cluster-autoscaler/cloudprovider/aws/auto_scaling_groups.go index a8d3fc48c295..ea744de0621a 100644 --- a/cluster-autoscaler/cloudprovider/aws/auto_scaling_groups.go +++ b/cluster-autoscaler/cloudprovider/aws/auto_scaling_groups.go @@ -308,9 +308,6 @@ func (m *asgCache) DeleteInstances(instances []*AwsInstanceRef) error { } } - var isRecentScalingActivitySuccess = false - var err error - placeHolderInstancesCount := m.GetPlaceHolderInstancesCount(instances) // Check if there are any placeholder instances in the list. if placeHolderInstancesCount > 0 { @@ -318,113 +315,69 @@ func (m *asgCache) DeleteInstances(instances []*AwsInstanceRef) error { klog.V(4).Infof("Detected %d placeholder instance(s), checking recent scaling activity for ASG %s", placeHolderInstancesCount, commonAsg.Name) - // Retrieve the most recent scaling activity to determine its success state. - isRecentScalingActivitySuccess, err = m.getMostRecentScalingActivity(commonAsg) + asgNames := []string{commonAsg.Name} + asgDetail, err := m.awsService.getAutoscalingGroupsByNames(asgNames) - // Handle errors from retrieving scaling activity. if err != nil { - // Log the error if the scaling activity check fails and return the error. - klog.Errorf("Error retrieving scaling activity for ASG %s: %v", commonAsg.Name, err) - return err // Return error to prevent further processing with uncertain state information. 
+		klog.Errorf("Error retrieving ASG details %s: %v", commonAsg.Name, err)
+		return err
 	}
 
-	if !isRecentScalingActivitySuccess {
-		asgDetail, err := m.getDescribeAutoScalingGroupResults(commonAsg)
-
-		if err != nil {
-			klog.Errorf("Error retrieving ASG details %s: %v", commonAsg.Name, err)
-			return err
-		}
+		activeInstancesInAsg := len(asgDetail[0].Instances)
+		desiredCapacityInAsg := int(*asgDetail[0].DesiredCapacity)
+		klog.V(4).Infof("asg %s has placeholder instances with desired capacity = %d and active instances = %d. updating ASG to match active instances count",
+			commonAsg.Name, desiredCapacityInAsg, activeInstancesInAsg)
 
-		activeInstancesInAsg := len(asgDetail.Instances)
-		desiredCapacityInAsg := int(*asgDetail.DesiredCapacity)
-		klog.V(4).Infof("asg %s has placeholders instances with desired capacity = %d and active instances = %d ",
-			commonAsg.Name, desiredCapacityInAsg, activeInstancesInAsg)
 
+		// If the difference between the active instances and the desired capacity is greater than 1,
+		// it means that the ASG is under-provisioned and the desired capacity is not being reached.
+		// In this case, we reduce the size of the ASG by the count of unprovisioned instances,
+		// i.e. we set the desired capacity to the total count of active instances in the ASG.
 
-		// If the difference between the active instances and the desired capacity is greater than 1,
-		// it means that the ASG is under-provisioned and the desired capacity is not being reached.
-		// In this case, we would reduce the size of ASG by the count of unprovisioned instances
-		// which is equal to the total count of active instances in ASG
 
+		err = m.setAsgSizeNoLock(commonAsg, activeInstancesInAsg)
 
-		err = m.setAsgSizeNoLock(commonAsg, activeInstancesInAsg)
-
-		if err != nil {
-			klog.Errorf("Error reducing ASG %s size to %d: %v", commonAsg.Name, activeInstancesInAsg, err)
-			return err
-		}
-		return nil
+		if err != nil {
+			klog.Errorf("Error reducing ASG %s size to %d: %v", commonAsg.Name, activeInstancesInAsg, err)
+			return err
 		}
+		return nil
 	}
 
 	for _, instance := range instances {
-		// check if the instance is a placeholder - a requested instance that was never created by the node group
-		// if it is, just decrease the size of the node group, as there's no specific instance we can remove
-		if m.isPlaceholderInstance(instance) {
-			klog.V(4).Infof("instance %s is detected as a placeholder, decreasing ASG requested size instead "+
-				"of deleting instance", instance.Name)
-			m.decreaseAsgSizeByOneNoLock(commonAsg)
-		} else {
-			// check if the instance is already terminating - if it is, don't bother terminating again
-			// as doing so causes unnecessary API calls and can cause the curSize cached value to decrement
-			// unnecessarily.
- lifecycle, err := m.findInstanceLifecycle(*instance) - if err != nil { - return err - } - if lifecycle != nil && - *lifecycle == autoscaling.LifecycleStateTerminated || - *lifecycle == autoscaling.LifecycleStateTerminating || - *lifecycle == autoscaling.LifecycleStateTerminatingWait || - *lifecycle == autoscaling.LifecycleStateTerminatingProceed { - klog.V(2).Infof("instance %s is already terminating in state %s, will skip instead", instance.Name, *lifecycle) - continue - } - - params := &autoscaling.TerminateInstanceInAutoScalingGroupInput{ - InstanceId: aws.String(instance.Name), - ShouldDecrementDesiredCapacity: aws.Bool(true), - } - start := time.Now() - resp, err := m.awsService.TerminateInstanceInAutoScalingGroup(params) - observeAWSRequest("TerminateInstanceInAutoScalingGroup", err, start) - if err != nil { - return err - } - klog.V(4).Infof(*resp.Activity.Description) - - // Proactively decrement the size so autoscaler makes better decisions - commonAsg.curSize-- + // check if the instance is already terminating - if it is, don't bother terminating again + // as doing so causes unnecessary API calls and can cause the curSize cached value to decrement + // unnecessarily. + lifecycle, err := m.findInstanceLifecycle(*instance) + if err != nil { + return err } - } - return nil -} -func (m *asgCache) getDescribeAutoScalingGroupResults(commonAsg *asg) (*autoscaling.Group, error) { - asgs := make([]*autoscaling.Group, 0) - commonAsgNames := []string{commonAsg.Name} - input := &autoscaling.DescribeAutoScalingGroupsInput{ - AutoScalingGroupNames: aws.StringSlice(commonAsgNames), - MaxRecords: aws.Int64(100), - } + if lifecycle != nil && + *lifecycle == autoscaling.LifecycleStateTerminated || + *lifecycle == autoscaling.LifecycleStateTerminating || + *lifecycle == autoscaling.LifecycleStateTerminatingWait || + *lifecycle == autoscaling.LifecycleStateTerminatingProceed { + klog.V(2).Infof("instance %s is already terminating in state %s, will skip instead", instance.Name, *lifecycle) + continue + } - err := m.awsService.DescribeAutoScalingGroupsPages(input, func(output *autoscaling.DescribeAutoScalingGroupsOutput, _ bool) bool { - asgs = append(asgs, output.AutoScalingGroups...) - // We return true while we want to be called with the next page of - // results, if any. 
- return false - }) + params := &autoscaling.TerminateInstanceInAutoScalingGroupInput{ + InstanceId: aws.String(instance.Name), + ShouldDecrementDesiredCapacity: aws.Bool(true), + } + start := time.Now() + resp, err := m.awsService.TerminateInstanceInAutoScalingGroup(params) + observeAWSRequest("TerminateInstanceInAutoScalingGroup", err, start) + if err != nil { + return err + } + klog.V(4).Infof(*resp.Activity.Description) - if err != nil { - klog.Errorf("Failed while performing DescribeAutoScalingGroupsPages: %v", err) - return nil, err - } + // Proactively decrement the size so autoscaler makes better decisions + commonAsg.curSize-- - if len(asgs) == 0 { - return nil, fmt.Errorf("no ASGs found for %s", commonAsgNames) } - - return asgs[0], nil + return nil } // isPlaceholderInstance checks if the given instance is only a placeholder @@ -700,45 +653,6 @@ func (m *asgCache) Cleanup() { close(m.interrupt) } -func (m *asgCache) getMostRecentScalingActivity(asg *asg) (bool, error) { - input := &autoscaling.DescribeScalingActivitiesInput{ - AutoScalingGroupName: aws.String(asg.Name), - MaxRecords: aws.Int64(1), - } - - var response *autoscaling.DescribeScalingActivitiesOutput - var err error - attempts := 3 - - for i := 0; i < attempts; i++ { - response, err = m.awsService.DescribeScalingActivities(input) - if err == nil { - break - } - klog.V(2).Infof("Failed to describe scaling activities, attempt %d/%d: %v", i+1, attempts, err) - time.Sleep(time.Second * 2) - } - - if err != nil { - klog.Errorf("All attempts failed for DescribeScalingActivities: %v", err) - return false, err - } - - if len(response.Activities) == 0 { - klog.Info("No scaling activities found for ASG:", asg.Name) - return false, nil - } - - lastActivity := response.Activities[0] - if *lastActivity.StatusCode == "Successful" { - klog.Infof("Most recent scaling activity for ASG %s was successful", asg.Name) - return true, nil - } else { - klog.Infof("Most recent scaling activity for ASG %s was not successful: %s", asg.Name, *lastActivity.StatusMessage) - return false, nil - } -} - // GetPlaceHolderInstancesCount returns count of placeholder instances in the cache func (m *asgCache) GetPlaceHolderInstancesCount(instances []*AwsInstanceRef) int { diff --git a/cluster-autoscaler/cloudprovider/aws/aws_cloud_provider_test.go b/cluster-autoscaler/cloudprovider/aws/aws_cloud_provider_test.go index e2e425b068e3..d375dd7aa238 100644 --- a/cluster-autoscaler/cloudprovider/aws/aws_cloud_provider_test.go +++ b/cluster-autoscaler/cloudprovider/aws/aws_cloud_provider_test.go @@ -17,9 +17,6 @@ limitations under the License. 
package aws import ( - "testing" - "time" - "github.com/stretchr/testify/assert" "github.com/stretchr/testify/mock" apiv1 "k8s.io/api/core/v1" @@ -28,6 +25,7 @@ import ( "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/aws/aws-sdk-go/aws" "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/aws/aws-sdk-go/service/autoscaling" "k8s.io/autoscaler/cluster-autoscaler/config" + "testing" ) var testAwsManager = &AwsManager{ @@ -569,22 +567,6 @@ func TestDeleteNodesWithPlaceholder(t *testing.T) { HonorCooldown: aws.Bool(false), }).Return(&autoscaling.SetDesiredCapacityOutput{}) - a.On("DescribeScalingActivities", - &autoscaling.DescribeScalingActivitiesInput{ - AutoScalingGroupName: aws.String("test-asg"), - MaxRecords: aws.Int64(1), - }, - ).Return( - &autoscaling.DescribeScalingActivitiesOutput{ - Activities: []*autoscaling.Activity{ - { - StatusCode: aws.String("Successful"), - StatusMessage: aws.String("Successful"), - StartTime: aws.Time(time.Now().Add(-30 * time.Minute)), - }, - }, - }, nil) - // Look up the current number of instances... var expectedInstancesCount int64 = 2 a.On("DescribeAutoScalingGroupsPages", @@ -620,7 +602,7 @@ func TestDeleteNodesWithPlaceholder(t *testing.T) { err = asgs[0].DeleteNodes([]*apiv1.Node{node}) assert.NoError(t, err) a.AssertNumberOfCalls(t, "SetDesiredCapacity", 1) - a.AssertNumberOfCalls(t, "DescribeAutoScalingGroupsPages", 1) + a.AssertNumberOfCalls(t, "DescribeAutoScalingGroupsPages", 2) newSize, err := asgs[0].TargetSize() assert.NoError(t, err) @@ -756,84 +738,3 @@ func TestHasInstance(t *testing.T) { assert.NoError(t, err) assert.False(t, present) } - -// write unit test for DeleteInstances function -func TestDeleteInstances_scalingActivityFailure(t *testing.T) { - - a := &autoScalingMock{} - provider := testProvider(t, newTestAwsManagerWithAsgs(t, a, nil, []string{"1:5:test-asg"})) - - asgs := provider.NodeGroups() - a.On("SetDesiredCapacity", &autoscaling.SetDesiredCapacityInput{ - AutoScalingGroupName: aws.String(asgs[0].Id()), - DesiredCapacity: aws.Int64(1), - HonorCooldown: aws.Bool(false), - }).Return(&autoscaling.SetDesiredCapacityOutput{}) - var expectedInstancesCount int64 = 5 - a.On("DescribeAutoScalingGroupsPages", - &autoscaling.DescribeAutoScalingGroupsInput{ - AutoScalingGroupNames: aws.StringSlice([]string{"test-asg"}), - MaxRecords: aws.Int64(100), - }, - mock.AnythingOfType("func(*autoscaling.DescribeAutoScalingGroupsOutput, bool) bool"), - ).Run(func(args mock.Arguments) { - fn := args.Get(1).(func(*autoscaling.DescribeAutoScalingGroupsOutput, bool) bool) - fn(testNamedDescribeAutoScalingGroupsOutput("test-asg", expectedInstancesCount, "i-0c257f8f05fd1c64b", "i-0c257f8f05fd1c64c", "i-0c257f8f05fd1c64d"), false) - // we expect the instance count to be 1 after the call to DeleteNodes - //expectedInstancesCount = - }).Return(nil) - - a.On("DescribeScalingActivities", - &autoscaling.DescribeScalingActivitiesInput{ - AutoScalingGroupName: aws.String("test-asg"), - MaxRecords: aws.Int64(1), - }, - ).Return( - &autoscaling.DescribeScalingActivitiesOutput{ - Activities: []*autoscaling.Activity{ - { - StatusCode: aws.String("Failed"), - StatusMessage: aws.String("Launching a new EC2 instance. Status Reason: We currently do not have sufficient p5.48xlarge capacity in zones with support for 'gp2' volumes. Our system will be working on provisioning additional capacity. 
Launching EC2 instance failed.\t"), - StartTime: aws.Time(time.Now().Add(-30 * time.Minute)), - }, - }, - }, nil) - - a.On("DescribeScalingActivities", - &autoscaling.DescribeScalingActivitiesInput{ - AutoScalingGroupName: aws.String("test-asg"), - }, - ).Return(&autoscaling.DescribeScalingActivitiesOutput{}, nil) - - a.On("SetDesiredCapacity", &autoscaling.SetDesiredCapacityInput{ - AutoScalingGroupName: aws.String(asgs[0].Id()), - DesiredCapacity: aws.Int64(3), - HonorCooldown: aws.Bool(false), - }).Return(&autoscaling.SetDesiredCapacityOutput{}) - - provider.Refresh() - - initialSize, err := asgs[0].TargetSize() - assert.NoError(t, err) - assert.Equal(t, 5, initialSize) - - nodes := []*apiv1.Node{} - asgToInstances := provider.awsManager.asgCache.asgToInstances[AwsRef{Name: "test-asg"}] - for _, instance := range asgToInstances { - nodes = append(nodes, &apiv1.Node{ - Spec: apiv1.NodeSpec{ - ProviderID: instance.ProviderID, - }, - }) - } - - err = asgs[0].DeleteNodes(nodes) - assert.NoError(t, err) - a.AssertNumberOfCalls(t, "SetDesiredCapacity", 1) - a.AssertNumberOfCalls(t, "DescribeAutoScalingGroupsPages", 2) - - newSize, err := asgs[0].TargetSize() - assert.NoError(t, err) - assert.Equal(t, 3, newSize) - -} From 1739a71d80acba2f0ccacd741fd44b747c2b407b Mon Sep 17 00:00:00 2001 From: Ismail Alidzhikov Date: Wed, 19 Jun 2024 16:23:10 +0300 Subject: [PATCH 07/30] Use klog.KRef wherever possible --- .../pkg/recommender/checkpoint/checkpoint_writer.go | 2 +- .../pkg/recommender/input/cluster_feeder.go | 6 +++--- .../pkg/recommender/input/metrics/metrics_source.go | 4 ++-- .../pkg/recommender/routines/recommender.go | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/vertical-pod-autoscaler/pkg/recommender/checkpoint/checkpoint_writer.go b/vertical-pod-autoscaler/pkg/recommender/checkpoint/checkpoint_writer.go index f9bc6dbf646a..9bc9f0def89c 100644 --- a/vertical-pod-autoscaler/pkg/recommender/checkpoint/checkpoint_writer.go +++ b/vertical-pod-autoscaler/pkg/recommender/checkpoint/checkpoint_writer.go @@ -64,7 +64,7 @@ func getVpasToCheckpoint(clusterVpas map[model.VpaID]*model.Vpa) []*model.Vpa { vpas := make([]*model.Vpa, 0, len(clusterVpas)) for _, vpa := range clusterVpas { if isFetchingHistory(vpa) { - klog.V(3).Infof("VPA %s/%s is loading history, skipping checkpoints", vpa.ID.Namespace, vpa.ID.VpaName) + klog.V(3).Infof("VPA %s is loading history, skipping checkpoints", klog.KRef(vpa.ID.Namespace, vpa.ID.VpaName)) continue } vpas = append(vpas, vpa) diff --git a/vertical-pod-autoscaler/pkg/recommender/input/cluster_feeder.go b/vertical-pod-autoscaler/pkg/recommender/input/cluster_feeder.go index 67f60397e81f..9dd5da9df3b5 100644 --- a/vertical-pod-autoscaler/pkg/recommender/input/cluster_feeder.go +++ b/vertical-pod-autoscaler/pkg/recommender/input/cluster_feeder.go @@ -377,9 +377,9 @@ func (feeder *clusterStateFeeder) LoadVPAs() { // Delete non-existent VPAs from the model. 
for vpaID := range feeder.clusterState.Vpas { if _, exists := vpaKeys[vpaID]; !exists { - klog.V(3).Infof("Deleting VPA %s/%s", vpaID.Namespace, vpaID.VpaName) + klog.V(3).Infof("Deleting VPA %s", klog.KRef(vpaID.Namespace, vpaID.VpaName)) if err := feeder.clusterState.DeleteVpa(vpaID); err != nil { - klog.Errorf("Deleting VPA %s/%s failed: %v", vpaID.Namespace, vpaID.VpaName, err) + klog.Errorf("Deleting VPA %s failed: %v", klog.KRef(vpaID.Namespace, vpaID.VpaName), err) } } } @@ -398,7 +398,7 @@ func (feeder *clusterStateFeeder) LoadPods() { } for key := range feeder.clusterState.Pods { if _, exists := pods[key]; !exists { - klog.V(3).Infof("Deleting Pod %s/%s", key.Namespace, key.PodName) + klog.V(3).Infof("Deleting Pod %s", klog.KRef(key.Namespace, key.PodName)) feeder.clusterState.DeletePod(key) } } diff --git a/vertical-pod-autoscaler/pkg/recommender/input/metrics/metrics_source.go b/vertical-pod-autoscaler/pkg/recommender/input/metrics/metrics_source.go index 807c4f017a22..6f612a1c6f4a 100644 --- a/vertical-pod-autoscaler/pkg/recommender/input/metrics/metrics_source.go +++ b/vertical-pod-autoscaler/pkg/recommender/input/metrics/metrics_source.go @@ -114,10 +114,10 @@ func (s *externalMetricsClient) List(ctx context.Context, namespace string, opts return nil, err } if m == nil || len(m.Items) == 0 { - klog.V(4).Infof("External Metrics Query for VPA %s/%s: resource %+v, metric %+v, No items,", vpa.ID.Namespace, vpa.ID.VpaName, resourceName, metricName) + klog.V(4).Infof("External Metrics Query for VPA %s: resource %+v, metric %+v, No items,", klog.KRef(vpa.ID.Namespace, vpa.ID.VpaName), resourceName, metricName) continue } - klog.V(4).Infof("External Metrics Query for VPA %s/%s: resource %+v, metric %+v, %d items, item[0]: %+v", vpa.ID.Namespace, vpa.ID.VpaName, resourceName, metricName, len(m.Items), m.Items[0]) + klog.V(4).Infof("External Metrics Query for VPA %s: resource %+v, metric %+v, %d items, item[0]: %+v", klog.KRef(vpa.ID.Namespace, vpa.ID.VpaName), resourceName, metricName, len(m.Items), m.Items[0]) podMets.Timestamp = m.Items[0].Timestamp if m.Items[0].WindowSeconds != nil { podMets.Window = v1.Duration{Duration: time.Duration(*m.Items[0].WindowSeconds) * time.Second} diff --git a/vertical-pod-autoscaler/pkg/recommender/routines/recommender.go b/vertical-pod-autoscaler/pkg/recommender/routines/recommender.go index 139e20425bf1..5a89480e701e 100644 --- a/vertical-pod-autoscaler/pkg/recommender/routines/recommender.go +++ b/vertical-pod-autoscaler/pkg/recommender/routines/recommender.go @@ -116,7 +116,7 @@ func (r *recommender) UpdateVPAs() { pods := r.clusterState.GetMatchingPods(vpa) klog.Infof("MatchingPods: %+v", pods) if len(pods) != vpa.PodCount { - klog.Errorf("ClusterState pod count and matching pods disagree for VPA %s/%s", vpa.ID.Namespace, vpa.ID.VpaName) + klog.Errorf("ClusterState pod count and matching pods disagree for VPA %s", klog.KRef(vpa.ID.Namespace, vpa.ID.VpaName)) } } } @@ -126,7 +126,7 @@ func (r *recommender) UpdateVPAs() { r.vpaClient.VerticalPodAutoscalers(vpa.ID.Namespace), vpa.ID.VpaName, vpa.AsStatus(), &observedVpa.Status) if err != nil { klog.Errorf( - "Cannot update VPA %s/%s object. Reason: %+v", vpa.ID.Namespace, vpa.ID.VpaName, err) + "Cannot update VPA %s object. 
Reason: %+v", klog.KRef(vpa.ID.Namespace, vpa.ID.VpaName), err) } } } From b7ba76feddddd845b095ca73d18cce7f1b5ae27e Mon Sep 17 00:00:00 2001 From: Ravi Sinha Date: Thu, 20 Jun 2024 15:10:33 -0700 Subject: [PATCH 08/30] handling deletion of actual instances along with placeholders --- .../cloudprovider/aws/auto_scaling_groups.go | 6 +- .../aws/aws_cloud_provider_test.go | 112 +++++++++++++++++- 2 files changed, 116 insertions(+), 2 deletions(-) diff --git a/cluster-autoscaler/cloudprovider/aws/auto_scaling_groups.go b/cluster-autoscaler/cloudprovider/aws/auto_scaling_groups.go index ea744de0621a..77e7b0b57950 100644 --- a/cluster-autoscaler/cloudprovider/aws/auto_scaling_groups.go +++ b/cluster-autoscaler/cloudprovider/aws/auto_scaling_groups.go @@ -339,11 +339,15 @@ func (m *asgCache) DeleteInstances(instances []*AwsInstanceRef) error { klog.Errorf("Error reducing ASG %s size to %d: %v", commonAsg.Name, activeInstancesInAsg, err) return err } - return nil } for _, instance := range instances { + if m.isPlaceholderInstance(instance) { + // skipping placeholder as placeholder instances don't exist + // and we have already reduced ASG size during placeholder check. + continue + } // check if the instance is already terminating - if it is, don't bother terminating again // as doing so causes unnecessary API calls and can cause the curSize cached value to decrement // unnecessarily. diff --git a/cluster-autoscaler/cloudprovider/aws/aws_cloud_provider_test.go b/cluster-autoscaler/cloudprovider/aws/aws_cloud_provider_test.go index d375dd7aa238..09e4de361f06 100644 --- a/cluster-autoscaler/cloudprovider/aws/aws_cloud_provider_test.go +++ b/cluster-autoscaler/cloudprovider/aws/aws_cloud_provider_test.go @@ -17,6 +17,8 @@ limitations under the License. package aws import ( + "testing" + "fmt" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/mock" apiv1 "k8s.io/api/core/v1" @@ -25,7 +27,6 @@ import ( "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/aws/aws-sdk-go/aws" "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/aws/aws-sdk-go/service/autoscaling" "k8s.io/autoscaler/cluster-autoscaler/config" - "testing" ) var testAwsManager = &AwsManager{ @@ -738,3 +739,112 @@ func TestHasInstance(t *testing.T) { assert.NoError(t, err) assert.False(t, present) } + +func TestDeleteNodesWithPlaceholderAndIncorrectCache(t *testing.T) { + // This test validates the scenario where ASG cache is not in sync with Autoscaling configuration. + // we are taking an example where ASG size is 10, cache as 3 instances "i-0000", "i-0001" and "i-0002 + // But ASG has 6 instances i-0000 to i-10005. When DeleteInstances is called with 2 instances ("i-0000", "i-0001" ) + // and placeholders, CAS will terminate only these 2 instances after reducing ASG size by the count of placeholders + + a := &autoScalingMock{} + provider := testProvider(t, newTestAwsManagerWithAsgs(t, a, nil, []string{"1:10:test-asg"})) + asgs := provider.NodeGroups() + commonAsg := &asg{ + AwsRef: AwsRef{Name: asgs[0].Id()}, + minSize: asgs[0].MinSize(), + maxSize: asgs[0].MaxSize(), + } + + // desired capacity will be set as 6 as ASG has 4 placeholders + a.On("SetDesiredCapacity", &autoscaling.SetDesiredCapacityInput{ + AutoScalingGroupName: aws.String(asgs[0].Id()), + DesiredCapacity: aws.Int64(6), + HonorCooldown: aws.Bool(false), + }).Return(&autoscaling.SetDesiredCapacityOutput{}) + + // Look up the current number of instances... 
+	var expectedInstancesCount int64 = 10
+	a.On("DescribeAutoScalingGroupsPages",
+		&autoscaling.DescribeAutoScalingGroupsInput{
+			AutoScalingGroupNames: aws.StringSlice([]string{"test-asg"}),
+			MaxRecords:            aws.Int64(maxRecordsReturnedByAPI),
+		},
+		mock.AnythingOfType("func(*autoscaling.DescribeAutoScalingGroupsOutput, bool) bool"),
+	).Run(func(args mock.Arguments) {
+		fn := args.Get(1).(func(*autoscaling.DescribeAutoScalingGroupsOutput, bool) bool)
+		fn(testNamedDescribeAutoScalingGroupsOutput("test-asg", expectedInstancesCount, "i-0000", "i-0001", "i-0002", "i-0003", "i-0004", "i-0005"), false)
+
+		expectedInstancesCount = 4
+	}).Return(nil)
+
+	a.On("DescribeScalingActivities",
+		&autoscaling.DescribeScalingActivitiesInput{
+			AutoScalingGroupName: aws.String("test-asg"),
+		},
+	).Return(&autoscaling.DescribeScalingActivitiesOutput{}, nil)
+
+	provider.Refresh()
+
+	initialSize, err := asgs[0].TargetSize()
+	assert.NoError(t, err)
+	assert.Equal(t, 10, initialSize)
+
+	var awsInstanceRefs []AwsInstanceRef
+	instanceToAsg := make(map[AwsInstanceRef]*asg)
+
+	var nodes []*apiv1.Node
+	for i := 3; i <= 9; i++ {
+		providerId := fmt.Sprintf("aws:///us-east-1a/i-placeholder-test-asg-%d", i)
+		node := &apiv1.Node{
+			Spec: apiv1.NodeSpec{
+				ProviderID: providerId,
+			},
+		}
+		nodes = append(nodes, node)
+		awsInstanceRef := AwsInstanceRef{
+			ProviderID: providerId,
+			Name:       fmt.Sprintf("i-placeholder-test-asg-%d", i),
+		}
+		awsInstanceRefs = append(awsInstanceRefs, awsInstanceRef)
+		instanceToAsg[awsInstanceRef] = commonAsg
+	}
+
+	for i := 0; i <= 2; i++ {
+		providerId := fmt.Sprintf("aws:///us-east-1a/i-000%d", i)
+		node := &apiv1.Node{
+			Spec: apiv1.NodeSpec{
+				ProviderID: providerId,
+			},
+		}
+		// only 2 of the 3 active instances are set up to be terminated
+		if i < 2 {
+			nodes = append(nodes, node)
+			a.On("TerminateInstanceInAutoScalingGroup", &autoscaling.TerminateInstanceInAutoScalingGroupInput{
+				InstanceId:                     aws.String(fmt.Sprintf("i-000%d", i)),
+				ShouldDecrementDesiredCapacity: aws.Bool(true),
+			}).Return(&autoscaling.TerminateInstanceInAutoScalingGroupOutput{
+				Activity: &autoscaling.Activity{Description: aws.String("Deleted instance")},
+			})
+		}
+		awsInstanceRef := AwsInstanceRef{
+			ProviderID: providerId,
+			Name:       fmt.Sprintf("i-000%d", i),
+		}
+		awsInstanceRefs = append(awsInstanceRefs, awsInstanceRef)
+		instanceToAsg[awsInstanceRef] = commonAsg
+	}
+
+	// modifying provider to have incorrect information than ASG current state
+	provider.awsManager.asgCache.asgToInstances[AwsRef{Name: "test-asg"}] = awsInstanceRefs
+	provider.awsManager.asgCache.instanceToAsg = instanceToAsg
+
+	// calling DeleteNodes with the 2 actual nodes and the remaining placeholders
+	err = asgs[0].DeleteNodes(nodes)
+	assert.NoError(t, err)
+	a.AssertNumberOfCalls(t, "SetDesiredCapacity", 1)
+	a.AssertNumberOfCalls(t, "DescribeAutoScalingGroupsPages", 2)
+
+	// this ensures that only the 2 instances mocked in this unit test are terminated
+	a.AssertNumberOfCalls(t, "TerminateInstanceInAutoScalingGroup", 2)
+
+}
From d9f8217e54c7a7a97ab24b7d733d5c30fd88aa02 Mon Sep 17 00:00:00 2001
From: Ravi Sinha
Date: Fri, 21 Jun 2024 12:21:01 -0700
Subject: [PATCH 09/30] updating unit test name

---
 .../cloudprovider/aws/aws_cloud_provider_test.go | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/cluster-autoscaler/cloudprovider/aws/aws_cloud_provider_test.go b/cluster-autoscaler/cloudprovider/aws/aws_cloud_provider_test.go
index 09e4de361f06..80ae85b53256 100644
--- a/cluster-autoscaler/cloudprovider/aws/aws_cloud_provider_test.go
+++ b/cluster-autoscaler/cloudprovider/aws/aws_cloud_provider_test.go
@@ -740,7 +740,7 @@ func TestHasInstance(t *testing.T) {
 	assert.False(t, present)
 }
 
-func TestDeleteNodesWithPlaceholderAndIncorrectCache(t *testing.T) {
+func TestDeleteNodesWithPlaceholderAndStaleCache(t *testing.T) {
 	// This test validates the scenario where ASG cache is not in sync with Autoscaling configuration.
 	// we are taking an example where ASG size is 10, cache has 3 instances "i-0000", "i-0001" and "i-0002"
 	// But ASG has 6 instances i-0000 to i-0005. When DeleteInstances is called with 2 instances ("i-0000", "i-0001")
@@ -834,7 +834,7 @@ func TestDeleteNodesWithPlaceholderAndIncorrectCache(t *testing.T) {
 		instanceToAsg[awsInstanceRef] = commonAsg
 	}
 
-	// modifying provider to have incorrect information than ASG current state
+	// modifying provider to bring disparity between ASG and cache
 	provider.awsManager.asgCache.asgToInstances[AwsRef{Name: "test-asg"}] = awsInstanceRefs
 	provider.awsManager.asgCache.instanceToAsg = instanceToAsg
 
From f9c05fb19baf5b602675faac87e05fa4120a99ed Mon Sep 17 00:00:00 2001
From: Ravi Sinha
Date: Fri, 21 Jun 2024 13:19:15 -0700
Subject: [PATCH 10/30] resolving gofmt issue

---
 cluster-autoscaler/cloudprovider/aws/aws_cloud_provider_test.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cluster-autoscaler/cloudprovider/aws/aws_cloud_provider_test.go b/cluster-autoscaler/cloudprovider/aws/aws_cloud_provider_test.go
index 80ae85b53256..335cbeb3180a 100644
--- a/cluster-autoscaler/cloudprovider/aws/aws_cloud_provider_test.go
+++ b/cluster-autoscaler/cloudprovider/aws/aws_cloud_provider_test.go
@@ -17,7 +17,6 @@ limitations under the License.
 package aws
 
 import (
-	"testing"
 	"fmt"
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/mock"
@@ -27,6 +26,7 @@ import (
 	"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/aws/aws-sdk-go/aws"
 	"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/aws/aws-sdk-go/service/autoscaling"
 	"k8s.io/autoscaler/cluster-autoscaler/config"
+	"testing"
 )
 
 var testAwsManager = &AwsManager{
From 3c5a97d7b9c6d8556df204301a4abbe6fd3b4c97 Mon Sep 17 00:00:00 2001
From: Ravi Sinha
Date: Tue, 25 Jun 2024 17:31:31 -0700
Subject: [PATCH 11/30] fixing log comments

---
 cluster-autoscaler/cloudprovider/aws/auto_scaling_groups.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cluster-autoscaler/cloudprovider/aws/auto_scaling_groups.go b/cluster-autoscaler/cloudprovider/aws/auto_scaling_groups.go
index 77e7b0b57950..6c34b7c726dc 100644
--- a/cluster-autoscaler/cloudprovider/aws/auto_scaling_groups.go
+++ b/cluster-autoscaler/cloudprovider/aws/auto_scaling_groups.go
@@ -312,7 +312,7 @@ func (m *asgCache) DeleteInstances(instances []*AwsInstanceRef) error {
 	// Check if there are any placeholder instances in the list.
 	if placeHolderInstancesCount > 0 {
 		// Log the check for placeholders in the ASG.
- klog.V(4).Infof("Detected %d placeholder instance(s), checking recent scaling activity for ASG %s", + klog.V(4).Infof("Detected %d placeholder instance(s) in ASG %s", placeHolderInstancesCount, commonAsg.Name) asgNames := []string{commonAsg.Name} From e62dd0d3ddfd826b3ceaa8dcd583d7826d47d04d Mon Sep 17 00:00:00 2001 From: Jon Huhn Date: Tue, 25 Jun 2024 16:51:03 +0000 Subject: [PATCH 12/30] Update Azure cluster-autoscaler e2e cluster template --- ...plate-prow-aks-aso-cluster-autoscaler.yaml | 153 ++++++++++++------ 1 file changed, 105 insertions(+), 48 deletions(-) diff --git a/cluster-autoscaler/cloudprovider/azure/test/templates/cluster-template-prow-aks-aso-cluster-autoscaler.yaml b/cluster-autoscaler/cloudprovider/azure/test/templates/cluster-template-prow-aks-aso-cluster-autoscaler.yaml index 0c1d712dc508..8cf989e9e174 100644 --- a/cluster-autoscaler/cloudprovider/azure/test/templates/cluster-template-prow-aks-aso-cluster-autoscaler.yaml +++ b/cluster-autoscaler/cloudprovider/azure/test/templates/cluster-template-prow-aks-aso-cluster-autoscaler.yaml @@ -19,6 +19,7 @@ metadata: name: ${CLUSTER_NAME} namespace: default spec: + version: ${KUBERNETES_VERSION} resources: - apiVersion: containerservice.azure.com/v1api20231001 kind: ManagedCluster @@ -52,7 +53,54 @@ spec: buildProvenance: ${BUILD_PROVENANCE} creationTimestamp: ${TIMESTAMP} jobName: ${JOB_NAME} - version: ${KUBERNETES_VERSION} + - apiVersion: managedidentity.azure.com/v1api20230131 + kind: UserAssignedIdentity + metadata: + annotations: + serviceoperator.azure.com/credential-from: ${ASO_CREDENTIAL_SECRET_NAME} + name: ${CLUSTER_NAME} + namespace: default + spec: + location: ${AZURE_LOCATION} + operatorSpec: + configMaps: + principalId: + key: principal-id + name: ${CLUSTER_NAME}-identity + owner: + name: ${CLUSTER_NAME} + - apiVersion: managedidentity.azure.com/v1api20230131 + kind: FederatedIdentityCredential + metadata: + annotations: + serviceoperator.azure.com/credential-from: ${ASO_CREDENTIAL_SECRET_NAME} + name: ${CLUSTER_NAME} + namespace: default + spec: + audiences: + - api://AzureADTokenExchange + issuerFromConfig: + key: issuer + name: ${CLUSTER_NAME}-oidc + owner: + name: ${CLUSTER_NAME} + subject: system:serviceaccount:${CLUSTER_AUTOSCALER_NAMESPACE:=default}:${CLUSTER_AUTOSCALER_SERVICEACCOUNT_NAME:=cluster-autoscaler} + - apiVersion: authorization.azure.com/v1api20220401 + kind: RoleAssignment + metadata: + annotations: + serviceoperator.azure.com/credential-from: ${ASO_CREDENTIAL_SECRET_NAME} + name: ${CLUSTER_NAME} + namespace: default + spec: + owner: + armId: /subscriptions/${AZURE_SUBSCRIPTION_ID}/resourceGroups/MC_${CLUSTER_NAME}_${CLUSTER_NAME}_${AZURE_LOCATION} + principalIdFromConfig: + key: principal-id + name: ${CLUSTER_NAME}-identity + roleDefinitionReference: + # Contributor + armId: /subscriptions/${AZURE_SUBSCRIPTION_ID}/providers/Microsoft.Authorization/roleDefinitions/b24988ac-6180-42a0-ab88-20f7382dd24c --- apiVersion: infrastructure.cluster.x-k8s.io/v1alpha1 kind: AzureASOManagedCluster @@ -219,53 +267,62 @@ spec: type: VirtualMachineScaleSets vmSize: ${AZURE_AKS_NODE_MACHINE_TYPE:=Standard_D2s_v3} --- -apiVersion: managedidentity.azure.com/v1api20230131 -kind: UserAssignedIdentity -metadata: - annotations: - serviceoperator.azure.com/credential-from: ${ASO_CREDENTIAL_SECRET_NAME} - name: ${CLUSTER_NAME} - namespace: default -spec: - location: ${AZURE_LOCATION} - operatorSpec: - configMaps: - principalId: - key: principal-id - name: ${CLUSTER_NAME}-identity - owner: - name: ${CLUSTER_NAME} 
---- -apiVersion: managedidentity.azure.com/v1api20230131 -kind: FederatedIdentityCredential +apiVersion: v1 +kind: Secret metadata: - annotations: - serviceoperator.azure.com/credential-from: ${ASO_CREDENTIAL_SECRET_NAME} - name: ${CLUSTER_NAME} - namespace: default -spec: - audiences: - - api://AzureADTokenExchange - issuerFromConfig: - key: issuer - name: ${CLUSTER_NAME}-oidc - owner: - name: ${CLUSTER_NAME} - subject: system:serviceaccount:${CLUSTER_AUTOSCALER_NAMESPACE:=default}:${CLUSTER_AUTOSCALER_SERVICEACCOUNT_NAME:=cluster-autoscaler} + name: ${ASO_CREDENTIAL_SECRET_NAME} +stringData: + AZURE_SUBSCRIPTION_ID: ${AZURE_SUBSCRIPTION_ID} + AZURE_TENANT_ID: ${AZURE_TENANT_ID} + AZURE_CLIENT_ID: ${AZURE_CLIENT_ID_USER_ASSIGNED_IDENTITY} + AUTH_MODE: ${ASO_CREDENTIAL_SECRET_MODE:-workloadidentity} --- -apiVersion: authorization.azure.com/v1api20220401 -kind: RoleAssignment +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole metadata: - annotations: - serviceoperator.azure.com/credential-from: ${ASO_CREDENTIAL_SECRET_NAME} - name: ${CLUSTER_NAME} - namespace: default -spec: - owner: - armId: /subscriptions/${AZURE_SUBSCRIPTION_ID}/resourceGroups/MC_${CLUSTER_NAME}_${CLUSTER_NAME}_${AZURE_LOCATION} - principalIdFromConfig: - key: principal-id - name: ${CLUSTER_NAME}-identity - roleDefinitionReference: - # Contributor - armId: /subscriptions/${AZURE_SUBSCRIPTION_ID}/providers/Microsoft.Authorization/roleDefinitions/b24988ac-6180-42a0-ab88-20f7382dd24c + name: capz-${CLUSTER_NAME} + labels: + cluster.x-k8s.io/aggregate-to-capz-manager: "true" +rules: +- apiGroups: + - authorization.azure.com + resources: + - roleassignments + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - authorization.azure.com + resources: + - roleassignments/status + verbs: + - get + - list + - watch +- apiGroups: + - managedidentity.azure.com + resources: + - userassignedidentities + - federatedidentitycredentials + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - managedidentity.azure.com + resources: + - userassignedidentities/status + - federatedidentitycredentials/status + verbs: + - get + - list + - watch From 296e79a29227a3ae35fe4820219449e3f4263f7c Mon Sep 17 00:00:00 2001 From: Daniel Gutowski Date: Tue, 28 May 2024 01:46:34 -0700 Subject: [PATCH 13/30] Update ProvReq annotations to new prefix Use autoscaling.x-k8s.io rather than cluster-autoscaler.kubernetes.io --- .../autoscaling.x-k8s.io/v1beta1/types.go | 4 ++++ cluster-autoscaler/go.mod | 2 +- .../processors/provreq/pods_filter.go | 13 +++++++------ .../processors/provreq/pods_filter_test.go | 8 +++++--- .../proposals/provisioning-request.md | 7 +++++-- .../orchestrator/wrapper_orchestrator.go | 4 ++-- .../orchestrator/wrapper_orchestrator_test.go | 4 ++-- cluster-autoscaler/provisioningrequest/pods/pods.go | 13 +++++++------ .../provisioningrequest/pods/pods_test.go | 4 ++-- 9 files changed, 35 insertions(+), 24 deletions(-) diff --git a/cluster-autoscaler/apis/provisioningrequest/autoscaling.x-k8s.io/v1beta1/types.go b/cluster-autoscaler/apis/provisioningrequest/autoscaling.x-k8s.io/v1beta1/types.go index 05f9fcbe5a49..cde79b1d6a5e 100644 --- a/cluster-autoscaler/apis/provisioningrequest/autoscaling.x-k8s.io/v1beta1/types.go +++ b/cluster-autoscaler/apis/provisioningrequest/autoscaling.x-k8s.io/v1beta1/types.go @@ -200,4 +200,8 @@ const ( // ProvisioningClassBestEffortAtomicScaleUp denotes that CA try to provision the capacity // in an atomic manner. 
ProvisioningClassBestEffortAtomicScaleUp string = "best-effort-atomic-scale-up.autoscaling.x-k8s.io" + // ProvisioningRequestPodAnnotationKey is a key used to annotate pods consuming provisioning request. + ProvisioningRequestPodAnnotationKey = "autoscaling.x-k8s.io/consume-provisioning-request" + // ProvisioningClassPodAnnotationKey is a key used to add annotation about Provisioning Class + ProvisioningClassPodAnnotationKey = "autoscaling.x-k8s.io/provisioning-class-name" ) diff --git a/cluster-autoscaler/go.mod b/cluster-autoscaler/go.mod index 5c81dd53fa55..e470b6958e73 100644 --- a/cluster-autoscaler/go.mod +++ b/cluster-autoscaler/go.mod @@ -46,7 +46,7 @@ require ( k8s.io/api v0.31.0-alpha.2 k8s.io/apimachinery v0.31.0-alpha.2 k8s.io/apiserver v0.31.0-alpha.2 - k8s.io/autoscaler/cluster-autoscaler/apis v0.0.0-20240606111128-34690b19c19b + k8s.io/autoscaler/cluster-autoscaler/apis v0.0.0-20240627115740-d52e4b9665d7 k8s.io/client-go v0.31.0-alpha.2 k8s.io/cloud-provider v0.30.0-alpha.3 k8s.io/cloud-provider-aws v1.27.0 diff --git a/cluster-autoscaler/processors/provreq/pods_filter.go b/cluster-autoscaler/processors/provreq/pods_filter.go index a75f62dfe67d..fb61ce25f3f7 100644 --- a/cluster-autoscaler/processors/provreq/pods_filter.go +++ b/cluster-autoscaler/processors/provreq/pods_filter.go @@ -22,16 +22,14 @@ import ( apiv1 "k8s.io/api/core/v1" v1 "k8s.io/api/core/v1" + "k8s.io/autoscaler/cluster-autoscaler/apis/provisioningrequest/autoscaling.x-k8s.io/v1beta1" "k8s.io/autoscaler/cluster-autoscaler/context" "k8s.io/autoscaler/cluster-autoscaler/processors/pods" + provreqpods "k8s.io/autoscaler/cluster-autoscaler/provisioningrequest/pods" "k8s.io/autoscaler/cluster-autoscaler/utils/klogx" ) -const ( - // ProvisioningRequestPodAnnotationKey is an annotation on pod that indicate that pod was created by ProvisioningRequest. - ProvisioningRequestPodAnnotationKey = "cluster-autoscaler.kubernetes.io/consume-provisioning-request" - maxProvReqEvent = 50 -) +const maxProvReqEvent = 50 // EventManager is an interface for handling events for provisioning request. type EventManager interface { @@ -102,6 +100,9 @@ func provisioningRequestName(pod *v1.Pod) (string, bool) { if pod == nil || pod.Annotations == nil { return "", false } - provReqName, found := pod.Annotations[ProvisioningRequestPodAnnotationKey] + provReqName, found := pod.Annotations[v1beta1.ProvisioningRequestPodAnnotationKey] + if !found { + provReqName, found = pod.Annotations[provreqpods.DeprecatedProvisioningRequestPodAnnotationKey] + } return provReqName, found } diff --git a/cluster-autoscaler/processors/provreq/pods_filter_test.go b/cluster-autoscaler/processors/provreq/pods_filter_test.go index 07c224dadac3..80977fcdd30c 100644 --- a/cluster-autoscaler/processors/provreq/pods_filter_test.go +++ b/cluster-autoscaler/processors/provreq/pods_filter_test.go @@ -24,17 +24,19 @@ import ( "github.com/stretchr/testify/assert" apiv1 "k8s.io/api/core/v1" v1 "k8s.io/api/core/v1" + "k8s.io/autoscaler/cluster-autoscaler/apis/provisioningrequest/autoscaling.x-k8s.io/v1beta1" "k8s.io/autoscaler/cluster-autoscaler/context" + "k8s.io/autoscaler/cluster-autoscaler/provisioningrequest/pods" . 
"k8s.io/autoscaler/cluster-autoscaler/utils/test" "k8s.io/client-go/tools/record" ) func TestProvisioningRequestPodsFilter(t *testing.T) { prPod1 := BuildTestPod("pr-pod-1", 500, 10) - prPod1.Annotations[ProvisioningRequestPodAnnotationKey] = "pr-class" + prPod1.Annotations[v1beta1.ProvisioningRequestPodAnnotationKey] = "pr-class" prPod2 := BuildTestPod("pr-pod-2", 500, 10) - prPod2.Annotations[ProvisioningRequestPodAnnotationKey] = "pr-class-2" + prPod2.Annotations[pods.DeprecatedProvisioningRequestPodAnnotationKey] = "pr-class-2" pod1 := BuildTestPod("pod-1", 500, 10) pod2 := BuildTestPod("pod-2", 500, 10) @@ -91,7 +93,7 @@ func TestEventManager(t *testing.T) { for i := 0; i < 10; i++ { prPod := BuildTestPod(fmt.Sprintf("pr-pod-%d", i), 10, 10) - prPod.Annotations[ProvisioningRequestPodAnnotationKey] = "pr-class" + prPod.Annotations[v1beta1.ProvisioningRequestPodAnnotationKey] = "pr-class" unscheduledPods = append(unscheduledPods, prPod) } got, err := prFilter.Process(ctx, unscheduledPods) diff --git a/cluster-autoscaler/proposals/provisioning-request.md b/cluster-autoscaler/proposals/provisioning-request.md index 0540cd7d8dc0..b412d6645273 100644 --- a/cluster-autoscaler/proposals/provisioning-request.md +++ b/cluster-autoscaler/proposals/provisioning-request.md @@ -184,10 +184,13 @@ not required in ProvReq’s template, though can be specified): ```yaml annotations: - "cluster-autoscaler.kubernetes.io/provisioning-class-name": "provreq-class-name" - "cluster-autoscaler.kubernetes.io/consume-provisioning-request": "provreq-name" + "autoscaling.x-k8s.io/provisioning-class-name": "provreq-class-name" + "autoscaling.x-k8s.io/consume-provisioning-request": "provreq-name" ``` +Previous prosoal included annotations with prefix `cluster-autoscaler.kubernetes.io` +but were deprecated as part of API reivew. + If those are provided for the pods that consume the ProvReq with `check-capacity.kubernetes.io` class, the CA will not provision the capacity, even if it was needed (as some other pods might have been scheduled on it) and will result in visibility events passed to the ProvReq and pods. 
diff --git a/cluster-autoscaler/provisioningrequest/orchestrator/wrapper_orchestrator.go b/cluster-autoscaler/provisioningrequest/orchestrator/wrapper_orchestrator.go index 5c7f792e55b2..c74e4a1148ab 100644 --- a/cluster-autoscaler/provisioningrequest/orchestrator/wrapper_orchestrator.go +++ b/cluster-autoscaler/provisioningrequest/orchestrator/wrapper_orchestrator.go @@ -19,13 +19,13 @@ package orchestrator import ( appsv1 "k8s.io/api/apps/v1" apiv1 "k8s.io/api/core/v1" + "k8s.io/autoscaler/cluster-autoscaler/apis/provisioningrequest/autoscaling.x-k8s.io/v1beta1" "k8s.io/autoscaler/cluster-autoscaler/clusterstate" "k8s.io/autoscaler/cluster-autoscaler/context" "k8s.io/autoscaler/cluster-autoscaler/core/scaleup" "k8s.io/autoscaler/cluster-autoscaler/core/scaleup/orchestrator" "k8s.io/autoscaler/cluster-autoscaler/estimator" ca_processors "k8s.io/autoscaler/cluster-autoscaler/processors" - "k8s.io/autoscaler/cluster-autoscaler/processors/provreq" "k8s.io/autoscaler/cluster-autoscaler/processors/status" "k8s.io/autoscaler/cluster-autoscaler/utils/errors" "k8s.io/autoscaler/cluster-autoscaler/utils/taints" @@ -87,7 +87,7 @@ func (o *WrapperOrchestrator) ScaleUp( func splitOut(unschedulablePods []*apiv1.Pod) (provReqPods, regularPods []*apiv1.Pod) { for _, pod := range unschedulablePods { - if _, ok := pod.Annotations[provreq.ProvisioningRequestPodAnnotationKey]; ok { + if _, ok := pod.Annotations[v1beta1.ProvisioningRequestPodAnnotationKey]; ok { provReqPods = append(provReqPods, pod) } else { regularPods = append(regularPods, pod) diff --git a/cluster-autoscaler/provisioningrequest/orchestrator/wrapper_orchestrator_test.go b/cluster-autoscaler/provisioningrequest/orchestrator/wrapper_orchestrator_test.go index bf31f59f8b1b..814a213b340f 100644 --- a/cluster-autoscaler/provisioningrequest/orchestrator/wrapper_orchestrator_test.go +++ b/cluster-autoscaler/provisioningrequest/orchestrator/wrapper_orchestrator_test.go @@ -22,11 +22,11 @@ import ( "github.com/stretchr/testify/assert" appsv1 "k8s.io/api/apps/v1" apiv1 "k8s.io/api/core/v1" + "k8s.io/autoscaler/cluster-autoscaler/apis/provisioningrequest/autoscaling.x-k8s.io/v1beta1" "k8s.io/autoscaler/cluster-autoscaler/clusterstate" "k8s.io/autoscaler/cluster-autoscaler/context" "k8s.io/autoscaler/cluster-autoscaler/estimator" ca_processors "k8s.io/autoscaler/cluster-autoscaler/processors" - "k8s.io/autoscaler/cluster-autoscaler/processors/provreq" "k8s.io/autoscaler/cluster-autoscaler/processors/status" "k8s.io/autoscaler/cluster-autoscaler/utils/errors" "k8s.io/autoscaler/cluster-autoscaler/utils/taints" @@ -53,7 +53,7 @@ func TestWrapperScaleUp(t *testing.T) { BuildTestPod("pr-pod-2", 1, 100), } for _, pod := range provReqPods { - pod.Annotations[provreq.ProvisioningRequestPodAnnotationKey] = "true" + pod.Annotations[v1beta1.ProvisioningRequestPodAnnotationKey] = "true" } unschedulablePods := append(regularPods, provReqPods...) 
_, err := o.ScaleUp(unschedulablePods, nil, nil, nil, false) diff --git a/cluster-autoscaler/provisioningrequest/pods/pods.go b/cluster-autoscaler/provisioningrequest/pods/pods.go index 712f85453ab1..99ee740f6945 100644 --- a/cluster-autoscaler/provisioningrequest/pods/pods.go +++ b/cluster-autoscaler/provisioningrequest/pods/pods.go @@ -23,15 +23,16 @@ import ( v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" + "k8s.io/autoscaler/cluster-autoscaler/apis/provisioningrequest/autoscaling.x-k8s.io/v1beta1" "k8s.io/autoscaler/cluster-autoscaler/provisioningrequest/provreqwrapper" "k8s.io/kubernetes/pkg/controller" ) const ( - // ProvisioningRequestPodAnnotationKey is a key used to annotate pods consuming provisioning request. - ProvisioningRequestPodAnnotationKey = "cluster-autoscaler.kubernetes.io/consume-provisioning-request" - // ProvisioningClassPodAnnotationKey is a key used to add annotation about Provisioning Class - ProvisioningClassPodAnnotationKey = "cluster-autoscaler.kubernetes.io/provisioning-class-name" + // DeprecatedProvisioningRequestPodAnnotationKey is a key used to annotate pods consuming provisioning request. + DeprecatedProvisioningRequestPodAnnotationKey = "cluster-autoscaler.kubernetes.io/consume-provisioning-request" + // DeprecatedProvisioningClassPodAnnotationKey is a key used to add annotation about Provisioning Class + DeprecatedProvisioningClassPodAnnotationKey = "cluster-autoscaler.kubernetes.io/provisioning-class-name" ) // PodsForProvisioningRequest returns a list of pods for which Provisioning @@ -77,8 +78,8 @@ func populatePodFields(pr *provreqwrapper.ProvisioningRequest, pod *v1.Pod, i, j if pod.Annotations == nil { pod.Annotations = make(map[string]string) } - pod.Annotations[ProvisioningRequestPodAnnotationKey] = pr.Name - pod.Annotations[ProvisioningClassPodAnnotationKey] = pr.Spec.ProvisioningClassName + pod.Annotations[v1beta1.ProvisioningRequestPodAnnotationKey] = pr.Name + pod.Annotations[v1beta1.ProvisioningClassPodAnnotationKey] = pr.Spec.ProvisioningClassName pod.UID = types.UID(fmt.Sprintf("%s/%s", pod.Namespace, pod.Name)) pod.CreationTimestamp = pr.CreationTimestamp } diff --git a/cluster-autoscaler/provisioningrequest/pods/pods_test.go b/cluster-autoscaler/provisioningrequest/pods/pods_test.go index e33cc83e9422..c8c3e95c5261 100644 --- a/cluster-autoscaler/provisioningrequest/pods/pods_test.go +++ b/cluster-autoscaler/provisioningrequest/pods/pods_test.go @@ -42,8 +42,8 @@ func TestPodsForProvisioningRequest(t *testing.T) { Namespace: "test-namespace", UID: types.UID(fmt.Sprintf("test-namespace/%s", name)), Annotations: map[string]string{ - ProvisioningRequestPodAnnotationKey: prName, - ProvisioningClassPodAnnotationKey: testProvisioningClassName, + v1beta1.ProvisioningRequestPodAnnotationKey: prName, + v1beta1.ProvisioningClassPodAnnotationKey: testProvisioningClassName, }, Labels: map[string]string{}, Finalizers: []string{}, From 97419dbdb707f99a97e2b0adf34de3bbe00eb2d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Artur=20=C5=BByli=C5=84ski?= Date: Fri, 28 Jun 2024 10:49:03 +0200 Subject: [PATCH 14/30] Fix: GCE FetchAvailableDiskTypes zones parsing --- .../gce/autoscaling_gce_client.go | 8 +- .../gce/autoscaling_gce_client_test.go | 33 ++++ .../fixtures/diskTypes_aggregatedList.json | 162 ++++++++++++++++++ 3 files changed, 202 insertions(+), 1 deletion(-) create mode 100644 cluster-autoscaler/cloudprovider/gce/fixtures/diskTypes_aggregatedList.json diff --git 
a/cluster-autoscaler/cloudprovider/gce/autoscaling_gce_client.go b/cluster-autoscaler/cloudprovider/gce/autoscaling_gce_client.go index 483420d2e9e3..dc5c43b402fa 100644 --- a/cluster-autoscaler/cloudprovider/gce/autoscaling_gce_client.go +++ b/cluster-autoscaler/cloudprovider/gce/autoscaling_gce_client.go @@ -695,7 +695,13 @@ func (client *autoscalingGceClientV1) FetchAvailableDiskTypes() (map[string][]st if err := req.Pages(context.TODO(), func(page *gce.DiskTypeAggregatedList) error { for _, diskTypesScopedList := range page.Items { for _, diskType := range diskTypesScopedList.DiskTypes { - availableDiskTypes[diskType.Zone] = append(availableDiskTypes[diskType.Zone], diskType.Name) + // skip data for regions + if diskType.Zone == "" { + continue + } + // convert URL of the zone, into the short name, e.g. us-central1-a + zone := path.Base(diskType.Zone) + availableDiskTypes[zone] = append(availableDiskTypes[zone], diskType.Name) } } return nil diff --git a/cluster-autoscaler/cloudprovider/gce/autoscaling_gce_client_test.go b/cluster-autoscaler/cloudprovider/gce/autoscaling_gce_client_test.go index 1bf27496b574..1f69573b8a37 100644 --- a/cluster-autoscaler/cloudprovider/gce/autoscaling_gce_client_test.go +++ b/cluster-autoscaler/cloudprovider/gce/autoscaling_gce_client_test.go @@ -21,6 +21,7 @@ import ( "encoding/json" "fmt" "net/http" + "os" "regexp" "testing" "time" @@ -563,6 +564,31 @@ func TestFetchMigInstancesInstanceUrlHandling(t *testing.T) { } } +func TestFetchAvailableDiskTypes(t *testing.T) { + server := test_util.NewHttpServerMock() + defer server.Close() + g := newTestAutoscalingGceClient(t, "project-id", server.URL, "") + + // ref: https://cloud.google.com/compute/docs/reference/rest/v1/diskTypes/aggregatedList + getDiskTypesAggregatedListOKResponse, _ := os.ReadFile("fixtures/diskTypes_aggregatedList.json") + server.On("handle", "/projects/project-id/aggregated/diskTypes").Return(string(getDiskTypesAggregatedListOKResponse)).Times(1) + + t.Run("correctly parse a response", func(t *testing.T) { + want := map[string][]string{ + // "us-central1" region should be skipped + "us-central1-a": {"local-ssd", "pd-balanced", "pd-ssd", "pd-standard"}, + "us-central1-b": {"hyperdisk-balanced", "hyperdisk-extreme", "hyperdisk-throughput", "local-ssd", "pd-balanced", "pd-extreme", "pd-ssd", "pd-standard"}, + } + + got, err := g.FetchAvailableDiskTypes() + + assert.NoError(t, err) + if diff := cmp.Diff(want, got, cmpopts.EquateErrors()); diff != "" { + t.Errorf("FetchAvailableDiskTypes(): err diff (-want +got):\n%s", diff) + } + }) +} + func TestUserAgent(t *testing.T) { server := test_util.NewHttpServerMock(test_util.MockFieldUserAgent, test_util.MockFieldResponse) defer server.Close() @@ -748,6 +774,13 @@ func TestAutoscalingClientTimeouts(t *testing.T) { }, httpTimeout: instantTimeout, }, + "FetchAvailableDiskTypes_HttpClientTimeout": { + clientFunc: func(client *autoscalingGceClientV1) error { + _, err := client.FetchAvailableDiskTypes() + return err + }, + httpTimeout: instantTimeout, + }, "FetchMigsWithName_HttpClientTimeout": { clientFunc: func(client *autoscalingGceClientV1) error { _, err := client.FetchMigsWithName("", ®exp.Regexp{}) diff --git a/cluster-autoscaler/cloudprovider/gce/fixtures/diskTypes_aggregatedList.json b/cluster-autoscaler/cloudprovider/gce/fixtures/diskTypes_aggregatedList.json new file mode 100644 index 000000000000..782aecdb9245 --- /dev/null +++ b/cluster-autoscaler/cloudprovider/gce/fixtures/diskTypes_aggregatedList.json @@ -0,0 +1,162 @@ +{ + "kind": 
"compute#diskTypeAggregatedList", + "id": "projects/project-id/aggregated/diskTypes", + "items": { + "regions/us-central1": { + "diskTypes": [ + { + "kind": "compute#diskType", + "id": "30007", + "creationTimestamp": "1969-12-31T16:00:00.000-08:00", + "name": "pd-balanced", + "description": "Balanced Persistent Disk", + "validDiskSize": "10GB-65536GB", + "selfLink": "https://www.googleapis.com/compute/v1/projects/project-id/regions/us-central1/diskTypes/pd-balanced", + "defaultDiskSizeGb": "100", + "region": "https://www.googleapis.com/compute/v1/projects/project-id/regions/us-central1" + } + ] + }, + "zones/us-central1-a": { + "diskTypes": [ + { + "kind": "compute#diskType", + "id": "30003", + "creationTimestamp": "1969-12-31T16:00:00.000-08:00", + "name": "local-ssd", + "description": "Local SSD", + "validDiskSize": "375GB-375GB", + "zone": "https://www.googleapis.com/compute/v1/projects/project-id/zones/us-central1-a", + "selfLink": "https://www.googleapis.com/compute/v1/projects/project-id/zones/us-central1-a/diskTypes/local-ssd", + "defaultDiskSizeGb": "375" + }, + { + "kind": "compute#diskType", + "id": "30007", + "creationTimestamp": "1969-12-31T16:00:00.000-08:00", + "name": "pd-balanced", + "description": "Balanced Persistent Disk", + "validDiskSize": "10GB-65536GB", + "zone": "https://www.googleapis.com/compute/v1/projects/project-id/zones/us-central1-a", + "selfLink": "https://www.googleapis.com/compute/v1/projects/project-id/zones/us-central1-a/diskTypes/pd-balanced", + "defaultDiskSizeGb": "100" + }, + { + "kind": "compute#diskType", + "id": "30002", + "creationTimestamp": "1969-12-31T16:00:00.000-08:00", + "name": "pd-ssd", + "description": "SSD Persistent Disk", + "validDiskSize": "10GB-65536GB", + "zone": "https://www.googleapis.com/compute/v1/projects/project-id/zones/us-central1-a", + "selfLink": "https://www.googleapis.com/compute/v1/projects/project-id/zones/us-central1-a/diskTypes/pd-ssd", + "defaultDiskSizeGb": "100" + }, + { + "kind": "compute#diskType", + "id": "30001", + "creationTimestamp": "1969-12-31T16:00:00.000-08:00", + "name": "pd-standard", + "description": "Standard Persistent Disk", + "validDiskSize": "10GB-65536GB", + "zone": "https://www.googleapis.com/compute/v1/projects/project-id/zones/us-central1-a", + "selfLink": "https://www.googleapis.com/compute/v1/projects/project-id/zones/us-central1-a/diskTypes/pd-standard", + "defaultDiskSizeGb": "500" + } + ] + }, + "zones/us-central1-b": { + "diskTypes": [ + { + "kind": "compute#diskType", + "id": "30014", + "creationTimestamp": "1969-12-31T16:00:00.000-08:00", + "name": "hyperdisk-balanced", + "description": "Hyperdisk Balanced Persistent Disk", + "validDiskSize": "4GB-65536GB", + "zone": "https://www.googleapis.com/compute/v1/projects/project-id/zones/us-central1-b", + "selfLink": "https://www.googleapis.com/compute/v1/projects/project-id/zones/us-central1-b/diskTypes/hyperdisk-balanced", + "defaultDiskSizeGb": "100" + }, + { + "kind": "compute#diskType", + "id": "30012", + "creationTimestamp": "1969-12-31T16:00:00.000-08:00", + "name": "hyperdisk-extreme", + "description": "Hyperdisk Extreme Persistent Disk", + "validDiskSize": "64GB-65536GB", + "zone": "https://www.googleapis.com/compute/v1/projects/project-id/zones/us-central1-b", + "selfLink": "https://www.googleapis.com/compute/v1/projects/project-id/zones/us-central1-b/diskTypes/hyperdisk-extreme", + "defaultDiskSizeGb": "1000" + }, + { + "kind": "compute#diskType", + "id": "30013", + "creationTimestamp": "1969-12-31T16:00:00.000-08:00", + "name": 
"hyperdisk-throughput", + "description": "Hyperdisk Throughput Persistent Disk", + "validDiskSize": "2048GB-32768GB", + "zone": "https://www.googleapis.com/compute/v1/projects/project-id/zones/us-central1-b", + "selfLink": "https://www.googleapis.com/compute/v1/projects/project-id/zones/us-central1-b/diskTypes/hyperdisk-throughput", + "defaultDiskSizeGb": "2048" + }, + { + "kind": "compute#diskType", + "id": "30003", + "creationTimestamp": "1969-12-31T16:00:00.000-08:00", + "name": "local-ssd", + "description": "Local SSD", + "validDiskSize": "375GB-375GB", + "zone": "https://www.googleapis.com/compute/v1/projects/project-id/zones/us-central1-b", + "selfLink": "https://www.googleapis.com/compute/v1/projects/project-id/zones/us-central1-b/diskTypes/local-ssd", + "defaultDiskSizeGb": "375" + }, + { + "kind": "compute#diskType", + "id": "30007", + "creationTimestamp": "1969-12-31T16:00:00.000-08:00", + "name": "pd-balanced", + "description": "Balanced Persistent Disk", + "validDiskSize": "10GB-65536GB", + "zone": "https://www.googleapis.com/compute/v1/projects/project-id/zones/us-central1-b", + "selfLink": "https://www.googleapis.com/compute/v1/projects/project-id/zones/us-central1-b/diskTypes/pd-balanced", + "defaultDiskSizeGb": "100" + }, + { + "kind": "compute#diskType", + "id": "30008", + "creationTimestamp": "1969-12-31T16:00:00.000-08:00", + "name": "pd-extreme", + "description": "Extreme Persistent Disk", + "validDiskSize": "500GB-65536GB", + "zone": "https://www.googleapis.com/compute/v1/projects/project-id/zones/us-central1-b", + "selfLink": "https://www.googleapis.com/compute/v1/projects/project-id/zones/us-central1-b/diskTypes/pd-extreme", + "defaultDiskSizeGb": "1000" + }, + { + "kind": "compute#diskType", + "id": "30002", + "creationTimestamp": "1969-12-31T16:00:00.000-08:00", + "name": "pd-ssd", + "description": "SSD Persistent Disk", + "validDiskSize": "10GB-65536GB", + "zone": "https://www.googleapis.com/compute/v1/projects/project-id/zones/us-central1-b", + "selfLink": "https://www.googleapis.com/compute/v1/projects/project-id/zones/us-central1-b/diskTypes/pd-ssd", + "defaultDiskSizeGb": "100" + }, + { + "kind": "compute#diskType", + "id": "30001", + "creationTimestamp": "1969-12-31T16:00:00.000-08:00", + "name": "pd-standard", + "description": "Standard Persistent Disk", + "validDiskSize": "10GB-65536GB", + "zone": "https://www.googleapis.com/compute/v1/projects/project-id/zones/us-central1-b", + "selfLink": "https://www.googleapis.com/compute/v1/projects/project-id/zones/us-central1-b/diskTypes/pd-standard", + "defaultDiskSizeGb": "500" + } + ] + } + }, + "selfLink": "https://www.googleapis.com/compute/v1/projects/project-id/aggregated/diskTypes" +} \ No newline at end of file From b689df765dc171ad6af475bd9bd5093b23c6e377 Mon Sep 17 00:00:00 2001 From: Marco Voelz Date: Fri, 28 Jun 2024 22:22:04 +0200 Subject: [PATCH 15/30] Remove unsupported parameter from admission-controller --- .../deploy/admission-controller-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vertical-pod-autoscaler/deploy/admission-controller-deployment.yaml b/vertical-pod-autoscaler/deploy/admission-controller-deployment.yaml index f518392697be..64843a74d014 100644 --- a/vertical-pod-autoscaler/deploy/admission-controller-deployment.yaml +++ b/vertical-pod-autoscaler/deploy/admission-controller-deployment.yaml @@ -27,7 +27,7 @@ spec: valueFrom: fieldRef: fieldPath: metadata.namespace - args: ["--v=4", "--stderrthreshold=info", "--reload-cert"] + args: ["--v=4", 
"--stderrthreshold=info"] volumeMounts: - name: tls-certs mountPath: "/etc/tls-certs" From fcba0533d4d378a67cdc4169b7c57bb104260006 Mon Sep 17 00:00:00 2001 From: Jon Huhn Date: Fri, 28 Jun 2024 21:59:16 +0000 Subject: [PATCH 16/30] Add Azure cluster-autoscaler e2e test --- .../cloudprovider/azure/test/Makefile | 11 +- .../azure/test/e2e/azure_test.go | 143 +++++++++ .../azure/test/e2e/e2e_suite_test.go | 90 ++++++ .../cloudprovider/azure/test/go.mod | 84 +++++ .../cloudprovider/azure/test/go.sum | 299 ++++++++++++++++++ ...plate-prow-aks-aso-cluster-autoscaler.yaml | 9 +- 6 files changed, 627 insertions(+), 9 deletions(-) create mode 100644 cluster-autoscaler/cloudprovider/azure/test/e2e/azure_test.go create mode 100644 cluster-autoscaler/cloudprovider/azure/test/e2e/e2e_suite_test.go create mode 100644 cluster-autoscaler/cloudprovider/azure/test/go.mod create mode 100644 cluster-autoscaler/cloudprovider/azure/test/go.sum diff --git a/cluster-autoscaler/cloudprovider/azure/test/Makefile b/cluster-autoscaler/cloudprovider/azure/test/Makefile index 9d5cd99a08b2..979fea3f5082 100644 --- a/cluster-autoscaler/cloudprovider/azure/test/Makefile +++ b/cluster-autoscaler/cloudprovider/azure/test/Makefile @@ -30,7 +30,7 @@ CLUSTER_AUTOSCALER_SERVICEACCOUNT_NAME?=cluster-autoscaler install-e2e: $(HELM) $(MAKE) -C $(CAS_ROOT) build-arch-$(GOARCH) make-image-arch-$(GOARCH) docker push $(IMAGE)-$(GOARCH):$(TAG) - $(HELM) install cluster-autoscaler $(CAS_CHART) \ + $(HELM) upgrade --install cluster-autoscaler $(CAS_CHART) \ --namespace $(CLUSTER_AUTOSCALER_NAMESPACE) --create-namespace \ --set cloudProvider=azure \ --set azureTenantID=$(AZURE_TENANT_ID) \ @@ -46,8 +46,15 @@ install-e2e: $(HELM) --set image.repository=$(IMAGE)-$(GOARCH) \ --set image.tag=$(TAG) \ --set image.pullPolicy=Always \ + --set extraArgs.scale-down-delay-after-add=10s \ + --set extraArgs.scale-down-unneeded-time=10s \ + --set extraArgs.scale-down-candidates-pool-ratio=1.0 \ + --set extraArgs.unremovable-node-recheck-timeout=10s \ + --set extraArgs.skip-nodes-with-system-pods=false \ + --set extraArgs.skip-nodes-with-local-storage=false \ --wait .PHONY: test-e2e test-e2e: install-e2e - # TODO + go run github.com/onsi/ginkgo/v2/ginkgo e2e -v -- \ + -resource-group="$$(KUBECONFIG= kubectl get managedclusters -o jsonpath='{.items[0].status.nodeResourceGroup}')" diff --git a/cluster-autoscaler/cloudprovider/azure/test/e2e/azure_test.go b/cluster-autoscaler/cloudprovider/azure/test/e2e/azure_test.go new file mode 100644 index 000000000000..51938a4dcd10 --- /dev/null +++ b/cluster-autoscaler/cloudprovider/azure/test/e2e/azure_test.go @@ -0,0 +1,143 @@ +/* +Copyright 2024 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package e2e_test + +import ( + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/utils/ptr" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +var _ = Describe("Azure Provider", func() { + var ( + namespace *corev1.Namespace + ) + + BeforeEach(func() { + Eventually(allVMSSStable, "10m", "30s").Should(Succeed()) + + namespace = &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + GenerateName: "azure-e2e-", + }, + } + Expect(k8s.Create(ctx, namespace)).To(Succeed()) + }) + + AfterEach(func() { + Expect(k8s.Delete(ctx, namespace)).To(Succeed()) + Eventually(func() bool { + err := k8s.Get(ctx, client.ObjectKeyFromObject(namespace), &corev1.Namespace{}) + GinkgoLogr.Info("got err", "error", err) + return apierrors.IsNotFound(err) + }, "1m", "5s").Should(BeTrue(), "Namespace "+namespace.Name+" still exists") + }) + + It("scales up AKS node pools when pending Pods exist", func() { + nodes := &corev1.NodeList{} + Expect(k8s.List(ctx, nodes)).To(Succeed()) + nodeCountBefore := len(nodes.Items) + + By("Creating 100 Pods") + // https://raw.githubusercontent.com/kubernetes/website/main/content/en/examples/application/php-apache.yaml + deploy := &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: "php-apache", + Namespace: namespace.Name, + }, + Spec: appsv1.DeploymentSpec{ + Selector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "run": "php-apache", + }, + }, + Replicas: ptr.To[int32](100), + Template: corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + "run": "php-apache", + }, + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "php-apache", + Image: "registry.k8s.io/hpa-example", + Resources: corev1.ResourceRequirements{ + Limits: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("500m"), + }, + Requests: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("200m"), + }, + }, + }, + }, + }, + }, + }, + } + Expect(k8s.Create(ctx, deploy)).To(Succeed()) + + By("Waiting for more Ready Nodes to exist") + Eventually(func() (int, error) { + readyCount := 0 + nodes := &corev1.NodeList{} + if err := k8s.List(ctx, nodes); err != nil { + return 0, err + } + for _, node := range nodes.Items { + for _, cond := range node.Status.Conditions { + if cond.Type == corev1.NodeReady && cond.Status == corev1.ConditionTrue { + readyCount++ + break + } + } + } + return readyCount, nil + }, "10m", "10s").Should(BeNumerically(">", nodeCountBefore)) + + Eventually(allVMSSStable, "10m", "30s").Should(Succeed()) + + By("Deleting 100 Pods") + Expect(k8s.Delete(ctx, deploy)).To(Succeed()) + + By("Waiting for the original number of Nodes to be Ready") + Eventually(func() (int, error) { + readyCount := 0 + nodes := &corev1.NodeList{} + if err := k8s.List(ctx, nodes); err != nil { + return 0, err + } + for _, node := range nodes.Items { + for _, cond := range node.Status.Conditions { + if cond.Type == corev1.NodeReady && cond.Status == corev1.ConditionTrue { + readyCount++ + break + } + } + } + return readyCount, nil + }, "10m", "10s").Should(Equal(nodeCountBefore)) + }) +}) diff --git a/cluster-autoscaler/cloudprovider/azure/test/e2e/e2e_suite_test.go b/cluster-autoscaler/cloudprovider/azure/test/e2e/e2e_suite_test.go new file mode 100644 index 000000000000..03aa817b844d --- /dev/null +++ b/cluster-autoscaler/cloudprovider/azure/test/e2e/e2e_suite_test.go @@ -0,0 +1,90 @@ +/* 
+Copyright 2024 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package e2e_test + +import ( + "context" + "flag" + "os" + "testing" + + "github.com/Azure/azure-sdk-for-go/sdk/azidentity" + "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute/v5" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + corev1 "k8s.io/api/core/v1" + "k8s.io/cli-runtime/pkg/genericclioptions" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +var ( + ctx = context.Background() + vmss *armcompute.VirtualMachineScaleSetsClient + k8s client.Client + resourceGroup string +) + +func init() { + flag.StringVar(&resourceGroup, "resource-group", "", "resource group containing cluster-autoscaler-managed resources") +} + +func TestE2E(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "e2e Suite") +} + +var _ = BeforeSuite(func() { + azCred, err := azidentity.NewDefaultAzureCredential(nil) + Expect(err).NotTo(HaveOccurred()) + vmss, err = armcompute.NewVirtualMachineScaleSetsClient(os.Getenv("AZURE_SUBSCRIPTION_ID"), azCred, nil) + Expect(err).NotTo(HaveOccurred()) + + k8sConfig := genericclioptions.NewConfigFlags(false) + restConfig, err := k8sConfig.ToRESTConfig() + Expect(err).NotTo(HaveOccurred()) + k8s, err = client.New(restConfig, client.Options{}) + Expect(err).NotTo(HaveOccurred()) +}) + +func allVMSSStable(g Gomega) { + pager := vmss.NewListPager(resourceGroup, nil) + expectedNodes := 0 + for pager.More() { + page, err := pager.NextPage(ctx) + g.Expect(err).NotTo(HaveOccurred()) + for _, scaleset := range page.Value { + g.Expect(*scaleset.Properties.ProvisioningState).To(Equal("Succeeded")) + expectedNodes += int(*scaleset.SKU.Capacity) + } + } + + nodes := &corev1.NodeList{} + g.Expect(k8s.List(ctx, nodes)).To(Succeed()) + g.Expect(nodes.Items).To(SatisfyAll( + HaveLen(int(expectedNodes)), + ContainElements(Satisfy(func(node corev1.Node) bool { + ready := false + for _, cond := range node.Status.Conditions { + if cond.Type == corev1.NodeReady && cond.Status == corev1.ConditionTrue { + ready = true + break + } + } + return ready + })), + )) +} diff --git a/cluster-autoscaler/cloudprovider/azure/test/go.mod b/cluster-autoscaler/cloudprovider/azure/test/go.mod new file mode 100644 index 000000000000..1ba78f1dbb40 --- /dev/null +++ b/cluster-autoscaler/cloudprovider/azure/test/go.mod @@ -0,0 +1,84 @@ +module k8s.io/autoscaler/cluster-autoscaler/cloudprovider/azure/test + +go 1.22.3 + +require ( + github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.7.0 + github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute/v5 v5.7.0 + github.com/onsi/ginkgo/v2 v2.19.0 + github.com/onsi/gomega v1.33.1 + k8s.io/api v0.30.2 + k8s.io/apimachinery v0.30.2 + k8s.io/cli-runtime v0.30.2 + k8s.io/utils v0.0.0-20230726121419-3b25d923346b + sigs.k8s.io/controller-runtime v0.18.4 +) + +require ( + github.com/Azure/azure-sdk-for-go/sdk/azcore v1.11.1 // indirect + github.com/Azure/azure-sdk-for-go/sdk/internal v1.8.0 // indirect + github.com/Azure/go-ansiterm 
v0.0.0-20210617225240-d185dfc1b5a1 // indirect + github.com/AzureAD/microsoft-authentication-library-for-go v1.2.2 // indirect + github.com/davecgh/go-spew v1.1.1 // indirect + github.com/emicklei/go-restful/v3 v3.11.0 // indirect + github.com/evanphx/json-patch v4.12.0+incompatible // indirect + github.com/evanphx/json-patch/v5 v5.9.0 // indirect + github.com/go-errors/errors v1.4.2 // indirect + github.com/go-logr/logr v1.4.1 // indirect + github.com/go-openapi/jsonpointer v0.19.6 // indirect + github.com/go-openapi/jsonreference v0.20.2 // indirect + github.com/go-openapi/swag v0.22.3 // indirect + github.com/go-task/slim-sprig/v3 v3.0.0 // indirect + github.com/gogo/protobuf v1.3.2 // indirect + github.com/golang-jwt/jwt/v5 v5.2.1 // indirect + github.com/golang/protobuf v1.5.4 // indirect + github.com/google/btree v1.0.1 // indirect + github.com/google/gnostic-models v0.6.8 // indirect + github.com/google/go-cmp v0.6.0 // indirect + github.com/google/gofuzz v1.2.0 // indirect + github.com/google/pprof v0.0.0-20240424215950-a892ee059fd6 // indirect + github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 // indirect + github.com/google/uuid v1.6.0 // indirect + github.com/gregjones/httpcache v0.0.0-20180305231024-9cad4c3443a7 // indirect + github.com/imdario/mergo v0.3.6 // indirect + github.com/inconshreveable/mousetrap v1.1.0 // indirect + github.com/josharian/intern v1.0.0 // indirect + github.com/json-iterator/go v1.1.12 // indirect + github.com/kylelemons/godebug v1.1.0 // indirect + github.com/liggitt/tabwriter v0.0.0-20181228230101-89fcab3d43de // indirect + github.com/mailru/easyjson v0.7.7 // indirect + github.com/moby/term v0.0.0-20221205130635-1aeaba878587 // indirect + github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect + github.com/modern-go/reflect2 v1.0.2 // indirect + github.com/monochromegane/go-gitignore v0.0.0-20200626010858-205db1a8cc00 // indirect + github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect + github.com/peterbourgon/diskv v2.0.1+incompatible // indirect + github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c // indirect + github.com/pkg/errors v0.9.1 // indirect + github.com/spf13/cobra v1.7.0 // indirect + github.com/spf13/pflag v1.0.5 // indirect + github.com/xlab/treeprint v1.2.0 // indirect + go.starlark.net v0.0.0-20230525235612-a134d8f9ddca // indirect + golang.org/x/crypto v0.24.0 // indirect + golang.org/x/net v0.26.0 // indirect + golang.org/x/oauth2 v0.12.0 // indirect + golang.org/x/sync v0.7.0 // indirect + golang.org/x/sys v0.21.0 // indirect + golang.org/x/term v0.21.0 // indirect + golang.org/x/text v0.16.0 // indirect + golang.org/x/time v0.3.0 // indirect + golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d // indirect + google.golang.org/appengine v1.6.7 // indirect + google.golang.org/protobuf v1.33.0 // indirect + gopkg.in/inf.v0 v0.9.1 // indirect + gopkg.in/yaml.v2 v2.4.0 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect + k8s.io/client-go v0.30.2 // indirect + k8s.io/klog/v2 v2.120.1 // indirect + k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340 // indirect + sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect + sigs.k8s.io/kustomize/api v0.13.5-0.20230601165947-6ce0bf390ce3 // indirect + sigs.k8s.io/kustomize/kyaml v0.14.3-0.20230601165947-6ce0bf390ce3 // indirect + sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect + sigs.k8s.io/yaml v1.3.0 // indirect +) diff --git a/cluster-autoscaler/cloudprovider/azure/test/go.sum 
b/cluster-autoscaler/cloudprovider/azure/test/go.sum new file mode 100644 index 000000000000..e90591e7109a --- /dev/null +++ b/cluster-autoscaler/cloudprovider/azure/test/go.sum @@ -0,0 +1,299 @@ +cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= +github.com/Azure/azure-sdk-for-go/sdk/azcore v1.11.1 h1:E+OJmp2tPvt1W+amx48v1eqbjDYsgN+RzP4q16yV5eM= +github.com/Azure/azure-sdk-for-go/sdk/azcore v1.11.1/go.mod h1:a6xsAQUZg+VsS3TJ05SRp524Hs4pZ/AeFSr5ENf0Yjo= +github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.7.0 h1:tfLQ34V6F7tVSwoTf/4lH5sE0o6eCJuNDTmH09nDpbc= +github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.7.0/go.mod h1:9kIvujWAA58nmPmWB1m23fyWic1kYZMxD9CxaWn4Qpg= +github.com/Azure/azure-sdk-for-go/sdk/internal v1.8.0 h1:jBQA3cKT4L2rWMpgE7Yt3Hwh2aUj8KXjIGLxjHeYNNo= +github.com/Azure/azure-sdk-for-go/sdk/internal v1.8.0/go.mod h1:4OG6tQ9EOP/MT0NMjDlRzWoVFxfu9rN9B2X+tlSVktg= +github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute/v5 v5.7.0 h1:LkHbJbgF3YyvC53aqYGR+wWQDn2Rdp9AQdGndf9QvY4= +github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute/v5 v5.7.0/go.mod h1:QyiQdW4f4/BIfB8ZutZ2s+28RAgfa/pT+zS++ZHyM1I= +github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/internal/v2 v2.0.0 h1:PTFGRSlMKCQelWwxUyYVEUqseBJVemLyqWJjvMyt0do= +github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/internal/v2 v2.0.0/go.mod h1:LRr2FzBTQlONPPa5HREE5+RjSCTXl7BwOvYOaWTqCaI= +github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/resources/armresources v1.1.1 h1:7CBQ+Ei8SP2c6ydQTGCCrS35bDxgTMfoP2miAwK++OU= +github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/resources/armresources v1.1.1/go.mod h1:c/wcGeGx5FUPbM/JltUYHZcKmigwyVLJlDq+4HdtXaw= +github.com/Azure/go-ansiterm v0.0.0-20210617225240-d185dfc1b5a1 h1:UQHMgLO+TxOElx5B5HZ4hJQsoJ/PvUvKRhJHDQXO8P8= +github.com/Azure/go-ansiterm v0.0.0-20210617225240-d185dfc1b5a1/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E= +github.com/AzureAD/microsoft-authentication-library-for-go v1.2.2 h1:XHOnouVk1mxXfQidrMEnLlPk9UMeRtyBTnEFtxkV0kU= +github.com/AzureAD/microsoft-authentication-library-for-go v1.2.2/go.mod h1:wP83P5OoQ5p6ip3ScPr0BAq0BvuPAvacpEuSzyouqAI= +github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= +github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= +github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= +github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= +github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= +github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= +github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= +github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= +github.com/creack/pty v1.1.18 h1:n56/Zwd5o6whRC5PMGretI4IdRLlmBXYNjScPaBgsbY= +github.com/creack/pty v1.1.18/go.mod h1:MOBLtS5ELjhRRrroQr9kyvTxUAFNvYEK993ew/Vr4O4= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/emicklei/go-restful/v3 v3.11.0 h1:rAQeMHw1c7zTmncogyy8VvRZwtkmkZ4FxERmMY4rD+g= +github.com/emicklei/go-restful/v3 v3.11.0/go.mod 
h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= +github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= +github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= +github.com/evanphx/json-patch v4.12.0+incompatible h1:4onqiflcdA9EOZ4RxV643DvftH5pOlLGNtQ5lPWQu84= +github.com/evanphx/json-patch v4.12.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLiYLvXMP4fmwYFNcr97nuDLSk= +github.com/evanphx/json-patch/v5 v5.9.0 h1:kcBlZQbplgElYIlo/n1hJbls2z/1awpXxpRi0/FOJfg= +github.com/evanphx/json-patch/v5 v5.9.0/go.mod h1:VNkHZ/282BpEyt/tObQO8s5CMPmYYq14uClGH4abBuQ= +github.com/go-errors/errors v1.4.2 h1:J6MZopCL4uSllY1OfXM374weqZFFItUbrImctkmUxIA= +github.com/go-errors/errors v1.4.2/go.mod h1:sIVyrIiJhuEF+Pj9Ebtd6P/rEYROXFi3BopGUQ5a5Og= +github.com/go-logr/logr v1.4.1 h1:pKouT5E8xu9zeFC39JXRDukb6JFQPXM5p5I91188VAQ= +github.com/go-logr/logr v1.4.1/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-logr/zapr v1.3.0 h1:XGdV8XW8zdwFiwOA2Dryh1gj2KRQyOOoNmBy4EplIcQ= +github.com/go-logr/zapr v1.3.0/go.mod h1:YKepepNBd1u/oyhd/yQmtjVXmm9uML4IXUgMOwR8/Gg= +github.com/go-openapi/jsonpointer v0.19.6 h1:eCs3fxoIi3Wh6vtgmLTOjdhSpiqphQ+DaPn38N2ZdrE= +github.com/go-openapi/jsonpointer v0.19.6/go.mod h1:osyAmYz/mB/C3I+WsTTSgw1ONzaLJoLCyoi6/zppojs= +github.com/go-openapi/jsonreference v0.20.2 h1:3sVjiK66+uXK/6oQ8xgcRKcFgQ5KXa2KvnJRumpMGbE= +github.com/go-openapi/jsonreference v0.20.2/go.mod h1:Bl1zwGIM8/wsvqjsOQLJ/SH+En5Ap4rVB5KVcIDZG2k= +github.com/go-openapi/swag v0.22.3 h1:yMBqmnQ0gyZvEb/+KzuWZOXgllrXT4SADYbvDaXHv/g= +github.com/go-openapi/swag v0.22.3/go.mod h1:UzaqsxGiab7freDnrUUra0MwWfN/q7tE4j+VcZ0yl14= +github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI= +github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8= +github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= +github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= +github.com/golang-jwt/jwt/v5 v5.2.1 h1:OuVbFODueb089Lh128TAcimifWaLhJwVflnrgM17wHk= +github.com/golang-jwt/jwt/v5 v5.2.1/go.mod h1:pqrtFR0X4osieyHYxtmOUWsAWrfe1Q5UVIyoH402zdk= +github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= +github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= +github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8= +github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA= +github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs= +github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w= +github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0= +github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8= +github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= +github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= 
+github.com/google/btree v1.0.1 h1:gK4Kx5IaGY9CD5sPJ36FHiBJ6ZXl0kilRiiCj+jdYp4= +github.com/google/btree v1.0.1/go.mod h1:xXMiIv4Fb/0kKde4SpL7qlzvu5cMJDRkFDxJfI9uaxA= +github.com/google/gnostic-models v0.6.8 h1:yo/ABAfM5IMRsS1VnXjTBvUb61tFIHozhlYvRgGre9I= +github.com/google/gnostic-models v0.6.8/go.mod h1:5n7qKqH0f5wFt+aWF8CW6pZLLNOfYuF5OpfBSENuI8U= +github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= +github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= +github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= +github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.1/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= +github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0= +github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/google/pprof v0.0.0-20240424215950-a892ee059fd6 h1:k7nVchz72niMH6YLQNvHSdIE7iqsQxK1P41mySCvssg= +github.com/google/pprof v0.0.0-20240424215950-a892ee059fd6/go.mod h1:kf6iHlnVGwgKolg33glAes7Yg/8iWP8ukqeldJSO7jw= +github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 h1:El6M4kTTCOh6aBiKaUGG7oYTSPP8MxqL4YI3kZKwcP4= +github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510/go.mod h1:pupxD2MaaD3pAXIBCelhxNneeOaAeabZDe5s4K6zSpQ= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/gregjones/httpcache v0.0.0-20180305231024-9cad4c3443a7 h1:pdN6V1QBWetyv/0+wjACpqVH+eVULgEjkurDLq3goeM= +github.com/gregjones/httpcache v0.0.0-20180305231024-9cad4c3443a7/go.mod h1:FecbI9+v66THATjSRHfNgh1IVFe/9kFxbXtjV0ctIMA= +github.com/imdario/mergo v0.3.6 h1:xTNEAn+kxVO7dTZGu0CegyqKZmoWFI0rF8UxjlB2d28= +github.com/imdario/mergo v0.3.6/go.mod h1:2EnlNZ0deacrJVfApfmtdGgDfMuh/nq6Ok1EcJh5FfA= +github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= +github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= +github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= +github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= +github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= +github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= +github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= +github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= +github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= +github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= +github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= +github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= +github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= 
+github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc= +github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= +github.com/liggitt/tabwriter v0.0.0-20181228230101-89fcab3d43de h1:9TO3cAIGXtEhnIaL+V+BEER86oLrvS+kWobKpbJuye0= +github.com/liggitt/tabwriter v0.0.0-20181228230101-89fcab3d43de/go.mod h1:zAbeS9B/r2mtpb6U+EI2rYA5OAXxsYw6wTamcNW+zcE= +github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= +github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= +github.com/moby/term v0.0.0-20221205130635-1aeaba878587 h1:HfkjXDfhgVaN5rmueG8cL8KKeFNecRCXFhaJ2qZ5SKA= +github.com/moby/term v0.0.0-20221205130635-1aeaba878587/go.mod h1:8FzsFHVUBGZdbDsJw/ot+X+d5HLUbvklYLJ9uGfcI3Y= +github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= +github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= +github.com/monochromegane/go-gitignore v0.0.0-20200626010858-205db1a8cc00 h1:n6/2gBQ3RWajuToeY6ZtZTIKv2v7ThUy5KKusIT0yc0= +github.com/monochromegane/go-gitignore v0.0.0-20200626010858-205db1a8cc00/go.mod h1:Pm3mSP3c5uWn86xMLZ5Sa7JB9GsEZySvHYXCTK4E9q4= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= +github.com/onsi/ginkgo/v2 v2.19.0 h1:9Cnnf7UHo57Hy3k6/m5k3dRfGTMXGvxhHFvkDTCTpvA= +github.com/onsi/ginkgo/v2 v2.19.0/go.mod h1:rlwLi9PilAFJ8jCg9UE1QP6VBpd6/xj3SRC0d6TU0To= +github.com/onsi/gomega v1.33.1 h1:dsYjIxxSR755MDmKVsaFQTE22ChNBcuuTWgkUDSubOk= +github.com/onsi/gomega v1.33.1/go.mod h1:U4R44UsT+9eLIaYRB2a5qajjtQYn0hauxvRm16AVYg0= +github.com/peterbourgon/diskv v2.0.1+incompatible h1:UBdAOUP5p4RWqPBg048CAvpKN+vxiaj6gdUUzhl4XmI= +github.com/peterbourgon/diskv v2.0.1+incompatible/go.mod h1:uqqh8zWWbv1HBMNONnaR/tNboyR3/BZd58JJSHlUSCU= +github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c h1:+mdjkGKdHQG3305AYmdv1U2eRNDiU2ErMBj1gwrq8eQ= +github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c/go.mod h1:7rwL4CYBLnjLxUqIJNnCWiEdr3bn6IUYi15bNlnbCCU= +github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= +github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= +github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8= +github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99yedzYV+kq4uf4= +github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= +github.com/sergi/go-diff v1.1.0 
h1:we8PVUC3FE2uYfodKH/nBHMSetSfHDR6scGdBi+erh0= +github.com/sergi/go-diff v1.1.0/go.mod h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM= +github.com/spf13/cobra v1.7.0 h1:hyqWnYt1ZQShIddO5kBpj3vu05/++x6tJ6dg8EC572I= +github.com/spf13/cobra v1.7.0/go.mod h1:uLxZILRyS/50WlhOIKD7W6V5bgeIt+4sICxh6uRMrb0= +github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= +github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/objx v0.5.0 h1:1zr/of2m5FGMsad5YfcqgdqdWrIhu+EBEJRhR1U7z/c= +github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= +github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= +github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= +github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/xlab/treeprint v1.2.0 h1:HzHnuAF1plUN2zGlAFHbSQP2qJ0ZAD3XF5XD7OesXRQ= +github.com/xlab/treeprint v1.2.0/go.mod h1:gj5Gd3gPdKtR1ikdDK6fnFLdmIS0X30kTTuNd/WEJu0= +github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +go.starlark.net v0.0.0-20230525235612-a134d8f9ddca h1:VdD38733bfYv5tUZwEIskMM93VanwNIi5bIKnDrJdEY= +go.starlark.net v0.0.0-20230525235612-a134d8f9ddca/go.mod h1:jxU+3+j+71eXOW14274+SmmuW82qJzl6iZSeqEtTGds= +go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= +go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= +go.uber.org/zap v1.26.0 h1:sI7k6L95XOKS281NhVKOFCUNIvv9e0w4BF8N3u+tCRo= +go.uber.org/zap v1.26.0/go.mod h1:dtElttAiwGvoJ/vj4IwHBS/gXsEu/pZ50mUIRWuG0so= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/crypto v0.24.0 h1:mnl8DM0o513X8fdIkmyFE/5hTYxbwYOjDS/+rK6qpRI= +golang.org/x/crypto v0.24.0/go.mod h1:Z1PMYSOR5nyMcyAVAIQSKCDwalqy85Aqn1x3Ws4L5DM= +golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= +golang.org/x/exp v0.0.0-20220722155223-a9213eeb770e h1:+WEEuIdZHnUeJJmEUjyYC2gfUMj69yZXw17EnHg/otA= +golang.org/x/exp v0.0.0-20220722155223-a9213eeb770e/go.mod h1:Kr81I6Kryrl9sr8s2FK3vxD90NdsKWRuOIl2O4CvYbA= +golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= +golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= +golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= +golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod 
v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= +golang.org/x/net v0.26.0 h1:soB7SVo0PWrY4vPW/+ay0jKDNScG2X9wFeYlXIvJsOQ= +golang.org/x/net v0.26.0/go.mod h1:5YKkiSynbBIh3p6iOc/vibscux0x38BZDkn8sCUPxHE= +golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= +golang.org/x/oauth2 v0.12.0 h1:smVPGxink+n1ZI5pkQa8y6fZT0RW0MgCO5bFpepy4B4= +golang.org/x/oauth2 v0.12.0/go.mod h1:A74bZ3aGXgCY0qaIC9Ahg6Lglin4AMAco8cIv9baba4= +golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.7.0 h1:YsImfSBoP9QPYL0xyKJPq0gcaJdG3rInoqxTWbfQu9M= +golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20210616094352-59db8d763f22/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.21.0 h1:rF+pYz3DAGSQAxAu1CbC7catZg4ebC4UIeIhKxBZvws= +golang.org/x/sys v0.21.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/term v0.0.0-20220526004731-065cf7ba2467/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= +golang.org/x/term v0.21.0 h1:WVXCp+/EBEHOj53Rvu+7KiT/iElMrO8ACK16SMZ3jaA= +golang.org/x/term v0.21.0/go.mod h1:ooXLefLobQVslOqselCNF4SxFAaoS6KujMbsGzSDmX0= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.2/go.mod 
h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.16.0 h1:a94ExnEXNtEwYLGJSIUxnWoxoRz/ZcCsV63ROupILh4= +golang.org/x/text v0.16.0/go.mod h1:GhwF1Be+LQoKShO3cGOHzqOgRrGaYc9AvblQOmPVHnI= +golang.org/x/time v0.3.0 h1:rg5rLMjNzMS1RkNLzCG38eapWhnYLFYXDXj2gOlr8j4= +golang.org/x/time v0.3.0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= +golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= +golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= +golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= +golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d h1:vU5i/LfpvrRCpgM/VPfJLg5KjxD3E+hfT1SH+d9zLwg= +golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d/go.mod h1:aiJjzUbINMkxbQROHiO6hDPo2LHcIPhhQsa9DLh0yGk= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= +google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= +google.golang.org/appengine v1.6.7 h1:FZR1q0exgwxzPzp/aF+VccGrSfxfPpkBqjIIEq3ru6c= +google.golang.org/appengine v1.6.7/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= +google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= +google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= +google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo= +google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= +google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= +google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= +google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= +google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= +google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= +google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE= +google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo= +google.golang.org/protobuf 
v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= +google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= +google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c= +google.golang.org/protobuf v1.33.0 h1:uNO2rsAINq/JlFpSdYEKIZ0uKD/R9cpdv0T+yoGwGmI= +google.golang.org/protobuf v1.33.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= +gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc= +gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= +gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= +gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= +honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= +k8s.io/api v0.30.2 h1:+ZhRj+28QT4UOH+BKznu4CBgPWgkXO7XAvMcMl0qKvI= +k8s.io/api v0.30.2/go.mod h1:ULg5g9JvOev2dG0u2hig4Z7tQ2hHIuS+m8MNZ+X6EmI= +k8s.io/apiextensions-apiserver v0.30.1 h1:4fAJZ9985BmpJG6PkoxVRpXv9vmPUOVzl614xarePws= +k8s.io/apiextensions-apiserver v0.30.1/go.mod h1:R4GuSrlhgq43oRY9sF2IToFh7PVlF1JjfWdoG3pixk4= +k8s.io/apimachinery v0.30.2 h1:fEMcnBj6qkzzPGSVsAZtQThU62SmQ4ZymlXRC5yFSCg= +k8s.io/apimachinery v0.30.2/go.mod h1:iexa2somDaxdnj7bha06bhb43Zpa6eWH8N8dbqVjTUc= +k8s.io/cli-runtime v0.30.2 h1:ooM40eEJusbgHNEqnHziN9ZpLN5U4WcQGsdLKVxpkKE= +k8s.io/cli-runtime v0.30.2/go.mod h1:Y4g/2XezFyTATQUbvV5WaChoUGhojv/jZAtdp5Zkm0A= +k8s.io/client-go v0.30.2 h1:sBIVJdojUNPDU/jObC+18tXWcTJVcwyqS9diGdWHk50= +k8s.io/client-go v0.30.2/go.mod h1:JglKSWULm9xlJLx4KCkfLLQ7XwtlbflV6uFFSHTMgVs= +k8s.io/klog/v2 v2.120.1 h1:QXU6cPEOIslTGvZaXvFWiP9VKyeet3sawzTOvdXb4Vw= +k8s.io/klog/v2 v2.120.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE= +k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340 h1:BZqlfIlq5YbRMFko6/PM7FjZpUb45WallggurYhKGag= +k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340/go.mod h1:yD4MZYeKMBwQKVht279WycxKyM84kkAx2DPrTXaeb98= +k8s.io/utils v0.0.0-20230726121419-3b25d923346b h1:sgn3ZU783SCgtaSJjpcVVlRqd6GSnlTLKgpAAttJvpI= +k8s.io/utils v0.0.0-20230726121419-3b25d923346b/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= +sigs.k8s.io/controller-runtime v0.18.4 h1:87+guW1zhvuPLh1PHybKdYFLU0YJp4FhJRmiHvm5BZw= +sigs.k8s.io/controller-runtime v0.18.4/go.mod h1:TVoGrfdpbA9VRFaRnKgk9P5/atA0pMwq+f+msb9M8Sg= +sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd h1:EDPBXCAspyGV4jQlpZSudPeMmr1bNJefnuqLsRAsHZo= +sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd/go.mod h1:B8JuhiUyNFVKdsE8h686QcCxMaH6HrOAZj4vswFpcB0= +sigs.k8s.io/kustomize/api v0.13.5-0.20230601165947-6ce0bf390ce3 h1:XX3Ajgzov2RKUdc5jW3t5jwY7Bo7dcRm+tFxT+NfgY0= +sigs.k8s.io/kustomize/api 
v0.13.5-0.20230601165947-6ce0bf390ce3/go.mod h1:9n16EZKMhXBNSiUC5kSdFQJkdH3zbxS/JoO619G1VAY= +sigs.k8s.io/kustomize/kyaml v0.14.3-0.20230601165947-6ce0bf390ce3 h1:W6cLQc5pnqM7vh3b7HvGNfXrJ/xL6BDMS0v1V/HHg5U= +sigs.k8s.io/kustomize/kyaml v0.14.3-0.20230601165947-6ce0bf390ce3/go.mod h1:JWP1Fj0VWGHyw3YUPjXSQnRnrwezrZSrApfX5S0nIag= +sigs.k8s.io/structured-merge-diff/v4 v4.4.1 h1:150L+0vs/8DA78h1u02ooW1/fFq/Lwr+sGiqlzvrtq4= +sigs.k8s.io/structured-merge-diff/v4 v4.4.1/go.mod h1:N8hJocpFajUSSeSJ9bOZ77VzejKZaXsTtZo4/u7Io08= +sigs.k8s.io/yaml v1.3.0 h1:a2VclLzOGrwOHDiV8EfBGhvjHvP46CtW5j6POvhYGGo= +sigs.k8s.io/yaml v1.3.0/go.mod h1:GeOyir5tyXNByN85N/dRIT9es5UQNerPYEKK56eTBm8= diff --git a/cluster-autoscaler/cloudprovider/azure/test/templates/cluster-template-prow-aks-aso-cluster-autoscaler.yaml b/cluster-autoscaler/cloudprovider/azure/test/templates/cluster-template-prow-aks-aso-cluster-autoscaler.yaml index 8cf989e9e174..fc17628ee2bd 100644 --- a/cluster-autoscaler/cloudprovider/azure/test/templates/cluster-template-prow-aks-aso-cluster-autoscaler.yaml +++ b/cluster-autoscaler/cloudprovider/azure/test/templates/cluster-template-prow-aks-aso-cluster-autoscaler.yaml @@ -131,7 +131,7 @@ metadata: namespace: default spec: clusterName: ${CLUSTER_NAME} - replicas: ${WORKER_MACHINE_COUNT:=2} + replicas: 1 template: metadata: {} spec: @@ -162,11 +162,6 @@ spec: mode: System owner: name: ${CLUSTER_NAME} - tags: - cluster-autoscaler-enabled: "true" - cluster-autoscaler-name: ${CLUSTER_NAME} - max: "5" - min: "1" type: VirtualMachineScaleSets vmSize: ${AZURE_AKS_NODE_MACHINE_TYPE:=Standard_D2s_v3} --- @@ -179,7 +174,7 @@ metadata: namespace: default spec: clusterName: ${CLUSTER_NAME} - replicas: ${WORKER_MACHINE_COUNT:=2} + replicas: 1 template: metadata: {} spec: From 999dc875746561692002e1b7d75a2cdd4ca0ed8d Mon Sep 17 00:00:00 2001 From: Jon Huhn Date: Mon, 1 Jul 2024 16:47:31 +0000 Subject: [PATCH 17/30] Update Azure cluster-autoscaler e2e test --- .../cloudprovider/azure/test/.gitignore | 1 + .../cloudprovider/azure/test/Makefile | 4 ++- .../azure/test/e2e/azure_test.go | 28 +++++++++---------- .../azure/test/e2e/e2e_suite_test.go | 6 ++-- 4 files changed, 19 insertions(+), 20 deletions(-) create mode 100644 cluster-autoscaler/cloudprovider/azure/test/.gitignore diff --git a/cluster-autoscaler/cloudprovider/azure/test/.gitignore b/cluster-autoscaler/cloudprovider/azure/test/.gitignore new file mode 100644 index 000000000000..9c2b30a747bf --- /dev/null +++ b/cluster-autoscaler/cloudprovider/azure/test/.gitignore @@ -0,0 +1 @@ +_artifacts diff --git a/cluster-autoscaler/cloudprovider/azure/test/Makefile b/cluster-autoscaler/cloudprovider/azure/test/Makefile index 979fea3f5082..c5f117714b24 100644 --- a/cluster-autoscaler/cloudprovider/azure/test/Makefile +++ b/cluster-autoscaler/cloudprovider/azure/test/Makefile @@ -54,7 +54,9 @@ install-e2e: $(HELM) --set extraArgs.skip-nodes-with-local-storage=false \ --wait +ARTIFACTS?=_artifacts + .PHONY: test-e2e test-e2e: install-e2e - go run github.com/onsi/ginkgo/v2/ginkgo e2e -v -- \ + go run github.com/onsi/ginkgo/v2/ginkgo -v --trace --output-dir "$(ARTIFACTS)" --junit-report="junit.e2e_suite.1.xml" e2e -- \ -resource-group="$$(KUBECONFIG= kubectl get managedclusters -o jsonpath='{.items[0].status.nodeResourceGroup}')" diff --git a/cluster-autoscaler/cloudprovider/azure/test/e2e/azure_test.go b/cluster-autoscaler/cloudprovider/azure/test/e2e/azure_test.go index 51938a4dcd10..6129570f1f73 100644 --- a/cluster-autoscaler/cloudprovider/azure/test/e2e/azure_test.go 
+++ b/cluster-autoscaler/cloudprovider/azure/test/e2e/azure_test.go @@ -48,7 +48,6 @@ var _ = Describe("Azure Provider", func() { Expect(k8s.Delete(ctx, namespace)).To(Succeed()) Eventually(func() bool { err := k8s.Get(ctx, client.ObjectKeyFromObject(namespace), &corev1.Namespace{}) - GinkgoLogr.Info("got err", "error", err) return apierrors.IsNotFound(err) }, "1m", "5s").Should(BeTrue(), "Namespace "+namespace.Name+" still exists") }) @@ -123,21 +122,20 @@ var _ = Describe("Azure Provider", func() { Expect(k8s.Delete(ctx, deploy)).To(Succeed()) By("Waiting for the original number of Nodes to be Ready") - Eventually(func() (int, error) { - readyCount := 0 + Eventually(func(g Gomega) { nodes := &corev1.NodeList{} - if err := k8s.List(ctx, nodes); err != nil { - return 0, err - } - for _, node := range nodes.Items { - for _, cond := range node.Status.Conditions { - if cond.Type == corev1.NodeReady && cond.Status == corev1.ConditionTrue { - readyCount++ - break + g.Expect(k8s.List(ctx, nodes)).To(Succeed()) + g.Expect(nodes.Items).To(SatisfyAll( + HaveLen(nodeCountBefore), + ContainElements(Satisfy(func(node corev1.Node) bool { + for _, cond := range node.Status.Conditions { + if cond.Type == corev1.NodeReady && cond.Status == corev1.ConditionTrue { + return true + } } - } - } - return readyCount, nil - }, "10m", "10s").Should(Equal(nodeCountBefore)) + return false + })), + )) + }, "20m", "10s").Should(Succeed()) }) }) diff --git a/cluster-autoscaler/cloudprovider/azure/test/e2e/e2e_suite_test.go b/cluster-autoscaler/cloudprovider/azure/test/e2e/e2e_suite_test.go index 03aa817b844d..50c649737387 100644 --- a/cluster-autoscaler/cloudprovider/azure/test/e2e/e2e_suite_test.go +++ b/cluster-autoscaler/cloudprovider/azure/test/e2e/e2e_suite_test.go @@ -77,14 +77,12 @@ func allVMSSStable(g Gomega) { g.Expect(nodes.Items).To(SatisfyAll( HaveLen(int(expectedNodes)), ContainElements(Satisfy(func(node corev1.Node) bool { - ready := false for _, cond := range node.Status.Conditions { if cond.Type == corev1.NodeReady && cond.Status == corev1.ConditionTrue { - ready = true - break + return true } } - return ready + return false })), )) } From 3f18f060a0471dd31c4b9c42f5e1e909ca228a53 Mon Sep 17 00:00:00 2001 From: Jon Huhn Date: Mon, 1 Jul 2024 22:15:03 +0000 Subject: [PATCH 18/30] cluster-autoscaler Azure e2e: move Helm to ginkgo --- .../cloudprovider/azure/test/Makefile | 57 +-- .../azure/test/e2e/azure_test.go | 11 + .../azure/test/e2e/e2e_suite_test.go | 101 +++++- .../cloudprovider/azure/test/go.mod | 87 ++++- .../cloudprovider/azure/test/go.sum | 330 +++++++++++++++++- 5 files changed, 515 insertions(+), 71 deletions(-) diff --git a/cluster-autoscaler/cloudprovider/azure/test/Makefile b/cluster-autoscaler/cloudprovider/azure/test/Makefile index c5f117714b24..b49a6044325e 100644 --- a/cluster-autoscaler/cloudprovider/azure/test/Makefile +++ b/cluster-autoscaler/cloudprovider/azure/test/Makefile @@ -1,62 +1,25 @@ REPO_ROOT:=$(shell git rev-parse --show-toplevel) CAS_ROOT:=$(REPO_ROOT)/cluster-autoscaler -CAS_CHART:=$(REPO_ROOT)/charts/cluster-autoscaler include $(CAS_ROOT)/Makefile -TOOLS_BIN_DIR := $(abspath bin) - -export PATH := $(TOOLS_BIN_DIR):$(PATH) - -HELM_VER := v3.15.2 -HELM_BIN := helm -HELM := $(TOOLS_BIN_DIR)/$(HELM_BIN)-$(HELM_VER) - -helm: $(HELM) - -$(HELM): - mkdir -p $(TOOLS_BIN_DIR) - rm -f "$(TOOLS_BIN_DIR)/$(HELM_BIN)*" - curl --retry 3 -fsSL -o $(TOOLS_BIN_DIR)/get_helm.sh https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 - chmod 700 $(TOOLS_BIN_DIR)/get_helm.sh 
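The scripted Helm bootstrap being deleted from this Makefile is re-expressed in Go further down in this patch (see `ensureHelmValues` in the suite), using the `helm.sh/helm/v3` action API. For orientation, a condensed sketch of that install flow under the same API; the chart path, namespace, release name, and values below are placeholders rather than the suite's actual settings:

```go
package main

import (
	"log"

	"helm.sh/helm/v3/pkg/action"
	"helm.sh/helm/v3/pkg/chart/loader"
	"helm.sh/helm/v3/pkg/cli"
)

func main() {
	// Resolve kubeconfig and namespace the same way the helm CLI would.
	env := cli.New()
	cfg := new(action.Configuration)
	if err := cfg.Init(env.RESTClientGetter(), "default", "secret", log.Printf); err != nil {
		log.Fatal(err)
	}

	// Load an unpacked chart from disk (placeholder path).
	chart, err := loader.Load("./charts/example")
	if err != nil {
		log.Fatal(err)
	}

	// Install it with explicit values, mirroring `helm install --set ...`.
	install := action.NewInstall(cfg)
	install.ReleaseName = "example"
	install.Namespace = "default"
	install.Wait = true
	if _, err := install.Run(chart, map[string]interface{}{"replicaCount": 1}); err != nil {
		log.Fatal(err)
	}
}
```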
- USE_SUDO=false HELM_INSTALL_DIR=$(TOOLS_BIN_DIR) DESIRED_VERSION=$(HELM_VER) BINARY_NAME=$(HELM_BIN)-$(HELM_VER) $(TOOLS_BIN_DIR)/get_helm.sh - ln -sf $(HELM) $(TOOLS_BIN_DIR)/$(HELM_BIN) - rm -f $(TOOLS_BIN_DIR)/get_helm.sh - CLUSTER_AUTOSCALER_NAMESPACE?=default CLUSTER_AUTOSCALER_SERVICEACCOUNT_NAME?=cluster-autoscaler -.PHONY: install-e2e -install-e2e: $(HELM) +.PHONY: build-e2e +build-e2e: $(MAKE) -C $(CAS_ROOT) build-arch-$(GOARCH) make-image-arch-$(GOARCH) docker push $(IMAGE)-$(GOARCH):$(TAG) - $(HELM) upgrade --install cluster-autoscaler $(CAS_CHART) \ - --namespace $(CLUSTER_AUTOSCALER_NAMESPACE) --create-namespace \ - --set cloudProvider=azure \ - --set azureTenantID=$(AZURE_TENANT_ID) \ - --set azureSubscriptionID=$(AZURE_SUBSCRIPTION_ID) \ - --set azureUseWorkloadIdentityExtension=true \ - --set-string podLabels."azure\.workload\.identity/use"=true \ - --set rbac.serviceAccount.name=$(CLUSTER_AUTOSCALER_SERVICEACCOUNT_NAME) \ - --set rbac.serviceAccount.annotations."azure\.workload\.identity/tenant-id"=$(AZURE_TENANT_ID) \ - --set rbac.serviceAccount.annotations."azure\.workload\.identity/client-id"="$$(KUBECONFIG= kubectl get userassignedidentities -o jsonpath='{.items[0].status.clientId}')" \ - --set autoDiscovery.clusterName="$$(KUBECONFIG= kubectl get cluster -o jsonpath='{.items[0].metadata.name}')" \ - --set azureResourceGroup="$$(KUBECONFIG= kubectl get managedclusters -o jsonpath='{.items[0].status.nodeResourceGroup}')" \ - --set nodeSelector."kubernetes\.io/os"=linux \ - --set image.repository=$(IMAGE)-$(GOARCH) \ - --set image.tag=$(TAG) \ - --set image.pullPolicy=Always \ - --set extraArgs.scale-down-delay-after-add=10s \ - --set extraArgs.scale-down-unneeded-time=10s \ - --set extraArgs.scale-down-candidates-pool-ratio=1.0 \ - --set extraArgs.unremovable-node-recheck-timeout=10s \ - --set extraArgs.skip-nodes-with-system-pods=false \ - --set extraArgs.skip-nodes-with-local-storage=false \ - --wait ARTIFACTS?=_artifacts .PHONY: test-e2e -test-e2e: install-e2e +test-e2e: build-e2e go run github.com/onsi/ginkgo/v2/ginkgo -v --trace --output-dir "$(ARTIFACTS)" --junit-report="junit.e2e_suite.1.xml" e2e -- \ - -resource-group="$$(KUBECONFIG= kubectl get managedclusters -o jsonpath='{.items[0].status.nodeResourceGroup}')" + -resource-group="$$(KUBECONFIG= kubectl get managedclusters -o jsonpath='{.items[0].status.nodeResourceGroup}')" \ + -cluster-name="$$(KUBECONFIG= kubectl get cluster -o jsonpath='{.items[0].metadata.name}')" \ + -client-id="$$(KUBECONFIG= kubectl get userassignedidentities -o jsonpath='{.items[0].status.clientId}')" \ + -cas-namespace="$(CLUSTER_AUTOSCALER_NAMESPACE)" \ + -cas-serviceaccount-name="$(CLUSTER_AUTOSCALER_SERVICEACCOUNT_NAME)" \ + -cas-image-repository="$(IMAGE)-$(GOARCH)" \ + -cas-image-tag="$(TAG)" diff --git a/cluster-autoscaler/cloudprovider/azure/test/e2e/azure_test.go b/cluster-autoscaler/cloudprovider/azure/test/e2e/azure_test.go index 6129570f1f73..deb7b43196f3 100644 --- a/cluster-autoscaler/cloudprovider/azure/test/e2e/azure_test.go +++ b/cluster-autoscaler/cloudprovider/azure/test/e2e/azure_test.go @@ -53,6 +53,17 @@ var _ = Describe("Azure Provider", func() { }) It("scales up AKS node pools when pending Pods exist", func() { + ensureHelmValues(map[string]interface{}{ + "extraArgs": map[string]interface{}{ + "scale-down-delay-after-add": "10s", + "scale-down-unneeded-time": "10s", + "scale-down-candidates-pool-ratio": "1.0", + "unremovable-node-recheck-timeout": "10s", + "skip-nodes-with-system-pods": "false", + 
"skip-nodes-with-local-storage": "false", + }, + }) + nodes := &corev1.NodeList{} Expect(k8s.List(ctx, nodes)).To(Succeed()) nodeCountBefore := len(nodes.Items) diff --git a/cluster-autoscaler/cloudprovider/azure/test/e2e/e2e_suite_test.go b/cluster-autoscaler/cloudprovider/azure/test/e2e/e2e_suite_test.go index 50c649737387..5ad9a88bba27 100644 --- a/cluster-autoscaler/cloudprovider/azure/test/e2e/e2e_suite_test.go +++ b/cluster-autoscaler/cloudprovider/azure/test/e2e/e2e_suite_test.go @@ -18,28 +18,52 @@ package e2e_test import ( "context" + "errors" "flag" + "fmt" "os" "testing" + "time" "github.com/Azure/azure-sdk-for-go/sdk/azidentity" "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute/v5" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" + "helm.sh/helm/v3/pkg/action" + "helm.sh/helm/v3/pkg/chart/loader" + "helm.sh/helm/v3/pkg/cli" + "helm.sh/helm/v3/pkg/storage/driver" corev1 "k8s.io/api/core/v1" - "k8s.io/cli-runtime/pkg/genericclioptions" "sigs.k8s.io/controller-runtime/pkg/client" ) +const ( + casReleaseName = "cluster-autoscaler" +) + var ( - ctx = context.Background() - vmss *armcompute.VirtualMachineScaleSetsClient - k8s client.Client - resourceGroup string + ctx = context.Background() + vmss *armcompute.VirtualMachineScaleSetsClient + k8s client.Client + helmEnv = cli.New() + + resourceGroup string + clusterName string + clientID string + casNamespace string + casServiceAccountName string + casImageRepository string + casImageTag string ) func init() { flag.StringVar(&resourceGroup, "resource-group", "", "resource group containing cluster-autoscaler-managed resources") + flag.StringVar(&clusterName, "cluster-name", "", "Cluster API Cluster name for the cluster to be managed by cluster-autoscaler") + flag.StringVar(&clientID, "client-id", "", "Azure client ID to be used by cluster-autoscaler") + flag.StringVar(&casNamespace, "cas-namespace", "", "Namespace in which to install cluster-autoscaler") + flag.StringVar(&casServiceAccountName, "cas-serviceaccount-name", "", "Name of the ServiceAccount to be used by cluster-autoscaler") + flag.StringVar(&casImageRepository, "cas-image-repository", "", "Repository of the container image for cluster-autoscaler") + flag.StringVar(&casImageTag, "cas-image-tag", "", "Tag of the container image for cluster-autoscaler") } func TestE2E(t *testing.T) { @@ -53,11 +77,41 @@ var _ = BeforeSuite(func() { vmss, err = armcompute.NewVirtualMachineScaleSetsClient(os.Getenv("AZURE_SUBSCRIPTION_ID"), azCred, nil) Expect(err).NotTo(HaveOccurred()) - k8sConfig := genericclioptions.NewConfigFlags(false) - restConfig, err := k8sConfig.ToRESTConfig() + restConfig, err := helmEnv.RESTClientGetter().ToRESTConfig() Expect(err).NotTo(HaveOccurred()) k8s, err = client.New(restConfig, client.Options{}) Expect(err).NotTo(HaveOccurred()) + + ensureHelmValues(map[string]interface{}{ + "cloudProvider": "azure", + "azureTenantID": os.Getenv("AZURE_TENANT_ID"), + "azureSubscriptionID": os.Getenv("AZURE_SUBSCRIPTION_ID"), + "azureUseWorkloadIdentityExtension": true, + "azureResourceGroup": resourceGroup, + "podLabels": map[string]interface{}{ + "azure.workload.identity/use": "true", + }, + "rbac": map[string]interface{}{ + "serviceAccount": map[string]interface{}{ + "name": casServiceAccountName, + "annotations": map[string]interface{}{ + "azure.workload.identity/tenant-id": os.Getenv("AZURE_TENANT_ID"), + "azure.workload.identity/client-id": clientID, + }, + }, + }, + "autoDiscovery": map[string]interface{}{ + "clusterName": clusterName, + 
}, + "nodeSelector": map[string]interface{}{ + "kubernetes.io/os": "linux", + }, + "image": map[string]interface{}{ + "repository": casImageRepository, + "tag": casImageTag, + "pullPolicy": "Always", + }, + }) }) func allVMSSStable(g Gomega) { @@ -86,3 +140,36 @@ func allVMSSStable(g Gomega) { })), )) } + +func ensureHelmValues(values map[string]interface{}) { + helmCfg := new(action.Configuration) + Expect(helmCfg.Init(helmEnv.RESTClientGetter(), casNamespace, "secret", func(format string, v ...interface{}) { + GinkgoLogr.Info(fmt.Sprintf(format, v...)) + })).To(Succeed()) + + chart, err := loader.Load("../../../../../charts/cluster-autoscaler") + Expect(err).NotTo(HaveOccurred()) + + get := action.NewGet(helmCfg) + _, err = get.Run(casReleaseName) + if errors.Is(err, driver.ErrReleaseNotFound) { + install := action.NewInstall(helmCfg) + install.Timeout = 5 * time.Minute + install.Wait = true + install.CreateNamespace = true + install.ReleaseName = casReleaseName + install.Namespace = casNamespace + _, err := install.Run(chart, values) + Expect(err).NotTo(HaveOccurred()) + return + } else { + Expect(err).NotTo(HaveOccurred()) + } + + upgrade := action.NewUpgrade(helmCfg) + upgrade.Timeout = 5 * time.Minute + upgrade.Wait = true + upgrade.ReuseValues = true + _, err = upgrade.Run(casReleaseName, chart, values) + Expect(err).NotTo(HaveOccurred()) +} diff --git a/cluster-autoscaler/cloudprovider/azure/test/go.mod b/cluster-autoscaler/cloudprovider/azure/test/go.mod index 1ba78f1dbb40..fecd92a84880 100644 --- a/cluster-autoscaler/cloudprovider/azure/test/go.mod +++ b/cluster-autoscaler/cloudprovider/azure/test/go.mod @@ -7,28 +7,56 @@ require ( github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute/v5 v5.7.0 github.com/onsi/ginkgo/v2 v2.19.0 github.com/onsi/gomega v1.33.1 + helm.sh/helm/v3 v3.15.2 k8s.io/api v0.30.2 k8s.io/apimachinery v0.30.2 - k8s.io/cli-runtime v0.30.2 k8s.io/utils v0.0.0-20230726121419-3b25d923346b sigs.k8s.io/controller-runtime v0.18.4 ) require ( + github.com/AdaLogics/go-fuzz-headers v0.0.0-20230811130428-ced1acdcaa24 // indirect github.com/Azure/azure-sdk-for-go/sdk/azcore v1.11.1 // indirect github.com/Azure/azure-sdk-for-go/sdk/internal v1.8.0 // indirect github.com/Azure/go-ansiterm v0.0.0-20210617225240-d185dfc1b5a1 // indirect github.com/AzureAD/microsoft-authentication-library-for-go v1.2.2 // indirect + github.com/BurntSushi/toml v1.3.2 // indirect + github.com/MakeNowJust/heredoc v1.0.0 // indirect + github.com/Masterminds/goutils v1.1.1 // indirect + github.com/Masterminds/semver/v3 v3.2.1 // indirect + github.com/Masterminds/sprig/v3 v3.2.3 // indirect + github.com/Masterminds/squirrel v1.5.4 // indirect + github.com/Microsoft/hcsshim v0.11.4 // indirect + github.com/asaskevich/govalidator v0.0.0-20200428143746-21a406dcc535 // indirect + github.com/beorn7/perks v1.0.1 // indirect + github.com/cespare/xxhash/v2 v2.2.0 // indirect + github.com/chai2010/gettext-go v1.0.2 // indirect + github.com/containerd/containerd v1.7.12 // indirect + github.com/containerd/log v0.1.0 // indirect + github.com/cyphar/filepath-securejoin v0.2.4 // indirect github.com/davecgh/go-spew v1.1.1 // indirect + github.com/distribution/reference v0.5.0 // indirect + github.com/docker/cli v25.0.1+incompatible // indirect + github.com/docker/distribution v2.8.3+incompatible // indirect + github.com/docker/docker v25.0.5+incompatible // indirect + github.com/docker/docker-credential-helpers v0.7.0 // indirect + github.com/docker/go-connections v0.5.0 // indirect + 
+	github.com/docker/go-metrics v0.0.1 // indirect
 	github.com/emicklei/go-restful/v3 v3.11.0 // indirect
-	github.com/evanphx/json-patch v4.12.0+incompatible // indirect
+	github.com/evanphx/json-patch v5.7.0+incompatible // indirect
 	github.com/evanphx/json-patch/v5 v5.9.0 // indirect
+	github.com/exponent-io/jsonpath v0.0.0-20151013193312-d6023ce2651d // indirect
+	github.com/fatih/color v1.13.0 // indirect
+	github.com/felixge/httpsnoop v1.0.3 // indirect
 	github.com/go-errors/errors v1.4.2 // indirect
+	github.com/go-gorp/gorp/v3 v3.1.0 // indirect
 	github.com/go-logr/logr v1.4.1 // indirect
+	github.com/go-logr/stdr v1.2.2 // indirect
 	github.com/go-openapi/jsonpointer v0.19.6 // indirect
 	github.com/go-openapi/jsonreference v0.20.2 // indirect
 	github.com/go-openapi/swag v0.22.3 // indirect
 	github.com/go-task/slim-sprig/v3 v3.0.0 // indirect
+	github.com/gobwas/glob v0.2.3 // indirect
 	github.com/gogo/protobuf v1.3.2 // indirect
 	github.com/golang-jwt/jwt/v5 v5.2.1 // indirect
 	github.com/golang/protobuf v1.5.4 // indirect
@@ -39,25 +67,64 @@ require (
 	github.com/google/pprof v0.0.0-20240424215950-a892ee059fd6 // indirect
 	github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 // indirect
 	github.com/google/uuid v1.6.0 // indirect
+	github.com/gorilla/mux v1.8.0 // indirect
+	github.com/gorilla/websocket v1.5.0 // indirect
+	github.com/gosuri/uitable v0.0.4 // indirect
 	github.com/gregjones/httpcache v0.0.0-20180305231024-9cad4c3443a7 // indirect
-	github.com/imdario/mergo v0.3.6 // indirect
+	github.com/hashicorp/errwrap v1.1.0 // indirect
+	github.com/hashicorp/go-multierror v1.1.1 // indirect
+	github.com/huandu/xstrings v1.4.0 // indirect
+	github.com/imdario/mergo v0.3.13 // indirect
 	github.com/inconshreveable/mousetrap v1.1.0 // indirect
+	github.com/jmoiron/sqlx v1.3.5 // indirect
 	github.com/josharian/intern v1.0.0 // indirect
 	github.com/json-iterator/go v1.1.12 // indirect
+	github.com/klauspost/compress v1.16.0 // indirect
 	github.com/kylelemons/godebug v1.1.0 // indirect
+	github.com/lann/builder v0.0.0-20180802200727-47ae307949d0 // indirect
+	github.com/lann/ps v0.0.0-20150810152359-62de8c46ede0 // indirect
+	github.com/lib/pq v1.10.9 // indirect
 	github.com/liggitt/tabwriter v0.0.0-20181228230101-89fcab3d43de // indirect
 	github.com/mailru/easyjson v0.7.7 // indirect
-	github.com/moby/term v0.0.0-20221205130635-1aeaba878587 // indirect
+	github.com/mattn/go-colorable v0.1.13 // indirect
+	github.com/mattn/go-isatty v0.0.17 // indirect
+	github.com/mattn/go-runewidth v0.0.9 // indirect
+	github.com/matttproud/golang_protobuf_extensions v1.0.4 // indirect
+	github.com/mitchellh/copystructure v1.2.0 // indirect
+	github.com/mitchellh/go-wordwrap v1.0.1 // indirect
+	github.com/mitchellh/reflectwalk v1.0.2 // indirect
+	github.com/moby/locker v1.0.1 // indirect
+	github.com/moby/spdystream v0.2.0 // indirect
+	github.com/moby/term v0.5.0 // indirect
 	github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
 	github.com/modern-go/reflect2 v1.0.2 // indirect
 	github.com/monochromegane/go-gitignore v0.0.0-20200626010858-205db1a8cc00 // indirect
 	github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
+	github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f // indirect
+	github.com/opencontainers/go-digest v1.0.0 // indirect
+	github.com/opencontainers/image-spec v1.1.0-rc6 // indirect
 	github.com/peterbourgon/diskv v2.0.1+incompatible // indirect
 	github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c // indirect
 	github.com/pkg/errors v0.9.1 // indirect
-	github.com/spf13/cobra v1.7.0 // indirect
+	github.com/prometheus/client_golang v1.16.0 // indirect
+	github.com/prometheus/client_model v0.4.0 // indirect
+	github.com/prometheus/common v0.44.0 // indirect
+	github.com/prometheus/procfs v0.12.0 // indirect
+	github.com/rubenv/sql-migrate v1.5.2 // indirect
+	github.com/russross/blackfriday/v2 v2.1.0 // indirect
+	github.com/shopspring/decimal v1.3.1 // indirect
+	github.com/sirupsen/logrus v1.9.3 // indirect
+	github.com/spf13/cast v1.5.0 // indirect
+	github.com/spf13/cobra v1.8.0 // indirect
 	github.com/spf13/pflag v1.0.5 // indirect
+	github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb // indirect
+	github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 // indirect
+	github.com/xeipuuv/gojsonschema v1.2.0 // indirect
 	github.com/xlab/treeprint v1.2.0 // indirect
+	go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.45.0 // indirect
+	go.opentelemetry.io/otel v1.19.0 // indirect
+	go.opentelemetry.io/otel/metric v1.19.0 // indirect
+	go.opentelemetry.io/otel/trace v1.19.0 // indirect
 	go.starlark.net v0.0.0-20230525235612-a134d8f9ddca // indirect
 	golang.org/x/crypto v0.24.0 // indirect
 	golang.org/x/net v0.26.0 // indirect
@@ -69,16 +136,24 @@ require (
 	golang.org/x/time v0.3.0 // indirect
 	golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d // indirect
 	google.golang.org/appengine v1.6.7 // indirect
+	google.golang.org/genproto/googleapis/rpc v0.0.0-20230822172742-b8732ec3820d // indirect
+	google.golang.org/grpc v1.58.3 // indirect
 	google.golang.org/protobuf v1.33.0 // indirect
 	gopkg.in/inf.v0 v0.9.1 // indirect
 	gopkg.in/yaml.v2 v2.4.0 // indirect
 	gopkg.in/yaml.v3 v3.0.1 // indirect
+	k8s.io/apiextensions-apiserver v0.30.1 // indirect
+	k8s.io/apiserver v0.30.1 // indirect
+	k8s.io/cli-runtime v0.30.2 // indirect
 	k8s.io/client-go v0.30.2 // indirect
+	k8s.io/component-base v0.30.1 // indirect
 	k8s.io/klog/v2 v2.120.1 // indirect
 	k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340 // indirect
+	k8s.io/kubectl v0.30.0 // indirect
+	oras.land/oras-go v1.2.5 // indirect
 	sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect
 	sigs.k8s.io/kustomize/api v0.13.5-0.20230601165947-6ce0bf390ce3 // indirect
 	sigs.k8s.io/kustomize/kyaml v0.14.3-0.20230601165947-6ce0bf390ce3 // indirect
 	sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect
-	sigs.k8s.io/yaml v1.3.0 // indirect
+	sigs.k8s.io/yaml v1.4.0 // indirect
 )
diff --git a/cluster-autoscaler/cloudprovider/azure/test/go.sum b/cluster-autoscaler/cloudprovider/azure/test/go.sum
index e90591e7109a..a3fea425df7b 100644
--- a/cluster-autoscaler/cloudprovider/azure/test/go.sum
+++ b/cluster-autoscaler/cloudprovider/azure/test/go.sum
@@ -1,4 +1,6 @@
 cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
+github.com/AdaLogics/go-fuzz-headers v0.0.0-20230811130428-ced1acdcaa24 h1:bvDV9vkmnHYOMsOr4WLk+Vo07yKIzd94sVoIqshQ4bU=
+github.com/AdaLogics/go-fuzz-headers v0.0.0-20230811130428-ced1acdcaa24/go.mod h1:8o94RPi1/7XTJvwPpRSzSUedZrtlirdB3r9Z20bi2f8=
 github.com/Azure/azure-sdk-for-go/sdk/azcore v1.11.1 h1:E+OJmp2tPvt1W+amx48v1eqbjDYsgN+RzP4q16yV5eM=
 github.com/Azure/azure-sdk-for-go/sdk/azcore v1.11.1/go.mod h1:a6xsAQUZg+VsS3TJ05SRp524Hs4pZ/AeFSr5ENf0Yjo=
 github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.7.0 h1:tfLQ34V6F7tVSwoTf/4lH5sE0o6eCJuNDTmH09nDpbc=
@@ -16,30 +18,121 @@ github.com/Azure/go-ansiterm v0.0.0-20210617225240-d185dfc1b5a1/go.mod h1:xomTg6
github.com/AzureAD/microsoft-authentication-library-for-go v1.2.2 h1:XHOnouVk1mxXfQidrMEnLlPk9UMeRtyBTnEFtxkV0kU= github.com/AzureAD/microsoft-authentication-library-for-go v1.2.2/go.mod h1:wP83P5OoQ5p6ip3ScPr0BAq0BvuPAvacpEuSzyouqAI= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= +github.com/BurntSushi/toml v1.3.2 h1:o7IhLm0Msx3BaB+n3Ag7L8EVlByGnpq14C4YWiu/gL8= +github.com/BurntSushi/toml v1.3.2/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ= +github.com/DATA-DOG/go-sqlmock v1.5.2 h1:OcvFkGmslmlZibjAjaHm3L//6LiuBgolP7OputlJIzU= +github.com/DATA-DOG/go-sqlmock v1.5.2/go.mod h1:88MAG/4G7SMwSE3CeA0ZKzrT5CiOU3OJ+JlNzwDqpNU= +github.com/MakeNowJust/heredoc v1.0.0 h1:cXCdzVdstXyiTqTvfqk9SDHpKNjxuom+DOlyEeQ4pzQ= +github.com/MakeNowJust/heredoc v1.0.0/go.mod h1:mG5amYoWBHf8vpLOuehzbGGw0EHxpZZ6lCpQ4fNJ8LE= +github.com/Masterminds/goutils v1.1.1 h1:5nUrii3FMTL5diU80unEVvNevw1nH4+ZV4DSLVJLSYI= +github.com/Masterminds/goutils v1.1.1/go.mod h1:8cTjp+g8YejhMuvIA5y2vz3BpJxksy863GQaJW2MFNU= +github.com/Masterminds/semver/v3 v3.2.0/go.mod h1:qvl/7zhW3nngYb5+80sSMF+FG2BjYrf8m9wsX0PNOMQ= +github.com/Masterminds/semver/v3 v3.2.1 h1:RN9w6+7QoMeJVGyfmbcgs28Br8cvmnucEXnY0rYXWg0= +github.com/Masterminds/semver/v3 v3.2.1/go.mod h1:qvl/7zhW3nngYb5+80sSMF+FG2BjYrf8m9wsX0PNOMQ= +github.com/Masterminds/sprig/v3 v3.2.3 h1:eL2fZNezLomi0uOLqjQoN6BfsDD+fyLtgbJMAj9n6YA= +github.com/Masterminds/sprig/v3 v3.2.3/go.mod h1:rXcFaZ2zZbLRJv/xSysmlgIM1u11eBaRMhvYXJNkGuM= +github.com/Masterminds/squirrel v1.5.4 h1:uUcX/aBc8O7Fg9kaISIUsHXdKuqehiXAMQTYX8afzqM= +github.com/Masterminds/squirrel v1.5.4/go.mod h1:NNaOrjSoIDfDA40n7sr2tPNZRfjzjA400rg+riTZj10= +github.com/Microsoft/go-winio v0.6.1 h1:9/kr64B9VUZrLm5YYwbGtUJnMgqWVOdUAXu6Migciow= +github.com/Microsoft/go-winio v0.6.1/go.mod h1:LRdKpFKfdobln8UmuiYcKPot9D2v6svN5+sAH+4kjUM= +github.com/Microsoft/hcsshim v0.11.4 h1:68vKo2VN8DE9AdN4tnkWnmdhqdbpUFM8OF3Airm7fz8= +github.com/Microsoft/hcsshim v0.11.4/go.mod h1:smjE4dvqPX9Zldna+t5FG3rnoHhaB7QYxPRqGcpAD9w= +github.com/Shopify/logrus-bugsnag v0.0.0-20171204204709-577dee27f20d h1:UrqY+r/OJnIp5u0s1SbQ8dVfLCZJsnvazdBP5hS4iRs= +github.com/Shopify/logrus-bugsnag v0.0.0-20171204204709-577dee27f20d/go.mod h1:HI8ITrYtUY+O+ZhtlqUnD8+KwNPOyugEhfP9fdUIaEQ= +github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= +github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= +github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPdPJAN/hZIm0C4OItdklCFmMRWYpio= +github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5/go.mod h1:wHh0iHkYZB8zMSxRWpUBQtwG5a7fFgvEO+odwuTv2gs= +github.com/asaskevich/govalidator v0.0.0-20200428143746-21a406dcc535 h1:4daAzAu0S6Vi7/lbWECcX0j45yZReDZ56BQsrVBOEEY= +github.com/asaskevich/govalidator v0.0.0-20200428143746-21a406dcc535/go.mod h1:oGkLhpf+kjZl6xBf758TQhh5XrAeiJv/7FRz/2spLIg= +github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= +github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8= +github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= +github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= +github.com/bshuster-repo/logrus-logstash-hook v1.0.0 h1:e+C0SB5R1pu//O4MQ3f9cFuPGoOVeF2fE4Og9otCc70= +github.com/bshuster-repo/logrus-logstash-hook v1.0.0/go.mod 
h1:zsTqEiSzDgAa/8GZR7E1qaXrhYNDKBYy5/dWPTIflbk= +github.com/bugsnag/bugsnag-go v0.0.0-20141110184014-b1d153021fcd h1:rFt+Y/IK1aEZkEHchZRSq9OQbsSzIT/OrI8YFFmRIng= +github.com/bugsnag/bugsnag-go v0.0.0-20141110184014-b1d153021fcd/go.mod h1:2oa8nejYd4cQ/b0hMIopN0lCRxU0bueqREvZLWFrtK8= +github.com/bugsnag/osext v0.0.0-20130617224835-0dd3f918b21b h1:otBG+dV+YK+Soembjv71DPz3uX/V/6MMlSyD9JBQ6kQ= +github.com/bugsnag/osext v0.0.0-20130617224835-0dd3f918b21b/go.mod h1:obH5gd0BsqsP2LwDJ9aOkm/6J86V6lyAXCoQWGw3K50= +github.com/bugsnag/panicwrap v0.0.0-20151223152923-e2c28503fcd0 h1:nvj0OLI3YqYXer/kZD8Ri1aaunCxIEsOst1BVJswV0o= +github.com/bugsnag/panicwrap v0.0.0-20151223152923-e2c28503fcd0/go.mod h1:D/8v3kj0zr8ZAKg1AQ6crr+5VwKN5eIywRkfhyM/+dE= github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= +github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44= +github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/chai2010/gettext-go v1.0.2 h1:1Lwwip6Q2QGsAdl/ZKPCwTe9fe0CjlUbqj5bFNSjIRk= +github.com/chai2010/gettext-go v1.0.2/go.mod h1:y+wnP2cHYaVj19NZhYKAwEMH2CI1gNHeQQ+5AjwawxA= github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= -github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= +github.com/containerd/cgroups v1.1.0 h1:v8rEWFl6EoqHB+swVNjVoCJE8o3jX7e8nqBGPLaDFBM= +github.com/containerd/cgroups v1.1.0/go.mod h1:6ppBcbh/NOOUU+dMKrykgaBnK9lCIBxHqJDGwsa1mIw= +github.com/containerd/containerd v1.7.12 h1:+KQsnv4VnzyxWcfO9mlxxELaoztsDEjOuCMPAuPqgU0= +github.com/containerd/containerd v1.7.12/go.mod h1:/5OMpE1p0ylxtEUGY8kuCYkDRzJm9NO1TFMWjUpdevk= +github.com/containerd/continuity v0.4.2 h1:v3y/4Yz5jwnvqPKJJ+7Wf93fyWoCB3F5EclWG023MDM= +github.com/containerd/continuity v0.4.2/go.mod h1:F6PTNCKepoxEaXLQp3wDAjygEnImnZ/7o4JzpodfroQ= +github.com/containerd/log v0.1.0 h1:TCJt7ioM2cr/tfR8GPbGf9/VRAX8D2B4PjzCpfX540I= +github.com/containerd/log v0.1.0/go.mod h1:VRRf09a7mHDIRezVKTRCrOq78v577GXq3bSa3EhrzVo= +github.com/cpuguy83/go-md2man/v2 v2.0.3/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/creack/pty v1.1.18 h1:n56/Zwd5o6whRC5PMGretI4IdRLlmBXYNjScPaBgsbY= github.com/creack/pty v1.1.18/go.mod h1:MOBLtS5ELjhRRrroQr9kyvTxUAFNvYEK993ew/Vr4O4= +github.com/cyphar/filepath-securejoin v0.2.4 h1:Ugdm7cg7i6ZK6x3xDF1oEu1nfkyfH53EtKeQYTC3kyg= +github.com/cyphar/filepath-securejoin v0.2.4/go.mod h1:aPGpWjXOXUn2NCNjFvBE6aRxGGx79pTxQpKOJNYHHl4= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/distribution/distribution/v3 v3.0.0-20221208165359-362910506bc2 h1:aBfCb7iqHmDEIp6fBvC/hQUddQfg+3qdYjwzaiP9Hnc= +github.com/distribution/distribution/v3 v3.0.0-20221208165359-362910506bc2/go.mod h1:WHNsWjnIn2V1LYOrME7e8KxSeKunYHsxEm4am0BUtcI= +github.com/distribution/reference v0.5.0 
h1:/FUIFXtfc/x2gpa5/VGfiGLuOIdYa1t65IKK2OFGvA0= +github.com/distribution/reference v0.5.0/go.mod h1:BbU0aIcezP1/5jX/8MP0YiH4SdvB5Y4f/wlDRiLyi3E= +github.com/docker/cli v25.0.1+incompatible h1:mFpqnrS6Hsm3v1k7Wa/BO23oz0k121MTbTO1lpcGSkU= +github.com/docker/cli v25.0.1+incompatible/go.mod h1:JLrzqnKDaYBop7H2jaqPtU4hHvMKP+vjCwu2uszcLI8= +github.com/docker/distribution v2.8.3+incompatible h1:AtKxIZ36LoNK51+Z6RpzLpddBirtxJnzDrHLEKxTAYk= +github.com/docker/distribution v2.8.3+incompatible/go.mod h1:J2gT2udsDAN96Uj4KfcMRqY0/ypR+oyYUYmja8H+y+w= +github.com/docker/docker v25.0.5+incompatible h1:UmQydMduGkrD5nQde1mecF/YnSbTOaPeFIeP5C4W+DE= +github.com/docker/docker v25.0.5+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk= +github.com/docker/docker-credential-helpers v0.7.0 h1:xtCHsjxogADNZcdv1pKUHXryefjlVRqWqIhk/uXJp0A= +github.com/docker/docker-credential-helpers v0.7.0/go.mod h1:rETQfLdHNT3foU5kuNkFR1R1V12OJRRO5lzt2D1b5X0= +github.com/docker/go-connections v0.5.0 h1:USnMq7hx7gwdVZq1L49hLXaFtUdTADjXGp+uj1Br63c= +github.com/docker/go-connections v0.5.0/go.mod h1:ov60Kzw0kKElRwhNs9UlUHAE/F9Fe6GLaXnqyDdmEXc= +github.com/docker/go-events v0.0.0-20190806004212-e31b211e4f1c h1:+pKlWGMw7gf6bQ+oDZB4KHQFypsfjYlq/C4rfL7D3g8= +github.com/docker/go-events v0.0.0-20190806004212-e31b211e4f1c/go.mod h1:Uw6UezgYA44ePAFQYUehOuCzmy5zmg/+nl2ZfMWGkpA= +github.com/docker/go-metrics v0.0.1 h1:AgB/0SvBxihN0X8OR4SjsblXkbMvalQ8cjmtKQ2rQV8= +github.com/docker/go-metrics v0.0.1/go.mod h1:cG1hvH2utMXtqgqqYE9plW6lDxS3/5ayHzueweSI3Vw= +github.com/docker/libtrust v0.0.0-20150114040149-fa567046d9b1 h1:ZClxb8laGDf5arXfYcAtECDFgAgHklGI8CxgjHnXKJ4= +github.com/docker/libtrust v0.0.0-20150114040149-fa567046d9b1/go.mod h1:cyGadeNEkKy96OOhEzfZl+yxihPEzKnqJwvfuSUqbZE= github.com/emicklei/go-restful/v3 v3.11.0 h1:rAQeMHw1c7zTmncogyy8VvRZwtkmkZ4FxERmMY4rD+g= github.com/emicklei/go-restful/v3 v3.11.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= -github.com/evanphx/json-patch v4.12.0+incompatible h1:4onqiflcdA9EOZ4RxV643DvftH5pOlLGNtQ5lPWQu84= -github.com/evanphx/json-patch v4.12.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLiYLvXMP4fmwYFNcr97nuDLSk= +github.com/evanphx/json-patch v5.7.0+incompatible h1:vgGkfT/9f8zE6tvSCe74nfpAVDQ2tG6yudJd8LBksgI= +github.com/evanphx/json-patch v5.7.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLiYLvXMP4fmwYFNcr97nuDLSk= github.com/evanphx/json-patch/v5 v5.9.0 h1:kcBlZQbplgElYIlo/n1hJbls2z/1awpXxpRi0/FOJfg= github.com/evanphx/json-patch/v5 v5.9.0/go.mod h1:VNkHZ/282BpEyt/tObQO8s5CMPmYYq14uClGH4abBuQ= +github.com/exponent-io/jsonpath v0.0.0-20151013193312-d6023ce2651d h1:105gxyaGwCFad8crR9dcMQWvV9Hvulu6hwUh4tWPJnM= +github.com/exponent-io/jsonpath v0.0.0-20151013193312-d6023ce2651d/go.mod h1:ZZMPRZwes7CROmyNKgQzC3XPs6L/G2EJLHddWejkmf4= +github.com/fatih/color v1.13.0 h1:8LOYc1KYPPmyKMuN8QV2DNRWNbLo6LZ0iLs8+mlH53w= +github.com/fatih/color v1.13.0/go.mod h1:kLAiJbzzSOZDVNGyDpeOxJ47H46qBXwg5ILebYFFOfk= +github.com/felixge/httpsnoop v1.0.3 h1:s/nj+GCswXYzN5v2DpNMuMQYe+0DDwt5WVCU6CWBdXk= +github.com/felixge/httpsnoop v1.0.3/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= +github.com/foxcpp/go-mockdns v1.0.0 h1:7jBqxd3WDWwi/6WhDvacvH1XsN3rOLXyHM1uhvIx6FI= +github.com/foxcpp/go-mockdns v1.0.0/go.mod h1:lgRN6+KxQBawyIghpnl5CezHFGS9VLzvtVlwxvzXTQ4= 
+github.com/frankban/quicktest v1.14.3 h1:FJKSZTDHjyhriyC81FLQ0LY93eSai0ZyR/ZIkd3ZUKE= +github.com/frankban/quicktest v1.14.3/go.mod h1:mgiwOwqx65TmIk1wJ6Q7wvnVMocbUorkibMOrVTHZps= github.com/go-errors/errors v1.4.2 h1:J6MZopCL4uSllY1OfXM374weqZFFItUbrImctkmUxIA= github.com/go-errors/errors v1.4.2/go.mod h1:sIVyrIiJhuEF+Pj9Ebtd6P/rEYROXFi3BopGUQ5a5Og= +github.com/go-gorp/gorp/v3 v3.1.0 h1:ItKF/Vbuj31dmV4jxA1qblpSwkl9g1typ24xoe70IGs= +github.com/go-gorp/gorp/v3 v3.1.0/go.mod h1:dLEjIyyRNiXvNZ8PSmzpt1GsWAUK8kjVhEpjH8TixEw= +github.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= +github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9GBnD5lWE= +github.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V4qmtdjCk= +github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= github.com/go-logr/logr v1.4.1 h1:pKouT5E8xu9zeFC39JXRDukb6JFQPXM5p5I91188VAQ= github.com/go-logr/logr v1.4.1/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= +github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= github.com/go-logr/zapr v1.3.0 h1:XGdV8XW8zdwFiwOA2Dryh1gj2KRQyOOoNmBy4EplIcQ= github.com/go-logr/zapr v1.3.0/go.mod h1:YKepepNBd1u/oyhd/yQmtjVXmm9uML4IXUgMOwR8/Gg= github.com/go-openapi/jsonpointer v0.19.6 h1:eCs3fxoIi3Wh6vtgmLTOjdhSpiqphQ+DaPn38N2ZdrE= @@ -48,13 +141,27 @@ github.com/go-openapi/jsonreference v0.20.2 h1:3sVjiK66+uXK/6oQ8xgcRKcFgQ5KXa2Kv github.com/go-openapi/jsonreference v0.20.2/go.mod h1:Bl1zwGIM8/wsvqjsOQLJ/SH+En5Ap4rVB5KVcIDZG2k= github.com/go-openapi/swag v0.22.3 h1:yMBqmnQ0gyZvEb/+KzuWZOXgllrXT4SADYbvDaXHv/g= github.com/go-openapi/swag v0.22.3/go.mod h1:UzaqsxGiab7freDnrUUra0MwWfN/q7tE4j+VcZ0yl14= +github.com/go-sql-driver/mysql v1.6.0 h1:BCTh4TKNUYmOmMUcQ3IipzF5prigylS7XXjEkfCHuOE= +github.com/go-sql-driver/mysql v1.6.0/go.mod h1:DCzpHaOWr8IXmIStZouvnhqoel9Qv2LBy8hT2VhHyBg= +github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI= github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8= +github.com/gobuffalo/logger v1.0.6 h1:nnZNpxYo0zx+Aj9RfMPBm+x9zAU2OayFh/xrAWi34HU= +github.com/gobuffalo/logger v1.0.6/go.mod h1:J31TBEHR1QLV2683OXTAItYIg8pv2JMHnF/quuAbMjs= +github.com/gobuffalo/packd v1.0.1 h1:U2wXfRr4E9DH8IdsDLlRFwTZTK7hLfq9qT/QHXGVe/0= +github.com/gobuffalo/packd v1.0.1/go.mod h1:PP2POP3p3RXGz7Jh6eYEf93S7vA2za6xM7QT85L4+VY= +github.com/gobuffalo/packr/v2 v2.8.3 h1:xE1yzvnO56cUC0sTpKR3DIbxZgB54AftTFMhB2XEWlY= +github.com/gobuffalo/packr/v2 v2.8.3/go.mod h1:0SahksCVcx4IMnigTjiFuyldmTrdTctXsOdiU5KwbKc= +github.com/gobwas/glob v0.2.3 h1:A4xDbljILXROh+kObIiy5kIaPYD8e96x1tgBhUI5J+Y= +github.com/gobwas/glob v0.2.3/go.mod h1:d3Ez4x06l9bZtSvzIay5+Yzi0fmZzPgnTbPcKjJAkT8= +github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= github.com/golang-jwt/jwt/v5 v5.2.1 h1:OuVbFODueb089Lh128TAcimifWaLhJwVflnrgM17wHk= github.com/golang-jwt/jwt/v5 v5.2.1/go.mod h1:pqrtFR0X4osieyHYxtmOUWsAWrfe1Q5UVIyoH402zdk= github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= 
+github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da h1:oI5xCqsCo564l8iNU+DwB5epxmsaqB+rhGL0m5jtYqE= +github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= @@ -67,6 +174,8 @@ github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvq github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8= github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= +github.com/gomodule/redigo v1.8.2 h1:H5XSIre1MB5NbPYFp+i1NBbb5qN1W8Y8YAQoAYbkm8k= +github.com/gomodule/redigo v1.8.2/go.mod h1:P9dn9mFrCBvWhGE1wpxx6fgq7BAeLBk+UUUzlpkBYO0= github.com/google/btree v1.0.1 h1:gK4Kx5IaGY9CD5sPJ36FHiBJ6ZXl0kilRiiCj+jdYp4= github.com/google/btree v1.0.1/go.mod h1:xXMiIv4Fb/0kKde4SpL7qlzvu5cMJDRkFDxJfI9uaxA= github.com/google/gnostic-models v0.6.8 h1:yo/ABAfM5IMRsS1VnXjTBvUb61tFIHozhlYvRgGre9I= @@ -87,20 +196,52 @@ github.com/google/pprof v0.0.0-20240424215950-a892ee059fd6 h1:k7nVchz72niMH6YLQN github.com/google/pprof v0.0.0-20240424215950-a892ee059fd6/go.mod h1:kf6iHlnVGwgKolg33glAes7Yg/8iWP8ukqeldJSO7jw= github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 h1:El6M4kTTCOh6aBiKaUGG7oYTSPP8MxqL4YI3kZKwcP4= github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510/go.mod h1:pupxD2MaaD3pAXIBCelhxNneeOaAeabZDe5s4K6zSpQ= +github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/gorilla/handlers v1.5.1 h1:9lRY6j8DEeeBT10CvO9hGW0gmky0BprnvDI5vfhUHH4= +github.com/gorilla/handlers v1.5.1/go.mod h1:t8XrUpc4KVXb7HGyJ4/cEnwQiaxrX/hz1Zv/4g96P1Q= +github.com/gorilla/mux v1.8.0 h1:i40aqfkR1h2SlN9hojwV5ZA91wcXFOvkdNIeFDP5koI= +github.com/gorilla/mux v1.8.0/go.mod h1:DVbg23sWSpFRCP0SfiEN6jmj59UnW/n46BH5rLB71So= +github.com/gorilla/websocket v1.4.2/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= +github.com/gorilla/websocket v1.5.0 h1:PPwGk2jz7EePpoHN/+ClbZu8SPxiqlu12wZP/3sWmnc= +github.com/gorilla/websocket v1.5.0/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= +github.com/gosuri/uitable v0.0.4 h1:IG2xLKRvErL3uhY6e1BylFzG+aJiwQviDDTfOKeKTpY= +github.com/gosuri/uitable v0.0.4/go.mod h1:tKR86bXuXPZazfOTG1FIzvjIdXzd0mo4Vtn16vt0PJo= github.com/gregjones/httpcache v0.0.0-20180305231024-9cad4c3443a7 h1:pdN6V1QBWetyv/0+wjACpqVH+eVULgEjkurDLq3goeM= github.com/gregjones/httpcache v0.0.0-20180305231024-9cad4c3443a7/go.mod h1:FecbI9+v66THATjSRHfNgh1IVFe/9kFxbXtjV0ctIMA= -github.com/imdario/mergo v0.3.6 h1:xTNEAn+kxVO7dTZGu0CegyqKZmoWFI0rF8UxjlB2d28= -github.com/imdario/mergo v0.3.6/go.mod h1:2EnlNZ0deacrJVfApfmtdGgDfMuh/nq6Ok1EcJh5FfA= +github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= +github.com/hashicorp/errwrap v1.1.0 h1:OxrOeh75EUXMY8TBjag2fzXGZ40LB6IKw45YeGUDY2I= +github.com/hashicorp/errwrap v1.1.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= +github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+lD48awMYo= +github.com/hashicorp/go-multierror 
v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM= +github.com/hashicorp/golang-lru v0.5.4 h1:YDjusn29QI/Das2iO9M0BHnIbxPeyuCHsjMW+lJfyTc= +github.com/hashicorp/golang-lru v0.5.4/go.mod h1:iADmTwqILo4mZ8BN3D2Q6+9jd8WM5uGBxy+E8yxSoD4= +github.com/huandu/xstrings v1.3.3/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq4ovT0aE= +github.com/huandu/xstrings v1.4.0 h1:D17IlohoQq4UcpqD7fDk80P7l+lwAmlFaBHgOipl2FU= +github.com/huandu/xstrings v1.4.0/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq4ovT0aE= +github.com/imdario/mergo v0.3.11/go.mod h1:jmQim1M+e3UYxmgPu/WyfjB3N3VflVyUjjjwH0dnCYA= +github.com/imdario/mergo v0.3.13 h1:lFzP57bqS/wsqKssCGmtLAb8A0wKjLGrve2q3PPVcBk= +github.com/imdario/mergo v0.3.13/go.mod h1:4lJ1jqUDcsbIECGy0RUJAXNIhg+6ocWgb1ALK2O4oXg= github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= +github.com/jmoiron/sqlx v1.3.5 h1:vFFPA71p1o5gAeqtEAwLU4dnX2napprKtHr7PYIcN3g= +github.com/jmoiron/sqlx v1.3.5/go.mod h1:nRVWtLre0KfCLJvgxzCsLVMogSvQ1zNJtpYr2Ccp0mQ= github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= +github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU= +github.com/json-iterator/go v1.1.7/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= +github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w= +github.com/karrick/godirwalk v1.16.1 h1:DynhcF+bztK8gooS0+NDJFrdNZjJ3gzVzC545UNA9iw= +github.com/karrick/godirwalk v1.16.1/go.mod h1:j4mkqPuvaLI8mp1DroR3P6ad7cyYd4c1qeJ3RV7ULlk= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= +github.com/klauspost/compress v1.16.0 h1:iULayQNOReoYUe+1qtKOqw9CwJv3aNQu8ivo7lw1HU4= +github.com/klauspost/compress v1.16.0/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE= +github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= +github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc= github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= @@ -110,67 +251,186 @@ github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc= github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= +github.com/lann/builder v0.0.0-20180802200727-47ae307949d0 h1:SOEGU9fKiNWd/HOJuq6+3iTQz8KNCLtVX6idSoTLdUw= +github.com/lann/builder v0.0.0-20180802200727-47ae307949d0/go.mod h1:dXGbAdH5GtBTC4WfIxhKZfyBF/HBFgRZSWwZ9g/He9o= +github.com/lann/ps v0.0.0-20150810152359-62de8c46ede0 h1:P6pPBnrTSX3DEVR4fDembhRWSsG5rVo6hYhAB/ADZrk= +github.com/lann/ps v0.0.0-20150810152359-62de8c46ede0/go.mod h1:vmVJ0l/dxyfGW6FmdpVm2joNMFikkuWg0EoCKLGUMNw= 
+github.com/lib/pq v1.2.0/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo= +github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw= +github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o= github.com/liggitt/tabwriter v0.0.0-20181228230101-89fcab3d43de h1:9TO3cAIGXtEhnIaL+V+BEER86oLrvS+kWobKpbJuye0= github.com/liggitt/tabwriter v0.0.0-20181228230101-89fcab3d43de/go.mod h1:zAbeS9B/r2mtpb6U+EI2rYA5OAXxsYw6wTamcNW+zcE= github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= -github.com/moby/term v0.0.0-20221205130635-1aeaba878587 h1:HfkjXDfhgVaN5rmueG8cL8KKeFNecRCXFhaJ2qZ5SKA= -github.com/moby/term v0.0.0-20221205130635-1aeaba878587/go.mod h1:8FzsFHVUBGZdbDsJw/ot+X+d5HLUbvklYLJ9uGfcI3Y= +github.com/markbates/errx v1.1.0 h1:QDFeR+UP95dO12JgW+tgi2UVfo0V8YBHiUIOaeBPiEI= +github.com/markbates/errx v1.1.0/go.mod h1:PLa46Oex9KNbVDZhKel8v1OT7hD5JZ2eI7AHhA0wswc= +github.com/markbates/oncer v1.0.0 h1:E83IaVAHygyndzPimgUYJjbshhDTALZyXxvk9FOlQRY= +github.com/markbates/oncer v1.0.0/go.mod h1:Z59JA581E9GP6w96jai+TGqafHPW+cPfRxz2aSZ0mcI= +github.com/markbates/safe v1.0.1 h1:yjZkbvRM6IzKj9tlu/zMJLS0n/V351OZWRnF3QfaUxI= +github.com/markbates/safe v1.0.1/go.mod h1:nAqgmRi7cY2nqMc92/bSEeQA+R4OheNU2T1kNSCBdG0= +github.com/mattn/go-colorable v0.1.9/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc= +github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA= +github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg= +github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU= +github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94= +github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= +github.com/mattn/go-isatty v0.0.17 h1:BTarxUcIeDqL27Mc+vyvdWYSL28zpIhv3RoTdsLMPng= +github.com/mattn/go-isatty v0.0.17/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= +github.com/mattn/go-runewidth v0.0.9 h1:Lm995f3rfxdpd6TSmuVCHVb/QhupuXlYr8sCI/QdE+0= +github.com/mattn/go-runewidth v0.0.9/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI= +github.com/mattn/go-sqlite3 v1.14.6/go.mod h1:NyWgC/yNuGj7Q9rpYnZvas74GogHl5/Z4A/KQRfk6bU= +github.com/mattn/go-sqlite3 v1.14.15 h1:vfoHhTN1af61xCRSWzFIWzx2YskyMTwHLrExkBOjvxI= +github.com/mattn/go-sqlite3 v1.14.15/go.mod h1:2eHXhiwb8IkHr+BDWZGa96P6+rkvnG63S2DGjv9HUNg= +github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0= +github.com/matttproud/golang_protobuf_extensions v1.0.4 h1:mmDVorXM7PCGKw94cs5zkfA9PSy5pEvNWRP0ET0TIVo= +github.com/matttproud/golang_protobuf_extensions v1.0.4/go.mod h1:BSXmuO+STAnVfrANrmjBb36TMTDstsz7MSK+HVaYKv4= +github.com/miekg/dns v1.1.25 h1:dFwPR6SfLtrSwgDcIq2bcU/gVutB4sNApq2HBdqcakg= +github.com/miekg/dns v1.1.25/go.mod h1:bPDLeHnStXmXAq1m/Ch/hvfNHr14JKNPMBo3VZKjuso= +github.com/mitchellh/copystructure v1.0.0/go.mod h1:SNtv71yrdKgLRyLFxmLdkAbkKEFWgYaq1OVrnRcwhnw= +github.com/mitchellh/copystructure v1.2.0 h1:vpKXTN4ewci03Vljg/q9QvCGUDttBOGBIa15WveJJGw= +github.com/mitchellh/copystructure v1.2.0/go.mod h1:qLl+cE2AmVv+CoeAwDPye/v+N2HKCj9FbZEVFJRxO9s= +github.com/mitchellh/go-wordwrap v1.0.1 h1:TLuKupo69TCn6TQSyGxwI1EblZZEsQ0vMlAFQflz0v0= +github.com/mitchellh/go-wordwrap v1.0.1/go.mod h1:R62XHJLzvMFRBbcrT7m7WgmE1eOyTSsCt+hzestvNj0= +github.com/mitchellh/reflectwalk 
v1.0.0/go.mod h1:mSTlrgnPZtwu0c4WaC2kGObEpuNDbx0jmZXqmk4esnw= +github.com/mitchellh/reflectwalk v1.0.2 h1:G2LzWKi524PWgd3mLHV8Y5k7s6XUvT0Gef6zxSIeXaQ= +github.com/mitchellh/reflectwalk v1.0.2/go.mod h1:mSTlrgnPZtwu0c4WaC2kGObEpuNDbx0jmZXqmk4esnw= +github.com/moby/locker v1.0.1 h1:fOXqR41zeveg4fFODix+1Ch4mj/gT0NE1XJbp/epuBg= +github.com/moby/locker v1.0.1/go.mod h1:S7SDdo5zpBK84bzzVlKr2V0hz+7x9hWbYC/kq7oQppc= +github.com/moby/spdystream v0.2.0 h1:cjW1zVyyoiM0T7b6UoySUFqzXMoqRckQtXwGPiBhOM8= +github.com/moby/spdystream v0.2.0/go.mod h1:f7i0iNDQJ059oMTcWxx8MA/zKFIuD/lY+0GqbN2Wy8c= +github.com/moby/sys/mountinfo v0.6.2 h1:BzJjoreD5BMFNmD9Rus6gdd1pLuecOFPt8wC+Vygl78= +github.com/moby/sys/mountinfo v0.6.2/go.mod h1:IJb6JQeOklcdMU9F5xQ8ZALD+CUr5VlGpwtX+VE0rpI= +github.com/moby/term v0.5.0 h1:xt8Q1nalod/v7BqbG21f8mQPqH+xAaC9C3N3wfWbVP0= +github.com/moby/term v0.5.0/go.mod h1:8FzsFHVUBGZdbDsJw/ot+X+d5HLUbvklYLJ9uGfcI3Y= github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= +github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= github.com/monochromegane/go-gitignore v0.0.0-20200626010858-205db1a8cc00 h1:n6/2gBQ3RWajuToeY6ZtZTIKv2v7ThUy5KKusIT0yc0= github.com/monochromegane/go-gitignore v0.0.0-20200626010858-205db1a8cc00/go.mod h1:Pm3mSP3c5uWn86xMLZ5Sa7JB9GsEZySvHYXCTK4E9q4= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= +github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= +github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f h1:y5//uYreIhSUg3J1GEMiLbxo1LJaP8RfCpH6pymGZus= +github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f/go.mod h1:ZdcZmHo+o7JKHSa8/e818NopupXU1YMK5fe1lsApnBw= github.com/onsi/ginkgo/v2 v2.19.0 h1:9Cnnf7UHo57Hy3k6/m5k3dRfGTMXGvxhHFvkDTCTpvA= github.com/onsi/ginkgo/v2 v2.19.0/go.mod h1:rlwLi9PilAFJ8jCg9UE1QP6VBpd6/xj3SRC0d6TU0To= github.com/onsi/gomega v1.33.1 h1:dsYjIxxSR755MDmKVsaFQTE22ChNBcuuTWgkUDSubOk= github.com/onsi/gomega v1.33.1/go.mod h1:U4R44UsT+9eLIaYRB2a5qajjtQYn0hauxvRm16AVYg0= +github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U= +github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM= +github.com/opencontainers/image-spec v1.1.0-rc6 h1:XDqvyKsJEbRtATzkgItUqBA7QHk58yxX1Ov9HERHNqU= +github.com/opencontainers/image-spec v1.1.0-rc6/go.mod h1:W4s4sFTMaBeK1BQLXbG4AdM2szdn85PY75RI83NrTrM= github.com/peterbourgon/diskv v2.0.1+incompatible h1:UBdAOUP5p4RWqPBg048CAvpKN+vxiaj6gdUUzhl4XmI= github.com/peterbourgon/diskv v2.0.1+incompatible/go.mod h1:uqqh8zWWbv1HBMNONnaR/tNboyR3/BZd58JJSHlUSCU= +github.com/phayes/freeport v0.0.0-20220201140144-74d24b5ae9f5 h1:Ii+DKncOVM8Cu1Hc+ETb5K+23HdAMvESYE3ZJ5b5cMI= 
+github.com/phayes/freeport v0.0.0-20220201140144-74d24b5ae9f5/go.mod h1:iIss55rKnNBTvrwdmkUpLnDpZoAHvWaiq5+iMmen4AE= github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c h1:+mdjkGKdHQG3305AYmdv1U2eRNDiU2ErMBj1gwrq8eQ= github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c/go.mod h1:7rwL4CYBLnjLxUqIJNnCWiEdr3bn6IUYi15bNlnbCCU= +github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/poy/onpar v1.1.2 h1:QaNrNiZx0+Nar5dLgTVp5mXkyoVFIbepjyEoGSnhbAY= +github.com/poy/onpar v1.1.2/go.mod h1:6X8FLNoxyr9kkmnlqpK6LSoiOtrO6MICtWwEuWkLjzg= +github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw= +github.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5FsnadC4Ky3P0J6CfImo= +github.com/prometheus/client_golang v1.1.0/go.mod h1:I1FGZT9+L76gKKOs5djB6ezCbFQP1xR9D75/vuwEF3g= +github.com/prometheus/client_golang v1.16.0 h1:yk/hx9hDbrGHovbci4BY+pRMfSuuat626eFsHb7tmT8= +github.com/prometheus/client_golang v1.16.0/go.mod h1:Zsulrv/L9oM40tJ7T815tM89lFEugiJ9HzIqaAx4LKc= +github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= +github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= +github.com/prometheus/client_model v0.4.0 h1:5lQXD3cAg1OXBf4Wq03gTrXHeaV0TQvGfUooCfx1yqY= +github.com/prometheus/client_model v0.4.0/go.mod h1:oMQmHW1/JoDwqLtg57MGgP/Fb1CJEYF2imWWhWtMkYU= +github.com/prometheus/common v0.4.1/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4= +github.com/prometheus/common v0.6.0/go.mod h1:eBmuwkDJBwy6iBfxCBob6t6dR6ENT/y+J+Zk0j9GMYc= +github.com/prometheus/common v0.44.0 h1:+5BrQJwiBB9xsMygAB3TNvpQKOwlkc25LbISbrdOOfY= +github.com/prometheus/common v0.44.0/go.mod h1:ofAIvZbQ1e/nugmZGz4/qCb9Ap1VoSTIO7x0VV9VvuY= +github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= +github.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA= +github.com/prometheus/procfs v0.0.3/go.mod h1:4A/X28fw3Fc593LaREMrKMqOKvUAntwMDaekg4FpcdQ= +github.com/prometheus/procfs v0.12.0 h1:jluTpSng7V9hY0O2R9DzzJHYb2xULk9VTR1V1R/k6Bo= +github.com/prometheus/procfs v0.12.0/go.mod h1:pcuDEFsWDnvcgNzo4EEweacyhjeA9Zk3cnaOZAZEfOo= github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8= github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99yedzYV+kq4uf4= +github.com/rubenv/sql-migrate v1.5.2 h1:bMDqOnrJVV/6JQgQ/MxOpU+AdO8uzYYA/TxFUBzFtS0= +github.com/rubenv/sql-migrate v1.5.2/go.mod h1:H38GW8Vqf8F0Su5XignRyaRcbXbJunSWxs+kmzlg0Is= +github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/sergi/go-diff v1.1.0 h1:we8PVUC3FE2uYfodKH/nBHMSetSfHDR6scGdBi+erh0= github.com/sergi/go-diff v1.1.0/go.mod h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM= 
-github.com/spf13/cobra v1.7.0 h1:hyqWnYt1ZQShIddO5kBpj3vu05/++x6tJ6dg8EC572I= -github.com/spf13/cobra v1.7.0/go.mod h1:uLxZILRyS/50WlhOIKD7W6V5bgeIt+4sICxh6uRMrb0= +github.com/shopspring/decimal v1.2.0/go.mod h1:DKyhrW/HYNuLGql+MJL6WCR6knT2jwCFRcu2hWCYk4o= +github.com/shopspring/decimal v1.3.1 h1:2Usl1nmF/WZucqkFZhnfFYxxxu8LG21F6nPQBE5gKV8= +github.com/shopspring/decimal v1.3.1/go.mod h1:DKyhrW/HYNuLGql+MJL6WCR6knT2jwCFRcu2hWCYk4o= +github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo= +github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= +github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= +github.com/spf13/cast v1.3.1/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE= +github.com/spf13/cast v1.5.0 h1:rj3WzYc11XZaIZMPKmwP96zkFEnnAmV8s6XbB2aY32w= +github.com/spf13/cast v1.5.0/go.mod h1:SpXXQ5YoyJw6s3/6cMTQuxvgRl3PCJiyaX9p6b155UU= +github.com/spf13/cobra v1.8.0 h1:7aJaZx1B85qltLMc546zn58BxxfZdR/W22ej9CFoEf0= +github.com/spf13/cobra v1.8.0/go.mod h1:WXLWApfZ71AjXPya3WOlMsY9yMs7YeiHhFVlvLyhcho= github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= github.com/stretchr/objx v0.5.0 h1:1zr/of2m5FGMsad5YfcqgdqdWrIhu+EBEJRhR1U7z/c= github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= +github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU= +github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb h1:zGWFAtiMcyryUHoUjUJX0/lt1H2+i2Ka2n+D3DImSNo= +github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU= +github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 h1:EzJWgHovont7NscjpAxXsDA8S8BMYve8Y5+7cuRE7R0= +github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415/go.mod h1:GwrjFmJcFw6At/Gs6z4yjiIwzuJ1/+UwLxMQDVQXShQ= +github.com/xeipuuv/gojsonschema v1.2.0 h1:LhYJRs+L4fBtjZUfuSZIKGeVu0QRy8e5Xi7D17UxZ74= +github.com/xeipuuv/gojsonschema v1.2.0/go.mod h1:anYRn/JVcOK2ZgGU+IjEV4nwlhoK5sQluxsYJ78Id3Y= github.com/xlab/treeprint v1.2.0 h1:HzHnuAF1plUN2zGlAFHbSQP2qJ0ZAD3XF5XD7OesXRQ= github.com/xlab/treeprint v1.2.0/go.mod h1:gj5Gd3gPdKtR1ikdDK6fnFLdmIS0X30kTTuNd/WEJu0= github.com/yuin/goldmark v1.1.27/go.mod 
h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= +github.com/yvasiyarov/go-metrics v0.0.0-20140926110328-57bccd1ccd43 h1:+lm10QQTNSBd8DVTNGHx7o/IKu9HYDvLMffDhbyLccI= +github.com/yvasiyarov/go-metrics v0.0.0-20140926110328-57bccd1ccd43/go.mod h1:aX5oPXxHm3bOH+xeAttToC8pqch2ScQN/JoXYupl6xs= +github.com/yvasiyarov/gorelic v0.0.0-20141212073537-a9bba5b9ab50 h1:hlE8//ciYMztlGpl/VA+Zm1AcTPHYkHJPbHqE6WJUXE= +github.com/yvasiyarov/gorelic v0.0.0-20141212073537-a9bba5b9ab50/go.mod h1:NUSPSUX/bi6SeDMUh6brw0nXpxHnc96TguQh0+r/ssA= +github.com/yvasiyarov/newrelic_platform_go v0.0.0-20140908184405-b21fdbd4370f h1:ERexzlUfuTvpE74urLSbIQW0Z/6hF9t8U4NsJLaioAY= +github.com/yvasiyarov/newrelic_platform_go v0.0.0-20140908184405-b21fdbd4370f/go.mod h1:GlGEuHIJweS1mbCqG+7vt2nvWLzLLnRHbXz5JKd/Qbg= +go.opencensus.io v0.24.0 h1:y73uSU6J157QMP2kn2r30vwW1A2W2WFwSCGnAVxeaD0= +go.opencensus.io v0.24.0/go.mod h1:vNK8G9p7aAivkbmorf4v+7Hgx+Zs0yY+0fOtgBfjQKo= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.45.0 h1:x8Z78aZx8cOF0+Kkazoc7lwUNMGy0LrzEMxTm4BbTxg= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.45.0/go.mod h1:62CPTSry9QZtOaSsE3tOzhx6LzDhHnXJ6xHeMNNiM6Q= +go.opentelemetry.io/otel v1.19.0 h1:MuS/TNf4/j4IXsZuJegVzI1cwut7Qc00344rgH7p8bs= +go.opentelemetry.io/otel v1.19.0/go.mod h1:i0QyjOq3UPoTzff0PJB2N66fb4S0+rSbSB15/oyH9fY= +go.opentelemetry.io/otel/metric v1.19.0 h1:aTzpGtV0ar9wlV4Sna9sdJyII5jTVJEvKETPiOKwvpE= +go.opentelemetry.io/otel/metric v1.19.0/go.mod h1:L5rUsV9kM1IxCj1MmSdS+JQAcVm319EUrDVLrt7jqt8= +go.opentelemetry.io/otel/trace v1.19.0 h1:DFVQmlVbfVeOuBRrwdtaehRrWiL1JoVs9CPIQ1Dzxpg= +go.opentelemetry.io/otel/trace v1.19.0/go.mod h1:mfaSyvGyEJEI0nyV2I4qhNQnbBOUUmYZpYojqMnX2vo= go.starlark.net v0.0.0-20230525235612-a134d8f9ddca h1:VdD38733bfYv5tUZwEIskMM93VanwNIi5bIKnDrJdEY= go.starlark.net v0.0.0-20230525235612-a134d8f9ddca/go.mod h1:jxU+3+j+71eXOW14274+SmmuW82qJzl6iZSeqEtTGds= go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= go.uber.org/zap v1.26.0 h1:sI7k6L95XOKS281NhVKOFCUNIvv9e0w4BF8N3u+tCRo= go.uber.org/zap v1.26.0/go.mod h1:dtElttAiwGvoJ/vj4IwHBS/gXsEu/pZ50mUIRWuG0so= +golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= +golang.org/x/crypto v0.3.0/go.mod h1:hebNnKkNXi2UzZN1eVRvBB7co0a+JxK6XbPiWVs/3J4= golang.org/x/crypto v0.24.0 h1:mnl8DM0o513X8fdIkmyFE/5hTYxbwYOjDS/+rK6qpRI= golang.org/x/crypto v0.24.0/go.mod h1:Z1PMYSOR5nyMcyAVAIQSKCDwalqy85Aqn1x3Ws4L5DM= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= @@ -181,15 +441,23 @@ golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvx golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= golang.org/x/mod 
v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= +golang.org/x/mod v0.17.0 h1:zY54UmvipHiNd+pm+m0x9KhZ9hl1/7QNMyxXbc6ICqA= +golang.org/x/mod v0.17.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= +golang.org/x/net v0.0.0-20190613194153-d28f0bde5980/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= +golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= +golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= +golang.org/x/net v0.2.0/go.mod h1:KqCZLdyyvdV855qA2rE3GC2aiw5xGR5TEjj8smXukLY= golang.org/x/net v0.26.0 h1:soB7SVo0PWrY4vPW/+ay0jKDNScG2X9wFeYlXIvJsOQ= golang.org/x/net v0.26.0/go.mod h1:5YKkiSynbBIh3p6iOc/vibscux0x38BZDkn8sCUPxHE= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= @@ -197,27 +465,45 @@ golang.org/x/oauth2 v0.12.0 h1:smVPGxink+n1ZI5pkQa8y6fZT0RW0MgCO5bFpepy4B4= golang.org/x/oauth2 v0.12.0/go.mod h1:A74bZ3aGXgCY0qaIC9Ahg6Lglin4AMAco8cIv9baba4= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.7.0 h1:YsImfSBoP9QPYL0xyKJPq0gcaJdG3rInoqxTWbfQu9M= golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod 
h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190801041406-cbf593c0f2f3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210616094352-59db8d763f22/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.21.0 h1:rF+pYz3DAGSQAxAu1CbC7catZg4ebC4UIeIhKxBZvws= golang.org/x/sys v0.21.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.0.0-20220526004731-065cf7ba2467/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= +golang.org/x/term v0.2.0/go.mod h1:TVmDHMZPmdnySmBfhjOoOdhjzdE1h4u1VwSiw2l1Nuc= golang.org/x/term v0.21.0 h1:WVXCp+/EBEHOj53Rvu+7KiT/iElMrO8ACK16SMZ3jaA= golang.org/x/term v0.21.0/go.mod h1:ooXLefLobQVslOqselCNF4SxFAaoS6KujMbsGzSDmX0= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= +golang.org/x/text v0.4.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.16.0 h1:a94ExnEXNtEwYLGJSIUxnWoxoRz/ZcCsV63ROupILh4= golang.org/x/text v0.16.0/go.mod h1:GhwF1Be+LQoKShO3cGOHzqOgRrGaYc9AvblQOmPVHnI= golang.org/x/time v0.3.0 h1:rg5rLMjNzMS1RkNLzCG38eapWhnYLFYXDXj2gOlr8j4= @@ -230,6 +516,7 @@ golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBn golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= +golang.org/x/tools v0.1.12/go.mod 
h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d h1:vU5i/LfpvrRCpgM/VPfJLg5KjxD3E+hfT1SH+d9zLwg= golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d/go.mod h1:aiJjzUbINMkxbQROHiO6hDPo2LHcIPhhQsa9DLh0yGk= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= @@ -243,9 +530,13 @@ google.golang.org/appengine v1.6.7/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCID google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo= +google.golang.org/genproto/googleapis/rpc v0.0.0-20230822172742-b8732ec3820d h1:uvYuEyMHKNt+lT4K3bN6fGswmK8qSvcreM3BwjDh+y4= +google.golang.org/genproto/googleapis/rpc v0.0.0-20230822172742-b8732ec3820d/go.mod h1:+Bk1OCOj40wS2hwAMA+aCW9ypzm63QTBBHp6lQ3p+9M= google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= +google.golang.org/grpc v1.58.3 h1:BjnpXut1btbtgN/6sp+brB2Kbm2LjNXnidYujAVbSoQ= +google.golang.org/grpc v1.58.3/go.mod h1:tgX3ZQDlNJGU96V6yHh1T/JeoBQ2TXdr43YbYSsCJk0= google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= @@ -256,17 +547,26 @@ google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpAD google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c= google.golang.org/protobuf v1.33.0 h1:uNO2rsAINq/JlFpSdYEKIZ0uKD/R9cpdv0T+yoGwGmI= google.golang.org/protobuf v1.33.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= +gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc= gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= +gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.0/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod 
h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gotest.tools/v3 v3.4.0 h1:ZazjZUfuVeZGLAmlKKuyv3IKP5orXcwtOwDQH6YVr6o= +gotest.tools/v3 v3.4.0/go.mod h1:CtbdzLSsqVhDgMtKsx03ird5YTGB3ar27v0u/yKBW5g= +helm.sh/helm/v3 v3.15.2 h1:/3XINUFinJOBjQplGnjw92eLGpgXXp1L8chWPkCkDuw= +helm.sh/helm/v3 v3.15.2/go.mod h1:FzSIP8jDQaa6WAVg9F+OkKz7J0ZmAga4MABtTbsb9WQ= honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= k8s.io/api v0.30.2 h1:+ZhRj+28QT4UOH+BKznu4CBgPWgkXO7XAvMcMl0qKvI= @@ -275,16 +575,24 @@ k8s.io/apiextensions-apiserver v0.30.1 h1:4fAJZ9985BmpJG6PkoxVRpXv9vmPUOVzl614xa k8s.io/apiextensions-apiserver v0.30.1/go.mod h1:R4GuSrlhgq43oRY9sF2IToFh7PVlF1JjfWdoG3pixk4= k8s.io/apimachinery v0.30.2 h1:fEMcnBj6qkzzPGSVsAZtQThU62SmQ4ZymlXRC5yFSCg= k8s.io/apimachinery v0.30.2/go.mod h1:iexa2somDaxdnj7bha06bhb43Zpa6eWH8N8dbqVjTUc= +k8s.io/apiserver v0.30.1 h1:BEWEe8bzS12nMtDKXzCF5Q5ovp6LjjYkSp8qOPk8LZ8= +k8s.io/apiserver v0.30.1/go.mod h1:i87ZnQ+/PGAmSbD/iEKM68bm1D5reX8fO4Ito4B01mo= k8s.io/cli-runtime v0.30.2 h1:ooM40eEJusbgHNEqnHziN9ZpLN5U4WcQGsdLKVxpkKE= k8s.io/cli-runtime v0.30.2/go.mod h1:Y4g/2XezFyTATQUbvV5WaChoUGhojv/jZAtdp5Zkm0A= k8s.io/client-go v0.30.2 h1:sBIVJdojUNPDU/jObC+18tXWcTJVcwyqS9diGdWHk50= k8s.io/client-go v0.30.2/go.mod h1:JglKSWULm9xlJLx4KCkfLLQ7XwtlbflV6uFFSHTMgVs= +k8s.io/component-base v0.30.1 h1:bvAtlPh1UrdaZL20D9+sWxsJljMi0QZ3Lmw+kmZAaxQ= +k8s.io/component-base v0.30.1/go.mod h1:e/X9kDiOebwlI41AvBHuWdqFriSRrX50CdwA9TFaHLI= k8s.io/klog/v2 v2.120.1 h1:QXU6cPEOIslTGvZaXvFWiP9VKyeet3sawzTOvdXb4Vw= k8s.io/klog/v2 v2.120.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE= k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340 h1:BZqlfIlq5YbRMFko6/PM7FjZpUb45WallggurYhKGag= k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340/go.mod h1:yD4MZYeKMBwQKVht279WycxKyM84kkAx2DPrTXaeb98= +k8s.io/kubectl v0.30.0 h1:xbPvzagbJ6RNYVMVuiHArC1grrV5vSmmIcSZuCdzRyk= +k8s.io/kubectl v0.30.0/go.mod h1:zgolRw2MQXLPwmic2l/+iHs239L49fhSeICuMhQQXTI= k8s.io/utils v0.0.0-20230726121419-3b25d923346b h1:sgn3ZU783SCgtaSJjpcVVlRqd6GSnlTLKgpAAttJvpI= k8s.io/utils v0.0.0-20230726121419-3b25d923346b/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= +oras.land/oras-go v1.2.5 h1:XpYuAwAb0DfQsunIyMfeET92emK8km3W4yEzZvUbsTo= +oras.land/oras-go v1.2.5/go.mod h1:PuAwRShRZCsZb7g8Ar3jKKQR/2A/qN+pkYxIOd/FAoo= sigs.k8s.io/controller-runtime v0.18.4 h1:87+guW1zhvuPLh1PHybKdYFLU0YJp4FhJRmiHvm5BZw= sigs.k8s.io/controller-runtime v0.18.4/go.mod h1:TVoGrfdpbA9VRFaRnKgk9P5/atA0pMwq+f+msb9M8Sg= sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd h1:EDPBXCAspyGV4jQlpZSudPeMmr1bNJefnuqLsRAsHZo= @@ -295,5 +603,5 @@ sigs.k8s.io/kustomize/kyaml v0.14.3-0.20230601165947-6ce0bf390ce3 h1:W6cLQc5pnqM sigs.k8s.io/kustomize/kyaml v0.14.3-0.20230601165947-6ce0bf390ce3/go.mod h1:JWP1Fj0VWGHyw3YUPjXSQnRnrwezrZSrApfX5S0nIag= sigs.k8s.io/structured-merge-diff/v4 v4.4.1 h1:150L+0vs/8DA78h1u02ooW1/fFq/Lwr+sGiqlzvrtq4= sigs.k8s.io/structured-merge-diff/v4 v4.4.1/go.mod h1:N8hJocpFajUSSeSJ9bOZ77VzejKZaXsTtZo4/u7Io08= -sigs.k8s.io/yaml v1.3.0 h1:a2VclLzOGrwOHDiV8EfBGhvjHvP46CtW5j6POvhYGGo= -sigs.k8s.io/yaml v1.3.0/go.mod h1:GeOyir5tyXNByN85N/dRIT9es5UQNerPYEKK56eTBm8= +sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E= +sigs.k8s.io/yaml v1.4.0/go.mod h1:Ejl7/uTz7PSA4eKMyQCUTnhZYNmLIl+5c2lQPGR2BPY= From 4260701989c98fd13941b89e0778057c3f85c39c Mon Sep 17 
00:00:00 2001
From: Maria Oparka
Date: Tue, 2 Jul 2024 09:33:01 +0200
Subject: [PATCH 19/30] Change AnnotationNodeInfoProvider nested provider to interface

---
 .../annotation_node_info_provider.go          | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/cluster-autoscaler/processors/nodeinfosprovider/annotation_node_info_provider.go b/cluster-autoscaler/processors/nodeinfosprovider/annotation_node_info_provider.go
index 294bf76a553b..8497ac5064e9 100644
--- a/cluster-autoscaler/processors/nodeinfosprovider/annotation_node_info_provider.go
+++ b/cluster-autoscaler/processors/nodeinfosprovider/annotation_node_info_provider.go
@@ -29,19 +29,26 @@ import (
 // AnnotationNodeInfoProvider is a wrapper for MixedTemplateNodeInfoProvider.
 type AnnotationNodeInfoProvider struct {
-	mixedTemplateNodeInfoProvider *MixedTemplateNodeInfoProvider
+	templateNodeInfoProvider TemplateNodeInfoProvider
 }

-// NewAnnotationNodeInfoProvider returns AnnotationNodeInfoProvider.
+// NewAnnotationNodeInfoProvider returns AnnotationNodeInfoProvider wrapping MixedTemplateNodeInfoProvider.
 func NewAnnotationNodeInfoProvider(t *time.Duration, forceDaemonSets bool) *AnnotationNodeInfoProvider {
 	return &AnnotationNodeInfoProvider{
-		mixedTemplateNodeInfoProvider: NewMixedTemplateNodeInfoProvider(t, forceDaemonSets),
+		templateNodeInfoProvider: NewMixedTemplateNodeInfoProvider(t, forceDaemonSets),
+	}
+}
+
+// NewCustomAnnotationNodeInfoProvider returns AnnotationNodeInfoProvider wrapping TemplateNodeInfoProvider.
+func NewCustomAnnotationNodeInfoProvider(templateNodeInfoProvider TemplateNodeInfoProvider) *AnnotationNodeInfoProvider {
+	return &AnnotationNodeInfoProvider{
+		templateNodeInfoProvider: templateNodeInfoProvider,
 	}
 }

 // Process returns the nodeInfos set for this cluster.
func (p *AnnotationNodeInfoProvider) Process(ctx *context.AutoscalingContext, nodes []*apiv1.Node, daemonsets []*appsv1.DaemonSet, taintConfig taints.TaintConfig, currentTime time.Time) (map[string]*schedulerframework.NodeInfo, errors.AutoscalerError) { - nodeInfos, err := p.mixedTemplateNodeInfoProvider.Process(ctx, nodes, daemonsets, taintConfig, currentTime) + nodeInfos, err := p.templateNodeInfoProvider.Process(ctx, nodes, daemonsets, taintConfig, currentTime) if err != nil { return nil, err } From eeedaeabb08714d510c965937169fba18a87f028 Mon Sep 17 00:00:00 2001 From: Ismail Alidzhikov Date: Thu, 27 Jun 2024 14:50:46 +0300 Subject: [PATCH 20/30] vpa-updater: Add support for leader election --- vertical-pod-autoscaler/FAQ.md | 7 + vertical-pod-autoscaler/deploy/vpa-rbac.yaml | 37 ++ vertical-pod-autoscaler/go.mod | 2 +- vertical-pod-autoscaler/pkg/updater/main.go | 74 +++ .../pkg/utils/status/status_object.go | 2 +- .../k8s.io/apimachinery/pkg/util/uuid/uuid.go | 27 ++ .../client-go/tools/leaderelection/OWNERS | 11 + .../tools/leaderelection/healthzadaptor.go | 69 +++ .../tools/leaderelection/leaderelection.go | 420 ++++++++++++++++++ .../client-go/tools/leaderelection/metrics.go | 109 +++++ .../leaderelection/resourcelock/interface.go | 205 +++++++++ .../leaderelection/resourcelock/leaselock.go | 139 ++++++ .../leaderelection/resourcelock/multilock.go | 104 +++++ .../k8s.io/component-base/config/OWNERS | 13 + .../k8s.io/component-base/config/doc.go | 19 + .../config/options/leaderelectionconfig.go | 53 +++ .../k8s.io/component-base/config/types.go | 80 ++++ .../config/zz_generated.deepcopy.go | 73 +++ vertical-pod-autoscaler/vendor/modules.txt | 5 + 19 files changed, 1447 insertions(+), 2 deletions(-) create mode 100644 vertical-pod-autoscaler/vendor/k8s.io/apimachinery/pkg/util/uuid/uuid.go create mode 100644 vertical-pod-autoscaler/vendor/k8s.io/client-go/tools/leaderelection/OWNERS create mode 100644 vertical-pod-autoscaler/vendor/k8s.io/client-go/tools/leaderelection/healthzadaptor.go create mode 100644 vertical-pod-autoscaler/vendor/k8s.io/client-go/tools/leaderelection/leaderelection.go create mode 100644 vertical-pod-autoscaler/vendor/k8s.io/client-go/tools/leaderelection/metrics.go create mode 100644 vertical-pod-autoscaler/vendor/k8s.io/client-go/tools/leaderelection/resourcelock/interface.go create mode 100644 vertical-pod-autoscaler/vendor/k8s.io/client-go/tools/leaderelection/resourcelock/leaselock.go create mode 100644 vertical-pod-autoscaler/vendor/k8s.io/client-go/tools/leaderelection/resourcelock/multilock.go create mode 100644 vertical-pod-autoscaler/vendor/k8s.io/component-base/config/OWNERS create mode 100644 vertical-pod-autoscaler/vendor/k8s.io/component-base/config/doc.go create mode 100644 vertical-pod-autoscaler/vendor/k8s.io/component-base/config/options/leaderelectionconfig.go create mode 100644 vertical-pod-autoscaler/vendor/k8s.io/component-base/config/types.go create mode 100644 vertical-pod-autoscaler/vendor/k8s.io/component-base/config/zz_generated.deepcopy.go diff --git a/vertical-pod-autoscaler/FAQ.md b/vertical-pod-autoscaler/FAQ.md index 42ac7067229a..0e36a7649043 100644 --- a/vertical-pod-autoscaler/FAQ.md +++ b/vertical-pod-autoscaler/FAQ.md @@ -224,3 +224,10 @@ Name | Type | Description | Default `kube-api-burst` | Float64 | QPS burst limit when making requests to Kubernetes apiserver | 10.0 `use-admission-controller-status` | Bool | If true, updater will only evict pods when admission controller status is valid. 
| true `vpa-object-namespace` | String | Namespace to search for VPA objects. Empty means all namespaces will be used. | apiv1.NamespaceAll +`leader-elect` | Bool | Start a leader election client and gain leadership before executing the main loop. Enable this when running replicated components for high availability. | false +`leader-elect-lease-duration` | Duration | The duration that non-leader candidates will wait after observing a leadership renewal until attempting to acquire leadership of a led but unrenewed leader slot. This is effectively the maximum duration that a leader can be stopped before it is replaced by another candidate. This is only applicable if leader election is enabled. | 15s +`leader-elect-renew-deadline` | Duration | The interval between attempts by the acting master to renew a leadership slot before it stops leading. This must be less than the lease duration. This is only applicable if leader election is enabled. | 10s +`leader-elect-resource-lock` | String | The type of resource object that is used for locking during leader election. Supported options are 'leases', 'endpointsleases' and 'configmapsleases'. | "leases" +`leader-elect-resource-name` | String | The name of resource object that is used for locking during leader election. | "vpa-updater" +`leader-elect-resource-namespace` | String | The namespace of resource object that is used for locking during leader election. | "kube-system" +`leader-elect-retry-period` | Duration | The duration the clients should wait between attempting acquisition and renewal of a leadership. This is only applicable if leader election is enabled. | 2s diff --git a/vertical-pod-autoscaler/deploy/vpa-rbac.yaml b/vertical-pod-autoscaler/deploy/vpa-rbac.yaml index 45147c36b7ef..c182e8eb07c9 100644 --- a/vertical-pod-autoscaler/deploy/vpa-rbac.yaml +++ b/vertical-pod-autoscaler/deploy/vpa-rbac.yaml @@ -357,3 +357,40 @@ subjects: - kind: ServiceAccount name: vpa-updater namespace: kube-system +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: system:leader-locking-vpa-updater + namespace: kube-system +rules: + - apiGroups: + - "coordination.k8s.io" + resources: + - leases + verbs: + - create + - apiGroups: + - "coordination.k8s.io" + resourceNames: + - vpa-updater + resources: + - leases + verbs: + - get + - watch + - update +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: system:leader-locking-vpa-updater + namespace: kube-system +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: system:leader-locking-vpa-updater +subjects: + - kind: ServiceAccount + name: vpa-updater + namespace: kube-system diff --git a/vertical-pod-autoscaler/go.mod b/vertical-pod-autoscaler/go.mod index 05d3befb4123..714442e384dc 100644 --- a/vertical-pod-autoscaler/go.mod +++ b/vertical-pod-autoscaler/go.mod @@ -8,6 +8,7 @@ require ( github.com/prometheus/client_golang v1.17.0 github.com/prometheus/client_model v0.4.1-0.20230718164431-9a2bf3000d16 github.com/prometheus/common v0.44.0 + github.com/spf13/pflag v1.0.5 github.com/stretchr/testify v1.8.2 golang.org/x/time v0.4.0 k8s.io/api v0.28.3 @@ -50,7 +51,6 @@ require ( github.com/pmezard/go-difflib v1.0.0 // indirect github.com/prometheus/procfs v0.11.1 // indirect github.com/spf13/cobra v1.7.0 // indirect - github.com/spf13/pflag v1.0.5 // indirect github.com/stretchr/objx v0.5.0 // indirect golang.org/x/net v0.21.0 // indirect golang.org/x/oauth2 v0.8.0 // indirect diff --git a/vertical-pod-autoscaler/pkg/updater/main.go 
b/vertical-pod-autoscaler/pkg/updater/main.go index c69b8c286494..c82397f572e6 100644 --- a/vertical-pod-autoscaler/pkg/updater/main.go +++ b/vertical-pod-autoscaler/pkg/updater/main.go @@ -22,10 +22,17 @@ import ( "os" "time" + "github.com/spf13/pflag" apiv1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/uuid" "k8s.io/client-go/informers" kube_client "k8s.io/client-go/kubernetes" + "k8s.io/client-go/tools/leaderelection" + "k8s.io/client-go/tools/leaderelection/resourcelock" kube_flag "k8s.io/component-base/cli/flag" + componentbaseconfig "k8s.io/component-base/config" + componentbaseoptions "k8s.io/component-base/config/options" "k8s.io/klog/v2" "k8s.io/autoscaler/vertical-pod-autoscaler/common" @@ -78,6 +85,10 @@ const ( func main() { klog.InitFlags(nil) + + leaderElection := defaultLeaderElectionConfiguration() + componentbaseoptions.BindLeaderElectionFlags(&leaderElection, pflag.CommandLine) + kube_flag.InitFlags() klog.V(1).Infof("Vertical Pod Autoscaler %s Updater", common.VerticalPodAutoscalerVersion) @@ -85,6 +96,69 @@ func main() { metrics.Initialize(*address, healthCheck) metrics_updater.Register() + if !leaderElection.LeaderElect { + run(healthCheck) + } else { + id, err := os.Hostname() + if err != nil { + klog.Fatalf("Unable to get hostname: %v", err) + } + id = id + "_" + string(uuid.NewUUID()) + + config := common.CreateKubeConfigOrDie(*kubeconfig, float32(*kubeApiQps), int(*kubeApiBurst)) + kubeClient := kube_client.NewForConfigOrDie(config) + + lock, err := resourcelock.New( + leaderElection.ResourceLock, + leaderElection.ResourceNamespace, + leaderElection.ResourceName, + kubeClient.CoreV1(), + kubeClient.CoordinationV1(), + resourcelock.ResourceLockConfig{ + Identity: id, + }, + ) + if err != nil { + klog.Fatalf("Unable to create leader election lock: %v", err) + } + + leaderelection.RunOrDie(context.TODO(), leaderelection.LeaderElectionConfig{ + Lock: lock, + LeaseDuration: leaderElection.LeaseDuration.Duration, + RenewDeadline: leaderElection.RenewDeadline.Duration, + RetryPeriod: leaderElection.RetryPeriod.Duration, + ReleaseOnCancel: true, + Callbacks: leaderelection.LeaderCallbacks{ + OnStartedLeading: func(_ context.Context) { + run(healthCheck) + }, + OnStoppedLeading: func() { + klog.Fatal("lost master") + }, + }, + }) + } +} + +const ( + defaultLeaseDuration = 15 * time.Second + defaultRenewDeadline = 10 * time.Second + defaultRetryPeriod = 2 * time.Second +) + +func defaultLeaderElectionConfiguration() componentbaseconfig.LeaderElectionConfiguration { + return componentbaseconfig.LeaderElectionConfiguration{ + LeaderElect: false, + LeaseDuration: metav1.Duration{Duration: defaultLeaseDuration}, + RenewDeadline: metav1.Duration{Duration: defaultRenewDeadline}, + RetryPeriod: metav1.Duration{Duration: defaultRetryPeriod}, + ResourceLock: resourcelock.LeasesResourceLock, + ResourceName: "vpa-updater", + ResourceNamespace: metav1.NamespaceSystem, + } +} + +func run(healthCheck *metrics.HealthCheck) { config := common.CreateKubeConfigOrDie(*kubeconfig, float32(*kubeApiQps), int(*kubeApiBurst)) kubeClient := kube_client.NewForConfigOrDie(config) vpaClient := vpa_clientset.NewForConfigOrDie(config) diff --git a/vertical-pod-autoscaler/pkg/utils/status/status_object.go b/vertical-pod-autoscaler/pkg/utils/status/status_object.go index 8277db362006..232da4964b50 100644 --- a/vertical-pod-autoscaler/pkg/utils/status/status_object.go +++ b/vertical-pod-autoscaler/pkg/utils/status/status_object.go @@ -37,7 +37,7 @@ 
const ( AdmissionControllerStatusName = "vpa-admission-controller" // AdmissionControllerStatusNamespace is the namespace of // the Admission Controller status object. - AdmissionControllerStatusNamespace = "kube-system" + AdmissionControllerStatusNamespace = metav1.NamespaceSystem // AdmissionControllerStatusTimeout is a time after which // if not updated the Admission Controller status is no longer valid. AdmissionControllerStatusTimeout = 1 * time.Minute diff --git a/vertical-pod-autoscaler/vendor/k8s.io/apimachinery/pkg/util/uuid/uuid.go b/vertical-pod-autoscaler/vendor/k8s.io/apimachinery/pkg/util/uuid/uuid.go new file mode 100644 index 000000000000..1fa351aab60f --- /dev/null +++ b/vertical-pod-autoscaler/vendor/k8s.io/apimachinery/pkg/util/uuid/uuid.go @@ -0,0 +1,27 @@ +/* +Copyright 2014 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package uuid + +import ( + "github.com/google/uuid" + + "k8s.io/apimachinery/pkg/types" +) + +func NewUUID() types.UID { + return types.UID(uuid.New().String()) +} diff --git a/vertical-pod-autoscaler/vendor/k8s.io/client-go/tools/leaderelection/OWNERS b/vertical-pod-autoscaler/vendor/k8s.io/client-go/tools/leaderelection/OWNERS new file mode 100644 index 000000000000..908bdacdfeec --- /dev/null +++ b/vertical-pod-autoscaler/vendor/k8s.io/client-go/tools/leaderelection/OWNERS @@ -0,0 +1,11 @@ +# See the OWNERS docs at https://go.k8s.io/owners + +approvers: + - mikedanese +reviewers: + - wojtek-t + - deads2k + - mikedanese + - ingvagabund +emeritus_approvers: + - timothysc diff --git a/vertical-pod-autoscaler/vendor/k8s.io/client-go/tools/leaderelection/healthzadaptor.go b/vertical-pod-autoscaler/vendor/k8s.io/client-go/tools/leaderelection/healthzadaptor.go new file mode 100644 index 000000000000..b9353729190d --- /dev/null +++ b/vertical-pod-autoscaler/vendor/k8s.io/client-go/tools/leaderelection/healthzadaptor.go @@ -0,0 +1,69 @@ +/* +Copyright 2015 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package leaderelection + +import ( + "net/http" + "sync" + "time" +) + +// HealthzAdaptor associates the /healthz endpoint with the LeaderElection object. +// It helps deal with the /healthz endpoint being set up prior to the LeaderElection. +// This contains the code needed to act as an adaptor between the leader +// election code and the health check code. It allows us to provide health +// status about the leader election. Most specifically about if the leader +// has failed to renew without exiting the process.
In that case we should +// report not healthy and rely on the kubelet to take down the process. +type HealthzAdaptor struct { + pointerLock sync.Mutex + le *LeaderElector + timeout time.Duration +} + +// Name returns the name of the health check we are implementing. +func (l *HealthzAdaptor) Name() string { + return "leaderElection" +} + +// Check is called by the healthz endpoint handler. +// It fails (returns an error) if we own the lease but have not been able to renew it. +func (l *HealthzAdaptor) Check(req *http.Request) error { + l.pointerLock.Lock() + defer l.pointerLock.Unlock() + if l.le == nil { + return nil + } + return l.le.Check(l.timeout) +} + +// SetLeaderElection ties a leader election object to a HealthzAdaptor +func (l *HealthzAdaptor) SetLeaderElection(le *LeaderElector) { + l.pointerLock.Lock() + defer l.pointerLock.Unlock() + l.le = le +} + +// NewLeaderHealthzAdaptor creates a basic healthz adaptor to monitor a leader election. +// timeout determines the time beyond the lease expiry to be allowed for timeout. +// Checks within the timeout period after the lease expires will still return healthy. +func NewLeaderHealthzAdaptor(timeout time.Duration) *HealthzAdaptor { + result := &HealthzAdaptor{ + timeout: timeout, + } + return result +} diff --git a/vertical-pod-autoscaler/vendor/k8s.io/client-go/tools/leaderelection/leaderelection.go b/vertical-pod-autoscaler/vendor/k8s.io/client-go/tools/leaderelection/leaderelection.go new file mode 100644 index 000000000000..c1151baf2073 --- /dev/null +++ b/vertical-pod-autoscaler/vendor/k8s.io/client-go/tools/leaderelection/leaderelection.go @@ -0,0 +1,420 @@ +/* +Copyright 2015 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Package leaderelection implements leader election of a set of endpoints. +// It uses an annotation in the endpoints object to store the record of the +// election state. This implementation does not guarantee that only one +// client is acting as a leader (a.k.a. fencing). +// +// A client only acts on timestamps captured locally to infer the state of the +// leader election. The client does not consider timestamps in the leader +// election record to be accurate because these timestamps may not have been +// produced by a local clock. The implementation does not depend on their +// accuracy and only uses their change to indicate that another client has +// renewed the leader lease. Thus the implementation is tolerant to arbitrary +// clock skew, but is not tolerant to arbitrary clock skew rate. +// +// However the level of tolerance to skew rate can be configured by setting +// RenewDeadline and LeaseDuration appropriately. The tolerance expressed as a +// maximum tolerated ratio of time passed on the fastest node to time passed on +// the slowest node can be approximately achieved with a configuration that sets +// the same ratio of LeaseDuration to RenewDeadline.
For example if a user wanted +// to tolerate some nodes progressing forward in time twice as fast as other nodes, +// the user could set LeaseDuration to 60 seconds and RenewDeadline to 30 seconds. +// +// While not required, some method of clock synchronization between nodes in the +// cluster is highly recommended. It's important to keep in mind when configuring +// this client that the tolerance to skew rate varies inversely to master +// availability. +// +// Larger clusters often have a more lenient SLA for API latency. This should be +// taken into account when configuring the client. The rate of leader transitions +// should be monitored and RetryPeriod and LeaseDuration should be increased +// until the rate is stable and acceptably low. It's important to keep in mind +// when configuring this client that the tolerance to API latency varies inversely +// to master availability. +// +// DISCLAIMER: this is an alpha API. This library will likely change significantly +// or even be removed entirely in subsequent releases. Depend on this API at +// your own risk. +package leaderelection + +import ( + "bytes" + "context" + "fmt" + "sync" + "time" + + "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/runtime" + "k8s.io/apimachinery/pkg/util/wait" + rl "k8s.io/client-go/tools/leaderelection/resourcelock" + "k8s.io/klog/v2" + "k8s.io/utils/clock" +) + +const ( + JitterFactor = 1.2 +) + +// NewLeaderElector creates a LeaderElector from a LeaderElectionConfig +func NewLeaderElector(lec LeaderElectionConfig) (*LeaderElector, error) { + if lec.LeaseDuration <= lec.RenewDeadline { + return nil, fmt.Errorf("leaseDuration must be greater than renewDeadline") + } + if lec.RenewDeadline <= time.Duration(JitterFactor*float64(lec.RetryPeriod)) { + return nil, fmt.Errorf("renewDeadline must be greater than retryPeriod*JitterFactor") + } + if lec.LeaseDuration < 1 { + return nil, fmt.Errorf("leaseDuration must be greater than zero") + } + if lec.RenewDeadline < 1 { + return nil, fmt.Errorf("renewDeadline must be greater than zero") + } + if lec.RetryPeriod < 1 { + return nil, fmt.Errorf("retryPeriod must be greater than zero") + } + if lec.Callbacks.OnStartedLeading == nil { + return nil, fmt.Errorf("OnStartedLeading callback must not be nil") + } + if lec.Callbacks.OnStoppedLeading == nil { + return nil, fmt.Errorf("OnStoppedLeading callback must not be nil") + } + + if lec.Lock == nil { + return nil, fmt.Errorf("Lock must not be nil.") + } + id := lec.Lock.Identity() + if id == "" { + return nil, fmt.Errorf("Lock identity is empty") + } + + le := LeaderElector{ + config: lec, + clock: clock.RealClock{}, + metrics: globalMetricsFactory.newLeaderMetrics(), + } + le.metrics.leaderOff(le.config.Name) + return &le, nil +} + +type LeaderElectionConfig struct { + // Lock is the resource that will be used for locking + Lock rl.Interface + + // LeaseDuration is the duration that non-leader candidates will + // wait to force acquire leadership. This is measured against time of + // last observed ack. + // + // A client needs to wait a full LeaseDuration without observing a change to + // the record before it can attempt to take over. When all clients are + // shutdown and a new set of clients are started with different names against + // the same leader record, they must wait the full LeaseDuration before + // attempting to acquire the lease. 
Thus LeaseDuration should be as short as + // possible (within your tolerance for clock skew rate) to avoid possible + // long waits in this scenario. + // + // Core clients default this value to 15 seconds. + LeaseDuration time.Duration + // RenewDeadline is the duration that the acting master will retry + // refreshing leadership before giving up. + // + // Core clients default this value to 10 seconds. + RenewDeadline time.Duration + // RetryPeriod is the duration the LeaderElector clients should wait + // between tries of actions. + // + // Core clients default this value to 2 seconds. + RetryPeriod time.Duration + + // Callbacks are callbacks that are triggered during certain lifecycle + // events of the LeaderElector + Callbacks LeaderCallbacks + + // WatchDog is the associated health checker + // WatchDog may be nil if it is not needed/configured. + WatchDog *HealthzAdaptor + + // ReleaseOnCancel should be set true if the lock should be released + // when the run context is cancelled. If you set this to true, you must + // ensure all code guarded by this lease has successfully completed + // prior to cancelling the context, or you may have two processes + // simultaneously acting on the critical path. + ReleaseOnCancel bool + + // Name is the name of the resource lock for debugging + Name string +} + +// LeaderCallbacks are callbacks that are triggered during certain +// lifecycle events of the LeaderElector. These are invoked asynchronously. +// +// possible future callbacks: +// - OnChallenge() +type LeaderCallbacks struct { + // OnStartedLeading is called when a LeaderElector client starts leading + OnStartedLeading func(context.Context) + // OnStoppedLeading is called when a LeaderElector client stops leading + OnStoppedLeading func() + // OnNewLeader is called when the client observes a leader that is + // not the previously observed leader. This includes the first observed + // leader when the client starts. + OnNewLeader func(identity string) +} + +// LeaderElector is a leader election client. +type LeaderElector struct { + config LeaderElectionConfig + // internal bookkeeping + observedRecord rl.LeaderElectionRecord + observedRawRecord []byte + observedTime time.Time + // used to implement OnNewLeader(), may lag slightly from the + // value observedRecord.HolderIdentity if the transition has + // not yet been reported. + reportedLeader string + + // clock is a wrapper around time to allow for less flaky testing + clock clock.Clock + + // used to lock the observedRecord + observedRecordLock sync.Mutex + + metrics leaderMetricsAdapter +} + +// Run starts the leader election loop. Run will not return +// before leader election loop is stopped by ctx or it has +// stopped holding the leader lease +func (le *LeaderElector) Run(ctx context.Context) { + defer runtime.HandleCrash() + defer le.config.Callbacks.OnStoppedLeading() + + if !le.acquire(ctx) { + return // ctx signalled done + } + ctx, cancel := context.WithCancel(ctx) + defer cancel() + go le.config.Callbacks.OnStartedLeading(ctx) + le.renew(ctx) +} + +// RunOrDie starts a client with the provided config or panics if the config +// fails to validate.
RunOrDie blocks until leader election loop is +// stopped by ctx or it has stopped holding the leader lease +func RunOrDie(ctx context.Context, lec LeaderElectionConfig) { + le, err := NewLeaderElector(lec) + if err != nil { + panic(err) + } + if lec.WatchDog != nil { + lec.WatchDog.SetLeaderElection(le) + } + le.Run(ctx) +} + +// GetLeader returns the identity of the last observed leader or returns the empty string if +// no leader has yet been observed. +// This function is for informational purposes. (e.g. monitoring, logs, etc.) +func (le *LeaderElector) GetLeader() string { + return le.getObservedRecord().HolderIdentity +} + +// IsLeader returns true if the last observed leader was this client else returns false. +func (le *LeaderElector) IsLeader() bool { + return le.getObservedRecord().HolderIdentity == le.config.Lock.Identity() +} + +// acquire loops calling tryAcquireOrRenew and returns true immediately when tryAcquireOrRenew succeeds. +// Returns false if ctx signals done. +func (le *LeaderElector) acquire(ctx context.Context) bool { + ctx, cancel := context.WithCancel(ctx) + defer cancel() + succeeded := false + desc := le.config.Lock.Describe() + klog.Infof("attempting to acquire leader lease %v...", desc) + wait.JitterUntil(func() { + succeeded = le.tryAcquireOrRenew(ctx) + le.maybeReportTransition() + if !succeeded { + klog.V(4).Infof("failed to acquire lease %v", desc) + return + } + le.config.Lock.RecordEvent("became leader") + le.metrics.leaderOn(le.config.Name) + klog.Infof("successfully acquired lease %v", desc) + cancel() + }, le.config.RetryPeriod, JitterFactor, true, ctx.Done()) + return succeeded +} + +// renew loops calling tryAcquireOrRenew and returns immediately when tryAcquireOrRenew fails or ctx signals done. +func (le *LeaderElector) renew(ctx context.Context) { + defer le.config.Lock.RecordEvent("stopped leading") + ctx, cancel := context.WithCancel(ctx) + defer cancel() + wait.Until(func() { + timeoutCtx, timeoutCancel := context.WithTimeout(ctx, le.config.RenewDeadline) + defer timeoutCancel() + err := wait.PollImmediateUntil(le.config.RetryPeriod, func() (bool, error) { + return le.tryAcquireOrRenew(timeoutCtx), nil + }, timeoutCtx.Done()) + + le.maybeReportTransition() + desc := le.config.Lock.Describe() + if err == nil { + klog.V(5).Infof("successfully renewed lease %v", desc) + return + } + le.metrics.leaderOff(le.config.Name) + klog.Infof("failed to renew lease %v: %v", desc, err) + cancel() + }, le.config.RetryPeriod, ctx.Done()) + + // if we hold the lease, give it up + if le.config.ReleaseOnCancel { + le.release() + } +} + +// release attempts to release the leader lease if we have acquired it. +func (le *LeaderElector) release() bool { + if !le.IsLeader() { + return true + } + now := metav1.NewTime(le.clock.Now()) + leaderElectionRecord := rl.LeaderElectionRecord{ + LeaderTransitions: le.observedRecord.LeaderTransitions, + LeaseDurationSeconds: 1, + RenewTime: now, + AcquireTime: now, + } + if err := le.config.Lock.Update(context.TODO(), leaderElectionRecord); err != nil { + klog.Errorf("Failed to release lock: %v", err) + return false + } + + le.setObservedRecord(&leaderElectionRecord) + return true +} + +// tryAcquireOrRenew tries to acquire a leader lease if it is not already acquired, +// else it tries to renew the lease if it has already been acquired. Returns true +// on success else returns false. 
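// Note: tryAcquireOrRenew below is the single state-transition step of the
// election protocol; both the acquire and renew loops above drive it.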
+func (le *LeaderElector) tryAcquireOrRenew(ctx context.Context) bool { + now := metav1.NewTime(le.clock.Now()) + leaderElectionRecord := rl.LeaderElectionRecord{ + HolderIdentity: le.config.Lock.Identity(), + LeaseDurationSeconds: int(le.config.LeaseDuration / time.Second), + RenewTime: now, + AcquireTime: now, + } + + // 1. obtain or create the ElectionRecord + oldLeaderElectionRecord, oldLeaderElectionRawRecord, err := le.config.Lock.Get(ctx) + if err != nil { + if !errors.IsNotFound(err) { + klog.Errorf("error retrieving resource lock %v: %v", le.config.Lock.Describe(), err) + return false + } + if err = le.config.Lock.Create(ctx, leaderElectionRecord); err != nil { + klog.Errorf("error initially creating leader election record: %v", err) + return false + } + + le.setObservedRecord(&leaderElectionRecord) + + return true + } + + // 2. Record obtained, check the Identity & Time + if !bytes.Equal(le.observedRawRecord, oldLeaderElectionRawRecord) { + le.setObservedRecord(oldLeaderElectionRecord) + + le.observedRawRecord = oldLeaderElectionRawRecord + } + if len(oldLeaderElectionRecord.HolderIdentity) > 0 && + le.observedTime.Add(time.Second*time.Duration(oldLeaderElectionRecord.LeaseDurationSeconds)).After(now.Time) && + !le.IsLeader() { + klog.V(4).Infof("lock is held by %v and has not yet expired", oldLeaderElectionRecord.HolderIdentity) + return false + } + + // 3. We're going to try to update. The leaderElectionRecord is set to its default + // here. Let's correct it before updating. + if le.IsLeader() { + leaderElectionRecord.AcquireTime = oldLeaderElectionRecord.AcquireTime + leaderElectionRecord.LeaderTransitions = oldLeaderElectionRecord.LeaderTransitions + } else { + leaderElectionRecord.LeaderTransitions = oldLeaderElectionRecord.LeaderTransitions + 1 + } + + // update the lock itself + if err = le.config.Lock.Update(ctx, leaderElectionRecord); err != nil { + klog.Errorf("Failed to update lock: %v", err) + return false + } + + le.setObservedRecord(&leaderElectionRecord) + return true +} + +func (le *LeaderElector) maybeReportTransition() { + if le.observedRecord.HolderIdentity == le.reportedLeader { + return + } + le.reportedLeader = le.observedRecord.HolderIdentity + if le.config.Callbacks.OnNewLeader != nil { + go le.config.Callbacks.OnNewLeader(le.reportedLeader) + } +} + +// Check will determine if the current lease is expired by more than timeout. +func (le *LeaderElector) Check(maxTolerableExpiredLease time.Duration) error { + if !le.IsLeader() { + // Currently not concerned with the case that we are hot standby + return nil + } + // If more than LeaseDuration plus the tolerated timeout has passed since the last + // observed renewal, it is time to start reporting ourselves as unhealthy. We should have + // died but conditions like deadlock can prevent this. (See #70819) + if le.clock.Since(le.observedTime) > le.config.LeaseDuration+maxTolerableExpiredLease { + return fmt.Errorf("failed election to renew leadership on lease %s", le.config.Name) + } + + return nil +} + +// setObservedRecord will set a new observedRecord and update observedTime to the current time. +// Protect critical sections with lock. +func (le *LeaderElector) setObservedRecord(observedRecord *rl.LeaderElectionRecord) { + le.observedRecordLock.Lock() + defer le.observedRecordLock.Unlock() + + le.observedRecord = *observedRecord + le.observedTime = le.clock.Now() +} + +// getObservedRecord returns the observed record. +// Protect critical sections with lock.
+func (le *LeaderElector) getObservedRecord() rl.LeaderElectionRecord { + le.observedRecordLock.Lock() + defer le.observedRecordLock.Unlock() + + return le.observedRecord +} diff --git a/vertical-pod-autoscaler/vendor/k8s.io/client-go/tools/leaderelection/metrics.go b/vertical-pod-autoscaler/vendor/k8s.io/client-go/tools/leaderelection/metrics.go new file mode 100644 index 000000000000..65917bf88e1d --- /dev/null +++ b/vertical-pod-autoscaler/vendor/k8s.io/client-go/tools/leaderelection/metrics.go @@ -0,0 +1,109 @@ +/* +Copyright 2018 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package leaderelection + +import ( + "sync" +) + +// This file provides abstractions for setting the provider (e.g., prometheus) +// of metrics. + +type leaderMetricsAdapter interface { + leaderOn(name string) + leaderOff(name string) +} + +// SwitchMetric represents a boolean state, keyed by name, that can be +// switched on and off. +type SwitchMetric interface { + On(name string) + Off(name string) +} + +type noopMetric struct{} + +func (noopMetric) On(name string) {} +func (noopMetric) Off(name string) {} + +// defaultLeaderMetrics expects the caller to lock before setting any metrics. +type defaultLeaderMetrics struct { + // leader's value indicates if the current process is the owner of name lease + leader SwitchMetric +} + +func (m *defaultLeaderMetrics) leaderOn(name string) { + if m == nil { + return + } + m.leader.On(name) +} + +func (m *defaultLeaderMetrics) leaderOff(name string) { + if m == nil { + return + } + m.leader.Off(name) +} + +type noMetrics struct{} + +func (noMetrics) leaderOn(name string) {} +func (noMetrics) leaderOff(name string) {} + +// MetricsProvider generates various metrics used by the leader election. +type MetricsProvider interface { + NewLeaderMetric() SwitchMetric +} + +type noopMetricsProvider struct{} + +func (_ noopMetricsProvider) NewLeaderMetric() SwitchMetric { + return noopMetric{} +} + +var globalMetricsFactory = leaderMetricsFactory{ + metricsProvider: noopMetricsProvider{}, +} + +type leaderMetricsFactory struct { + metricsProvider MetricsProvider + + onlyOnce sync.Once +} + +func (f *leaderMetricsFactory) setProvider(mp MetricsProvider) { + f.onlyOnce.Do(func() { + f.metricsProvider = mp + }) +} + +func (f *leaderMetricsFactory) newLeaderMetrics() leaderMetricsAdapter { + mp := f.metricsProvider + if mp == (noopMetricsProvider{}) { + return noMetrics{} + } + return &defaultLeaderMetrics{ + leader: mp.NewLeaderMetric(), + } +} + +// SetProvider sets the metrics provider for all subsequently created leader +// metrics. Only the first call has an effect.
+func SetProvider(metricsProvider MetricsProvider) { + globalMetricsFactory.setProvider(metricsProvider) +} diff --git a/vertical-pod-autoscaler/vendor/k8s.io/client-go/tools/leaderelection/resourcelock/interface.go b/vertical-pod-autoscaler/vendor/k8s.io/client-go/tools/leaderelection/resourcelock/interface.go new file mode 100644 index 000000000000..483753d632ca --- /dev/null +++ b/vertical-pod-autoscaler/vendor/k8s.io/client-go/tools/leaderelection/resourcelock/interface.go @@ -0,0 +1,205 @@ +/* +Copyright 2016 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package resourcelock + +import ( + "context" + "fmt" + clientset "k8s.io/client-go/kubernetes" + restclient "k8s.io/client-go/rest" + "time" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + coordinationv1 "k8s.io/client-go/kubernetes/typed/coordination/v1" + corev1 "k8s.io/client-go/kubernetes/typed/core/v1" +) + +const ( + LeaderElectionRecordAnnotationKey = "control-plane.alpha.kubernetes.io/leader" + endpointsResourceLock = "endpoints" + configMapsResourceLock = "configmaps" + LeasesResourceLock = "leases" + // When using endpointsLeasesResourceLock, you need to ensure that + // API Priority & Fairness is configured with non-default flow-schema + // that will catch the necessary operations on leader-election related + // endpoint objects. + // + // The example of such flow scheme could look like this: + // apiVersion: flowcontrol.apiserver.k8s.io/v1beta2 + // kind: FlowSchema + // metadata: + // name: my-leader-election + // spec: + // distinguisherMethod: + // type: ByUser + // matchingPrecedence: 200 + // priorityLevelConfiguration: + // name: leader-election # reference the PL + // rules: + // - resourceRules: + // - apiGroups: + // - "" + // namespaces: + // - '*' + // resources: + // - endpoints + // verbs: + // - get + // - create + // - update + // subjects: + // - kind: ServiceAccount + // serviceAccount: + // name: '*' + // namespace: kube-system + endpointsLeasesResourceLock = "endpointsleases" + // When using configMapsLeasesResourceLock, you need to ensure that + // API Priority & Fairness is configured with non-default flow-schema + // that will catch the necessary operations on leader-election related + // configmap objects. 
+ // + // The example of such flow scheme could look like this: + // apiVersion: flowcontrol.apiserver.k8s.io/v1beta2 + // kind: FlowSchema + // metadata: + // name: my-leader-election + // spec: + // distinguisherMethod: + // type: ByUser + // matchingPrecedence: 200 + // priorityLevelConfiguration: + // name: leader-election # reference the PL + // rules: + // - resourceRules: + // - apiGroups: + // - "" + // namespaces: + // - '*' + // resources: + // - configmaps + // verbs: + // - get + // - create + // - update + // subjects: + // - kind: ServiceAccount + // serviceAccount: + // name: '*' + // namespace: kube-system + configMapsLeasesResourceLock = "configmapsleases" +) + +// LeaderElectionRecord is the record that is stored in the leader election annotation. +// This information should be used for observational purposes only and could be replaced +// with a random string (e.g. UUID) with only slight modification of this code. +// TODO(mikedanese): this should potentially be versioned +type LeaderElectionRecord struct { + // HolderIdentity is the ID that owns the lease. If empty, no one owns this lease and + // all callers may acquire. Versions of this library prior to Kubernetes 1.14 will not + // attempt to acquire leases with empty identities and will wait for the full lease + // interval to expire before attempting to reacquire. This value is set to empty when + // a client voluntarily steps down. + HolderIdentity string `json:"holderIdentity"` + LeaseDurationSeconds int `json:"leaseDurationSeconds"` + AcquireTime metav1.Time `json:"acquireTime"` + RenewTime metav1.Time `json:"renewTime"` + LeaderTransitions int `json:"leaderTransitions"` +} + +// EventRecorder records a change in the ResourceLock. +type EventRecorder interface { + Eventf(obj runtime.Object, eventType, reason, message string, args ...interface{}) +} + +// ResourceLockConfig common data that exists across different +// resource locks +type ResourceLockConfig struct { + // Identity is the unique string identifying a lease holder across + // all participants in an election. + Identity string + // EventRecorder is optional. + EventRecorder EventRecorder +} + +// Interface offers a common interface for locking on arbitrary +// resources used in leader election. The Interface is used +// to hide the details on specific implementations in order to allow +// them to change over time. This interface is strictly for use +// by the leaderelection code. 
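// Illustrative usage (not part of this patch): callers normally obtain an
// implementation via New below rather than constructing a lock by hand,
// mirroring the vpa-updater wiring earlier in this patch:
//
//	lock, err := resourcelock.New(resourcelock.LeasesResourceLock,
//		"kube-system", "vpa-updater",
//		kubeClient.CoreV1(), kubeClient.CoordinationV1(),
//		resourcelock.ResourceLockConfig{Identity: id})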
+type Interface interface { + // Get returns the LeaderElectionRecord + Get(ctx context.Context) (*LeaderElectionRecord, []byte, error) + + // Create attempts to create a LeaderElectionRecord + Create(ctx context.Context, ler LeaderElectionRecord) error + + // Update will update an existing LeaderElectionRecord + Update(ctx context.Context, ler LeaderElectionRecord) error + + // RecordEvent is used to record events + RecordEvent(string) + + // Identity will return the lock's Identity + Identity() string + + // Describe is used to convert details on current resource lock + // into a string + Describe() string +} + +// New will create a lock of a given type according to the input parameters +func New(lockType string, ns string, name string, coreClient corev1.CoreV1Interface, coordinationClient coordinationv1.CoordinationV1Interface, rlc ResourceLockConfig) (Interface, error) { + leaseLock := &LeaseLock{ + LeaseMeta: metav1.ObjectMeta{ + Namespace: ns, + Name: name, + }, + Client: coordinationClient, + LockConfig: rlc, + } + switch lockType { + case endpointsResourceLock: + return nil, fmt.Errorf("endpoints lock is removed, migrate to %s (using version v0.27.x)", endpointsLeasesResourceLock) + case configMapsResourceLock: + return nil, fmt.Errorf("configmaps lock is removed, migrate to %s (using version v0.27.x)", configMapsLeasesResourceLock) + case LeasesResourceLock: + return leaseLock, nil + case endpointsLeasesResourceLock: + return nil, fmt.Errorf("endpointsleases lock is removed, migrate to %s", LeasesResourceLock) + case configMapsLeasesResourceLock: + return nil, fmt.Errorf("configmapsleases lock is removed, migrated to %s", LeasesResourceLock) + default: + return nil, fmt.Errorf("Invalid lock-type %s", lockType) + } +} + +// NewFromKubeconfig will create a lock of a given type according to the input parameters. +// Timeout set for a client used to contact Kubernetes should be lower than +// RenewDeadline to keep a single hung request from forcing a leader loss. +// Setting it to max(time.Second, RenewDeadline/2) as a reasonable heuristic. +func NewFromKubeconfig(lockType string, ns string, name string, rlc ResourceLockConfig, kubeconfig *restclient.Config, renewDeadline time.Duration) (Interface, error) { + // shallow copy, do not modify the kubeconfig + config := *kubeconfig + timeout := renewDeadline / 2 + if timeout < time.Second { + timeout = time.Second + } + config.Timeout = timeout + leaderElectionClient := clientset.NewForConfigOrDie(restclient.AddUserAgent(&config, "leader-election")) + return New(lockType, ns, name, leaderElectionClient.CoreV1(), leaderElectionClient.CoordinationV1(), rlc) +} diff --git a/vertical-pod-autoscaler/vendor/k8s.io/client-go/tools/leaderelection/resourcelock/leaselock.go b/vertical-pod-autoscaler/vendor/k8s.io/client-go/tools/leaderelection/resourcelock/leaselock.go new file mode 100644 index 000000000000..8a9d7d60f2da --- /dev/null +++ b/vertical-pod-autoscaler/vendor/k8s.io/client-go/tools/leaderelection/resourcelock/leaselock.go @@ -0,0 +1,139 @@ +/* +Copyright 2018 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and +limitations under the License. +*/ + +package resourcelock + +import ( + "context" + "encoding/json" + "errors" + "fmt" + + coordinationv1 "k8s.io/api/coordination/v1" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + coordinationv1client "k8s.io/client-go/kubernetes/typed/coordination/v1" +) + +type LeaseLock struct { + // LeaseMeta should contain a Name and a Namespace of a + // LeaseMeta object that the LeaderElector will attempt to lead. + LeaseMeta metav1.ObjectMeta + Client coordinationv1client.LeasesGetter + LockConfig ResourceLockConfig + lease *coordinationv1.Lease +} + +// Get returns the election record from a Lease spec +func (ll *LeaseLock) Get(ctx context.Context) (*LeaderElectionRecord, []byte, error) { + lease, err := ll.Client.Leases(ll.LeaseMeta.Namespace).Get(ctx, ll.LeaseMeta.Name, metav1.GetOptions{}) + if err != nil { + return nil, nil, err + } + ll.lease = lease + record := LeaseSpecToLeaderElectionRecord(&ll.lease.Spec) + recordByte, err := json.Marshal(*record) + if err != nil { + return nil, nil, err + } + return record, recordByte, nil +} + +// Create attempts to create a Lease +func (ll *LeaseLock) Create(ctx context.Context, ler LeaderElectionRecord) error { + var err error + ll.lease, err = ll.Client.Leases(ll.LeaseMeta.Namespace).Create(ctx, &coordinationv1.Lease{ + ObjectMeta: metav1.ObjectMeta{ + Name: ll.LeaseMeta.Name, + Namespace: ll.LeaseMeta.Namespace, + }, + Spec: LeaderElectionRecordToLeaseSpec(&ler), + }, metav1.CreateOptions{}) + return err +} + +// Update will update an existing Lease spec. +func (ll *LeaseLock) Update(ctx context.Context, ler LeaderElectionRecord) error { + if ll.lease == nil { + return errors.New("lease not initialized, call get or create first") + } + ll.lease.Spec = LeaderElectionRecordToLeaseSpec(&ler) + + lease, err := ll.Client.Leases(ll.LeaseMeta.Namespace).Update(ctx, ll.lease, metav1.UpdateOptions{}) + if err != nil { + return err + } + + ll.lease = lease + return nil +} + +// RecordEvent in leader election while adding meta-data +func (ll *LeaseLock) RecordEvent(s string) { + if ll.LockConfig.EventRecorder == nil { + return + } + events := fmt.Sprintf("%v %v", ll.LockConfig.Identity, s) + subject := &coordinationv1.Lease{ObjectMeta: ll.lease.ObjectMeta} + // Populate the type meta, so we don't have to get it from the schema + subject.Kind = "Lease" + subject.APIVersion = coordinationv1.SchemeGroupVersion.String() + ll.LockConfig.EventRecorder.Eventf(subject, corev1.EventTypeNormal, "LeaderElection", events) +} + +// Describe is used to convert details on current resource lock +// into a string +func (ll *LeaseLock) Describe() string { + return fmt.Sprintf("%v/%v", ll.LeaseMeta.Namespace, ll.LeaseMeta.Name) +} + +// Identity returns the Identity of the lock +func (ll *LeaseLock) Identity() string { + return ll.LockConfig.Identity +} + +func LeaseSpecToLeaderElectionRecord(spec *coordinationv1.LeaseSpec) *LeaderElectionRecord { + var r LeaderElectionRecord + if spec.HolderIdentity != nil { + r.HolderIdentity = *spec.HolderIdentity + } + if spec.LeaseDurationSeconds != nil { + r.LeaseDurationSeconds = int(*spec.LeaseDurationSeconds) + } + if spec.LeaseTransitions != nil { + r.LeaderTransitions = int(*spec.LeaseTransitions) + } + if spec.AcquireTime != nil { + r.AcquireTime = metav1.Time{Time: spec.AcquireTime.Time} + } + if spec.RenewTime != nil { + r.RenewTime = metav1.Time{Time: spec.RenewTime.Time} + } + return &r + +} + 
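// Note: LeaderElectionRecordToLeaseSpec below is the inverse of
// LeaseSpecToLeaderElectionRecord above; Create and Update use it to write the
// election record back into the Lease spec.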
+func LeaderElectionRecordToLeaseSpec(ler *LeaderElectionRecord) coordinationv1.LeaseSpec { + leaseDurationSeconds := int32(ler.LeaseDurationSeconds) + leaseTransitions := int32(ler.LeaderTransitions) + return coordinationv1.LeaseSpec{ + HolderIdentity: &ler.HolderIdentity, + LeaseDurationSeconds: &leaseDurationSeconds, + AcquireTime: &metav1.MicroTime{Time: ler.AcquireTime.Time}, + RenewTime: &metav1.MicroTime{Time: ler.RenewTime.Time}, + LeaseTransitions: &leaseTransitions, + } +} diff --git a/vertical-pod-autoscaler/vendor/k8s.io/client-go/tools/leaderelection/resourcelock/multilock.go b/vertical-pod-autoscaler/vendor/k8s.io/client-go/tools/leaderelection/resourcelock/multilock.go new file mode 100644 index 000000000000..5ee1dcbb50f0 --- /dev/null +++ b/vertical-pod-autoscaler/vendor/k8s.io/client-go/tools/leaderelection/resourcelock/multilock.go @@ -0,0 +1,104 @@ +/* +Copyright 2019 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package resourcelock + +import ( + "bytes" + "context" + "encoding/json" + + apierrors "k8s.io/apimachinery/pkg/api/errors" +) + +const ( + UnknownLeader = "leaderelection.k8s.io/unknown" +) + +// MultiLock is used for lock migration +type MultiLock struct { + Primary Interface + Secondary Interface +} + +// Get returns the older election record of the lock +func (ml *MultiLock) Get(ctx context.Context) (*LeaderElectionRecord, []byte, error) { + primary, primaryRaw, err := ml.Primary.Get(ctx) + if err != nil { + return nil, nil, err + } + + secondary, secondaryRaw, err := ml.Secondary.Get(ctx) + if err != nil { + // Lock is held by old client + if apierrors.IsNotFound(err) && primary.HolderIdentity != ml.Identity() { + return primary, primaryRaw, nil + } + return nil, nil, err + } + + if primary.HolderIdentity != secondary.HolderIdentity { + primary.HolderIdentity = UnknownLeader + primaryRaw, err = json.Marshal(primary) + if err != nil { + return nil, nil, err + } + } + return primary, ConcatRawRecord(primaryRaw, secondaryRaw), nil +} + +// Create attempts to create both primary lock and secondary lock +func (ml *MultiLock) Create(ctx context.Context, ler LeaderElectionRecord) error { + err := ml.Primary.Create(ctx, ler) + if err != nil && !apierrors.IsAlreadyExists(err) { + return err + } + return ml.Secondary.Create(ctx, ler) +} + +// Update will update an existing annotation on both resources.
+func (ml *MultiLock) Update(ctx context.Context, ler LeaderElectionRecord) error { + err := ml.Primary.Update(ctx, ler) + if err != nil { + return err + } + _, _, err = ml.Secondary.Get(ctx) + if err != nil && apierrors.IsNotFound(err) { + return ml.Secondary.Create(ctx, ler) + } + return ml.Secondary.Update(ctx, ler) +} + +// RecordEvent in leader election while adding meta-data +func (ml *MultiLock) RecordEvent(s string) { + ml.Primary.RecordEvent(s) + ml.Secondary.RecordEvent(s) +} + +// Describe is used to convert details on current resource lock +// into a string +func (ml *MultiLock) Describe() string { + return ml.Primary.Describe() +} + +// Identity returns the Identity of the lock +func (ml *MultiLock) Identity() string { + return ml.Primary.Identity() +} + +func ConcatRawRecord(primaryRaw, secondaryRaw []byte) []byte { + return bytes.Join([][]byte{primaryRaw, secondaryRaw}, []byte(",")) +} diff --git a/vertical-pod-autoscaler/vendor/k8s.io/component-base/config/OWNERS b/vertical-pod-autoscaler/vendor/k8s.io/component-base/config/OWNERS new file mode 100644 index 000000000000..7243d3cc82a9 --- /dev/null +++ b/vertical-pod-autoscaler/vendor/k8s.io/component-base/config/OWNERS @@ -0,0 +1,13 @@ +# See the OWNERS docs at https://go.k8s.io/owners + +# Disable inheritance as this is an api owners file +options: + no_parent_owners: true +approvers: + - api-approvers +reviewers: + - api-reviewers +labels: + - kind/api-change + - sig/api-machinery + - sig/scheduling diff --git a/vertical-pod-autoscaler/vendor/k8s.io/component-base/config/doc.go b/vertical-pod-autoscaler/vendor/k8s.io/component-base/config/doc.go new file mode 100644 index 000000000000..dd0a5a53a7b0 --- /dev/null +++ b/vertical-pod-autoscaler/vendor/k8s.io/component-base/config/doc.go @@ -0,0 +1,19 @@ +/* +Copyright 2018 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// +k8s:deepcopy-gen=package + +package config // import "k8s.io/component-base/config" diff --git a/vertical-pod-autoscaler/vendor/k8s.io/component-base/config/options/leaderelectionconfig.go b/vertical-pod-autoscaler/vendor/k8s.io/component-base/config/options/leaderelectionconfig.go new file mode 100644 index 000000000000..bf2a44a0a832 --- /dev/null +++ b/vertical-pod-autoscaler/vendor/k8s.io/component-base/config/options/leaderelectionconfig.go @@ -0,0 +1,53 @@ +/* +Copyright 2017 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package options + +import ( + "github.com/spf13/pflag" + "k8s.io/component-base/config" +) + +// BindLeaderElectionFlags binds the LeaderElectionConfiguration struct fields to a flagset +func BindLeaderElectionFlags(l *config.LeaderElectionConfiguration, fs *pflag.FlagSet) { + fs.BoolVar(&l.LeaderElect, "leader-elect", l.LeaderElect, ""+ + "Start a leader election client and gain leadership before "+ + "executing the main loop. Enable this when running replicated "+ + "components for high availability.") + fs.DurationVar(&l.LeaseDuration.Duration, "leader-elect-lease-duration", l.LeaseDuration.Duration, ""+ + "The duration that non-leader candidates will wait after observing a leadership "+ + "renewal until attempting to acquire leadership of a led but unrenewed leader "+ + "slot. This is effectively the maximum duration that a leader can be stopped "+ + "before it is replaced by another candidate. This is only applicable if leader "+ + "election is enabled.") + fs.DurationVar(&l.RenewDeadline.Duration, "leader-elect-renew-deadline", l.RenewDeadline.Duration, ""+ + "The interval between attempts by the acting master to renew a leadership slot "+ + "before it stops leading. This must be less than the lease duration. "+ + "This is only applicable if leader election is enabled.") + fs.DurationVar(&l.RetryPeriod.Duration, "leader-elect-retry-period", l.RetryPeriod.Duration, ""+ + "The duration the clients should wait between attempting acquisition and renewal "+ + "of a leadership. This is only applicable if leader election is enabled.") + fs.StringVar(&l.ResourceLock, "leader-elect-resource-lock", l.ResourceLock, ""+ + "The type of resource object that is used for locking during "+ + "leader election. Supported options are 'leases', 'endpointsleases' "+ + "and 'configmapsleases'.") + fs.StringVar(&l.ResourceName, "leader-elect-resource-name", l.ResourceName, ""+ + "The name of resource object that is used for locking during "+ + "leader election.") + fs.StringVar(&l.ResourceNamespace, "leader-elect-resource-namespace", l.ResourceNamespace, ""+ + "The namespace of resource object that is used for locking during "+ + "leader election.") +} diff --git a/vertical-pod-autoscaler/vendor/k8s.io/component-base/config/types.go b/vertical-pod-autoscaler/vendor/k8s.io/component-base/config/types.go new file mode 100644 index 000000000000..e1b9469d76e7 --- /dev/null +++ b/vertical-pod-autoscaler/vendor/k8s.io/component-base/config/types.go @@ -0,0 +1,80 @@ +/* +Copyright 2018 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package config + +import ( + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +// ClientConnectionConfiguration contains details for constructing a client. +type ClientConnectionConfiguration struct { + // kubeconfig is the path to a KubeConfig file. + Kubeconfig string + // acceptContentTypes defines the Accept header sent by clients when connecting to a server, overriding the + // default value of 'application/json'. 
This field will control all connections to the server used by a particular + // client. + AcceptContentTypes string + // contentType is the content type used when sending data to the server from this client. + ContentType string + // qps controls the number of queries per second allowed for this connection. + QPS float32 + // burst allows extra queries to accumulate when a client is exceeding its rate. + Burst int32 +} + +// LeaderElectionConfiguration defines the configuration of leader election +// clients for components that can run with leader election enabled. +type LeaderElectionConfiguration struct { + // leaderElect enables a leader election client to gain leadership + // before executing the main loop. Enable this when running replicated + // components for high availability. + LeaderElect bool + // leaseDuration is the duration that non-leader candidates will wait + // after observing a leadership renewal until attempting to acquire + // leadership of a led but unrenewed leader slot. This is effectively the + // maximum duration that a leader can be stopped before it is replaced + // by another candidate. This is only applicable if leader election is + // enabled. + LeaseDuration metav1.Duration + // renewDeadline is the interval between attempts by the acting master to + // renew a leadership slot before it stops leading. This must be less + // than or equal to the lease duration. This is only applicable if leader + // election is enabled. + RenewDeadline metav1.Duration + // retryPeriod is the duration the clients should wait between attempting + // acquisition and renewal of a leadership. This is only applicable if + // leader election is enabled. + RetryPeriod metav1.Duration + // resourceLock indicates the resource object type that will be used to lock + // during leader election cycles. + ResourceLock string + // resourceName indicates the name of resource object that will be used to lock + // during leader election cycles. + ResourceName string + // resourceNamespace indicates the namespace of resource object that will be used to lock + // during leader election cycles. + ResourceNamespace string +} + +// DebuggingConfiguration holds configuration for Debugging related features. +type DebuggingConfiguration struct { + // enableProfiling enables profiling via web interface host:port/debug/pprof/ + EnableProfiling bool + // enableContentionProfiling enables block profiling, if + // enableProfiling is true. + EnableContentionProfiling bool +} diff --git a/vertical-pod-autoscaler/vendor/k8s.io/component-base/config/zz_generated.deepcopy.go b/vertical-pod-autoscaler/vendor/k8s.io/component-base/config/zz_generated.deepcopy.go new file mode 100644 index 000000000000..fb0c1f1e6a89 --- /dev/null +++ b/vertical-pod-autoscaler/vendor/k8s.io/component-base/config/zz_generated.deepcopy.go @@ -0,0 +1,73 @@ +//go:build !ignore_autogenerated +// +build !ignore_autogenerated + +/* +Copyright The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Code generated by deepcopy-gen. 
DO NOT EDIT. + +package config + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ClientConnectionConfiguration) DeepCopyInto(out *ClientConnectionConfiguration) { + *out = *in + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ClientConnectionConfiguration. +func (in *ClientConnectionConfiguration) DeepCopy() *ClientConnectionConfiguration { + if in == nil { + return nil + } + out := new(ClientConnectionConfiguration) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *DebuggingConfiguration) DeepCopyInto(out *DebuggingConfiguration) { + *out = *in + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DebuggingConfiguration. +func (in *DebuggingConfiguration) DeepCopy() *DebuggingConfiguration { + if in == nil { + return nil + } + out := new(DebuggingConfiguration) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *LeaderElectionConfiguration) DeepCopyInto(out *LeaderElectionConfiguration) { + *out = *in + out.LeaseDuration = in.LeaseDuration + out.RenewDeadline = in.RenewDeadline + out.RetryPeriod = in.RetryPeriod + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new LeaderElectionConfiguration. +func (in *LeaderElectionConfiguration) DeepCopy() *LeaderElectionConfiguration { + if in == nil { + return nil + } + out := new(LeaderElectionConfiguration) + in.DeepCopyInto(out) + return out +} diff --git a/vertical-pod-autoscaler/vendor/modules.txt b/vertical-pod-autoscaler/vendor/modules.txt index 10accfb3f565..b65d914a54d7 100644 --- a/vertical-pod-autoscaler/vendor/modules.txt +++ b/vertical-pod-autoscaler/vendor/modules.txt @@ -314,6 +314,7 @@ k8s.io/apimachinery/pkg/util/net k8s.io/apimachinery/pkg/util/runtime k8s.io/apimachinery/pkg/util/sets k8s.io/apimachinery/pkg/util/strategicpatch +k8s.io/apimachinery/pkg/util/uuid k8s.io/apimachinery/pkg/util/validation k8s.io/apimachinery/pkg/util/validation/field k8s.io/apimachinery/pkg/util/version @@ -625,6 +626,8 @@ k8s.io/client-go/tools/clientcmd k8s.io/client-go/tools/clientcmd/api k8s.io/client-go/tools/clientcmd/api/latest k8s.io/client-go/tools/clientcmd/api/v1 +k8s.io/client-go/tools/leaderelection +k8s.io/client-go/tools/leaderelection/resourcelock k8s.io/client-go/tools/metrics k8s.io/client-go/tools/pager k8s.io/client-go/tools/record @@ -640,6 +643,8 @@ k8s.io/client-go/util/workqueue # k8s.io/component-base v0.28.3 => k8s.io/component-base v0.28.3 ## explicit; go 1.20 k8s.io/component-base/cli/flag +k8s.io/component-base/config +k8s.io/component-base/config/options k8s.io/component-base/metrics k8s.io/component-base/metrics/legacyregistry k8s.io/component-base/metrics/prometheus/restclient From 211ecdc329f2a2a6767cccc915fa9d7802024a54 Mon Sep 17 00:00:00 2001 From: Ismail Alidzhikov Date: Fri, 28 Jun 2024 09:57:37 +0300 Subject: [PATCH 21/30] vpa-recommender: Add support for leader election --- vertical-pod-autoscaler/FAQ.md | 7 ++ vertical-pod-autoscaler/deploy/vpa-rbac.yaml | 37 ++++++++ .../pkg/recommender/main.go | 92 +++++++++++++++++-- 3 files changed, 127 insertions(+), 9 deletions(-) diff --git a/vertical-pod-autoscaler/FAQ.md b/vertical-pod-autoscaler/FAQ.md index 
0e36a7649043..e9451ccb5229 100644 --- a/vertical-pod-autoscaler/FAQ.md +++ b/vertical-pod-autoscaler/FAQ.md @@ -203,6 +203,13 @@ Name | Type | Description | Default `memory-histogram-decay-half-life` | Duration | The amount of time it takes a historical memory usage sample to lose half of its weight. In other words, a fresh usage sample is twice as 'important' as one with age equal to the half life period. | model.DefaultMemoryHistogramDecayHalfLife `cpu-histogram-decay-half-life` | Duration | The amount of time it takes a historical CPU usage sample to lose half of its weight. | model.DefaultCPUHistogramDecayHalfLife `cpu-integer-post-processor-enabled` | Bool | Enable the CPU integer recommendation post processor | false +`leader-elect` | Bool | Start a leader election client and gain leadership before executing the main loop. Enable this when running replicated components for high availability. | false +`leader-elect-lease-duration` | Duration | The duration that non-leader candidates will wait after observing a leadership renewal until attempting to acquire leadership of a led but unrenewed leader slot. This is effectively the maximum duration that a leader can be stopped before it is replaced by another candidate. This is only applicable if leader election is enabled. | 15s +`leader-elect-renew-deadline` | Duration | The interval between attempts by the acting master to renew a leadership slot before it stops leading. This must be less than the lease duration. This is only applicable if leader election is enabled. | 10s +`leader-elect-resource-lock` | String | The type of resource object that is used for locking during leader election. Supported options are 'leases', 'endpointsleases' and 'configmapsleases'. | "leases" +`leader-elect-resource-name` | String | The name of resource object that is used for locking during leader election. | "vpa-recommender" +`leader-elect-resource-namespace` | String | The namespace of resource object that is used for locking during leader election. | "kube-system" +`leader-elect-retry-period` | Duration | The duration the clients should wait between attempting acquisition and renewal of a leadership. This is only applicable if leader election is enabled. | 2s ### What are the parameters to VPA updater? 
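For illustration of the leader election flags tabulated above: they are ordinary command-line flags, so enabling leader election on a replicated recommender amounts to passing them to the container. The fragment below is a hedged sketch, not part of this patch; the container name and replica count are assumptions, and the resource name/namespace flags merely restate the documented defaults. The RBAC additions in the next file grant the vpa-recommender ServiceAccount the lease permissions this requires.

```yaml
# Hypothetical vpa-recommender Deployment fragment: run two replicas and let
# leader election pick the single active one. Only --leader-elect is required;
# the other flags shown restate their defaults from the table above.
spec:
  replicas: 2
  template:
    spec:
      containers:
        - name: recommender        # assumed container name
          args:
            - --leader-elect=true
            - --leader-elect-resource-name=vpa-recommender
            - --leader-elect-resource-namespace=kube-system
```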
diff --git a/vertical-pod-autoscaler/deploy/vpa-rbac.yaml b/vertical-pod-autoscaler/deploy/vpa-rbac.yaml index c182e8eb07c9..b04707a19f9b 100644 --- a/vertical-pod-autoscaler/deploy/vpa-rbac.yaml +++ b/vertical-pod-autoscaler/deploy/vpa-rbac.yaml @@ -394,3 +394,40 @@ subjects: - kind: ServiceAccount name: vpa-updater namespace: kube-system +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: system:leader-locking-vpa-recommender + namespace: kube-system +rules: + - apiGroups: + - "coordination.k8s.io" + resources: + - leases + verbs: + - create + - apiGroups: + - "coordination.k8s.io" + resourceNames: + - vpa-recommender + resources: + - leases + verbs: + - get + - watch + - update +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: system:leader-locking-vpa-recommender + namespace: kube-system +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: system:leader-locking-vpa-recommender +subjects: + - kind: ServiceAccount + name: vpa-recommender + namespace: kube-system diff --git a/vertical-pod-autoscaler/pkg/recommender/main.go b/vertical-pod-autoscaler/pkg/recommender/main.go index 595f06162701..ada01fd68c0b 100644 --- a/vertical-pod-autoscaler/pkg/recommender/main.go +++ b/vertical-pod-autoscaler/pkg/recommender/main.go @@ -19,15 +19,22 @@ package main import ( "context" "flag" + "os" "time" - resourceclient "k8s.io/metrics/pkg/client/clientset/versioned/typed/metrics/v1beta1" - + "github.com/spf13/pflag" apiv1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/uuid" "k8s.io/client-go/informers" kube_client "k8s.io/client-go/kubernetes" + "k8s.io/client-go/tools/leaderelection" + "k8s.io/client-go/tools/leaderelection/resourcelock" kube_flag "k8s.io/component-base/cli/flag" - klog "k8s.io/klog/v2" + componentbaseconfig "k8s.io/component-base/config" + componentbaseoptions "k8s.io/component-base/config/options" + "k8s.io/klog/v2" + resourceclient "k8s.io/metrics/pkg/client/clientset/versioned/typed/metrics/v1beta1" "k8s.io/autoscaler/vertical-pod-autoscaler/common" vpa_clientset "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/client/clientset/versioned" @@ -39,7 +46,7 @@ import ( "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/recommender/model" "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/recommender/routines" "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/target" - "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/target/controller_fetcher" + controllerfetcher "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/target/controller_fetcher" "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/metrics" metrics_quality "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/metrics/quality" metrics_recommender "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/metrics/recommender" @@ -107,9 +114,81 @@ const ( func main() { klog.InitFlags(nil) + + leaderElection := defaultLeaderElectionConfiguration() + componentbaseoptions.BindLeaderElectionFlags(&leaderElection, pflag.CommandLine) + kube_flag.InitFlags() klog.V(1).Infof("Vertical Pod Autoscaler %s Recommender: %v", common.VerticalPodAutoscalerVersion, *recommenderName) + healthCheck := metrics.NewHealthCheck(*metricsFetcherInterval*5, true) + metrics.Initialize(*address, healthCheck) + metrics_recommender.Register() + metrics_quality.Register() + + if !leaderElection.LeaderElect { + run(healthCheck) + } else { + id, err := os.Hostname() + if err != nil { + klog.Fatalf("Unable to get hostname: %v", err) + } + id = id + "_" + 
string(uuid.NewUUID()) + + config := common.CreateKubeConfigOrDie(*kubeconfig, float32(*kubeApiQps), int(*kubeApiBurst)) + kubeClient := kube_client.NewForConfigOrDie(config) + + lock, err := resourcelock.New( + leaderElection.ResourceLock, + leaderElection.ResourceNamespace, + leaderElection.ResourceName, + kubeClient.CoreV1(), + kubeClient.CoordinationV1(), + resourcelock.ResourceLockConfig{ + Identity: id, + }, + ) + if err != nil { + klog.Fatalf("Unable to create leader election lock: %v", err) + } + + leaderelection.RunOrDie(context.TODO(), leaderelection.LeaderElectionConfig{ + Lock: lock, + LeaseDuration: leaderElection.LeaseDuration.Duration, + RenewDeadline: leaderElection.RenewDeadline.Duration, + RetryPeriod: leaderElection.RetryPeriod.Duration, + ReleaseOnCancel: true, + Callbacks: leaderelection.LeaderCallbacks{ + OnStartedLeading: func(_ context.Context) { + run(healthCheck) + }, + OnStoppedLeading: func() { + klog.Fatal("lost master") + }, + }, + }) + } +} + +const ( + defaultLeaseDuration = 15 * time.Second + defaultRenewDeadline = 10 * time.Second + defaultRetryPeriod = 2 * time.Second +) + +func defaultLeaderElectionConfiguration() componentbaseconfig.LeaderElectionConfiguration { + return componentbaseconfig.LeaderElectionConfiguration{ + LeaderElect: false, + LeaseDuration: metav1.Duration{Duration: defaultLeaseDuration}, + RenewDeadline: metav1.Duration{Duration: defaultRenewDeadline}, + RetryPeriod: metav1.Duration{Duration: defaultRetryPeriod}, + ResourceLock: resourcelock.LeasesResourceLock, + ResourceName: "vpa-recommender", + ResourceNamespace: metav1.NamespaceSystem, + } +} + +func run(healthCheck *metrics.HealthCheck) { config := common.CreateKubeConfigOrDie(*kubeconfig, float32(*kubeApiQps), int(*kubeApiBurst)) kubeClient := kube_client.NewForConfigOrDie(config) clusterState := model.NewClusterState(aggregateContainerStateGCInterval) @@ -119,11 +198,6 @@ func main() { model.InitializeAggregationsConfig(model.NewAggregationsConfig(*memoryAggregationInterval, *memoryAggregationIntervalCount, *memoryHistogramDecayHalfLife, *cpuHistogramDecayHalfLife, *oomBumpUpRatio, *oomMinBumpUp)) - healthCheck := metrics.NewHealthCheck(*metricsFetcherInterval*5, true) - metrics.Initialize(*address, healthCheck) - metrics_recommender.Register() - metrics_quality.Register() - useCheckpoints := *storage != "prometheus" var postProcessors []routines.RecommendationPostProcessor From d5c12cf95986ba7d6aac71557ba8726473cf6a6a Mon Sep 17 00:00:00 2001 From: Ismail Alidzhikov Date: Tue, 2 Jul 2024 09:39:11 +0300 Subject: [PATCH 22/30] Fix the failing liveness probe for the non-leader replica --- .../pkg/admission-controller/main.go | 2 +- vertical-pod-autoscaler/pkg/recommender/main.go | 5 ++++- vertical-pod-autoscaler/pkg/updater/main.go | 6 +++++- .../pkg/utils/metrics/healthcheck.go | 13 +++++++++++-- 4 files changed, 21 insertions(+), 5 deletions(-) diff --git a/vertical-pod-autoscaler/pkg/admission-controller/main.go b/vertical-pod-autoscaler/pkg/admission-controller/main.go index 835e465cfae9..16a3500e58ef 100644 --- a/vertical-pod-autoscaler/pkg/admission-controller/main.go +++ b/vertical-pod-autoscaler/pkg/admission-controller/main.go @@ -83,7 +83,7 @@ func main() { kube_flag.InitFlags() klog.V(1).Infof("Vertical Pod Autoscaler %s Admission Controller", common.VerticalPodAutoscalerVersion) - healthCheck := metrics.NewHealthCheck(time.Minute, false) + healthCheck := metrics.NewHealthCheck(time.Minute) metrics.Initialize(*address, healthCheck) metrics_admission.Register() diff 
--git a/vertical-pod-autoscaler/pkg/recommender/main.go b/vertical-pod-autoscaler/pkg/recommender/main.go
index ada01fd68c0b..f87831b007ba 100644
--- a/vertical-pod-autoscaler/pkg/recommender/main.go
+++ b/vertical-pod-autoscaler/pkg/recommender/main.go
@@ -121,7 +121,7 @@ func main() {
 	kube_flag.InitFlags()
 	klog.V(1).Infof("Vertical Pod Autoscaler %s Recommender: %v", common.VerticalPodAutoscalerVersion, *recommenderName)
 
-	healthCheck := metrics.NewHealthCheck(*metricsFetcherInterval*5, true)
+	healthCheck := metrics.NewHealthCheck(*metricsFetcherInterval * 5)
 	metrics.Initialize(*address, healthCheck)
 	metrics_recommender.Register()
 	metrics_quality.Register()
@@ -285,6 +285,9 @@ func run(healthCheck *metrics.HealthCheck) {
 		recommender.GetClusterStateFeeder().InitFromHistoryProvider(provider)
 	}
 
+	// Start updating health check endpoint.
+	healthCheck.StartMonitoring()
+
 	ticker := time.Tick(*metricsFetcherInterval)
 	for range ticker {
 		recommender.RunOnce()
diff --git a/vertical-pod-autoscaler/pkg/updater/main.go b/vertical-pod-autoscaler/pkg/updater/main.go
index c82397f572e6..7589005a164e 100644
--- a/vertical-pod-autoscaler/pkg/updater/main.go
+++ b/vertical-pod-autoscaler/pkg/updater/main.go
@@ -92,7 +92,7 @@ func main() {
 	kube_flag.InitFlags()
 	klog.V(1).Infof("Vertical Pod Autoscaler %s Updater", common.VerticalPodAutoscalerVersion)
 
-	healthCheck := metrics.NewHealthCheck(*updaterInterval*5, true)
+	healthCheck := metrics.NewHealthCheck(*updaterInterval * 5)
 	metrics.Initialize(*address, healthCheck)
 	metrics_updater.Register()
 
@@ -195,6 +195,10 @@ func run(healthCheck *metrics.HealthCheck) {
 	if err != nil {
 		klog.Fatalf("Failed to create updater: %v", err)
 	}
+
+	// Start updating health check endpoint.
+	healthCheck.StartMonitoring()
+
 	ticker := time.Tick(*updaterInterval)
 	for range ticker {
 		ctx, cancel := context.WithTimeout(context.Background(), *updaterInterval)
diff --git a/vertical-pod-autoscaler/pkg/utils/metrics/healthcheck.go b/vertical-pod-autoscaler/pkg/utils/metrics/healthcheck.go
index 78417dc13789..44003603a63c 100644
--- a/vertical-pod-autoscaler/pkg/utils/metrics/healthcheck.go
+++ b/vertical-pod-autoscaler/pkg/utils/metrics/healthcheck.go
@@ -35,15 +35,24 @@ type HealthCheck struct {
 }
 
 // NewHealthCheck builds new HealthCheck object with given timeout.
-func NewHealthCheck(activityTimeout time.Duration, checkTimeout bool) *HealthCheck {
+func NewHealthCheck(activityTimeout time.Duration) *HealthCheck {
 	return &HealthCheck{
 		activityTimeout: activityTimeout,
-		checkTimeout:    checkTimeout,
+		checkTimeout:    false,
 		lastActivity:    time.Now(),
 		mutex:           &sync.Mutex{},
 	}
}
 
+// StartMonitoring activates the inactivity checks for the component.
+func (hc *HealthCheck) StartMonitoring() {
+	hc.mutex.Lock()
+	defer hc.mutex.Unlock()
+
+	hc.checkTimeout = true
+	hc.lastActivity = time.Now()
+}
+
 // checkLastActivity returns true if the last activity was too long ago, with duration from it.
func (hc *HealthCheck) checkLastActivity() (bool, time.Duration) { hc.mutex.Lock() From 2824fc9b161b4e30c9b623c621e9a60c696e7693 Mon Sep 17 00:00:00 2001 From: Yaroslava Serdiuk Date: Tue, 2 Jul 2024 09:16:35 +0000 Subject: [PATCH 23/30] Do not fail if multiple ProvReqs are injected --- .../besteffortatomic/provisioning_class.go | 10 ++-- .../checkcapacity/provisioningclass.go | 12 ++--- .../provreqclient/client.go | 46 ++++++++++------ .../provreqclient/client_test.go | 54 +++++++------------ 4 files changed, 60 insertions(+), 62 deletions(-) diff --git a/cluster-autoscaler/provisioningrequest/besteffortatomic/provisioning_class.go b/cluster-autoscaler/provisioningrequest/besteffortatomic/provisioning_class.go index 0c7091c92e56..4adf8471d427 100644 --- a/cluster-autoscaler/provisioningrequest/besteffortatomic/provisioning_class.go +++ b/cluster-autoscaler/provisioningrequest/besteffortatomic/provisioning_class.go @@ -81,13 +81,13 @@ func (o *bestEffortAtomicProvClass) Provision( if len(unschedulablePods) == 0 { return &status.ScaleUpStatus{Result: status.ScaleUpNotTried}, nil } - pr, err := provreqclient.ProvisioningRequestForPods(o.client, unschedulablePods) - if err != nil { - return status.UpdateScaleUpError(&status.ScaleUpStatus{}, errors.NewAutoscalerError(errors.InternalError, err.Error())) - } - if pr.Spec.ProvisioningClassName != v1beta1.ProvisioningClassBestEffortAtomicScaleUp { + prs := provreqclient.ProvisioningRequestsForPods(o.client, unschedulablePods) + prs = provreqclient.FilterOutProvisioningClass(prs, v1beta1.ProvisioningClassBestEffortAtomicScaleUp) + if len(prs) == 0 { return &status.ScaleUpStatus{Result: status.ScaleUpNotTried}, nil } + // Pick 1 ProvisioningRequest. + pr := prs[0] o.context.ClusterSnapshot.Fork() defer o.context.ClusterSnapshot.Revert() diff --git a/cluster-autoscaler/provisioningrequest/checkcapacity/provisioningclass.go b/cluster-autoscaler/provisioningrequest/checkcapacity/provisioningclass.go index d535d433a648..c87f73bb73fe 100644 --- a/cluster-autoscaler/provisioningrequest/checkcapacity/provisioningclass.go +++ b/cluster-autoscaler/provisioningrequest/checkcapacity/provisioningclass.go @@ -73,14 +73,14 @@ func (o *checkCapacityProvClass) Provision( if len(unschedulablePods) == 0 { return &status.ScaleUpStatus{Result: status.ScaleUpNotTried}, nil } - pr, err := provreqclient.ProvisioningRequestForPods(o.client, unschedulablePods) - if err != nil { - return status.UpdateScaleUpError(&status.ScaleUpStatus{}, errors.NewAutoscalerError(errors.InternalError, err.Error())) - } - if pr.Spec.ProvisioningClassName != v1beta1.ProvisioningClassCheckCapacity { + + prs := provreqclient.ProvisioningRequestsForPods(o.client, unschedulablePods) + prs = provreqclient.FilterOutProvisioningClass(prs, v1beta1.ProvisioningClassCheckCapacity) + if len(prs) == 0 { return &status.ScaleUpStatus{Result: status.ScaleUpNotTried}, nil } - + // Pick 1 ProvisioningRequest. 
+	pr := prs[0]
 	o.context.ClusterSnapshot.Fork()
 	defer o.context.ClusterSnapshot.Revert()
diff --git a/cluster-autoscaler/provisioningrequest/provreqclient/client.go b/cluster-autoscaler/provisioningrequest/provreqclient/client.go
index f7ead2d5b3ca..ddfe8c9f40cd 100644
--- a/cluster-autoscaler/provisioningrequest/provreqclient/client.go
+++ b/cluster-autoscaler/provisioningrequest/provreqclient/client.go
@@ -178,30 +178,31 @@ func newPodTemplatesLister(client *kubernetes.Clientset, stopChannel <-chan stru
 	return podTemplLister, nil
 }
 
-// ProvisioningRequestForPods check that all pods belong to one ProvisioningRequest and return it.
-func ProvisioningRequestForPods(client *ProvisioningRequestClient, unschedulablePods []*apiv1.Pod) (*provreqwrapper.ProvisioningRequest, error) {
+// ProvisioningRequestsForPods returns all distinct ProvisioningRequests that the given unschedulable pods belong to.
+func ProvisioningRequestsForPods(client *ProvisioningRequestClient, unschedulablePods []*apiv1.Pod) []*provreqwrapper.ProvisioningRequest {
+	prMap := make(map[string]*provreqwrapper.ProvisioningRequest)
+	prList := []*provreqwrapper.ProvisioningRequest{}
 	if len(unschedulablePods) == 0 {
-		return nil, fmt.Errorf("empty unschedulablePods list")
-	}
-	if unschedulablePods[0].OwnerReferences == nil || len(unschedulablePods[0].OwnerReferences) == 0 {
-		return nil, fmt.Errorf("pod %s has no OwnerReference", unschedulablePods[0].Name)
-	}
-	provReq, err := client.ProvisioningRequest(unschedulablePods[0].Namespace, unschedulablePods[0].OwnerReferences[0].Name)
-	if err != nil {
-		return nil, fmt.Errorf("failed retrive ProvisioningRequest from unscheduled pods, err: %v", err)
+		return prList
 	}
 	for _, pod := range unschedulablePods {
-		if pod.Namespace != unschedulablePods[0].Namespace {
-			return nil, fmt.Errorf("pods %s and %s are from different namespaces", pod.Name, unschedulablePods[0].Name)
-		}
 		if pod.OwnerReferences == nil || len(pod.OwnerReferences) == 0 {
-			return nil, fmt.Errorf("pod %s has no OwnerReference", pod.Name)
+			klog.Errorf("pod %s has no OwnerReference", pod.Name)
+			continue
+		}
+		provReq, err := client.ProvisioningRequest(pod.Namespace, pod.OwnerReferences[0].Name)
+		if err != nil {
+			klog.Errorf("failed retrive ProvisioningRequest from unscheduled pods, err: %v", err)
+			continue
 		}
-		if pod.OwnerReferences[0].Name != unschedulablePods[0].OwnerReferences[0].Name {
-			return nil, fmt.Errorf("pods %s and %s have different OwnerReference", pod.Name, unschedulablePods[0].Name)
+		if prMap[provReq.Name] == nil {
+			prMap[provReq.Name] = provReq
 		}
 	}
-	return provReq, nil
+	for _, pr := range prMap {
+		prList = append(prList, pr)
+	}
+	return prList
 }
 
 // DeleteProvisioningRequest deletes the given ProvisioningRequest CR using the ProvisioningRequestInterface and returns an error in case of failure.
@@ -216,3 +217,14 @@ func (c *ProvisioningRequestClient) DeleteProvisioningRequest(pr *v1beta1.Provis
 	klog.V(4).Infof("Deleted ProvisioningRequest %s/%s", pr.Namespace, pr.Name)
 	return nil
 }
+
+// FilterOutProvisioningClass returns only the ProvReqs that belong to the given Provisioning Class, filtering out all others
+func FilterOutProvisioningClass(prList []*provreqwrapper.ProvisioningRequest, class string) []*provreqwrapper.ProvisioningRequest {
+	newPrList := []*provreqwrapper.ProvisioningRequest{}
+	for _, pr := range prList {
+		if pr.Spec.ProvisioningClassName == class {
+			newPrList = append(newPrList, pr)
+		}
+	}
+	return newPrList
+}
diff --git a/cluster-autoscaler/provisioningrequest/provreqclient/client_test.go b/cluster-autoscaler/provisioningrequest/provreqclient/client_test.go
index 15f9e10c3928..0ec9a610c62f 100644
--- a/cluster-autoscaler/provisioningrequest/provreqclient/client_test.go
+++ b/cluster-autoscaler/provisioningrequest/provreqclient/client_test.go
@@ -49,7 +49,7 @@ func TestFetchPodTemplates(t *testing.T) {
 	}
 }
 
-func TestProvisioningRequestForPods(t *testing.T) {
+func TestProvisioningRequestsForPods(t *testing.T) {
 	checkCapacityProvReq := provreqwrapper.BuildTestProvisioningRequest("ns", "check-capacity", "1m", "100", "", int32(100), false, time.Now(), v1beta1.ProvisioningClassCheckCapacity)
 	customProvReq := provreqwrapper.BuildTestProvisioningRequest("ns", "custom", "1m", "100", "", int32(100), false, time.Now(), "custom")
 	checkCapacityPods, _ := pods.PodsForProvisioningRequest(checkCapacityProvReq)
@@ -57,55 +57,41 @@ func TestProvisioningRequestForPods(t *testing.T) {
 	customProvReqPods, _ := pods.PodsForProvisioningRequest(customProvReq)
 	regularPod := BuildTestPod("p1", 600, 100)
 	client := NewFakeProvisioningRequestClient(context.Background(), t, checkCapacityProvReq, customProvReq)
 	testCases := []struct {
-		name      string
-		pods      []*apiv1.Pod
-		className string
-		err       bool
-		pr        *provreqwrapper.ProvisioningRequest
+		name string
+		pods []*apiv1.Pod
+		err  bool
+		prs  []*provreqwrapper.ProvisioningRequest
 	}{
 		{
-			name:      "no pods",
-			pods:      []*apiv1.Pod{},
-			className: "some-class",
-			err:       true,
+			name: "no pods",
+			pods: []*apiv1.Pod{},
 		},
 		{
-			name:      "pods from one Provisioning Class",
-			pods:      checkCapacityPods,
-			className: v1beta1.ProvisioningClassCheckCapacity,
-			pr:        checkCapacityProvReq,
+			name: "pods from one Provisioning Class",
+			pods: checkCapacityPods,
+			prs:  []*provreqwrapper.ProvisioningRequest{checkCapacityProvReq},
 		},
 		{
-			name:      "pods from different Provisioning Classes",
-			pods:      append(checkCapacityPods, customProvReqPods...),
-			className: v1beta1.ProvisioningClassCheckCapacity,
-			err:       true,
+			name: "pods from different Provisioning Classes",
+			pods: append(checkCapacityPods, customProvReqPods...),
+			prs:  []*provreqwrapper.ProvisioningRequest{checkCapacityProvReq, customProvReq},
 		},
 		{
-			name:      "regular pod",
-			pods:      []*apiv1.Pod{regularPod},
-			className: v1beta1.ProvisioningClassCheckCapacity,
-			err:       true,
+			name: "regular pod",
+			pods: []*apiv1.Pod{regularPod},
 		},
 		{
-			name:      "provreq pods and regular pod",
-			pods:      append(checkCapacityPods, regularPod),
-			className: v1beta1.ProvisioningClassCheckCapacity,
-			err:       true,
+			name: "provreq pods and regular pod",
+			pods: append(checkCapacityPods, regularPod),
+			prs:  []*provreqwrapper.ProvisioningRequest{checkCapacityProvReq},
 		},
 	}
 	for _, tc := range testCases {
 		tc := tc
 		t.Run(tc.name, func(t *testing.T) {
 			t.Parallel()
-			pr, err := ProvisioningRequestForPods(client, tc.pods)
-			if tc.err {
-				assert.Error(t, err)
-			} else {
-				assert.NoError(t, err)
-				assert.Equal(t, pr, tc.pr)
-				assert.Equal(t,
pr.Spec.ProvisioningClassName, tc.className) - } + got := ProvisioningRequestsForPods(client, tc.pods) + assert.ElementsMatch(t, got, tc.prs) }) } } From 81b6d3d7e8abc9f663b7f18e768ad3383c848292 Mon Sep 17 00:00:00 2001 From: Yaroslava Serdiuk Date: Tue, 2 Jul 2024 15:56:22 +0300 Subject: [PATCH 24/30] Update cluster-autoscaler/provisioningrequest/provreqclient/client.go MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Bartek Wróblewski --- cluster-autoscaler/provisioningrequest/provreqclient/client.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cluster-autoscaler/provisioningrequest/provreqclient/client.go b/cluster-autoscaler/provisioningrequest/provreqclient/client.go index ddfe8c9f40cd..e05d2bd02428 100644 --- a/cluster-autoscaler/provisioningrequest/provreqclient/client.go +++ b/cluster-autoscaler/provisioningrequest/provreqclient/client.go @@ -195,7 +195,7 @@ func ProvisioningRequestsForPods(client *ProvisioningRequestClient, unschedulabl klog.Errorf("failed retrive ProvisioningRequest from unscheduled pods, err: %v", err) continue } - if prMap[provReq.Name] == nil { + if _, found := prMap[provReq.Name]; !found { prMap[provReq.Name] = provReq } } From 195303198682bd73daf3c8478d7e812ee833f15d Mon Sep 17 00:00:00 2001 From: Yaroslava Serdiuk Date: Tue, 2 Jul 2024 15:56:33 +0300 Subject: [PATCH 25/30] Update cluster-autoscaler/provisioningrequest/provreqclient/client.go MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Bartek Wróblewski --- cluster-autoscaler/provisioningrequest/provreqclient/client.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cluster-autoscaler/provisioningrequest/provreqclient/client.go b/cluster-autoscaler/provisioningrequest/provreqclient/client.go index e05d2bd02428..d8963a4d9978 100644 --- a/cluster-autoscaler/provisioningrequest/provreqclient/client.go +++ b/cluster-autoscaler/provisioningrequest/provreqclient/client.go @@ -192,7 +192,7 @@ func ProvisioningRequestsForPods(client *ProvisioningRequestClient, unschedulabl } provReq, err := client.ProvisioningRequest(pod.Namespace, pod.OwnerReferences[0].Name) if err != nil { - klog.Errorf("failed retrive ProvisioningRequest from unscheduled pods, err: %v", err) + klog.Errorf("failed to retrieve ProvisioningRequest from unschedulable pod, err: %v", err) continue } if _, found := prMap[provReq.Name]; !found { From f8b5990ada6fdec0d53117b5d40b8b33961933ae Mon Sep 17 00:00:00 2001 From: Maria Oparka Date: Mon, 1 Jul 2024 16:39:25 +0200 Subject: [PATCH 26/30] Add InstanceTemplate field to GceInstance --- .../gce/autoscaling_gce_client.go | 25 +++--- .../gce/autoscaling_gce_client_test.go | 82 +++++++++++++++---- .../cloudprovider/gce/gce_url.go | 17 ++++ .../cloudprovider/gce/gce_url_test.go | 36 ++++++++ 4 files changed, 132 insertions(+), 28 deletions(-) diff --git a/cluster-autoscaler/cloudprovider/gce/autoscaling_gce_client.go b/cluster-autoscaler/cloudprovider/gce/autoscaling_gce_client.go index dc5c43b402fa..4dde60940700 100644 --- a/cluster-autoscaler/cloudprovider/gce/autoscaling_gce_client.go +++ b/cluster-autoscaler/cloudprovider/gce/autoscaling_gce_client.go @@ -20,7 +20,6 @@ import ( "context" "fmt" "net/http" - "net/url" "path" "regexp" "strings" @@ -96,8 +95,9 @@ var ( // GceInstance extends cloudprovider.Instance with GCE specific numeric id. 
type GceInstance struct { cloudprovider.Instance - NumericId uint64 - Igm GceRef + NumericId uint64 + Igm GceRef + InstanceTemplateName string } // AutoscalingGceClient is used for communicating with GCE API. @@ -481,6 +481,13 @@ func (i *instanceListBuilder) gceInstanceToInstance(ref GceRef, gceInstance *gce NumericId: gceInstance.Id, } + if gceInstance.Version != nil { + instanceTemplate, err := InstanceTemplateNameFromUrl(gceInstance.Version.InstanceTemplate) + if err == nil { + instance.InstanceTemplateName = instanceTemplate.Name + } + } + if instance.Status.State != cloudprovider.InstanceCreating { return instance } @@ -725,17 +732,7 @@ func (client *autoscalingGceClientV1) FetchMigTemplateName(migRef GceRef) (Insta } return InstanceTemplateName{}, err } - templateUrl, err := url.Parse(igm.InstanceTemplate) - if err != nil { - return InstanceTemplateName{}, err - } - regional, err := IsInstanceTemplateRegional(templateUrl.String()) - if err != nil { - return InstanceTemplateName{}, err - } - - _, templateName := path.Split(templateUrl.EscapedPath()) - return InstanceTemplateName{templateName, regional}, nil + return InstanceTemplateNameFromUrl(igm.InstanceTemplate) } func (client *autoscalingGceClientV1) FetchMigTemplate(migRef GceRef, templateName string, regional bool) (*gce.InstanceTemplate, error) { diff --git a/cluster-autoscaler/cloudprovider/gce/autoscaling_gce_client_test.go b/cluster-autoscaler/cloudprovider/gce/autoscaling_gce_client_test.go index 1f69573b8a37..b5e829ef2b7d 100644 --- a/cluster-autoscaler/cloudprovider/gce/autoscaling_gce_client_test.go +++ b/cluster-autoscaler/cloudprovider/gce/autoscaling_gce_client_test.go @@ -245,6 +245,10 @@ func TestErrors(t *testing.T) { func TestFetchMigInstancesInstanceUrlHandling(t *testing.T) { const goodInstanceUrlTempl = "https://content.googleapis.com/compute/v1/projects/myprojid/zones/myzone/instances/myinst_%d" const badInstanceUrl = "https://badurl.com/compute/v1/projects3/myprojid/zones/myzone/instances/myinst" + + const instanceTemplateNameTempl = "my_inst_templ%d" + const instanceTemplateUrlTempl = "https://content.googleapis.com/compute/v1/projects/myprojid/global/instanceTemplates/my_inst_templ%d" + server := test_util.NewHttpServerMock() defer server.Close() g := newTestAutoscalingGceClient(t, "project1", server.URL, "") @@ -266,6 +270,9 @@ func TestFetchMigInstancesInstanceUrlHandling(t *testing.T) { LastAttempt: &gce_api.ManagedInstanceLastAttempt{ Errors: &gce_api.ManagedInstanceLastAttemptErrors{}, }, + Version: &gce_api.ManagedInstanceVersion{ + InstanceTemplate: fmt.Sprintf(instanceTemplateUrlTempl, 2), + }, }, { Id: 42, @@ -274,6 +281,9 @@ func TestFetchMigInstancesInstanceUrlHandling(t *testing.T) { LastAttempt: &gce_api.ManagedInstanceLastAttempt{ Errors: &gce_api.ManagedInstanceLastAttemptErrors{}, }, + Version: &gce_api.ManagedInstanceVersion{ + InstanceTemplate: fmt.Sprintf(instanceTemplateUrlTempl, 42), + }, }, }, }, @@ -283,14 +293,16 @@ func TestFetchMigInstancesInstanceUrlHandling(t *testing.T) { Id: "gce://myprojid/myzone/myinst_2", Status: &cloudprovider.InstanceStatus{State: cloudprovider.InstanceCreating}, }, - NumericId: 2, + NumericId: 2, + InstanceTemplateName: fmt.Sprintf(instanceTemplateNameTempl, 2), }, { Instance: cloudprovider.Instance{ Id: "gce://myprojid/myzone/myinst_42", Status: &cloudprovider.InstanceStatus{State: cloudprovider.InstanceCreating}, }, - NumericId: 42, + NumericId: 42, + InstanceTemplateName: fmt.Sprintf(instanceTemplateNameTempl, 42), }, }, }, @@ -305,6 +317,9 @@ func 
TestFetchMigInstancesInstanceUrlHandling(t *testing.T) { LastAttempt: &gce_api.ManagedInstanceLastAttempt{ Errors: &gce_api.ManagedInstanceLastAttemptErrors{}, }, + Version: &gce_api.ManagedInstanceVersion{ + InstanceTemplate: fmt.Sprintf(instanceTemplateUrlTempl, 2), + }, }, { Id: 42, @@ -313,6 +328,9 @@ func TestFetchMigInstancesInstanceUrlHandling(t *testing.T) { LastAttempt: &gce_api.ManagedInstanceLastAttempt{ Errors: &gce_api.ManagedInstanceLastAttemptErrors{}, }, + Version: &gce_api.ManagedInstanceVersion{ + InstanceTemplate: fmt.Sprintf(instanceTemplateUrlTempl, 42), + }, }, }, NextPageToken: "foo", @@ -327,6 +345,9 @@ func TestFetchMigInstancesInstanceUrlHandling(t *testing.T) { LastAttempt: &gce_api.ManagedInstanceLastAttempt{ Errors: &gce_api.ManagedInstanceLastAttemptErrors{}, }, + Version: &gce_api.ManagedInstanceVersion{ + InstanceTemplate: fmt.Sprintf(instanceTemplateUrlTempl, 127), + }, }, { Id: 456, @@ -335,6 +356,9 @@ func TestFetchMigInstancesInstanceUrlHandling(t *testing.T) { LastAttempt: &gce_api.ManagedInstanceLastAttempt{ Errors: &gce_api.ManagedInstanceLastAttemptErrors{}, }, + Version: &gce_api.ManagedInstanceVersion{ + InstanceTemplate: fmt.Sprintf(instanceTemplateUrlTempl, 17), + }, }, }, }, @@ -345,28 +369,32 @@ func TestFetchMigInstancesInstanceUrlHandling(t *testing.T) { Id: "gce://myprojid/myzone/myinst_2", Status: &cloudprovider.InstanceStatus{State: cloudprovider.InstanceCreating}, }, - NumericId: 2, + NumericId: 2, + InstanceTemplateName: fmt.Sprintf(instanceTemplateNameTempl, 2), }, { Instance: cloudprovider.Instance{ Id: "gce://myprojid/myzone/myinst_42", Status: &cloudprovider.InstanceStatus{State: cloudprovider.InstanceCreating}, }, - NumericId: 42, + NumericId: 42, + InstanceTemplateName: fmt.Sprintf(instanceTemplateNameTempl, 42), }, { Instance: cloudprovider.Instance{ Id: "gce://myprojid/myzone/myinst_123", Status: &cloudprovider.InstanceStatus{State: cloudprovider.InstanceCreating}, }, - NumericId: 123, + NumericId: 123, + InstanceTemplateName: fmt.Sprintf(instanceTemplateNameTempl, 127), }, { Instance: cloudprovider.Instance{ Id: "gce://myprojid/myzone/myinst_456", Status: &cloudprovider.InstanceStatus{State: cloudprovider.InstanceCreating}, }, - NumericId: 456, + NumericId: 456, + InstanceTemplateName: fmt.Sprintf(instanceTemplateNameTempl, 17), }, }, }, @@ -381,6 +409,9 @@ func TestFetchMigInstancesInstanceUrlHandling(t *testing.T) { LastAttempt: &gce_api.ManagedInstanceLastAttempt{ Errors: &gce_api.ManagedInstanceLastAttemptErrors{}, }, + Version: &gce_api.ManagedInstanceVersion{ + InstanceTemplate: fmt.Sprintf(instanceTemplateUrlTempl, 17), + }, }, { Id: 42, @@ -389,6 +420,9 @@ func TestFetchMigInstancesInstanceUrlHandling(t *testing.T) { LastAttempt: &gce_api.ManagedInstanceLastAttempt{ Errors: &gce_api.ManagedInstanceLastAttemptErrors{}, }, + Version: &gce_api.ManagedInstanceVersion{ + InstanceTemplate: fmt.Sprintf(instanceTemplateUrlTempl, 17), + }, }, }, NextPageToken: "foo", @@ -403,6 +437,9 @@ func TestFetchMigInstancesInstanceUrlHandling(t *testing.T) { LastAttempt: &gce_api.ManagedInstanceLastAttempt{ Errors: &gce_api.ManagedInstanceLastAttemptErrors{}, }, + Version: &gce_api.ManagedInstanceVersion{ + InstanceTemplate: fmt.Sprintf(instanceTemplateUrlTempl, 17), + }, }, { Id: 456, @@ -411,6 +448,9 @@ func TestFetchMigInstancesInstanceUrlHandling(t *testing.T) { LastAttempt: &gce_api.ManagedInstanceLastAttempt{ Errors: &gce_api.ManagedInstanceLastAttemptErrors{}, }, + Version: &gce_api.ManagedInstanceVersion{ + InstanceTemplate: 
fmt.Sprintf(instanceTemplateUrlTempl, 17), + }, }, }, NextPageToken: "bar", @@ -424,6 +464,9 @@ func TestFetchMigInstancesInstanceUrlHandling(t *testing.T) { LastAttempt: &gce_api.ManagedInstanceLastAttempt{ Errors: &gce_api.ManagedInstanceLastAttemptErrors{}, }, + Version: &gce_api.ManagedInstanceVersion{ + InstanceTemplate: fmt.Sprintf(instanceTemplateUrlTempl, 17), + }, }, { Id: 666, @@ -432,6 +475,9 @@ func TestFetchMigInstancesInstanceUrlHandling(t *testing.T) { LastAttempt: &gce_api.ManagedInstanceLastAttempt{ Errors: &gce_api.ManagedInstanceLastAttemptErrors{}, }, + Version: &gce_api.ManagedInstanceVersion{ + InstanceTemplate: fmt.Sprintf(instanceTemplateUrlTempl, 127), + }, }, }, }, @@ -442,42 +488,48 @@ func TestFetchMigInstancesInstanceUrlHandling(t *testing.T) { Id: "gce://myprojid/myzone/myinst_2", Status: &cloudprovider.InstanceStatus{State: cloudprovider.InstanceCreating}, }, - NumericId: 2, + NumericId: 2, + InstanceTemplateName: fmt.Sprintf(instanceTemplateNameTempl, 17), }, { Instance: cloudprovider.Instance{ Id: "gce://myprojid/myzone/myinst_42", Status: &cloudprovider.InstanceStatus{State: cloudprovider.InstanceCreating}, }, - NumericId: 42, + NumericId: 42, + InstanceTemplateName: fmt.Sprintf(instanceTemplateNameTempl, 17), }, { Instance: cloudprovider.Instance{ Id: "gce://myprojid/myzone/myinst_123", Status: &cloudprovider.InstanceStatus{State: cloudprovider.InstanceCreating}, }, - NumericId: 123, + NumericId: 123, + InstanceTemplateName: fmt.Sprintf(instanceTemplateNameTempl, 17), }, { Instance: cloudprovider.Instance{ Id: "gce://myprojid/myzone/myinst_456", Status: &cloudprovider.InstanceStatus{State: cloudprovider.InstanceCreating}, }, - NumericId: 456, + NumericId: 456, + InstanceTemplateName: fmt.Sprintf(instanceTemplateNameTempl, 17), }, { Instance: cloudprovider.Instance{ Id: "gce://myprojid/myzone/myinst_789", Status: &cloudprovider.InstanceStatus{State: cloudprovider.InstanceCreating}, }, - NumericId: 789, + NumericId: 789, + InstanceTemplateName: fmt.Sprintf(instanceTemplateNameTempl, 17), }, { Instance: cloudprovider.Instance{ Id: "gce://myprojid/myzone/myinst_666", Status: &cloudprovider.InstanceStatus{State: cloudprovider.InstanceCreating}, }, - NumericId: 666, + NumericId: 666, + InstanceTemplateName: fmt.Sprintf(instanceTemplateNameTempl, 127), }, }, }, @@ -509,7 +561,8 @@ func TestFetchMigInstancesInstanceUrlHandling(t *testing.T) { Id: "gce://myprojid/myzone/myinst_42", Status: &cloudprovider.InstanceStatus{State: cloudprovider.InstanceCreating}, }, - NumericId: 42, + NumericId: 42, + InstanceTemplateName: "", }, }, }, @@ -540,7 +593,8 @@ func TestFetchMigInstancesInstanceUrlHandling(t *testing.T) { Id: "gce://myprojid/myzone/myinst_42", Status: &cloudprovider.InstanceStatus{State: cloudprovider.InstanceCreating}, }, - NumericId: 42, + NumericId: 42, + InstanceTemplateName: "", }, }, }, diff --git a/cluster-autoscaler/cloudprovider/gce/gce_url.go b/cluster-autoscaler/cloudprovider/gce/gce_url.go index fbdeda5583c5..e8011560a456 100644 --- a/cluster-autoscaler/cloudprovider/gce/gce_url.go +++ b/cluster-autoscaler/cloudprovider/gce/gce_url.go @@ -18,6 +18,8 @@ package gce import ( "fmt" + "net/url" + "path" "regexp" ) @@ -98,6 +100,21 @@ func IsInstanceTemplateRegional(templateUrl string) (bool, error) { return regexp.MatchString("(/projects/.*[A-Za-z0-9]+.*/regions/)", templateUrl) } +// InstanceTemplateNameFromUrl retrieves name of the Instance Template from the url. 
+func InstanceTemplateNameFromUrl(instanceTemplateLink string) (InstanceTemplateName, error) { + templateUrl, err := url.Parse(instanceTemplateLink) + if err != nil { + return InstanceTemplateName{}, err + } + regional, err := IsInstanceTemplateRegional(templateUrl.String()) + if err != nil { + return InstanceTemplateName{}, err + } + + _, templateName := path.Split(templateUrl.EscapedPath()) + return InstanceTemplateName{templateName, regional}, nil +} + func parseGceUrl(prefix, url, expectedResource string) (project string, zone string, name string, err error) { reg := regexp.MustCompile(fmt.Sprintf("%sprojects/.*/zones/.*/%s/.*", prefix, expectedResource)) errMsg := fmt.Errorf("wrong url: expected format %sprojects//zones//%s/, got %s", prefix, expectedResource, url) diff --git a/cluster-autoscaler/cloudprovider/gce/gce_url_test.go b/cluster-autoscaler/cloudprovider/gce/gce_url_test.go index 56e3152f8fed..68170866d81e 100644 --- a/cluster-autoscaler/cloudprovider/gce/gce_url_test.go +++ b/cluster-autoscaler/cloudprovider/gce/gce_url_test.go @@ -337,3 +337,39 @@ func TestIsInstanceTemplateRegional(t *testing.T) { }) } } + +func TestInstanceTemplateNameFromUrl(t *testing.T) { + tests := []struct { + name string + templateUrl string + expectInstanceTemplateName InstanceTemplateName + wantErr error + }{ + { + name: "Has regional instance url", + templateUrl: "https://www.googleapis.com/compute/v1/projects/test-project/regions/us-central1/instanceTemplates/instance-template", + expectInstanceTemplateName: InstanceTemplateName{"instance-template", true}, + }, + { + name: "Has global instance url", + templateUrl: "https://www.googleapis.com/compute/v1/projects/test-project/global/instanceTemplates/instance-template", + expectInstanceTemplateName: InstanceTemplateName{"instance-template", false}, + }, + { + name: "Empty url", + templateUrl: "", + expectInstanceTemplateName: InstanceTemplateName{"", false}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + itName, err := InstanceTemplateNameFromUrl(tt.templateUrl) + assert.Equal(t, tt.wantErr, err) + if tt.wantErr != nil { + return + } + assert.Equal(t, tt.expectInstanceTemplateName, itName) + }) + } +} From 8971a2917758c91b7a36626e2ca794e51ad8570f Mon Sep 17 00:00:00 2001 From: Damika Gamlath Date: Fri, 21 Jun 2024 15:23:32 +0000 Subject: [PATCH 27/30] refactor gce.RegenerateMigInstancesCache() to use Instance.List API for listing MIG instances --- .../cloudprovider/gce/gce_cloud_provider.go | 2 +- .../cloudprovider/gce/gce_manager.go | 4 +- .../cloudprovider/gce/gce_manager_test.go | 2 +- .../cloudprovider/gce/mig_info_provider.go | 151 ++++- .../gce/mig_info_provider_test.go | 532 +++++++++++++++++- .../config/autoscaling_options.go | 5 +- cluster-autoscaler/main.go | 9 +- 7 files changed, 653 insertions(+), 52 deletions(-) diff --git a/cluster-autoscaler/cloudprovider/gce/gce_cloud_provider.go b/cluster-autoscaler/cloudprovider/gce/gce_cloud_provider.go index ef40afccdfc2..7a7b5b42e308 100644 --- a/cluster-autoscaler/cloudprovider/gce/gce_cloud_provider.go +++ b/cluster-autoscaler/cloudprovider/gce/gce_cloud_provider.go @@ -383,7 +383,7 @@ func BuildGCE(opts config.AutoscalingOptions, do cloudprovider.NodeGroupDiscover defer config.Close() } - manager, err := CreateGceManager(config, do, opts.GCEOptions.LocalSSDDiskSizeProvider, opts.Regional, opts.GCEOptions.ConcurrentRefreshes, opts.UserAgent, opts.GCEOptions.DomainUrl, opts.GCEOptions.MigInstancesMinRefreshWaitTime) + manager, err := CreateGceManager(config, 
do, opts.GCEOptions.LocalSSDDiskSizeProvider, opts.Regional, opts.GCEOptions.BulkMigInstancesListingEnabled, opts.GCEOptions.ConcurrentRefreshes, opts.UserAgent, opts.GCEOptions.DomainUrl, opts.GCEOptions.MigInstancesMinRefreshWaitTime) if err != nil { klog.Fatalf("Failed to create GCE Manager: %v", err) } diff --git a/cluster-autoscaler/cloudprovider/gce/gce_manager.go b/cluster-autoscaler/cloudprovider/gce/gce_manager.go index 0db8fa76c1ce..534d3e15b220 100644 --- a/cluster-autoscaler/cloudprovider/gce/gce_manager.go +++ b/cluster-autoscaler/cloudprovider/gce/gce_manager.go @@ -128,7 +128,7 @@ type gceManagerImpl struct { // CreateGceManager constructs GceManager object. func CreateGceManager(configReader io.Reader, discoveryOpts cloudprovider.NodeGroupDiscoveryOptions, localSSDDiskSizeProvider localssdsize.LocalSSDSizeProvider, - regional bool, concurrentGceRefreshes int, userAgent, domainUrl string, migInstancesMinRefreshWaitTime time.Duration) (GceManager, error) { + regional, bulkGceMigInstancesListingEnabled bool, concurrentGceRefreshes int, userAgent, domainUrl string, migInstancesMinRefreshWaitTime time.Duration) (GceManager, error) { // Create Google Compute Engine token. var err error tokenSource := google.ComputeTokenSource("") @@ -188,7 +188,7 @@ func CreateGceManager(configReader io.Reader, discoveryOpts cloudprovider.NodeGr cache: cache, GceService: gceService, migLister: migLister, - migInfoProvider: NewCachingMigInfoProvider(cache, migLister, gceService, projectId, concurrentGceRefreshes, migInstancesMinRefreshWaitTime), + migInfoProvider: NewCachingMigInfoProvider(cache, migLister, gceService, projectId, concurrentGceRefreshes, migInstancesMinRefreshWaitTime, bulkGceMigInstancesListingEnabled), location: location, regional: regional, projectId: projectId, diff --git a/cluster-autoscaler/cloudprovider/gce/gce_manager_test.go b/cluster-autoscaler/cloudprovider/gce/gce_manager_test.go index 275fcf7a2278..dbbe5864d4d6 100644 --- a/cluster-autoscaler/cloudprovider/gce/gce_manager_test.go +++ b/cluster-autoscaler/cloudprovider/gce/gce_manager_test.go @@ -354,7 +354,7 @@ func newTestGceManager(t *testing.T, testServerURL string, regional bool) *gceMa manager := &gceManagerImpl{ cache: cache, migLister: migLister, - migInfoProvider: NewCachingMigInfoProvider(cache, migLister, gceService, projectId, 1, 0*time.Second), + migInfoProvider: NewCachingMigInfoProvider(cache, migLister, gceService, projectId, 1, 0*time.Second, false), GceService: gceService, projectId: projectId, regional: regional, diff --git a/cluster-autoscaler/cloudprovider/gce/mig_info_provider.go b/cluster-autoscaler/cloudprovider/gce/mig_info_provider.go index 277bc2dd64a5..87c7a7564fb0 100644 --- a/cluster-autoscaler/cloudprovider/gce/mig_info_provider.go +++ b/cluster-autoscaler/cloudprovider/gce/mig_info_provider.go @@ -62,15 +62,16 @@ type timeProvider interface { } type cachingMigInfoProvider struct { - migInfoMutex sync.Mutex - cache *GceCache - migLister MigLister - gceClient AutoscalingGceClient - projectId string - concurrentGceRefreshes int - migInstanceMutex sync.Mutex - migInstancesMinRefreshWaitTime time.Duration - timeProvider timeProvider + migInfoMutex sync.Mutex + cache *GceCache + migLister MigLister + gceClient AutoscalingGceClient + projectId string + concurrentGceRefreshes int + migInstanceMutex sync.Mutex + migInstancesMinRefreshWaitTime time.Duration + timeProvider timeProvider + bulkGceMigInstancesListingEnabled bool } type realTime struct{} @@ -80,15 +81,16 @@ func (r *realTime) Now() 
time.Time { } // NewCachingMigInfoProvider creates an instance of caching MigInfoProvider -func NewCachingMigInfoProvider(cache *GceCache, migLister MigLister, gceClient AutoscalingGceClient, projectId string, concurrentGceRefreshes int, migInstancesMinRefreshWaitTime time.Duration) MigInfoProvider { +func NewCachingMigInfoProvider(cache *GceCache, migLister MigLister, gceClient AutoscalingGceClient, projectId string, concurrentGceRefreshes int, migInstancesMinRefreshWaitTime time.Duration, bulkGceMigInstancesListingEnabled bool) MigInfoProvider { return &cachingMigInfoProvider{ - cache: cache, - migLister: migLister, - gceClient: gceClient, - projectId: projectId, - concurrentGceRefreshes: concurrentGceRefreshes, - migInstancesMinRefreshWaitTime: migInstancesMinRefreshWaitTime, - timeProvider: &realTime{}, + cache: cache, + migLister: migLister, + gceClient: gceClient, + projectId: projectId, + concurrentGceRefreshes: concurrentGceRefreshes, + migInstancesMinRefreshWaitTime: migInstancesMinRefreshWaitTime, + timeProvider: &realTime{}, + bulkGceMigInstancesListingEnabled: bulkGceMigInstancesListingEnabled, } } @@ -151,6 +153,11 @@ func (c *cachingMigInfoProvider) getCachedMigForInstance(instanceRef GceRef) (Mi func (c *cachingMigInfoProvider) RegenerateMigInstancesCache() error { c.cache.InvalidateAllMigInstances() c.cache.InvalidateAllInstancesToMig() + + if c.bulkGceMigInstancesListingEnabled { + return c.bulkListMigInstances() + } + migs := c.migLister.GetMigs() errors := make([]error, len(migs)) workqueue.ParallelizeUntil(context.Background(), c.concurrentGceRefreshes, len(migs), func(piece int) { @@ -165,6 +172,116 @@ func (c *cachingMigInfoProvider) RegenerateMigInstancesCache() error { return nil } +func (c *cachingMigInfoProvider) bulkListMigInstances() error { + c.cache.InvalidateMigInstancesState() + err := c.fillMigInfoCache() + if err != nil { + return err + } + instances, listErr := c.listInstancesInAllZonesWithMigs() + migToInstances := groupInstancesToMigs(instances) + updateErr := c.updateMigInstancesCache(migToInstances) + + if listErr != nil { + return listErr + } + return updateErr +} + +func (c *cachingMigInfoProvider) listInstancesInAllZonesWithMigs() ([]GceInstance, error) { + var zones []string + for zone := range c.listAllZonesWithMigs() { + zones = append(zones, zone) + } + var allInstances []GceInstance + errors := make([]error, len(zones)) + zoneInstances := make([][]GceInstance, len(zones)) + workqueue.ParallelizeUntil(context.Background(), c.concurrentGceRefreshes, len(zones), func(piece int) { + zoneInstances[piece], errors[piece] = c.gceClient.FetchAllInstances(c.projectId, zones[piece], "") + }, workqueue.WithChunkSize(c.concurrentGceRefreshes)) + + for _, instances := range zoneInstances { + allInstances = append(allInstances, instances...) 
+ } + for _, err := range errors { + if err != nil { + return allInstances, err + } + } + return allInstances, nil +} + +func groupInstancesToMigs(instances []GceInstance) map[GceRef][]GceInstance { + migToInstances := map[GceRef][]GceInstance{} + for _, instance := range instances { + migToInstances[instance.Igm] = append(migToInstances[instance.Igm], instance) + } + return migToInstances +} + +func (c *cachingMigInfoProvider) isMigInstancesConsistent(mig Mig, migToInstances map[GceRef][]GceInstance) bool { + migRef := mig.GceRef() + state, found := c.cache.GetMigInstancesState(migRef) + if !found { + return false + } + instanceCount := state[cloudprovider.InstanceRunning] + state[cloudprovider.InstanceCreating] + state[cloudprovider.InstanceDeleting] + + instances, found := migToInstances[migRef] + if !found && instanceCount > 0 { + return false + } + return instanceCount == int64(len(instances)) +} + +func (c *cachingMigInfoProvider) isMigCreatingOrDeletingInstances(mig Mig) bool { + migRef := mig.GceRef() + state, found := c.cache.GetMigInstancesState(migRef) + if !found { + return false + } + return state[cloudprovider.InstanceCreating] > 0 || state[cloudprovider.InstanceDeleting] > 0 +} + +// updateMigInstancesCache updates the mig instances for each mig +func (c *cachingMigInfoProvider) updateMigInstancesCache(migToInstances map[GceRef][]GceInstance) error { + var errors []error + for _, mig := range c.migLister.GetMigs() { + migRef := mig.GceRef() + // If there is an inconsistency between number of instances according to instances.List + // and number of instances according to migInstancesStateCount for the given mig, which can be due to + // - abandoned instance + // - missing/malformed "created-by" reference + // we use an igm.ListInstances call as the authoritative source of instance information + if !c.isMigInstancesConsistent(mig, migToInstances) { + if err := c.fillMigInstances(migRef); err != nil { + errors = append(errors, err) + } + continue + } + + // mig instances are re-fetched along with instance.Status.ErrorInfo for migs with + // instances in creating or deleting state + if c.isMigCreatingOrDeletingInstances(mig) { + if err := c.fillMigInstances(migRef); err != nil { + errors = append(errors, err) + } + continue + } + + // for all other cases, mig instances cache is updated with the instances obtained from instance.List api + instances := migToInstances[migRef] + err := c.cache.SetMigInstances(migRef, instances, c.timeProvider.Now()) + if err != nil { + errors = append(errors, err) + } + } + if len(errors) > 0 { + return errors[0] + } + return nil +} + func (c *cachingMigInfoProvider) findMigWithMatchingBasename(instanceRef GceRef) Mig { for _, mig := range c.migLister.GetMigs() { migRef := mig.GceRef() diff --git a/cluster-autoscaler/cloudprovider/gce/mig_info_provider_test.go b/cluster-autoscaler/cloudprovider/gce/mig_info_provider_test.go index 70a18654a9bc..53331e964b20 100644 --- a/cluster-autoscaler/cloudprovider/gce/mig_info_provider_test.go +++ b/cluster-autoscaler/cloudprovider/gce/mig_info_provider_test.go @@ -46,6 +46,66 @@ var ( Name: "mig", }, } + mig1 = &gceMig{ + gceRef: GceRef{ + Project: "myprojid", + Zone: "myzone1", + Name: "mig1", + }, + } + mig2 = &gceMig{ + gceRef: GceRef{ + Project: "myprojid", + Zone: "myzone2", + Name: "mig2", + }, + } + + instance1 = GceInstance{ + Instance: cloudprovider.Instance{ + Id: "gce://myprojid/myzone1/test-instance-1", + Status: &cloudprovider.InstanceStatus{State: cloudprovider.InstanceRunning}, + }, + Igm: 
mig1.GceRef(), + } + instance2 = GceInstance{ + Instance: cloudprovider.Instance{ + Id: "gce://myprojid/myzone1/test-instance-2", + Status: &cloudprovider.InstanceStatus{State: cloudprovider.InstanceCreating}, + }, + Igm: mig1.GceRef(), + } + instance3 = GceInstance{ + Instance: cloudprovider.Instance{ + Id: "gce://myprojid/myzone2/test-instance-3", + Status: &cloudprovider.InstanceStatus{State: cloudprovider.InstanceRunning}, + }, + Igm: mig2.GceRef(), + } + + instance4 = GceInstance{ + Instance: cloudprovider.Instance{ + Id: "gce://myprojid/myzone2/test-instance-4", + Status: &cloudprovider.InstanceStatus{State: cloudprovider.InstanceRunning}, + }, + Igm: GceRef{}, + } + + instance5 = GceInstance{ + Instance: cloudprovider.Instance{ + Id: "gce://myprojid/myzone2/test-instance-5", + Status: &cloudprovider.InstanceStatus{State: cloudprovider.InstanceRunning}, + }, + Igm: GceRef{}, + } + + instance6 = GceInstance{ + Instance: cloudprovider.Instance{ + Id: "gce://myprojid/myzone2/test-instance-6", + Status: &cloudprovider.InstanceStatus{State: cloudprovider.InstanceRunning}, + }, + Igm: mig2.GceRef(), + } ) type mockAutoscalingGceClient struct { @@ -204,7 +264,7 @@ func TestFillMigInstances(t *testing.T) { fetchMigInstances: fetchMigInstancesWithCounter(newInstances, callCounter), } - provider, ok := NewCachingMigInfoProvider(tc.cache, NewMigLister(tc.cache), client, mig.GceRef().Project, 1, time.Hour).(*cachingMigInfoProvider) + provider, ok := NewCachingMigInfoProvider(tc.cache, NewMigLister(tc.cache), client, mig.GceRef().Project, 1, time.Hour, false).(*cachingMigInfoProvider) assert.True(t, ok) provider.timeProvider = &fakeTime{now: timeNow} @@ -349,7 +409,7 @@ func TestMigInfoProviderGetMigForInstance(t *testing.T) { fetchMigs: fetchMigsConst(nil), } migLister := NewMigLister(tc.cache) - provider := NewCachingMigInfoProvider(tc.cache, migLister, client, mig.GceRef().Project, 1, 0*time.Second) + provider := NewCachingMigInfoProvider(tc.cache, migLister, client, mig.GceRef().Project, 1, 0*time.Second, false) mig, err := provider.GetMigForInstance(instanceRef) @@ -432,7 +492,7 @@ func TestGetMigInstances(t *testing.T) { fetchMigInstances: tc.fetchMigInstances, } migLister := NewMigLister(tc.cache) - provider, ok := NewCachingMigInfoProvider(tc.cache, migLister, client, mig.GceRef().Project, 1, 0*time.Second).(*cachingMigInfoProvider) + provider, ok := NewCachingMigInfoProvider(tc.cache, migLister, client, mig.GceRef().Project, 1, 0*time.Second, false).(*cachingMigInfoProvider) assert.True(t, ok) provider.timeProvider = &fakeTime{now: newRefreshTime} @@ -465,30 +525,44 @@ func TestRegenerateMigInstancesCache(t *testing.T) { {Instance: cloudprovider.Instance{Id: "gce://project/us-test1/base-instance-name-abcd"}, NumericId: 1}, {Instance: cloudprovider.Instance{Id: "gce://project/us-test1/base-instance-name-efgh"}, NumericId: 2}, } + mig1Instances := []GceInstance{instance1, instance2} + mig2Instances := []GceInstance{instance3, instance6} otherInstances := []GceInstance{ {Instance: cloudprovider.Instance{Id: "gce://project/us-test1/other-base-instance-name-abcd"}}, {Instance: cloudprovider.Instance{Id: "gce://project/us-test1/other-base-instance-name-efgh"}}, } - var instancesRefs, otherInstancesRefs []GceRef - for _, instance := range instances { - instanceRef, err := GceRefFromProviderId(instance.Id) - assert.Nil(t, err) - instancesRefs = append(instancesRefs, instanceRef) + mig1Igm := &gce.InstanceGroupManager{ + Zone: mig1.GceRef().Zone, + Name: mig1.GceRef().Name, + TargetSize: 2, + 
CurrentActions: &gce.InstanceGroupManagerActionsSummary{ + Creating: 1, + }, } - for _, instance := range otherInstances { - instanceRef, err := GceRefFromProviderId(instance.Id) - assert.Nil(t, err) - otherInstancesRefs = append(otherInstancesRefs, instanceRef) + mig2Igm := &gce.InstanceGroupManager{ + Zone: mig2.GceRef().Zone, + Name: mig2.GceRef().Name, + TargetSize: 2, + CurrentActions: &gce.InstanceGroupManagerActionsSummary{}, } + instancesRefs := toInstancesRefs(t, instances) + mig1InstancesRefs := toInstancesRefs(t, mig1Instances) + mig2InstancesRefs := toInstancesRefs(t, mig2Instances) + otherInstancesRefs := toInstancesRefs(t, otherInstances) + testCases := []struct { - name string - cache *GceCache - fetchMigInstances func(GceRef) ([]GceInstance, error) - expectedErr error - expectedMigInstances map[GceRef][]GceInstance - expectedInstancesToMig map[GceRef]GceRef + name string + cache *GceCache + fetchMigInstances func(GceRef) ([]GceInstance, error) + fetchMigs func(string) ([]*gce.InstanceGroupManager, error) + fetchAllInstances func(string, string, string) ([]GceInstance, error) + bulkGceMigInstancesListingEnabled bool + projectId string + expectedErr error + expectedMigInstances map[GceRef][]GceInstance + expectedInstancesToMig map[GceRef]GceRef }{ { name: "fill empty cache for one mig", @@ -498,6 +572,7 @@ func TestRegenerateMigInstancesCache(t *testing.T) { instancesToMig: map[GceRef]GceRef{}, }, fetchMigInstances: fetchMigInstancesConst(instances), + projectId: mig.GceRef().Project, expectedMigInstances: map[GceRef][]GceInstance{ mig.GceRef(): instances, }, @@ -520,6 +595,7 @@ func TestRegenerateMigInstancesCache(t *testing.T) { mig.GceRef(): instances, otherMig.GceRef(): otherInstances, }), + projectId: mig.GceRef().Project, expectedMigInstances: map[GceRef][]GceInstance{ mig.GceRef(): instances, otherMig.GceRef(): otherInstances, @@ -552,6 +628,7 @@ func TestRegenerateMigInstancesCache(t *testing.T) { mig.GceRef(): instances, otherMig.GceRef(): otherInstances, }), + projectId: mig.GceRef().Project, expectedMigInstances: map[GceRef][]GceInstance{ mig.GceRef(): instances, }, @@ -575,6 +652,7 @@ func TestRegenerateMigInstancesCache(t *testing.T) { }, }, fetchMigInstances: fetchMigInstancesConst(otherInstances), + projectId: mig.GceRef().Project, expectedMigInstances: map[GceRef][]GceInstance{ mig.GceRef(): otherInstances, }, @@ -593,17 +671,95 @@ func TestRegenerateMigInstancesCache(t *testing.T) { instancesToMig: map[GceRef]GceRef{}, }, fetchMigInstances: fetchMigInstancesFail, + projectId: mig.GceRef().Project, expectedErr: errFetchMigInstances, }, + { + name: "bulkGceMigInstancesListingEnabled - fill empty cache for one mig - instances in creating/deleting state", + cache: &GceCache{ + migs: map[GceRef]Mig{mig1.GceRef(): mig1}, + instances: map[GceRef][]GceInstance{}, + instancesToMig: map[GceRef]GceRef{}, + migTargetSizeCache: map[GceRef]int64{}, + migBaseNameCache: map[GceRef]string{}, + listManagedInstancesResultsCache: map[GceRef]string{}, + instanceTemplateNameCache: map[GceRef]InstanceTemplateName{}, + migInstancesStateCache: map[GceRef]map[cloudprovider.InstanceState]int64{}, + }, + fetchMigInstances: fetchMigInstancesConst(mig1Instances), + fetchMigs: fetchMigsConst([]*gce.InstanceGroupManager{mig1Igm}), + fetchAllInstances: fetchAllInstancesInZone(map[string][]GceInstance{"myzone1": {instance1, instance2}}), + bulkGceMigInstancesListingEnabled: true, + projectId: mig1.GceRef().Project, + expectedMigInstances: map[GceRef][]GceInstance{ + mig1.GceRef(): 
mig1Instances, + }, + expectedInstancesToMig: map[GceRef]GceRef{ + mig1InstancesRefs[0]: mig1.GceRef(), + mig1InstancesRefs[1]: mig1.GceRef(), + }, + }, + { + name: "bulkGceMigInstancesListingEnabled - fill empty cache for one mig - number of instances are inconsistent in bulk listing result", + cache: &GceCache{ + migs: map[GceRef]Mig{mig2.GceRef(): mig2}, + instances: map[GceRef][]GceInstance{}, + instancesToMig: map[GceRef]GceRef{}, + migTargetSizeCache: map[GceRef]int64{}, + migBaseNameCache: map[GceRef]string{}, + listManagedInstancesResultsCache: map[GceRef]string{}, + instanceTemplateNameCache: map[GceRef]InstanceTemplateName{}, + migInstancesStateCache: map[GceRef]map[cloudprovider.InstanceState]int64{}, + }, + fetchMigInstances: fetchMigInstancesConst(mig2Instances), + fetchMigs: fetchMigsConst([]*gce.InstanceGroupManager{mig2Igm}), + // one instance is missing from the instances of igm2 in myzone2 + fetchAllInstances: fetchAllInstancesInZone(map[string][]GceInstance{"myzone2": {instance3}}), + bulkGceMigInstancesListingEnabled: true, + projectId: mig2.GceRef().Project, + expectedMigInstances: map[GceRef][]GceInstance{ + mig2.GceRef(): mig2Instances, + }, + expectedInstancesToMig: map[GceRef]GceRef{ + mig2InstancesRefs[0]: mig2.GceRef(), + mig2InstancesRefs[1]: mig2.GceRef(), + }, + }, + { + name: "bulkGceMigInstancesListingEnabled - fill empty cache for one mig - all instances in running state", + cache: &GceCache{ + migs: map[GceRef]Mig{mig2.GceRef(): mig2}, + instances: map[GceRef][]GceInstance{}, + instancesToMig: map[GceRef]GceRef{}, + migTargetSizeCache: map[GceRef]int64{}, + migBaseNameCache: map[GceRef]string{}, + listManagedInstancesResultsCache: map[GceRef]string{}, + instanceTemplateNameCache: map[GceRef]InstanceTemplateName{}, + migInstancesStateCache: map[GceRef]map[cloudprovider.InstanceState]int64{}, + }, + fetchMigs: fetchMigsConst([]*gce.InstanceGroupManager{mig2Igm}), + fetchAllInstances: fetchAllInstancesInZone(map[string][]GceInstance{"myzone2": {instance3, instance6}}), + bulkGceMigInstancesListingEnabled: true, + projectId: mig2.GceRef().Project, + expectedMigInstances: map[GceRef][]GceInstance{ + mig2.GceRef(): mig2Instances, + }, + expectedInstancesToMig: map[GceRef]GceRef{ + mig2InstancesRefs[0]: mig2.GceRef(), + mig2InstancesRefs[1]: mig2.GceRef(), + }, + }, } for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { client := &mockAutoscalingGceClient{ fetchMigInstances: tc.fetchMigInstances, + fetchMigs: tc.fetchMigs, + fetchAllInstances: tc.fetchAllInstances, } migLister := NewMigLister(tc.cache) - provider := NewCachingMigInfoProvider(tc.cache, migLister, client, mig.GceRef().Project, 1, 0*time.Second) + provider := NewCachingMigInfoProvider(tc.cache, migLister, client, tc.projectId, 1, 0*time.Second, tc.bulkGceMigInstancesListingEnabled) err := provider.RegenerateMigInstancesCache() assert.Equal(t, tc.expectedErr, err) @@ -615,6 +771,16 @@ func TestRegenerateMigInstancesCache(t *testing.T) { } } +func toInstancesRefs(t *testing.T, instances []GceInstance) []GceRef { + var refs []GceRef + for _, instance := range instances { + instanceRef, err := GceRefFromProviderId(instance.Id) + assert.Nil(t, err) + refs = append(refs, instanceRef) + } + return refs +} + func TestGetMigTargetSize(t *testing.T) { targetSize := int64(42) instanceGroupManager := &gce.InstanceGroupManager{ @@ -682,7 +848,7 @@ func TestGetMigTargetSize(t *testing.T) { fetchMigTargetSize: tc.fetchMigTargetSize, } migLister := NewMigLister(tc.cache) - provider := 
NewCachingMigInfoProvider(tc.cache, migLister, client, mig.GceRef().Project, 1, 0*time.Second) + provider := NewCachingMigInfoProvider(tc.cache, migLister, client, mig.GceRef().Project, 1, 0*time.Second, false) targetSize, err := provider.GetMigTargetSize(mig.GceRef()) cachedTargetSize, found := tc.cache.GetMigTargetSize(mig.GceRef()) @@ -764,7 +930,7 @@ func TestGetMigBasename(t *testing.T) { fetchMigBasename: tc.fetchMigBasename, } migLister := NewMigLister(tc.cache) - provider := NewCachingMigInfoProvider(tc.cache, migLister, client, mig.GceRef().Project, 1, 0*time.Second) + provider := NewCachingMigInfoProvider(tc.cache, migLister, client, mig.GceRef().Project, 1, 0*time.Second, false) basename, err := provider.GetMigBasename(mig.GceRef()) cachedBasename, found := tc.cache.GetMigBasename(mig.GceRef()) @@ -845,7 +1011,7 @@ func TestGetListManagedInstancesResults(t *testing.T) { fetchListManagedInstancesResults: tc.fetchResults, } migLister := NewMigLister(tc.cache) - provider := NewCachingMigInfoProvider(tc.cache, migLister, client, mig.GceRef().Project, 1, 0*time.Second) + provider := NewCachingMigInfoProvider(tc.cache, migLister, client, mig.GceRef().Project, 1, 0*time.Second, false) results, err := provider.GetListManagedInstancesResults(mig.GceRef()) cachedResults, found := tc.cache.GetListManagedInstancesResults(mig.GceRef()) @@ -940,7 +1106,7 @@ func TestGetMigInstanceTemplateName(t *testing.T) { fetchMigTemplateName: tc.fetchMigTemplateName, } migLister := NewMigLister(tc.cache) - provider := NewCachingMigInfoProvider(tc.cache, migLister, client, mig.GceRef().Project, 1, 0*time.Second) + provider := NewCachingMigInfoProvider(tc.cache, migLister, client, mig.GceRef().Project, 1, 0*time.Second, false) instanceTemplateName, err := provider.GetMigInstanceTemplateName(mig.GceRef()) cachedInstanceTemplateName, found := tc.cache.GetMigInstanceTemplateName(mig.GceRef()) @@ -1046,7 +1212,7 @@ func TestGetMigInstanceTemplate(t *testing.T) { fetchMigTemplate: tc.fetchMigTemplate, } migLister := NewMigLister(tc.cache) - provider := NewCachingMigInfoProvider(tc.cache, migLister, client, mig.GceRef().Project, 1, 0*time.Second) + provider := NewCachingMigInfoProvider(tc.cache, migLister, client, mig.GceRef().Project, 1, 0*time.Second, false) template, err := provider.GetMigInstanceTemplate(mig.GceRef()) cachedTemplate, found := tc.cache.GetMigInstanceTemplate(mig.GceRef()) @@ -1252,7 +1418,7 @@ func TestGetMigInstanceKubeEnv(t *testing.T) { fetchMigTemplate: tc.fetchMigTemplate, } migLister := NewMigLister(tc.cache) - provider := NewCachingMigInfoProvider(tc.cache, migLister, client, mig.GceRef().Project, 1, 0*time.Second) + provider := NewCachingMigInfoProvider(tc.cache, migLister, client, mig.GceRef().Project, 1, 0*time.Second, false) kubeEnv, err := provider.GetMigKubeEnv(mig.GceRef()) cachedKubeEnv, found := tc.cache.GetMigKubeEnv(mig.GceRef()) @@ -1347,7 +1513,7 @@ func TestGetMigMachineType(t *testing.T) { fetchMachineType: tc.fetchMachineType, } migLister := NewMigLister(cache) - provider := NewCachingMigInfoProvider(cache, migLister, client, mig.GceRef().Project, 1, 0*time.Second) + provider := NewCachingMigInfoProvider(cache, migLister, client, mig.GceRef().Project, 1, 0*time.Second, false) machine, err := provider.GetMigMachineType(mig.GceRef()) if tc.expectError { assert.Error(t, err) @@ -1436,6 +1602,310 @@ func TestMultipleGetMigInstanceCallsLimited(t *testing.T) { } } +func TestListInstancesInAllZonesWithMigs(t *testing.T) { + testCases := []struct { + name string + migs 
map[GceRef]Mig
+		fetchAllInstances func(string, string, string) ([]GceInstance, error)
+		wantInstances     []GceInstance
+		wantErr           bool
+	}{
+		{
+			name:              "instance fetching failed",
+			migs:              map[GceRef]Mig{mig1.GceRef(): mig1},
+			fetchAllInstances: fetchAllInstancesInZone(map[string][]GceInstance{}),
+			wantErr:           true,
+		},
+		{
+			name:              "Successfully list mig instances in a single zone",
+			migs:              map[GceRef]Mig{mig1.GceRef(): mig1},
+			fetchAllInstances: fetchAllInstancesInZone(map[string][]GceInstance{"myzone1": {instance1, instance2}, "myzone2": {instance3}}),
+			wantInstances:     []GceInstance{instance1, instance2},
+		},
+		{
+			name:              "Successfully list mig instances in multiple zones",
+			migs:              map[GceRef]Mig{mig1.GceRef(): mig1, mig2.GceRef(): mig2},
+			fetchAllInstances: fetchAllInstancesInZone(map[string][]GceInstance{"myzone1": {instance1, instance2}, "myzone2": {instance3}}),
+			wantInstances:     []GceInstance{instance1, instance2, instance3},
+		},
+		{
+			name:              "Successfully list mig instances in one zone and get errors in another",
+			migs:              map[GceRef]Mig{mig1.GceRef(): mig1, mig2.GceRef(): mig2},
+			fetchAllInstances: fetchAllInstancesInZone(map[string][]GceInstance{"myzone1": {instance1, instance2}}),
+			wantInstances:     []GceInstance{instance1, instance2},
+			wantErr:           true,
+		},
+	}
+	for _, tc := range testCases {
+		t.Run(tc.name, func(t *testing.T) {
+			cache := GceCache{
+				migs: tc.migs,
+			}
+			client := &mockAutoscalingGceClient{
+				fetchAllInstances: tc.fetchAllInstances,
+			}
+			migLister := NewMigLister(&cache)
+			provider := &cachingMigInfoProvider{
+				cache:                  &cache,
+				migLister:              migLister,
+				gceClient:              client,
+				concurrentGceRefreshes: 1,
+			}
+			instances, err := provider.listInstancesInAllZonesWithMigs()
+
+			if tc.wantErr {
+				assert.NotNil(t, err)
+			} else {
+				assert.NoError(t, err)
+			}
+			assert.ElementsMatch(t, tc.wantInstances, instances)
+		})
+	}
+}
+
+func TestGroupInstancesToMigs(t *testing.T) {
+	testCases := []struct {
+		name      string
+		instances []GceInstance
+		want      map[GceRef][]GceInstance
+	}{
+		{
+			name: "no instances",
+			want: map[GceRef][]GceInstance{},
+		},
+		{
+			name:      "instances from multiple migs including unknown migs",
+			instances: []GceInstance{instance1, instance2, instance3, instance4, instance5},
+			want: map[GceRef][]GceInstance{
+				mig1.GceRef(): {instance1, instance2},
+				mig2.GceRef(): {instance3},
+				{}:            {instance4, instance5},
+			},
+		},
+	}
+
+	for _, tc := range testCases {
+		t.Run(tc.name, func(t *testing.T) {
+			groupedInstances := groupInstancesToMigs(tc.instances)
+			assert.Equal(t, tc.want, groupedInstances)
+		})
+	}
+}
+
+func TestIsMigInstancesConsistent(t *testing.T) {
+	testCases := []struct {
+		name                   string
+		mig                    Mig
+		migToInstances         map[GceRef][]GceInstance
+		migInstancesStateCache map[GceRef]map[cloudprovider.InstanceState]int64
+		want                   bool
+	}{
+		{
+			name:           "instance not found",
+			mig:            mig1,
+			migToInstances: map[GceRef][]GceInstance{},
+			want:           false,
+		},
+		{
+			name:           "instanceState not found",
+			mig:            mig1,
+			migToInstances: map[GceRef][]GceInstance{mig1.GceRef(): {instance1, instance2}},
+			want:           false,
+		},
+		{
+			name:           "inconsistent number of instances",
+			mig:            mig1,
+			migToInstances: map[GceRef][]GceInstance{mig1.GceRef(): {instance1, instance2}},
+			migInstancesStateCache: map[GceRef]map[cloudprovider.InstanceState]int64{
+				mig1.GceRef(): {
+					cloudprovider.InstanceCreating: 2,
+					cloudprovider.InstanceDeleting: 3,
+					cloudprovider.InstanceRunning:  4,
+				},
+			},
+			want: false,
+		},
+		{
+			name:           "consistent number of instances",
+			mig:            mig1,
+			migToInstances: 
map[GceRef][]GceInstance{mig1.GceRef(): {instance1, instance2}}, + migInstancesStateCache: map[GceRef]map[cloudprovider.InstanceState]int64{ + mig1.GceRef(): { + cloudprovider.InstanceCreating: 1, + cloudprovider.InstanceDeleting: 0, + cloudprovider.InstanceRunning: 1, + }, + }, + want: true, + }, + } + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + cache := GceCache{ + migInstancesStateCache: tc.migInstancesStateCache, + } + provider := &cachingMigInfoProvider{ + cache: &cache, + } + got := provider.isMigInstancesConsistent(tc.mig, tc.migToInstances) + assert.Equal(t, tc.want, got) + }) + } +} + +func TestIsMigInCreatingOrDeletingInstanceState(t *testing.T) { + testCases := []struct { + name string + mig Mig + migInstancesStateCache map[GceRef]map[cloudprovider.InstanceState]int64 + want bool + }{ + { + name: "instanceState not found", + mig: mig1, + want: false, + }, + { + name: "in creating state", + mig: mig1, + migInstancesStateCache: map[GceRef]map[cloudprovider.InstanceState]int64{ + mig1.GceRef(): { + cloudprovider.InstanceCreating: 2, + cloudprovider.InstanceDeleting: 0, + cloudprovider.InstanceRunning: 1, + }, + }, + want: true, + }, + { + name: "in deleting state", + mig: mig1, + migInstancesStateCache: map[GceRef]map[cloudprovider.InstanceState]int64{ + mig1.GceRef(): { + cloudprovider.InstanceCreating: 0, + cloudprovider.InstanceDeleting: 1, + cloudprovider.InstanceRunning: 0, + }, + }, + want: true, + }, + { + name: "not in creating or deleting states", + mig: mig1, + migInstancesStateCache: map[GceRef]map[cloudprovider.InstanceState]int64{ + mig1.GceRef(): { + cloudprovider.InstanceCreating: 0, + cloudprovider.InstanceDeleting: 0, + cloudprovider.InstanceRunning: 1, + }, + }, + want: false, + }, + } + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + cache := GceCache{ + migInstancesStateCache: tc.migInstancesStateCache, + } + provider := &cachingMigInfoProvider{ + cache: &cache, + } + got := provider.isMigCreatingOrDeletingInstances(tc.mig) + assert.Equal(t, tc.want, got) + }) + } +} + +func TestUpdateMigInstancesCache(t *testing.T) { + testCases := []struct { + name string + migs map[GceRef]Mig + migToInstances map[GceRef][]GceInstance + fetchMigInstances []GceInstance + wantInstances map[GceRef][]GceInstance + migInstancesStateCache map[GceRef]map[cloudprovider.InstanceState]int64 + }{ + { + name: "inconsistent mig instance state", + migs: map[GceRef]Mig{mig1.GceRef(): mig1}, + migToInstances: map[GceRef][]GceInstance{ + mig1.GceRef(): {instance1}, + }, + migInstancesStateCache: map[GceRef]map[cloudprovider.InstanceState]int64{ + mig1.GceRef(): {cloudprovider.InstanceRunning: 2, cloudprovider.InstanceDeleting: 0, cloudprovider.InstanceCreating: 0}, + }, + fetchMigInstances: []GceInstance{instance1, instance2}, + wantInstances: map[GceRef][]GceInstance{mig1.GceRef(): {instance1, instance2}}, + }, + { + name: "mig with instance in creating or deleting state", + migs: map[GceRef]Mig{mig1.GceRef(): mig1}, + migInstancesStateCache: map[GceRef]map[cloudprovider.InstanceState]int64{ + mig1.GceRef(): {cloudprovider.InstanceRunning: 0, cloudprovider.InstanceDeleting: 0, cloudprovider.InstanceCreating: 2}, + }, + fetchMigInstances: []GceInstance{instance1, instance2}, + wantInstances: map[GceRef][]GceInstance{mig1.GceRef(): {instance1, instance2}}, + }, + { + name: "consistent mig instance state", + migs: map[GceRef]Mig{mig1.GceRef(): mig1}, + migToInstances: map[GceRef][]GceInstance{ + mig1.GceRef(): {instance1, instance2}, + }, + 
migInstancesStateCache: map[GceRef]map[cloudprovider.InstanceState]int64{
+				mig1.GceRef(): {cloudprovider.InstanceRunning: 2, cloudprovider.InstanceDeleting: 0, cloudprovider.InstanceCreating: 0},
+			},
+			wantInstances: map[GceRef][]GceInstance{mig1.GceRef(): {instance1, instance2}},
+		},
+		{
+			name: "mix of consistent and inconsistent states",
+			migs: map[GceRef]Mig{mig1.GceRef(): mig1, mig2.GceRef(): mig2},
+			migToInstances: map[GceRef][]GceInstance{
+				mig1.GceRef(): {instance1, instance2},
+			},
+			migInstancesStateCache: map[GceRef]map[cloudprovider.InstanceState]int64{
+				mig1.GceRef(): {cloudprovider.InstanceRunning: 2, cloudprovider.InstanceDeleting: 0, cloudprovider.InstanceCreating: 0},
+				mig2.GceRef(): {cloudprovider.InstanceRunning: 1, cloudprovider.InstanceDeleting: 0, cloudprovider.InstanceCreating: 0},
+			},
+			fetchMigInstances: []GceInstance{instance3},
+			wantInstances: map[GceRef][]GceInstance{
+				mig1.GceRef(): {instance1, instance2},
+				mig2.GceRef(): {instance3},
+			},
+		},
+	}
+
+	for _, tc := range testCases {
+		t.Run(tc.name, func(t *testing.T) {
+			cache := GceCache{
+				migs:                   tc.migs,
+				instances:              make(map[GceRef][]GceInstance),
+				instancesUpdateTime:    make(map[GceRef]time.Time),
+				migBaseNameCache:       make(map[GceRef]string),
+				migInstancesStateCache: tc.migInstancesStateCache,
+				instancesToMig:         make(map[GceRef]GceRef),
+			}
+			migLister := NewMigLister(&cache)
+			client := &mockAutoscalingGceClient{
+				fetchMigInstances: fetchMigInstancesConst(tc.fetchMigInstances),
+			}
+			provider := &cachingMigInfoProvider{
+				cache:        &cache,
+				migLister:    migLister,
+				gceClient:    client,
+				timeProvider: &realTime{},
+			}
+			err := provider.updateMigInstancesCache(tc.migToInstances)
+			assert.NoError(t, err)
+			for migRef, want := range tc.wantInstances {
+				instances, found := cache.GetMigInstances(migRef)
+				assert.True(t, found)
+				assert.Equal(t, want, instances)
+			}
+		})
+	}
+}
+
 type fakeTime struct {
 	now time.Time
 }
@@ -1555,3 +2025,13 @@ func fetchMachineTypeConst(name string, cpu int64, mem int64) func(string, strin
 		}, nil
 	}
 }
+
+func fetchAllInstancesInZone(allInstances map[string][]GceInstance) func(string, string, string) ([]GceInstance, error) {
+	return func(project, zone, filter string) ([]GceInstance, error) {
+		instances, found := allInstances[zone]
+		if !found {
+			return nil, errors.New("no instances found for zone")
+		}
+		return instances, nil
+	}
+}
diff --git a/cluster-autoscaler/config/autoscaling_options.go b/cluster-autoscaler/config/autoscaling_options.go
index 12ced472c683..35f7e1005204 100644
--- a/cluster-autoscaler/config/autoscaling_options.go
+++ b/cluster-autoscaler/config/autoscaling_options.go
@@ -58,7 +58,7 @@ type NodeGroupAutoscalingOptions struct {
 
 // GCEOptions contain autoscaling options specific to GCE cloud provider.
 type GCEOptions struct {
-	// ConcurrentRefreshes is the maximum number of concurrently refreshed instance groups or instance templates.
+	// ConcurrentRefreshes is the maximum number of concurrently refreshed instance groups, instance templates, or zones with MIG instances.
 	ConcurrentRefreshes int
 	// MigInstancesMinRefreshWaitTime is the minimum time which needs to pass before GCE MIG instances from a given MIG can be refreshed.
 	MigInstancesMinRefreshWaitTime time.Duration
@@ -66,6 +66,9 @@ type GCEOptions struct {
 	DomainUrl string
 	// LocalSSDDiskSizeProvider provides local ssd disk size based on machine type
 	LocalSSDDiskSizeProvider gce_localssdsize.LocalSSDSizeProvider
+	// BulkMigInstancesListingEnabled means that cluster instances should be listed in bulk instead of per MIG.
+	// Instances of MIGs that are creating or deleting instances, or whose bulk-listed
+	// results are inconsistent, are re-fetched individually using igm.ListInstances.
+	BulkMigInstancesListingEnabled bool
 }
 
 const (
diff --git a/cluster-autoscaler/main.go b/cluster-autoscaler/main.go
index da462f5a94f0..7084e901de5a 100644
--- a/cluster-autoscaler/main.go
+++ b/cluster-autoscaler/main.go
@@ -219,10 +219,10 @@ var (
 	awsUseStaticInstanceList = flag.Bool("aws-use-static-instance-list", false, "Should CA fetch instance types in runtime or use a static list. AWS only")
 
 	// GCE specific flags
-	concurrentGceRefreshes            = flag.Int("gce-concurrent-refreshes", 1, "Maximum number of concurrent refreshes per cloud object type.")
-	gceMigInstancesMinRefreshWaitTime = flag.Duration("gce-mig-instances-min-refresh-wait-time", 5*time.Second, "The minimum time which needs to pass before GCE MIG instances from a given MIG can be refreshed.")
-	_                                 = flag.Bool("gce-expander-ephemeral-storage-support", true, "Whether scale-up takes ephemeral storage resources into account for GCE cloud provider (Deprecated, to be removed in 1.30+)")
-
+	concurrentGceRefreshes            = flag.Int("gce-concurrent-refreshes", 1, "Maximum number of concurrent refreshes per cloud object type.")
+	gceMigInstancesMinRefreshWaitTime = flag.Duration("gce-mig-instances-min-refresh-wait-time", 5*time.Second, "The minimum time which needs to pass before GCE MIG instances from a given MIG can be refreshed.")
+	_                                 = flag.Bool("gce-expander-ephemeral-storage-support", true, "Whether scale-up takes ephemeral storage resources into account for GCE cloud provider (Deprecated, to be removed in 1.30+)")
+	bulkGceMigInstancesListingEnabled = flag.Bool("bulk-mig-instances-listing-enabled", false, "Fetch GCE mig instances in bulk instead of per mig")
 	enableProfiling                    = flag.Bool("profiling", false, "Is debug/pprof endpoint enabled")
 	clusterAPICloudConfigAuthoritative = flag.Bool("clusterapi-cloud-config-authoritative", false, "Treat the cloud-config flag authoritatively (do not fallback to using kubeconfig flag). ClusterAPI only")
 	cordonNodeBeforeTerminate          = flag.Bool("cordon-node-before-terminating", false, "Should CA cordon nodes before terminating during downscale process")
@@ -407,6 +407,7 @@ func createAutoscalingOptions() config.AutoscalingOptions {
 			ConcurrentRefreshes:            *concurrentGceRefreshes,
 			MigInstancesMinRefreshWaitTime: *gceMigInstancesMinRefreshWaitTime,
 			LocalSSDDiskSizeProvider:       localssdsize.NewSimpleLocalSSDProvider(),
+			BulkMigInstancesListingEnabled: *bulkGceMigInstancesListingEnabled,
 		},
 		ClusterAPICloudConfigAuthoritative: *clusterAPICloudConfigAuthoritative,
 		CordonNodeBeforeTerminate:          *cordonNodeBeforeTerminate,

From fa22a80e889f6f62a2c925618cd0b2988fe40b98 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Julian=20T=C3=B6lle?=
Date: Wed, 24 Apr 2024 11:25:00 +0200
Subject: [PATCH 28/30] fix(hetzner): missing error return in scale up/down

There is no Node Group/Autoscaling Group in the Hetzner Cloud API, so
the Hetzner provider implements this by manually creating as many
servers as needed. The existing code did not return any of the errors
that occurred.

Without any returned errors, cluster-autoscaler assumed that everything
was fine with the Node Group. In cases where there is a temporary issue
with one of the node groups (e.g. the location is unavailable, or there
is no leftover capacity for the requested server type),
cluster-autoscaler should consider this and try to scale up a different
Node Group. 
This will automatically happen once we return an error, as
cluster-autoscaler backs off from scaling Node Groups that have
recently returned errors.
---
 .../hetzner/hetzner_node_group.go             | 79 ++++++++++++++-----
 1 file changed, 58 insertions(+), 21 deletions(-)

diff --git a/cluster-autoscaler/cloudprovider/hetzner/hetzner_node_group.go b/cluster-autoscaler/cloudprovider/hetzner/hetzner_node_group.go
index b010dfbadc3e..4e0801a71482 100644
--- a/cluster-autoscaler/cloudprovider/hetzner/hetzner_node_group.go
+++ b/cluster-autoscaler/cloudprovider/hetzner/hetzner_node_group.go
@@ -18,6 +18,7 @@ package hetzner
 
 import (
 	"context"
+	"errors"
 	"fmt"
 	"maps"
 	"math/rand"
@@ -91,12 +92,14 @@ func (n *hetznerNodeGroup) IncreaseSize(delta int) error {
 		return fmt.Errorf("delta must be positive, have: %d", delta)
 	}
 
-	targetSize := n.targetSize + delta
-	if targetSize > n.MaxSize() {
-		return fmt.Errorf("size increase is too large. current: %d desired: %d max: %d", n.targetSize, targetSize, n.MaxSize())
+	desiredTargetSize := n.targetSize + delta
+	if desiredTargetSize > n.MaxSize() {
+		return fmt.Errorf("size increase is too large. current: %d desired: %d max: %d", n.targetSize, desiredTargetSize, n.MaxSize())
 	}
 
-	klog.V(4).Infof("Scaling Instance Pool %s to %d", n.id, targetSize)
+	actualDelta := delta
+
+	klog.V(4).Infof("Scaling Instance Pool %s to %d", n.id, desiredTargetSize)
 
 	n.clusterUpdateMutex.Lock()
 	defer n.clusterUpdateMutex.Unlock()
@@ -109,25 +112,43 @@ func (n *hetznerNodeGroup) IncreaseSize(delta int) error {
 		return fmt.Errorf("server type %s not available in region %s", n.instanceType, n.region)
 	}
 
+	defer func() {
+		// create new servers cache
+		if _, err := n.manager.cachedServers.servers(); err != nil {
+			klog.Errorf("failed to update servers cache: %v", err)
+		}
+
+		// Update target size
+		n.resetTargetSize(actualDelta)
+	}()
+
+	// There is no "Server Group" in Hetzner Cloud, so we need to create every
+	// server manually. This operation might fail for some of the servers
+	// because of quotas, rate limiting or server type availability. We need to
+	// collect the errors and inform cluster-autoscaler about this, so it can
+	// try other node groups if configured.
 	waitGroup := sync.WaitGroup{}
+	errsCh := make(chan error, delta)
 	for i := 0; i < delta; i++ {
 		waitGroup.Add(1)
 		go func() {
 			defer waitGroup.Done()
 			err := createServer(n)
 			if err != nil {
-				targetSize--
-				klog.Errorf("failed to create error: %v", err)
+				actualDelta--
+				errsCh <- err
 			}
 		}()
 	}
 	waitGroup.Wait()
+	close(errsCh)
 
-	n.targetSize = targetSize
-
-	// create new servers cache
-	if _, err := n.manager.cachedServers.servers(); err != nil {
-		klog.Errorf("failed to get servers: %v", err)
+	errs := make([]error, 0, delta)
+	for err := range errsCh {
+		errs = append(errs, err)
+	}
+	if len(errs) > 0 {
+		return fmt.Errorf("failed to create all servers: %w", errors.Join(errs...))
 	}
 
 	return nil
@@ -146,13 +167,26 @@ func (n *hetznerNodeGroup) DeleteNodes(nodes []*apiv1.Node) error {
 	n.clusterUpdateMutex.Lock()
 	defer n.clusterUpdateMutex.Unlock()
 
-	targetSize := n.targetSize - len(nodes)
+	delta := len(nodes)
+
+	targetSize := n.targetSize - delta
 	if targetSize < n.MinSize() {
 		return fmt.Errorf("size decrease is too large. 
current: %d desired: %d min: %d", n.targetSize, targetSize, n.MinSize()) } - waitGroup := sync.WaitGroup{} + actualDelta := delta + + defer func() { + // create new servers cache + if _, err := n.manager.cachedServers.servers(); err != nil { + klog.Errorf("failed to update servers cache: %v", err) + } + + n.resetTargetSize(-actualDelta) + }() + waitGroup := sync.WaitGroup{} + errsCh := make(chan error, len(nodes)) for _, node := range nodes { waitGroup.Add(1) go func(node *apiv1.Node) { @@ -160,20 +194,23 @@ func (n *hetznerNodeGroup) DeleteNodes(nodes []*apiv1.Node) error { err := n.manager.deleteByNode(node) if err != nil { - klog.Errorf("failed to delete server ID %s error: %v", node.Name, err) + actualDelta-- + errsCh <- fmt.Errorf("failed to delete server for node %q: %w", node.Name, err) } waitGroup.Done() }(node) } waitGroup.Wait() + close(errsCh) - // create new servers cache - if _, err := n.manager.cachedServers.servers(); err != nil { - klog.Errorf("failed to get servers: %v", err) + errs := make([]error, 0, len(nodes)) + for err := range errsCh { + errs = append(errs, err) + } + if len(errs) > 0 { + return fmt.Errorf("failed to delete all nodes: %w", errors.Join(errs...)) } - - n.resetTargetSize(-len(nodes)) return nil } @@ -561,8 +598,8 @@ func waitForServerAction(m *hetznerManager, serverName string, action *hcloud.Ac func (n *hetznerNodeGroup) resetTargetSize(expectedDelta int) { servers, err := n.manager.allServers(n.id) if err != nil { - klog.Errorf("failed to set node pool %s size, using delta %d error: %v", n.id, expectedDelta, err) - n.targetSize = n.targetSize - expectedDelta + klog.Warningf("failed to set node pool %s size, using delta %d error: %v", n.id, expectedDelta, err) + n.targetSize = n.targetSize + expectedDelta } else { klog.Infof("Set node group %s size from %d to %d, expected delta %d", n.id, n.targetSize, len(servers), expectedDelta) n.targetSize = len(servers) From aff0072adfdf27288060ebd8f26f6a9dac0d95af Mon Sep 17 00:00:00 2001 From: Damika Gamlath Date: Thu, 4 Jul 2024 15:09:02 +0000 Subject: [PATCH 29/30] Use project name instead of project number in createIgmRef() --- .../gce/autoscaling_gce_client.go | 7 +++++-- .../gce/autoscaling_gce_client_test.go | 20 +++++++++---------- 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/cluster-autoscaler/cloudprovider/gce/autoscaling_gce_client.go b/cluster-autoscaler/cloudprovider/gce/autoscaling_gce_client.go index 4dde60940700..2bb0db5b99a6 100644 --- a/cluster-autoscaler/cloudprovider/gce/autoscaling_gce_client.go +++ b/cluster-autoscaler/cloudprovider/gce/autoscaling_gce_client.go @@ -405,11 +405,11 @@ func externalToInternalInstance(gceInstance *gce.Instance, loggingQuota *klogx.Q }, }, NumericId: gceInstance.Id, - Igm: createIgmRef(gceInstance, loggingQuota), + Igm: createIgmRef(gceInstance, ref.Project, loggingQuota), }, nil } -func createIgmRef(gceInstance *gce.Instance, loggingQuota *klogx.Quota) GceRef { +func createIgmRef(gceInstance *gce.Instance, project string, loggingQuota *klogx.Quota) GceRef { createdBy := "" for _, item := range gceInstance.Metadata.Items { if item.Key == "created-by" && item.Value != nil { @@ -425,6 +425,9 @@ func createIgmRef(gceInstance *gce.Instance, loggingQuota *klogx.Quota) GceRef { klogx.V(5).UpTo(loggingQuota).Infof("Unable to parse IGM for %v because of %v", gceInstance.SelfLink, err) return GceRef{} } + // project is overwritten to make it compatible with CA mig refs which uses project + // name instead of project number. 
igm url has project number not project name. + igmRef.Project = project return igmRef } diff --git a/cluster-autoscaler/cloudprovider/gce/autoscaling_gce_client_test.go b/cluster-autoscaler/cloudprovider/gce/autoscaling_gce_client_test.go index b5e829ef2b7d..136494e83ce9 100644 --- a/cluster-autoscaler/cloudprovider/gce/autoscaling_gce_client_test.go +++ b/cluster-autoscaler/cloudprovider/gce/autoscaling_gce_client_test.go @@ -925,7 +925,7 @@ func TestFetchAllInstances(t *testing.T) { Status: &cloudprovider.InstanceStatus{State: cloudprovider.InstanceCreating}, }, NumericId: 11, - Igm: GceRef{"893226960234", "zones", "test-igm1-grp"}, + Igm: GceRef{"myprojid", "zones", "test-igm1-grp"}, }, }, }, @@ -987,7 +987,7 @@ func TestFetchAllInstances(t *testing.T) { Status: &cloudprovider.InstanceStatus{State: cloudprovider.InstanceRunning}, }, NumericId: 10, - Igm: GceRef{"893226960234", "zones", "test-igm1-grp"}, + Igm: GceRef{"myprojid", "zones", "test-igm1-grp"}, }, { Instance: cloudprovider.Instance{ @@ -995,7 +995,7 @@ func TestFetchAllInstances(t *testing.T) { Status: &cloudprovider.InstanceStatus{State: cloudprovider.InstanceRunning}, }, NumericId: 11, - Igm: GceRef{"893226960234", "zones", "test-igm1-grp"}, + Igm: GceRef{"myprojid", "zones", "test-igm1-grp"}, }, }, }, @@ -1084,7 +1084,7 @@ func TestFetchAllInstances(t *testing.T) { Status: &cloudprovider.InstanceStatus{State: cloudprovider.InstanceRunning}, }, NumericId: 10, - Igm: GceRef{"893226960234", "zones", "test-igm1-grp"}, + Igm: GceRef{"myprojid", "zones", "test-igm1-grp"}, }, { Instance: cloudprovider.Instance{ @@ -1092,7 +1092,7 @@ func TestFetchAllInstances(t *testing.T) { Status: &cloudprovider.InstanceStatus{State: cloudprovider.InstanceRunning}, }, NumericId: 11, - Igm: GceRef{"893226960234", "zones", "test-igm2-grp"}, + Igm: GceRef{"myprojid", "zones", "test-igm2-grp"}, }, { Instance: cloudprovider.Instance{ @@ -1100,7 +1100,7 @@ func TestFetchAllInstances(t *testing.T) { Status: &cloudprovider.InstanceStatus{State: cloudprovider.InstanceRunning}, }, NumericId: 12, - Igm: GceRef{"893226960234", "zones", "test-igm1-grp"}, + Igm: GceRef{"myprojid", "zones", "test-igm1-grp"}, }, { Instance: cloudprovider.Instance{ @@ -1108,7 +1108,7 @@ func TestFetchAllInstances(t *testing.T) { Status: &cloudprovider.InstanceStatus{State: cloudprovider.InstanceRunning}, }, NumericId: 13, - Igm: GceRef{"893226960234", "zones", "test-igm1-grp"}, + Igm: GceRef{"myprojid", "zones", "test-igm1-grp"}, }, { Instance: cloudprovider.Instance{ @@ -1116,7 +1116,7 @@ func TestFetchAllInstances(t *testing.T) { Status: &cloudprovider.InstanceStatus{State: cloudprovider.InstanceRunning}, }, NumericId: 14, - Igm: GceRef{"893226960234", "zones", "test-igm2-grp"}, + Igm: GceRef{"myprojid", "zones", "test-igm2-grp"}, }, { Instance: cloudprovider.Instance{ @@ -1124,7 +1124,7 @@ func TestFetchAllInstances(t *testing.T) { Status: &cloudprovider.InstanceStatus{State: cloudprovider.InstanceRunning}, }, NumericId: 15, - Igm: GceRef{"893226960234", "zones", "test-igm1-grp"}, + Igm: GceRef{"myprojid", "zones", "test-igm1-grp"}, }, }, }, @@ -1228,7 +1228,7 @@ func TestExternalToInternalInstance(t *testing.T) { Status: &cloudprovider.InstanceStatus{State: cloudprovider.InstanceRunning}, }, NumericId: 10, - Igm: GceRef{"893226960234", "zones", "test-igm1-grp"}, + Igm: GceRef{"myprojid", "zones", "test-igm1-grp"}, }, }, } From 1ef1ca9734db24d032d2bb2b30dbc766e0926091 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20K=C5=82obuszewski?= Date: Tue, 9 Jul 2024 11:21:17 +0200 
Subject: [PATCH 30/30] Script for finding Cluster Autoscaler OWNERS

---
 cluster-autoscaler/hack/list-owners.py | 40 ++++++++++++++++++++++++++
 1 file changed, 40 insertions(+)
 create mode 100644 cluster-autoscaler/hack/list-owners.py

diff --git a/cluster-autoscaler/hack/list-owners.py b/cluster-autoscaler/hack/list-owners.py
new file mode 100644
index 000000000000..7b37993b5825
--- /dev/null
+++ b/cluster-autoscaler/hack/list-owners.py
@@ -0,0 +1,40 @@
+#!/usr/bin/env python3
+
+# Copyright 2024 The Kubernetes Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Python script to list all OWNERS of various parts of Cluster Autoscaler.
+
+Traverse all subdirectories and find OWNERS. This is useful for tagging people
+on broad announcements, for instance before a new patch release.
+"""
+
+import glob
+import yaml
+import sys
+
+files = glob.glob('**/OWNERS', recursive=True)
+owners = set()
+
+for fname in files:
+    with open(fname) as f:
+        parsed = yaml.safe_load(f)
+        # Guard against empty OWNERS files, for which safe_load returns None.
+        if parsed and 'approvers' in parsed and parsed['approvers'] is not None:
+            for approver in parsed['approvers']:
+                owners.add(approver)
+        else:
+            print("No approvers found in {}: {}".format(fname, parsed), file=sys.stderr)
+
+for owner in sorted(owners):
+    print('@', owner, sep='')
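
A note on the bulk MIG instance listing patch earlier in this series: the provider lists instances once per zone that contains known MIGs, buckets them by the IGM reference parsed from each instance's `created-by` metadata, and falls back to a per-MIG `igm.ListInstances` call whenever a MIG has instances in a creating or deleting state or the bulk view disagrees with the IGM's reported counts. The sketch below illustrates that grouping and fallback decision in isolation; the `GceRef`/`GceInstance` types here are simplified stand-ins for the provider's actual definitions, not the real code.

```go
package main

import "fmt"

// Simplified stand-ins for the provider's types; illustrative only.
type GceRef struct{ Project, Zone, Name string }
type GceInstance struct {
	Id  string
	Igm GceRef // parsed from the instance's created-by metadata; zero value if unknown
}

// groupInstancesToMigs buckets bulk-listed instances by their IGM reference.
// Instances whose IGM could not be resolved land under the zero-value GceRef.
func groupInstancesToMigs(instances []GceInstance) map[GceRef][]GceInstance {
	out := map[GceRef][]GceInstance{}
	for _, inst := range instances {
		out[inst.Igm] = append(out[inst.Igm], inst)
	}
	return out
}

func main() {
	mig := GceRef{Project: "myprojid", Zone: "myzone1", Name: "mig-a"}
	grouped := groupInstancesToMigs([]GceInstance{
		{Id: "i-1", Igm: mig},
		{Id: "i-2", Igm: mig},
		{Id: "orphan"}, // no resolvable IGM
	})

	// Consistency check in the spirit of isMigInstancesConsistent: if the
	// bulk-listed instance count disagrees with what the IGM itself reports,
	// the cheap bulk view cannot be trusted for this MIG and it is re-fetched
	// directly via igm.ListInstances.
	reportedInstances := 2 // e.g. the sum of per-state counts from the IGM
	if len(grouped[mig]) != reportedInstances {
		fmt.Println("inconsistent: fall back to igm.ListInstances for", mig.Name)
	} else {
		fmt.Printf("consistent: caching %d instances for %s\n", reportedInstances, mig.Name)
	}
}
```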
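Similarly, the Hetzner fix in patch 28 rests on a standard Go fan-out pattern: each worker goroutine reports its failure on a buffered channel, and once the WaitGroup drains, the errors are combined with `errors.Join` (Go 1.20+) and returned so the autoscaler can back off from the node group. A minimal standalone sketch of that pattern follows; `createServer` is an illustrative stub, not the Hetzner API.

```go
package main

import (
	"errors"
	"fmt"
	"sync"
)

// createServer is a stub standing in for the real provisioning call.
func createServer(i int) error {
	if i%2 == 1 {
		return fmt.Errorf("server %d: resource temporarily unavailable", i)
	}
	return nil
}

func main() {
	delta := 4
	var wg sync.WaitGroup
	// Buffered to delta so no goroutine ever blocks on send.
	errsCh := make(chan error, delta)

	for i := 0; i < delta; i++ {
		wg.Add(1)
		go func(i int) {
			defer wg.Done()
			if err := createServer(i); err != nil {
				errsCh <- err
			}
		}(i)
	}
	wg.Wait()
	close(errsCh) // safe: all senders have finished

	var errs []error
	for err := range errsCh {
		errs = append(errs, err)
	}
	if len(errs) > 0 {
		// errors.Join keeps every individual failure inspectable via errors.Is/As.
		fmt.Println(fmt.Errorf("failed to create all servers: %w", errors.Join(errs...)))
	}
}
```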