From 4e6d2418c2840b8040b335433f49fd02aa625b9b Mon Sep 17 00:00:00 2001
From: Jonathan Innis
Date: Sun, 15 Sep 2024 21:38:22 -0700
Subject: [PATCH] Fix race condition with using newer NodePool for hash
 annotation

---
 pkg/controllers/provisioning/provisioner.go   |  2 +-
 .../scheduling/nodeclaimtemplate.go           | 25 +++++++++---------
 pkg/controllers/provisioning/suite_test.go    | 28 ++++++++++++++++++++++++++++-
 3 files changed, 42 insertions(+), 13 deletions(-)

diff --git a/pkg/controllers/provisioning/provisioner.go b/pkg/controllers/provisioning/provisioner.go
index aafd3bcf68..ab56bdc13f 100644
--- a/pkg/controllers/provisioning/provisioner.go
+++ b/pkg/controllers/provisioning/provisioner.go
@@ -361,7 +361,7 @@ func (p *Provisioner) Create(ctx context.Context, n *scheduler.NodeClaim, opts .
 	if err := latest.Spec.Limits.ExceededBy(latest.Status.Resources); err != nil {
 		return "", err
 	}
-	nodeClaim := n.ToNodeClaim(latest)
+	nodeClaim := n.ToNodeClaim()
 
 	if err := p.kubeClient.Create(ctx, nodeClaim); err != nil {
 		return "", err
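
Why the provisioner change matters: Create() re-reads the NodePool as `latest` to
enforce limits, and the NodeClaim used to stamp its hash annotation from that
re-read object rather than from the NodePool state the scheduler actually
evaluated. If a NodePool update lands between scheduling and creation, the
annotation and the scheduling decision disagree. A minimal, self-contained
sketch of that ordering bug (hypothetical types, not Karpenter's API):

    package main

    import (
    	"crypto/sha256"
    	"fmt"
    )

    // NodePool stands in for the real object; only the hashed spec matters here.
    type NodePool struct{ Spec string }

    func hash(np NodePool) string {
    	sum := sha256.Sum256([]byte(np.Spec))
    	return fmt.Sprintf("%x", sum[:4])
    }

    func main() {
    	scheduled := NodePool{Spec: "v1"} // snapshot the scheduler worked from
    	latest := scheduled               // copy that Create() re-reads later

    	latest.Spec = "v2" // a concurrent NodePool update lands before Create()

    	// Buggy ordering: annotate with the hash of the re-read object.
    	fmt.Println("hash from latest:   ", hash(latest)) // reflects v2
    	// Fixed ordering: annotate with the hash captured at scheduling time.
    	fmt.Println("hash from snapshot: ", hash(scheduled)) // matches the decisions made
    }

With the fix, ToNodeClaim() takes no NodePool argument at all, so there is no
fresher object left to hash.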
diff --git a/pkg/controllers/provisioning/scheduling/nodeclaimtemplate.go b/pkg/controllers/provisioning/scheduling/nodeclaimtemplate.go
index 73e462076f..379bec28ce 100644
--- a/pkg/controllers/provisioning/scheduling/nodeclaimtemplate.go
+++ b/pkg/controllers/provisioning/scheduling/nodeclaimtemplate.go
@@ -23,6 +23,7 @@ import (
 	"github.com/samber/lo"
 	corev1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/types"
 
 	v1 "sigs.k8s.io/karpenter/pkg/apis/v1"
 	"sigs.k8s.io/karpenter/pkg/cloudprovider"
@@ -40,6 +41,7 @@ type NodeClaimTemplate struct {
 	v1.NodeClaim
 
 	NodePoolName        string
+	NodePoolUUID        types.UID
 	InstanceTypeOptions cloudprovider.InstanceTypes
 	Requirements        scheduling.Requirements
 }
@@ -48,36 +50,37 @@ func NewNodeClaimTemplate(nodePool *v1.NodePool) *NodeClaimTemplate {
 	nct := &NodeClaimTemplate{
 		NodeClaim:    *nodePool.Spec.Template.ToNodeClaim(),
 		NodePoolName: nodePool.Name,
+		NodePoolUUID: nodePool.UID,
 		Requirements: scheduling.NewRequirements(),
 	}
+	nct.Annotations = lo.Assign(nct.Annotations, map[string]string{
+		v1.NodePoolHashAnnotationKey:        nodePool.Hash(),
+		v1.NodePoolHashVersionAnnotationKey: v1.NodePoolHashVersion,
+	})
 	nct.Labels = lo.Assign(nct.Labels, map[string]string{v1.NodePoolLabelKey: nodePool.Name})
 	nct.Requirements.Add(scheduling.NewNodeSelectorRequirementsWithMinValues(nct.Spec.Requirements...).Values()...)
 	nct.Requirements.Add(scheduling.NewLabelRequirements(nct.Labels).Values()...)
 	return nct
 }
 
-func (i *NodeClaimTemplate) ToNodeClaim(nodePool *v1.NodePool) *v1.NodeClaim {
+func (i *NodeClaimTemplate) ToNodeClaim() *v1.NodeClaim {
 	// Order the instance types by price and only take the first 100 of them to decrease the instance type size in the requirements
 	instanceTypes := lo.Slice(i.InstanceTypeOptions.OrderByPrice(i.Requirements), 0, MaxInstanceTypes)
 	i.Requirements.Add(scheduling.NewRequirementWithFlexibility(corev1.LabelInstanceTypeStable, corev1.NodeSelectorOpIn, i.Requirements.Get(corev1.LabelInstanceTypeStable).MinValues, lo.Map(instanceTypes, func(i *cloudprovider.InstanceType, _ int) string { return i.Name })...))
 
-	gvk := object.GVK(nodePool)
 	nc := &v1.NodeClaim{
 		ObjectMeta: metav1.ObjectMeta{
 			GenerateName: fmt.Sprintf("%s-", i.NodePoolName),
-			Annotations: lo.Assign(i.Annotations, map[string]string{
-				v1.NodePoolHashAnnotationKey:        nodePool.Hash(),
-				v1.NodePoolHashVersionAnnotationKey: v1.NodePoolHashVersion,
-			}),
-			Labels: i.Labels,
+			Annotations: i.Annotations,
+			Labels:      i.Labels,
 			OwnerReferences: []metav1.OwnerReference{
 				{
-					APIVersion:         gvk.GroupVersion().String(),
-					Kind:               gvk.Kind,
-					Name:               nodePool.Name,
-					UID:                nodePool.UID,
+					APIVersion:         object.GVK(&v1.NodePool{}).GroupVersion().String(),
+					Kind:               object.GVK(&v1.NodePool{}).Kind,
+					Name:               i.NodePoolName,
+					UID:                i.NodePoolUUID,
 					BlockOwnerDeletion: lo.ToPtr(true),
 				},
 			},
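
The template now carries everything NodeClaim construction needs from the
NodePool (name, UID, and the hash annotations computed in NewNodeClaimTemplate),
so ToNodeClaim() reads only this scheduling-time snapshot, and the owner
reference's GVK is derived from the static type instead of the live object. A
compressed sketch of the snapshot-at-construction pattern (illustrative names,
not the real types):

    package main

    import "fmt"

    // template captures the source object's identity up front, at scheduling time.
    type template struct {
    	name, uid, hash string
    }

    // newTemplate copies plain values out of the mutable source; it keeps no
    // pointer that a later caller could use to observe concurrent updates.
    func newTemplate(name, uid, hash string) *template {
    	return &template{name: name, uid: uid, hash: hash}
    }

    // materialize runs later, possibly after the source changed; it can only
    // see the snapshot, so its output always matches the scheduling-time state.
    func (t *template) materialize() map[string]string {
    	return map[string]string{"owner": t.name, "uid": t.uid, "hash": t.hash}
    }

    func main() {
    	t := newTemplate("default", "uid-123", "83a1f9c2")
    	fmt.Println(t.materialize())
    }

Holding only plain values, rather than a *v1.NodePool, makes it impossible for
later materialization to pick up a newer NodePool by accident.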
diff --git a/pkg/controllers/provisioning/suite_test.go b/pkg/controllers/provisioning/suite_test.go
index 80c7e3d81e..3d973fec08 100644
--- a/pkg/controllers/provisioning/suite_test.go
+++ b/pkg/controllers/provisioning/suite_test.go
@@ -29,6 +29,7 @@ import (
 	storagev1 "k8s.io/api/storage/v1"
 	"k8s.io/apimachinery/pkg/api/resource"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/types"
 	"k8s.io/apimachinery/pkg/util/sets"
 	"k8s.io/client-go/tools/record"
 	clock "k8s.io/utils/clock/testing"
@@ -246,6 +247,31 @@ var _ = Describe("Provisioning", func() {
 			ExpectScheduled(ctx, env.Client, pod)
 		}
 	})
+	It("should not use a different NodePool hash on the NodeClaim if the NodePool changes during scheduling", func() {
+		// This test was added since we were generating the NodeClaim's NodePool hash from a NodePool that was re-retrieved
+		// after scheduling had been completed. This could have resulted in the hash not accurately reflecting the actual NodePool
+		// state that it was generated from
+
+		nodePool := test.NodePool()
+		pod := test.UnschedulablePod()
+		hash := nodePool.Hash()
+		ExpectApplied(ctx, env.Client, nodePool, pod)
+
+		results, err := prov.Schedule(ctx)
+		Expect(err).ToNot(HaveOccurred())
+
+		Expect(results.NewNodeClaims).To(HaveLen(1))
+		Expect(results.NewNodeClaims[0].ToNodeClaim().Annotations).To(HaveKeyWithValue(v1.NodePoolHashAnnotationKey, hash))
+
+		nodeClaims, err := prov.CreateNodeClaims(ctx, results.NewNodeClaims)
+		Expect(err).ToNot(HaveOccurred())
+		Expect(nodeClaims).To(HaveLen(1))
+
+		nodeClaim := &v1.NodeClaim{}
+		Expect(env.Client.Get(ctx, types.NamespacedName{Name: nodeClaims[0]}, nodeClaim)).To(Succeed())
+
+		Expect(nodeClaim.Annotations).To(HaveKeyWithValue(v1.NodePoolHashAnnotationKey, hash))
+	})
 	It("should schedule all pods on one inflight node when node is in deleting state", func() {
 		nodePool := test.NodePool()
 		its, err := cloudProvider.GetInstanceTypes(ctx, nodePool)
@@ -286,7 +312,7 @@ var _ = Describe("Provisioning", func() {
 			Expect(n.Node.Name).ToNot(Equal(node.Name))
 		}
 	})
-	It("should schedule baased on the max resource requests of containers and initContainers with sidecar containers when initcontainer comes first", func() {
+	It("should schedule based on the max resource requests of containers and initContainers with sidecar containers when initcontainer comes first", func() {
 		if env.Version.Minor() < 29 {
 			Skip("Native Sidecar containers is only on by default starting in K8s version >= 1.29.x")
 		}
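
The new test pins both the in-memory NodeClaim and the persisted one to the
hash captured before scheduling ran. A variant that would exercise the race
directly (hypothetical, not part of this patch; it assumes the suite's
ctx/env/prov scope and that editing the template's labels changes
NodePool.Hash()) is to update the NodePool between Schedule() and
CreateNodeClaims():

    It("should keep the scheduling-time hash when the NodePool is updated before NodeClaim creation", func() {
    	nodePool := test.NodePool()
    	pod := test.UnschedulablePod()
    	hash := nodePool.Hash()
    	ExpectApplied(ctx, env.Client, nodePool, pod)

    	results, err := prov.Schedule(ctx)
    	Expect(err).ToNot(HaveOccurred())

    	// Simulate the racing writer: change the NodePool after scheduling
    	// has completed but before the NodeClaim is created.
    	nodePool.Spec.Template.Labels = map[string]string{"changed": "true"}
    	ExpectApplied(ctx, env.Client, nodePool)

    	nodeClaims, err := prov.CreateNodeClaims(ctx, results.NewNodeClaims)
    	Expect(err).ToNot(HaveOccurred())

    	nodeClaim := &v1.NodeClaim{}
    	Expect(env.Client.Get(ctx, types.NamespacedName{Name: nodeClaims[0]}, nodeClaim)).To(Succeed())
    	// The annotation must reflect the hash the scheduler actually used, not the update.
    	Expect(nodeClaim.Annotations).To(HaveKeyWithValue(v1.NodePoolHashAnnotationKey, hash))
    })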