Fix race condition with using newer NodePool for hash annotation
jonathan-innis committed Sep 18, 2024
1 parent 118d63b commit 604a329
Showing 3 changed files with 47 additions and 12 deletions.
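The race in question: before this change, Provisioner.Create re-read the NodePool from the cluster ("latest") and passed it into ToNodeClaim, which stamped the NodePool hash annotation (v1.NodePoolHashAnnotationKey) from that re-fetched object. A NodePool update landing between scheduling and NodeClaim creation could therefore leave the NodeClaim with a hash that did not reflect the NodePool spec it was actually scheduled from. The fix snapshots the hash, the hash version, and the NodePool UID onto the NodeClaimTemplate when the template is built, so ToNodeClaim no longer needs a live NodePool at all.

The following is a minimal, self-contained Go sketch of that snapshot-at-construction pattern, not the Karpenter code: nodePoolSpec, template, hashOf, newTemplate, and the example.com annotation key are illustrative stand-ins.

package main

import "fmt"

// nodePoolSpec stands in for the parts of a NodePool that feed the hash.
type nodePoolSpec struct {
    Name   string
    UID    string
    Labels map[string]string
}

// hashOf stands in for NodePool.Hash(): any spec change changes the result.
func hashOf(np nodePoolSpec) string {
    return fmt.Sprintf("hash(%v)", np.Labels)
}

// template captures, at scheduling time, everything a later create step needs,
// so a concurrent NodePool update cannot leak into the NodeClaim it produces.
type template struct {
    nodePoolName string
    nodePoolUID  string
    hash         string
}

func newTemplate(np nodePoolSpec) template {
    return template{nodePoolName: np.Name, nodePoolUID: np.UID, hash: hashOf(np)}
}

// toAnnotations uses only the snapshotted hash; it takes no NodePool argument.
func (t template) toAnnotations() map[string]string {
    return map[string]string{"example.com/nodepool-hash": t.hash}
}

func main() {
    np := nodePoolSpec{Name: "default", UID: "uid-1", Labels: map[string]string{}}
    tmpl := newTemplate(np) // scheduling works against this snapshot

    // The NodePool changes after scheduling but before the NodeClaim is created...
    np.Labels = map[string]string{"new-label": "new-value"}

    // ...yet the annotation still reflects the spec the template was scheduled from.
    fmt.Println(tmpl.nodePoolName, tmpl.toAnnotations())
}

Run as-is, it prints the hash captured when the template was built even though the labels change afterwards; the new test in suite_test.go asserts the same behavior against the real types.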

pkg/controllers/provisioning/provisioner.go (2 changes: 1 addition & 1 deletion)

@@ -361,7 +361,7 @@ func (p *Provisioner) Create(ctx context.Context, n *scheduler.NodeClaim, opts .
     if err := latest.Spec.Limits.ExceededBy(latest.Status.Resources); err != nil {
         return "", err
     }
-    nodeClaim := n.ToNodeClaim(latest)
+    nodeClaim := n.ToNodeClaim()
 
     if err := p.kubeClient.Create(ctx, nodeClaim); err != nil {
         return "", err

pkg/controllers/provisioning/scheduling/nodeclaimtemplate.go (25 changes: 14 additions & 11 deletions)

@@ -23,6 +23,7 @@ import (
     "github.com/samber/lo"
     corev1 "k8s.io/api/core/v1"
     metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+    "k8s.io/apimachinery/pkg/types"
 
     v1 "sigs.k8s.io/karpenter/pkg/apis/v1"
     "sigs.k8s.io/karpenter/pkg/cloudprovider"
@@ -40,6 +41,7 @@ type NodeClaimTemplate struct {
     v1.NodeClaim
 
     NodePoolName string
+    NodePoolUUID types.UID
     InstanceTypeOptions cloudprovider.InstanceTypes
     Requirements scheduling.Requirements
 }
@@ -48,36 +50,37 @@ func NewNodeClaimTemplate(nodePool *v1.NodePool) *NodeClaimTemplate {
     nct := &NodeClaimTemplate{
         NodeClaim: *nodePool.Spec.Template.ToNodeClaim(),
         NodePoolName: nodePool.Name,
+        NodePoolUUID: nodePool.UID,
         Requirements: scheduling.NewRequirements(),
     }
+    nct.Annotations = lo.Assign(nct.Annotations, map[string]string{
+        v1.NodePoolHashAnnotationKey: nodePool.Hash(),
+        v1.NodePoolHashVersionAnnotationKey: v1.NodePoolHashVersion,
+    })
     nct.Labels = lo.Assign(nct.Labels, map[string]string{v1.NodePoolLabelKey: nodePool.Name})
     nct.Requirements.Add(scheduling.NewNodeSelectorRequirementsWithMinValues(nct.Spec.Requirements...).Values()...)
     nct.Requirements.Add(scheduling.NewLabelRequirements(nct.Labels).Values()...)
     return nct
 }
 
-func (i *NodeClaimTemplate) ToNodeClaim(nodePool *v1.NodePool) *v1.NodeClaim {
+func (i *NodeClaimTemplate) ToNodeClaim() *v1.NodeClaim {
     // Order the instance types by price and only take the first 100 of them to decrease the instance type size in the requirements
     instanceTypes := lo.Slice(i.InstanceTypeOptions.OrderByPrice(i.Requirements), 0, MaxInstanceTypes)
     i.Requirements.Add(scheduling.NewRequirementWithFlexibility(corev1.LabelInstanceTypeStable, corev1.NodeSelectorOpIn, i.Requirements.Get(corev1.LabelInstanceTypeStable).MinValues, lo.Map(instanceTypes, func(i *cloudprovider.InstanceType, _ int) string {
         return i.Name
     })...))
 
-    gvk := object.GVK(nodePool)
     nc := &v1.NodeClaim{
         ObjectMeta: metav1.ObjectMeta{
             GenerateName: fmt.Sprintf("%s-", i.NodePoolName),
-            Annotations: lo.Assign(i.Annotations, map[string]string{
-                v1.NodePoolHashAnnotationKey: nodePool.Hash(),
-                v1.NodePoolHashVersionAnnotationKey: v1.NodePoolHashVersion,
-            }),
-            Labels: i.Labels,
+            Annotations: i.Annotations,
+            Labels: i.Labels,
             OwnerReferences: []metav1.OwnerReference{
                 {
-                    APIVersion: gvk.GroupVersion().String(),
-                    Kind: gvk.Kind,
-                    Name: nodePool.Name,
-                    UID: nodePool.UID,
+                    APIVersion: object.GVK(&v1.NodePool{}).GroupVersion().String(),
+                    Kind: object.GVK(&v1.NodePool{}).Kind,
+                    Name: i.NodePoolName,
+                    UID: i.NodePoolUUID,
                     BlockOwnerDeletion: lo.ToPtr(true),
                 },
             },
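
Since the template now carries NodePoolName and NodePoolUUID, ToNodeClaim can also build the owner reference purely from snapshotted values plus a statically derived GVK, instead of reading them off a NodePool passed in at creation time. A small standalone sketch of that shape (assuming k8s.io/apimachinery is on the module path; templateOwner is an illustrative name, and the group/version/kind strings are hard-coded here rather than derived via object.GVK as the real code does):

package main

import (
    "fmt"

    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    "k8s.io/apimachinery/pkg/types"
)

// templateOwner mirrors the two fields this commit adds to NodeClaimTemplate.
type templateOwner struct {
    NodePoolName string
    NodePoolUUID types.UID
}

// ownerReference builds the owner reference from snapshotted values only,
// so no NodePool needs to be passed in (or re-fetched) at NodeClaim-build time.
func ownerReference(t templateOwner) metav1.OwnerReference {
    block := true
    return metav1.OwnerReference{
        // Hard-coded here only to keep the sketch self-contained; the real code
        // derives these from the registered scheme via object.GVK(&v1.NodePool{}).
        APIVersion:         "karpenter.sh/v1",
        Kind:               "NodePool",
        Name:               t.NodePoolName,
        UID:                t.NodePoolUUID,
        BlockOwnerDeletion: &block,
    }
}

func main() {
    ref := ownerReference(templateOwner{NodePoolName: "default", NodePoolUUID: types.UID("example-uid")})
    fmt.Printf("%+v\n", ref)
}

Deriving the GVK from the scheme, as the diff above does, keeps the reference correct if the API group or version ever moves; the literals here are for illustration only.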

pkg/controllers/provisioning/suite_test.go (32 changes: 32 additions & 0 deletions)

@@ -29,6 +29,7 @@ import (
     storagev1 "k8s.io/api/storage/v1"
     "k8s.io/apimachinery/pkg/api/resource"
     metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+    "k8s.io/apimachinery/pkg/types"
     "k8s.io/apimachinery/pkg/util/sets"
     "k8s.io/client-go/tools/record"
     clock "k8s.io/utils/clock/testing"
@@ -246,6 +247,37 @@ var _ = Describe("Provisioning", func() {
             ExpectScheduled(ctx, env.Client, pod)
         }
     })
+    It("should not use a different NodePool hash on the NodeClaim if the NodePool changes during scheduling", func() {
+        // This test was added since we were generating the NodeClaim's NodePool hash from a NodePool that was re-retrieved
+        // after scheduling had been completed. This could have resulted in the hash not accurately reflecting the actual NodePool
+        // state that it was generated from
+
+        nodePool := test.NodePool()
+        pod := test.UnschedulablePod()
+        hash := nodePool.Hash()
+        ExpectApplied(ctx, env.Client, nodePool, pod)
+
+        results, err := prov.Schedule(ctx)
+        Expect(err).ToNot(HaveOccurred())
+
+        Expect(results.NewNodeClaims).To(HaveLen(1))
+        Expect(results.NewNodeClaims[0].ToNodeClaim().Annotations).To(HaveKeyWithValue(v1.NodePoolHashAnnotationKey, hash))
+
+        nodePool.Spec.Template.Labels = lo.Assign(nodePool.Spec.Template.Labels, map[string]string{
+            "new-label": "new-value",
+        })
+        Expect(nodePool.Hash()).ToNot(Equal(hash))
+        ExpectApplied(ctx, env.Client, nodePool) // apply the changed NodePool but expect no change in the hash on the NodeClaim
+
+        nodeClaims, err := prov.CreateNodeClaims(ctx, results.NewNodeClaims)
+        Expect(err).ToNot(HaveOccurred())
+        Expect(nodeClaims).To(HaveLen(1))
+
+        nodeClaim := &v1.NodeClaim{}
+        Expect(env.Client.Get(ctx, types.NamespacedName{Name: nodeClaims[0]}, nodeClaim)).To(Succeed())
+
+        Expect(nodeClaim.Annotations).To(HaveKeyWithValue(v1.NodePoolHashAnnotationKey, hash))
+    })
     It("should schedule all pods on one inflight node when node is in deleting state", func() {
         nodePool := test.NodePool()
         its, err := cloudProvider.GetInstanceTypes(ctx, nodePool)
