feat: Drop the hostname requirement when handling PV topology for release-v0.33.x (#1018) (#1082)
jonathan-innis authored Mar 11, 2024
1 parent f3a7219 commit 8010b32
Showing 5 changed files with 300 additions and 1 deletion.
103 changes: 103 additions & 0 deletions pkg/controllers/disruption/suite_test.go
@@ -269,6 +269,109 @@ var _ = Describe("Queue Limits", func() {
})
})

var _ = Describe("Simulate Scheduling", func() {
var nodePool *v1beta1.NodePool
BeforeEach(func() {
nodePool = test.NodePool()
})
It("can replace node with a local PV (ignoring hostname affinity)", func() {
nodeClaim, node := test.NodeClaimAndNode(v1beta1.NodeClaim{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{
v1beta1.NodePoolLabelKey: nodePool.Name,
v1.LabelInstanceTypeStable: mostExpensiveInstance.Name,
v1beta1.CapacityTypeLabelKey: mostExpensiveOffering.CapacityType,
v1.LabelTopologyZone: mostExpensiveOffering.Zone,
},
},
Status: v1beta1.NodeClaimStatus{
Allocatable: map[v1.ResourceName]resource.Quantity{
v1.ResourceCPU: resource.MustParse("32"),
v1.ResourcePods: resource.MustParse("100"),
},
},
})
labels := map[string]string{
"app": "test",
}
// create our StatefulSet so we can link a pod to it
ss := test.StatefulSet()
ExpectApplied(ctx, env.Client, ss)

// StorageClass that references "no-provisioner" and is used for local volume storage
storageClass := test.StorageClass(test.StorageClassOptions{
ObjectMeta: metav1.ObjectMeta{
Name: "local-path",
},
Provisioner: lo.ToPtr("kubernetes.io/no-provisioner"),
})
persistentVolume := test.PersistentVolume(test.PersistentVolumeOptions{UseLocal: true})
persistentVolume.Spec.NodeAffinity = &v1.VolumeNodeAffinity{
Required: &v1.NodeSelector{
NodeSelectorTerms: []v1.NodeSelectorTerm{
{
// This PV is only valid for use against this node
MatchExpressions: []v1.NodeSelectorRequirement{
{
Key: v1.LabelHostname,
Operator: v1.NodeSelectorOpIn,
Values: []string{node.Name},
},
},
},
},
},
}
persistentVolumeClaim := test.PersistentVolumeClaim(test.PersistentVolumeClaimOptions{VolumeName: persistentVolume.Name, StorageClassName: &storageClass.Name})
pod := test.Pod(test.PodOptions{
ObjectMeta: metav1.ObjectMeta{Labels: labels,
OwnerReferences: []metav1.OwnerReference{
{
APIVersion: "apps/v1",
Kind: "StatefulSet",
Name: ss.Name,
UID: ss.UID,
Controller: ptr.Bool(true),
BlockOwnerDeletion: ptr.Bool(true),
},
},
},
PersistentVolumeClaims: []string{persistentVolumeClaim.Name},
})
ExpectApplied(ctx, env.Client, ss, pod, nodeClaim, node, nodePool, storageClass, persistentVolume, persistentVolumeClaim)

// bind pods to node
ExpectManualBinding(ctx, env.Client, pod, node)

// inform cluster state about nodes and nodeclaims
ExpectMakeNodesAndNodeClaimsInitializedAndStateUpdated(ctx, env.Client, nodeStateController, nodeClaimStateController, []*v1.Node{node}, []*v1beta1.NodeClaim{nodeClaim})

// disruption won't delete the old node until the new node is ready
var wg sync.WaitGroup
ExpectTriggerVerifyAction(&wg)
ExpectMakeNewNodeClaimsReady(ctx, env.Client, &wg, cluster, cloudProvider, 1)
ExpectReconcileSucceeded(ctx, disruptionController, types.NamespacedName{})
wg.Wait()

// Process the item but still expect it to be in the queue, since its replacements aren't created yet
ExpectReconcileSucceeded(ctx, queue, types.NamespacedName{})

// Cascade any deletion of the nodeClaim to the node
ExpectNodeClaimsCascadeDeletion(ctx, env.Client, nodeClaim)

// Expect that the new nodeClaim was created and that it's different from the original.
// We should succeed in getting a replacement, since the hostname affinity requirement
// becomes impossible to satisfy once we spin down the old node
ExpectNotFound(ctx, env.Client, nodeClaim, node)
nodeclaims := ExpectNodeClaims(ctx, env.Client)
nodes := ExpectNodes(ctx, env.Client)
Expect(nodeclaims).To(HaveLen(1))
Expect(nodes).To(HaveLen(1))
Expect(nodeclaims[0].Name).ToNot(Equal(nodeClaim.Name))
Expect(nodes[0].Name).ToNot(Equal(node.Name))
})
})

var _ = Describe("Disruption Taints", func() {
var nodePool *v1beta1.NodePool
var nodeClaim *v1beta1.NodeClaim
9 changes: 8 additions & 1 deletion pkg/controllers/provisioning/scheduling/volumetopology.go
@@ -148,7 +148,14 @@ func (v *VolumeTopology) getPersistentVolumeRequirements(ctx context.Context, po
var requirements []v1.NodeSelectorRequirement
if len(pv.Spec.NodeAffinity.Required.NodeSelectorTerms) > 0 {
// Terms are ORed, only use the first term
-		requirements = append(requirements, pv.Spec.NodeAffinity.Required.NodeSelectorTerms[0].MatchExpressions...)
+		requirements = pv.Spec.NodeAffinity.Required.NodeSelectorTerms[0].MatchExpressions
+		// If we are using a Local volume or a HostPath volume, ignore its hostname affinity:
+		// rescheduling this pod to a new node means the current hostname requirement can no longer be satisfied
+		if pv.Spec.Local != nil || pv.Spec.HostPath != nil {
+			requirements = lo.Reject(requirements, func(n v1.NodeSelectorRequirement, _ int) bool {
+				return n.Key == v1.LabelHostname
+			})
+		}
}
return requirements, nil
}
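The hunk above is the heart of the change: hostname requirements are stripped from Local and HostPath volumes before they feed into pod scheduling requirements. A minimal standalone sketch of the same filtering, assuming github.com/samber/lo and k8s.io/api/core/v1; the PV and requirements below are made up for illustration and are not part of the commit:

package main

import (
	"fmt"

	"github.com/samber/lo"
	v1 "k8s.io/api/core/v1"
)

// dropHostnameForLocalVolumes mirrors the filter above: a Local or HostPath PV is
// pinned to a single node via kubernetes.io/hostname, and carrying that requirement
// forward would make any replacement node unschedulable, so we reject it.
func dropHostnameForLocalVolumes(pv *v1.PersistentVolume, requirements []v1.NodeSelectorRequirement) []v1.NodeSelectorRequirement {
	if pv.Spec.Local == nil && pv.Spec.HostPath == nil {
		return requirements
	}
	return lo.Reject(requirements, func(n v1.NodeSelectorRequirement, _ int) bool {
		return n.Key == v1.LabelHostname
	})
}

func main() {
	pv := &v1.PersistentVolume{Spec: v1.PersistentVolumeSpec{
		PersistentVolumeSource: v1.PersistentVolumeSource{
			Local: &v1.LocalVolumeSource{Path: "/mnt/local-disks"},
		},
	}}
	requirements := []v1.NodeSelectorRequirement{
		{Key: v1.LabelHostname, Operator: v1.NodeSelectorOpIn, Values: []string{"old-node"}},
		{Key: v1.LabelTopologyZone, Operator: v1.NodeSelectorOpIn, Values: []string{"test-zone-1"}},
	}
	// Only the zone requirement survives; the hostname pin to "old-node" is dropped.
	fmt.Println(dropHostnameForLocalVolumes(pv, requirements))
}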
111 changes: 111 additions & 0 deletions pkg/controllers/provisioning/suite_test.go
@@ -1309,6 +1309,117 @@ var _ = Describe("Provisioning", func() {
node := ExpectScheduled(ctx, env.Client, pod)
Expect(node.Labels).To(HaveKeyWithValue(v1.LabelTopologyZone, "test-zone-3"))
})
DescribeTable("should ignore hostname affinity scheduling when using local path volumes",
func(volumeOptions test.PersistentVolumeOptions) {
// StorageClass that references "no-provisioner" and is used for local volume storage
storageClass = test.StorageClass(test.StorageClassOptions{
ObjectMeta: metav1.ObjectMeta{
Name: "local-path",
},
Provisioner: lo.ToPtr("kubernetes.io/no-provisioner"),
})
// Create a PersistentVolume that is using a random node name for its affinity
persistentVolume := test.PersistentVolume(volumeOptions)
persistentVolume.Spec.NodeAffinity = &v1.VolumeNodeAffinity{
Required: &v1.NodeSelector{
NodeSelectorTerms: []v1.NodeSelectorTerm{
{
MatchExpressions: []v1.NodeSelectorRequirement{
{
Key: v1.LabelHostname,
Operator: v1.NodeSelectorOpIn,
Values: []string{test.RandomName()},
},
},
},
},
},
}
persistentVolumeClaim := test.PersistentVolumeClaim(test.PersistentVolumeClaimOptions{VolumeName: persistentVolume.Name, StorageClassName: &storageClass.Name})
ExpectApplied(ctx, env.Client, test.NodePool(), storageClass, persistentVolumeClaim, persistentVolume)
pod := test.UnschedulablePod(test.PodOptions{
PersistentVolumeClaims: []string{persistentVolumeClaim.Name},
})
ExpectProvisioned(ctx, env.Client, cluster, cloudProvider, prov, pod)
// Expect that we are still able to schedule this pod to a node, even though we had a hostname affinity on it
ExpectScheduled(ctx, env.Client, pod)
},
Entry("when using local volumes", test.PersistentVolumeOptions{UseLocal: true}),
Entry("when using hostpath volumes", test.PersistentVolumeOptions{UseHostPath: true}),
)
DescribeTable("should ignore hostname affinity scheduling when using local path volumes (ephemeral volume)",
func(volumeOptions test.PersistentVolumeOptions) {
// StorageClass that references "no-provisioner" and is used for local volume storage
storageClass = test.StorageClass(test.StorageClassOptions{
ObjectMeta: metav1.ObjectMeta{
Name: "local-path",
},
Provisioner: lo.ToPtr("kubernetes.io/no-provisioner"),
})
pod := test.UnschedulablePod(test.PodOptions{
EphemeralVolumeTemplates: []test.EphemeralVolumeTemplateOptions{
{
StorageClassName: &storageClass.Name,
},
},
})
persistentVolume := test.PersistentVolume(volumeOptions)
persistentVolume.Spec.NodeAffinity = &v1.VolumeNodeAffinity{
Required: &v1.NodeSelector{
NodeSelectorTerms: []v1.NodeSelectorTerm{
{
MatchExpressions: []v1.NodeSelectorRequirement{
{
Key: v1.LabelHostname,
Operator: v1.NodeSelectorOpIn,
Values: []string{test.RandomName()},
},
},
},
},
},
}
persistentVolumeClaim := test.PersistentVolumeClaim(test.PersistentVolumeClaimOptions{
ObjectMeta: metav1.ObjectMeta{
Name: fmt.Sprintf("%s-%s", pod.Name, pod.Spec.Volumes[0].Name),
},
VolumeName: persistentVolume.Name,
StorageClassName: &storageClass.Name,
})
ExpectApplied(ctx, env.Client, test.NodePool(), storageClass, pod, persistentVolumeClaim, persistentVolume)
ExpectProvisioned(ctx, env.Client, cluster, cloudProvider, prov, pod)
ExpectScheduled(ctx, env.Client, pod)
},
Entry("when using local volumes", test.PersistentVolumeOptions{UseLocal: true}),
Entry("when using hostpath volumes", test.PersistentVolumeOptions{UseHostPath: true}),
)
It("should not ignore hostname affinity when using non-local path volumes", func() {
// This PersistentVolume is going to use a standard CSI volume for provisioning
persistentVolume := test.PersistentVolume()
persistentVolume.Spec.NodeAffinity = &v1.VolumeNodeAffinity{
Required: &v1.NodeSelector{
NodeSelectorTerms: []v1.NodeSelectorTerm{
{
MatchExpressions: []v1.NodeSelectorRequirement{
{
Key: v1.LabelHostname,
Operator: v1.NodeSelectorOpIn,
Values: []string{test.RandomName()},
},
},
},
},
},
}
persistentVolumeClaim := test.PersistentVolumeClaim(test.PersistentVolumeClaimOptions{VolumeName: persistentVolume.Name, StorageClassName: &storageClass.Name})
ExpectApplied(ctx, env.Client, test.NodePool(), storageClass, persistentVolumeClaim, persistentVolume)
pod := test.UnschedulablePod(test.PodOptions{
PersistentVolumeClaims: []string{persistentVolumeClaim.Name},
})
ExpectProvisioned(ctx, env.Client, cluster, cloudProvider, prov, pod)
// Expect that this pod can't schedule, because we have a hostname affinity and there's currently no node that satisfies it
ExpectNotScheduled(ctx, env.Client, pod)
})
It("should not schedule if volume zones are incompatible", func() {
persistentVolume := test.PersistentVolume(test.PersistentVolumeOptions{Zones: []string{"test-zone-3"}})
persistentVolumeClaim := test.PersistentVolumeClaim(test.PersistentVolumeClaimOptions{VolumeName: persistentVolume.Name, StorageClassName: &storageClass.Name})
64 changes: 64 additions & 0 deletions pkg/test/statefulsets.go
@@ -0,0 +1,64 @@
/*
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package test

import (
"fmt"

"github.com/imdario/mergo"
appsv1 "k8s.io/api/apps/v1"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"knative.dev/pkg/ptr"
)

type StatefulSetOptions struct {
metav1.ObjectMeta
Labels map[string]string
Replicas int32
PodOptions PodOptions
}

func StatefulSet(overrides ...StatefulSetOptions) *appsv1.StatefulSet {
options := StatefulSetOptions{}
for _, opts := range overrides {
if err := mergo.Merge(&options, opts, mergo.WithOverride); err != nil {
panic(fmt.Sprintf("Failed to merge statefulset options: %s", err))
}
}

objectMeta := NamespacedObjectMeta(options.ObjectMeta)

if options.PodOptions.Image == "" {
options.PodOptions.Image = "public.ecr.aws/eks-distro/kubernetes/pause:3.2"
}
if options.PodOptions.Labels == nil {
options.PodOptions.Labels = map[string]string{
"app": objectMeta.Name,
}
}
pod := Pod(options.PodOptions)
return &appsv1.StatefulSet{
ObjectMeta: objectMeta,
Spec: appsv1.StatefulSetSpec{
Replicas: ptr.Int32(options.Replicas),
Selector: &metav1.LabelSelector{MatchLabels: options.PodOptions.Labels},
Template: v1.PodTemplateSpec{
ObjectMeta: ObjectMeta(options.PodOptions.ObjectMeta),
Spec: pod.Spec,
},
},
}
}
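A hedged usage sketch for the new helper; the option values are illustrative, and ctx/env.Client are the suite fixtures used in suite_test.go above:

// Create a two-replica StatefulSet whose pods run the default pause image, then apply it.
ss := test.StatefulSet(test.StatefulSetOptions{
	Replicas:   2,
	PodOptions: test.PodOptions{Image: "public.ecr.aws/eks-distro/kubernetes/pause:3.2"},
})
ExpectApplied(ctx, env.Client, ss)

Note the defaulting above: when PodOptions.Labels is left nil, the helper sets the pod labels to {"app": <object name>}, so the generated selector always matches the pod template.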
14 changes: 14 additions & 0 deletions pkg/test/storage.go
@@ -32,6 +32,8 @@ type PersistentVolumeOptions struct {
StorageClassName string
Driver string
UseAWSInTreeDriver bool
UseLocal bool
UseHostPath bool
}

func PersistentVolume(overrides ...PersistentVolumeOptions) *v1.PersistentVolume {
@@ -44,6 +46,18 @@ func PersistentVolume(overrides ...PersistentVolumeOptions) *v1.PersistentVolume
// Determine the PersistentVolumeSource based on the options
var source v1.PersistentVolumeSource
switch {
case options.UseLocal:
source = v1.PersistentVolumeSource{
Local: &v1.LocalVolumeSource{
Path: "/mnt/local-disks",
},
}
case options.UseHostPath:
source = v1.PersistentVolumeSource{
HostPath: &v1.HostPathVolumeSource{
Path: "/mnt/local-disks",
},
}
case options.UseAWSInTreeDriver:
source = v1.PersistentVolumeSource{
AWSElasticBlockStore: &v1.AWSElasticBlockStoreVolumeSource{
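For reference, a sketch of how the new options are exercised (variable names are illustrative; the disruption and provisioning tests above make the same calls):

// UseLocal and UseHostPath select the PersistentVolumeSource; everything else
// about the PV is built exactly as before.
localPV := test.PersistentVolume(test.PersistentVolumeOptions{UseLocal: true})       // sets .Spec.Local with path /mnt/local-disks
hostPathPV := test.PersistentVolume(test.PersistentVolumeOptions{UseHostPath: true}) // sets .Spec.HostPath with path /mnt/local-disks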
