diff --git a/assets/overlays/aws-ebs/base/csidriver.yaml b/assets/overlays/aws-ebs/base/csidriver.yaml index 1b16edddb..80871b7d5 100644 --- a/assets/overlays/aws-ebs/base/csidriver.yaml +++ b/assets/overlays/aws-ebs/base/csidriver.yaml @@ -14,3 +14,4 @@ spec: seLinuxMount: true volumeLifecycleModes: - Persistent + nodeAllocatableUpdatePeriodSeconds: ${NODE_ALLOCATABLE_UPDATE_PERIOD_SECONDS} diff --git a/assets/overlays/aws-ebs/generated/hypershift/csidriver.yaml b/assets/overlays/aws-ebs/generated/hypershift/csidriver.yaml index 4440f9e58..fb42d2459 100644 --- a/assets/overlays/aws-ebs/generated/hypershift/csidriver.yaml +++ b/assets/overlays/aws-ebs/generated/hypershift/csidriver.yaml @@ -13,6 +13,7 @@ metadata: spec: attachRequired: true fsGroupPolicy: File + nodeAllocatableUpdatePeriodSeconds: ${NODE_ALLOCATABLE_UPDATE_PERIOD_SECONDS} podInfoOnMount: false requiresRepublish: false seLinuxMount: true diff --git a/assets/overlays/aws-ebs/generated/standalone/csidriver.yaml b/assets/overlays/aws-ebs/generated/standalone/csidriver.yaml index 4440f9e58..fb42d2459 100644 --- a/assets/overlays/aws-ebs/generated/standalone/csidriver.yaml +++ b/assets/overlays/aws-ebs/generated/standalone/csidriver.yaml @@ -13,6 +13,7 @@ metadata: spec: attachRequired: true fsGroupPolicy: File + nodeAllocatableUpdatePeriodSeconds: ${NODE_ALLOCATABLE_UPDATE_PERIOD_SECONDS} podInfoOnMount: false requiresRepublish: false seLinuxMount: true diff --git a/pkg/driver/aws-ebs/aws_ebs.go b/pkg/driver/aws-ebs/aws_ebs.go index 2e2e6a6b7..2f1a67f3c 100644 --- a/pkg/driver/aws-ebs/aws_ebs.go +++ b/pkg/driver/aws-ebs/aws_ebs.go @@ -183,6 +183,9 @@ func GetAWSEBSOperatorControllerConfig(ctx context.Context, flavour generator.Cl if featureGates.Enabled(configv1.FeatureGateName("VolumeAttributesClass")) { cfg.AddDeploymentHookBuilders(c, withVolumeAttributesClassHook) } + if featureGates.Enabled(configv1.FeatureGateName("MutableCSINodeAllocatableCount")) { + cfg.AddDeploymentHookBuilders(c, withMutableCSINodeAllocatableCount) + } cfg.AddDaemonSetHookBuilders(c, withCABundleDaemonSetHook) cfg.AddStorageClassHookBuilders(c, withKMSKeyHook) @@ -208,6 +211,15 @@ func GetAWSEBSOperatorControllerConfig(ctx context.Context, flavour generator.Cl cfg.ExtraControlPlaneControllers = append(cfg.ExtraControlPlaneControllers, ctrl) } + cfg.ExtraReplacementsFunc = func() []string { + value := "" + if featureGates.Enabled(configv1.FeatureGateName("MutableCSINodeAllocatableCount")) { + value = "600" + } + klog.V(4).Infof("Using NODE_ALLOCATABLE_UPDATE_PERIOD_SECONDS: %q", value) + return []string{"${NODE_ALLOCATABLE_UPDATE_PERIOD_SECONDS}", value} + } + if flavour == generator.FlavourHyperShift { volumeTagController := NewEBSVolumeTagsController(cfg.GetControllerName("EBSVolumeTagsController"), c, c.EventRecorder) cfg.ExtraControlPlaneControllers = append(cfg.ExtraControlPlaneControllers, volumeTagController) @@ -500,3 +512,20 @@ func withVolumeAttributesClassHook(c *clients.Clients) (dc.DeploymentHookFunc, [ } return hook, nil } + +// withMutableCSINodeAllocatableCount enables the MutableCSINodeAllocatableCount feature gate in the external attacher +// TODO: remove when MutableCSINodeAllocatableCount is GA +func withMutableCSINodeAllocatableCount(c *clients.Clients) (dc.DeploymentHookFunc, []factory.Informer) { + hook := func(spec *opv1.OperatorSpec, deployment *appsv1.Deployment) error { + fgArgument := "--feature-gates=MutableCSINodeAllocatableCount=true" + for i := range deployment.Spec.Template.Spec.Containers { + container := &deployment.Spec.Template.Spec.Containers[i] + if container.Name != "csi-attacher" { + continue + } + container.Args = append(container.Args, fgArgument) + } + return nil + } + return hook, nil +} diff --git a/pkg/driver/aws-ebs/aws_ebs_test.go b/pkg/driver/aws-ebs/aws_ebs_test.go index a721f7861..746a51036 100644 --- a/pkg/driver/aws-ebs/aws_ebs_test.go +++ b/pkg/driver/aws-ebs/aws_ebs_test.go @@ -363,3 +363,40 @@ func Test_WithKMSKeyHook(t *testing.T) { }) } } + +func Test_WithMutableCSINodeAllocatableCount(t *testing.T) { + cr := clients.GetFakeOperatorCR() + c := clients.NewFakeClients("clusters-test", cr) + + hook, _ := withMutableCSINodeAllocatableCount(c) + deployment := getTestDeployment() + // Arrange - inject custom infrastructure + + // Act + err := hook(&cr.Spec.OperatorSpec, deployment) + if err != nil { + t.Fatalf("unexpected hook error: %v", err) + } + + // Assert + found := false + expectedFeatureGatesArg := "--feature-gates=MutableCSINodeAllocatableCount=true" + for _, container := range deployment.Spec.Template.Spec.Containers { + if container.Name == "csi-attacher" { + found = true + // Collect env vars from struct EnvVar to map[string]string + featureGatesArg := "" + for _, arg := range container.Args { + if strings.HasPrefix(arg, "--feature-gates") { + featureGatesArg = arg + } + } + if featureGatesArg != expectedFeatureGatesArg { + t.Errorf("expected csi-driver feature gates argument %s, got %s", expectedFeatureGatesArg, featureGatesArg) + } + } + } + if !found { + t.Errorf("container csi-attacher not found") + } +} diff --git a/vendor/github.com/openshift/library-go/pkg/operator/resource/resourceapply/storage.go b/vendor/github.com/openshift/library-go/pkg/operator/resource/resourceapply/storage.go index d44a5d571..de7f6005d 100644 --- a/vendor/github.com/openshift/library-go/pkg/operator/resource/resourceapply/storage.go +++ b/vendor/github.com/openshift/library-go/pkg/operator/resource/resourceapply/storage.go @@ -135,9 +135,10 @@ func storageClassNeedsRecreate(oldSC, newSC *storagev1.StorageClass) bool { return false } -// ApplyCSIDriver merges objectmeta, does not worry about anything else +// ApplyCSIDriver merges objectmeta and tries to update spec if any of the required fields were cleared by the API server. +// It assumes they were cleared due to a feature gate not enabled in the API server and it will be enabled soon. +// When used by StaticResourceController, it will retry periodically and eventually save the spec with the field. func ApplyCSIDriver(ctx context.Context, client storageclientv1.CSIDriversGetter, recorder events.Recorder, requiredOriginal *storagev1.CSIDriver) (*storagev1.CSIDriver, bool, error) { - required := requiredOriginal.DeepCopy() if required.Annotations == nil { required.Annotations = map[string]string{} @@ -173,14 +174,32 @@ func ApplyCSIDriver(ctx context.Context, client storageclientv1.CSIDriversGetter } } - metadataModified := false + needsUpdate := false + // Most CSIDriver fields are immutable. Any change to them should trigger Delete() + Create() calls. + needsRecreate := false + existingCopy := existing.DeepCopy() - resourcemerge.EnsureObjectMeta(&metadataModified, &existingCopy.ObjectMeta, required.ObjectMeta) + // Metadata change should need just Update() call. + resourcemerge.EnsureObjectMeta(&needsUpdate, &existingCopy.ObjectMeta, required.ObjectMeta) requiredSpecHash := required.Annotations[specHashAnnotation] existingSpecHash := existing.Annotations[specHashAnnotation] - sameSpec := requiredSpecHash == existingSpecHash - if sameSpec && !metadataModified { + // If spec changes, assume whole re-create is needed. + needsRecreate = requiredSpecHash != existingSpecHash + + // TODO: remove when CSIDriver spec.nodeAllocatableUpdatePeriodSeconds is enabled by default + // (https://github.com/kubernetes/enhancements/tree/master/keps/sig-storage/4876-mutable-csinode-allocatable) + if !needsRecreate && !alphaFieldsSaved(existingCopy, required) { + // The required spec is the same as in previous succesful call, however, + // the API server must have cleared some alpha/beta fields in it. + // Try to save the object again. In case the fields are cleared again, + // the called (typically StaticResourceController) must retry periodically. + // Assumption: the alpha fields are **mutable**, so only Update() is needed. + klog.V(4).Infof("Detected CSIDriver %q field cleared by the API server, updating", required.Name) + needsUpdate = true + } + + if !needsUpdate && !needsRecreate { return existing, false, nil } @@ -188,16 +207,16 @@ func ApplyCSIDriver(ctx context.Context, client storageclientv1.CSIDriversGetter klog.Infof("CSIDriver %q changes: %v", required.Name, JSONPatchNoError(existing, existingCopy)) } - if sameSpec { - // Update metadata by a simple Update call + if !needsRecreate { + // only needsUpdate is true, update the object by a simple Update call actual, err := client.CSIDrivers().Update(ctx, existingCopy, metav1.UpdateOptions{}) resourcehelper.ReportUpdateEvent(recorder, required, err) return actual, true, err } + // needsRecreate is true, needsUpdate does not matter. Delete and re-create the object. existingCopy.Spec = required.Spec existingCopy.ObjectMeta.ResourceVersion = "" - // Spec is read-only after creation. Delete and re-create the object err = client.CSIDrivers().Delete(ctx, existingCopy.Name, metav1.DeleteOptions{}) resourcehelper.ReportDeleteEvent(recorder, existingCopy, err, "Deleting CSIDriver to re-create it with updated parameters") if err != nil && !apierrors.IsNotFound(err) { @@ -214,10 +233,21 @@ func ApplyCSIDriver(ctx context.Context, client storageclientv1.CSIDriversGetter } else if err != nil { err = fmt.Errorf("failed to re-create CSIDriver %s: %s", existingCopy.Name, err) } - resourcehelper.ReportCreateEvent(recorder, existingCopy, err) + resourcehelper.ReportCreateEvent(recorder, actual, err) return actual, true, err } +// alphaFieldsSaved checks that all required fields in the CSIDriver required spec are present and equal in the actual spec. +func alphaFieldsSaved(actual, required *storagev1.CSIDriver) bool { + // DeepDerivative checks that all fields in "required" are present and equal in "actual" + // Fields not present in "required" are ignored. + equal := equality.Semantic.DeepDerivative(required.Spec, actual.Spec) + if !equal { + klog.V(4).Infof("JSAF alphaFieldsCleared: required: %+v, actual: %+v", required.Spec, actual.Spec) + } + return equal +} + func validateRequiredCSIDriverLabels(required *storagev1.CSIDriver) error { supportsEphemeralVolumes := false for _, mode := range required.Spec.VolumeLifecycleModes {