diff --git a/cmd/main.go b/cmd/main.go
index 797ef2893a..5ed2c38353 100644
--- a/cmd/main.go
+++ b/cmd/main.go
@@ -67,6 +67,7 @@ func main() {
 		driver.WithExtraVolumeTags(options.ControllerOptions.ExtraVolumeTags),
 		driver.WithMode(options.DriverMode),
 		driver.WithVolumeAttachLimit(options.NodeOptions.VolumeAttachLimit),
+		driver.WithVolumeAttachLimitFromMetadata(options.NodeOptions.DisableVolumeAttachLimitFromMetadata),
 		driver.WithKubernetesClusterID(options.ControllerOptions.KubernetesClusterID),
 		driver.WithAwsSdkDebugLog(options.ControllerOptions.AwsSdkDebugLog),
 		driver.WithWarnOnInvalidTag(options.ControllerOptions.WarnOnInvalidTag),
diff --git a/cmd/options/node_options.go b/cmd/options/node_options.go
index 1374016f1d..d61c5ea6a4 100644
--- a/cmd/options/node_options.go
+++ b/cmd/options/node_options.go
@@ -31,8 +31,22 @@ type NodeOptions struct {
 	// itself (dynamically discovering the maximum number of attachable volume per EC2 machine type, see also
 	// https://github.com/kubernetes-sigs/aws-ebs-csi-driver/issues/347).
 	VolumeAttachLimit int64
+
+	// DisableVolumeAttachLimitFromMetadata disables the volume attach limit calculation from instance metadata.
+	// When false (the default), the CSI driver calculates the volume attach limit as:
+	// the number supported by the instance type - the number of network cards - the number of volumes
+	// attached to the instance at boot time.
+	// Network cards are counted only on relevant instance types (e.g. Nitro instances).
+	// The volumes attached at boot time may include CSI volumes, which skews the attach limit
+	// calculation when a node is rebooted without draining.
+	// See https://github.com/kubernetes-sigs/aws-ebs-csi-driver/issues/1808 for details.
+	// When true, the CSI driver calculates the volume attach limit as:
+	// the number supported by the instance type - the number of network cards - 1 for the root volume.
+	// This behavior is useful when a node can be rebooted with CSI volumes attached, without draining first.
+	DisableVolumeAttachLimitFromMetadata bool
 }
 
 func (o *NodeOptions) AddFlags(fs *flag.FlagSet) {
 	fs.Int64Var(&o.VolumeAttachLimit, "volume-attach-limit", -1, "Value for the maximum number of volumes attachable per node. If specified, the limit applies to all nodes. If not specified, the value is approximated from the instance type.")
+	fs.BoolVar(&o.DisableVolumeAttachLimitFromMetadata, "disable-volume-attach-limit-from-metadata", false, "Disable volume attach limit calculation from instance metadata.")
 }
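For concreteness, here is how the two calculations the comment describes play out side by side. This is a minimal, self-contained sketch, not the driver's code: computeLimit and its inputs are hypothetical stand-ins for the cloud and metadata helpers, and the 28-attachment Nitro ceiling, single ENI, and 27 boot-time block devices mirror the t3.xlarge test cases added further down.

package main

import "fmt"

// computeLimit illustrates getVolumesLimit for a Nitro instance type with
// no dedicated or per-type EBS limit (the helper itself is hypothetical).
func computeLimit(ceiling, enis, bootBlockDevices int64, disableMetadata bool) int64 {
	var fromMetadata int64
	if !disableMetadata {
		fromMetadata = bootBlockDevices // may include CSI volumes after an undrained reboot
	}
	available := ceiling - enis - fromMetadata - 1 // -1 for the root volume
	if available <= 0 {
		available = 1 // the driver never advertises fewer than one attachment
	}
	return available
}

func main() {
	fmt.Println(computeLimit(28, 1, 27, false)) // 1: boot-time devices exhaust the budget
	fmt.Println(computeLimit(28, 1, 27, true))  // 26: only the root volume is subtracted
}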
diff --git a/cmd/options_test.go b/cmd/options_test.go
index cf50237065..92bc8af72a 100644
--- a/cmd/options_test.go
+++ b/cmd/options_test.go
@@ -51,6 +51,9 @@ func TestGetOptions(t *testing.T) {
 			awsSdkDebugFlagValue := true
 			VolumeAttachLimitFlagName := "volume-attach-limit"
 			var VolumeAttachLimit int64 = 42
+			disableVolumeAttachLimitFromMetadataFlagName := "disable-volume-attach-limit-from-metadata"
+			disableVolumeAttachLimitFromMetadata := false
+
 			userAgentExtraFlag := "user-agent-extra"
 			userAgentExtraFlagValue := "test"
 			otelTracingFlagName := "enable-otel-tracing"
@@ -130,6 +133,13 @@ func TestGetOptions(t *testing.T) {
 			if options.NodeOptions.VolumeAttachLimit != VolumeAttachLimit {
 				t.Fatalf("expected VolumeAttachLimit to be %d but it is %d", VolumeAttachLimit, options.NodeOptions.VolumeAttachLimit)
 			}
+			disableVolumeAttachLimitFromMetadataFlag := flagSet.Lookup(disableVolumeAttachLimitFromMetadataFlagName)
+			if disableVolumeAttachLimitFromMetadataFlag == nil {
+				t.Fatalf("expected %q flag to be added but it is not", disableVolumeAttachLimitFromMetadataFlagName)
+			}
+			if options.NodeOptions.DisableVolumeAttachLimitFromMetadata != disableVolumeAttachLimitFromMetadata {
+				t.Fatalf("expected disableVolumeAttachLimitFromMetadata to be %t but it is %t", disableVolumeAttachLimitFromMetadata, options.NodeOptions.DisableVolumeAttachLimitFromMetadata)
+			}
 		}
 
 		return options
diff --git a/pkg/driver/driver.go b/pkg/driver/driver.go
index ca8d0440bc..173b5f9450 100644
--- a/pkg/driver/driver.go
+++ b/pkg/driver/driver.go
@@ -64,16 +64,17 @@ type Driver struct {
 }
 
 type DriverOptions struct {
-	endpoint            string
-	extraTags           map[string]string
-	mode                Mode
-	volumeAttachLimit   int64
-	kubernetesClusterID string
-	awsSdkDebugLog      bool
-	batching            bool
-	warnOnInvalidTag    bool
-	userAgentExtra      string
-	otelTracing         bool
+	endpoint                        string
+	extraTags                       map[string]string
+	mode                            Mode
+	volumeAttachLimit               int64
+	disableVolumeAttachFromMetadata bool
+	kubernetesClusterID             string
+	awsSdkDebugLog                  bool
+	batching                        bool
+	warnOnInvalidTag                bool
+	userAgentExtra                  string
+	otelTracing                     bool
 }
 
 func NewDriver(options ...func(*DriverOptions)) (*Driver, error) {
@@ -218,6 +219,14 @@ func WithVolumeAttachLimit(volumeAttachLimit int64) func(*DriverOptions) {
 	}
 }
 
+func WithVolumeAttachLimitFromMetadata(disabled bool) func(*DriverOptions) {
+	return func(o *DriverOptions) {
+		if disabled {
+			o.disableVolumeAttachFromMetadata = disabled
+		}
+	}
+}
+
 func WithBatching(enableBatching bool) func(*DriverOptions) {
 	return func(o *DriverOptions) {
 		o.batching = enableBatching
diff --git a/pkg/driver/node.go b/pkg/driver/node.go
index e64c8ef59f..d930787706 100644
--- a/pkg/driver/node.go
+++ b/pkg/driver/node.go
@@ -783,7 +783,12 @@ func (d *nodeService) getVolumesLimit() int64 {
 	isNitro := cloud.IsNitroInstanceType(instanceType)
 	availableAttachments := cloud.GetMaxAttachments(isNitro)
-	blockVolumes := d.metadata.GetNumBlockDeviceMappings()
+
+	var blockVolumesFromMetadata int
+	if !d.driverOptions.disableVolumeAttachFromMetadata {
+		blockVolumesFromMetadata = d.metadata.GetNumBlockDeviceMappings()
+	}
+
 	dedicatedLimit := cloud.GetDedicatedLimitForInstanceType(instanceType)
 	maxEBSAttachments, ok := cloud.GetEBSLimitForInstanceType(instanceType)
 	if ok {
@@ -798,7 +803,7 @@ func (d *nodeService) getVolumesLimit() int64 {
 		nvmeInstanceStoreVolumes := cloud.GetNVMeInstanceStoreVolumesForInstanceType(instanceType)
 		availableAttachments = availableAttachments - enis - nvmeInstanceStoreVolumes
 	}
-	availableAttachments = availableAttachments - blockVolumes - 1 // -1 for root device
+	availableAttachments = availableAttachments - blockVolumesFromMetadata - 1 // -1 for root device
 	if availableAttachments <= 0 {
 		availableAttachments = 1
 	}
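The new setter follows the driver's existing functional-options pattern, so callers other than cmd/main.go can opt in the same way. A hedged sketch of the wiring, mirroring the main.go hunk above (the NodeMode constant is an assumption here, and NewDriver will only initialize fully on an EC2 instance with reachable metadata):

package main

import (
	"log"

	"github.com/kubernetes-sigs/aws-ebs-csi-driver/pkg/driver"
)

func main() {
	d, err := driver.NewDriver(
		driver.WithMode(driver.NodeMode),               // assumed node-mode constant
		driver.WithVolumeAttachLimit(-1),               // -1: approximate from the instance type
		driver.WithVolumeAttachLimitFromMetadata(true), // true disables the metadata-based count
	)
	if err != nil {
		log.Fatalf("creating driver: %v", err)
	}
	_ = d
}

Note the polarity: passing true turns the metadata-based count off, mirroring the disable- prefix of the flag, and because the field's zero value is false, passing false is a no-op that preserves the default behavior for existing callers.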
diff --git a/pkg/driver/node_test.go b/pkg/driver/node_test.go
index fe707a54d2..2edbd50e97 100644
--- a/pkg/driver/node_test.go
+++ b/pkg/driver/node_test.go
@@ -2020,16 +2020,17 @@ func TestNodeGetInfo(t *testing.T) {
 	validOutpostArn, _ := arn.Parse(strings.ReplaceAll("arn:aws:outposts:us-west-2:111111111111:outpost/op-0aaa000a0aaaa00a0", "outpost/", ""))
 	emptyOutpostArn := arn.ARN{}
 	testCases := []struct {
-		name              string
-		instanceID        string
-		instanceType      string
-		availabilityZone  string
-		region            string
-		attachedENIs      int
-		blockDevices      int
-		volumeAttachLimit int64
-		expMaxVolumes     int64
-		outpostArn        arn.ARN
+		name                                 string
+		instanceID                           string
+		instanceType                         string
+		availabilityZone                     string
+		region                               string
+		attachedENIs                         int
+		blockDevices                         int
+		volumeAttachLimit                    int64
+		disableVolumeAttachLimitFromMetadata bool
+		expMaxVolumes                        int64
+		outpostArn                           arn.ARN
 	}{
 		{
 			name:              "non-nitro instance success normal",
@@ -2162,6 +2163,19 @@ func TestNodeGetInfo(t *testing.T) {
 			expMaxVolumes:     1,
 			outpostArn:        emptyOutpostArn,
 		},
+		{
+			name:                                 "nitro instance already attached max EBS volumes ignoring mapped volumes in metadata",
+			instanceID:                           "i-123456789abcdef01",
+			instanceType:                         "t3.xlarge",
+			availabilityZone:                     "us-west-2b",
+			region:                               "us-west-2",
+			volumeAttachLimit:                    -1,
+			disableVolumeAttachLimitFromMetadata: true,
+			attachedENIs:                         1,
+			blockDevices:                         27,
+			expMaxVolumes:                        26,
+			outpostArn:                           emptyOutpostArn,
+		},
 		{
 			name:              "non-nitro instance already attached max EBS volumes",
 			instanceID:        "i-123456789abcdef01",
@@ -2174,6 +2188,19 @@ func TestNodeGetInfo(t *testing.T) {
 			expMaxVolumes:     1,
 			outpostArn:        emptyOutpostArn,
 		},
+		{
+			name:                                 "non-nitro instance ignoring mapped volumes in metadata",
+			instanceID:                           "i-123456789abcdef01",
+			instanceType:                         "m5.xlarge",
+			availabilityZone:                     "us-west-2b",
+			region:                               "us-west-2",
+			volumeAttachLimit:                    -1,
+			disableVolumeAttachLimitFromMetadata: true,
+			attachedENIs:                         1,
+			blockDevices:                         39,
+			expMaxVolumes:                        26,
+			outpostArn:                           emptyOutpostArn,
+		},
 		{
 			name:              "nitro instance already attached max ENIs",
 			instanceID:        "i-123456789abcdef01",
@@ -2186,6 +2213,19 @@ func TestNodeGetInfo(t *testing.T) {
 			expMaxVolumes:     1,
 			outpostArn:        emptyOutpostArn,
 		},
+		{
+			name:                                 "nitro instance already attached max ENIs ignoring mapped volumes in metadata",
+			instanceID:                           "i-123456789abcdef01",
+			instanceType:                         "t3.xlarge",
+			availabilityZone:                     "us-west-2b",
+			region:                               "us-west-2",
+			volumeAttachLimit:                    -1,
+			disableVolumeAttachLimitFromMetadata: true, // Has no effect on ENI counting
+			attachedENIs:                         27,
+			blockDevices:                         1,
+			expMaxVolumes:                        1,
+			outpostArn:                           emptyOutpostArn,
+		},
 		{
 			name:       "nitro instance with dedicated limit",
 			instanceID: "i-123456789abcdef01",
@@ -2226,7 +2266,8 @@ func TestNodeGetInfo(t *testing.T) {
 			defer mockCtl.Finish()
 
 			driverOptions := &DriverOptions{
-				volumeAttachLimit: tc.volumeAttachLimit,
+				volumeAttachLimit:               tc.volumeAttachLimit,
+				disableVolumeAttachFromMetadata: tc.disableVolumeAttachLimitFromMetadata,
 			}
 
 			mockMounter := NewMockMounter(mockCtl)
@@ -2240,7 +2281,9 @@ func TestNodeGetInfo(t *testing.T) {
 			if tc.volumeAttachLimit < 0 {
 				mockMetadata.EXPECT().GetInstanceType().Return(tc.instanceType)
-				mockMetadata.EXPECT().GetNumBlockDeviceMappings().Return(tc.blockDevices)
+				if !tc.disableVolumeAttachLimitFromMetadata {
+					mockMetadata.EXPECT().GetNumBlockDeviceMappings().Return(tc.blockDevices)
+				}
 				if cloud.IsNitroInstanceType(tc.instanceType) && cloud.GetDedicatedLimitForInstanceType(tc.instanceType) == 0 {
 					mockMetadata.EXPECT().GetNumAttachedENIs().Return(tc.attachedENIs)
 				}
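One detail worth calling out in the test changes: the guard around the expectation is load-bearing. gomock fails a test on any call that has no matching EXPECT, so registering GetNumBlockDeviceMappings only in the default mode doubles as an assertion that the disabled mode never performs that metadata lookup. The guard from the hunk above, annotated:

if !tc.disableVolumeAttachLimitFromMetadata {
	// Default mode: getVolumesLimit reads the boot-time block device
	// mappings exactly once.
	mockMetadata.EXPECT().GetNumBlockDeviceMappings().Return(tc.blockDevices)
}
// Disabled mode: no expectation is registered, so a stray call to
// GetNumBlockDeviceMappings would fail the test as an unexpected
// invocation, proving the lookup is really skipped.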