Skip to content

Commit

Permalink
Add option to disable discovery of attached volumes from instance met…
Browse files Browse the repository at this point in the history
…adata

Add --disable-volume-attach-limit-from-metadata to disable counting of
attached volumes from instance metadata.

Instance metadata contains block volumes that were attached at the time the
instance was started and then it's never updated. In case a node was
rebooted with CSI volumes attached, the metadata includes these CSI volumes
and the CSI driver "reserves" an attachment slot for them forever, even
when they are detached later.

The option disables counting of attached volumes when computing the attach
limit, because it's not able to distinguish a CSI volume (should not have a
reserved attachment, because scheduler will count it on its own) and a root
disk / volume attached by the system admin (should have a reserved
attachment).
  • Loading branch information
jsafrane committed Feb 1, 2024
1 parent bb2e6f3 commit 6a38956
Show file tree
Hide file tree
Showing 6 changed files with 106 additions and 24 deletions.
1 change: 1 addition & 0 deletions cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ func main() {
driver.WithExtraVolumeTags(options.ControllerOptions.ExtraVolumeTags),
driver.WithMode(options.DriverMode),
driver.WithVolumeAttachLimit(options.NodeOptions.VolumeAttachLimit),
driver.WithVolumeAttachLimitFromMetadata(options.NodeOptions.DisableVolumeAttachLimitFromMetadata),
driver.WithKubernetesClusterID(options.ControllerOptions.KubernetesClusterID),
driver.WithAwsSdkDebugLog(options.ControllerOptions.AwsSdkDebugLog),
driver.WithWarnOnInvalidTag(options.ControllerOptions.WarnOnInvalidTag),
Expand Down
14 changes: 14 additions & 0 deletions cmd/options/node_options.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,22 @@ type NodeOptions struct {
// itself (dynamically discovering the maximum number of attachable volume per EC2 machine type, see also
// https://github.com/kubernetes-sigs/aws-ebs-csi-driver/issues/347).
VolumeAttachLimit int64

// DisableVolumeAttachLimitFromMetadata disables the volume attach limit calculation from instances metadata.
// When false (default), the CSI driver calculates the volume attach limit as:
// the number supported by the instance type - nr. of network cards - nr. of volumes attached to the instance
// at the boot time.
// The network cards are counted only on relevant instance types (e.g. Nitro instances).
// Nr. of volumes attached to the instance at the boot time may include CSI volumes, which is not correct
// and breaks attach limit calculation when a node is rebooted without draining.
// See https://github.com/kubernetes-sigs/aws-ebs-csi-driver/issues/1808 for details.
// When true, the CSI driver calculates the volume attach limit as:
// the number supported by the instance type - nr. of network cards - 1 for the root volume.
// This behavior is useful when a node can be rebooted without draining with CSI volumes attached.
DisableVolumeAttachLimitFromMetadata bool
}

func (o *NodeOptions) AddFlags(fs *flag.FlagSet) {
fs.Int64Var(&o.VolumeAttachLimit, "volume-attach-limit", -1, "Value for the maximum number of volumes attachable per node. If specified, the limit applies to all nodes. If not specified, the value is approximated from the instance type.")
fs.BoolVar(&o.DisableVolumeAttachLimitFromMetadata, "disable-volume-attach-limit-from-metadata", false, "Disable volume attach limit calculation from instance metadata.")
}
10 changes: 10 additions & 0 deletions cmd/options_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,9 @@ func TestGetOptions(t *testing.T) {
awsSdkDebugFlagValue := true
VolumeAttachLimitFlagName := "volume-attach-limit"
var VolumeAttachLimit int64 = 42
disableVolumeAttachLimitFromMetadataFlagName := "disable-volume-attach-limit-from-metadata"
disableVolumeAttachLimitFromMetadata := false

userAgentExtraFlag := "user-agent-extra"
userAgentExtraFlagValue := "test"
otelTracingFlagName := "enable-otel-tracing"
Expand Down Expand Up @@ -130,6 +133,13 @@ func TestGetOptions(t *testing.T) {
if options.NodeOptions.VolumeAttachLimit != VolumeAttachLimit {
t.Fatalf("expected VolumeAttachLimit to be %d but it is %d", VolumeAttachLimit, options.NodeOptions.VolumeAttachLimit)
}
disableVolumeAttachLimitFromMetadataFlag :=flagSet.Lookup(disableVolumeAttachLimitFromMetadataFlagName)
if disableVolumeAttachLimitFromMetadataFlag == nil {
t.Fatalf("expected %q flag to be added but it is not", disableVolumeAttachLimitFromMetadataFlagName)
}
if options.NodeOptions.DisableVolumeAttachLimitFromMetadata != disableVolumeAttachLimitFromMetadata {
t.Fatalf("expected disableVolumeAttachLimitFromMetadata to be %t but it is %t", disableVolumeAttachLimitFromMetadata, options.NodeOptions.DisableVolumeAttachLimitFromMetadata)
}
}

return options
Expand Down
29 changes: 19 additions & 10 deletions pkg/driver/driver.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,16 +64,17 @@ type Driver struct {
}

type DriverOptions struct {
endpoint string
extraTags map[string]string
mode Mode
volumeAttachLimit int64
kubernetesClusterID string
awsSdkDebugLog bool
batching bool
warnOnInvalidTag bool
userAgentExtra string
otelTracing bool
endpoint string
extraTags map[string]string
mode Mode
volumeAttachLimit int64
disableVolumeAttachFromMetadata bool
kubernetesClusterID string
awsSdkDebugLog bool
batching bool
warnOnInvalidTag bool
userAgentExtra string
otelTracing bool
}

func NewDriver(options ...func(*DriverOptions)) (*Driver, error) {
Expand Down Expand Up @@ -218,6 +219,14 @@ func WithVolumeAttachLimit(volumeAttachLimit int64) func(*DriverOptions) {
}
}

func WithVolumeAttachLimitFromMetadata(disabled bool) func(*DriverOptions) {
return func(o *DriverOptions) {
if disabled {
o.disableVolumeAttachFromMetadata = disabled
}
}
}

func WithBatching(enableBatching bool) func(*DriverOptions) {
return func(o *DriverOptions) {
o.batching = enableBatching
Expand Down
9 changes: 7 additions & 2 deletions pkg/driver/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -783,7 +783,12 @@ func (d *nodeService) getVolumesLimit() int64 {

isNitro := cloud.IsNitroInstanceType(instanceType)
availableAttachments := cloud.GetMaxAttachments(isNitro)
blockVolumes := d.metadata.GetNumBlockDeviceMappings()

var blockVolumesFromMetadata int
if !d.driverOptions.disableVolumeAttachFromMetadata {
blockVolumesFromMetadata = d.metadata.GetNumBlockDeviceMappings()
}

dedicatedLimit := cloud.GetDedicatedLimitForInstanceType(instanceType)
maxEBSAttachments, ok := cloud.GetEBSLimitForInstanceType(instanceType)
if ok {
Expand All @@ -798,7 +803,7 @@ func (d *nodeService) getVolumesLimit() int64 {
nvmeInstanceStoreVolumes := cloud.GetNVMeInstanceStoreVolumesForInstanceType(instanceType)
availableAttachments = availableAttachments - enis - nvmeInstanceStoreVolumes
}
availableAttachments = availableAttachments - blockVolumes - 1 // -1 for root device
availableAttachments = availableAttachments - blockVolumesFromMetadata - 1 // -1 for root device
if availableAttachments <= 0 {
availableAttachments = 1
}
Expand Down
67 changes: 55 additions & 12 deletions pkg/driver/node_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2020,16 +2020,17 @@ func TestNodeGetInfo(t *testing.T) {
validOutpostArn, _ := arn.Parse(strings.ReplaceAll("arn:aws:outposts:us-west-2:111111111111:outpost/op-0aaa000a0aaaa00a0", "outpost/", ""))
emptyOutpostArn := arn.ARN{}
testCases := []struct {
name string
instanceID string
instanceType string
availabilityZone string
region string
attachedENIs int
blockDevices int
volumeAttachLimit int64
expMaxVolumes int64
outpostArn arn.ARN
name string
instanceID string
instanceType string
availabilityZone string
region string
attachedENIs int
blockDevices int
volumeAttachLimit int64
disableVolumeAttachLimitFromMetadata bool
expMaxVolumes int64
outpostArn arn.ARN
}{
{
name: "non-nitro instance success normal",
Expand Down Expand Up @@ -2162,6 +2163,19 @@ func TestNodeGetInfo(t *testing.T) {
expMaxVolumes: 1,
outpostArn: emptyOutpostArn,
},
{
name: "nitro instance already attached max EBS volumes ignoring mapped volumes in metadata",
instanceID: "i-123456789abcdef01",
instanceType: "t3.xlarge",
availabilityZone: "us-west-2b",
region: "us-west-2",
volumeAttachLimit: -1,
disableVolumeAttachLimitFromMetadata: true,
attachedENIs: 1,
blockDevices: 27,
expMaxVolumes: 26,
outpostArn: emptyOutpostArn,
},
{
name: "non-nitro instance already attached max EBS volumes",
instanceID: "i-123456789abcdef01",
Expand All @@ -2174,6 +2188,19 @@ func TestNodeGetInfo(t *testing.T) {
expMaxVolumes: 1,
outpostArn: emptyOutpostArn,
},
{
name: "non-nitro instance ignoring mapped volumes in metadata",
instanceID: "i-123456789abcdef01",
instanceType: "m5.xlarge",
availabilityZone: "us-west-2b",
region: "us-west-2",
volumeAttachLimit: -1,
disableVolumeAttachLimitFromMetadata: true,
attachedENIs: 1,
blockDevices: 39,
expMaxVolumes: 26,
outpostArn: emptyOutpostArn,
},
{
name: "nitro instance already attached max ENIs",
instanceID: "i-123456789abcdef01",
Expand All @@ -2186,6 +2213,19 @@ func TestNodeGetInfo(t *testing.T) {
expMaxVolumes: 1,
outpostArn: emptyOutpostArn,
},
{
name: "nitro instance already attached max ENIs ignoring mapped volumes in metadata",
instanceID: "i-123456789abcdef01",
instanceType: "t3.xlarge",
availabilityZone: "us-west-2b",
region: "us-west-2",
volumeAttachLimit: -1,
disableVolumeAttachLimitFromMetadata: true, // Has no effect on ENIs counting
attachedENIs: 27,
blockDevices: 1,
expMaxVolumes: 1,
outpostArn: emptyOutpostArn,
},
{
name: "nitro instance with dedicated limit",
instanceID: "i-123456789abcdef01",
Expand Down Expand Up @@ -2226,7 +2266,8 @@ func TestNodeGetInfo(t *testing.T) {
defer mockCtl.Finish()

driverOptions := &DriverOptions{
volumeAttachLimit: tc.volumeAttachLimit,
volumeAttachLimit: tc.volumeAttachLimit,
disableVolumeAttachFromMetadata: tc.disableVolumeAttachLimitFromMetadata,
}

mockMounter := NewMockMounter(mockCtl)
Expand All @@ -2240,7 +2281,9 @@ func TestNodeGetInfo(t *testing.T) {

if tc.volumeAttachLimit < 0 {
mockMetadata.EXPECT().GetInstanceType().Return(tc.instanceType)
mockMetadata.EXPECT().GetNumBlockDeviceMappings().Return(tc.blockDevices)
if !tc.disableVolumeAttachLimitFromMetadata {
mockMetadata.EXPECT().GetNumBlockDeviceMappings().Return(tc.blockDevices)
}
if cloud.IsNitroInstanceType(tc.instanceType) && cloud.GetDedicatedLimitForInstanceType(tc.instanceType) == 0 {
mockMetadata.EXPECT().GetNumAttachedENIs().Return(tc.attachedENIs)
}
Expand Down

0 comments on commit 6a38956

Please sign in to comment.