diff --git a/Makefile b/Makefile index 418d1a4532..2d45031232 100644 --- a/Makefile +++ b/Makefile @@ -84,6 +84,8 @@ test/coverage: rm cover.out filtered_cover.out # TODO: Re-enable sanity tests +# sanity tests have been disabled with the removal of NewFakeDriver, which was previously created to instantiate a fake driver utilized for testing. +# to re-enable tests, implement sanity tests creating a new driver instance by injecting mocked dependencies. #.PHONY: test-sanity #test-sanity: # go test -v -race ./tests/sanity/... diff --git a/cmd/main.go b/cmd/main.go index fa68fb000f..2d1df0d33e 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -36,7 +36,6 @@ import ( ) var ( - osExit = os.Exit featureGate = featuregate.NewFeatureGate() ) @@ -104,7 +103,7 @@ func main() { klog.FlushAndExit(klog.ExitFlushTimeout, 1) } fmt.Println(versionInfo) - osExit(0) + os.Exit(0) } if *toStderr { @@ -123,7 +122,7 @@ func main() { ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) defer cancel() if shutdownErr := exporter.Shutdown(ctx); shutdownErr != nil { - klog.ErrorS(err, "could not shutdown otel exporter") + klog.ErrorS(exporterErr, "could not shutdown otel exporter") } }() } @@ -142,13 +141,12 @@ func main() { } metadata, metadataErr := metadata.NewMetadataService(cfg, region) if metadataErr != nil { - klog.ErrorS(err, "Could not determine region from any metadata service. The region can be manually supplied via the AWS_REGION environment variable.") + klog.ErrorS(metadataErr, "Could not determine region from any metadata service. The region can be manually supplied via the AWS_REGION environment variable.") panic(err) } region = metadata.GetRegion() } - klog.InfoS("batching", "status", options.Batching) cloud, err := cloud.NewCloud(region, options.AwsSdkDebugLog, options.UserAgentExtra, options.Batching) if err != nil { klog.ErrorS(err, "failed to create cloud service") diff --git a/docs/options.md b/docs/options.md index fd1ef08063..4a7d5f2060 100644 --- a/docs/options.md +++ b/docs/options.md @@ -5,6 +5,7 @@ There are a couple of driver options that can be passed as arguments when starti | Option argument | value sample | default | Description | |-----------------------------|---------------------------------------------------|-----------------------------------------------------|---------------------| | endpoint | tcp://127.0.0.1:10000/ | unix:///var/lib/csi/sockets/pluginproxy/csi.sock | The socket on which the driver will listen for CSI RPCs| +| http-endpoint | :8080 | | The TCP network address where the HTTP server for metrics will listen (example: `:8080`). The default is empty string, which means the server is disabled.| | volume-attach-limit | 1,2,3 ... | -1 | Value for the maximum number of volumes attachable per node. If specified, the limit applies to all nodes. If not specified, the value is approximated from the instance type| | extra-tags | key1=value1,key2=value2 | | Tags attached to each dynamically provisioned resource| | k8s-tag-cluster-id | aws-cluster-id-1 | | ID of the Kubernetes cluster used for tagging provisioned EBS volumes| @@ -14,3 +15,5 @@ There are a couple of driver options that can be passed as arguments when starti | enable-otel-tracing | true | false | If set to true, the driver will enable opentelemetry tracing. Might need [additional env variables](https://opentelemetry.io/docs/specs/otel/configuration/sdk-environment-variables/#general-sdk-configuration) to export the traces to the right collector| | batching | true | true | If set to true, the driver will enable batching of API calls. This is especially helpful for improving performance in workloads that are sensitive to EC2 rate limits at the cost of a small increase to worst-case latency| | modify-volume-request-handler-timeout | 10s | 2s | Timeout for the window in which volume modification calls must be received in order for them to coalesce into a single volume modification call to AWS. If changing this, be aware that the ebs-csi-controller's csi-resizer and volumemodifier containers both have timeouts on the calls they make, if this value exceeds those timeouts it will cause them to always fail and fall into a retry loop, so adjust those values accordingly. +| warn-on-invalid-tag | true | false | To warn on invalid tags, instead of returning an error| +|reserved-volume-attachments | 2 | -1 | Number of volume attachments reserved for system use. Not used when --volume-attach-limit is specified. When -1, the amount of reserved attachments is loaded from instance metadata that captured state at node boot and may include not only system disks but also CSI volumes.| diff --git a/pkg/driver/options.go b/pkg/driver/options.go index 45d185747b..1c9ab8fd36 100644 --- a/pkg/driver/options.go +++ b/pkg/driver/options.go @@ -24,37 +24,80 @@ import ( cliflag "k8s.io/component-base/cli/flag" ) +// Options contains options and configuration settings for the driver. type Options struct { - Mode Mode - Endpoint string - HttpEndpoint string - EnableOtelTracing bool - ExtraTags map[string]string - ExtraVolumeTags map[string]string - KubernetesClusterID string - AwsSdkDebugLog bool - WarnOnInvalidTag bool - UserAgentExtra string - Batching bool + Mode Mode + + // #### Server options #### + + //Endpoint is the endpoint for the CSI driver server + Endpoint string + // HttpEndpoint is the TCP network address where the HTTP server for metrics will listen + HttpEndpoint string + // EnableOtelTracing is a flag to enable opentelemetry tracing for the driver + EnableOtelTracing bool + + // #### Controller options #### + + // ExtraTags is a map of tags that will be attached to each dynamically provisioned + // resource. + ExtraTags map[string]string + // ExtraVolumeTags is a map of tags that will be attached to each dynamically provisioned + // volume. + // DEPRECATED: Use ExtraTags instead. + ExtraVolumeTags map[string]string + // ID of the kubernetes cluster. + KubernetesClusterID string + // flag to enable sdk debug log + AwsSdkDebugLog bool + // flag to warn on invalid tag, instead of returning an error + WarnOnInvalidTag bool + // flag to set user agent + UserAgentExtra string + // flag to enable batching of API calls + Batching bool + // flag to set the timeout for volume modification requests to be coalesced into a single + // volume modification call to AWS. ModifyVolumeRequestHandlerTimeout time.Duration - VolumeAttachLimit int64 - ReservedVolumeAttachments int + + // #### Node options ##### + + // VolumeAttachLimit specifies the value that shall be reported as "maximum number of attachable volumes" + // in CSINode objects. It is similar to https://kubernetes.io/docs/concepts/storage/storage-limits/#custom-limits + // which allowed administrators to specify custom volume limits by configuring the kube-scheduler. Also, each AWS + // machine type has different volume limits. By default, the EBS CSI driver parses the machine type name and then + // decides the volume limit. However, this is only a rough approximation and not good enough in most cases. + // Specifying the volume attach limit via command line is the alternative until a more sophisticated solution presents + // itself (dynamically discovering the maximum number of attachable volume per EC2 machine type, see also + // https://github.com/kubernetes-sigs/aws-ebs-csi-driver/issues/347). + VolumeAttachLimit int64 + // ReservedVolumeAttachments specifies number of volume attachments reserved for system use. + // Typically 1 for the root disk, but may be larger when more system disks are attached to nodes. + // This option is not used when --volume-attach-limit is specified. + // When -1, the amount of reserved attachments is loaded from instance metadata that captured state at node boot + // and may include not only system disks but also CSI volumes (and therefore it may be wrong). + ReservedVolumeAttachments int } -func (o *Options) AddFlags(fs *flag.FlagSet) { - fs.StringVar(&o.Endpoint, "endpoint", DefaultCSIEndpoint, "Endpoint for the CSI driver server") - fs.StringVar(&o.HttpEndpoint, "http-endpoint", "", "The TCP network address where the HTTP server for metrics will listen (example: `:8080`). The default is empty string, which means the server is disabled.") - fs.BoolVar(&o.EnableOtelTracing, "enable-otel-tracing", false, "To enable opentelemetry tracing for the driver. The tracing is disabled by default. Configure the exporter endpoint with OTEL_EXPORTER_OTLP_ENDPOINT and other env variables, see https://opentelemetry.io/docs/specs/otel/configuration/sdk-environment-variables/#general-sdk-configuration.") - fs.Var(cliflag.NewMapStringString(&o.ExtraTags), "extra-tags", "Extra tags to attach to each dynamically provisioned resource. It is a comma separated list of key value pairs like '=,='") - fs.Var(cliflag.NewMapStringString(&o.ExtraVolumeTags), "extra-volume-tags", "DEPRECATED: Please use --extra-tags instead. Extra volume tags to attach to each dynamically provisioned volume. It is a comma separated list of key value pairs like '=,='") - fs.StringVar(&o.KubernetesClusterID, "k8s-tag-cluster-id", "", "ID of the Kubernetes cluster used for tagging provisioned EBS volumes (optional).") - fs.BoolVar(&o.AwsSdkDebugLog, "aws-sdk-debug-log", false, "To enable the aws sdk debug log level (default to false).") - fs.BoolVar(&o.WarnOnInvalidTag, "warn-on-invalid-tag", false, "To warn on invalid tags, instead of returning an error") - fs.StringVar(&o.UserAgentExtra, "user-agent-extra", "", "Extra string appended to user agent.") - fs.BoolVar(&o.Batching, "batching", false, "To enable batching of API calls. This is especially helpful for improving performance in workloads that are sensitive to EC2 rate limits.") - fs.DurationVar(&o.ModifyVolumeRequestHandlerTimeout, "modify-volume-request-handler-timeout", DefaultModifyVolumeRequestHandlerTimeout, "Timeout for the window in which volume modification calls must be received in order for them to coalesce into a single volume modification call to AWS. This must be lower than the csi-resizer and volumemodifier timeouts") - fs.Int64Var(&o.VolumeAttachLimit, "volume-attach-limit", -1, "Value for the maximum number of volumes attachable per node. If specified, the limit applies to all nodes and overrides --reserved-volume-attachments. If not specified, the value is approximated from the instance type.") - fs.IntVar(&o.ReservedVolumeAttachments, "reserved-volume-attachments", -1, "Number of volume attachments reserved for system use. Not used when --volume-attach-limit is specified. The total amount of volume attachments for a node is computed as: - - . When -1, the amount of reserved attachments is loaded from instance metadata that captured state at node boot and may include not only system disks but also CSI volumes.") +func (o *Options) AddFlags(f *flag.FlagSet) { + // Server options + f.StringVar(&o.Endpoint, "endpoint", DefaultCSIEndpoint, "Endpoint for the CSI driver server") + f.StringVar(&o.HttpEndpoint, "http-endpoint", "", "The TCP network address where the HTTP server for metrics will listen (example: `:8080`). The default is empty string, which means the server is disabled.") + f.BoolVar(&o.EnableOtelTracing, "enable-otel-tracing", false, "To enable opentelemetry tracing for the driver. The tracing is disabled by default. Configure the exporter endpoint with OTEL_EXPORTER_OTLP_ENDPOINT and other env variables, see https://opentelemetry.io/docs/specs/otel/configuration/sdk-environment-variables/#general-sdk-configuration.") + + // Controller options + f.Var(cliflag.NewMapStringString(&o.ExtraTags), "extra-tags", "Extra tags to attach to each dynamically provisioned resource. It is a comma separated list of key value pairs like '=,='") + f.Var(cliflag.NewMapStringString(&o.ExtraVolumeTags), "extra-volume-tags", "DEPRECATED: Please use --extra-tags instead. Extra volume tags to attach to each dynamically provisioned volume. It is a comma separated list of key value pairs like '=,='") + f.StringVar(&o.KubernetesClusterID, "k8s-tag-cluster-id", "", "ID of the Kubernetes cluster used for tagging provisioned EBS volumes (optional).") + f.BoolVar(&o.AwsSdkDebugLog, "aws-sdk-debug-log", false, "To enable the aws sdk debug log level (default to false).") + f.BoolVar(&o.WarnOnInvalidTag, "warn-on-invalid-tag", false, "To warn on invalid tags, instead of returning an error") + f.StringVar(&o.UserAgentExtra, "user-agent-extra", "", "Extra string appended to user agent.") + f.BoolVar(&o.Batching, "batching", false, "To enable batching of API calls. This is especially helpful for improving performance in workloads that are sensitive to EC2 rate limits.") + f.DurationVar(&o.ModifyVolumeRequestHandlerTimeout, "modify-volume-request-handler-timeout", DefaultModifyVolumeRequestHandlerTimeout, "Timeout for the window in which volume modification calls must be received in order for them to coalesce into a single volume modification call to AWS. This must be lower than the csi-resizer and volumemodifier timeouts") + + // Node options + f.Int64Var(&o.VolumeAttachLimit, "volume-attach-limit", -1, "Value for the maximum number of volumes attachable per node. If specified, the limit applies to all nodes and overrides --reserved-volume-attachments. If not specified, the value is approximated from the instance type.") + f.IntVar(&o.ReservedVolumeAttachments, "reserved-volume-attachments", -1, "Number of volume attachments reserved for system use. Not used when --volume-attach-limit is specified. The total amount of volume attachments for a node is computed as: - - . When -1, the amount of reserved attachments is loaded from instance metadata that captured state at node boot and may include not only system disks but also CSI volumes.") } func (o *Options) Validate() error { diff --git a/pkg/driver/options_test.go b/pkg/driver/options_test.go new file mode 100644 index 0000000000..1b33c6d482 --- /dev/null +++ b/pkg/driver/options_test.go @@ -0,0 +1,143 @@ +package driver + +import ( + "testing" + "time" + + flag "github.com/spf13/pflag" +) + +func TestAddFlags(t *testing.T) { + o := &Options{} + + f := flag.NewFlagSet("test", flag.ExitOnError) + o.AddFlags(f) + + if err := f.Set("endpoint", "custom-endpoint"); err != nil { + t.Errorf("error setting endpoint: %v", err) + } + if err := f.Set("http-endpoint", ":8080"); err != nil { + t.Errorf("error setting http-endpoint: %v", err) + } + if err := f.Set("enable-otel-tracing", "true"); err != nil { + t.Errorf("error setting enable-otel-tracing: %v", err) + } + if err := f.Set("extra-tags", "key1=value1,key2=value2"); err != nil { + t.Errorf("error setting extra-tags: %v", err) + } + if err := f.Set("k8s-tag-cluster-id", "cluster-123"); err != nil { + t.Errorf("error setting k8s-tag-cluster-id: %v", err) + } + if err := f.Set("aws-sdk-debug-log", "true"); err != nil { + t.Errorf("error setting aws-sdk-debug-log: %v", err) + } + if err := f.Set("warn-on-invalid-tag", "true"); err != nil { + t.Errorf("error setting warn-on-invalid-tag: %v", err) + } + if err := f.Set("user-agent-extra", "extra-info"); err != nil { + t.Errorf("error setting user-agent-extra: %v", err) + } + if err := f.Set("batching", "true"); err != nil { + t.Errorf("error setting batching: %v", err) + } + if err := f.Set("modify-volume-request-handler-timeout", "1m"); err != nil { + t.Errorf("error setting modify-volume-request-handler-timeout: %v", err) + } + if err := f.Set("volume-attach-limit", "10"); err != nil { + t.Errorf("error setting volume-attach-limit: %v", err) + } + if err := f.Set("reserved-volume-attachments", "5"); err != nil { + t.Errorf("error setting reserved-volume-attachments: %v", err) + } + + if o.Endpoint != "custom-endpoint" { + t.Errorf("unexpected Endpoint: got %s, want custom-endpoint", o.Endpoint) + } + if o.HttpEndpoint != ":8080" { + t.Errorf("unexpected HttpEndpoint: got %s, want :8080", o.HttpEndpoint) + } + if !o.EnableOtelTracing { + t.Error("unexpected EnableOtelTracing: got false, want true") + } + if len(o.ExtraTags) != 2 || o.ExtraTags["key1"] != "value1" || o.ExtraTags["key2"] != "value2" { + t.Errorf("unexpected ExtraTags: got %v, want map[key1:value1 key2:value2]", o.ExtraTags) + } + if o.KubernetesClusterID != "cluster-123" { + t.Errorf("unexpected KubernetesClusterID: got %s, want cluster-123", o.KubernetesClusterID) + } + if !o.AwsSdkDebugLog { + t.Error("unexpected AwsSdkDebugLog: got false, want true") + } + if !o.WarnOnInvalidTag { + t.Error("unexpected WarnOnInvalidTag: got false, want true") + } + if o.UserAgentExtra != "extra-info" { + t.Errorf("unexpected UserAgentExtra: got %s, want extra-info", o.UserAgentExtra) + } + if !o.Batching { + t.Error("unexpected Batching: got false, want true") + } + if o.ModifyVolumeRequestHandlerTimeout != time.Minute { + t.Errorf("unexpected ModifyVolumeRequestHandlerTimeout: got %v, want 1m", o.ModifyVolumeRequestHandlerTimeout) + } + if o.VolumeAttachLimit != 10 { + t.Errorf("unexpected VolumeAttachLimit: got %d, want 10", o.VolumeAttachLimit) + } + if o.ReservedVolumeAttachments != 5 { + t.Errorf("unexpected ReservedVolumeAttachments: got %d, want 5", o.ReservedVolumeAttachments) + } +} + +func TestValidate(t *testing.T) { + tests := []struct { + name string + volumeAttachLimit int64 + reservedAttachments int + expectedErr bool + errMsg string + }{ + { + name: "both options not set", + volumeAttachLimit: -1, + reservedAttachments: -1, + expectedErr: false, + }, + { + name: "volumeAttachLimit set", + volumeAttachLimit: 10, + reservedAttachments: -1, + expectedErr: false, + }, + { + name: "reservedVolumeAttachments set", + volumeAttachLimit: -1, + reservedAttachments: 2, + expectedErr: false, + }, + { + name: "both options set", + volumeAttachLimit: 10, + reservedAttachments: 2, + expectedErr: true, + errMsg: "only one of --volume-attach-limit and --reserved-volume-attachments may be specified", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + o := &Options{ + VolumeAttachLimit: tt.volumeAttachLimit, + ReservedVolumeAttachments: tt.reservedAttachments, + } + + err := o.Validate() + if (err != nil) != tt.expectedErr { + t.Errorf("Options.Validate() error = %v, wantErr %v", err, tt.expectedErr) + } + + if err != nil && err.Error() != tt.errMsg { + t.Errorf("Options.Validate() error message = %v, wantErrMsg %v", err.Error(), tt.errMsg) + } + }) + } +}