Skip to content

Commit

Permalink
Address feedback
Browse files Browse the repository at this point in the history
Signed-off-by: torredil <[email protected]>
  • Loading branch information
torredil committed Apr 12, 2024
1 parent c249057 commit e6c731a
Show file tree
Hide file tree
Showing 5 changed files with 221 additions and 32 deletions.
2 changes: 2 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,8 @@ test/coverage:
rm cover.out filtered_cover.out

# TODO: Re-enable sanity tests
# sanity tests have been disabled with the removal of NewFakeDriver, which was previously created to instantiate a fake driver utilized for testing.
# to re-enable tests, implement sanity tests creating a new driver instance by injecting mocked dependencies.
#.PHONY: test-sanity
#test-sanity:
# go test -v -race ./tests/sanity/...
Expand Down
8 changes: 3 additions & 5 deletions cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@ import (
)

var (
osExit = os.Exit
featureGate = featuregate.NewFeatureGate()
)

Expand Down Expand Up @@ -104,7 +103,7 @@ func main() {
klog.FlushAndExit(klog.ExitFlushTimeout, 1)
}
fmt.Println(versionInfo)
osExit(0)
os.Exit(0)
}

if *toStderr {
Expand All @@ -123,7 +122,7 @@ func main() {
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel()
if shutdownErr := exporter.Shutdown(ctx); shutdownErr != nil {
klog.ErrorS(err, "could not shutdown otel exporter")
klog.ErrorS(exporterErr, "could not shutdown otel exporter")
}
}()
}
Expand All @@ -142,13 +141,12 @@ func main() {
}
metadata, metadataErr := metadata.NewMetadataService(cfg, region)
if metadataErr != nil {
klog.ErrorS(err, "Could not determine region from any metadata service. The region can be manually supplied via the AWS_REGION environment variable.")
klog.ErrorS(metadataErr, "Could not determine region from any metadata service. The region can be manually supplied via the AWS_REGION environment variable.")
panic(err)
}
region = metadata.GetRegion()
}

klog.InfoS("batching", "status", options.Batching)
cloud, err := cloud.NewCloud(region, options.AwsSdkDebugLog, options.UserAgentExtra, options.Batching)
if err != nil {
klog.ErrorS(err, "failed to create cloud service")
Expand Down
3 changes: 3 additions & 0 deletions docs/options.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ There are a couple of driver options that can be passed as arguments when starti
| Option argument | value sample | default | Description |
|-----------------------------|---------------------------------------------------|-----------------------------------------------------|---------------------|
| endpoint | tcp://127.0.0.1:10000/ | unix:///var/lib/csi/sockets/pluginproxy/csi.sock | The socket on which the driver will listen for CSI RPCs|
| http-endpoint | :8080 | | The TCP network address where the HTTP server for metrics will listen (example: `:8080`). The default is empty string, which means the server is disabled.|
| volume-attach-limit | 1,2,3 ... | -1 | Value for the maximum number of volumes attachable per node. If specified, the limit applies to all nodes. If not specified, the value is approximated from the instance type|
| extra-tags | key1=value1,key2=value2 | | Tags attached to each dynamically provisioned resource|
| k8s-tag-cluster-id | aws-cluster-id-1 | | ID of the Kubernetes cluster used for tagging provisioned EBS volumes|
Expand All @@ -14,3 +15,5 @@ There are a couple of driver options that can be passed as arguments when starti
| enable-otel-tracing | true | false | If set to true, the driver will enable opentelemetry tracing. Might need [additional env variables](https://opentelemetry.io/docs/specs/otel/configuration/sdk-environment-variables/#general-sdk-configuration) to export the traces to the right collector|
| batching | true | true | If set to true, the driver will enable batching of API calls. This is especially helpful for improving performance in workloads that are sensitive to EC2 rate limits at the cost of a small increase to worst-case latency|
| modify-volume-request-handler-timeout | 10s | 2s | Timeout for the window in which volume modification calls must be received in order for them to coalesce into a single volume modification call to AWS. If changing this, be aware that the ebs-csi-controller's csi-resizer and volumemodifier containers both have timeouts on the calls they make, if this value exceeds those timeouts it will cause them to always fail and fall into a retry loop, so adjust those values accordingly.
| warn-on-invalid-tag | true | false | To warn on invalid tags, instead of returning an error|
|reserved-volume-attachments | 2 | -1 | Number of volume attachments reserved for system use. Not used when --volume-attach-limit is specified. When -1, the amount of reserved attachments is loaded from instance metadata that captured state at node boot and may include not only system disks but also CSI volumes.|
97 changes: 70 additions & 27 deletions pkg/driver/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,37 +24,80 @@ import (
cliflag "k8s.io/component-base/cli/flag"
)

// Options contains options and configuration settings for the driver.
type Options struct {
Mode Mode
Endpoint string
HttpEndpoint string
EnableOtelTracing bool
ExtraTags map[string]string
ExtraVolumeTags map[string]string
KubernetesClusterID string
AwsSdkDebugLog bool
WarnOnInvalidTag bool
UserAgentExtra string
Batching bool
Mode Mode

// #### Server options ####

//Endpoint is the endpoint for the CSI driver server
Endpoint string
// HttpEndpoint is the TCP network address where the HTTP server for metrics will listen
HttpEndpoint string
// EnableOtelTracing is a flag to enable opentelemetry tracing for the driver
EnableOtelTracing bool

// #### Controller options ####

// ExtraTags is a map of tags that will be attached to each dynamically provisioned
// resource.
ExtraTags map[string]string
// ExtraVolumeTags is a map of tags that will be attached to each dynamically provisioned
// volume.
// DEPRECATED: Use ExtraTags instead.
ExtraVolumeTags map[string]string
// ID of the kubernetes cluster.
KubernetesClusterID string
// flag to enable sdk debug log
AwsSdkDebugLog bool
// flag to warn on invalid tag, instead of returning an error
WarnOnInvalidTag bool
// flag to set user agent
UserAgentExtra string
// flag to enable batching of API calls
Batching bool
// flag to set the timeout for volume modification requests to be coalesced into a single
// volume modification call to AWS.
ModifyVolumeRequestHandlerTimeout time.Duration
VolumeAttachLimit int64
ReservedVolumeAttachments int

// #### Node options #####

// VolumeAttachLimit specifies the value that shall be reported as "maximum number of attachable volumes"
// in CSINode objects. It is similar to https://kubernetes.io/docs/concepts/storage/storage-limits/#custom-limits
// which allowed administrators to specify custom volume limits by configuring the kube-scheduler. Also, each AWS
// machine type has different volume limits. By default, the EBS CSI driver parses the machine type name and then
// decides the volume limit. However, this is only a rough approximation and not good enough in most cases.
// Specifying the volume attach limit via command line is the alternative until a more sophisticated solution presents
// itself (dynamically discovering the maximum number of attachable volume per EC2 machine type, see also
// https://github.com/kubernetes-sigs/aws-ebs-csi-driver/issues/347).
VolumeAttachLimit int64
// ReservedVolumeAttachments specifies number of volume attachments reserved for system use.
// Typically 1 for the root disk, but may be larger when more system disks are attached to nodes.
// This option is not used when --volume-attach-limit is specified.
// When -1, the amount of reserved attachments is loaded from instance metadata that captured state at node boot
// and may include not only system disks but also CSI volumes (and therefore it may be wrong).
ReservedVolumeAttachments int
}

func (o *Options) AddFlags(fs *flag.FlagSet) {
fs.StringVar(&o.Endpoint, "endpoint", DefaultCSIEndpoint, "Endpoint for the CSI driver server")
fs.StringVar(&o.HttpEndpoint, "http-endpoint", "", "The TCP network address where the HTTP server for metrics will listen (example: `:8080`). The default is empty string, which means the server is disabled.")
fs.BoolVar(&o.EnableOtelTracing, "enable-otel-tracing", false, "To enable opentelemetry tracing for the driver. The tracing is disabled by default. Configure the exporter endpoint with OTEL_EXPORTER_OTLP_ENDPOINT and other env variables, see https://opentelemetry.io/docs/specs/otel/configuration/sdk-environment-variables/#general-sdk-configuration.")
fs.Var(cliflag.NewMapStringString(&o.ExtraTags), "extra-tags", "Extra tags to attach to each dynamically provisioned resource. It is a comma separated list of key value pairs like '<key1>=<value1>,<key2>=<value2>'")
fs.Var(cliflag.NewMapStringString(&o.ExtraVolumeTags), "extra-volume-tags", "DEPRECATED: Please use --extra-tags instead. Extra volume tags to attach to each dynamically provisioned volume. It is a comma separated list of key value pairs like '<key1>=<value1>,<key2>=<value2>'")
fs.StringVar(&o.KubernetesClusterID, "k8s-tag-cluster-id", "", "ID of the Kubernetes cluster used for tagging provisioned EBS volumes (optional).")
fs.BoolVar(&o.AwsSdkDebugLog, "aws-sdk-debug-log", false, "To enable the aws sdk debug log level (default to false).")
fs.BoolVar(&o.WarnOnInvalidTag, "warn-on-invalid-tag", false, "To warn on invalid tags, instead of returning an error")
fs.StringVar(&o.UserAgentExtra, "user-agent-extra", "", "Extra string appended to user agent.")
fs.BoolVar(&o.Batching, "batching", false, "To enable batching of API calls. This is especially helpful for improving performance in workloads that are sensitive to EC2 rate limits.")
fs.DurationVar(&o.ModifyVolumeRequestHandlerTimeout, "modify-volume-request-handler-timeout", DefaultModifyVolumeRequestHandlerTimeout, "Timeout for the window in which volume modification calls must be received in order for them to coalesce into a single volume modification call to AWS. This must be lower than the csi-resizer and volumemodifier timeouts")
fs.Int64Var(&o.VolumeAttachLimit, "volume-attach-limit", -1, "Value for the maximum number of volumes attachable per node. If specified, the limit applies to all nodes and overrides --reserved-volume-attachments. If not specified, the value is approximated from the instance type.")
fs.IntVar(&o.ReservedVolumeAttachments, "reserved-volume-attachments", -1, "Number of volume attachments reserved for system use. Not used when --volume-attach-limit is specified. The total amount of volume attachments for a node is computed as: <nr. of attachments for corresponding instance type> - <number of NICs, if relevant to the instance type> - <reserved-volume-attachments value>. When -1, the amount of reserved attachments is loaded from instance metadata that captured state at node boot and may include not only system disks but also CSI volumes.")
func (o *Options) AddFlags(f *flag.FlagSet) {
// Server options
f.StringVar(&o.Endpoint, "endpoint", DefaultCSIEndpoint, "Endpoint for the CSI driver server")
f.StringVar(&o.HttpEndpoint, "http-endpoint", "", "The TCP network address where the HTTP server for metrics will listen (example: `:8080`). The default is empty string, which means the server is disabled.")
f.BoolVar(&o.EnableOtelTracing, "enable-otel-tracing", false, "To enable opentelemetry tracing for the driver. The tracing is disabled by default. Configure the exporter endpoint with OTEL_EXPORTER_OTLP_ENDPOINT and other env variables, see https://opentelemetry.io/docs/specs/otel/configuration/sdk-environment-variables/#general-sdk-configuration.")

// Controller options
f.Var(cliflag.NewMapStringString(&o.ExtraTags), "extra-tags", "Extra tags to attach to each dynamically provisioned resource. It is a comma separated list of key value pairs like '<key1>=<value1>,<key2>=<value2>'")
f.Var(cliflag.NewMapStringString(&o.ExtraVolumeTags), "extra-volume-tags", "DEPRECATED: Please use --extra-tags instead. Extra volume tags to attach to each dynamically provisioned volume. It is a comma separated list of key value pairs like '<key1>=<value1>,<key2>=<value2>'")
f.StringVar(&o.KubernetesClusterID, "k8s-tag-cluster-id", "", "ID of the Kubernetes cluster used for tagging provisioned EBS volumes (optional).")
f.BoolVar(&o.AwsSdkDebugLog, "aws-sdk-debug-log", false, "To enable the aws sdk debug log level (default to false).")
f.BoolVar(&o.WarnOnInvalidTag, "warn-on-invalid-tag", false, "To warn on invalid tags, instead of returning an error")
f.StringVar(&o.UserAgentExtra, "user-agent-extra", "", "Extra string appended to user agent.")
f.BoolVar(&o.Batching, "batching", false, "To enable batching of API calls. This is especially helpful for improving performance in workloads that are sensitive to EC2 rate limits.")
f.DurationVar(&o.ModifyVolumeRequestHandlerTimeout, "modify-volume-request-handler-timeout", DefaultModifyVolumeRequestHandlerTimeout, "Timeout for the window in which volume modification calls must be received in order for them to coalesce into a single volume modification call to AWS. This must be lower than the csi-resizer and volumemodifier timeouts")

// Node options
f.Int64Var(&o.VolumeAttachLimit, "volume-attach-limit", -1, "Value for the maximum number of volumes attachable per node. If specified, the limit applies to all nodes and overrides --reserved-volume-attachments. If not specified, the value is approximated from the instance type.")
f.IntVar(&o.ReservedVolumeAttachments, "reserved-volume-attachments", -1, "Number of volume attachments reserved for system use. Not used when --volume-attach-limit is specified. The total amount of volume attachments for a node is computed as: <nr. of attachments for corresponding instance type> - <number of NICs, if relevant to the instance type> - <reserved-volume-attachments value>. When -1, the amount of reserved attachments is loaded from instance metadata that captured state at node boot and may include not only system disks but also CSI volumes.")
}

func (o *Options) Validate() error {
Expand Down
143 changes: 143 additions & 0 deletions pkg/driver/options_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
package driver

import (
"testing"
"time"

flag "github.com/spf13/pflag"
)

func TestAddFlags(t *testing.T) {
o := &Options{}

f := flag.NewFlagSet("test", flag.ExitOnError)
o.AddFlags(f)

if err := f.Set("endpoint", "custom-endpoint"); err != nil {
t.Errorf("error setting endpoint: %v", err)
}
if err := f.Set("http-endpoint", ":8080"); err != nil {
t.Errorf("error setting http-endpoint: %v", err)
}
if err := f.Set("enable-otel-tracing", "true"); err != nil {
t.Errorf("error setting enable-otel-tracing: %v", err)
}
if err := f.Set("extra-tags", "key1=value1,key2=value2"); err != nil {
t.Errorf("error setting extra-tags: %v", err)
}
if err := f.Set("k8s-tag-cluster-id", "cluster-123"); err != nil {
t.Errorf("error setting k8s-tag-cluster-id: %v", err)
}
if err := f.Set("aws-sdk-debug-log", "true"); err != nil {
t.Errorf("error setting aws-sdk-debug-log: %v", err)
}
if err := f.Set("warn-on-invalid-tag", "true"); err != nil {
t.Errorf("error setting warn-on-invalid-tag: %v", err)
}
if err := f.Set("user-agent-extra", "extra-info"); err != nil {
t.Errorf("error setting user-agent-extra: %v", err)
}
if err := f.Set("batching", "true"); err != nil {
t.Errorf("error setting batching: %v", err)
}
if err := f.Set("modify-volume-request-handler-timeout", "1m"); err != nil {
t.Errorf("error setting modify-volume-request-handler-timeout: %v", err)
}
if err := f.Set("volume-attach-limit", "10"); err != nil {
t.Errorf("error setting volume-attach-limit: %v", err)
}
if err := f.Set("reserved-volume-attachments", "5"); err != nil {
t.Errorf("error setting reserved-volume-attachments: %v", err)
}

if o.Endpoint != "custom-endpoint" {
t.Errorf("unexpected Endpoint: got %s, want custom-endpoint", o.Endpoint)
}
if o.HttpEndpoint != ":8080" {
t.Errorf("unexpected HttpEndpoint: got %s, want :8080", o.HttpEndpoint)
}
if !o.EnableOtelTracing {
t.Error("unexpected EnableOtelTracing: got false, want true")
}
if len(o.ExtraTags) != 2 || o.ExtraTags["key1"] != "value1" || o.ExtraTags["key2"] != "value2" {
t.Errorf("unexpected ExtraTags: got %v, want map[key1:value1 key2:value2]", o.ExtraTags)
}
if o.KubernetesClusterID != "cluster-123" {
t.Errorf("unexpected KubernetesClusterID: got %s, want cluster-123", o.KubernetesClusterID)
}
if !o.AwsSdkDebugLog {
t.Error("unexpected AwsSdkDebugLog: got false, want true")
}
if !o.WarnOnInvalidTag {
t.Error("unexpected WarnOnInvalidTag: got false, want true")
}
if o.UserAgentExtra != "extra-info" {
t.Errorf("unexpected UserAgentExtra: got %s, want extra-info", o.UserAgentExtra)
}
if !o.Batching {
t.Error("unexpected Batching: got false, want true")
}
if o.ModifyVolumeRequestHandlerTimeout != time.Minute {
t.Errorf("unexpected ModifyVolumeRequestHandlerTimeout: got %v, want 1m", o.ModifyVolumeRequestHandlerTimeout)
}
if o.VolumeAttachLimit != 10 {
t.Errorf("unexpected VolumeAttachLimit: got %d, want 10", o.VolumeAttachLimit)
}
if o.ReservedVolumeAttachments != 5 {
t.Errorf("unexpected ReservedVolumeAttachments: got %d, want 5", o.ReservedVolumeAttachments)
}
}

func TestValidate(t *testing.T) {
tests := []struct {
name string
volumeAttachLimit int64
reservedAttachments int
expectedErr bool
errMsg string
}{
{
name: "both options not set",
volumeAttachLimit: -1,
reservedAttachments: -1,
expectedErr: false,
},
{
name: "volumeAttachLimit set",
volumeAttachLimit: 10,
reservedAttachments: -1,
expectedErr: false,
},
{
name: "reservedVolumeAttachments set",
volumeAttachLimit: -1,
reservedAttachments: 2,
expectedErr: false,
},
{
name: "both options set",
volumeAttachLimit: 10,
reservedAttachments: 2,
expectedErr: true,
errMsg: "only one of --volume-attach-limit and --reserved-volume-attachments may be specified",
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
o := &Options{
VolumeAttachLimit: tt.volumeAttachLimit,
ReservedVolumeAttachments: tt.reservedAttachments,
}

err := o.Validate()
if (err != nil) != tt.expectedErr {
t.Errorf("Options.Validate() error = %v, wantErr %v", err, tt.expectedErr)
}

if err != nil && err.Error() != tt.errMsg {
t.Errorf("Options.Validate() error message = %v, wantErrMsg %v", err.Error(), tt.errMsg)
}
})
}
}

0 comments on commit e6c731a

Please sign in to comment.