From 25c9b9bd27bff94f5983bd780b22395c09dcefd7 Mon Sep 17 00:00:00 2001 From: Jeffrey Nelson Date: Wed, 28 Feb 2024 15:11:00 -0600 Subject: [PATCH] Merge master into release-1.16 branch for v1.16.4 release (#2816) --- .github/PULL_REQUEST_TEMPLATE.md | 20 +-- .github/workflows/deps.yml | 4 +- .github/workflows/weekly-cron-tests.yaml | 7 +- Makefile | 2 +- README.md | 15 ++- charts/aws-vpc-cni/README.md | 10 +- cmd/aws-vpc-cni/main.go | 40 +++++- cmd/aws-vpc-cni/main_test.go | 37 ++++++ cmd/cni-metrics-helper/README.md | 45 ++++--- cmd/routed-eni-cni-plugin/cni.go | 3 +- go.mod | 4 +- go.sum | 4 +- pkg/ipamd/datastore/data_store.go | 4 +- pkg/ipamd/ipamd.go | 21 +-- pkg/networkutils/network.go | 54 ++++---- pkg/networkutils/network_test.go | 22 ++-- scripts/generate-cni-yaml.sh | 2 +- scripts/lib/cluster.sh | 3 +- scripts/run-integration-tests.sh | 5 +- scripts/run-static-canary.sh | 2 +- scripts/test/config/test-cluster.yaml | 7 +- test/agent/Dockerfile | 2 +- test/integration/cni/host_networking_test.go | 122 +++++++++++------- .../ipv6/ipv6_host_networking_test.go | 120 ++++++++++------- 24 files changed, 349 insertions(+), 206 deletions(-) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index e1662b6789..134d4960eb 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -12,23 +12,27 @@ Add one of the following: bug cleanup +dependency update documentation feature +improvement +release workflow +testing --> -**Which issue does this PR fix**: +**Which issue does this PR fix?**: + -**What does this PR do / Why do we need it**: - - -**If an issue # is not available please add repro steps and logs from IPAMD/CNI showing the issue**: - +**What does this PR do / Why do we need it?**: **Testing done on this change**: + + **Will this PR introduce any new dependencies?**: diff --git a/.github/workflows/deps.yml b/.github/workflows/deps.yml index 8612ad4068..c17655ce72 100644 --- a/.github/workflows/deps.yml 
+++ b/.github/workflows/deps.yml @@ -26,14 +26,14 @@ jobs: - id: govulncheck uses: ./.github/actions/govulncheck with: - go-version-input: 1.21.6 + go-version-input: 1.21.7 go-version-file: go.mod cache: false repo-checkout: false - id: govulncheck-tests-agent uses: ./.github/actions/govulncheck with: - go-version-input: 1.21.6 + go-version-input: 1.21.7 go-version-file: test/agent/go.mod cache: false repo-checkout: false diff --git a/.github/workflows/weekly-cron-tests.yaml b/.github/workflows/weekly-cron-tests.yaml index 458d7e08c2..57c3b93b9e 100644 --- a/.github/workflows/weekly-cron-tests.yaml +++ b/.github/workflows/weekly-cron-tests.yaml @@ -44,7 +44,6 @@ jobs: RUN_CNI_INTEGRATION_TESTS: false PERFORMANCE_TEST_S3_BUCKET_NAME: cni-performance-tests RUN_PERFORMANCE_TESTS: true - RUN_TESTER_LB_ADDONS: true run: | ./scripts/run-integration-tests.sh - name: Run kops tests @@ -54,9 +53,8 @@ jobs: ROLE_ARN: ${{ secrets.EKS_CLUSTER_ROLE_ARN }} RUN_CNI_INTEGRATION_TESTS: false RUN_KOPS_TEST: true - RUN_TESTER_LB_ADDONS: true - K8S_VERSION: 1.29.0-alpha.2 - KOPS_VERSION: v1.29.0-alpha.2 + K8S_VERSION: 1.29.0-alpha.3 + KOPS_VERSION: v1.29.0-alpha.3 run: | ./scripts/run-integration-tests.sh if: always() @@ -67,7 +65,6 @@ jobs: ROLE_ARN: ${{ secrets.EKS_CLUSTER_ROLE_ARN }} RUN_CNI_INTEGRATION_TESTS: false RUN_BOTTLEROCKET_TEST: true - RUN_TESTER_LB_ADDONS: true run: | ./scripts/run-integration-tests.sh if: always() diff --git a/Makefile b/Makefile index f94cf95aed..205ecd5dd4 100644 --- a/Makefile +++ b/Makefile @@ -22,7 +22,7 @@ # VERSION is the source revision that executables and images are built from. VERSION ?= $(shell git describe --tags --always --dirty || echo "unknown") # GOLANG_IMAGE is the building golang container image used. 
-GOLANG_IMAGE ?= public.ecr.aws/eks-distro-build-tooling/golang:1.21.6-7-gcc-al2 +GOLANG_IMAGE ?= public.ecr.aws/eks-distro-build-tooling/golang:1.21.7-8-gcc-al2 # BASE_IMAGE_CNI is the base layer image for the primary AWS VPC CNI plugin container BASE_IMAGE_CNI ?= public.ecr.aws/eks-distro-build-tooling/eks-distro-minimal-base-iptables:latest.2 # BASE_IMAGE_CNI_INIT is the base layer image for the AWS VPC CNI init container diff --git a/README.md b/README.md index 1eafcf0e7d..78c279426a 100644 --- a/README.md +++ b/README.md @@ -226,7 +226,7 @@ Type: Integer as a String Default: 9001 -Used to configure the MTU size for attached ENIs. The valid range is from `576` to `9001`. +Used to configure the MTU size for attached ENIs. The valid range for IPv4 is from `576` to `9001`, while the valid range for IPv6 is from `1280` to `9001`. #### `AWS_VPC_K8S_CNI_EXTERNALSNAT` @@ -267,6 +267,15 @@ Default: empty Specify a comma-separated list of IPv4 CIDRs to exclude from SNAT. For every item in the list an `iptables` rule and off\-VPC IP rule will be applied. If an item is not a valid ipv4 range it will be skipped. This should be used when `AWS_VPC_K8S_CNI_EXTERNALSNAT=false`. +#### `POD_MTU` (v1.16.4+) + +Type: Integer as a String + +*Note*: If unset, the default value is derived from `AWS_VPC_ENI_MTU`, which defaults to `9001`. +Default: 9001 + +Used to configure the MTU size for pod virtual interfaces. The valid range for IPv4 is from `576` to `9001`, while the valid range for IPv6 is from `1280` to `9001`. + #### `WARM_ENI_TARGET` Type: Integer as a String @@ -589,7 +598,7 @@ Setting `ANNOTATE_POD_IP` to `true` will allow IPAMD to add an annotation `vpc.a There is a known [issue](https://github.com/kubernetes/kubernetes/issues/39113) with kubelet taking time to update `Pod.Status.PodIP` leading to calico being blocked on programming the policy. 
Setting `ANNOTATE_POD_IP` to `true` will enable AWS VPC CNI plugin to add Pod IP as an annotation to the pod spec to address this race condition. -To annotate the pod with pod IP, you will have to add "patch" permission for pods resource in aws-node clusterrole. You can use the below command - +To annotate the pod with pod IP, you will have to add `patch` permission for pods resource in aws-node clusterrole. You can use the below command - ``` cat << EOF > append.yaml @@ -606,6 +615,8 @@ EOF kubectl apply -f <(cat <(kubectl get clusterrole aws-node -o yaml) append.yaml) ``` +NOTE: Adding `patch` permissions to the `aws-node` Daemonset increases the security scope for the plugin, so add this permission only after performing a proper security assessment of the tradeoffs. + #### `ENABLE_IPv4` (v1.10.0+) Type: Boolean as a String diff --git a/charts/aws-vpc-cni/README.md b/charts/aws-vpc-cni/README.md index c2e7b5fbd5..a3237b6f4a 100644 --- a/charts/aws-vpc-cni/README.md +++ b/charts/aws-vpc-cni/README.md @@ -108,9 +108,8 @@ $ helm install aws-vpc-cni --namespace kube-system eks/aws-vpc-cni --values valu ## Adopting the existing aws-node resources in an EKS cluster -If you do not want to delete the existing aws-node resources in your cluster that run the aws-vpc-cni and then install this helm chart, you can adopt the resources into a release instead. Refer to the script below to import existing resources into helm. Once you have annotated and labeled all the resources this chart specifies, enable the `originalMatchLabels` flag. If you have been careful this should not diff and leave all the resources unmodified and now under management of helm. +If you do not want to delete the existing aws-node resources in your cluster that run the aws-vpc-cni and then install this helm chart, you can adopt the resources into a release instead. Refer to the script below to import existing resources into helm. 
Once you have annotated and labeled all the resources this chart specifies, enable the `originalMatchLabels` flag. If you have been careful, this should not diff and leave all the resources unmodified and now under management of helm. -WARNING: Substitute YOUR_HELM_RELEASE_NAME_HERE with the name of your helm release. ``` #!/usr/bin/env bash @@ -118,15 +117,18 @@ set -euo pipefail for kind in daemonSet clusterRole clusterRoleBinding serviceAccount; do echo "setting annotations and labels on $kind/aws-node" - kubectl -n kube-system annotate --overwrite $kind aws-node meta.helm.sh/release-name=YOUR_HELM_RELEASE_NAME_HERE + kubectl -n kube-system annotate --overwrite $kind aws-node meta.helm.sh/release-name=aws-vpc-cni kubectl -n kube-system annotate --overwrite $kind aws-node meta.helm.sh/release-namespace=kube-system kubectl -n kube-system label --overwrite $kind aws-node app.kubernetes.io/managed-by=Helm done -kubectl -n kube-system annotate --overwrite configmap amazon-vpc-cni meta.helm.sh/release-name=YOUR_HELM_RELEASE_NAME_HERE +kubectl -n kube-system annotate --overwrite configmap amazon-vpc-cni meta.helm.sh/release-name=aws-vpc-cni kubectl -n kube-system annotate --overwrite configmap amazon-vpc-cni meta.helm.sh/release-namespace=kube-system kubectl -n kube-system label --overwrite configmap amazon-vpc-cni app.kubernetes.io/managed-by=Helm +Kubernetes recommends using server-side apply for more control over the field manager. 
After adopting the chart resources, you can run the following command to apply the chart: +``` +helm template aws-vpc-cni --include-crds --namespace kube-system eks/aws-vpc-cni --set originalMatchLabels=true | kubectl apply --server-side --force-conflicts --field-manager Helm -f - ``` ## Migrate from Helm v2 to Helm v3 diff --git a/cmd/aws-vpc-cni/main.go b/cmd/aws-vpc-cni/main.go index 7736be90f0..063d766fdd 100644 --- a/cmd/aws-vpc-cni/main.go +++ b/cmd/aws-vpc-cni/main.go @@ -66,7 +66,9 @@ const ( defaultAWSconflistFile = "/app/10-aws.conflist" tmpAWSconflistFile = "/tmp/10-aws.conflist" defaultVethPrefix = "eni" - defaultMTU = "9001" + defaultMTU = 9001 + minMTUv4 = 576 + minMTUv6 = 1280 defaultEnablePodEni = false defaultPodSGEnforcingMode = "strict" defaultPluginLogFile = "/var/log/aws-routed-eni/plugin.log" @@ -88,6 +90,7 @@ const ( envHostCniConfDirPath = "HOST_CNI_CONFDIR_PATH" envVethPrefix = "AWS_VPC_K8S_CNI_VETHPREFIX" envEniMTU = "AWS_VPC_ENI_MTU" + envPodMTU = "POD_MTU" envEnablePodEni = "ENABLE_POD_ENI" envPodSGEnforcingMode = "POD_SECURITY_GROUP_ENFORCING_MODE" envPluginLogFile = "AWS_VPC_K8S_PLUGIN_LOG_FILE" @@ -278,7 +281,10 @@ func generateJSON(jsonFile string, outFile string, getPrimaryIP func(ipv4 bool) } } vethPrefix := utils.GetEnv(envVethPrefix, defaultVethPrefix) - mtu := utils.GetEnv(envEniMTU, defaultMTU) + // Derive pod MTU from ENI MTU by default (note that values have already been validated) + eniMTU := utils.GetEnv(envEniMTU, strconv.Itoa(defaultMTU)) + // If pod MTU environment variable is set, overwrite ENI MTU. 
+ podMTU := utils.GetEnv(envPodMTU, eniMTU) podSGEnforcingMode := utils.GetEnv(envPodSGEnforcingMode, defaultPodSGEnforcingMode) pluginLogFile := utils.GetEnv(envPluginLogFile, defaultPluginLogFile) pluginLogLevel := utils.GetEnv(envPluginLogLevel, defaultPluginLogLevel) @@ -286,7 +292,7 @@ func generateJSON(jsonFile string, outFile string, getPrimaryIP func(ipv4 bool) netconf := string(byteValue) netconf = strings.Replace(netconf, "__VETHPREFIX__", vethPrefix, -1) - netconf = strings.Replace(netconf, "__MTU__", mtu, -1) + netconf = strings.Replace(netconf, "__MTU__", podMTU, -1) netconf = strings.Replace(netconf, "__PODSGENFORCINGMODE__", podSGEnforcingMode, -1) netconf = strings.Replace(netconf, "__PLUGINLOGFILE__", pluginLogFile, -1) netconf = strings.Replace(netconf, "__PLUGINLOGLEVEL__", pluginLogLevel, -1) @@ -385,6 +391,11 @@ func validateEnvVars() bool { return false } + // Validate MTU value for ENIs and pods + if !validateMTU(envEniMTU) || !validateMTU(envPodMTU) { + return false + } + prefixDelegationEn := utils.GetBoolAsStringEnvVar(envEnPrefixDelegation, defaultEnPrefixDelegation) warmIPTarget := utils.GetEnv(envWarmIPTarget, "0") warmPrefixTarget := utils.GetEnv(envWarmPrefixTarget, "0") @@ -398,6 +409,29 @@ func validateEnvVars() bool { return true } +func validateMTU(envVar string) bool { + // Validate MTU range based on IP address family + enabledIPv6 := utils.GetBoolAsStringEnvVar(envEnIPv6, defaultEnableIPv6) + + mtu, err, input := utils.GetIntFromStringEnvVar(envVar, defaultMTU) + if err != nil { + log.Errorf("%s MUST be a valid integer. %s is invalid", envVar, input) + return false + } + if enabledIPv6 { + if mtu < minMTUv6 || mtu > defaultMTU { + log.Errorf("%s cannot be less than 1280 or greater than 9001 in IPv6. %s is invalid", envVar, input) + return false + } + } else { + if mtu < minMTUv4 || mtu > defaultMTU { + log.Errorf("%s cannot be less than 576 or greater than 9001 in IPv4. 
%s is invalid", envVar, input) + return false + } + } + return true +} + func main() { os.Exit(_main()) } diff --git a/cmd/aws-vpc-cni/main_test.go b/cmd/aws-vpc-cni/main_test.go index 477fa927b0..1b7387f3c9 100644 --- a/cmd/aws-vpc-cni/main_test.go +++ b/cmd/aws-vpc-cni/main_test.go @@ -47,3 +47,40 @@ func TestGenerateJSONPlusBandwidthAndTuning(t *testing.T) { err := generateJSON(awsConflist, devNull, getPrimaryIPMock) assert.NoError(t, err) } + +func TestMTUValidation(t *testing.T) { + // By default, ENI MTU and pod MTU should be valid + assert.True(t, validateMTU(envEniMTU)) + assert.True(t, validateMTU(envPodMTU)) + + // Non-integer values should fail + _ = os.Setenv(envEniMTU, "true") + _ = os.Setenv(envPodMTU, "abc") + assert.False(t, validateMTU(envEniMTU)) + assert.False(t, validateMTU(envPodMTU)) + + // Integer values within IPv4 range should succeed + _ = os.Setenv(envEniMTU, "5000") + _ = os.Setenv(envPodMTU, "3000") + assert.True(t, validateMTU(envEniMTU)) + assert.True(t, validateMTU(envPodMTU)) + + // Integer values outside IPv4 range should fail + _ = os.Setenv(envEniMTU, "10000") + _ = os.Setenv(envPodMTU, "500") + assert.False(t, validateMTU(envEniMTU)) + assert.False(t, validateMTU(envPodMTU)) + + // Integer values within IPv6 range should succeed + _ = os.Setenv(envEnIPv6, "true") + _ = os.Setenv(envEniMTU, "5000") + _ = os.Setenv(envPodMTU, "3000") + assert.True(t, validateMTU(envEniMTU)) + assert.True(t, validateMTU(envPodMTU)) + + // Integer values outside IPv6 range should fail + _ = os.Setenv(envEniMTU, "10000") + _ = os.Setenv(envPodMTU, "1200") + assert.False(t, validateMTU(envEniMTU)) + assert.False(t, validateMTU(envPodMTU)) +} diff --git a/cmd/cni-metrics-helper/README.md b/cmd/cni-metrics-helper/README.md index 45676573d1..9b41be0297 100644 --- a/cmd/cni-metrics-helper/README.md +++ b/cmd/cni-metrics-helper/README.md @@ -15,24 +15,33 @@ The following diagram shows how `cni-metrics-helper` works in a cluster: As you can see in the 
diagram, the `cni-metrics-helper` connects to the API Server over https (`tcp/443`), and another connection is created from the API Server to the worker node over http (`tcp/61678`). If you deploy Amazon EKS with the recommended security groups from [Restricting cluster traffic](https://docs.aws.amazon.com/eks/latest/userguide/sec-group-reqs.html#security-group-restricting-cluster-traffic), then make sure that a security group is in place that allows the inbound connection from the API Server to the worker nodes over `tcp/61678`. Adding the CNI metrics helper will publish the following metrics to CloudWatch: -``` -"addReqCount", -"assignIPAddresses", -"awsAPIErr", -"awsAPILatency", -"awsUtilErr", -"delReqCount", -"eniAllocated", -"eniMaxAvailable", -"ipamdActionInProgress", -"ipamdErr", -"maxIPAddresses", -"podENIErr", -"reconcileCount", -"totalIPAddresses", -"totalIPv4Prefixes", -"totalAssignedIPv4sPerCidr" -``` + +| Metric | Description | Statistic[^1] | +| ------ | ----------- | ------------- | +| addReqCount | The number of CNI ADD requests that require an IP address | Sum | +| assignIPAddresses | The number of IP addresses assigned to pods | Sum | +| awsAPIErr | The number of times AWS API returns an error | Sum | +| awsAPILatency | AWS API call latency in ms | Max | +| awsUtilErr | The number of errors not handled in awsutils library | Sum | +| delReqCount | The number of CNI DEL requests | Sum | +| eniAllocated | The number of ENIs allocated | Sum | +| eniMaxAvailable | The maximum number of ENIs that can be attached to this instance, accounting for unmanaged ENIs | Sum | +| ipamdActionInProgress | The number of ipamd actions in progress | Sum | +| ipamdErr | The number of errors encountered in ipamd | Sum | +| maxIPAddresses | The maximum number of IP addresses that can be allocated to the instance | Sum | +| podENIErr | The number of errors encountered while managing ENIs for pods | Sum | +| reconcileCount | The number of times ipamd reconciles on ENIs and 
IP/Prefix addresses | Sum | +| totalIPAddresses | The number of IPs allocated for pods | Sum | +| totalIPv4Prefixes | The total number of IPv4 prefixes | Sum | +| totalAssignedIPv4sPerCidr | The total number of IP addresses assigned per cidr | Sum | +| forceRemoveENI | The number of ENIs force removed while they had assigned pods | Sum | +| forceRemoveIPs | The number of IPs force removed while they had assigned pods | Sum | +| ec2ApiReqCount | The number of requests made to EC2 APIs by CNI | Sum | +| ec2ApiErrCount | The number of failed EC2 API requests | Sum | + +[^1]: This column indicates how the metric has been aggregated across all nodes + Sum: For datapoints from all nodes, this is the summation of those datapoints + Max: For datapoints from all nodes, this is the maximum value of those datapoints ## Using IRSA As per [AWS EKS Security Best Practice](https://docs.aws.amazon.com/eks/latest/userguide/best-practices-security.html), if you are using IRSA for pods then following requirements must be satisfied to succesfully publish metrics to CloudWatch diff --git a/cmd/routed-eni-cni-plugin/cni.go b/cmd/routed-eni-cni-plugin/cni.go index a0619920cb..0e38f65b23 100644 --- a/cmd/routed-eni-cni-plugin/cni.go +++ b/cmd/routed-eni-cni-plugin/cni.go @@ -144,7 +144,8 @@ func add(args *skel.CmdArgs, cniTypes typeswrapper.CNITYPES, grpcClient grpcwrap return errors.Wrap(err, "add cmd: failed to load k8s config from arg") } - mtu := networkutils.GetEthernetMTU(conf.MTU) + // Derive pod MTU. Note that the value has already been validated. + mtu := networkutils.GetPodMTU(conf.MTU) log.Debugf("MTU value set is %d:", mtu) // Set up a connection to the ipamD server. 
diff --git a/go.mod b/go.mod index d06c5a7b11..479d9b69a3 100644 --- a/go.mod +++ b/go.mod @@ -12,7 +12,6 @@ require ( github.com/coreos/go-iptables v0.7.0 github.com/go-logr/logr v1.4.1 github.com/golang/mock v1.6.0 - github.com/golang/protobuf v1.5.3 github.com/google/go-cmp v0.6.0 github.com/onsi/ginkgo/v2 v2.14.0 github.com/onsi/gomega v1.30.0 @@ -31,7 +30,7 @@ require ( google.golang.org/grpc v1.61.0 gopkg.in/natefinch/lumberjack.v2 v2.2.1 gopkg.in/yaml.v2 v2.4.0 - helm.sh/helm/v3 v3.14.0 + helm.sh/helm/v3 v3.14.2 k8s.io/api v0.29.0 k8s.io/apimachinery v0.29.0 k8s.io/cli-runtime v0.29.0 @@ -82,6 +81,7 @@ require ( github.com/gobwas/glob v0.2.3 // indirect github.com/gogo/protobuf v1.3.2 // indirect github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect + github.com/golang/protobuf v1.5.3 // indirect github.com/google/btree v1.0.1 // indirect github.com/google/gnostic-models v0.6.9-0.20230804172637-c7be7c783f49 // indirect github.com/google/gofuzz v1.2.0 // indirect diff --git a/go.sum b/go.sum index cab9ad8eb4..252f1117b5 100644 --- a/go.sum +++ b/go.sum @@ -604,8 +604,8 @@ gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gotest.tools/v3 v3.4.0 h1:ZazjZUfuVeZGLAmlKKuyv3IKP5orXcwtOwDQH6YVr6o= gotest.tools/v3 v3.4.0/go.mod h1:CtbdzLSsqVhDgMtKsx03ird5YTGB3ar27v0u/yKBW5g= -helm.sh/helm/v3 v3.14.0 h1:TaZIH6uOchn7L27ptwnnuHJiFrT/BsD4dFdp/HLT2nM= -helm.sh/helm/v3 v3.14.0/go.mod h1:2itvvDv2WSZXTllknfQo6j7u3VVgMAvm8POCDgYH424= +helm.sh/helm/v3 v3.14.2 h1:V71fv+NGZv0icBlr+in1MJXuUIHCiPG1hW9gEBISTIA= +helm.sh/helm/v3 v3.14.2/go.mod h1:2itvvDv2WSZXTllknfQo6j7u3VVgMAvm8POCDgYH424= honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= k8s.io/api v0.29.0 
h1:NiCdQMY1QOp1H8lfRyeEf8eOwV6+0xA6XEE44ohDX2A= diff --git a/pkg/ipamd/datastore/data_store.go b/pkg/ipamd/datastore/data_store.go index ee4c5d1e9e..ba49b98bc3 100644 --- a/pkg/ipamd/datastore/data_store.go +++ b/pkg/ipamd/datastore/data_store.go @@ -978,8 +978,8 @@ func (ds *DataStore) GetENINeedsIP(maxIPperENI int, skipPrimary bool) *ENI { ds.lock.Lock() defer ds.lock.Unlock() for _, eni := range ds.eniPool { - if skipPrimary && eni.IsPrimary { - ds.log.Debugf("Skip the primary ENI for need IP check") + if (skipPrimary && eni.IsPrimary) || eni.IsTrunk { + ds.log.Debugf("Skip needs IP check for trunk ENI or primary ENI when Custom Networking is enabled") continue } if len(eni.AvailableIPv4Cidrs) < maxIPperENI { diff --git a/pkg/ipamd/ipamd.go b/pkg/ipamd/ipamd.go index 3689bb052b..4698f20081 100644 --- a/pkg/ipamd/ipamd.go +++ b/pkg/ipamd/ipamd.go @@ -455,12 +455,12 @@ func (c *IPAMContext) nodeInit() error { return err } - if c.enablePodENI { - // Try to patch CNINode with Security Groups for Pods feature. - c.tryEnableSecurityGroupsForPods(ctx) - } - if c.enableIPv6 { + // Security Groups for Pods cannot be enabled for IPv4 at this point, as Custom Networking must be enabled first. + if c.enablePodENI { + // Try to patch CNINode with Security Groups for Pods feature. + c.tryEnableSecurityGroupsForPods(ctx) + } // We will not support upgrading/converting an existing IPv4 cluster to operate in IPv6 mode. So, we will always // start with a clean slate in IPv6 mode. We also do not have to deal with dynamic update of Prefix Delegation // feature in IPv6 mode as we do not support (yet) a non-PD v6 option. In addition, we do not support custom @@ -540,6 +540,11 @@ func (c *IPAMContext) nodeInit() error { } } + // Now that Custom Networking is (potentially) enabled, Security Groups for Pods can be enabled for IPv4 nodes. + if c.enablePodENI { + c.tryEnableSecurityGroupsForPods(ctx) + } + // On node init, check if datastore pool needs to be increased.
If so, attach CIDRs from existing ENIs and attach new ENIs. datastorePoolTooLow, _ := c.isDatastorePoolTooLow() if !c.disableENIProvisioning && datastorePoolTooLow { @@ -655,11 +660,7 @@ func (c *IPAMContext) updateIPPoolIfRequired(ctx context.Context) { log.Debugf("IP stats - total IPs: %d, assigned IPs: %d, cooldown IPs: %d", stats.TotalIPs, stats.AssignedIPs, stats.CooldownIPs) if datastorePoolTooLow { - // Allow for rapid scale up to decrease time it takes for pod to retrieve an ip - // but conservative scale down to account for pod churn - for datastorePoolStillTooLow := datastorePoolTooLow; datastorePoolStillTooLow; datastorePoolStillTooLow, _ = c.isDatastorePoolTooLow() { - c.increaseDatastorePool(ctx) - } + c.increaseDatastorePool(ctx) } else if c.isDatastorePoolTooHigh(stats) { c.decreaseDatastorePool(decreaseIPPoolInterval) } diff --git a/pkg/networkutils/network.go b/pkg/networkutils/network.go index 4189c9cbe0..00ff6af202 100644 --- a/pkg/networkutils/network.go +++ b/pkg/networkutils/network.go @@ -29,6 +29,7 @@ import ( "github.com/coreos/go-iptables/iptables" "github.com/aws/amazon-vpc-cni-k8s/pkg/sgpp" + "github.com/aws/amazon-vpc-cni-k8s/utils" "k8s.io/apimachinery/pkg/util/sets" @@ -115,7 +116,9 @@ const ( defaultConnmark = 0x80 // envMTU gives a way to configure the MTU size for new ENIs attached. Range is from 576 to 9001. - envMTU = "AWS_VPC_ENI_MTU" + envMTU = "AWS_VPC_ENI_MTU" + defaultMTU = 9001 + minMTUv4 = 576 // envVethPrefix is the environment variable to configure the prefix of the host side veth device names envVethPrefix = "AWS_VPC_K8S_CNI_VETHPREFIX" @@ -126,10 +129,6 @@ const ( // envEnIpv6Egress is the environment variable to enable IPv6 egress support on EKS v4 cluster envEnIpv6Egress = "ENABLE_V6_EGRESS" - // Range of MTU for each ENI and veth pair. 
Defaults to maximumMTU - minimumMTU = 576 - maximumMTU = 9001 - // number of retries to add a route maxRetryRouteAdd = 5 @@ -198,7 +197,7 @@ func New() NetworkAPIs { typeOfSNAT: typeOfSNAT(), nodePortSupportEnabled: nodePortSupportEnabled(), mainENIMark: getConnmark(), - mtu: GetEthernetMTU(""), + mtu: GetEthernetMTU(), vethPrefix: getVethPrefixName(), podSGEnforcingMode: sgpp.LoadEnforcingModeFromEnv(), @@ -849,7 +848,7 @@ func GetConfigForDebug() map[string]interface{} { envExcludeSNATCIDRs: parseCIDRString(envExcludeSNATCIDRs), envExternalSNAT: useExternalSNAT(), envExternalServiceCIDRs: parseCIDRString(envExternalServiceCIDRs), - envMTU: GetEthernetMTU(""), + envMTU: GetEthernetMTU(), envVethPrefix: getVethPrefixName(), envNodePortSupport: nodePortSupportEnabled(), envRandomizeSNAT: typeOfSNAT(), @@ -1293,32 +1292,25 @@ func (n *linuxNetwork) UpdateExternalServiceIpRules(ruleList []netlink.Rule, ext return nil } -// GetEthernetMTU gets the MTU setting from AWS_VPC_ENI_MTU if set, or takes the passed in string. Defaults to 9001 if not set. -func GetEthernetMTU(envMTUValue string) int { - inputStr, found := os.LookupEnv(envMTU) - if found { - envMTUValue = inputStr +// GetEthernetMTU returns the MTU value to program for ENIs. Note that the value was already validated during container initialization. +func GetEthernetMTU() int { + mtu, _, _ := utils.GetIntFromStringEnvVar(envMTU, defaultMTU) + return mtu +} + +// GetPodMTU validates the pod MTU value. If an invalid value is passed, the default is used. +func GetPodMTU(podMTU string) int { + mtu, err := strconv.Atoi(podMTU) + if err != nil { + log.Errorf("Failed to parse pod MTU %s: %v", podMTU, err) + return defaultMTU } - if envMTUValue != "" { - mtu, err := strconv.Atoi(envMTUValue) - if err != nil { - log.Errorf("Failed to parse %s will use %d: %v", envMTU, maximumMTU, err.Error()) - return maximumMTU - } - // Restrict range between jumbo frame and the maximum required size to assemble.
- // Details in https://tools.ietf.org/html/rfc879 and - // https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/network_mtu.html - if mtu < minimumMTU { - log.Errorf("%s is too low: %d. Will use %d", envMTU, mtu, minimumMTU) - return minimumMTU - } - if mtu > maximumMTU { - log.Errorf("%s is too high: %d. Will use %d", envMTU, mtu, maximumMTU) - return maximumMTU - } - return mtu + + // Only IPv4 bounds can be enforced, but note that the conflist value is already validated during container initialization. + if mtu < minMTUv4 || mtu > defaultMTU { + return defaultMTU } - return maximumMTU + return mtu } // getVethPrefixName gets the name prefix of the veth devices based on the AWS_VPC_K8S_CNI_VETHPREFIX environment variable diff --git a/pkg/networkutils/network_test.go b/pkg/networkutils/network_test.go index 906b060a4b..fd5950ab84 100644 --- a/pkg/networkutils/network_test.go +++ b/pkg/networkutils/network_test.go @@ -419,19 +419,21 @@ func TestSetupHostNetworkNodePortDisabledAndSNATEnabled(t *testing.T) { }, mockIptables.(*mock_iptables.MockIptables).DataplaneState) } -func TestLoadMTUFromEnvTooLow(t *testing.T) { - os.Setenv(envMTU, "1") - assert.Equal(t, GetEthernetMTU(""), minimumMTU) -} +func TestGetEthernetMTU(t *testing.T) { + assert.Equal(t, GetEthernetMTU(), defaultMTU) -func TestLoadMTUFromEnv1500(t *testing.T) { - os.Setenv(envMTU, "1500") - assert.Equal(t, GetEthernetMTU(""), 1500) + os.Setenv(envMTU, "5000") + assert.Equal(t, GetEthernetMTU(), 5000) } -func TestLoadMTUFromEnvTooHigh(t *testing.T) { - os.Setenv(envMTU, "65536") - assert.Equal(t, GetEthernetMTU(""), maximumMTU) +func TestGetPodMTU(t *testing.T) { + // For any invalid value, return the default MTU + assert.Equal(t, GetPodMTU("abc"), defaultMTU) + assert.Equal(t, GetPodMTU("500"), defaultMTU) + assert.Equal(t, GetPodMTU("10000"), defaultMTU) + + // For any valid value, return the value + assert.Equal(t, GetPodMTU("8000"), 8000) } func TestLoadExcludeSNATCIDRsFromEnv(t *testing.T) { diff 
--git a/scripts/generate-cni-yaml.sh b/scripts/generate-cni-yaml.sh index 3741ce10d7..4e8fca7817 100755 --- a/scripts/generate-cni-yaml.sh +++ b/scripts/generate-cni-yaml.sh @@ -4,7 +4,7 @@ set -euo pipefail SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )" PLATFORM=$(uname | tr '[:upper:]' '[:lower:]') -HELM_VERSION="3.14.0" +HELM_VERSION="3.14.2" NAMESPACE="kube-system" MAKEFILEPATH=$SCRIPTPATH/../Makefile diff --git a/scripts/lib/cluster.sh b/scripts/lib/cluster.sh index 3dc769de08..2a39a56b82 100644 --- a/scripts/lib/cluster.sh +++ b/scripts/lib/cluster.sh @@ -12,7 +12,7 @@ function load_deveks_cluster_details() { echo "loading cluster details $CLUSTER_NAME" PROVIDER_ID=$(kubectl get nodes --kubeconfig $KUBE_CONFIG_PATH -ojson | jq -r '.items[0].spec.providerID') INSTANCE_ID=${PROVIDER_ID##*/} - VPC_ID=$(aws ec2 describe-instances --instance-ids ${INSTANCE_ID} --no-cli-pager | jq -r '.Reservations[].Instances[].VpcId') + VPC_ID=$(aws ec2 describe-instances --instance-ids ${INSTANCE_ID} | jq -r '.Reservations[].Instances[].VpcId') } function down-test-cluster() { @@ -92,7 +92,6 @@ function up-kops-cluster { --cloud aws \ --zones ${AWS_DEFAULT_REGION}a,${AWS_DEFAULT_REGION}b \ --networking amazonvpc \ - --container-runtime containerd \ --node-count 2 \ --node-size c5.xlarge \ --ssh-public-key=~/.ssh/devopsinuse.pub \ diff --git a/scripts/run-integration-tests.sh b/scripts/run-integration-tests.sh index d2c66e39d9..9734991d5f 100755 --- a/scripts/run-integration-tests.sh +++ b/scripts/run-integration-tests.sh @@ -26,7 +26,6 @@ ARCH=$(go env GOARCH) : "${BUILD:=true}" : "${RUN_CNI_INTEGRATION_TESTS:=true}" : "${RUN_CONFORMANCE:=false}" -: "${RUN_TESTER_LB_ADDONS:=false}" : "${RUN_KOPS_TEST:=false}" : "${RUN_BOTTLEROCKET_TEST:=false}" : "${RUN_PERFORMANCE_TESTS:=false}" @@ -193,7 +192,7 @@ echo "TIMELINE: Upping test cluster took $UP_CLUSTER_DURATION seconds." 
# Fetch VPC_ID from created cluster if [[ "$RUN_KOPS_TEST" == true ]]; then INSTANCE_ID=$(kubectl get nodes -l node-role.kubernetes.io/node -o jsonpath='{range .items[*]}{@.metadata.name}{"\n"}' | head -1) - VPC_ID=$(aws ec2 describe-instances --instance-ids "$INSTANCE_ID" --no-cli-pager | jq -r '.Reservations[].Instances[].VpcId' ) + VPC_ID=$(aws ec2 describe-instances --instance-ids "$INSTANCE_ID" | jq -r '.Reservations[].Instances[].VpcId' ) else DESCRIBE_CLUSTER_OP=$(aws eks describe-cluster --name "$CLUSTER_NAME" --region "$AWS_DEFAULT_REGION") VPC_ID=$(echo "$DESCRIBE_CLUSTER_OP" | jq -r '.cluster.resourcesVpcConfig.vpcId') @@ -285,7 +284,7 @@ if [[ "$DEPROVISION" == true ]]; then if [[ "$RUN_KOPS_TEST" == true ]]; then down-kops-cluster elif [[ "$RUN_BOTTLEROCKET_TEST" == true ]]; then - eksctl delete cluster $CLUSTER_NAME + eksctl delete cluster $CLUSTER_NAME --disable-nodegroup-eviction emit_cloudwatch_metric "bottlerocket_test_status" "1" elif [[ "$RUN_PERFORMANCE_TESTS" == true ]]; then eksctl delete cluster $CLUSTER_NAME diff --git a/scripts/run-static-canary.sh b/scripts/run-static-canary.sh index e70b23fb9f..b5e1520bd4 100755 --- a/scripts/run-static-canary.sh +++ b/scripts/run-static-canary.sh @@ -21,7 +21,7 @@ function run_ginkgo_test() { local focus=$1 echo "Running ginkgo tests with focus: $focus" - (CGO_ENABLED=0 ginkgo $EXTRA_GINKGO_FLAGS --no-color --focus="$focus" -v --timeout 30m --fail-on-pending $GINKGO_TEST_BUILD/cni.test -- \ + (CGO_ENABLED=0 ginkgo $EXTRA_GINKGO_FLAGS --no-color --focus="$focus" -v --timeout 10m --fail-on-pending $GINKGO_TEST_BUILD/cni.test -- \ --cluster-kubeconfig="$KUBE_CONFIG_PATH" \ --cluster-name="$CLUSTER_NAME" \ --aws-region="$REGION" \ diff --git a/scripts/test/config/test-cluster.yaml b/scripts/test/config/test-cluster.yaml index 908619a417..92676c4af8 100644 --- a/scripts/test/config/test-cluster.yaml +++ b/scripts/test/config/test-cluster.yaml @@ -29,4 +29,9 @@ managedNodeGroups: volumeSize: 40 
releaseVersion: "" tags: - group: amazon-vpc-cni-k8s-x86 \ No newline at end of file + group: amazon-vpc-cni-k8s-x86 +availabilityZones: + - us-west-2a + - us-west-2b + - us-west-2c + - us-west-2d \ No newline at end of file diff --git a/test/agent/Dockerfile b/test/agent/Dockerfile index 4b828713f6..6d65310070 100644 --- a/test/agent/Dockerfile +++ b/test/agent/Dockerfile @@ -1,4 +1,4 @@ -FROM public.ecr.aws/eks-distro-build-tooling/golang:1.21.6-7-gcc-al2 as builder +FROM public.ecr.aws/eks-distro-build-tooling/golang:1.21.7-8-gcc-al2 as builder WORKDIR /workspace ENV GOPROXY direct diff --git a/test/integration/cni/host_networking_test.go b/test/integration/cni/host_networking_test.go index 58d93ae81b..89114f6e79 100644 --- a/test/integration/cni/host_networking_test.go +++ b/test/integration/cni/host_networking_test.go @@ -17,12 +17,11 @@ import ( "strconv" "time" - v1 "k8s.io/api/core/v1" - "github.com/aws/amazon-vpc-cni-k8s/test/framework/resources/k8s/manifest" k8sUtils "github.com/aws/amazon-vpc-cni-k8s/test/framework/resources/k8s/utils" "github.com/aws/amazon-vpc-cni-k8s/test/framework/utils" "github.com/aws/amazon-vpc-cni-k8s/test/integration/common" + v1 "k8s.io/api/core/v1" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" @@ -31,13 +30,15 @@ import ( // TODO: Instead of passing the list of pods to the test helper, have the test helper get the pod on node const ( NEW_MTU_VAL = 1300 + NEW_POD_MTU = 1280 NEW_VETH_PREFIX = "veth" + podLabelKey = "app" + podLabelVal = "host-networking-test" ) +var err error + var _ = Describe("test host networking", func() { - var err error - var podLabelKey = "app" - var podLabelVal = "host-networking-test" // For host networking tests, increase WARM_IP_TARGET to prevent long IPAMD warmup. 
BeforeEach(func() { @@ -57,6 +58,10 @@ var _ = Describe("test host networking", func() { "AWS_VPC_ENI_MTU": DEFAULT_MTU_VAL, "AWS_VPC_K8S_CNI_VETHPREFIX": DEFAULT_VETH_PREFIX, }) + k8sUtils.RemoveVarFromDaemonSetAndWaitTillUpdated(f, utils.AwsNodeName, + utils.AwsNodeNamespace, utils.AwsNodeName, map[string]struct{}{ + "POD_MTU": {}, + }) // After updating daemonset pod, we must wait until conflist is updated so that container-runtime calls CNI ADD with the latest VETH prefix and MTU. // Otherwise, the stale value can cause failures in future test cases. time.Sleep(utils.PollIntervalMedium) @@ -104,51 +109,14 @@ var _ = Describe("test host networking", func() { common.ValidateHostNetworking(common.NetworkingTearDownSucceeds, input, primaryNode.Name, f) }) - It("Validate Host Networking setup after changing MTU and Veth Prefix", func() { - deployment := manifest.NewBusyBoxDeploymentBuilder(f.Options.TestImageRegistry). - Replicas(maxIPPerInterface*2). - PodLabel(podLabelKey, podLabelVal). - NodeName(primaryNode.Name). - Build() - - By("Configuring Veth Prefix and MTU value on aws-node daemonset") - k8sUtils.AddEnvVarToDaemonSetAndWaitTillUpdated(f, utils.AwsNodeName, utils.AwsNodeNamespace, utils.AwsNodeName, map[string]string{ - "AWS_VPC_ENI_MTU": strconv.Itoa(NEW_MTU_VAL), - "AWS_VPC_K8S_CNI_VETHPREFIX": NEW_VETH_PREFIX, + Context("Validate Host Networking setup after changing Veth Prefix and", func() { + It("ENI MTU", func() { + mtuValidationTest(false, NEW_MTU_VAL) + }) + It("POD MTU", func() { + Skip("Skip this test until v1.16.4 is released") + mtuValidationTest(true, NEW_POD_MTU) }) - // After updating daemonset pod, we must wait until conflist is updated so that container-runtime calls CNI ADD with the new VETH prefix and MTU. - time.Sleep(utils.PollIntervalMedium) - - By("creating a deployment to launch pods") - deployment, err = f.K8sResourceManagers.DeploymentManager(). 
- CreateAndWaitTillDeploymentIsReady(deployment, utils.DefaultDeploymentReadyTimeout) - Expect(err).ToNot(HaveOccurred()) - - By("getting the list of pods using IP from primary and secondary ENI") - interfaceTypeToPodList := - common.GetPodsOnPrimaryAndSecondaryInterface(primaryNode, podLabelKey, podLabelVal, f) - - By("generating the pod networking validation input to be passed to tester") - podNetworkingValidationInput := common.GetPodNetworkingValidationInput(interfaceTypeToPodList, vpcCIDRs) - podNetworkingValidationInput.VethPrefix = NEW_VETH_PREFIX - podNetworkingValidationInput.ValidateMTU = true - podNetworkingValidationInput.MTU = NEW_MTU_VAL - input, err := podNetworkingValidationInput.Serialize() - Expect(err).NotTo(HaveOccurred()) - - By("validating host networking setup is setup correctly with MTU check as well") - common.ValidateHostNetworking(common.NetworkingSetupSucceeds, input, primaryNode.Name, f) - - By("deleting the deployment to test teardown") - err = f.K8sResourceManagers.DeploymentManager(). - DeleteAndWaitTillDeploymentIsDeleted(deployment) - Expect(err).ToNot(HaveOccurred()) - - By("waiting to allow CNI to tear down networking for terminated pods") - time.Sleep(time.Second * 60) - - By("validating host networking is teared down correctly") - common.ValidateHostNetworking(common.NetworkingTearDownSucceeds, input, primaryNode.Name, f) }) }) @@ -205,3 +173,59 @@ var _ = Describe("test host networking", func() { }) }) }) + +func mtuValidationTest(usePodMTU bool, mtuVal int) { + deployment := manifest.NewBusyBoxDeploymentBuilder(f.Options.TestImageRegistry). + Replicas(maxIPPerInterface*2). + PodLabel(podLabelKey, podLabelVal). + NodeName(primaryNode.Name). 
+ Build() + + if usePodMTU { + By("Configuring Veth Prefix and Pod MTU value on aws-node daemonset") + k8sUtils.AddEnvVarToDaemonSetAndWaitTillUpdated(f, utils.AwsNodeName, utils.AwsNodeNamespace, utils.AwsNodeName, map[string]string{ + "AWS_VPC_ENI_MTU": strconv.Itoa(NEW_MTU_VAL), + "POD_MTU": strconv.Itoa(NEW_POD_MTU), + "AWS_VPC_K8S_CNI_VETHPREFIX": NEW_VETH_PREFIX, + }) + } else { + By("Configuring Veth Prefix and ENI MTU value on aws-node daemonset") + k8sUtils.AddEnvVarToDaemonSetAndWaitTillUpdated(f, utils.AwsNodeName, utils.AwsNodeNamespace, utils.AwsNodeName, map[string]string{ + "AWS_VPC_ENI_MTU": strconv.Itoa(NEW_MTU_VAL), + "AWS_VPC_K8S_CNI_VETHPREFIX": NEW_VETH_PREFIX, + }) + } + // After updating daemonset pod, we must wait until conflist is updated so that container-runtime calls CNI ADD with the new VETH prefix and MTU. + time.Sleep(utils.PollIntervalMedium) + + By("creating a deployment to launch pods") + deployment, err = f.K8sResourceManagers.DeploymentManager(). + CreateAndWaitTillDeploymentIsReady(deployment, utils.DefaultDeploymentReadyTimeout) + Expect(err).ToNot(HaveOccurred()) + + By("getting the list of pods using IP from primary and secondary ENI") + interfaceTypeToPodList := + common.GetPodsOnPrimaryAndSecondaryInterface(primaryNode, podLabelKey, podLabelVal, f) + + By("generating the pod networking validation input to be passed to tester") + podNetworkingValidationInput := common.GetPodNetworkingValidationInput(interfaceTypeToPodList, vpcCIDRs) + podNetworkingValidationInput.VethPrefix = NEW_VETH_PREFIX + podNetworkingValidationInput.ValidateMTU = true + podNetworkingValidationInput.MTU = mtuVal + input, err := podNetworkingValidationInput.Serialize() + Expect(err).NotTo(HaveOccurred()) + + By("validating host networking setup is setup correctly with MTU check as well") + common.ValidateHostNetworking(common.NetworkingSetupSucceeds, input, primaryNode.Name, f) + + By("deleting the deployment to test teardown") + err = 
f.K8sResourceManagers.DeploymentManager(). + DeleteAndWaitTillDeploymentIsDeleted(deployment) + Expect(err).ToNot(HaveOccurred()) + + By("waiting to allow CNI to tear down networking for terminated pods") + time.Sleep(time.Second * 60) + + By("validating host networking is teared down correctly") + common.ValidateHostNetworking(common.NetworkingTearDownSucceeds, input, primaryNode.Name, f) +} diff --git a/test/integration/ipv6/ipv6_host_networking_test.go b/test/integration/ipv6/ipv6_host_networking_test.go index e698993b9f..ffb9b1a548 100644 --- a/test/integration/ipv6/ipv6_host_networking_test.go +++ b/test/integration/ipv6/ipv6_host_networking_test.go @@ -41,16 +41,19 @@ const ( const ( AWS_VPC_ENI_MTU = "AWS_VPC_ENI_MTU" AWS_VPC_K8S_CNI_VETHPREFIX = "AWS_VPC_K8S_CNI_VETHPREFIX" + POD_MTU = "POD_MTU" NEW_MTU_VAL = 1300 + NEW_POD_MTU = 1280 NEW_VETH_PREFIX = "veth" DEFAULT_MTU_VAL = "9001" DEFAULT_VETH_PREFIX = "eni" + podLabelKey = "app" + podLabelVal = "host-networking-test" ) +var err error + var _ = Describe("[CANARY] test ipv6 host netns setup", func() { - var err error - var podLabelKey = "app" - var podLabelVal = "host-networking-test" Context("when pods using IP from primary ENI are created", func() { AfterEach(func() { @@ -58,6 +61,10 @@ var _ = Describe("[CANARY] test ipv6 host netns setup", func() { AWS_VPC_ENI_MTU: DEFAULT_MTU_VAL, AWS_VPC_K8S_CNI_VETHPREFIX: DEFAULT_VETH_PREFIX, }) + k8sUtils.RemoveVarFromDaemonSetAndWaitTillUpdated(f, utils.AwsNodeName, + utils.AwsNodeNamespace, utils.AwsNodeName, map[string]struct{}{ + "POD_MTU": {}, + }) // After updating daemonset pod, we must wait until conflist is updated so that container-runtime calls CNI ADD with the latest VETH prefix and MTU. // Otherwise, the stale value can cause failures in future test cases. 
time.Sleep(utils.PollIntervalMedium) @@ -98,51 +105,14 @@ var _ = Describe("[CANARY] test ipv6 host netns setup", func() { ValidateHostNetworking(NetworkingTearDownSucceeds, input) }) - It("Validate host netns setup after changing MTU and Veth Prefix", func() { - deployment := manifest.NewBusyBoxDeploymentBuilder(f.Options.TestImageRegistry). - Replicas(2). - PodLabel(podLabelKey, podLabelVal). - NodeName(primaryNode.Name). - Build() - - By("Configuring Veth Prefix and MTU value on aws-node daemonset") - k8sUtils.AddEnvVarToDaemonSetAndWaitTillUpdated(f, utils.AwsNodeName, utils.AwsNodeNamespace, utils.AwsNodeName, map[string]string{ - AWS_VPC_ENI_MTU: strconv.Itoa(NEW_MTU_VAL), - AWS_VPC_K8S_CNI_VETHPREFIX: NEW_VETH_PREFIX, + Context("Validate Host Networking setup after changing Veth Prefix and", func() { + It("ENI MTU", func() { + mtuValidationTest(false, NEW_MTU_VAL) + }) + It("POD MTU", func() { + Skip("Skip this test until v1.16.4 is released") + mtuValidationTest(true, NEW_POD_MTU) }) - // After updating daemonset pod, we must wait until conflist is updated so that container-runtime calls CNI ADD with the new VETH prefix and MTU. - time.Sleep(utils.PollIntervalMedium) - - By("creating a deployment to launch pods") - deployment, err = f.K8sResourceManagers.DeploymentManager(). 
- CreateAndWaitTillDeploymentIsReady(deployment, utils.DefaultDeploymentReadyTimeout) - Expect(err).ToNot(HaveOccurred()) - - By("getting the list of pods using IP from primary and secondary ENI") - interfaceTypeToPodList := - GetIPv6Pods(podLabelKey, podLabelVal) - - By("generating the pod networking validation input to be passed to tester") - podNetworkingValidationInput := GetIPv6PodNetworkingValidationInput(interfaceTypeToPodList) - podNetworkingValidationInput.VethPrefix = NEW_VETH_PREFIX - podNetworkingValidationInput.ValidateMTU = true - podNetworkingValidationInput.MTU = NEW_MTU_VAL - input, err := podNetworkingValidationInput.Serialize() - Expect(err).NotTo(HaveOccurred()) - - By("validating host networking setup is setup correctly with MTU check as well") - ValidateHostNetworking(NetworkingSetupSucceeds, input) - - By("deleting the deployment to test teardown") - err = f.K8sResourceManagers.DeploymentManager(). - DeleteAndWaitTillDeploymentIsDeleted(deployment) - Expect(err).ToNot(HaveOccurred()) - - By("waiting to allow CNI to tear down networking for terminated pods") - time.Sleep(time.Second * 60) - - By("validating host networking is teared down correctly") - ValidateHostNetworking(NetworkingTearDownSucceeds, input) }) }) @@ -277,3 +247,59 @@ func GetIPv6PodNetworkingValidationInput(podList v1.PodList) input.PodNetworking } return ip } + +func mtuValidationTest(usePodMTU bool, mtuVal int) { + deployment := manifest.NewBusyBoxDeploymentBuilder(f.Options.TestImageRegistry). + Replicas(2). + PodLabel(podLabelKey, podLabelVal). + NodeName(primaryNode.Name). 
+ Build() + + if usePodMTU { + By("Configuring Veth Prefix and Pod MTU value on aws-node daemonset") + k8sUtils.AddEnvVarToDaemonSetAndWaitTillUpdated(f, utils.AwsNodeName, utils.AwsNodeNamespace, utils.AwsNodeName, map[string]string{ + AWS_VPC_ENI_MTU: strconv.Itoa(NEW_MTU_VAL), + POD_MTU: strconv.Itoa(NEW_POD_MTU), + AWS_VPC_K8S_CNI_VETHPREFIX: NEW_VETH_PREFIX, + }) + } else { + By("Configuring Veth Prefix and ENI MTU value on aws-node daemonset") + k8sUtils.AddEnvVarToDaemonSetAndWaitTillUpdated(f, utils.AwsNodeName, utils.AwsNodeNamespace, utils.AwsNodeName, map[string]string{ + AWS_VPC_ENI_MTU: strconv.Itoa(NEW_MTU_VAL), + AWS_VPC_K8S_CNI_VETHPREFIX: NEW_VETH_PREFIX, + }) + } + // After updating daemonset pod, we must wait until conflist is updated so that container-runtime calls CNI ADD with the new VETH prefix and MTU. + time.Sleep(utils.PollIntervalMedium) + + By("creating a deployment to launch pods") + deployment, err = f.K8sResourceManagers.DeploymentManager(). + CreateAndWaitTillDeploymentIsReady(deployment, utils.DefaultDeploymentReadyTimeout) + Expect(err).ToNot(HaveOccurred()) + + By("getting the list of pods using IP from primary and secondary ENI") + interfaceTypeToPodList := + GetIPv6Pods(podLabelKey, podLabelVal) + + By("generating the pod networking validation input to be passed to tester") + podNetworkingValidationInput := GetIPv6PodNetworkingValidationInput(interfaceTypeToPodList) + podNetworkingValidationInput.VethPrefix = NEW_VETH_PREFIX + podNetworkingValidationInput.ValidateMTU = true + podNetworkingValidationInput.MTU = mtuVal + input, err := podNetworkingValidationInput.Serialize() + Expect(err).NotTo(HaveOccurred()) + + By("validating host networking setup is setup correctly with MTU check as well") + ValidateHostNetworking(NetworkingSetupSucceeds, input) + + By("deleting the deployment to test teardown") + err = f.K8sResourceManagers.DeploymentManager(). 
+ DeleteAndWaitTillDeploymentIsDeleted(deployment) + Expect(err).ToNot(HaveOccurred()) + + By("waiting to allow CNI to tear down networking for terminated pods") + time.Sleep(time.Second * 60) + + By("validating host networking is teared down correctly") + ValidateHostNetworking(NetworkingTearDownSucceeds, input) +}