Skip to content

Commit

Permalink
Merge pull request #2028 from ConnorJC3/faster-clusters
Browse files Browse the repository at this point in the history
`Makefile` & `hack/` Process Improvements
  • Loading branch information
k8s-ci-robot authored May 2, 2024
2 parents ee6b018 + de840fe commit 09ce01b
Show file tree
Hide file tree
Showing 12 changed files with 200 additions and 121 deletions.
19 changes: 10 additions & 9 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,6 @@ ALL_OS_ARCH_OSVERSION=$(foreach os, $(ALL_OS), ${ALL_OS_ARCH_OSVERSION_${os}})

CLUSTER_NAME?=ebs-csi-e2e.k8s.local
CLUSTER_TYPE?=kops
WINDOWS?=false
WINDOWS_HOSTPROCESS?=false

# split words on hyphen, access by 1-index
word-hyphen = $(word $2,$(subst -, ,$1))
Expand Down Expand Up @@ -92,7 +90,7 @@ test/coverage:
# go test -v -race ./tests/sanity/...

.PHONY: tools
tools: bin/aws bin/ct bin/eksctl bin/ginkgo bin/golangci-lint bin/helm bin/kops bin/kubetest2 bin/mockgen bin/shfmt
tools: bin/aws bin/ct bin/eksctl bin/ginkgo bin/golangci-lint bin/gomplate bin/helm bin/kops bin/kubetest2 bin/mockgen bin/shfmt

.PHONY: update
update: update/gofmt update/kustomize update/mockgen update/gomod update/shfmt
Expand All @@ -106,9 +104,17 @@ verify: verify/govet verify/golangci-lint verify/update
all-push: all-image-registry push-manifest

.PHONY: cluster/create
cluster/create: bin/kops bin/eksctl bin/aws
cluster/create: bin/kops bin/eksctl bin/aws bin/gomplate
./hack/e2e/create-cluster.sh

# Prints an `export KUBECONFIG=...` line for the cluster.
# The recipe is silenced with `@` so that only the script's output reaches
# stdout, which lets callers run: eval "$$(make cluster/kubeconfig)"
.PHONY: cluster/kubeconfig
cluster/kubeconfig:
	@./hack/e2e/kubeconfig.sh

# Builds the driver image used by the E2E tests.
.PHONY: cluster/image
cluster/image: bin/aws
	./hack/e2e/build-image.sh

# Deletes a cluster previously created by `make cluster/create`.
.PHONY: cluster/delete
cluster/delete: bin/kops bin/eksctl
	./hack/e2e/delete-cluster.sh
Expand Down Expand Up @@ -137,11 +143,6 @@ e2e/external: bin/helm bin/kubetest2
COLLECT_METRICS="true" \
./hack/e2e/run.sh

.PHONY: e2e/external-arm64
e2e/external-arm64: bin/helm bin/kubetest2
IMAGE_ARCH="arm64" \
./hack/e2e/run.sh

.PHONY: e2e/external-windows
e2e/external-windows: bin/helm bin/kubetest2
WINDOWS=true \
Expand Down
47 changes: 42 additions & 5 deletions docs/makefile.md
Original file line number Diff line number Diff line change
Expand Up @@ -153,13 +153,54 @@ export CLUSTER_TYPE="eksctl"
make cluster/create
```

### `make cluster/image`

Builds an image for use in the E2E tests. This will automatically build the most appropriate image (for example, skipping Windows builds unless `WINDOWS` is set to `true`).

#### Example: Build a standard image

```bash
make cluster/image
```

#### Example: Build an arm64 image

```bash
export IMAGE_ARCH="arm64"
make cluster/image
```

#### Example: Build a Windows-compatible image

```bash
export WINDOWS="true"
make cluster/image
```

### `make cluster/kubeconfig`

Prints the `KUBECONFIG` environment variable for a cluster. You must pass the same `CLUSTER_TYPE` and `CLUSTER_NAME` as used when creating the cluster. This command must be `eval`ed to import the environment variables into your shell.

#### Example: Export the `KUBECONFIG` for a default cluster

```bash
eval "$(make cluster/kubeconfig)"
```

#### Example: Export the `KUBECONFIG` for an `eksctl` cluster

```bash
export CLUSTER_TYPE="eksctl"
eval "$(make cluster/kubeconfig)"
```

### `make cluster/delete`

Deletes a cluster created by `make cluster/create`. You must pass the same `CLUSTER_TYPE` and `CLUSTER_NAME` as used when creating the cluster.

## E2E Tests

Run E2E tests against a cluster created by `make cluster/create`. You must pass the same `CLUSTER_TYPE` and `CLUSTER_NAME` as used when creating the cluster.
Run E2E tests against a cluster created by `make cluster/create`. You must pass the same `CLUSTER_TYPE` and `CLUSTER_NAME` as used when creating the cluster. You must have already run `make cluster/image` to build the image for the cluster, or provide an image of your own.

Alternatively, you may run on an externally created cluster by passing `CLUSTER_TYPE` (required to determine which `values.yaml` to deploy) and `KUBECONFIG`. For `kops` clusters, the node IAM role should include the appropriate IAM policies to use the driver (see [the installation docs](./install.md#set-up-driver-permissions)). For `eksctl` clusters, the `ebs-csi-controller-sa` service account should be pre-created and set up to supply an IRSA role with the appropriate policies.

Expand All @@ -175,10 +216,6 @@ Run the single-AZ EBS CSI E2E tests. Requires a cluster with only one Availabili

Run the multi-AZ EBS CSI E2E tests. Requires a cluster with at least two Availability Zones.

### `make e2e/external-arm64`

Run the Kubernetes upstream [external storage E2E tests](https://github.com/kubernetes/kubernetes/blob/master/test/e2e/README.md) using an ARM64 image of the EBS CSI Driver. Requires a cluster with Graviton nodes.

### `make e2e/external-windows`

Run the Kubernetes upstream [external storage E2E tests](https://github.com/kubernetes/kubernetes/blob/master/test/e2e/README.md) with Windows tests enabled. Requires a cluster with Windows nodes.
Expand Down
31 changes: 28 additions & 3 deletions hack/e2e/ecr.sh → hack/e2e/build-image.sh
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,19 @@
# See the License for the specific language governing permissions and
# limitations under the License.

# This script builds the EBS CSI Driver image for the e2e tests
# Environment variables have default values (see config.sh) but
# many can be overridden on demand if needed

set -euo pipefail

function ecr_build_and_push() {
BASE_DIR="$(dirname "$(realpath "${BASH_SOURCE[0]}")")"
BIN="${BASE_DIR}/../../bin"

source "${BASE_DIR}/config.sh"
source "${BASE_DIR}/util.sh"

function build_and_push() {
REGION=${1}
AWS_ACCOUNT_ID=${2}
IMAGE_NAME=${3}
Expand All @@ -25,7 +35,7 @@ function ecr_build_and_push() {

# https://docs.aws.amazon.com/AmazonECR/latest/userguide/service-quotas.html
MAX_IMAGES=10000
IMAGE_COUNT=$(aws ecr list-images --repository-name ${IMAGE_NAME} --region ${REGION} --query 'length(imageIds[])')
IMAGE_COUNT=$(aws ecr list-images --repository-name "${IMAGE_NAME##*/}" --region "${REGION}" --query 'length(imageIds[])')

if [ $IMAGE_COUNT -ge $MAX_IMAGES ]; then
loudecho "Repository image limit reached. Unable to push new images."
Expand All @@ -38,7 +48,9 @@ function ecr_build_and_push() {
if [ -n "${PROW_JOB_ID:-}" ]; then
trap "docker buildx rm ebs-csi-multiarch-builder" EXIT
docker buildx create --driver-opt=image=moby/buildkit:v0.12.5 --bootstrap --use --name ebs-csi-multiarch-builder
docker run --rm --privileged multiarch/qemu-user-static --reset -p yes
# Ignore failures: Sometimes, this fails if run in parallel across multiple jobs
# If it fails "for real" the build later will fail, so it is safe to proceed
docker run --rm --privileged multiarch/qemu-user-static --reset -p yes || true
fi

export IMAGE="${IMAGE_NAME}"
Expand All @@ -54,3 +66,16 @@ function ecr_build_and_push() {
fi
make -j $(nproc) all-push
}

# Optionally create the ECR repository before pushing to it.
# The repository name is the last path component of IMAGE_NAME
# (everything up to and including the final "/" is stripped).
if [[ "${CREATE_MISSING_ECR_REPO}" == true ]]; then
  ECR_REPO_NAME="${IMAGE_NAME##*/}"
  REPO_CHECK=$(aws ecr describe-repositories --region "${AWS_REGION}")
  # jq's index() yields null when the name is absent from the list of existing repositories.
  # The name is passed with --arg so it is treated as data rather than spliced into the filter.
  REPO_INDEX=$(jq --arg repo "${ECR_REPO_NAME}" '.repositories | map(.repositoryName) | index($repo)' <<<"${REPO_CHECK}")
  if [[ "${REPO_INDEX}" == "null" ]]; then
    # Create the repository that was checked for, not a hard-coded name,
    # so a custom IMAGE_NAME gets its own repository.
    aws ecr create-repository --region "${AWS_REGION}" --repository-name "${ECR_REPO_NAME}" >/dev/null
  fi
fi

# Build the image and push it to the registry
build_and_push "${AWS_REGION}" \
  "${AWS_ACCOUNT_ID}" \
  "${IMAGE_NAME}" \
  "${IMAGE_TAG}" \
  "${IMAGE_ARCH}"
4 changes: 1 addition & 3 deletions hack/e2e/config.sh
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ FIRST_ZONE=$(echo "${ZONES}" | cut -d, -f1)
NODE_COUNT=${NODE_COUNT:-3}
INSTANCE_TYPE=${INSTANCE_TYPE:-c5.large}
WINDOWS=${WINDOWS:-"false"}
WINDOWS_HOSTPROCESS=${WINDOWS_HOSTPROCESS:-"false"}

# kops: must include patch version (e.g. 1.19.1)
# eksctl: mustn't include patch version (e.g. 1.19)
Expand Down Expand Up @@ -57,6 +58,3 @@ TEST_PATH=${TEST_PATH:-"./tests/e2e-kubernetes/..."}
GINKGO_FOCUS=${GINKGO_FOCUS:-"External.Storage"}
GINKGO_SKIP=${GINKGO_SKIP:-"\[Disruptive\]|\[Serial\]"}
GINKGO_PARALLEL=${GINKGO_PARALLEL:-25}

# TODO: Left in for now, but look into if this is still necessary and remove if not
EKSCTL_ADMIN_ROLE=${EKSCTL_ADMIN_ROLE:-"Infra-prod-KopsDeleteAllLambdaServiceRoleF1578477-1ELDFIB4KCMXV"}
11 changes: 8 additions & 3 deletions hack/e2e/create-cluster.sh
Original file line number Diff line number Diff line change
Expand Up @@ -59,16 +59,21 @@ elif [[ "${CLUSTER_TYPE}" == "eksctl" ]]; then
eksctl_create_cluster \
"$CLUSTER_NAME" \
"${BIN}/eksctl" \
"${BIN}/gomplate" \
"$AWS_REGION" \
"$ZONES" \
"$INSTANCE_TYPE" \
"$K8S_VERSION_EKSCTL" \
"$CLUSTER_FILE" \
"$KUBECONFIG" \
"${BASE_DIR}/eksctl/patch.yaml" \
"$EKSCTL_ADMIN_ROLE" \
"$WINDOWS" \
"${BASE_DIR}/eksctl/vpc-resource-controller-configmap.yaml"
"${BASE_DIR}/eksctl/vpc-resource-controller-configmap.yaml" \
"${BASE_DIR}/eksctl/cluster.yaml"
else
echo "Cluster type ${CLUSTER_TYPE} is invalid, must be kops or eksctl" >&2
exit 1
fi

if [[ "$WINDOWS" == true ]]; then
kubectl apply --kubeconfig "${KUBECONFIG}" -f "${BASE_DIR}/eksctl/vpc-resource-controller-configmap.yaml"
fi
33 changes: 33 additions & 0 deletions hack/e2e/eksctl/cluster.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# eksctl ClusterConfig template. Rendered with gomplate; every .Env value
# below must be present in the environment when the template is rendered.
apiVersion: eksctl.io/v1alpha5
kind: ClusterConfig
metadata:
  name: {{ .Env.CLUSTER_NAME }}
  region: {{ .Env.REGION }}
  version: "{{ .Env.K8S_VERSION }}"
availabilityZones: [{{ .Env.ZONES }}]
iam:
  vpcResourceControllerPolicy: true
  withOIDC: true
  serviceAccounts:
    - metadata:
        name: ebs-csi-controller-sa
        namespace: kube-system
      wellKnownPolicies:
        ebsCSIController: true
managedNodeGroups:
  - name: ng-linux
    amiFamily: AmazonLinux2
    desiredCapacity: 3
    disablePodIMDS: true
    instanceTypes: [{{ .Env.INSTANCE_TYPE }}]
    ssh:
      allow: false
  {{- if eq .Env.WINDOWS "true" }}
  - name: ng-windows
    amiFamily: WindowsServer2022CoreContainer
    desiredCapacity: 3
    disablePodIMDS: true
    instanceTypes: [m5.2xlarge]
    ssh:
      allow: false
  {{- end }}
86 changes: 19 additions & 67 deletions hack/e2e/eksctl/eksctl.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,68 +22,40 @@ set -euo pipefail
function eksctl_create_cluster() {
CLUSTER_NAME=${1}
EKSCTL_BIN=${2}
ZONES=${3}
INSTANCE_TYPE=${4}
K8S_VERSION=${5}
CLUSTER_FILE=${6}
KUBECONFIG=${7}
EKSCTL_PATCH_FILE=${8}
EKSCTL_ADMIN_ROLE=${9}
GOMPLATE_BIN=${3}
REGION=${4}
ZONES=${5}
INSTANCE_TYPE=${6}
K8S_VERSION=${7}
CLUSTER_FILE=${8}
KUBECONFIG=${9}
WINDOWS=${10}
VPC_CONFIGMAP_FILE=${11}
TEMPLATE_FILE=${12}

CLUSTER_NAME="${CLUSTER_NAME//./-}"

loudecho "Templating $CLUSTER_NAME to $CLUSTER_FILE"
CLUSTER_NAME="${CLUSTER_NAME}" \
REGION="${REGION}" \
K8S_VERSION="${K8S_VERSION}" \
ZONES="${ZONES}" \
INSTANCE_TYPE="${INSTANCE_TYPE}" \
WINDOWS="${WINDOWS}" \
${GOMPLATE_BIN} -f "${TEMPLATE_FILE}" -o "${CLUSTER_FILE}"

if eksctl_cluster_exists "${CLUSTER_NAME}" "${EKSCTL_BIN}"; then
loudecho "Upgrading cluster $CLUSTER_NAME with $CLUSTER_FILE"
${EKSCTL_BIN} upgrade cluster -f "${CLUSTER_FILE}"
else
loudecho "Creating cluster $CLUSTER_NAME with $CLUSTER_FILE (dry run)"
${EKSCTL_BIN} create cluster \
--managed \
--ssh-access=false \
--zones "${ZONES}" \
--nodes=3 \
--instance-types="${INSTANCE_TYPE}" \
--version="${K8S_VERSION}" \
--disable-pod-imds \
--dry-run \
"${CLUSTER_NAME}" >"${CLUSTER_FILE}"

if test -f "$EKSCTL_PATCH_FILE"; then
eksctl_patch_cluster_file "$CLUSTER_FILE" "$EKSCTL_PATCH_FILE"
fi

loudecho "Creating cluster $CLUSTER_NAME with $CLUSTER_FILE"
${EKSCTL_BIN} create cluster -f "${CLUSTER_FILE}" --kubeconfig "${KUBECONFIG}"
fi

loudecho "Cluster ${CLUSTER_NAME} kubecfg written to ${KUBECONFIG}"
loudecho "Getting cluster ${CLUSTER_NAME}"
${EKSCTL_BIN} get cluster "${CLUSTER_NAME}"

if [[ -n "$EKSCTL_ADMIN_ROLE" ]]; then
AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text)
ADMIN_ARN="arn:aws:iam::${AWS_ACCOUNT_ID}:role/${EKSCTL_ADMIN_ROLE}"
loudecho "Granting ${ADMIN_ARN} admin access to the cluster"
${EKSCTL_BIN} create iamidentitymapping --cluster "${CLUSTER_NAME}" --arn "${ADMIN_ARN}" --group system:masters --username admin
fi

if [[ "$WINDOWS" == true ]]; then
${EKSCTL_BIN} create nodegroup \
--managed=true \
--ssh-access=false \
--cluster="${CLUSTER_NAME}" \
--node-ami-family=WindowsServer2022CoreContainer \
--instance-types=m5.2xlarge \
-n ng-windows \
-m 3 \
-M 3

kubectl apply --kubeconfig "${KUBECONFIG}" -f "$VPC_CONFIGMAP_FILE"
loudecho "Applying VPC ConfigMap (Windows only)"
kubectl apply --kubeconfig "${KUBECONFIG}" -f "${VPC_CONFIGMAP_FILE}"
fi

return $?
}

function eksctl_cluster_exists() {
Expand All @@ -108,23 +80,3 @@ function eksctl_delete_cluster() {
loudecho "Deleting cluster ${CLUSTER_NAME}"
${EKSCTL_BIN} delete cluster "${CLUSTER_NAME}"
}

function eksctl_patch_cluster_file() {
CLUSTER_FILE=${1} # input must be yaml
EKSCTL_PATCH_FILE=${2} # input must be yaml

loudecho "Patching cluster $CLUSTER_NAME with $EKSCTL_PATCH_FILE"

# Temporary intermediate files for patching
CLUSTER_FILE_0=$CLUSTER_FILE.0
CLUSTER_FILE_1=$CLUSTER_FILE.1

cp "$CLUSTER_FILE" "$CLUSTER_FILE_0"

# Patch only the Cluster
kubectl patch --kubeconfig "/dev/null" -f "$CLUSTER_FILE_0" --local --type merge --patch "$(cat "$EKSCTL_PATCH_FILE")" -o yaml >"$CLUSTER_FILE_1"
mv "$CLUSTER_FILE_1" "$CLUSTER_FILE_0"

# Done patching, overwrite original CLUSTER_FILE
mv "$CLUSTER_FILE_0" "$CLUSTER_FILE" # output is yaml
}
9 changes: 0 additions & 9 deletions hack/e2e/eksctl/patch.yaml

This file was deleted.

27 changes: 27 additions & 0 deletions hack/e2e/kubeconfig.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#!/bin/bash

# Copyright 2023 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# This script echoes the KUBECONFIG back to the caller
# CLUSTER_NAME and CLUSTER_TYPE are expected to be specified by the caller

set -euo pipefail

# Resolve the directory containing this script (symlinks followed by realpath)
SCRIPT_PATH="$(realpath "${BASH_SOURCE[0]}")"
BASE_DIR="$(dirname "${SCRIPT_PATH}")"

# Kubeconfig path is derived from the caller-supplied cluster name and type
KUBECONFIG="${BASE_DIR}/csi-test-artifacts/${CLUSTER_NAME}.${CLUSTER_TYPE}.kubeconfig"

# Emit two explanatory comment lines followed by the export statement;
# the whole output is safe to pass to eval
printf '%s\n' \
  "# Makefiles cannot export environment variables directly" \
  "# Run eval \"\$(make cluster/kubeconfig)\"" \
  "export KUBECONFIG=\"${KUBECONFIG}\""
Loading

0 comments on commit 09ce01b

Please sign in to comment.