From db23ea31de84df33479216e31478df90555fdad5 Mon Sep 17 00:00:00 2001
From: Sebastian Sch
Date: Wed, 14 Jun 2023 14:09:31 +0300
Subject: [PATCH] virtual cluster e2e tests changes

Signed-off-by: Sebastian Sch
---
 .github/workflows/virtual-cluster.yml         |  18 +
 deploy/configmap.yaml                         |   1 +
 .../templates/configmap.yaml                  |   1 +
 doc/testing-virtual-machine.md                |  33 ++
 hack/run-e2e-conformance-virtual-cluster.sh   | 340 ++++++++++++++++++
 hack/run-e2e-conformance.sh                   |   4 +-
 hack/test.sh                                  |   0
 test/conformance/tests/init.go                |   6 +
 test/conformance/tests/test_sriov_operator.go |  47 ++-
 test/util/cluster/cluster.go                  |  12 +-
 test/util/network/network.go                  |   5 +
 test/util/pod/pod.go                          |   5 +
 12 files changed, 453 insertions(+), 19 deletions(-)
 create mode 100644 .github/workflows/virtual-cluster.yml
 create mode 100644 doc/testing-virtual-machine.md
 create mode 100755 hack/run-e2e-conformance-virtual-cluster.sh
 create mode 100755 hack/test.sh

diff --git a/.github/workflows/virtual-cluster.yml b/.github/workflows/virtual-cluster.yml
new file mode 100644
index 000000000..78cdf19e5
--- /dev/null
+++ b/.github/workflows/virtual-cluster.yml
@@ -0,0 +1,18 @@
+name: sriov-sriov-k8s-test
+on: [pull_request]
+
+jobs:
+  run-sriov-tests-on-virtual-k8s-cluster:
+    name: test
+    runs-on: [sriov]
+    steps:
+    - name: Check out code into the Go module directory
+      uses: actions/checkout@v2
+
+    - name: Set up Go 1.20
+      uses: actions/setup-go@v3
+      with:
+        go-version: 1.20.x
+
+    - name: run test
+      run: ./hack/run-e2e-conformance-virtual-cluster.sh
\ No newline at end of file
diff --git a/deploy/configmap.yaml b/deploy/configmap.yaml
index 9d7cf8596..41ce4c809 100644
--- a/deploy/configmap.yaml
+++ b/deploy/configmap.yaml
@@ -7,6 +7,7 @@ data:
   Intel_i40e_25G_SFP28: "8086 158b 154c"
   Intel_i40e_10G_X710_SFP: "8086 1572 154c"
   Intel_ixgbe_10G_X550: "8086 1563 1565"
+  Intel_ixgbe_82576: "8086 10c9 10ca"
   Intel_i40e_X710_X557_AT_10G: "8086 1589 154c"
   Intel_i40e_10G_X710_BACKPLANE: "8086 1581 154c"
   Intel_i40e_10G_X710_BASE_T: "8086 15ff 154c"
diff --git a/deployment/sriov-network-operator/templates/configmap.yaml b/deployment/sriov-network-operator/templates/configmap.yaml
index 9d7cf8596..41ce4c809 100644
--- a/deployment/sriov-network-operator/templates/configmap.yaml
+++ b/deployment/sriov-network-operator/templates/configmap.yaml
@@ -7,6 +7,7 @@ data:
   Intel_i40e_25G_SFP28: "8086 158b 154c"
   Intel_i40e_10G_X710_SFP: "8086 1572 154c"
   Intel_ixgbe_10G_X550: "8086 1563 1565"
+  Intel_ixgbe_82576: "8086 10c9 10ca"
   Intel_i40e_X710_X557_AT_10G: "8086 1589 154c"
   Intel_i40e_10G_X710_BACKPLANE: "8086 1581 154c"
   Intel_i40e_10G_X710_BASE_T: "8086 15ff 154c"
diff --git a/doc/testing-virtual-machine.md b/doc/testing-virtual-machine.md
new file mode 100644
index 000000000..0baccc26c
--- /dev/null
+++ b/doc/testing-virtual-machine.md
@@ -0,0 +1,33 @@
+## E2E conformance test
+
+It's possible to use QEMU to test the SR-IOV operator on a virtual Kubernetes/OpenShift cluster.
+Using the igb network device model allows creating virtual functions on the virtual system.
+
+## How to test
+
+First, you will need to enable `DEV_MODE` via the operator environment variable.
+The second step is to add the Intel virtual NIC to the supported NICs ConfigMap:
+
+```
+Intel_ixgbe_82576: 8086 10c9 10ca
+```
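+
+For example (a sketch only; the namespace, deployment name, and ConfigMap name
+below assume a default `sriov-network-operator` deployment and may differ in
+your environment):
+
+```
+kubectl -n sriov-network-operator set env deployment/sriov-network-operator DEV_MODE=TRUE
+kubectl -n sriov-network-operator patch configmap supported-nic-ids --type merge \
+  -p '{"data":{"Intel_ixgbe_82576":"8086 10c9 10ca"}}'
+```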
+
+Another requirement is to load the vfio kernel module in no-IOMMU mode
+(`enable_unsafe_noiommu_mode=1`). Example systemd unit:
+
+```
+[Unit]
+Description=vfio no-iommu
+Before=kubelet.service crio.service node-valid-hostname.service
+
+[Service]
+# Need oneshot to delay kubelet
+Type=oneshot
+ExecStart=/usr/bin/bash -c "modprobe vfio enable_unsafe_noiommu_mode=1"
+StandardOutput=journal+console
+StandardError=journal+console
+
+[Install]
+WantedBy=network-online.target
+```
+
+# TBD
\ No newline at end of file
diff --git a/hack/run-e2e-conformance-virtual-cluster.sh b/hack/run-e2e-conformance-virtual-cluster.sh
new file mode 100755
index 000000000..522613fae
--- /dev/null
+++ b/hack/run-e2e-conformance-virtual-cluster.sh
@@ -0,0 +1,340 @@
+#!/usr/bin/env bash
+set -xeo pipefail
+
+cluster_name=${CLUSTER_NAME:-virtual}
+domain_name=$cluster_name.lab
+
+api_ip=${API_IP:-192.168.122.250}
+virtual_router_id=${VIRTUAL_ROUTER_ID:-250}
+HOME="/root"
+
+here="$(dirname "$(readlink --canonicalize "${BASH_SOURCE[0]}")")"
+root="$(readlink --canonicalize "$here/..")"
+
+check_requirements() {
+  for cmd in kcli virsh podman make go; do
+    if ! command -v "$cmd" &> /dev/null; then
+      echo "$cmd is not available"
+      exit 1
+    fi
+  done
+  return 0
+}
+
+echo "## checking requirements"
+check_requirements
+echo "## delete existing cluster name $cluster_name"
+kcli delete cluster $cluster_name -y
+kcli delete network $cluster_name -y
+
+function cleanup {
+  kcli delete cluster $cluster_name -y
+  kcli delete network $cluster_name -y
+}
+trap cleanup EXIT
+
+kcli create network -c 192.168.${virtual_router_id}.0/24 --nodhcp -i $cluster_name
+
+cat << EOF > ./${cluster_name}-plan.yaml
+ctlplane_memory: 4096
+worker_memory: 4096
+pool: default
+disk_size: 50
+network: default
+api_ip: $api_ip
+virtual_router_id: $virtual_router_id
+domain: $domain_name
+ctlplanes: 1
+workers: 2
+ingress: false
+machine: q35
+engine: crio
+sdn: flannel
+autolabeller: false
+vmrules:
+  - $cluster_name-worker-.*:
+      nets:
+        - name: default
+          type: igb
+          vfio: true
+          noconf: true
+          numa: 0
+        - name: $cluster_name
+          type: igb
+          vfio: true
+          noconf: true
+          numa: 1
+      numcpus: 6
+      numa:
+        - id: 0
+          vcpus: 0,2,4
+          memory: 2048
+        - id: 1
+          vcpus: 1,3,5
+          memory: 2048
+
+EOF
+
+kcli create cluster generic --paramfile ./${cluster_name}-plan.yaml $cluster_name
+
+export KUBECONFIG=$HOME/.kcli/clusters/$cluster_name/auth/kubeconfig
+export PATH=$PWD:$PATH
+
+ATTEMPTS=0
+MAX_ATTEMPTS=72
+ready=false
+sleep_time=10
+
+until $ready || [ $ATTEMPTS -eq $MAX_ATTEMPTS ]
+do
+  echo "waiting for cluster to be ready"
+  if [ `kubectl get node | grep -w Ready | wc -l` == 3 ]; then
+    echo "cluster is ready"
+    ready=true
+  else
+    echo "cluster is not ready yet"
+    sleep $sleep_time
+  fi
+  ATTEMPTS=$((ATTEMPTS+1))
+done
+
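+# At this point the loop has either observed 3 Ready nodes (ready=true) or
+# given up after MAX_ATTEMPTS * sleep_time = 72 * 10s = 12 minutes.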
+if ! $ready; then
+  echo "Timed out waiting for cluster to be ready"
+  kubectl get nodes
+  exit 1
+fi
+
+echo "## label cluster workers as sriov capable"
+kubectl label node $cluster_name-worker-0.$domain_name feature.node.kubernetes.io/network-sriov.capable=true --overwrite
+kubectl label node $cluster_name-worker-1.$domain_name feature.node.kubernetes.io/network-sriov.capable=true --overwrite
+
+echo "## label cluster workers as worker"
+kubectl label node $cluster_name-worker-0.$domain_name node-role.kubernetes.io/worker= --overwrite
+kubectl label node $cluster_name-worker-1.$domain_name node-role.kubernetes.io/worker= --overwrite
+
+controller_ip=`kubectl get node -o wide | grep ctlp | awk '{print $6}'`
+insecure_registry="[[registry]]
+location = \"$controller_ip:5000\"
+insecure = true
+"
+
+cat << EOF > /etc/containers/registries.conf.d/003-${cluster_name}.conf
+$insecure_registry
+EOF
+
+kcli ssh $cluster_name-ctlplane-0 << EOF
+sudo su
+echo '$insecure_registry' > /etc/containers/registries.conf.d/003-internal.conf
+systemctl restart crio
+EOF
+
+kcli ssh $cluster_name-worker-0 << EOF
+sudo su
+echo '$insecure_registry' > /etc/containers/registries.conf.d/003-internal.conf
+systemctl restart crio
+EOF
+
+kcli ssh $cluster_name-worker-1 << EOF
+sudo su
+echo '$insecure_registry' > /etc/containers/registries.conf.d/003-internal.conf
+systemctl restart crio
+EOF
+
+kubectl create namespace container-registry
+
+echo "## deploy internal registry"
+cat <