
virtual cluster e2e tests changes
Signed-off-by: Sebastian Sch <[email protected]>
SchSeba committed Sep 10, 2023
1 parent 14c6765 commit ebb3e9f
Showing 12 changed files with 431 additions and 13 deletions.
11 changes: 11 additions & 0 deletions .github/workflows/virtual-cluster.yml
@@ -0,0 +1,11 @@
name: sriov-sriov-k8s-test
on: [pull_request]
jobs:
  run-sriov-tests-on-virtual-k8s-cluster:
    name: test
    runs-on: [sriov]
    steps:
      - name: Check out code into the Go module directory
        uses: actions/checkout@v2
      - name: run test
        run: ./hack/run-e2e-conformance-virtual-cluster.sh
1 change: 1 addition & 0 deletions deploy/configmap.yaml
@@ -7,6 +7,7 @@ data:
  Intel_i40e_25G_SFP28: "8086 158b 154c"
  Intel_i40e_10G_X710_SFP: "8086 1572 154c"
  Intel_ixgbe_10G_X550: "8086 1563 1565"
  Intel_ixgbe_82576: "8086 10c9 10ca"
  Intel_i40e_X710_X557_AT_10G: "8086 1589 154c"
  Intel_i40e_10G_X710_BACKPLANE: "8086 1581 154c"
  Intel_i40e_10G_X710_BASE_T: "8086 15ff 154c"
1 change: 1 addition & 0 deletions deployment/sriov-network-operator/templates/configmap.yaml
@@ -7,6 +7,7 @@ data:
  Intel_i40e_25G_SFP28: "8086 158b 154c"
  Intel_i40e_10G_X710_SFP: "8086 1572 154c"
  Intel_ixgbe_10G_X550: "8086 1563 1565"
  Intel_ixgbe_82576: "8086 10c9 10ca"
  Intel_i40e_X710_X557_AT_10G: "8086 1589 154c"
  Intel_i40e_10G_X710_BACKPLANE: "8086 1581 154c"
  Intel_i40e_10G_X710_BASE_T: "8086 15ff 154c"
33 changes: 33 additions & 0 deletions doc/testing-virtual-machine.md
@@ -0,0 +1,33 @@
## E2E conformance test

It's possible to use QEMU to test the SR-IOV operator on a virtual Kubernetes/OpenShift cluster.
Using the emulated igb network device model allows creating virtual functions on the virtual system.
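
Once a worker VM with an emulated 82576 NIC is up, a quick way to confirm that the device really exposes virtual functions is through sysfs (a minimal sanity check; the interface name `eth1` is only an example):

```
# maximum number of VFs the emulated PF advertises
cat /sys/class/net/eth1/device/sriov_totalvfs
# create two VFs by hand (the operator normally manages this)
echo 2 > /sys/class/net/eth1/device/sriov_numvfs
```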

## How to test

First, enable dev mode via the operator's `DEV_MODE` environment variable.
Second, add the Intel virtual NIC to the supported-NICs ConfigMap:

```
Intel_ixgbe_82576: "8086 10c9 10ca"
```
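
For the first step it is enough to export the variable before deploying the operator, as the e2e script in this change does:

```
export DEV_MODE=TRUE
```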

Another requirement is to load the vfio kernel module with unsafe no-IOMMU mode enabled. Example systemd unit:

```
[Unit]
Description=vfio no-iommu
Before=kubelet.service crio.service node-valid-hostname.service
[Service]
# Need oneshot to delay kubelet
Type=oneshot
ExecStart=/usr/bin/bash -c "modprobe vfio enable_unsafe_noiommu_mode=1"
StandardOutput=journal+console
StandardError=journal+console
[Install]
WantedBy=network-online.target
```
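
After enabling the unit, you can verify that unsafe no-IOMMU mode is active (assuming the standard vfio module parameter path):

```
cat /sys/module/vfio/parameters/enable_unsafe_noiommu_mode
# expected output: Y
```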

# TBD
331 changes: 331 additions & 0 deletions hack/run-e2e-conformance-virtual-cluster.sh
@@ -0,0 +1,331 @@
#!/usr/bin/env bash
set -xeo pipefail

cluster_name=${CLUSTER_NAME:-virtual}
domain_name=$cluster_name.lab

api_ip=${API_IP:-192.168.122.250}
virtual_router_id=${VIRTUAL_ROUTER_ID:-250}
HOME="/root"

here="$(dirname "$(readlink --canonicalize "${BASH_SOURCE[0]}")")"
root="$(readlink --canonicalize "$here/..")"

check_requirements() {
  for cmd in kcli virsh podman make go; do
    if ! command -v "$cmd" &> /dev/null; then
      echo "$cmd is not available"
      exit 1
    fi
  done
  return 0
}

echo "## checking requirements"
check_requirements
echo "## delete existing cluster name $cluster_name"
kcli delete cluster $cluster_name -y

cat <<EOF > ./${cluster_name}-plan.yaml
ctlplane_memory: 4096
worker_memory: 4096
pool: default
disk_size: 50
network: default
api_ip: $api_ip
virtual_router_id: $virtual_router_id
domain: $domain_name
ctlplanes: 1
workers: 2
ingress: false
machine: q35
engine: crio
sdn: flannel
autolabeller: false
vmrules:
  - $cluster_name-worker-.*:
      nets:
        - name: default
          type: igb
          vfio: true
          noconf: true
          numa: 0
        - name: default
          type: igb
          vfio: true
          noconf: true
          numa: 1
      numcpus: 6
      numa:
        - id: 0
          vcpus: 0,2,4
          memory: 2048
        - id: 1
          vcpus: 1,3,5
          memory: 2048
EOF
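
# The vmrules above attach two extra igb NICs to every worker (one per NUMA
# node) with vfio enabled, giving the operator emulated SR-IOV-capable
# devices to manage.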

kcli create cluster generic --paramfile ./${cluster_name}-plan.yaml $cluster_name

export KUBECONFIG=$HOME/.kcli/clusters/$cluster_name/auth/kubeconfig
export PATH=$PWD:$PATH
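
# Poll until all 3 nodes (1 ctlplane + 2 workers) report Ready, for up to
# MAX_ATTEMPTS * sleep_time = 72 * 10s = 12 minutes.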

ATTEMPTS=0
MAX_ATTEMPTS=72
ready=false
sleep_time=10

until $ready || [ $ATTEMPTS -eq $MAX_ATTEMPTS ]; do
  echo "waiting for cluster to be ready"
  # grep -w avoids counting NotReady nodes as Ready
  if [ "$(kubectl get node | grep -cw Ready)" -eq 3 ]; then
    echo "cluster is ready"
    ready=true
  else
    echo "cluster is not ready yet"
    sleep $sleep_time
  fi
  ATTEMPTS=$((ATTEMPTS+1))
done

if ! $ready; then
echo "Timed out waiting for cluster to be ready"
kubectl get nodes
exit 1
fi

echo "## label cluster workers as sriov capable"
kubectl label node $cluster_name-worker-0.$domain_name feature.node.kubernetes.io/network-sriov.capable=true --overwrite
kubectl label node $cluster_name-worker-1.$domain_name feature.node.kubernetes.io/network-sriov.capable=true --overwrite

echo "## label cluster worker as worker"
kubectl label node $cluster_name-worker-0.$domain_name node-role.kubernetes.io/worker= --overwrite
kubectl label node $cluster_name-worker-1.$domain_name node-role.kubernetes.io/worker= --overwrite

controller_ip=$(kubectl get node -o wide | grep ctlp | awk '{print $6}')
insecure_registry="[[registry]]
location = \"$controller_ip:5000\"
insecure = true
"

cat << EOF > /etc/containers/registries.conf.d/003-${cluster_name}.conf
$insecure_registry
EOF

kcli ssh $cluster_name-ctlplane-0 << EOF
sudo su
echo '$insecure_registry' > /etc/containers/registries.conf.d/003-internal.conf
systemctl restart crio
EOF

kcli ssh $cluster_name-worker-0 << EOF
sudo su
echo '$insecure_registry' > /etc/containers/registries.conf.d/003-internal.conf
systemctl restart crio
EOF

kcli ssh $cluster_name-worker-1 << EOF
sudo su
echo '$insecure_registry' > /etc/containers/registries.conf.d/003-internal.conf
systemctl restart crio
EOF

kubectl create namespace container-registry

echo "## deploy internal registry"
cat <<EOF | kubectl apply -f -
apiVersion: v1
kind: PersistentVolume
metadata:
  name: registry-pv
spec:
  capacity:
    storage: 60Gi
  volumeMode: Filesystem
  accessModes:
    - ReadWriteOnce
  persistentVolumeReclaimPolicy: Delete
  storageClassName: registry-local-storage
  local:
    path: /mnt/
  nodeAffinity:
    required:
      nodeSelectorTerms:
        - matchExpressions:
            - key: kubernetes.io/hostname
              operator: In
              values:
                - ${cluster_name}-ctlplane-0.${domain_name}
EOF
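
# The PV above is pinned to the control-plane node; the registry Deployment
# below runs there as well (hostNetwork plus a control-plane toleration).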

cat <<EOF | kubectl apply -f -
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: registry-pv-claim
  namespace: container-registry
spec:
  accessModes:
    - ReadWriteOnce
  volumeMode: Filesystem
  resources:
    requests:
      storage: 60Gi
  storageClassName: registry-local-storage
EOF

cat <<EOF | kubectl apply -f -
apiVersion: apps/v1
kind: Deployment
metadata:
  name: registry
  namespace: container-registry
spec:
  replicas: 1
  selector:
    matchLabels:
      app: registry
  template:
    metadata:
      labels:
        app: registry
    spec:
      hostNetwork: true
      tolerations:
        - effect: NoSchedule
          key: node-role.kubernetes.io/control-plane
      containers:
        - image: docker.io/registry:latest
          imagePullPolicy: Always
          name: registry
          volumeMounts:
            - name: data
              mountPath: /var/lib/registry
      volumes:
        - name: data
          persistentVolumeClaim:
            claimName: registry-pv-claim
      terminationGracePeriodSeconds: 10
EOF


export SRIOV_NETWORK_OPERATOR_IMAGE="$controller_ip:5000/sriov-network-operator"
export SRIOV_NETWORK_CONFIG_DAEMON_IMAGE="$controller_ip:5000/sriov-network-config-daemon"

echo "## build operator image"
podman build -t "${SRIOV_NETWORK_OPERATOR_IMAGE}:latest" -f "${root}/Dockerfile" "${root}"

echo "## build daemon image"
podman build -t "${SRIOV_NETWORK_CONFIG_DAEMON_IMAGE}:latest" -f "${root}/Dockerfile.sriov-network-config-daemon" "${root}"

podman push --tls-verify=false "${SRIOV_NETWORK_OPERATOR_IMAGE}:latest"
podman push --tls-verify=false "${SRIOV_NETWORK_CONFIG_DAEMON_IMAGE}:latest"
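
# Both images are now pullable inside the cluster from $controller_ip:5000.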

# remove the stale crio bridge and let flannel recreate the pod network;
# \$ is escaped so the check runs on the node, not on the hypervisor
kcli ssh $cluster_name-ctlplane-0 << EOF
sudo su
if [ \$(ip a | grep 10.85.0 | wc -l) -eq 0 ]; then ip link del cni0; fi
EOF
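
# restart multus and coredns so they pick up the recreated flannel network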


kubectl -n kube-system get po | grep multus | awk '{print "kubectl -n kube-system delete po",$1}' | sh
kubectl -n kube-system get po | grep coredns | awk '{print "kubectl -n kube-system delete po",$1}' | sh

TIMEOUT=400
echo "## wait for coredns"
kubectl -n kube-system wait --for=condition=available deploy/coredns --timeout=${TIMEOUT}s
echo "## wait for multus"
kubectl -n kube-system wait --for=condition=ready -l name=multus pod --timeout=${TIMEOUT}s

echo "## deploy cert manager"
kubectl apply -f https://github.com/cert-manager/cert-manager/releases/download/v1.12.0/cert-manager.yaml

echo "## wait for cert manager to be ready"

ATTEMPTS=0
MAX_ATTEMPTS=72
ready=false
sleep_time=5

until $ready || [ $ATTEMPTS -eq $MAX_ATTEMPTS ]; do
  echo "waiting for cert manager webhook to be ready"
  if [ "$(kubectl -n cert-manager get po | grep webhook | grep -c "1/1")" -eq 1 ]; then
    echo "cert manager webhook is ready"
    ready=true
  else
    echo "cert manager webhook is not ready yet"
    sleep $sleep_time
  fi
  ATTEMPTS=$((ATTEMPTS+1))
done

if ! $ready; then
  echo "Timed out waiting for cert manager webhook to be ready"
  kubectl -n cert-manager get po
  exit 1
fi
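
# Operator deployment environment. DEV_MODE and VIRTUAL_ENV allow the
# operator to work with the emulated igb NICs on a virtual cluster
# (see doc/testing-virtual-machine.md).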


export ENABLE_ADMISSION_CONTROLLER=true
export SKIP_VAR_SET=""
export NAMESPACE="sriov-network-operator"
export OPERATOR_NAMESPACE="sriov-network-operator"
export CNI_BIN_PATH=/opt/cni/bin
export OPERATOR_EXEC=kubectl
export CLUSTER_TYPE=kubernetes
export DEV_MODE=TRUE
export VIRTUAL_ENV=TRUE

echo "## deploy namespace"
envsubst < $root/deploy/namespace.yaml | ${OPERATOR_EXEC} apply -f -

echo "## create certificates for webhook"
cat <<EOF | kubectl apply -f -
---
apiVersion: cert-manager.io/v1
kind: Issuer
metadata:
  name: selfsigned-issuer
  namespace: ${NAMESPACE}
spec:
  selfSigned: {}
---
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
  name: network-resources-injector-secret
  namespace: ${NAMESPACE}
spec:
  commonName: network-resources-injector-service.svc
  dnsNames:
    - network-resources-injector-service.${NAMESPACE}.svc.cluster.local
    - network-resources-injector-service.${NAMESPACE}.svc
  issuerRef:
    kind: Issuer
    name: selfsigned-issuer
  secretName: network-resources-injector-secret
---
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
  name: operator-webhook-service
  namespace: ${NAMESPACE}
spec:
  commonName: operator-webhook-service.svc
  dnsNames:
    - operator-webhook-service.${NAMESPACE}.svc.cluster.local
    - operator-webhook-service.${NAMESPACE}.svc
  issuerRef:
    kind: Issuer
    name: selfsigned-issuer
  secretName: operator-webhook-service
EOF
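
# cert-manager now issues self-signed certificates into the two secrets
# referenced above, which the operator and injector webhooks consume.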


echo "## apply CRDs"
kubectl apply -k $root/config/crd

echo "## deploying SRIOV Network Operator"
hack/deploy-setup.sh $NAMESPACE

echo "## wait for sriov operator to be ready"
hack/deploy-wait.sh

echo "## run sriov e2e conformance tests"
SUITE=./test/conformance hack/run-e2e-conformance.sh
