name: Conformance Cluster Mesh (ci-clustermesh)
# Any change in triggers needs to be reflected in the concurrency group.
on:
workflow_dispatch:
inputs:
PR-number:
description: "Pull request number."
required: true
context-ref:
description: "Context in which the workflow runs. If PR is from a fork, will be the PR target branch (general case). If PR is NOT from a fork, will be the PR branch itself (this allows committers to test changes to workflows directly from PRs)."
required: true
SHA:
description: "SHA under test (head of the PR branch)."
required: true
extra-args:
description: "[JSON object] Arbitrary arguments passed from the trigger comment via regex capture group. Parse with 'fromJson(inputs.extra-args).argName' in workflow."
required: false
default: '{}'
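# Example (illustrative, hypothetical key): with extra-args '{"test": "pod-to-pod"}',
# a step could read the value as ${{ fromJson(inputs.extra-args).test }}.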
# Run every 6 hours (at 03:00, 09:00, 15:00 and 21:00 UTC)
schedule:
- cron: '0 3/6 * * *'
# Specifying the access level for any one scope sets all
# unspecified scopes to 'none'.
permissions:
# To be able to access the repository with actions/checkout
contents: read
# To allow retrieving information from the PR API
pull-requests: read
# To be able to set commit status
statuses: write
concurrency:
# Structure:
# - Workflow name
# - Event type
# - A unique identifier depending on event type:
# - schedule: SHA
# - workflow_dispatch: PR number
#
# This structure ensures a unique concurrency group name is generated for each
# type of testing, such that re-runs will cancel the previous run.
group: |
${{ github.workflow }}
${{ github.event_name }}
${{
(github.event_name == 'schedule' && github.sha) ||
(github.event_name == 'workflow_dispatch' && github.event.inputs.PR-number)
}}
cancel-in-progress: true
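# Example (illustrative): a workflow_dispatch run for PR 12345 yields the
# multi-line group
#   Conformance Cluster Mesh (ci-clustermesh)
#   workflow_dispatch
#   12345
# whereas a scheduled run substitutes the commit SHA for the PR number.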
env:
# renovate: datasource=github-releases depName=kubernetes-sigs/kind
kind_version: v0.20.0
# renovate: datasource=docker depName=kindest/node
k8s_version: v1.27.3
# renovate: datasource=github-releases depName=cilium/cilium-cli
cilium_cli_version: v0.15.8
cilium_cli_ci_version:
CILIUM_CLI_MODE: helm
clusterName1: cluster1-${{ github.run_id }}
clusterName2: cluster2-${{ github.run_id }}
contextName1: kind-cluster1-${{ github.run_id }}
contextName2: kind-cluster2-${{ github.run_id }}
check_url: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}
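# Example (illustrative): with run id 6123456789 the Kind clusters are named
# cluster1-6123456789 and cluster2-6123456789, so concurrent runs cannot
# collide on cluster or kubectl context names.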
jobs:
commit-status-start:
name: Commit Status Start
runs-on: ubuntu-latest
steps:
- name: Set initial commit status
uses: myrotvorets/set-commit-status-action@38f3f27c7d52fb381273e95542f07f0fba301307 # v2.0.0
with:
sha: ${{ inputs.SHA || github.sha }}
installation-and-connectivity:
name: Installation and Connectivity Test
runs-on: ubuntu-latest
timeout-minutes: 60
env:
job_name: "Installation and Connectivity Test"
strategy:
fail-fast: false
matrix:
include:
- name: '1'
tunnel: 'disabled'
ipfamily: 'ipv4'
encryption: 'disabled'
kube-proxy: 'iptables'
kvstoremesh: true
cm-auth-mode-1: 'legacy'
cm-auth-mode-2: 'legacy'
- name: '2'
tunnel: 'disabled'
ipfamily: 'ipv4'
encryption: 'wireguard'
kube-proxy: 'none'
kvstoremesh: false
cm-auth-mode-1: 'migration'
cm-auth-mode-2: 'migration'
# IPsec encryption cannot be used with BPF NodePort.
- name: '3'
tunnel: 'disabled'
ipfamily: 'ipv4'
encryption: 'ipsec'
kube-proxy: 'iptables'
kvstoremesh: true
cm-auth-mode-1: 'cluster'
cm-auth-mode-2: 'cluster'
# IPsec encryption is currently not supported in case of ipv6-only clusters (#23553)
# Wireguard encryption is currently affected by a bug in case of ipv6-only clusters (#23917)
- name: '4'
tunnel: 'disabled'
ipfamily: 'ipv6'
encryption: 'disabled'
kube-proxy: 'none'
kvstoremesh: false
cm-auth-mode-1: 'legacy'
cm-auth-mode-2: 'migration'
# IPsec encryption cannot be used with BPF NodePort.
- name: '5'
tunnel: 'disabled'
ipfamily: 'dual'
encryption: 'ipsec'
kube-proxy: 'iptables'
kvstoremesh: true
cm-auth-mode-1: 'migration'
cm-auth-mode-2: 'cluster'
- name: '6'
tunnel: 'vxlan'
ipfamily: 'ipv4'
encryption: 'disabled'
kube-proxy: 'none'
kvstoremesh: false
cm-auth-mode-1: 'cluster'
cm-auth-mode-2: 'cluster'
- name: '7'
tunnel: 'vxlan'
ipfamily: 'ipv4'
encryption: 'wireguard'
kube-proxy: 'iptables'
kvstoremesh: true
cm-auth-mode-1: 'cluster'
cm-auth-mode-2: 'cluster'
# IPsec encryption cannot be used with BPF NodePort.
- name: '8'
tunnel: 'vxlan'
ipfamily: 'ipv4'
encryption: 'ipsec'
kube-proxy: 'iptables'
kvstoremesh: false
cm-auth-mode-1: 'cluster'
cm-auth-mode-2: 'cluster'
# Tunneling is currently not supported in case of ipv6-only clusters (#17240)
# - name: '9'
# tunnel: 'vxlan'
# ipfamily: 'ipv6'
# encryption: 'disabled'
# kube-proxy: 'none'
# kvstoremesh: true
# cm-auth-mode-1: 'cluster'
# cm-auth-mode-2: 'cluster'
- name: '10'
tunnel: 'vxlan'
ipfamily: 'dual'
encryption: 'wireguard'
kube-proxy: 'iptables'
kvstoremesh: false
cm-auth-mode-1: 'cluster'
cm-auth-mode-2: 'cluster'
steps:
- name: Checkout context ref (trusted)
uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac # v4.0.0
with:
ref: ${{ inputs.context-ref || github.sha }}
persist-credentials: false
- name: Set Environment Variables
uses: ./.github/actions/set-env-variables
- name: Set up job variables for GHA environment
id: vars
run: |
if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
SHA="${{ inputs.SHA }}"
else
SHA="${{ github.sha }}"
fi
# bpf.masquerade is disabled due to #23283
CILIUM_INSTALL_DEFAULTS="--chart-directory=install/kubernetes/cilium \
--helm-set=debug.enabled=true \
--helm-set=image.repository=quay.io/${{ env.QUAY_ORGANIZATION_DEV }}/cilium-ci \
--helm-set=image.useDigest=false \
--helm-set=image.tag=${SHA} \
--helm-set=operator.image.repository=quay.io/${{ env.QUAY_ORGANIZATION_DEV }}/operator \
--helm-set=operator.image.suffix=-ci \
--helm-set=operator.image.tag=${SHA} \
--helm-set=operator.image.useDigest=false \
--helm-set=bpf.masquerade=false \
--helm-set=bpf.monitorAggregation=none \
--helm-set=hubble.enabled=true \
--helm-set=hubble.relay.enabled=true \
--helm-set=hubble.relay.image.override=quay.io/${{ env.QUAY_ORGANIZATION_DEV }}/hubble-relay-ci:${SHA} \
--helm-set=hubble.relay.image.useDigest=false \
--helm-set=clustermesh.useAPIServer=true \
--helm-set=clustermesh.apiserver.kvstoremesh.enabled=${{ matrix.kvstoremesh }} \
--helm-set=clustermesh.apiserver.image.override=quay.io/${{ env.QUAY_ORGANIZATION_DEV }}/clustermesh-apiserver-ci:${SHA} \
--helm-set=clustermesh.apiserver.image.useDigest=false \
--helm-set=clustermesh.apiserver.kvstoremesh.image.override=quay.io/${{ env.QUAY_ORGANIZATION_DEV }}/kvstoremesh-ci:${SHA} \
--helm-set=clustermesh.apiserver.kvstoremesh.image.useDigest=false \
"
CILIUM_INSTALL_TUNNEL="--helm-set=tunnel=vxlan"
if [ "${{ matrix.tunnel }}" == "disabled" ]; then
CILIUM_INSTALL_TUNNEL="--helm-set-string=routingMode=native \
--helm-set=autoDirectNodeRoutes=true \
--helm-set=ipv4NativeRoutingCIDR=10.240.0.0/12 \
--helm-set=ipv6NativeRoutingCIDR=fd00:10:240::/44"
fi
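# Note: the native-routing CIDRs above are supersets of both clusters' pod
# CIDRs (10.242.0.0/16 and 10.244.0.0/16 lie within 10.240.0.0/12;
# fd00:10:242::/48 and fd00:10:244::/48 lie within fd00:10:240::/44), so
# cross-cluster pod traffic remains routable without tunneling.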
case "${{ matrix.ipFamily }}" in
ipv4)
CILIUM_INSTALL_IPFAMILY="--helm-set=ipv4.enabled=true --helm-set=ipv6.enabled=false"
KIND_POD_CIDR_1="10.242.0.0/16"
KIND_SVC_CIDR_1="10.243.0.0/16"
KIND_POD_CIDR_2="10.244.0.0/16"
KIND_SVC_CIDR_2="10.245.0.0/16"
;;
ipv6)
CILIUM_INSTALL_IPFAMILY="--helm-set=ipv4.enabled=false --helm-set=ipv6.enabled=true"
KIND_POD_CIDR_1="fd00:10:242::/48"
KIND_SVC_CIDR_1="fd00:10:243::/112"
KIND_POD_CIDR_2="fd00:10:244::/48"
KIND_SVC_CIDR_2="fd00:10:245::/112"
;;
dual)
CILIUM_INSTALL_IPFAMILY="--helm-set=ipv4.enabled=true --helm-set=ipv6.enabled=true"
KIND_POD_CIDR_1="10.242.0.0/16,fd00:10:242::/48"
KIND_SVC_CIDR_1="10.243.0.0/16,fd00:10:243::/112"
KIND_POD_CIDR_2="10.244.0.0/16,fd00:10:244::/48"
KIND_SVC_CIDR_2="10.245.0.0/16,fd00:10:245::/112"
;;
*)
echo "Unknown IP family '${{ matrix.ipFamily }}'" && false
;;
esac
CILIUM_INSTALL_ENCRYPTION=""
if [ "${{ matrix.encryption }}" != "disabled" ]; then
CILIUM_INSTALL_ENCRYPTION="--helm-set=encryption.enabled=true \
--helm-set=encryption.type=${{ matrix.encryption }}"
fi
CONNECTIVITY_TEST_DEFAULTS="--hubble=false \
--flow-validation=disabled \
--multi-cluster=${{ env.contextName2 }} \
--external-target=google.com \
--collect-sysdump-on-failure"
# Skip external traffic (e.g. 1.1.1.1 and www.google.com) tests as IPv6 is not supported
# in GitHub runners: https://github.com/actions/runner-images/issues/668
if [[ "${{ matrix.ipFamily }}" == "ipv6" ]]; then
CONNECTIVITY_TEST_DEFAULTS="$CONNECTIVITY_TEST_DEFAULTS \
--test='!/pod-to-world' \
--test='!/pod-to-cidr'"
fi
echo cilium_install_defaults="${CILIUM_INSTALL_DEFAULTS} ${CILIUM_INSTALL_TUNNEL} \
${CILIUM_INSTALL_IPFAMILY} ${CILIUM_INSTALL_ENCRYPTION}" >> $GITHUB_OUTPUT
echo connectivity_test_defaults=${CONNECTIVITY_TEST_DEFAULTS} >> $GITHUB_OUTPUT
echo sha=${SHA} >> $GITHUB_OUTPUT
echo kind_pod_cidr_1=${KIND_POD_CIDR_1} >> $GITHUB_OUTPUT
echo kind_svc_cidr_1=${KIND_SVC_CIDR_1} >> $GITHUB_OUTPUT
echo kind_pod_cidr_2=${KIND_POD_CIDR_2} >> $GITHUB_OUTPUT
echo kind_svc_cidr_2=${KIND_SVC_CIDR_2} >> $GITHUB_OUTPUT
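# Illustrative: for a dual-stack matrix entry, the lines above write outputs
# such as
#   kind_pod_cidr_1=10.242.0.0/16,fd00:10:242::/48
#   kind_svc_cidr_1=10.243.0.0/16,fd00:10:243::/112
# which later steps consume via steps.vars.outputs.<name>.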
- name: Install Cilium CLI
uses: cilium/cilium-cli@7f33713a0710a1fff76cfe1b7fd7fbaea2ce7977 # v0.15.8
with:
release-version: ${{ env.cilium_cli_version }}
ci-version: ${{ env.cilium_cli_ci_version }}
- name: Generate Kind configuration files
run: |
K8S_VERSION=${{ env.k8s_version }} \
PODCIDR=${{ steps.vars.outputs.kind_pod_cidr_1 }} \
SVCCIDR=${{ steps.vars.outputs.kind_svc_cidr_1 }} \
IPFAMILY=${{ matrix.ipfamily }} \
KUBEPROXYMODE=${{ matrix.kube-proxy }} \
envsubst < ./.github/kind-config.yaml.tmpl > ./.github/kind-config-cluster1.yaml
K8S_VERSION=${{ env.k8s_version }} \
PODCIDR=${{ steps.vars.outputs.kind_pod_cidr_2 }} \
SVCCIDR=${{ steps.vars.outputs.kind_svc_cidr_2 }} \
IPFAMILY=${{ matrix.ipfamily }} \
KUBEPROXYMODE=${{ matrix.kube-proxy }} \
envsubst < ./.github/kind-config.yaml.tmpl > ./.github/kind-config-cluster2.yaml
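# envsubst fills in the K8S_VERSION, PODCIDR, SVCCIDR, IPFAMILY and
# KUBEPROXYMODE placeholders of the shared template, yielding one Kind
# config per cluster that differs only in its CIDR allocations.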
- name: Create Kind cluster 1
uses: helm/kind-action@dda0770415bac9fc20092cacbc54aa298604d140 # v1.8.0
with:
cluster_name: ${{ env.clusterName1 }}
version: ${{ env.kind_version }}
kubectl_version: ${{ env.k8s_version }}
config: ./.github/kind-config-cluster1.yaml
wait: 0 # The control-plane never becomes ready, since no CNI is present
- name: Create Kind cluster 2
uses: helm/kind-action@dda0770415bac9fc20092cacbc54aa298604d140 # v1.8.0
with:
cluster_name: ${{ env.clusterName2 }}
version: ${{ env.kind_version }}
kubectl_version: ${{ env.k8s_version }}
config: ./.github/kind-config-cluster2.yaml
wait: 0 # The control-plane never becomes ready, since no CNI is present
# Make sure that coredns uses IPv4-only upstream DNS servers also for
# dual-stack clusters, since IPv6 ones are not reachable and cause spurious failures.
- name: Configure the coredns nameservers
if: matrix.ipfamily == 'dual'
run: |
COREDNS_PATCH="
spec:
template:
spec:
dnsPolicy: None
dnsConfig:
nameservers:
- 8.8.4.4
- 8.8.8.8
"
kubectl --context ${{ env.contextName1 }} patch deployment -n kube-system coredns --patch="$COREDNS_PATCH"
kubectl --context ${{ env.contextName2 }} patch deployment -n kube-system coredns --patch="$COREDNS_PATCH"
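# Illustrative check (not run here): the applied DNS config can be inspected with
#   kubectl --context <context> get deployment -n kube-system coredns \
#     -o jsonpath='{.spec.template.spec.dnsConfig.nameservers}'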
- name: Wait for images to be available
timeout-minutes: 30
shell: bash
run: |
for image in cilium-ci operator-generic-ci hubble-relay-ci clustermesh-apiserver-ci kvstoremesh-ci ; do
until docker manifest inspect quay.io/${{ env.QUAY_ORGANIZATION_DEV }}/$image:${{ steps.vars.outputs.sha }} &> /dev/null; do sleep 45s; done
done
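# docker manifest inspect exits non-zero until the tag exists on quay.io, so
# each loop polls (every 45s) until the CI image build for this SHA has been
# pushed.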
# Warning: since this is a privileged workflow, subsequent workflow job
# steps must take care not to execute untrusted code.
- name: Checkout pull request branch (NOT TRUSTED)
uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac # v4.0.0
with:
ref: ${{ steps.vars.outputs.sha }}
persist-credentials: false
- name: Create the IPSec secret in both clusters
if: matrix.encryption == 'ipsec'
run: |
SECRET="3 rfc4106(gcm(aes)) $(openssl rand -hex 20) 128"
kubectl --context ${{ env.contextName1 }} create -n kube-system secret generic cilium-ipsec-keys --from-literal=keys="${SECRET}"
kubectl --context ${{ env.contextName2 }} create -n kube-system secret generic cilium-ipsec-keys --from-literal=keys="${SECRET}"
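# The secret follows the ip-xfrm AEAD key format used for Cilium IPsec:
# SPI (3), algorithm (AES-GCM via rfc4106), a random 20-byte hex key
# (16-byte key plus 4-byte salt), and a 128-bit ICV length.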
- name: Install Cilium in cluster1
run: |
# Explicitly configure the NodePort to make sure that it is different in
# each cluster, to work around #24692
cilium --context ${{ env.contextName1 }} install \
${{ steps.vars.outputs.cilium_install_defaults }} \
--helm-set cluster.name=${{ env.clusterName1 }} \
--helm-set cluster.id=1 \
--helm-set clustermesh.apiserver.service.nodePort=32379 \
--helm-set clustermesh.apiserver.tls.authMode=${{ matrix.cm-auth-mode-1 }}
- name: Copy the Cilium CA secret to cluster2, as the CAs must match across clusters
run: |
kubectl --context ${{ env.contextName1 }} get secret -n kube-system cilium-ca -o yaml |
kubectl --context ${{ env.contextName2 }} create -f -
- name: Install Cilium in cluster2
run: |
# Explicitly configure the NodePort to make sure that it is different in
# each cluster, to work around #24692
cilium --context ${{ env.contextName2 }} install \
${{ steps.vars.outputs.cilium_install_defaults }} \
--helm-set cluster.name=${{ env.clusterName2 }} \
--helm-set cluster.id=255 \
--helm-set clustermesh.apiserver.service.nodePort=32380 \
--helm-set clustermesh.apiserver.tls.authMode=${{ matrix.cm-auth-mode-2 }}
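# Cluster IDs must be unique across the mesh and fall within 1-255; picking
# the two extremes of the range here also exercises the boundary values.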
- name: Wait for cluster mesh components to be ready
run: |
cilium --context ${{ env.contextName1 }} status --wait
cilium --context ${{ env.contextName2 }} status --wait
cilium --context ${{ env.contextName1 }} clustermesh status --wait
cilium --context ${{ env.contextName2 }} clustermesh status --wait
- name: Connect clusters
run: |
cilium --context ${{ env.contextName1 }} clustermesh connect --destination-context ${{ env.contextName2 }}
- name: Wait for cluster mesh connection to be established
run: |
cilium --context ${{ env.contextName1 }} status --wait
cilium --context ${{ env.contextName2 }} status --wait
cilium --context ${{ env.contextName1 }} clustermesh status --wait
cilium --context ${{ env.contextName2 }} clustermesh status --wait
- name: Port forward Relay
run: |
cilium --context ${{ env.contextName1 }} hubble port-forward &
sleep 10s
[[ $(pgrep -f "cilium.*hubble.*port-forward|kubectl.*port-forward.*hubble-relay" | wc -l) == 2 ]]
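# The check above expects exactly two processes: the cilium CLI port-forward
# wrapper and the kubectl port-forward to hubble-relay that it spawns.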
- name: Make JUnit report directory
run: |
mkdir -p cilium-junits
- name: Run connectivity test (${{ join(matrix.*, ', ') }})
run: |
cilium --context ${{ env.contextName1 }} connectivity test ${{ steps.vars.outputs.connectivity_test_defaults }} \
--junit-file "cilium-junits/${{ env.job_name }} (${{ join(matrix.*, ', ') }}).xml" \
--junit-property github_job_step="Run connectivity test (${{ join(matrix.*, ', ') }})"
- name: Post-test information gathering
if: ${{ !success() }}
run: |
cilium --context ${{ env.contextName1 }} status
cilium --context ${{ env.contextName1 }} clustermesh status
cilium --context ${{ env.contextName2 }} status
cilium --context ${{ env.contextName2 }} clustermesh status
kubectl config use-context ${{ env.contextName1 }}
kubectl get pods --all-namespaces -o wide
cilium sysdump --output-filename cilium-sysdump-context1-final-${{ join(matrix.*, '-') }}
kubectl config use-context ${{ env.contextName2 }}
kubectl get pods --all-namespaces -o wide
cilium sysdump --output-filename cilium-sysdump-context2-final-${{ join(matrix.*, '-') }}
shell: bash {0} # Disable default fail-fast behaviour so that all commands run independently
- name: Upload artifacts
if: ${{ !success() }}
uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32 # v3.1.3
with:
name: cilium-sysdumps
path: cilium-sysdump-*.zip
retention-days: 5
- name: Upload JUnits [junit]
if: ${{ always() }}
uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32 # v3.1.3
with:
name: cilium-junits
path: cilium-junits/*.xml
retention-days: 2
- name: Publish Test Results As GitHub Summary
if: ${{ always() }}
uses: aanm/junit2md@332ebf0fddd34e91b03a832cfafaa826306558f9 # v0.0.3
with:
junit-directory: "cilium-junits"
commit-status-final:
if: ${{ always() }}
name: Commit Status Final
needs: installation-and-connectivity
runs-on: ubuntu-latest
steps:
- name: Set final commit status
uses: myrotvorets/set-commit-status-action@38f3f27c7d52fb381273e95542f07f0fba301307 # v2.0.0
with:
sha: ${{ inputs.SHA || github.sha }}
status: ${{ needs.installation-and-connectivity.result }}