Skip to content

feat(controller): retry strategy support on daemon containers, fixes #13705 #23850

feat(controller): retry strategy support on daemon containers, fixes #13705

feat(controller): retry strategy support on daemon containers, fixes #13705 #23850

Workflow file for this run

name: CI
on:
push:
branches:
- "main"
- "release-*"
- "!release-2.8"
pull_request:
branches:
- "main"
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
permissions:
contents: read
jobs:
changed-files:
name: Get changed files
outputs:
# reference: https://github.com/tj-actions/changed-files#outputs-
tests: ${{ steps.changed-files.outputs.tests_any_modified == 'true' }}
e2e-tests: ${{ steps.changed-files.outputs.e2e-tests_any_modified == 'true' }}
codegen: ${{ steps.changed-files.outputs.codegen_any_modified == 'true' }}
lint: ${{ steps.changed-files.outputs.lint_any_modified == 'true' }}
ui: ${{ steps.changed-files.outputs.ui_any_modified == 'true' }}
runs-on: ubuntu-24.04
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
with:
fetch-depth: 50 # assume PRs are less than 50 commits
- name: Get relevant files changed per group
id: changed-files
uses: tj-actions/changed-files@cbda684547adc8c052d50711417fa61b428a9f88 # v41.1.2
with:
files_yaml: |
common: &common
- .github/workflows/ci-build.yaml
- Makefile
- tasks.yaml
tests: &tests
- *common
- cmd/**
- config/**
- errors/**
- persist/**
- pkg/**
- server/**
- test/**
- util/**
- workflow/**
- go.mod
- go.sum
e2e-tests:
- *tests
# plus manifests and SDKs that are used in E2E tests
- Dockerfile
- manifests/**
- sdks/**
# example test suite
- examples/**
- hack/test-examples.sh
codegen:
- *common
# generated files
- api/**
- docs/fields.md
- docs/executor_swagger.md
- docs/cli/**
- pkg/**
- sdks/java/**
- sdks/python/**
# files that generation is based off
- pkg/**
- cmd/**
- examples/** # examples are used within the fields lists
- manifests/** # a few of these are generated and committed
# generation scripts
- hack/api/**
- hack/docs/**
- hack/manifests/**
- .clang-format
lint:
- *tests
# plus lint config
- .golangci.yml
# all GH workflows / actions
- .github/workflows/**
# docs files below
- docs/**
# generated files are covered by codegen
- '!docs/fields.md'
- '!docs/executor_swagger.md'
- '!docs/cli/**'
# proposals live only on GH as pure markdown
- '!docs/proposals/**'
# docs scripts & tools from `make docs`
- hack/docs/copy-readme.sh
- hack/docs/check-env-doc.sh
- .markdownlint.yaml
- .mlc_config.json
- .spelling
- mkdocs.yml
ui:
- *common
- ui/**
tests:
name: Unit Tests
needs: [ changed-files ]
if: ${{ needs.changed-files.outputs.tests == 'true' }}
runs-on: ubuntu-24.04
timeout-minutes: 10
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
- uses: actions/setup-go@0c52d547c9bc32b1aa3301fd7a9cb496313a4491 # v5.0.0
with:
go-version: "1.23"
cache: true
- run: make test STATIC_FILES=false GOTEST='go test -p 20 -covermode=atomic -coverprofile=coverage.out'
- name: Upload coverage report
# engineers just ignore this in PRs, so lets not even run it
if: github.ref == 'refs/heads/main'
uses: codecov/codecov-action@84508663e988701840491b86de86b666e8a86bed # v4.3.0
with:
fail_ci_if_error: true
env:
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
tests-windows:
name: Windows Unit Tests
needs: [ changed-files ]
if: ${{ needs.changed-files.outputs.tests == 'true' }}
runs-on: windows-2022
timeout-minutes: 20
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
- uses: actions/setup-go@0c52d547c9bc32b1aa3301fd7a9cb496313a4491 # v5.0.0
with:
go-version: "1.23"
cache: true
# windows run does not use makefile target because it does a lot more than just testing and is not cross-platform compatible
- run: if (!(Test-Path "ui/dist/app/index.html")) { New-Item -ItemType Directory -Force -Path "ui/dist/app" | Out-Null; New-Item -ItemType File -Path "ui/dist/app/placeholder" | Out-Null }; go test -p 20 -covermode=atomic -coverprofile='coverage.out' $(go list ./... | select-string -Pattern 'github.com/argoproj/argo-workflows/v3/workflow/controller' , 'github.com/argoproj/argo-workflows/v3/server' -NotMatch)
env:
KUBECONFIG: /dev/null
- name: Upload coverage report
# engineers just ignore this in PRs, so lets not even run it
if: github.ref == 'refs/heads/main'
uses: codecov/codecov-action@84508663e988701840491b86de86b666e8a86bed # v4.3.0
with:
fail_ci_if_error: true
env:
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
argo-images:
name: argo-images
# needs: [ lint ]
runs-on: ubuntu-24.04
timeout-minutes: 10
strategy:
fail-fast: false
matrix:
include:
- image: argoexec
- image: argocli
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
- uses: docker/setup-buildx-action@f95db51fddba0c2d1ec667646a06c2ce06100226 # v3.0.0
- name: Build and export
uses: docker/build-push-action@4a13e500e55cf31b7a5d59a38ab2040ab0f42f56 # v5.1.0
with:
context: .
tags: quay.io/argoproj/${{matrix.image}}:latest
outputs: type=docker,dest=/tmp/${{matrix.image}}_image.tar
target: ${{matrix.image}}
cache-from: type=gha
cache-to: type=gha,mode=max
- name: Upload
uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32 # v3.1.3
with:
name: ${{matrix.image}}_image.tar
path: /tmp/${{matrix.image}}_image.tar
if-no-files-found: error
e2e-tests:
name: E2E Tests
needs: [ changed-files, argo-images ]
if: ${{ needs.changed-files.outputs.e2e-tests == 'true' }}
runs-on: ubuntu-24.04
# These tests usually finish in ~25m, but occasionally they take much longer due to resource
# contention on the runner, which we have no control over.
timeout-minutes: 60
env:
KUBECONFIG: /home/runner/.kubeconfig
E2E_ENV_FACTOR: 2
strategy:
fail-fast: false
matrix:
include:
- test: test-executor
profile: minimal
use-api: false
- test: test-corefunctional
profile: minimal
use-api: false
- test: test-functional
profile: minimal
use-api: false
- test: test-api
profile: mysql
use-api: true
- test: test-cli
profile: mysql
use-api: true
- test: test-cron
profile: minimal
use-api: false
- test: test-examples
profile: minimal
use-api: false
- test: test-plugins
profile: plugins
use-api: false
- test: test-java-sdk
profile: minimal
use-api: true
- test: test-python-sdk
profile: minimal
use-api: true
- test: test-executor
install_k3s_version: v1.29.10+k3s1
profile: minimal
use-api: false
- test: test-corefunctional
install_k3s_version: v1.29.10+k3s1
profile: minimal
use-api: false
- test: test-functional
install_k3s_version: v1.29.10+k3s1
profile: minimal
use-api: false
steps:
- name: Free up unused disk space
run: |
printf "==> Available space before cleanup\n"
df -h
# these directories are not used by E2E tests
sudo rm -rf /usr/share/dotnet /usr/local/lib/android /opt/ghc /usr/local/.ghcup /opt/hostedtoolcache/CodeQL
printf "==> Available space after cleanup\n"
df -h
- name: Install socat (needed by Kubernetes)
run: sudo apt-get -y install socat
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
- uses: actions/setup-go@0c52d547c9bc32b1aa3301fd7a9cb496313a4491 # v5.0.0
with:
go-version: "1.23"
cache: true
- name: Install Java for the SDK
if: ${{matrix.test == 'test-java-sdk'}}
uses: actions/setup-java@387ac29b308b003ca37ba93a6cab5eb57c8f5f93 # v4.0.0
with:
java-version: '8'
distribution: adopt
cache: maven
- name: Install Python for the SDK
if: ${{matrix.test == 'test-python-sdk'}}
uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0
with:
python-version: '3.x'
cache: pip
- name: Install and start K3S
run: |
if ! echo "${{ matrix.install_k3s_version }}" | egrep '^v[0-9]+\.[0-9]+\.[0-9]+\+k3s1$'; then
export INSTALL_K3S_VERSION=v1.31.2+k3s1
else
export INSTALL_K3S_VERSION=${{ matrix.install_k3s_version }}
fi
curl -sfL https://get.k3s.io | INSTALL_K3S_CHANNEL=stable \
INSTALL_K3S_EXEC="--docker --kubelet-arg=config=${GITHUB_WORKSPACE}/test/e2e/manifests/kubelet-configuration.yaml" \
K3S_KUBECONFIG_MODE=644 \
sh -
until kubectl --kubeconfig=/etc/rancher/k3s/k3s.yaml cluster-info ; do sleep 10s ; done
cp /etc/rancher/k3s/k3s.yaml /home/runner/.kubeconfig
echo "- name: fake_token_user" >> $KUBECONFIG
echo " user:" >> $KUBECONFIG
echo " token: xxxxxx" >> $KUBECONFIG
until kubectl cluster-info ; do sleep 10s ; done
- name: Download images
uses: actions/download-artifact@9bc31d5ccc31df68ecc42ccf4149144866c47d8a # v3.0.2
with:
pattern: '*_image.tar'
path: /tmp
- name: Load images
run: |
set -eux
docker load < /tmp/argoexec_image.tar/argoexec_image.tar
docker load < /tmp/argocli_image.tar/argocli_image.tar
- name: Set-up /etc/hosts
run: |
echo '127.0.0.1 dex' | sudo tee -a /etc/hosts
echo '127.0.0.1 minio' | sudo tee -a /etc/hosts
echo '127.0.0.1 postgres' | sudo tee -a /etc/hosts
echo '127.0.0.1 mysql' | sudo tee -a /etc/hosts
echo '127.0.0.1 azurite' | sudo tee -a /etc/hosts
- name: Install manifests
run: make install PROFILE=${{matrix.profile}} STATIC_FILES=false
- name: Build controller
run: make controller kit STATIC_FILES=false
- name: Build CLI
run: make cli STATIC_FILES=false
if: ${{matrix.use-api}}
- name: Start controller/API
run: |
make start PROFILE=${{matrix.profile}} \
AUTH_MODE=client STATIC_FILES=false \
LOG_LEVEL=info \
API=${{matrix.use-api}} \
UI=false \
POD_STATUS_CAPTURE_FINALIZER=true > /tmp/argo.log 2>&1 &
- name: Wait for controller to be up
run: make wait PROFILE=${{matrix.profile}} API=${{matrix.use-api}}
timeout-minutes: 5
- name: Run tests ${{matrix.test}}
run: make ${{matrix.test}} E2E_SUITE_TIMEOUT=20m STATIC_FILES=false
# failure debugging below
- name: Failure debug - k3s logs
if: ${{ failure() }}
run: journalctl -u k3s
- name: Failure debug - describe MinIO/MySQL deployment
if: ${{ failure() }}
run: |
set -eux
kubectl get deploy
kubectl describe deploy
- name: Failure debug - describe MinIO/MySQL pods
if: ${{ failure() }}
run: |
set -eux
kubectl get pods -l '!workflows.argoproj.io/workflow'
kubectl describe pods -l '!workflows.argoproj.io/workflow'
- name: Failure debug - MinIO/MySQL logs
if: ${{ failure() }}
run: kubectl logs -l '!workflows.argoproj.io/workflow' --prefix
- name: Failure debug - Controller/API logs
if: ${{ failure() }}
run: |
[ -e /tmp/argo.log ] && cat /tmp/argo.log
- if: ${{ failure() }}
name: Failure debug - describe Workflows
run: |
set -eux
kubectl get wf
kubectl describe wf
- name: Failure debug - describe Workflow pods
if: ${{ failure() }}
run: |
set -eux
kubectl get pods -l workflows.argoproj.io/workflow
kubectl describe pods -l workflows.argoproj.io/workflow
- name: Failure debug - Workflow Pod logs
if: ${{ failure() }}
run: kubectl logs --all-containers -l workflows.argoproj.io/workflow --prefix
# workaround for status checks -- check this one job instead of each individual E2E job in the matrix
# this allows us to skip the entire matrix when it doesn't need to run while still having accurate status checks
# see https://github.com/orgs/community/discussions/9141#discussioncomment-2296809 and https://github.com/orgs/community/discussions/26822#discussioncomment-3305794
e2e-tests-composite-result:
name: E2E Tests - Composite result
needs: [ e2e-tests ]
if: ${{ always() }}
runs-on: ubuntu-24.04
steps:
- run: |
result="${{ needs.e2e-tests.result }}"
# mark as successful even if skipped
if [[ $result == "success" || $result == "skipped" ]]; then
exit 0
else
exit 1
fi
codegen:
name: Codegen
needs: [ changed-files ]
if: ${{ needs.changed-files.outputs.codegen == 'true' }}
runs-on: ubuntu-24.04
timeout-minutes: 20
env:
GOPATH: /home/runner/go
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
- uses: actions/setup-go@0c52d547c9bc32b1aa3301fd7a9cb496313a4491 # v5.0.0
with:
go-version: "1.23"
cache: true
- name: Install protoc
run: |
set -eux -o pipefail
curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v3.19.4/protoc-3.19.4-linux-x86_64.zip
sudo unzip -o protoc-3.19.4-linux-x86_64.zip -d /usr/local bin/protoc
sudo unzip -o protoc-3.19.4-linux-x86_64.zip -d /usr/local 'include/*'
sudo chmod +x /usr/local/bin/protoc
sudo find /usr/local/include -type f | xargs sudo chmod a+r
sudo find /usr/local/include -type d | xargs sudo chmod a+rx
ls /usr/local/include/google/protobuf/
- name: Pull OpenAPI Generator CLI Docker image
run: |
docker pull openapitools/openapi-generator-cli:v5.4.0 &
docker pull openapitools/openapi-generator-cli:v5.2.1 &
- name: Create symlinks
run: |
mkdir -p /home/runner/go/src/github.com/argoproj
ln -s "$PWD" /home/runner/go/src/github.com/argoproj/argo-workflows
- run: make codegen -B STATIC_FILES=false
# if codegen makes changes that are not in the PR, fail the build
- name: Check if codegen made changes not present in the PR
run: git diff --exit-code
lint:
name: Lint
needs: [ changed-files ]
if: ${{ needs.changed-files.outputs.lint == 'true' }}
runs-on: ubuntu-24.04
timeout-minutes: 15 # must be strictly greater than the timeout in .golangci.yml
env:
GOPATH: /home/runner/go
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
- uses: actions/setup-go@0c52d547c9bc32b1aa3301fd7a9cb496313a4491 # v5.0.0
with:
go-version: "1.23"
cache: true
- run: make lint STATIC_FILES=false
# if lint makes changes that are not in the PR, fail the build
- name: Check if lint made changes not present in the PR
run: git diff --exit-code
# lint GH Actions
- name: Ensure GH Actions are pinned to SHAs
uses: zgosalvez/github-actions-ensure-sha-pinned-actions@ba37328d4ea95eaf8b3bd6c6cef308f709a5f2ec # v3.0.3
ui:
name: UI
needs: [ changed-files ]
if: ${{ needs.changed-files.outputs.ui == 'true' }}
runs-on: ubuntu-24.04
timeout-minutes: 6
env:
NODE_OPTIONS: --max-old-space-size=4096
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
- uses: actions/setup-node@b39b52d1213e96004bfcb1c61a8a6fa8ab84f3e8 # v4.0.1
with:
node-version: "20" # change in all GH Workflows
cache: yarn
cache-dependency-path: ui/yarn.lock
- run: yarn --cwd ui install
- run: yarn --cwd ui build
- run: yarn --cwd ui test
- run: yarn --cwd ui lint
- run: yarn --cwd ui deduplicate
# if lint or deduplicate make changes that are not in the PR, fail the build
- name: Check if lint & deduplicate made changes not present in the PR
run: git diff --exit-code
# check to see if it'll start (but not if it'll render)
- run: yarn --cwd ui start &
- run: until curl http://localhost:8080 > /dev/null ; do sleep 10s ; done
timeout-minutes: 1