Skip to content

Resolve vulnerabilities in Kubeflow-containers #3352

Resolve vulnerabilities in Kubeflow-containers

Resolve vulnerabilities in Kubeflow-containers #3352

Workflow file for this run

# This workflow:
# * Builds, tests, and scans all images
# * (optionally) pushes the images to ACR
#
# This workflow triggers on:
# * a push to master
# * any create/synchronize to a PR (eg: any time you push an update to a PR).
#
# Image build/test/scan will run on any of the above events.
# Image push will run only if:
# * this is a push to master
# * if the PR triggering this event has the label 'auto-deploy'
#
# To configure this workflow:
#
# 1. Set up the following secrets in your workspace:
# a. REGISTRY_USERNAME with ACR username
# b. REGISTRY_PASSWORD with ACR Password
# c. AZURE_CREDENTIALS with the output of `az ad sp create-for-rbac --sdk-auth`
# d. DEV_REGISTRY_USERNAME with the DEV ACR username
# e. DEV_REGISTRY_PASSWORD with the DEV ACR Password
#
# 2. Change the values for the REGISTRY_NAME, CLUSTER_NAME, CLUSTER_RESOURCE_GROUP and NAMESPACE environment variables (below in build-push).
name: build_and_push
on:
schedule:
# Execute at 2am EST every day
- cron: '0 21 * * *'
push:
branches:
- 'master-2.0'
pull_request:
types:
- 'opened'
- 'synchronize'
- 'reopened'
env:
SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}
jobs:
# Any checks that run pre-build
pre-build-checks:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@master
- name: Assert committed ./output folder matches `make generate-dockerfiles` output
run: |
sudo apt-get install --yes make
make clean
make generate-dockerfiles
if ! git diff --quiet output/; then
echo 'output folder and docker-bits/resources out of sync!'
exit 1
fi
build-push:
env:
REGISTRY_NAME: k8scc01covidacr
DEV_REGISTRY_NAME: k8scc01covidacrdev
CLUSTER_NAME: k8s-cancentral-01-covid-aks
CLUSTER_RESOURCE_GROUP: k8s-cancentral-01-covid-aks
LOCAL_REPO: localhost:5000
TRIVY_VERSION: "v0.57.0"
TRIVY_DATABASES: '"ghcr.io/aquasecurity/trivy-db:2","public.ecr.aws/aquasecurity/trivy-db"'
TRIVY_JAVA_DATABASES: '"ghcr.io/aquasecurity/trivy-java-db:1","public.ecr.aws/aquasecurity/trivy-java-db"'
TRIVY_MAX_RETRIES: 5
TRIVY_RETRY_DELAY: 20
HADOLINT_VERSION: "2.12.0"
strategy:
fail-fast: false
matrix:
notebook:
# TODO: Pull this from a settings file or Makefile, that way Make can have the same list
- sas
- jupyterlab-cpu
needs: pre-build-checks
runs-on: ubuntu-latest
services:
registry:
image: registry:2
ports:
- 5000:5000
steps:
# pushing to regular acr instead of dev acr since the dev one isnt accessible to the zone
- name: Set ENV variables for a PR containing the auto-deploy tag
if: github.event_name == 'pull_request' && contains( github.event.pull_request.labels.*.name, 'auto-deploy')
run: |
echo "REGISTRY=k8scc01covidacr.azurecr.io" >> "$GITHUB_ENV"
echo "IMAGE_VERSION=dev" >> "$GITHUB_ENV"
- name: Set ENV variables for pushes to master
if: github.event_name == 'push' && github.ref == 'refs/heads/master-2.0'
run: |
echo "REGISTRY=k8scc01covidacr.azurecr.io" >> "$GITHUB_ENV"
echo "IMAGE_VERSION=v2" >> "$GITHUB_ENV"
echo "IS_LATEST=true" >> "$GITHUB_ENV"
- uses: actions/checkout@master
- name: Echo disk usage before clean up
run: ./.github/scripts/echo_usage.sh
- name: Free up all available disk space before building
run: ./.github/scripts/cleanup_runner.sh
- name: Echo disk usage before build start
run: ./.github/scripts/echo_usage.sh
- name: Get current notebook name
id: notebook-name
shell: bash
run: |
echo NOTEBOOK_NAME=${{ matrix.notebook }} >> $GITHUB_OUTPUT
# Connect to Azure Container registry (ACR)
- uses: azure/docker-login@v1
with:
login-server: ${{ env.REGISTRY_NAME }}.azurecr.io
username: ${{ secrets.REGISTRY_USERNAME }}
password: ${{ secrets.REGISTRY_PASSWORD }}
# Connect to Azure DEV Container registry (ACR)
- uses: azure/docker-login@v1
with:
login-server: ${{ env.DEV_REGISTRY_NAME }}.azurecr.io
username: ${{ secrets.DEV_REGISTRY_USERNAME }}
password: ${{ secrets.DEV_REGISTRY_PASSWORD }}
# Image building/storing locally
- name: Make Dockerfiles
run: make generate-dockerfiles
- name: Run Hadolint
run: |
sudo curl -L https://github.com/hadolint/hadolint/releases/download/v${{ env.HADOLINT_VERSION }}/hadolint-Linux-x86_64 --output hadolint
sudo chmod +x hadolint
./hadolint output/${{ matrix.notebook }}/Dockerfile --no-fail
# make build emits full_image_name, image_tag, and image_repo outputs
- name: Build image
id: build-image
run: make build/${{ matrix.notebook }} REPO=${{ env.LOCAL_REPO }}
- name: Echo disk usage after build completion
run: ./.github/scripts/echo_usage.sh
- name: Add standard tag names (short sha, sha, and branch) and any other post-build activity
run: make post-build/${{ matrix.notebook }} REPO=${{ env.LOCAL_REPO }}
- name: Push image to local registry (default pushes all tags)
run: make push/${{ matrix.notebook }} REPO=${{ env.LOCAL_REPO }}
# Image testing
- name: Set Up Python for Test Suite
uses: actions/setup-python@v4
with:
python-version: '3.10'
- name: Set up venv for Test Suite
run: |
python -m pip install --upgrade pip
make install-python-dev-venv
- name: Test image
run: make test/${{ matrix.notebook }} REPO=${{ env.LOCAL_REPO }}
# Free up space from build process (containerscan action will run out of space if we don't)
- run: ./.github/scripts/cleanup_runner.sh
# Scan image for vulnerabilities
- name: Aqua Security Trivy image scan
if: true
run: |
printf ${{ secrets.CVE_ALLOWLIST }} > .trivyignore
curl -sfL https://raw.githubusercontent.com/aquasecurity/trivy/main/contrib/install.sh | sh -s -- -b /usr/local/bin ${{ env.TRIVY_VERSION }}
set +e # Lets trivy return an error without it being fatal
for ((i=0; i<${{ env.TRIVY_MAX_RETRIES }}; i++)); do
echo "Attempt $((i + 1)) of ${{ env.TRIVY_MAX_RETRIES }}..."
trivy image \
--db-repository ${{ env.TRIVY_DATABASES }} \
--java-db-repository ${{ env.TRIVY_JAVA_DATABASES }} \
${{ steps.build-image.outputs.full_image_name }} \
--exit-code 10 --timeout=20m --scanners vuln --severity CRITICAL \
--skip-dirs /usr/local/SASHome
EXIT_CODE=$?
if [[ $EXIT_CODE -eq 0 ]]; then
echo "Trivy scan completed successfully."
exit 0
elif [[ $EXIT_CODE -eq 10 ]]; then
echo "Trivy scan completed successfully. Some vulnerabilities were found."
exit 0
elif [[ $i -lt $(( ${{ env.TRIVY_MAX_RETRIES }} - 1)) ]]; then
echo "Encountered unexpected error. Retrying in ${{ env.TRIVY_RETRY_DELAY }} seconds..."
sleep ${{ env.TRIVY_RETRY_DELAY }}
else
echo "Unexpected error persists after ${{ env.TRIVY_MAX_RETRIES }} attempts. Exiting."
exit 1
fi
done
# Push image to ACR
# Pushes if this is a push to master or an update to a PR that has auto-deploy label
- name: Test if we should push to ACR
id: should-i-push
if: |
github.event_name == 'push' ||
(
github.event_name == 'pull_request' &&
contains( github.event.pull_request.labels.*.name, 'auto-deploy')
)
run: echo 'boolean=true' >> $GITHUB_OUTPUT
# Pull the local image back, then "build" it (will just tag the pulled image)
- name: Pull image back from local repo
if: steps.should-i-push.outputs.boolean == 'true'
run: docker pull ${{ steps.build-image.outputs.full_image_name }}
# Rename the localhost:5000/imagename:tag built above to use the real repo
# (get above's name from build-image's output)
- name: Tag images with real repository
if: steps.should-i-push.outputs.boolean == 'true'
run: >
make post-build/${{ matrix.notebook }} DEFAULT_REPO=$REGISTRY IS_LATEST=$IS_LATEST
IMAGE_VERSION=$IMAGE_VERSION SOURCE_FULL_IMAGE_NAME=${{ steps.build-image.outputs.full_image_name }}
- name: Push image to registry
if: steps.should-i-push.outputs.boolean == 'true'
run: |
make push/${{ matrix.notebook }} DEFAULT_REPO=$REGISTRY
- name: Slack Notification
if: failure() && github.event_name=='schedule'
uses: act10ns/slack@v1
with:
status: failure
message: Build failed. https://github.com/StatCan/aaw-kubeflow-containers/actions/runs/${{github.run_id}}