# .github/workflows/image.yml

# Display name of this workflow.
name: 'Build image and test'

on:
  # Scheduled rebuild, once a week (minute 20, hour 4, day-of-week 1).
  schedule:
    - cron: "20 4 * * 1"

  # Allow manual runs.
  workflow_dispatch:

  # Every push to the main branch.
  push:
    branches:
      - main

  # Every pull request (default activity types).
  pull_request:

concurrency:
  # One group per pull request, keyed by PR number, so a new push to a PR
  # cancels the run still in progress for that same PR. The PR number is used
  # instead of the head branch name because branch names are not unique across
  # forks: two unrelated PRs with the same head branch name would otherwise
  # cancel each other.
  # For every other event there is no PR number, so the group falls back to
  # the run id, which is unique per run: those runs are never cancelled.
  group: "${{ github.workflow }}-${{ github.event.pull_request.number || github.run_id }}"
  cancel-in-progress: true

env:
  # The next three values are only referenced by the (currently commented-out)
  # kserve smoke test job in this file.
  KIND_CLUSTER_NAME: 'kserve-testing'
  ISVC_NAME: 'vllm-tgis-isvc'
  TEST_NS: 'kserve-demo'

  # Tag given to the locally built image.
  # NOTE: knative serving attempts to resolve image tags unless the image lives
  # in a registry that is excluded from tag resolution, hence `kind.local`.
  # The relevant entry in the deployment configmap is:
  #   registries-skipping-tag-resolving: "kind.local,ko.local,dev.local"
  DEV_IMAGE: 'kind.local/vllm-tgis:dev'

  # Repository on Quay that the built image is pushed to.
  QUAY_IMAGE: 'quay.io/dtrifiro/vllm-tgis'

jobs:
  # Builds the container image from ./Dockerfile and pushes it to Quay.
  # Pull requests opened from forks still build the image, but skip the
  # registry login and the push (see the `if:` conditions below).
  build-image:
    timeout-minutes: 40
    name: "Build image"
    runs-on: ubuntu-latest
    # Least privilege for GITHUB_TOKEN: this job only reads the repository.
    permissions:
      contents: read
    steps:
      - uses: actions/checkout@v4

      # Reclaim runner disk space before building; the cleanup categories
      # listed below are explicitly switched off.
      - name: Free Disk Space
        uses: jlumbroso/free-disk-space@v1.3.1
        with:
          tool-cache: false
          large-packages: false
          docker-images: false

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      # TODO: setup caching for mount=type=cache ?
      # - .nox (nox envs)
      # - /root/.cache/pip
      # https://docs.docker.com/build/ci/github-actions/cache/#cache-mounts

      # Repository secrets are not passed to workflows triggered by pull
      # requests from forks, so logging in would fail there. Only log in when
      # the run is not a pull request, or the pull request comes from this
      # repository.
      - name: Login to Quay
        if: >-
          github.event_name != 'pull_request' ||
          github.event.pull_request.head.repo.full_name == github.repository
        uses: docker/login-action@v3
        with:
          registry: quay.io
          username: ${{ secrets.QUAY_USERNAME }}
          password: ${{ secrets.QUAY_TOKEN }}

      # First build: tags the image as DEV_IMAGE, writes it as a docker-format
      # tarball to /tmp/image.tar and populates the GitHub Actions build cache.
      - name: Build and export
        uses: docker/build-push-action@v5
        with:
          context: .
          file: ./Dockerfile
          tags: ${{ env.DEV_IMAGE }}
          # outputs: type=oci,dest=/tmp/image.tar
          outputs: type=docker,dest=/tmp/image.tar
          cache-from: type=gha
          cache-to: type=gha,mode=max

      # Compute the Quay tags/labels: `latest` (main branch only), the commit
      # sha, the branch name and the pull request reference.
      - name: Docker meta
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: ${{ env.QUAY_IMAGE }}
          tags: |
            type=raw,value=latest,enable=${{ github.ref == format('refs/heads/{0}', 'main') }}
            type=sha
            type=ref,event=branch
            type=ref,event=pr

      # Despite its name, this step does not push: it rebuilds the image
      # (reading from the GitHub Actions cache) with the Quay tags and loads it
      # into the local Docker daemon. The actual push is done by the next step.
      - name: Push image
        uses: docker/build-push-action@v5
        with:
          context: .
          file: ./Dockerfile
          load: true
          cache-from: type=gha
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}

      # Skipped for pull requests from forks, where the Quay login above is
      # skipped as well and a push could not authenticate.
      - name: "Push to quay" # NOTE: we need this step since build and push hangs on push
        if: >-
          github.event_name != 'pull_request' ||
          github.event.pull_request.head.repo.full_name == github.repository
        run: |
          docker push --all-tags "${QUAY_IMAGE}"

      # - name: Upload artifact
      #   uses: actions/upload-artifact@v4
      #   with:
      #     name: vllm-tgis
      #     path: /tmp/image.tar

  # Temporarily disabled: need to make sure the runner can run vLLM (AVX512 instruction set or GPU)
  # kserve-smoke-test:
  #   needs: build-image

  #   name: Kserve Smoke Test
  #   runs-on: ubuntu-latest
  #   steps:
  #     - uses: actions/checkout@v4

  #     - name: Setup kind/kserve
  #       uses: dtrifiro/setup-kserve@v0.0.2
  #       with:
  #         cluster_name: ${{ env.KIND_CLUSTER_NAME }}
  #         namespace: ${{ env.TEST_NS }}

  #     - name: Download built image
  #       uses: actions/download-artifact@v4

  #     - name: Load built image into kind
  #       run: |
  #         kind load image-archive --name ${{ env.KIND_CLUSTER_NAME }} vllm-tgis/image.tar

  #     - name: Free Disk Space
  #       uses: jlumbroso/free-disk-space@v1.3.1
  #       with:
  #         tool-cache: false

  #     - name: Setup flan-t5-small model volume
  #       run: |
  #         sed 's|quay.io/dtrifiro/vllm-tgis:fast|${{ env.DEV_IMAGE }}|g' .github/test/kserve/setup.yaml | \
  #           kubectl apply -f -

  #         max_retries=10
  #         wait_time=60s
  #         until kubectl wait --for=jsonpath='{.status.phase}'=Succeeded pod/setup-flan-t5-small --timeout ${wait_time}; do
  #           echo "Current status:"
  #           kubectl describe pod,pv,pvc
  #           max_retries=$((max_retries-1))
  #           if [[ $max_retries -le 0 ]]; then
  #             echo "Failed to setup model"
  #             kubectl logs pod/setup-flan-t5-small --all-containers
  #             exit 1
  #           fi
  #           echo "-------------------"
  #         done

  #     - name: Deploy ServingRuntime/InferenceService
  #       run: |
  #         sed 's|quay.io/opendatahub/vllm:fast|${{ env.DEV_IMAGE }}|g' .github/test/kserve/vllm-tgis.yaml | \
  #           kubectl apply -f -

  #         max_retries=10
  #         wait_time=60s
  #         until kubectl wait isvc/${ISVC_NAME} --for=condition=Ready --timeout=${wait_time}; do
  #           echo "Current status:"
  #           kubectl describe isvc,servingruntime,pod
  #           max_retries=$((max_retries-1))
  #           if [[ $max_retries -le 0 ]]; then
  #             exit 1
  #           fi
  #           echo "-------------------"
  #         done

  #     - name: Perform test inference (http)
  #       run: |
  #         export ISVC_URL="$(kubectl get isvc ${ISVC_NAME} -o jsonpath='{.status.components.predictor.url}')"
  #         export ISVC_HOSTNAME=$(echo $ISVC_URL | cut -d/ -f3-)
  #         echo "Querying ISVC at: ${ISVC_URL}"

  #         # We can't query the service via hostname, we need to add the entry to /etc/hosts
  #         echo "127.0.0.1 ${ISVC_HOSTNAME}" | sudo tee -a /etc/hosts

  #         python examples/inference.py