# Build image and test — workflow file as captured from run #7.

# Workflow-level settings: display name, triggers, concurrency and shared
# environment variables.
name: "Build image and test"

# Triggers: weekly schedule, manual dispatch, pushes to main, and pull requests.
on:
  schedule:
    - cron: "20 4 * * 1" # once a week (Mondays, 04:20 UTC)
  workflow_dispatch:
  push:
    branches: [main]
  pull_request:

# Runs sharing a group cancel each other. For pull requests the group is keyed
# on the head branch (github.head_ref); for every other event head_ref is
# empty, so the unique run id is used and nothing is cancelled.
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true

env:
  KIND_CLUSTER_NAME: "kserve-testing"
  ISVC_NAME: "vllm-tgis-isvc"
  TEST_NS: "kserve-demo"
  # note: knative serving will try to resolve the image tag unless the registry is kind.local
  # See the deployment configmap:
  # registries-skipping-tag-resolving: "kind.local,ko.local,dev.local"
  DEV_IMAGE: "kind.local/vllm-tgis:dev"
  QUAY_IMAGE: "quay.io/dtrifiro/vllm-tgis"
jobs:
  # Builds the image from ./Dockerfile and exports it to /tmp/image.tar, then
  # rebuilds it from the GitHub Actions cache with the registry tags and pushes
  # those tags to Quay.
  build-image:
    timeout-minutes: 40
    name: "Build image"
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Free Disk Space
        # NOTE(review): the action reference was garbled in the source
        # ("jlumbroso/[email protected]"); presumably free-disk-space at its
        # latest tagged release — confirm the intended version.
        uses: jlumbroso/free-disk-space@v1.3.1
        with:
          tool-cache: false
          large-packages: false
          docker-images: false

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      # TODO: setup caching for mount=type=cache ?
      # - .nox (nox envs)
      # - /root/.cache/pip
      # https://docs.docker.com/build/ci/github-actions/cache/#cache-mounts
      - name: Login to Quay
        uses: docker/login-action@v3
        with:
          registry: quay.io
          username: ${{ secrets.QUAY_USERNAME }}
          password: ${{ secrets.QUAY_TOKEN }}

      # First build: tagged with the local dev image name, written to a
      # tarball, and used to populate the GitHub Actions build cache.
      - name: Build and export
        uses: docker/build-push-action@v5
        with:
          context: .
          file: ./Dockerfile
          tags: ${{ env.DEV_IMAGE }}
          # outputs: type=oci,dest=/tmp/image.tar
          outputs: type=docker,dest=/tmp/image.tar
          cache-from: type=gha
          cache-to: type=gha,mode=max

      # Computes the registry tags: `latest` only on main, plus commit-sha,
      # branch-name and PR tags.
      - name: Docker meta
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: ${{ env.QUAY_IMAGE }}
          tags: |
            type=raw,value=latest,enable=${{ github.ref == format('refs/heads/{0}', 'main') }}
            type=sha
            type=ref,event=branch
            type=ref,event=pr

      # Second build (served from the gha cache): `load: true` puts the image
      # into the local Docker daemon so the next step can push it.
      - name: Push image
        uses: docker/build-push-action@v5
        with:
          context: .
          file: ./Dockerfile
          load: true
          cache-from: type=gha
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}

      - name: "Push to quay" # NOTE: we need this step since build and push hangs on push
        run: |
          docker push --all-tags "${QUAY_IMAGE}"

      # - name: Upload artifact
      #   uses: actions/upload-artifact@v4
      #   with:
      #     name: vllm-tgis
      #     path: /tmp/image.tar
# Temporarily disabled: need to make sure the runner can run vLLM (AVX512 instruction set or GPU)
#
# Disabled job definition, kept for reference. It is written as a child of
# `jobs:`; the nesting after each `# ` is preserved so the job can be
# re-enabled by stripping the `# ` prefix from every line.
# NOTE(review): the job downloads the `vllm-tgis` artifact, so the commented-out
# "Upload artifact" step in build-image must be re-enabled together with it.
#
#   kserve-smoke-test:
#     needs: build-image
#     name: Kserve Smoke Test
#     runs-on: ubuntu-latest
#     steps:
#       - uses: actions/checkout@v4
#       - name: Setup kind/kserve
#         # NOTE(review): this action reference is garbled in the source (the
#         # name and version after "dtrifiro/" were lost) — restore before
#         # re-enabling.
#         uses: dtrifiro/[email protected]
#         with:
#           cluster_name: ${{ env.KIND_CLUSTER_NAME }}
#           namespace: ${{ env.TEST_NS }}
#       - name: Download built image
#         uses: actions/download-artifact@v4
#       - name: Load built image into kind
#         run: |
#           kind load image-archive --name ${{ env.KIND_CLUSTER_NAME }} vllm-tgis/image.tar
#       - name: Free Disk Space
#         # NOTE(review): reference garbled in the source; presumably
#         # jlumbroso/free-disk-space at a tagged release — confirm the version.
#         uses: jlumbroso/[email protected]
#         with:
#           tool-cache: false
#       - name: Setup flan-t5-small model volume
#         run: |
#           sed 's|quay.io/dtrifiro/vllm-tgis:fast|${{ env.DEV_IMAGE }}|g' .github/test/kserve/setup.yaml | \
#             kubectl apply -f -
#           max_retries=10
#           wait_time=60s
#           until kubectl wait --for=jsonpath='{.status.phase}'=Succeeded pod/setup-flan-t5-small --timeout ${wait_time}; do
#             echo "Current status:"
#             kubectl describe pod,pv,pvc
#             max_retries=$((max_retries-1))
#             if [[ $max_retries -le 0 ]]; then
#               echo "Failed to setup model"
#               kubectl logs pod/setup-flan-t5-small --all-containers
#               exit 1
#             fi
#             echo "-------------------"
#           done
#       - name: Deploy ServingRuntime/InferenceService
#         run: |
#           sed 's|quay.io/opendatahub/vllm:fast|${{ env.DEV_IMAGE }}|g' .github/test/kserve/vllm-tgis.yaml | \
#             kubectl apply -f -
#           max_retries=10
#           wait_time=60s
#           until kubectl wait isvc/${ISVC_NAME} --for=condition=Ready --timeout=${wait_time}; do
#             echo "Current status:"
#             kubectl describe isvc,servingruntime,pod
#             max_retries=$((max_retries-1))
#             if [[ $max_retries -le 0 ]]; then
#               exit 1
#             fi
#             echo "-------------------"
#           done
#       - name: Perform test inference (http)
#         run: |
#           # NOTE(review): this step calls `oc` while every other step uses
#           # `kubectl` — confirm `oc` is available on the runner or switch.
#           export ISVC_URL="$(oc get isvc ${ISVC_NAME} -o jsonpath='{.status.components.predictor.url}')"
#           export ISVC_HOSTNAME=$(echo $ISVC_URL | cut -d/ -f3-)
#           echo "Querying ISVC at: ${ISVC_URL}"
#           # We can't query the service via hostname, we need to add the entry to /etc/hosts
#           echo "127.0.0.1 ${ISVC_HOSTNAME}" | sudo tee -a /etc/hosts
#           python examples/inference.py