[WIP] Add e2e test for tune API with LLM hyperparameter optimization
#237
Workflow file for this run
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# End-to-end test workflow for the tune API.
#
# Runs on every pull request except those that only touch the v1beta1 UI
# frontend. For each Kubernetes version in the matrix it sets up the test
# environment, installs the Training Operator SDK, runs the tune API e2e test,
# and then collects diagnostics (disk usage, pod logs, events) even when an
# earlier step failed.
name: E2E Test with tune API

on:
  pull_request:
    paths-ignore:
      - "pkg/ui/v1beta1/frontend/**"

# Only one run per workflow and ref at a time; a newer run cancels the older one.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  e2e:
    runs-on: ubuntu-22.04
    timeout-minutes: 120
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Setup Test Env
        uses: ./.github/workflows/template-setup-e2e-test
        with:
          kubernetes-version: ${{ matrix.kubernetes-version }}

      - name: Install Training Operator SDK
        shell: bash
        run: |
          pip install "kubeflow-training[huggingface]==1.8.1"

      # Step to check disk space
      - name: Check Disk Space Before E2E Test
        run: |
          echo "Checking disk space usage before e2e test..."
          df -h # Run 'df' to check free disk space

      - name: Run e2e test with tune API
        uses: ./.github/workflows/template-e2e-test
        with:
          tune-api: true
          training-operator: true

      # Step to check disk space
      - name: Check Disk Space After E2E Test
        if: always() # Run this step even if previous steps fail
        run: |
          echo "Checking disk space usage after e2e test..."
          df -h # Run 'df' to check free disk space

      # Step to get logs of the relevant Experiment pod(s).
      # NOTE(review): "tune-example-2" is presumably the Experiment created by
      # the tune e2e test -- confirm against the test script.
      - name: Fetch Experiment Pod Logs
        if: always() # Run this step even if previous steps fail
        run: |
          # Every command here is best-effort: this step only gathers
          # diagnostics and must not fail the job on its own (the default
          # shell runs with `-e`, and grep exits 1 when nothing matches).
          kubectl get pods -n default || true
          pod_names=$(kubectl get pods -n default --no-headers \
            -o custom-columns=":metadata.name" | grep tune-example-2 || true)
          if [[ -z "${pod_names}" ]]; then
            echo "No pods matching tune-example-2 found in namespace default"
          fi
          # Intentionally unquoted: split the newline-separated list so each
          # matching pod is described and logged individually.
          for pod_name in ${pod_names}; do
            echo "Fetching logs for pod: ${pod_name}"
            kubectl describe pod "${pod_name}" -n default || true
            kubectl logs "${pod_name}" -n default || true
          done
          kubectl get events -n default | grep "tune-example-2" || true

    strategy:
      # Let the remaining Kubernetes versions finish even if one fails.
      fail-fast: false
      matrix:
        # Detail: https://hub.docker.com/r/kindest/node
        kubernetes-version: ["v1.27.11", "v1.28.7", "v1.29.2"]