Skip to content

[WIP] Add e2e test for tune api with LLM hyperparameter optimization #243

[WIP] Add e2e test for tune api with LLM hyperparameter optimization

[WIP] Add e2e test for tune api with LLM hyperparameter optimization #243

# End-to-end test of the tune API. Runs on pull requests (except changes that
# only touch the UI frontend) once per Kubernetes version in the job matrix.
name: E2E Test with tune API

on:
  pull_request:
    paths-ignore:
      - "pkg/ui/v1beta1/frontend/**"

# A newer run for the same workflow + ref cancels the one still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  e2e:
    runs-on: ubuntu-22.04
    timeout-minutes: 120

    # Declared ahead of `steps` because the steps read
    # `matrix.kubernetes-version`.
    strategy:
      # Let the remaining Kubernetes versions finish when one of them fails.
      fail-fast: false
      matrix:
        # Detail: https://hub.docker.com/r/kindest/node
        kubernetes-version: ["v1.27.11", "v1.28.7", "v1.29.2"]

    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Setup Test Env
        uses: ./.github/workflows/template-setup-e2e-test
        with:
          kubernetes-version: ${{ matrix.kubernetes-version }}

      - name: Install Training Operator SDK
        shell: bash
        run: |
          pip install "kubeflow-training[huggingface]==1.8.1"

      # Baseline disk usage, for comparison with the post-test report below.
      - name: Check Disk Space Before E2E Test
        run: |
          echo "Checking disk space usage before e2e test..."
          df -h # Run 'df' to check free disk space

      - name: Run e2e test with tune API
        uses: ./.github/workflows/template-e2e-test
        with:
          tune-api: true
          training-operator: true

      # Disk usage after the test; runs even if previous steps fail.
      - name: Check Disk Space After E2E Test
        if: always()
        run: |
          echo "Checking disk space usage after e2e test..."
          df -h # Run 'df' to check free disk space

      # Diagnostics for the master pod(s) of the "tune-example-2" Experiment.
      # Runs even if previous steps fail. Every command is best-effort
      # (`|| true`): the default shell runs with `-e`, so one failing
      # diagnostic would otherwise abort the step and hide the rest.
      - name: Fetch Experiment Pod Logs
        if: always()
        run: |
          kubectl get pods -n default || true

          # grep exits non-zero when nothing matches; tolerate that so the
          # events below are still collected.
          POD_NAMES=$(kubectl get pods -n default --no-headers \
            -o custom-columns=":metadata.name" \
            | grep tune-example-2 | grep master || true)

          if [ -z "$POD_NAMES" ]; then
            echo "No tune-example-2 master pod found in namespace default"
          fi

          # Handle each matching pod separately: `kubectl logs` accepts only
          # a single pod name.
          for POD_NAME in $POD_NAMES; do
            echo "Fetching logs for pod: $POD_NAME"
            kubectl describe pod "$POD_NAME" -n default || true
            kubectl logs "$POD_NAME" -n default || true
            # Needs the metrics API; harmless if it is unavailable.
            kubectl top pods "$POD_NAME" -n default || true
          done

          kubectl get events -n default | grep "tune-example-2" || true

      # Kubelet journal from the runner host; runs even if previous steps fail.
      # NOTE(review): the original comment says "from Minikube" while the
      # matrix comment points at kindest/node images - confirm that the host
      # journal actually contains kubelet entries for this cluster setup.
      - name: Fetch Kubelet Logs
        if: always()
        shell: bash
        run: |
          echo "Fetching kubelet logs..."
          sudo journalctl -u kubelet