diff --git a/.github/workflows/precompiled.yaml b/.github/workflows/precompiled.yaml index f4fd2c02..962aaea0 100644 --- a/.github/workflows/precompiled.yaml +++ b/.github/workflows/precompiled.yaml @@ -120,7 +120,7 @@ jobs: echo "SHIVA############# ${PRIVATE_REGISTRY}/nvidia/driver:${{ matrix.driver-branch }}-${KERNEL_VERSION}-${DIST}" docker images "${PRIVATE_REGISTRY}/nvidia/driver:${{ matrix.driver-branch }}-${KERNEL_VERSION}-${DIST}" docker save "${PRIVATE_REGISTRY}/nvidia/driver:${{ matrix.driver-branch }}-${KERNEL_VERSION}-${DIST}" \ - | gzip > ./driver-images-${{ matrix.driver-branch }}-${KERNEL_VERSION}-${DIST}.tar.gz + -o ./driver-images-${{ matrix.driver-branch }}-${KERNEL_VERSION}-${DIST}.tar # set env for artifacts upload echo "KERNEL_VERSION=$KERNEL_VERSION" >> $GITHUB_ENV @@ -132,7 +132,7 @@ jobs: uses: actions/upload-artifact@v4 with: name: driver-images-${{ matrix.driver-branch }}-${{ env.KERNEL_VERSION }}-${{ env.DIST }} - path: ./driver-images-${{ matrix.driver-branch }}-${{ env.KERNEL_VERSION }}-${{ env.DIST }}.tar.gz + path: ./driver-images-${{ matrix.driver-branch }}-${{ env.KERNEL_VERSION }}-${{ env.DIST }}.tar retention-days: 1 determine-e2e-test-matrix: @@ -256,6 +256,8 @@ jobs: echo "Downloading $image in tests directory" gh run download --name $image --dir ./tests/ done + # SHIVA + ls ./tests/* - name: Upgrade the kernel for Precompiled e2e test env: @@ -291,7 +293,7 @@ jobs: TEST_CASE_ARGS="${GPU_OPERATOR_OPTIONS} --set driver.version=${DRIVER_VERSION}" # add escape character for space TEST_CASE_ARGS=$(printf '%q ' "$TEST_CASE_ARGS") - IMAGE_PATH="./tests/driver-images-${DRIVER_VERSION}-${KERNEL_VERSION}-${DIST}.tar.gz" + IMAGE_PATH="./tests/driver-images-${DRIVER_VERSION}-${KERNEL_VERSION}-${DIST}.tar" ./tests/ci-run-e2e.sh "${TEST_CASE}" "${TEST_CASE_ARGS}" ${IMAGE_PATH} || status=$? if [ $status -eq 1 ]; then echo "e2e validation failed for driver version $DRIVER_VERSION with status $status" @@ -305,9 +307,9 @@ jobs: run: | ls ./tests/* for DRIVER_BRANCH in $driver_branch; do - image_path="./tests/driver-images-${DRIVER_BRANCH}-${KERNEL_VERSION}-${DIST}.tar.gz" + image_path="./tests/driver-images-${DRIVER_BRANCH}-${KERNEL_VERSION}-${DIST}.tar" echo "uploading $image_path" - docker load -i "$image" + docker load -i $image_path echo "docker tag driver:${DRIVER_BRANCH}-${KERNEL_VERSION}-${DIST} ${PRIVATE_REGISTRY}/nvidia/driver:${DRIVER_BRANCH}-${KERNEL_VERSION}-${DIST}" docker tag driver:${DRIVER_BRANCH}-${KERNEL_VERSION}-${DIST} \ ${PRIVATE_REGISTRY}/nvidia/driver:${DRIVER_BRANCH}-${KERNEL_VERSION}-${DIST} @@ -322,43 +324,3 @@ jobs: name: nvidiadriver-Precompiled-e2e-test-logs path: ./logs/ retention-days: 15 - -# SHIVA -# This should be inside e2e as 2 times download not needed , also check if there is feasabilty , if we can uploaded images directly from artifacts to ghcr.io - precompiled-push-image: - runs-on: ubuntu-latest - needs: - - set-driver-version-matrix - - determine-e2e-test-matrix - strategy: - matrix: - kernel-version: ${{ fromJson(needs.determine-e2e-test-matrix.outputs.matrix_values) }} - driver-branch: ${{ fromJson(needs.set-driver-version-matrix.outputs.driver_branch) }} - steps: - - name: Login to GitHub Container Registry - uses: docker/login-action@v3 - with: - registry: ghcr.io - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - - name: Set and Calculate test vars - run: | - echo "DIST=ubuntu22.04" >> $GITHUB_ENV - echo "PRIVATE_REGISTRY=ghcr.io" >> $GITHUB_ENV - - # - name: Download Docker image artifact - # uses: actions/download-artifact@v4 - # with: - # name: driver-images-${{ matrix.driver-branch}}-${{ matrix.kernel-version }}-${{ env.DIST }} - # path: ./ - - - name: Push built image - run: | - ls ./* - # docker load -i ${{ matrix.driver-branch }}-${{ matrix.kernel-version }}-${{ env.DIST }}.tar.gz - # docker tag driver:${{ matrix.driver-branch }}-${{ matrix.kernel-version }}-${{ env.DIST }} \ - # ${PRIVATE_REGISTRY}/nvidia/driver:${{ matrix.driver-branch }}-${{ matrix.kernel-version }}-${{ env.DIST }} - # docker push ${PRIVATE_REGISTRY}/nvidia/driver:${{ matrix.driver-branch }}-${{ matrix.kernel-version }}-${{ env.DIST }} - - - name: Remove built image tar - run: rm -f driver-images-${{ matrix.driver-branch }}-${{ matrix.kernel-version }}-${{ env.DIST }}.tar.gz diff --git a/tests/cases/nvidia-driver.sh b/tests/cases/nvidia-driver.sh index 42ea5f71..ac3f5402 100755 --- a/tests/cases/nvidia-driver.sh +++ b/tests/cases/nvidia-driver.sh @@ -10,6 +10,7 @@ fi export TEST_CASE_ARGS="$1" if [[ $# -eq 2 ]]; then export IMAGE_PATH="$2" + docker load -i "$IMAGE_PATH" fi SCRIPTS_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )"/../scripts && pwd )" diff --git a/tests/scripts/install-operator.sh b/tests/scripts/install-operator.sh index f1b34879..2b4bcbaf 100755 --- a/tests/scripts/install-operator.sh +++ b/tests/scripts/install-operator.sh @@ -22,7 +22,7 @@ kubectl create namespace "${TEST_NAMESPACE}" # Run the helm install command echo "OPERATOR_OPTIONS: ${OPERATOR_OPTIONS}" -eval ${HELM} install gpu-operator \ - -n "${TEST_NAMESPACE}" ${IMAGE_PATH} \ +eval ${HELM} install gpu-operator nvidia/gpu-operator \ + -n "${TEST_NAMESPACE}" \ "${OPERATOR_OPTIONS}" \ --wait diff --git a/tests/scripts/verify-operator.sh b/tests/scripts/verify-operator.sh index d663db37..56e3cf3d 100755 --- a/tests/scripts/verify-operator.sh +++ b/tests/scripts/verify-operator.sh @@ -24,4 +24,8 @@ if [ $exit_status -ne 0 ]; then exit 1 else echo "All gpu-operator pods are ready." + # SHIVA + curl -o ${SCRIPT_DIR}/must-gather.sh "https://raw.githubusercontent.com/NVIDIA/gpu-operator/main/hack/must-gather.sh" + chmod +x ${SCRIPT_DIR}/must-gather.sh + ARTIFACT_DIR="${LOG_DIR}" ${SCRIPT_DIR}/must-gather.sh fi