Skip to content

Commit

Permalink
Run test jobs irrespective of whether the build jobs succeed or fail
Browse files Browse the repository at this point in the history
This lets us avoid losing test coverage if a single unrelated build job fails, e.g., the Windows build job fails but everything else succeeds. In this case, we still want to run the tests for the other platforms.

Also, if a build job fails, its corresponding test job will also report a failure as a result of not being able to download the wheel artifact so we should still be able to tell the source of job failure easily.

PiperOrigin-RevId: 720306268
  • Loading branch information
nitins17 authored and Google-ML-Automation committed Jan 31, 2025
1 parent def5fd5 commit f7e105b
Show file tree
Hide file tree
Showing 4 changed files with 76 additions and 0 deletions.
24 changes: 24 additions & 0 deletions .github/workflows/bazel_cuda_non_rbe.yml
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,30 @@ jobs:
gsutil -m cp -r "${{ inputs.gcs_download_uri }}/jaxlib*${PYTHON_MAJOR_MINOR}*${OS}*${ARCH}*.whl" $(pwd)/dist/ &&
gsutil -m cp -r "${{ inputs.gcs_download_uri }}/jax*cuda*plugin*${PYTHON_MAJOR_MINOR}*${OS}*${ARCH}*.whl" $(pwd)/dist/ &&
gsutil -m cp -r "${{ inputs.gcs_download_uri }}/jax*cuda*pjrt*${OS}*${ARCH}*.whl" $(pwd)/dist/
# Fail early, with an actionable message, when the wheels required by the tests
# are missing from dist/ (for example because an upstream build job failed).
- name: Check if wheels were downloaded successfully
  # Use a literal block scalar (|) so that every shell statement stays on its
  # own line. A folded scalar (>-) joins the lines with spaces, which turns the
  # if/else/fi blocks below into a bash syntax error.
  run: |
    # -print -quit makes find stop at the first matching wheel.
    jaxlib_whl=$(find dist/ -type f -name "jaxlib*${PYTHON_MAJOR_MINOR}*${OS}*${ARCH}*.whl" -print -quit 2>/dev/null)
    if [[ ! -f "$jaxlib_whl" ]]; then
      echo "No jaxlib wheel found in dist/. Please check if the artifact was built"
      echo "successfully in the 'build-jaxlib-artifact' job and that the download step above"
      echo "ran correctly."
      echo "Skipping the test run..."
      exit 1
    else
      echo "jaxlib wheel found in dist/"
    fi
    # Both CUDA wheels (plugin and PJRT) must be present for the test run.
    cuda_plugin_whl=$(find dist/ -type f -name "jax*cuda*plugin*${PYTHON_MAJOR_MINOR}*${OS}*${ARCH}*.whl" -print -quit 2>/dev/null)
    cuda_pjrt_whl=$(find dist/ -type f -name "jax*cuda*pjrt*${OS}*${ARCH}*.whl" -print -quit 2>/dev/null)
    if [[ ! -f "$cuda_plugin_whl" ]] || [[ ! -f "$cuda_pjrt_whl" ]]; then
      echo "Either the CUDA plugin or PJRT wheel was not found in dist/. Please check if the"
      echo "artifacts were built successfully in the 'build-cuda-artifacts' job and that the"
      echo "download step above ran correctly."
      echo "Skipping the test run..."
      exit 1
    else
      echo "CUDA plugin and PJRT wheels found in dist/"
    fi
# Halt for testing
- name: Wait For Connection
uses: google-ml-infra/actions/ci_connection@main
Expand Down
12 changes: 12 additions & 0 deletions .github/workflows/pytest_cpu.yml
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,18 @@ jobs:
run: >-
mkdir dist &&
gsutil -m cp -r "${{ inputs.gcs_download_uri }}/jaxlib*%PYTHON_MAJOR_MINOR%*%OS%*%ARCH%*.whl" dist/
# Fail early, with an actionable message, when the jaxlib wheel required by the
# tests is missing from dist/ (for example because the build job failed).
- name: Check if wheels were downloaded successfully
  # Use a literal block scalar (|) so that every shell statement stays on its
  # own line. A folded scalar (>-) joins the lines with spaces, which turns the
  # if/else/fi block below into a bash syntax error.
  # NOTE(review): the script uses bash-only syntax ([[ ]]); confirm that this
  # workflow's default shell is bash on every runner, including Windows.
  run: |
    # -print -quit makes find stop at the first matching wheel.
    jaxlib_whl=$(find dist/ -type f -name "jaxlib*${PYTHON_MAJOR_MINOR}*${OS}*${ARCH}*.whl" -print -quit 2>/dev/null)
    if [[ ! -f "$jaxlib_whl" ]]; then
      echo "No jaxlib wheel found in dist/. Please check if the artifact was built"
      echo "successfully in the 'build-jaxlib-artifact' job and that the download step above"
      echo "ran correctly."
      echo "Skipping the test run..."
      exit 1
    else
      echo "jaxlib wheel found in dist/"
    fi
# Install the packages listed in build/requirements.in with pip, using the
# interpreter command held in $JAXCI_PYTHON (set outside this hunk).
- name: Install Python dependencies
run: $JAXCI_PYTHON -m pip install -r build/requirements.in
# Halt for testing
Expand Down
24 changes: 24 additions & 0 deletions .github/workflows/pytest_cuda.yml
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,30 @@ jobs:
gsutil -m cp -r "${{ inputs.gcs_download_uri }}/jaxlib*${PYTHON_MAJOR_MINOR}*${OS}*${ARCH}*.whl" $(pwd)/dist/ &&
gsutil -m cp -r "${{ inputs.gcs_download_uri }}/jax*cuda*plugin*${PYTHON_MAJOR_MINOR}*${OS}*${ARCH}*.whl" $(pwd)/dist/ &&
gsutil -m cp -r "${{ inputs.gcs_download_uri }}/jax*cuda*pjrt*${OS}*${ARCH}*.whl" $(pwd)/dist/
# Fail early, with an actionable message, when the wheels required by the tests
# are missing from dist/ (for example because an upstream build job failed).
- name: Check if wheels were downloaded successfully
  # Use a literal block scalar (|) so that every shell statement stays on its
  # own line. The previous header (>--) was not a valid block scalar indicator,
  # and a folded scalar would in any case join the lines with spaces and turn
  # the if/else/fi blocks below into a bash syntax error.
  run: |
    # -print -quit makes find stop at the first matching wheel.
    jaxlib_whl=$(find dist/ -type f -name "jaxlib*${PYTHON_MAJOR_MINOR}*${OS}*${ARCH}*.whl" -print -quit 2>/dev/null)
    if [[ ! -f "$jaxlib_whl" ]]; then
      echo "No jaxlib wheel found in dist/. Please check if the artifact was built"
      echo "successfully in the 'build-jaxlib-artifact' job and that the download step above"
      echo "ran correctly."
      echo "Skipping the test run..."
      exit 1
    else
      echo "jaxlib wheel found in dist/"
    fi
    # Both CUDA wheels (plugin and PJRT) must be present for the test run.
    cuda_plugin_whl=$(find dist/ -type f -name "jax*cuda*plugin*${PYTHON_MAJOR_MINOR}*${OS}*${ARCH}*.whl" -print -quit 2>/dev/null)
    cuda_pjrt_whl=$(find dist/ -type f -name "jax*cuda*pjrt*${OS}*${ARCH}*.whl" -print -quit 2>/dev/null)
    if [[ ! -f "$cuda_plugin_whl" ]] || [[ ! -f "$cuda_pjrt_whl" ]]; then
      echo "Either the CUDA plugin or PJRT wheel was not found in dist/. Please check if the"
      echo "artifacts were built successfully in the 'build-cuda-artifacts' job and that the"
      echo "download step above ran correctly."
      echo "Skipping the test run..."
      exit 1
    else
      echo "CUDA plugin and PJRT wheels found in dist/"
    fi
# Install the packages listed in build/requirements.in with pip, using the
# interpreter command held in $JAXCI_PYTHON (set outside this hunk).
- name: Install Python dependencies
run: $JAXCI_PYTHON -m pip install -r build/requirements.in
# Halt for testing
Expand Down
16 changes: 16 additions & 0 deletions .github/workflows/wheel_tests_continuous.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@ name: CI - Wheel Tests (Continuous)
# Triggers for the continuous wheel tests. This workflow is schedule-only; a
# pull_request trigger may be added temporarily while testing changes to the
# workflow, but it must not be committed.
on:
  schedule:
    - cron: "0 */2 * * *"  # Run once every 2 hours

concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.ref }}
Expand Down Expand Up @@ -61,6 +65,10 @@ jobs:
gcs_upload_uri: 'gs://general-ml-ci-transient/jax-github-actions/jax/${{ github.workflow }}/${{ github.run_number }}/${{ github.run_attempt }}'

run-pytest-cpu:
# Run the test job even if a build job fails, so that a single unrelated build failure (e.g. the
# Windows build failing while every other platform succeeds) does not cost us test coverage for
# the other platforms. `!cancelled()` is used rather than `always()` so that the job still runs
# after a failed dependency but stops when the workflow run itself is cancelled.
if: ${{ !cancelled() }}
needs: build-jaxlib-artifact
uses: ./.github/workflows/pytest_cpu.yml
strategy:
Expand All @@ -78,6 +86,10 @@ jobs:
gcs_download_uri: ${{ needs.build-jaxlib-artifact.outputs.gcs_upload_uri }}

run-pytest-cuda:
# Run the test job even if a build job fails, so that a single unrelated build failure (e.g. the
# Windows build failing while every other platform succeeds) does not cost us test coverage for
# the other platforms. `!cancelled()` is used rather than `always()` so that the job still runs
# after a failed dependency but stops when the workflow run itself is cancelled.
if: ${{ !cancelled() }}
needs: [build-jaxlib-artifact, build-cuda-artifacts]
uses: ./.github/workflows/pytest_cuda.yml
strategy:
Expand Down Expand Up @@ -105,6 +117,10 @@ jobs:
gcs_download_uri: ${{ needs.build-jaxlib-artifact.outputs.gcs_upload_uri }}

run-bazel-test-cuda:
# Run the test job even if a build job fails, so that a single unrelated build failure (e.g. the
# Windows build failing while every other platform succeeds) does not cost us test coverage for
# the other platforms. `!cancelled()` is used rather than `always()` so that the job still runs
# after a failed dependency but stops when the workflow run itself is cancelled.
if: ${{ !cancelled() }}
needs: [build-jaxlib-artifact, build-cuda-artifacts]
uses: ./.github/workflows/bazel_cuda_non_rbe.yml
strategy:
Expand Down

0 comments on commit f7e105b

Please sign in to comment.