From b66834f3fda5513733fd5be7340100746cf533dd Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Mon, 25 Nov 2024 02:43:36 -0800 Subject: [PATCH] Add CI job testing offline analysis of nsys-jax output archives --- .../nsys_jax/nsys_jax/scripts/nsys_jax.py | 17 ++++++----- .github/workflows/_ci.yaml | 30 +++++++++++++++++++ .github/workflows/nsys-jax.yaml | 2 -- 3 files changed, 39 insertions(+), 10 deletions(-) diff --git a/.github/container/nsys_jax/nsys_jax/scripts/nsys_jax.py b/.github/container/nsys_jax/nsys_jax/scripts/nsys_jax.py index 0666a41cf..87702d6b7 100644 --- a/.github/container/nsys_jax/nsys_jax/scripts/nsys_jax.py +++ b/.github/container/nsys_jax/nsys_jax/scripts/nsys_jax.py @@ -72,16 +72,17 @@ def rep(x): else echo "Virtual environment already exists, not installing anything..." fi -if [ -z ${{NSYS_JAX_INSTALL_SKIP_LAUNCH+x}} ]; then - # Pick up the current profile data by default - export NSYS_JAX_DEFAULT_PREFIX="${{PWD}}" - # https://setuptools.pypa.io/en/latest/userguide/datafiles.html#accessing-data-files-at-runtime - NOTEBOOK=$("${{BIN}}/python" -c 'from importlib.resources import files; print(files("nsys_jax") / "analyses" / "Analysis.ipynb")') - echo "Launching: cd ${{SCRIPT_DIR}} && ${{BIN}}/jupyter-lab ${{NOTEBOOK}}" - cd "${{SCRIPT_DIR}}" && "${{BIN}}/jupyter-lab" "${{NOTEBOOK}}" +# Pick up the current profile data by default +export NSYS_JAX_DEFAULT_PREFIX="${{PWD}}" +# https://setuptools.pypa.io/en/latest/userguide/datafiles.html#accessing-data-files-at-runtime +NOTEBOOK=$("${{BIN}}/python" -c 'from importlib.resources import files; print(files("nsys_jax") / "analyses" / "Analysis.ipynb")') +if [ -z ${{NSYS_JAX_IPYTHON_NOT_JUPYTER_LAB+x}} ]; then + CMD="${{BIN}}/jupyter-lab" else - echo "Skipping launch of Jupyter Lab due to NSYS_JAX_INSTALL_SKIP_LAUNCH" + CMD="${{BIN}}/ipython" fi +echo "Launching: cd ${{SCRIPT_DIR}} && ${{CMD}} ${{NOTEBOOK}}" +cd "${{SCRIPT_DIR}}" && "${{CMD}}" "${{NOTEBOOK}}" """ diff --git a/.github/workflows/_ci.yaml b/.github/workflows/_ci.yaml index 9bca25bbf..e9427fc85 100644 --- a/.github/workflows/_ci.yaml +++ b/.github/workflows/_ci.yaml @@ -403,6 +403,36 @@ jobs: *-execution-combine.log secrets: inherit + # test-nsys-jax generates several fresh .zip archive outputs by running nsys-jax with real GPU hardware; this test + # runs on a regular GitHub Actions runner and checks that offline post-processing works in an environment that does + # not already have nsys-jax installed + test-nsys-jax-archive: + needs: test-nsys-jax + if: inputs.ARCHITECTURE == 'amd64' # arm64 runners n/a + strategy: + matrix: + os: [ubuntu-22.04, ubuntu-24.04, macOS-latest] + runs-on: ${{ matrix.os }} + steps: + - name: Download nsys-jax output .zip files + uses: actions/download-artifact@v4 + with: + name: nsys-jax-unit-test-A100 + - name: Extract archives and execute install scripts + run: | + for zip in $(ls *.zip); do + ZIP="${PWD}/${zip}" + pushd $(mktemp -d) + unzip "${ZIP}" + ls -l + # TODO: verify this isn't needed, or make sure it isn't needed + chmod 755 install.sh + # Run the notebook with IPython, not Jupyter Lab, so it exits and prints something informative to stdout + # Skip executing Jupyter lab + NSYS_JAX_IPYTHON_NOT_JUPYTER_LAB=1 ./install.sh + popd + done + # test-equinox: # needs: build-equinox # if: inputs.ARCHITECTURE == 'amd64' # arm64 runners n/a diff --git a/.github/workflows/nsys-jax.yaml b/.github/workflows/nsys-jax.yaml index 0c73273bf..e15cd557f 100644 --- a/.github/workflows/nsys-jax.yaml +++ b/.github/workflows/nsys-jax.yaml @@ -116,8 +116,6 @@ jobs: # Run with ipython for the sake of getting a clear error message ipython "${NOTEBOOK}" - # TODO: add a test that generates + runs install.sh - # This input file was generated with something like # srun -n 1 --container-name=XXX --container-image=ghcr.io/nvidia/jax:pax-2024-07-06 # env NPROC=4 XLA_FLAGS=--xla_gpu_enable_latency_hiding_scheduler\