Skip to content

Commit

Permalink
Add CI job testing offline analysis of nsys-jax output archives
Browse files Browse the repository at this point in the history
  • Loading branch information
olupton committed Nov 25, 2024
1 parent 5c9bdd2 commit b66834f
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 10 deletions.
17 changes: 9 additions & 8 deletions .github/container/nsys_jax/nsys_jax/scripts/nsys_jax.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,16 +72,17 @@ def rep(x):
else
echo "Virtual environment already exists, not installing anything..."
fi
if [ -z ${{NSYS_JAX_INSTALL_SKIP_LAUNCH+x}} ]; then
# Pick up the current profile data by default
export NSYS_JAX_DEFAULT_PREFIX="${{PWD}}"
# https://setuptools.pypa.io/en/latest/userguide/datafiles.html#accessing-data-files-at-runtime
NOTEBOOK=$("${{BIN}}/python" -c 'from importlib.resources import files; print(files("nsys_jax") / "analyses" / "Analysis.ipynb")')
echo "Launching: cd ${{SCRIPT_DIR}} && ${{BIN}}/jupyter-lab ${{NOTEBOOK}}"
cd "${{SCRIPT_DIR}}" && "${{BIN}}/jupyter-lab" "${{NOTEBOOK}}"
# Pick up the current profile data by default
export NSYS_JAX_DEFAULT_PREFIX="${{PWD}}"
# https://setuptools.pypa.io/en/latest/userguide/datafiles.html#accessing-data-files-at-runtime
NOTEBOOK=$("${{BIN}}/python" -c 'from importlib.resources import files; print(files("nsys_jax") / "analyses" / "Analysis.ipynb")')
if [ -z ${{NSYS_JAX_IPYTHON_NOT_JUPYTER_LAB+x}} ]; then
CMD="${{BIN}}/jupyter-lab"
else
echo "Skipping launch of Jupyter Lab due to NSYS_JAX_INSTALL_SKIP_LAUNCH"
CMD="${{BIN}}/ipython"
fi
echo "Launching: cd ${{SCRIPT_DIR}} && ${{CMD}} ${{NOTEBOOK}}"
cd "${{SCRIPT_DIR}}" && "${{CMD}}" "${{NOTEBOOK}}"
"""


Expand Down
30 changes: 30 additions & 0 deletions .github/workflows/_ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -403,6 +403,36 @@ jobs:
*-execution-combine.log
secrets: inherit

# test-nsys-jax generates several fresh .zip archive outputs by running nsys-jax with real GPU hardware; this test
# runs on a regular GitHub Actions runner and checks that offline post-processing works in an environment that does
# not already have nsys-jax installed
test-nsys-jax-archive:
needs: test-nsys-jax
if: inputs.ARCHITECTURE == 'amd64' # arm64 runners n/a
strategy:
matrix:
os: [ubuntu-22.04, ubuntu-24.04, macOS-latest]
runs-on: ${{ matrix.os }}
steps:
- name: Download nsys-jax output .zip files
uses: actions/download-artifact@v4
with:
name: nsys-jax-unit-test-A100
- name: Extract archives and execute install scripts
run: |
# Run the install.sh shipped inside every nsys-jax output archive in the
# current directory, each one unpacked into its own scratch directory.
# Iterate with a glob instead of parsing `ls`, so archive names containing
# whitespace or glob characters are handled as single items.
found=0
for zip in *.zip; do
  # An unmatched glob is left as the literal string "*.zip"; skip it.
  [ -e "${zip}" ] || continue
  found=1
  ZIP="${PWD}/${zip}"
  # Stop right away if no scratch directory could be created, rather than
  # unpacking into whatever directory `pushd` would fall back to.
  workdir="$(mktemp -d)" || exit 1
  pushd "${workdir}"
  unzip "${ZIP}"
  ls -l
  # TODO: check whether unzip already preserves the executable bit on
  # install.sh; if it does, this chmod can be dropped.
  chmod 755 install.sh
  # Run the notebook with IPython rather than Jupyter Lab, so that the script
  # exits when done and prints something informative to stdout.
  NSYS_JAX_IPYTHON_NOT_JUPYTER_LAB=1 ./install.sh
  popd
done
# A run that found no archives has tested nothing; report that as a failure
# instead of letting the step pass vacuously.
if [ "${found}" -eq 0 ]; then
  echo "No nsys-jax .zip archives found in ${PWD}" >&2
  exit 1
fi
# test-equinox:
# needs: build-equinox
# if: inputs.ARCHITECTURE == 'amd64' # arm64 runners n/a
Expand Down
2 changes: 0 additions & 2 deletions .github/workflows/nsys-jax.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -116,8 +116,6 @@ jobs:
# Run with ipython for the sake of getting a clear error message
ipython "${NOTEBOOK}"
# TODO: add a test that generates + runs install.sh

# This input file was generated with something like
# srun -n 1 --container-name=XXX --container-image=ghcr.io/nvidia/jax:pax-2024-07-06
# env NPROC=4 XLA_FLAGS=--xla_gpu_enable_latency_hiding_scheduler\
Expand Down

0 comments on commit b66834f

Please sign in to comment.