Skip to content

Commit

Permalink
Unit tests set up their own venv (microsoft#2628)
Browse files Browse the repository at this point in the history
Add a reusable workflow that sets up a fresh venv for each test and prints relevant environment info.
  • Loading branch information
mrwyattii authored Dec 20, 2022
1 parent 8c56c25 commit 6fff50f
Show file tree
Hide file tree
Showing 15 changed files with 110 additions and 145 deletions.
15 changes: 4 additions & 11 deletions .github/workflows/amd.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,21 +25,14 @@ jobs:
# Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
- uses: actions/checkout@v2

# Runs a single command using the runner's shell
- name: environment
- id: setup-venv
uses: ./.github/workflows/setup-venv

- name: Install pytorch
run: |
echo "JobID: $AISC_NODE_INSTANCE_ID"
rocm-smi --showhw
which python
python --version
which hipcc
hipcc --version
pip install --upgrade pip
pip uninstall --yes torch torchvision triton
pip install torch torchvision --extra-index-url https://download.pytorch.org/whl/rocm5.1.1
python -c "import torch; print('torch:', torch.__version__, torch)"
python -c "import torch; print('CUDA available:', torch.cuda.is_available())"
sudo apt-get update
sudo apt-get install -y libaio-dev
- name: Install transformers
Expand Down
6 changes: 2 additions & 4 deletions .github/workflows/formatting.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,8 @@ jobs:
steps:
- uses: actions/checkout@v2

- name: environment
run: |
which python
python --version
- id: setup-venv
uses: ./.github/workflows/setup-venv

- name: Install deepspeed
run: |
Expand Down
14 changes: 4 additions & 10 deletions .github/workflows/nv-accelerate-v100.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,23 +22,17 @@ jobs:
steps:
- uses: actions/checkout@v2

- name: environment
- id: setup-venv
uses: ./.github/workflows/setup-venv

- name: Install pytorch
run: |
echo "JobID: $AISC_NODE_INSTANCE_ID"
nvidia-smi
which python
python --version
which nvcc
nvcc --version
pip install --upgrade pip
pip uninstall --yes torch torchvision triton
pip install torch torchvision --extra-index-url https://download.pytorch.org/whl/cu111
python -c "import torch; print('torch:', torch.__version__, torch)"
python -c "import torch; print('CUDA available:', torch.cuda.is_available())"
- name: Install deepspeed
run: |
pip uninstall --yes deepspeed
pip install .[dev,autotuning]
ds_report
Expand Down
15 changes: 4 additions & 11 deletions .github/workflows/nv-inference.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,16 +22,11 @@ jobs:
steps:
- uses: actions/checkout@v2

- name: environment
- id: setup-venv
uses: ./.github/workflows/setup-venv

- name: Install pytorch
run: |
echo "JobID: $AISC_NODE_INSTANCE_ID"
nvidia-smi
which python
python --version
which nvcc
nvcc --version
pip install --upgrade pip
pip uninstall --yes torch torchvision triton
pip install torch torchvision --extra-index-url https://download.pytorch.org/whl/cu116
python -c "import torch; print('torch:', torch.__version__, torch)"
python -c "import torch; print('CUDA available:', torch.cuda.is_available())"
Expand All @@ -41,12 +36,10 @@ jobs:
git clone https://github.com/huggingface/transformers
cd transformers
git rev-parse --short HEAD
pip uninstall --yes transformers
pip install .
- name: Install deepspeed
run: |
pip uninstall --yes deepspeed
pip install .[dev,1bit,autotuning,inf]
ds_report
Expand Down
15 changes: 4 additions & 11 deletions .github/workflows/nv-lightning-v100.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,23 +22,17 @@ jobs:
steps:
- uses: actions/checkout@v2

- name: environment
- id: setup-venv
uses: ./.github/workflows/setup-venv

- name: Install pytorch
run: |
echo "JobID: $AISC_NODE_INSTANCE_ID"
nvidia-smi
which python
python --version
which nvcc
nvcc --version
pip install --upgrade pip
pip uninstall --yes torch torchvision
pip install torch==1.9.1+cu111 torchvision==0.10.1+cu111 torchaudio==0.9.1 -f https://download.pytorch.org/whl/torch_stable.html
python -c "import torch; print('torch:', torch.__version__, torch)"
python -c "import torch; print('CUDA available:', torch.cuda.is_available())"
- name: Install deepspeed
run: |
pip uninstall --yes deepspeed
pip install .[dev,autotuning]
ds_report
Expand All @@ -49,7 +43,6 @@ jobs:
- name: PyTorch Lightning Tests
run: |
if [[ -d ./torch-extensions ]]; then rm -rf ./torch-extensions; fi
pip uninstall --yes pytorch-lightning
pip install pytorch-lightning
pip install "protobuf<4.21.0"
cd tests
Expand Down
24 changes: 11 additions & 13 deletions .github/workflows/nv-megatron.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,33 +22,31 @@ jobs:
steps:
- uses: actions/checkout@v2

- name: environment
- id: setup-venv
uses: ./.github/workflows/setup-venv

- name: Install pytorch
run: |
echo "JobID: $AISC_NODE_INSTANCE_ID"
nvidia-smi
which python
python --version
which nvcc
nvcc --version
pip install --upgrade pip
pip uninstall --yes torch torchvision triton
pip install torch torchvision --extra-index-url https://download.pytorch.org/whl/cu116
python -c "import torch; print('torch:', torch.__version__, torch)"
python -c "import torch; print('CUDA available:', torch.cuda.is_available())"
- name: Install
- name: Install deepspeed
run: |
pip uninstall --yes deepspeed
pip install .[dev]
ds_report
- name: Install apex
run: |
pip install -v --disable-pip-version-check --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" git+https://github.com/NVIDIA/apex.git
- name: Python environment
run: |
pip list
- name: Unit tests
- name: Megatron unit tests
run: |
git clone --branch mrwyattii/add-unit-test https://github.com/microsoft/Megatron-DeepSpeed.git
git clone --branch mrwyattii/fix-deprecated-numpy-types https://github.com/microsoft/Megatron-DeepSpeed.git
cd Megatron-DeepSpeed
pip install .
unset TORCH_CUDA_ARCH_LIST # only jit compile for current arch
Expand Down
29 changes: 16 additions & 13 deletions .github/workflows/nv-mii.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,31 +22,34 @@ jobs:
steps:
- uses: actions/checkout@v2

- name: environment
- id: setup-venv
uses: ./.github/workflows/setup-venv

- name: Install pytorch
run: |
echo "JobID: $AISC_NODE_INSTANCE_ID"
nvidia-smi
which python
python --version
which nvcc
nvcc --version
pip install --upgrade pip
pip uninstall --yes torch torchvision triton
pip install torch torchvision --extra-index-url https://download.pytorch.org/whl/cu116
python -c "import torch; print('torch:', torch.__version__, torch)"
python -c "import torch; print('CUDA available:', torch.cuda.is_available())"
- name: Install MII
- name: Install transformers
run: |
git clone https://github.com/huggingface/transformers
cd transformers
# if needed switch to the last known good SHA until transformers@master is fixed
# git checkout 1cc453d33
git rev-parse --short HEAD
pip install .
- name: Install deepspeed
run: |
pip uninstall --yes deepspeed deepspeed-mii transformers
pip install .[dev]
pip install git+https://github.com/huggingface/transformers.git
ds_report
- name: Python environment
run: |
pip list
- name: Unit tests
- name: MII unit tests
run: |
git clone https://github.com/microsoft/DeepSpeed-MII.git
cd DeepSpeed-MII
Expand Down
16 changes: 4 additions & 12 deletions .github/workflows/nv-nightly.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,11 @@ jobs:
steps:
- uses: actions/checkout@v2

- name: environment
- id: setup-venv
uses: ./.github/workflows/setup-venv

- name: Install pytorch
run: |
echo "JobID: $AISC_NODE_INSTANCE_ID"
nvidia-smi
which python
python --version
which nvcc
nvcc --version
pip install --upgrade pip
pip uninstall --yes torch torchvision triton
pip install torch torchvision --extra-index-url https://download.pytorch.org/whl/cu116
python -c "import torch; print('torch:', torch.__version__, torch)"
python -c "import torch; print('CUDA available:', torch.cuda.is_available())"
Expand All @@ -36,18 +31,15 @@ jobs:
# if needed switch to the last known good SHA until transformers@master is fixed
# git checkout 1cc453d33
git rev-parse --short HEAD
pip uninstall --yes transformers
pip install .
- name: Install deepspeed
run: |
pip uninstall --yes deepspeed
pip install .[dev,1bit,autotuning,inf]
ds_report
- name: Install lm-eval
run: |
pip uninstall --yes lm-eval
pip install git+https://github.com/EleutherAI/lm-evaluation-harness
# This is required until lm-eval makes a new release. v0.2.0 is
# broken for the latest version of transformers
Expand Down
15 changes: 4 additions & 11 deletions .github/workflows/nv-torch-latest-v100.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,16 +22,11 @@ jobs:
steps:
- uses: actions/checkout@v2

- name: environment
- id: setup-venv
uses: ./.github/workflows/setup-venv

- name: Install pytorch
run: |
echo "JobID: $AISC_NODE_INSTANCE_ID"
nvidia-smi
which python
python --version
which nvcc
nvcc --version
pip install --upgrade pip
pip uninstall --yes torch torchvision triton
pip install torch torchvision --extra-index-url https://download.pytorch.org/whl/cu116
python -c "import torch; print('torch:', torch.__version__, torch)"
python -c "import torch; print('CUDA available:', torch.cuda.is_available())"
Expand All @@ -43,12 +38,10 @@ jobs:
# if needed switch to the last known good SHA until transformers@master is fixed
# git checkout 1cc453d33
git rev-parse --short HEAD
pip uninstall --yes transformers
pip install .
- name: Install deepspeed
run: |
pip uninstall --yes deepspeed
pip install .[dev,1bit,autotuning]
ds_report
Expand Down
15 changes: 4 additions & 11 deletions .github/workflows/nv-torch-nightly-v100.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,11 @@ jobs:
steps:
- uses: actions/checkout@v2

- name: environment
- id: setup-venv
uses: ./.github/workflows/setup-venv

- name: Install pytorch
run: |
echo "JobID: $AISC_NODE_INSTANCE_ID"
nvidia-smi
which python
python --version
which nvcc
nvcc --version
pip install --upgrade pip
pip uninstall --yes torch torchvision triton
pip install --pre torch torchvision --extra-index-url https://download.pytorch.org/whl/nightly/cu116
python -c "import torch; print('torch:', torch.__version__, torch)"
python -c "import torch; print('CUDA available:', torch.cuda.is_available())"
Expand All @@ -36,12 +31,10 @@ jobs:
# if needed switch to the last known good SHA until transformers@master is fixed
# git checkout 1cc453d33
git rev-parse --short HEAD
pip uninstall --yes transformers
pip install .
- name: Install deepspeed
run: |
pip uninstall --yes deepspeed
pip install .[dev,1bit,autotuning]
ds_report
Expand Down
15 changes: 4 additions & 11 deletions .github/workflows/nv-torch18-p40.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,16 +22,11 @@ jobs:
steps:
- uses: actions/checkout@v2

- name: environment
- id: setup-venv
uses: ./.github/workflows/setup-venv

- name: Install pytorch
run: |
echo "JobID: $AISC_NODE_INSTANCE_ID"
nvidia-smi
which python
python --version
which nvcc
nvcc --version
pip install --upgrade pip
pip uninstall --yes torch torchvision triton
pip install torch==1.8.2 torchvision==0.9.2 --extra-index-url https://download.pytorch.org/whl/lts/1.8/cu101
python -c "import torch; print('torch:', torch.__version__, torch)"
python -c "import torch; print('CUDA available:', torch.cuda.is_available())"
Expand All @@ -43,12 +38,10 @@ jobs:
# if needed switch to the last known good SHA until transformers@master is fixed
# git checkout 1cc453d33
git rev-parse --short HEAD
pip uninstall --yes transformers
pip install .
- name: Install deepspeed
run: |
pip uninstall --yes deepspeed
pip install .[dev,1bit,autotuning]
ds_report
Expand Down
Loading

0 comments on commit 6fff50f

Please sign in to comment.