Skip to content

Commit

Permalink
ci: adding HPU agents
Browse files Browse the repository at this point in the history
  • Loading branch information
Borda committed Feb 13, 2023
1 parent 7acaf26 commit 2bb68e5
Show file tree
Hide file tree
Showing 3 changed files with 117 additions and 4 deletions.
8 changes: 7 additions & 1 deletion .azure/ci-testig-parameterized.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ schedules:
include: ["main"]

jobs:
- template: testing-template.yml
- template: cuda-template.yml
parameters:
configs:
- "Lightning-AI/metrics_pl-develop.yaml"
Expand All @@ -24,3 +24,9 @@ jobs:
- "microsoft/deepspeed-release.yaml"
- "neptune-ai/lightning_integration.yaml"
- "manujosephv/pytorch-tabular_lit-release.yaml"

- template: habana-template.yml
parameters:
configs:
- "Lightning-AI/metrics_pl-develop.yaml"
- "Lightning-AI/metrics_pl-release.yaml"
7 changes: 4 additions & 3 deletions .azure/testing-template.yml → .azure/cuda-template.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,6 @@ jobs:
timeoutInMinutes: 75
# how much time to give 'run always even if cancelled tasks' before stopping them
cancelTimeoutInMinutes: 2
workspace:
clean: all

pool: 'lit-rtx-3090'
# this needs docker to be installed in the base image...
Expand All @@ -47,6 +45,9 @@ jobs:
# image: "nvcr.io/nvidia/pytorch:21.11-py3"
image: "pytorch/pytorch:1.13.0-cuda11.6-cudnn8-runtime"
options: "--gpus=all --shm-size=8g -v /usr/bin/docker:/tmp/docker:ro"
workspace:
clean: all

steps:

- bash: |
Expand All @@ -70,7 +71,7 @@ jobs:
- bash: |
sudo apt-get update -q --fix-missing
sudo apt-get install -q -y build-essential gcc g++ cmake git unzip tree --no-install-recommends
sudo apt-get install -q -y --no-install-recommends build-essential gcc g++ cmake git unzip tree
# Python's dependencies
pip --version
pip install -r requirements.txt
Expand Down
106 changes: 106 additions & 0 deletions .azure/habana-template.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
jobs:

# Job `check_diff`: runs on a hosted Ubuntu agent, asks
# `_actions/assistant.py changed_configs` about the current pull request and
# publishes the answer as the output variable `files.diff`.
- job: check_diff
  pool:
    vmImage: 'Ubuntu-20.04'
  steps:
  - bash: |
      pip --version
      pip install -q -r requirements.txt
      pip list
    displayName: 'Install dependencies'
  - script: |
      echo $PR_NUMBER
      # `2>&1` folds stderr into the captured value, so whatever the helper
      # prints on either stream ends up in CONFIGS.
      CONFIGS=$(python _actions/assistant.py changed_configs $PR_NUMBER --as_list=False 2>&1)
      # Pass the value as an argument, never as the printf format string:
      # a '%' or backslash in the output would otherwise be interpreted.
      printf "Changed configs: %s\n" "$CONFIGS"
      # Logging command: exposes CONFIGS to other jobs as `files.diff`
      # (`files` is this step's name, `diff` is the variable name).
      echo "##vso[task.setvariable variable=diff;isOutput=true]$CONFIGS"
    name: files
    env:
      # Pipeline macro mapped into the script environment as PR_NUMBER.
      PR_NUMBER: "$(System.PullRequest.PullRequestNumber)"
    displayName: 'Config diff'
# One job per entry of the `configs` template parameter. Each job runs inside
# the Habana PyTorch container on the 'intel-hpus' pool, prepares the
# environment described by the config file and runs its integration tests.
- ${{ each config in parameters.configs }}:
  - job:
    displayName: ${{config}}
    dependsOn: check_diff
    variables:
      # map the `files.diff` output variable of job `check_diff` into this job
      configs: $[ dependencies.check_diff.outputs['files.diff'] ]
      # the config file this particular job is responsible for
      config: "${{ config }}"

    # Run when building the main branch, or when the changed-config list
    # published by `check_diff` contains this job's config.
    # NOTE(review): a custom condition replaces the default `succeeded()`
    # check, so on main this job is attempted even if `check_diff` failed -
    # confirm that is intended.
    condition: or(eq(variables['Build.SourceBranch'], 'refs/heads/main'), contains(variables['configs'], variables['config']))
    # how long to run the job before automatically cancelling
    timeoutInMinutes: 75
    # how much time to give 'run always even if cancelled tasks' before stopping them
    cancelTimeoutInMinutes: 2

    pool: 'intel-hpus'
    # this needs docker to be installed in the base image...
    container:
      image: "vault.habana.ai/gaudi-docker/1.8.0/ubuntu20.04/habanalabs/pytorch-installer-1.13.1:latest"
      # The host's docker binary is mounted read-only at /tmp/docker; the first
      # step uses it to `docker exec` into this very container as root.
      options: "--runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host --shm-size=4g -v /usr/bin/docker:/tmp/docker:ro"
    workspace:
      clean: all

    steps:

    - script: |
        # Read the container id from the first cgroup entry of this process.
        container_id=$(head -1 /proc/self/cgroup|cut -d/ -f3)
        # Install sudo as uid 0 from "outside" via the mounted docker binary.
        # The Dpkg option is written without nested double quotes so the
        # sh -c argument is a single, unambiguous string.
        /tmp/docker exec -t -u 0 $container_id \
          sh -c "apt-get update && DEBIAN_FRONTEND=noninteractive apt-get -o Dpkg::Options::=--force-confold -y install sudo"
        echo "##vso[task.setvariable variable=CONTAINER_ID]$container_id"
      displayName: 'Install Sudo in container (thanks Microsoft!)'
    - bash: |
        whoami && id
        sudo apt-get install -q -y hwinfo
        hwinfo --short
        python --version
        pip --version
        pip list
      displayName: 'Image info & HW'
    - bash: |
        sudo apt-get update -q --fix-missing
        sudo apt-get install -q -y --no-install-recommends build-essential gcc g++ cmake git unzip tree
        # Python's dependencies
        pip --version
        pip install -r requirements.txt
        pip list
      displayName: 'Install dependencies'
    #- bash: |
    #    echo $CONTAINER_ID
    #  displayName: 'Sanity check'

    - bash: |
        # Generate the environment setup script for this config and show it.
        python _actions/assistant.py prepare_env --config_file=${{config}} > prepare_env.sh
        cat prepare_env.sh
      displayName: 'Create scripts'
    - bash: |
        bash prepare_env.sh
        # pip list
        tree .
      displayName: 'Prepare env.'
    - script: |
        # Both values are captured together with stderr (2>&1) and published
        # as output variables of this step (`testing.envs`, `testing.args`).
        ENVS=$(python _actions/assistant.py list_env --config_file=${{config}} --export 2>&1)
        # Values go through %s so a '%' or backslash in them is printed as-is.
        printf "PyTest env. variables: %s\n" "$ENVS"
        echo "##vso[task.setvariable variable=envs;isOutput=true]$ENVS"
        ARGS=$(python _actions/assistant.py specify_tests --config_file=${{config}} 2>&1)
        printf "PyTest arguments: %s\n" "$ARGS"
        echo "##vso[task.setvariable variable=args;isOutput=true]$ARGS"
      name: testing
      displayName: 'testing specs'
    - bash: |
        # The content of `testing.envs` is executed as a shell command line
        # (presumably `export ...` statements, given `--export` - verify
        # against assistant.py).
        $(testing.envs)
        python -m pytest $(testing.args) -v
      workingDirectory: _integrations
      displayName: 'Integration tests'
    # ToDo: add Slack notification

0 comments on commit 2bb68e5

Please sign in to comment.