[pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
pre-commit-ci[bot] committed Apr 6, 2024 (1 parent 95572e5, commit 22dfb63)
Showing 9 changed files with 346 additions and 345 deletions; only .azure/docker-build.yml and .azure/gpu-tests.yml are reproduced below.
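The repository's own pre-commit configuration is not included in this diff, so the snippet below is only a minimal sketch of the kind of .pre-commit-config.yaml that produces auto-fixes like these, assuming Prettier is the YAML formatter (its preference for double quotes and its re-wrapping of long flow mappings match the changes shown below). The mirror URL, revision pin, and file filter are illustrative assumptions, not the project's actual settings.

# Hypothetical .pre-commit-config.yaml excerpt (illustration only, not taken from this repository)
repos:
  - repo: https://github.com/pre-commit/mirrors-prettier # community mirror exposing Prettier as a pre-commit hook
    rev: v3.1.0 # assumed pin; the real config may use a different revision
    hooks:
      - id: prettier
        types_or: [yaml, json] # limit formatting to YAML and JSON files

With a configuration along these lines, pre-commit.ci runs the hooks on every pull request and pushes the resulting fixes as an automated commit such as this one; the same changes can be reproduced locally by installing pre-commit and running "pre-commit run --all-files" from the repository root.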
50 changes: 28 additions & 22 deletions .azure/docker-build.yml
@@ -1,6 +1,6 @@
trigger:
tags:
include: ['*']
include: ["*"]
branches:
include: ["main"]
paths:
@@ -16,7 +16,7 @@ trigger:

pr:
branches:
include: ['*']
include: ["*"]
paths:
include:
- ".azure/docker-build.yml"
@@ -29,35 +29,41 @@ pr:
- "**/*.md"

schedules:
- cron: '0 */2 * * *'
displayName: rebuild dockers for CI every 2 hours
branches:
include: ["main"]
- cron: "0 */2 * * *"
displayName: rebuild dockers for CI every 2 hours
branches:
include: ["main"]

jobs:
- job: build_push
strategy:
#maxParallel: "3"
matrix:
# CUDA 12.1
'cuda 12.1 | torch 2.2 | cudnn FE v1.2':
{CUDA_VERSION: '12.1.1', TORCH_VERSION: '2.2.1', TRITON_VERSION: '2.2.0', CUDNN_FRONTEND: "1.2.1"}
'cuda 12.1 | torch 2.3 /test | cudnn FE v1.2':
{CUDA_VERSION: '12.1.1', TORCH_VERSION: '2.3.0', TRITON_VERSION: '2.2.0', TORCH_INSTALL: 'test', CUDNN_FRONTEND: "1.2.1"}
'cuda 12.1 | torch 2.4 /nightly | cudnn FE v1.2':
{CUDA_VERSION: '12.1.1', TORCH_VERSION: 'main', TORCH_INSTALL: 'source', CUDNN_FRONTEND: "1.2.1"}
"cuda 12.1 | torch 2.2 | cudnn FE v1.2":
{ CUDA_VERSION: "12.1.1", TORCH_VERSION: "2.2.1", TRITON_VERSION: "2.2.0", CUDNN_FRONTEND: "1.2.1" }
"cuda 12.1 | torch 2.3 /test | cudnn FE v1.2":
{
CUDA_VERSION: "12.1.1",
TORCH_VERSION: "2.3.0",
TRITON_VERSION: "2.2.0",
TORCH_INSTALL: "test",
CUDNN_FRONTEND: "1.2.1",
}
"cuda 12.1 | torch 2.4 /nightly | cudnn FE v1.2":
{ CUDA_VERSION: "12.1.1", TORCH_VERSION: "main", TORCH_INSTALL: "source", CUDNN_FRONTEND: "1.2.1" }
#'cuda 12.1': # this version - '8.9.5.29-1+cuda12.1' for 'libcudnn8' was not found
# how much time to give 'run always even if cancelled tasks' before stopping them
cancelTimeoutInMinutes: "2"
timeoutInMinutes: "95"
variables:
UBUNTU_VERSION: '22.04'
PYTHON_VERSION: '3.10'
APEX_CHECKOUT: 'master'
imageRepository: 'pytorchlightning/lightning-thunder'
dockerfilePath: 'dockers/ubuntu-cuda/Dockerfile'
imageTag: 'ubuntu$(UBUNTU_VERSION)-cuda$(CUDA_VERSION)-cudnn-fe$(CUDNN_FRONTEND)-py$(PYTHON_VERSION)-pt_${TORCH_VERSION/v/}-apex'
pool: 'lit-rtx-3090'
UBUNTU_VERSION: "22.04"
PYTHON_VERSION: "3.10"
APEX_CHECKOUT: "master"
imageRepository: "pytorchlightning/lightning-thunder"
dockerfilePath: "dockers/ubuntu-cuda/Dockerfile"
imageTag: "ubuntu$(UBUNTU_VERSION)-cuda$(CUDA_VERSION)-cudnn-fe$(CUDNN_FRONTEND)-py$(PYTHON_VERSION)-pt_${TORCH_VERSION/v/}-apex"
pool: "lit-rtx-3090"
workspace:
clean: all
steps:
@@ -84,7 +90,7 @@ jobs:
--build-arg APEX_CHECKOUT="$(APEX_CHECKOUT)" \
. --no-cache
timeoutInMinutes: "95"
displayName: 'Build base image'
displayName: "Build base image"
- bash: |
docker image ls | grep $(imageRepository)
@@ -95,7 +101,7 @@ jobs:
bash -c "cd /workspace && ls -lh . && \
pip install -q . && \
bash azure/sanity-check.sh"
displayName: 'Sanity check'
displayName: "Sanity check"
- bash: |
set -e
@@ -104,7 +110,7 @@
docker push $(imageRepository):$(imageTag)
condition: ne(variables['Build.Reason'], 'PullRequest')
timeoutInMinutes: "35"
displayName: 'Push base image'
displayName: "Push base image"
#- task: Docker@1
# inputs:
219 changes: 109 additions & 110 deletions .azure/gpu-tests.yml
@@ -1,6 +1,6 @@
trigger:
tags:
include: ['*']
include: ["*"]
branches:
include:
- "main"
@@ -9,34 +9,34 @@ trigger:

pr:
branches:
include: ['*']
include: ["*"]

jobs:
- job: testing
strategy:
matrix:
# CUDA 12.1
'ubuntu22.04 | cuda 12.1 | python 3.10 | torch 2.2 | regular':
docker-image: 'ubuntu22.04-cuda12.1.1-cudnn-fe1.2.1-py3.10-pt_2.2.1-apex'
CUDA_VERSION_MM: '121'
'ubuntu22.04 | cuda 12.1 | python 3.10 | torch 2.2 | distributed':
docker-image: 'ubuntu22.04-cuda12.1.1-cudnn-fe1.2.1-py3.10-pt_2.2.1-apex'
CUDA_VERSION_MM: '121'
testing: 'distributed'
'ubuntu22.04 | cuda 12.1 | python 3.10 | torch 2.3 | regular':
docker-image: 'ubuntu22.04-cuda12.1.1-cudnn-fe1.2.1-py3.10-pt_2.3.0-apex'
CUDA_VERSION_MM: '121'
'ubuntu22.04 | cuda 12.1 | python 3.10 | torch 2.3 | distributed':
docker-image: 'ubuntu22.04-cuda12.1.1-cudnn-fe1.2.1-py3.10-pt_2.3.0-apex'
CUDA_VERSION_MM: '121'
testing: 'distributed'
'ubuntu22.04 | cuda 12.1 | python 3.10 | torch-nightly | regular':
docker-image: 'ubuntu22.04-cuda12.1.1-cudnn-fe1.2.1-py3.10-pt_main-apex'
CUDA_VERSION_MM: '121'
'ubuntu22.04 | cuda 12.1 | python 3.10 | torch-nightly | distributed':
docker-image: 'ubuntu22.04-cuda12.1.1-cudnn-fe1.2.1-py3.10-pt_main-apex'
CUDA_VERSION_MM: '121'
testing: 'distributed'
"ubuntu22.04 | cuda 12.1 | python 3.10 | torch 2.2 | regular":
docker-image: "ubuntu22.04-cuda12.1.1-cudnn-fe1.2.1-py3.10-pt_2.2.1-apex"
CUDA_VERSION_MM: "121"
"ubuntu22.04 | cuda 12.1 | python 3.10 | torch 2.2 | distributed":
docker-image: "ubuntu22.04-cuda12.1.1-cudnn-fe1.2.1-py3.10-pt_2.2.1-apex"
CUDA_VERSION_MM: "121"
testing: "distributed"
"ubuntu22.04 | cuda 12.1 | python 3.10 | torch 2.3 | regular":
docker-image: "ubuntu22.04-cuda12.1.1-cudnn-fe1.2.1-py3.10-pt_2.3.0-apex"
CUDA_VERSION_MM: "121"
"ubuntu22.04 | cuda 12.1 | python 3.10 | torch 2.3 | distributed":
docker-image: "ubuntu22.04-cuda12.1.1-cudnn-fe1.2.1-py3.10-pt_2.3.0-apex"
CUDA_VERSION_MM: "121"
testing: "distributed"
"ubuntu22.04 | cuda 12.1 | python 3.10 | torch-nightly | regular":
docker-image: "ubuntu22.04-cuda12.1.1-cudnn-fe1.2.1-py3.10-pt_main-apex"
CUDA_VERSION_MM: "121"
"ubuntu22.04 | cuda 12.1 | python 3.10 | torch-nightly | distributed":
docker-image: "ubuntu22.04-cuda12.1.1-cudnn-fe1.2.1-py3.10-pt_main-apex"
CUDA_VERSION_MM: "121"
testing: "distributed"
# how much time to give 'run always even if cancelled tasks' before stopping them
cancelTimeoutInMinutes: "2"
pool: "lit-rtx-3090"
@@ -52,102 +52,101 @@ jobs:
workspace:
clean: all
steps:
- bash: |
echo $(DEVICES)
echo "CUDA_VERSION_MM=$CUDA_VERSION_MM"
lspci | egrep 'VGA|3D'
whereis nvidia
nvidia-smi
which python && which pip
python --version
pip --version
pip list
echo "##vso[task.setvariable variable=CUDA_VISIBLE_DEVICES]$(DEVICES)"
displayName: "Image info & NVIDIA"
- bash: |
echo $(DEVICES)
echo "CUDA_VERSION_MM=$CUDA_VERSION_MM"
lspci | egrep 'VGA|3D'
whereis nvidia
nvidia-smi
which python && which pip
python --version
pip --version
pip list
echo "##vso[task.setvariable variable=CUDA_VISIBLE_DEVICES]$(DEVICES)"
displayName: 'Image info & NVIDIA'
- bash: |
# drop pt from requirements so not to interfere with the existing one
bash .azure/remove-torch-lines.sh requirements/base.txt
cat requirements/base.txt
- bash: |
# drop pt from requirements so not to interfere with the existing one
bash .azure/remove-torch-lines.sh requirements/base.txt
cat requirements/base.txt
# double check on test requirements
pip install -r requirements/test.txt
# double check on test requirements
pip install -r requirements/test.txt
# https://docs.codecov.com/docs/codecov-uploader
curl -Os https://uploader.codecov.io/latest/linux/codecov
chmod +x codecov
# https://docs.codecov.com/docs/codecov-uploader
curl -Os https://uploader.codecov.io/latest/linux/codecov
chmod +x codecov
# install this package
python setup.py develop
displayName: "Install package & ..."
# install this package
python setup.py develop
displayName: 'Install package & ...'
- bash: bash .azure/sanity-check.sh
displayName: "Sanity check / details"

- bash: bash .azure/sanity-check.sh
displayName: 'Sanity check / details'
- bash: |
set -ex
coverage run --source thunder -m \
pytest thunder/tests/ \
-m "not standalone" \
-v --datefmt="%Y%m%d-%H:%M:%S.%f" \
--timeout=240 \
--random-order-seed=42 \
--durations=250 \
--timeout=240 \
--numprocesses=9 \
--ignore=thunder/tests/distributed --ignore=thunder/tests/test_networks.py
# compile coverage results
python -m coverage report
python -m coverage xml
# upload to codecov
./codecov --token=$(CODECOV_TOKEN) --commit=$(Build.SourceVersion) \
--flags=gpu,pytest,regular --name="GPU-coverage" --env=linux,azure
condition: ne(variables['testing'], 'distributed')
timeoutInMinutes: "30"
displayName: "Testing: regular"
- bash: |
set -ex
coverage run --source thunder -m \
pytest thunder/tests/ \
-m "not standalone" \
-v --datefmt="%Y%m%d-%H:%M:%S.%f" \
--timeout=240 \
--random-order-seed=42 \
--durations=250 \
--timeout=240 \
--numprocesses=9 \
--ignore=thunder/tests/distributed --ignore=thunder/tests/test_networks.py
# compile coverage results
python -m coverage report
python -m coverage xml
# upload to codecov
./codecov --token=$(CODECOV_TOKEN) --commit=$(Build.SourceVersion) \
--flags=gpu,pytest,regular --name="GPU-coverage" --env=linux,azure
condition: ne(variables['testing'], 'distributed')
timeoutInMinutes: "30"
displayName: 'Testing: regular'
- bash: |
set -ex
# these test need to run in single thread as they occurs with CUDA OOM
coverage run --source thunder -m \
pytest \
thunder/tests/test_networks.py \
-m "not standalone" \
-v --durations=0 \
--random-order-seed=42 \
--numprocesses=3
# compile coverage results
python -m coverage report
python -m coverage xml
# upload to codecov
./codecov --token=$(CODECOV_TOKEN) --commit=$(Build.SourceVersion) \
--flags=gpu,pytest,networks --name="GPU-coverage" --env=linux,azure
condition: ne(variables['testing'], 'distributed')
timeoutInMinutes: "15"
displayName: "Testing: networks"
- bash: |
set -ex
# these test need to run in single thread as they occurs with CUDA OOM
coverage run --source thunder -m \
pytest \
thunder/tests/test_networks.py \
-m "not standalone" \
-v --durations=0 \
--random-order-seed=42 \
--numprocesses=3
# compile coverage results
python -m coverage report
python -m coverage xml
# upload to codecov
./codecov --token=$(CODECOV_TOKEN) --commit=$(Build.SourceVersion) \
--flags=gpu,pytest,networks --name="GPU-coverage" --env=linux,azure
condition: ne(variables['testing'], 'distributed')
timeoutInMinutes: "15"
displayName: 'Testing: networks'
#- bash: |
# bash .azure/run_standalone_tests.sh \
# "thunder/tests" \
# "-m standalone --ignore=thunder/tests/distributed"
# condition: ne(variables['testing'], 'distributed')
# displayName: 'Testing: standalone'

#- bash: |
# bash .azure/run_standalone_tests.sh \
# "thunder/tests" \
# "-m standalone --ignore=thunder/tests/distributed"
# condition: ne(variables['testing'], 'distributed')
# displayName: 'Testing: standalone'

- bash: |
set -ex
# run all found tests in given past as standalone
bash scripts/run_standalone_tests.sh "thunder/tests/distributed"
# compile coverage results
# TODO: collect and merge reports
# python -m coverage report
# python -m coverage xml
# # upload to codecov
# ./codecov --token=$(CODECOV_TOKEN) --commit=$(Build.SourceVersion) \
# --flags=gpu,pytest,distributed --name="GPU-coverage" --env=linux,azure
condition: eq(variables['testing'], 'distributed')
timeoutInMinutes: "20"
displayName: 'Testing: distributed'
- bash: |
set -ex
# run all found tests in given past as standalone
bash scripts/run_standalone_tests.sh "thunder/tests/distributed"
# compile coverage results
# TODO: collect and merge reports
# python -m coverage report
# python -m coverage xml
# # upload to codecov
# ./codecov --token=$(CODECOV_TOKEN) --commit=$(Build.SourceVersion) \
# --flags=gpu,pytest,distributed --name="GPU-coverage" --env=linux,azure
condition: eq(variables['testing'], 'distributed')
timeoutInMinutes: "20"
displayName: "Testing: distributed"
# todo (mruberry): decide whether this should be here or in another workflow
#- bash: |