Automated PR: Downstream develop rebase new changes #53

Closed · wants to merge 200 commits

Commits (200):
251a240
Add llama3-llava-next-8b to llava_next conversion script (#31395)
jamt9000 Jul 23, 2024
3aefb4e
LLaVaNeXT: pad on right if training (#32134)
zucchini-nlp Jul 23, 2024
f83c6f1
Remove `trust_remote_code` when loading Libri Dummy (#31748)
sanchit-gandhi Jul 23, 2024
2782aad
[modelling] remove un-necessary transpose for fa2 attention (#31749)
sanchit-gandhi Jul 23, 2024
605f324
Fix mask creations of `GPTNeoX` and `GPT2` (#31944)
vasqu Jul 23, 2024
7405c1c
Add method to retrieve used chat template (#32032)
KonradSzafer Jul 23, 2024
34b4321
Add YaRN and Dynamic-YaRN RoPE Scaling Methods (#30910)
mig-mfreitas Jul 23, 2024
1535a2c
Disable quick init for TapasPreTrainedModel (#32149)
daniellok-db Jul 23, 2024
5a4a76e
Modify resize_token_embeddings to ensure output type is same as input…
bayllama Jul 23, 2024
2e11342
Llama: RoPE refactor (#32135)
gante Jul 23, 2024
a1844a3
gguf conversion add_prefix_space=None for llama3 (#31937)
itazap Jul 23, 2024
a5b226c
Fix flash attention speed issue (#32028)
Cyrilvallez Jul 23, 2024
9ced33c
Fix video batching to videollava (#32139)
merveenoyan Jul 23, 2024
bab32d6
Added mamba.py backend (#30139)
alxndrTL Jul 23, 2024
034b477
Rename Phi-3 rope scaling type (#31436)
garg-amit Jul 23, 2024
3263b34
Revert "Incorrect Whisper long-form decoding timestamps " (#32148)
sanchit-gandhi Jul 23, 2024
a009fbd
Fix typing to be compatible with later py versions (#32155)
amyeroberts Jul 23, 2024
6370062
feat(cache): StaticCache uses index_copy_ to avoid useless copy (#31857)
tengomucho Jul 23, 2024
7d92009
Added additional kwarg for successful running of optuna hyperparamete…
DeF0017 Jul 23, 2024
9cf4f2a
Enhancing SFT Training Efficiency Using Packing and FlashAttention2 w…
RhuiDih Jul 23, 2024
d2c687b
Updated `ruff` to the latest version (#31926)
Sai-Suraj-27 Jul 23, 2024
ff0d708
Dev version: v4.44.0.dev0
LysandreJik Jul 23, 2024
d5a99df
Llama 3.1 conversion
LysandreJik Jul 23, 2024
23f6a43
fix (#32162)
gante Jul 23, 2024
bc2adb0
fix: Fixed an if condition that is always evaluating to true (#32160)
Sai-Suraj-27 Jul 23, 2024
c85510f
[docs] change temperature to a positive value (#32077)
faaany Jul 23, 2024
01be5b4
adds: extra_repr() to MambaRMSNorm to include hidden size / size of w…
rohitdwivedula Jul 24, 2024
8678879
fix: default value reflects the runtime environment variables rather …
junrae6454 Jul 24, 2024
5f4ee98
Update qwen2.md (#32108)
ArtificialZeng Jul 24, 2024
165116b
Remove conversational pipeline tests (#32099)
amyeroberts Jul 24, 2024
e0182f3
RoPE: relaxed rope validation (#32182)
gante Jul 24, 2024
8d2534c
let's not warn when someone is running a forward (#32176)
ArthurZucker Jul 24, 2024
1392a68
Fix resize embedding with Deepspeed (#32192)
zucchini-nlp Jul 24, 2024
af0e4b7
Fix float8_e4m3fn in modeling_utils (#32193)
SunMarc Jul 24, 2024
1c122a4
Support dequantizing GGUF FP16 format (#31783)
PenutChen Jul 24, 2024
edd68f4
:rotating_light: No more default chat templates (#31733)
Rocketknight1 Jul 24, 2024
85a1269
fix: Replaced deprecated `unittest method` with the correct one (#32198)
Sai-Suraj-27 Jul 24, 2024
5658e74
[whisper] fix short-form output type (#32178)
sanchit-gandhi Jul 25, 2024
f53a5de
remove unnecessary guard code related with pytorch versions 1.4.2 ~ 1…
ji-huazhong Jul 25, 2024
1ecedf1
Update question_answering.py (#32208)
avlewis Jul 25, 2024
9b9a54e
[BigBird Pegasus] set _supports_param_buffer_assignment to False (#32…
kashif Jul 25, 2024
de23188
[warnings] fix E721 warnings (#32223)
kashif Jul 25, 2024
df6eee9
Follow up for #31973 (#32025)
ydshieh Jul 25, 2024
6ed0bf1
translate philosophy.md to chinese (#32177)
ji-huazhong Jul 25, 2024
3a83ec4
Allow a specific microphone to be used by the ffmpeg audio pipeline u…
jrhe Jul 25, 2024
9d6c064
Fix code snippet for Grounding DINO (#32229)
qubvel Jul 25, 2024
4ab33c2
Generation: stop at `eos` for assisted decoding (#31301)
zucchini-nlp Jul 26, 2024
fad15fb
Llava: generate without images (#32183)
zucchini-nlp Jul 26, 2024
c46edfb
Resize embeds with DeepSpeed (#32214)
zucchini-nlp Jul 26, 2024
1c7ebf1
don't log base model architecture in wandb if log model is false (#32…
joaonadkarni Jul 26, 2024
b8e5cd5
Refactor: Removed un-necessary `object` base class (#32230)
Sai-Suraj-27 Jul 26, 2024
f9756d9
Adds: extra_repr for RMSNorm layers in most models (#32204)
rohitdwivedula Jul 26, 2024
5f841c7
Add check for `target_sizes is None` in `post_process_image_guided_de…
catalys1 Jul 26, 2024
27c7f97
[tests] fix `static` cache implementation is not compatible with `att…
faaany Jul 26, 2024
81233c0
Flash-Attn: fix generation when no attention mask or no pading (#32241)
zucchini-nlp Jul 26, 2024
8da9068
More flexible trigger condition (#32251)
ydshieh Jul 26, 2024
44f6fdd
Llama 3.1: replace for loop by tensor ops at inv_freq initialization …
gante Jul 27, 2024
f739687
🚨 Bloom support for cache class (#31445)
zucchini-nlp Jul 29, 2024
f2122cc
Upload new model failure report to Hub (#32264)
ydshieh Jul 29, 2024
5019aab
Optimize t5 tokenize logic to avoid redundant calls (#32270)
leejet Jul 29, 2024
a2ad9d5
fix: Fixed wrong argument passed to `convert_blip_checkpoint` functio…
Sai-Suraj-27 Jul 29, 2024
535fe78
Repo: remove exceptions in `check_docstrings` (#32259)
gante Jul 29, 2024
6494479
make `p_mask` a numpy array before passing to `select_starts_ends` (#…
faaany Jul 29, 2024
4992889
fix(docs): Fixed a link in docs (#32274)
Sai-Suraj-27 Jul 29, 2024
7ffe25f
Generate: end-to-end compilation (#30788)
gante Jul 29, 2024
3fbaaaa
Whisper tokenizer word level timestamps (#32197)
kamilakesbi Jul 29, 2024
7f5d644
[pipeline] fix padding for 1-d tensors (#31776)
sanchit-gandhi Jul 29, 2024
811a9ca
Make static cache compatible with torch.export (#32168)
guangy10 Jul 29, 2024
a24a9a6
Add stream messages from agent run for gradio chatbot (#32142)
aymeric-roucher Jul 29, 2024
f0bc49e
use torch 2.4 in 2 CI jobs (#32302)
ydshieh Jul 29, 2024
3e8106d
Docs: fix GaLore optimizer code example (#32249)
gil2rok Jul 30, 2024
934fe15
Fix GGUF dequantize for `gguf==0.9.1` (#32298)
Isotr0py Jul 30, 2024
20528f0
Cast epochs_trained to int when resuming training (#32286)
teddy-f-47 Jul 30, 2024
084b509
feat(ci): set `fetch-depth: 0` in trufflehog checkout step (#31663)
McPatate Jul 30, 2024
2fbbcf5
Fix M4T for ASR pipeline (#32296)
ylacombe Jul 30, 2024
e68ec18
Docs: formatting nits (#32247)
gante Jul 30, 2024
bd54ed2
Alternative agent plan (#32295)
plaggy Jul 30, 2024
1627108
fix: Added missing raise keyword for few exceptions (#32333)
Sai-Suraj-27 Jul 30, 2024
62c60a3
fixes to properly shard FSDP across cpu and meta for cpu_efficient_lo…
winglian Jul 30, 2024
516af4b
fixes #32329 : The Torch code is correct - to get an average of 10% o…
fkrasnov2 Jul 30, 2024
026a173
Repo checks: skip docstring checks if not in the diff (#32328)
gante Jul 30, 2024
6e2d04e
Fix slow GemmaTokenizer and improve SPM slow -> fast conversion proce…
xenova Jul 30, 2024
a326433
LLaVA-NeXT: fix anyres shapes (#32314)
zucchini-nlp Jul 31, 2024
7f552e2
Gemma2 and flash-attention (#32188)
zucchini-nlp Jul 31, 2024
b75ad56
Llama 3.1: Fix incorrect `inv_freq` assignment (#32330)
gante Jul 31, 2024
5f1fcc2
[Idefics2] - Fix FA2 call for Perceiver layer (#32275)
amyeroberts Jul 31, 2024
ef177a5
Gemma 2: support assisted generation (#32357)
gante Jul 31, 2024
b46bd8b
Fix error when streaming to gradio with non-string tool arguments (#3…
aymeric-roucher Jul 31, 2024
92abe60
>3-5x faster torch.compile forward compilation for autoregressive dec…
fxmarty Jul 31, 2024
53f0c9c
fix: Removed unnecessary `@staticmethod` decorator (#32361)
Sai-Suraj-27 Jul 31, 2024
14ee232
fix: warmup_steps check for training_args (#32236)
Ricardo-L-C Jul 31, 2024
453e748
LLaVa: add cache class attribute (#32278)
zucchini-nlp Aug 1, 2024
9451a38
[enc-dec cache] fix bug in indexing (#32370)
sanchit-gandhi Aug 1, 2024
e234061
[whisper] compile compatibility with long-form decoding (#31772)
sanchit-gandhi Aug 1, 2024
48ed24c
Remove size check between attn_weights and kv_seq_len for phi3 (#32339)
helunwencser Aug 1, 2024
9e28284
add missing attribute _supports_param_buffer_assignment for gpt-j. (#…
nv-guomingz Aug 1, 2024
05c1f9a
Check device map for saving tokenizer config on TPU (fix for issue #3…
ayukh Aug 1, 2024
2229ebe
update clean_up_tokenization_spaces warning (#32371)
itazap Aug 1, 2024
db8c7ca
Empty list in defaults for LLaMA special tokens during weights conver…
ViktorooReps Aug 1, 2024
b4727a1
Fix conflicting key in init kwargs in PreTrainedTokenizerBase (#31233)
OmarManzoor Aug 1, 2024
ca59d6f
Offloaded KV Cache (#31325)
n17s Aug 1, 2024
e3d8285
Docker: add `speech` dep to the consistency docker image (#32374)
gante Aug 1, 2024
51ab25e
Fixed Hybrid Cache Shape Initialization. (#32163)
OsamaS99 Aug 1, 2024
82efc53
Yell at the user if zero-3 init wasn't performed, but expected to hav…
muellerzr Aug 1, 2024
2af199c
Update docs (#32368)
zucchini-nlp Aug 2, 2024
083e13b
RoPE: Add numerical tests ✨ (#32380)
gante Aug 2, 2024
c1aa0ed
[generate] only require an attention mask for mps with torch<2.4 (#32…
sanchit-gandhi Aug 2, 2024
7c31d05
fix: (issue #32124) Exception raised when running `transformers/examp…
fshp971 Aug 3, 2024
621fb3c
MixtralFlashAttention2: put "plus 1" inside parentheses when calculat…
xenshinu Aug 3, 2024
847bb85
Bump keras from 2.8.0 to 2.13.1 in /examples/research_projects/decisi…
dependabot[bot] Aug 5, 2024
05ae3a3
fix: SeamlessM4TFeatureExtractor stride remainder (#32088)
TechInterMezzo Aug 5, 2024
3bb646a
Phi3 tests: fix typing for Python 3.8 (#32388)
zucchini-nlp Aug 5, 2024
3d7c2f9
#32184 save total_vocab_size (#32240)
itazap Aug 5, 2024
ea5da52
add values for neftune (#32399)
nbroad1881 Aug 5, 2024
f5f1e52
Fix documentation references to google/bit-50 model (#32407)
JuanFKurucz Aug 5, 2024
baf7e5c
Persist embedding type of BART and mBART models after resize (#32242)
AbdiHaryadi Aug 5, 2024
458b0cd
fix: Updated `test_embeded_special_tokens` for luke and mluke models …
Sai-Suraj-27 Aug 5, 2024
7e5d46d
Respect the config's attn_implementation if set (#32383)
amyeroberts Aug 5, 2024
13dc6b0
Fix documentation links and code reference to model llava-next (#32434)
JuanFKurucz Aug 5, 2024
37c5ca5
Cache: create docs (#32150)
zucchini-nlp Aug 6, 2024
0aa8328
Llava: fix checkpoint_doc (#32458)
RUFFY-369 Aug 6, 2024
e85d863
add the missing flash attention test marker (#32419)
faaany Aug 6, 2024
fb66ef8
Update kwargs validation for `preprocess` with decorator (#32024)
qubvel Aug 6, 2024
438d06c
Fix get large model config for Switch Transformer encoder only tester…
JuanFKurucz Aug 6, 2024
36fd35e
Dependencies: fix typo (#32389)
gante Aug 6, 2024
6a03942
Add Nemotron HF Support (#31699)
suiyoubi Aug 6, 2024
3d8bd11
Generate: fix end to end compilation (#32465)
gante Aug 6, 2024
80b90e7
Add codestral mamba2 (#32080)
molbap Aug 6, 2024
194cf1f
Migrate import checks not need accelerate, and be more clear on min v…
muellerzr Aug 6, 2024
50c3ba8
Documentation: BOS token_id deprecation change for NLLB (#32443)
christoukmaji Aug 6, 2024
26a9443
dev version 4.45.0
ArthurZucker Aug 6, 2024
4fdc702
`is_torchdynamo_compiling` -- cast a wide exception net (#32476)
gante Aug 6, 2024
ac2707e
Revert "fixes to properly shard FSDP across cpu and meta for cpu_effc…
matthewdouglas Aug 6, 2024
5301b98
🌐 [i18n-KO] Translated `mask_generation.md` to Korean (#32257)
jeongiin Aug 6, 2024
3b193c7
🌐 [i18n-KO] Translated `idefics.md` to Korean (#32258)
boyunJang Aug 6, 2024
6af0854
🌐 [i18n-KO] Translated `image_to_image.md` to Korean (#32327)
shinhyunji36 Aug 6, 2024
a30c865
Cache: new Cache format in decoder-only models (#31421)
zucchini-nlp Aug 7, 2024
7ad784a
Gemma2: add cache warning (#32279)
zucchini-nlp Aug 7, 2024
46d09af
enable xla fsdp (#32048)
hanwen-sun Aug 7, 2024
c54a6f9
Fix typo in tokenization_utils_base.py (#32484)
blubitz Aug 7, 2024
e0d8253
Agents use grammar (#31735)
aymeric-roucher Aug 7, 2024
b640103
fix broken link in docs (#32491)
jorahn Aug 7, 2024
b7fb393
Docs: alert for the possibility of manipulating logits (#32467)
gante Aug 7, 2024
1124d95
🌐 [i18n-KO] Translated `gptq.md` to Korean (#32293)
1kmmk1 Aug 7, 2024
fcc4f2a
🌐 [i18n-KO] Translated `prompting.md` to Korean (#32294)
chhaewxn Aug 7, 2024
fa59fd8
🌐 [i18n-KO] Translated `quantization/quanto.md` to Korean (#32281)
fabxoe Aug 7, 2024
cba7bcf
🌐 [i18n-KO] Translated `image_feature_extraction.md` to Korean (#32239)
mreraser Aug 7, 2024
73a59a2
Fix references to model google mt5 small (#32497)
JuanFKurucz Aug 7, 2024
543df48
Docs: Fixed WhisperModel.forward’s docstring link (#32498)
Sai-Suraj-27 Aug 7, 2024
78566db
🌐 [i18n-KO] Translated `chat_templating.md` to Korean (#32362)
enchantee00 Aug 7, 2024
f5cdbf6
Fix link to autoclass_tutorial.md in i18n.md (#32501)
JuanFKurucz Aug 7, 2024
aefd3e2
Fix typo: depracted -> deprecated (#32489)
tomaarsen Aug 8, 2024
1c944ac
Fix issue #32518: Update llm_tutorial.md (#32523)
doomdagadiggiedahdah Aug 8, 2024
e28784f
Change Phi3 `_supports_sdpa` to True (#32457)
pocca2048 Aug 8, 2024
d3b3551
Uniformize kwargs for processors - GroundingDINO (#31964)
SangbumChoi Aug 8, 2024
b51d414
Fix add-new-model-like (#31773)
molbap Aug 8, 2024
16ed064
Add Qwen2-Audio (#32137)
faychu Aug 8, 2024
cc832cb
filter flash_attn optional imports loading remote code (#30954)
eaidova Aug 8, 2024
43f3fe8
🌐 [i18n-KO] Translated `ko-llm_tutorial_optimization.md` to Korean (#…
010kim Aug 8, 2024
96ba7f0
🌐 [i18n-KO] Translated `trainer.md` to Korean (#32260)
cjfghk5697 Aug 8, 2024
e0396bd
🌐 [i18n-KO] Translated `eetq.md` to Korean (#32352)
jun048098 Aug 8, 2024
496207a
🌐 [i18n-KO] Translated `fsdp.md` to Korean (#32261)
win2dvp21 Aug 8, 2024
b01f9c4
🌐 [i18n-KO] Translated `bitsandbytes.md` to Korean (#32408)
SeungAhSon Aug 8, 2024
0442816
Fix generate with `inputs_embeds` as input (#32493)
molbap Aug 8, 2024
0164560
Fixed test `test_static_cache_exportability` with torch 2.4.0 (#32516)
guangy10 Aug 8, 2024
54ac39c
Fix code example to load bigcode starcoder2 7b (#32474)
JuanFKurucz Aug 8, 2024
85817d9
[docs] Translation guide (#32547)
stevhliu Aug 8, 2024
838d141
Gemma2: fix FA2 generation (#32553)
zucchini-nlp Aug 9, 2024
7728b78
Fix a bug in Qwen2Audio (#32552)
faychu Aug 9, 2024
e4522fe
fix slow integration gemma2 test (#32534)
ArthurZucker Aug 9, 2024
e7f4ace
fix non contiguous tensor value error in save_pretrained (#32422)
congcongke Aug 9, 2024
48101cf
🌐 [i18n-KO] Translated `agent.md` to Korean (#32351)
Jwaminju Aug 9, 2024
7c11491
Add new model (#32615)
younesbelkada Aug 12, 2024
8f2b6d5
Fix: FA2 with packed training (#32487)
zucchini-nlp Aug 12, 2024
342e3f9
Fix sliding window attention used in Gemma2FlashAttention2 (#32522)
brcps12 Aug 12, 2024
bd251e4
fix: Fixed conditional check for `encodec` model names (#32581)
Sai-Suraj-27 Aug 12, 2024
e31a7a2
Fix `.push_to_hub(..., create_pr=True, revision="my-branch")` when cr…
Wauplin Aug 12, 2024
50837f2
Bump aiohttp from 3.9.4 to 3.10.2 in /examples/research_projects/deci…
dependabot[bot] Aug 12, 2024
8a3c55e
Bump torch from 1.13.1 to 2.2.0 in /examples/research_projects/visual…
dependabot[bot] Aug 12, 2024
b7ea171
Cleanup tool calling documentation and rename doc (#32337)
Rocketknight1 Aug 12, 2024
4996990
🌐 [i18n-KO] Translated `deepspeed.md` to Korean (#32431)
4N3MONE Aug 12, 2024
7f777ab
🌐 [i18n-KO] Translated `awq.md`to Korean (#32324)
ahnjj Aug 12, 2024
ce4b288
fix: Fixed failing `test_find_base_model_checkpoint` (#32638)
Sai-Suraj-27 Aug 12, 2024
126cbdb
Bump tensorflow from 2.11.1 to 2.12.1 in /examples/research_projects/…
dependabot[bot] Aug 12, 2024
f1c8542
"to be not" -> "not to be" (#32636)
qgallouedec Aug 12, 2024
2a5a6ad
fix: Updated the `is_torch_mps_available()` function to include `min_…
Sai-Suraj-27 Aug 12, 2024
a29eabd
Expand inputs in processors for VLMs (#30962)
zucchini-nlp Aug 13, 2024
29c3a0f
Automatically add `transformers` tag to the modelcard (#32623)
LysandreJik Aug 13, 2024
a5a8291
Fix tests (#32649)
molbap Aug 13, 2024
b5016d5
fix tensors on different devices in `WhisperGenerationMixin` (#32316)
faaany Aug 13, 2024
481e156
Add support for GrokAdamW optimizer (#32521)
ehartford Aug 13, 2024
cc25757
Add Depth Anything V2 Metric models (#32126)
bt2513 Aug 13, 2024
c3cd9d8
Fix: Fixed directory path for utils folder in `test_tokenization_util…
Sai-Suraj-27 Aug 13, 2024
5bcbdff
Modify ProcessorTesterMixin for better generalization (#32637)
yonigozlan Aug 13, 2024
9d2ab88
TF_Deberta supporting mixed precision (#32618)
pinesnow72 Aug 13, 2024
c135783
Fix tests recurrent (#32651)
molbap Aug 13, 2024
fe874ae
Changes from old ROCm main
amathews-amd Jul 22, 2024
8c9bb1a
Add skip if rocm (#38)
Cemberk Jul 24, 2024
d21e068
skip failures (#39)
Cemberk Jul 29, 2024
bb48693
Debug v4.43 rocm (#40) (#42)
Cemberk Aug 1, 2024
3 changes: 2 additions & 1 deletion .circleci/config.yml
@@ -142,6 +142,7 @@ jobs:
- run: python utils/custom_init_isort.py --check_only
- run: python utils/sort_auto_mappings.py --check_only
- run: python utils/check_doc_toc.py
- run: python utils/check_docstrings.py --check_all

check_repository_consistency:
working_directory: ~/transformers
@@ -190,4 +191,4 @@ workflows:
- check_circleci_user
- check_code_quality
- check_repository_consistency
- fetch_all_tests
- fetch_all_tests
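
The new `utils/check_docstrings.py --check_all` step (added both here and in the Makefile further down) can be reproduced locally before pushing. A minimal sketch, assuming a transformers checkout at the repository root; per the "Repo checks: skip docstring checks if not in the diff (#32328)" commit above, `--check_all` appears to force the check over every docstring rather than only those touched by a diff:

# Reproduce the CI quality checks locally (sketch)
python utils/custom_init_isort.py --check_only
python utils/sort_auto_mappings.py --check_only
python utils/check_doc_toc.py
python utils/check_docstrings.py --check_all   # newly added step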
2 changes: 1 addition & 1 deletion .github/ISSUE_TEMPLATE/i18n.md
@@ -34,7 +34,7 @@ Some notes:

## Tutorial section
- [ ] [pipeline_tutorial.md](https://github.com/huggingface/transformers/blob/main/docs/source/en/pipeline_tutorial.md)
- [ ] [autoclass_tutorial.md](https://github.com/huggingface/transformers/blob/master/docs/source/autoclass_tutorial.md)
- [ ] [autoclass_tutorial.md](https://github.com/huggingface/transformers/blob/main/docs/source/en/autoclass_tutorial.md)
- [ ] [preprocessing.md](https://github.com/huggingface/transformers/blob/main/docs/source/en/preprocessing.md)
- [ ] [training.md](https://github.com/huggingface/transformers/blob/main/docs/source/en/training.md)
- [ ] [accelerate.md](https://github.com/huggingface/transformers/blob/main/docs/source/en/accelerate.md)
2 changes: 1 addition & 1 deletion .github/workflows/self-pr-slow-ci.yml
@@ -4,7 +4,7 @@ on:
pull_request:
paths:
- "src/transformers/models/*/modeling_*.py"
- "tests/models/*/test_*.py"
- "tests/**/test_*.py"

concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
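The widened glob means the slow CI now triggers on test files anywhere under `tests/`, not only under `tests/models/`. A sketch of the difference, using illustrative paths:

# Old pattern: tests/models/*/test_*.py
#   tests/models/bert/test_modeling_bert.py   -> triggered before and after this change
# New pattern: tests/**/test_*.py
#   tests/generation/test_utils.py            -> triggered only after this change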
5 changes: 4 additions & 1 deletion .github/workflows/self-push-amd-mi210-caller.yml
@@ -14,11 +14,14 @@ on:
- ".github/**"
- "templates/**"
- "utils/**"
pull_request:
types: [opened, reopened, synchronize]
branches: ["main"]

jobs:
run_amd_ci:
name: AMD mi210
if: (cancelled() != true) && ((github.event_name == 'workflow_run') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_amd_push_ci_caller')))
if: (cancelled() != true) && (github.event_name != 'schedule') && (github.event_name == 'pull_request')
uses: ./.github/workflows/self-push-amd.yml
with:
gpu_flavor: mi210
7 changes: 5 additions & 2 deletions .github/workflows/self-push-amd-mi250-caller.yml
@@ -13,12 +13,15 @@ on:
- "tests/**"
- ".github/**"
- "templates/**"
- "utils/**"
- "utils/**"
pull_request:
types: [opened, reopened, synchronize]
branches: ["main"]

jobs:
run_amd_ci:
name: AMD mi250
if: (cancelled() != true) && ((github.event_name == 'workflow_run') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_amd_push_ci_caller')))
if: (cancelled() != true) && (github.event_name != 'schedule') && (github.event_name == 'pull_request')
uses: ./.github/workflows/self-push-amd.yml
with:
gpu_flavor: mi250
88 changes: 30 additions & 58 deletions .github/workflows/self-push-amd.yml
@@ -18,37 +18,22 @@ env:
HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}

jobs:
check_runner_status:
name: Check Runner Status
runs-on: ubuntu-22.04
steps:
- name: Checkout transformers
uses: actions/checkout@v4
with:
fetch-depth: 2

- name: Check Runner Status
run: python utils/check_self_hosted_runner.py --target_runners amd-mi210-single-gpu-ci-runner-docker --token ${{ secrets.ACCESS_REPO_INFO_TOKEN }}

check_runners:
name: Check Runners
needs: check_runner_status
strategy:
matrix:
machine_type: [single-gpu, multi-gpu]
runs-on: [self-hosted, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}']
runs-on: rocm
container:
image: huggingface/transformers-pytorch-amd-gpu-push-ci # <--- We test only for PyTorch for now
options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
options: --device /dev/kfd --device /dev/dri --env HIP_VISIBLE_DEVICES --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
steps:
- name: ROCM-SMI
run: |
rocm-smi
- name: ROCM-INFO
run: |
rocminfo | grep "Agent" -A 14
- name: Show ROCR environment
- name: Show HIP environment
run: |
echo "HIP: $HIP_VISIBLE_DEVICES"
echo "ROCR: $ROCR_VISIBLE_DEVICES"

setup_gpu:
@@ -57,14 +42,21 @@ jobs:
strategy:
matrix:
machine_type: [single-gpu, multi-gpu]
runs-on: [self-hosted, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}']
runs-on: rocm
container:
image: huggingface/transformers-pytorch-amd-gpu-push-ci # <--- We test only for PyTorch for now
options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
options: --device /dev/kfd --device /dev/dri --env HIP_VISIBLE_DEVICES --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
test_map: ${{ steps.set-matrix.outputs.test_map }}
steps:
- name: Remove transformers repository (installed during docker image build)
working-directory: /
shell: bash
run: |
rm -r transformers
git clone https://github.com/ROCmSoftwarePlatform/transformers.git

# Necessary to get the correct branch name and commit SHA for `workflow_run` event
# We also take into account the `push` event (we might want to test some changes in a branch)
- name: Prepare custom environment variables
@@ -155,11 +147,23 @@ jobs:
matrix:
folders: ${{ fromJson(needs.setup_gpu.outputs.matrix) }}
machine_type: [single-gpu, multi-gpu]
runs-on: [self-hosted, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}']
runs-on: rocm
container:
image: huggingface/transformers-pytorch-amd-gpu-push-ci # <--- We test only for PyTorch for now
options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
options: --device /dev/kfd --device /dev/dri --env HIP_VISIBLE_DEVICES --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
steps:

- name: Remove transformers repository (installed during docker image build)
working-directory: /
shell: bash
run: |
rm -r transformers
git clone https://github.com/ROCmSoftwarePlatform/transformers.git

- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
working-directory: /transformers
run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .

# Necessary to get the correct branch name and commit SHA for `workflow_run` event
# We also take into account the `push` event (we might want to test some changes in a branch)
- name: Prepare custom environment variables
@@ -192,10 +196,6 @@ jobs:
git checkout ${{ env.CI_SHA }}
echo "log = $(git log -n 1)"

- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
working-directory: /transformers
run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .

- name: Echo folder ${{ matrix.folders }}
shell: bash
# For folders like `models/bert`, set an env. var. (`matrix_folders`) to `models_bert`, which will be used to
@@ -209,13 +209,11 @@
echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV

- name: ROCM-SMI
run: |
rocm-smi
- name: ROCM-INFO
run: |
rocminfo | grep "Agent" -A 14
- name: Show ROCR environment
- name: Show HIP environment
run: |
echo "HIP: $HIP_VISIBLE_DEVICES"
echo "ROCR: $ROCR_VISIBLE_DEVICES"

- name: Environment
@@ -246,10 +244,9 @@ jobs:

send_results:
name: Send results to webhook
runs-on: ubuntu-22.04
runs-on: ubuntu-latest
if: always()
needs: [
check_runner_status,
check_runners,
setup_gpu,
run_models_gpu,
@@ -261,7 +258,6 @@
shell: bash
# For the meaning of these environment variables, see the job `Setup`
run: |
echo "Runner availability: ${{ needs.check_runner_status.result }}"
echo "Setup status: ${{ needs.setup_gpu.result }}"
echo "Runner status: ${{ needs.check_runners.result }}"

@@ -303,27 +299,3 @@ jobs:
git checkout ${{ env.CI_SHA }}
echo "log = $(git log -n 1)"

- uses: actions/download-artifact@v4
- name: Send message to Slack
env:
CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }}
CI_SLACK_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID }}
CI_SLACK_CHANNEL_ID_DAILY: ${{ secrets.CI_SLACK_CHANNEL_ID_DAILY }}
CI_SLACK_CHANNEL_ID_AMD: ${{ secrets.CI_SLACK_CHANNEL_ID_AMD }}
CI_SLACK_CHANNEL_DUMMY_TESTS: ${{ secrets.CI_SLACK_CHANNEL_DUMMY_TESTS }}
CI_SLACK_REPORT_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID_AMD }}
ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
CI_EVENT: Push CI (AMD) - ${{ inputs.gpu_flavor }}
CI_TITLE_PUSH: ${{ github.event.head_commit.message }}
CI_TITLE_WORKFLOW_RUN: ${{ github.event.workflow_run.head_commit.message }}
CI_SHA: ${{ env.CI_SHA }}
RUNNER_STATUS: ${{ needs.check_runner_status.result }}
RUNNER_ENV_STATUS: ${{ needs.check_runners.result }}
SETUP_STATUS: ${{ needs.setup_gpu.result }}

# We pass `needs.setup_gpu.outputs.matrix` as the argument. A processing in `notification_service.py` to change
# `models/bert` to `models_bert` is required, as the artifact names use `_` instead of `/`.
run: |
pip install slack_sdk
pip show slack_sdk
python utils/notification_service.py "${{ needs.setup_gpu.outputs.matrix }}"
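
The `models/bert` → `models_bert` mapping referenced twice above (in the `Echo folder` step and in the comment before `notification_service.py`) can be sketched as standalone shell. The workflow's actual substitution command is truncated in this diff, so treat the replacement below as an assumed equivalent:

# Sketch: normalize a matrix folder into the artifact-name form
folder="models/bert"                    # value from ${{ matrix.folders }}
matrix_folders=${folder//\//_}          # '/' -> '_' yields models_bert
echo "matrix_folders=$matrix_folders"   # the workflow appends this to $GITHUB_ENV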
23 changes: 6 additions & 17 deletions .github/workflows/trufflehog.yml
@@ -10,20 +10,9 @@ jobs:
trufflehog:
runs-on: ubuntu-latest
steps:
- shell: bash
run: |
if [ "${{ github.event_name }}" == "push" ]; then
echo "depth=$(($(jq length <<< '${{ toJson(github.event.commits) }}') + 2))" >> $GITHUB_ENV
echo "branch=${{ github.ref_name }}" >> $GITHUB_ENV
fi
if [ "${{ github.event_name }}" == "pull_request" ]; then
echo "depth=$((${{ github.event.pull_request.commits }}+2))" >> $GITHUB_ENV
echo "branch=${{ github.event.pull_request.head.ref }}" >> $GITHUB_ENV
fi
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{env.branch}}
fetch-depth: ${{env.depth}}
- name: Secret Scanning
uses: trufflesecurity/trufflehog@main
- name: Checkout code
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Secret Scanning
uses: trufflesecurity/trufflehog@main
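
The rewritten job replaces the per-event `depth`/`branch` computation with `fetch-depth: 0`, which makes `actions/checkout` clone the full history so the secret scanner can walk every commit. A local equivalent, as a sketch:

# Full-history clone, matching fetch-depth: 0 (a shallow clone would hide older commits)
git clone https://github.com/huggingface/transformers.git
cd transformers
git rev-list --count HEAD   # the entire commit history is available for scanning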
1 change: 1 addition & 0 deletions Makefile
@@ -56,6 +56,7 @@ quality:
python utils/custom_init_isort.py --check_only
python utils/sort_auto_mappings.py --check_only
python utils/check_doc_toc.py
python utils/check_docstrings.py --check_all


# Format source code automatically and check is there are any problems left that need manual fixing
2 changes: 1 addition & 1 deletion docker/consistency.dockerfile
@@ -8,7 +8,7 @@ RUN pip install uv && uv venv && uv pip install --no-cache-dir -U pip setuptools
RUN uv pip install --no-cache-dir --upgrade 'torch' --index-url https://download.pytorch.org/whl/cpu
# tensorflow pin matching setup.py
RUN uv pip install --no-cache-dir "tensorflow-cpu<2.16" "tf-keras<2.16"
RUN uv pip install --no-cache-dir "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[flax,quality,vision,testing]"
RUN uv pip install --no-cache-dir "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[flax,quality,torch-speech,vision,testing]"
RUN git lfs install

RUN pip uninstall -y transformers
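The extras list gains `torch-speech`, in line with the "Docker: add `speech` dep to the consistency docker image (#32374)" commit above. Installing the same set outside Docker, as a sketch (PEP 508 direct-reference form; `main` stands in for `${REF}`):

# Sketch: plain-pip equivalent of the Dockerfile's uv install step
pip install "transformers[flax,quality,torch-speech,vision,testing] @ git+https://github.com/huggingface/transformers.git@main"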
2 changes: 1 addition & 1 deletion docker/transformers-all-latest-gpu/Dockerfile
@@ -9,7 +9,7 @@ SHELL ["sh", "-lc"]
# The following `ARG` are mainly used to specify the versions explicitly & directly in this docker file, and not meant
# to be used as arguments for docker build (so far).

ARG PYTORCH='2.3.0'
ARG PYTORCH='2.4.0'
# (not always a valid torch version)
ARG INTEL_TORCH_EXT='2.3.0'
# Example: `cu102`, `cu113`, etc.
2 changes: 1 addition & 1 deletion docker/transformers-pytorch-gpu/Dockerfile
@@ -11,7 +11,7 @@ ARG REF=main
RUN git clone https://github.com/huggingface/transformers && cd transformers && git checkout $REF

# If set to nothing, will install the latest version
ARG PYTORCH='2.3.0'
ARG PYTORCH='2.4.0'
ARG TORCH_VISION=''
ARG TORCH_AUDIO=''
# Example: `cu102`, `cu113`, etc.
12 changes: 11 additions & 1 deletion docs/source/en/_toctree.yml
@@ -99,6 +99,8 @@
sections:
- local: generation_strategies
title: Customize the generation strategy
- local: kv_cache
title: Best Practices for Generation with Cache
title: Generation
- isExpanded: false
sections:
@@ -118,7 +120,7 @@
- local: custom_models
title: Share a custom model
- local: chat_templating
title: Templates for chat models
title: Chat templates
- local: trainer
title: Trainer
- local: sagemaker
@@ -368,6 +370,8 @@
title: ESM
- local: model_doc/falcon
title: Falcon
- local: model_doc/falcon_mamba
title: FalconMamba
- local: model_doc/fastspeech2_conformer
title: FastSpeech2Conformer
- local: model_doc/flan-t5
@@ -436,6 +440,8 @@
title: MADLAD-400
- local: model_doc/mamba
title: Mamba
- local: model_doc/mamba2
title: mamba2
- local: model_doc/marian
title: MarianMT
- local: model_doc/markuplm
@@ -466,6 +472,8 @@
title: MT5
- local: model_doc/mvp
title: MVP
- local: model_doc/nemotron
title: Nemotron
- local: model_doc/nezha
title: NEZHA
- local: model_doc/nllb
@@ -500,6 +508,8 @@
title: QDQBert
- local: model_doc/qwen2
title: Qwen2
- local: model_doc/qwen2_audio
title: Qwen2Audio
- local: model_doc/qwen2_moe
title: Qwen2MoE
- local: model_doc/rag