Merge branch 'main' into add-modernbert-onnx

huggingface · Dec 20, 2024 · 3f50f31 · 3f50f31
2 parents baaca83 + d21256c
commit 3f50f31
Show file tree

Hide file tree

Showing 48 changed files with 2,373 additions and 101 deletions.
diff --git a/.github/workflows/test_bettertransformer.yml b/.github/workflows/test_bettertransformer.yml
@@ -16,7 +16,7 @@ jobs:
       fail-fast: false
       matrix:
         python-version: [3.9]
-        os: [ubuntu-20.04, macos-14]
+        os: [ubuntu-20.04]
 
     runs-on: ${{ matrix.os }}
     steps:

diff --git a/.github/workflows/test_executorch_export.yml b/.github/workflows/test_executorch_export.yml
@@ -0,0 +1,35 @@
+name: ExecuTorch Export / Python - Test
+
+on:
+  push:
+    branches: [main]
+  pull_request:
+    branches: [main]
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
+  cancel-in-progress: true
+
+jobs:
+  build:
+    strategy:
+      fail-fast: false
+      matrix:
+        python-version: ['3.10', '3.11', '3.12']
+        os: [macos-15]
+
+    runs-on: ${{ matrix.os }}
+    steps:
+      - uses: actions/checkout@v2
+      - name: Setup Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v2
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Install dependencies for ExecuTorch
+        run: |
+          pip install .[tests,exporters-executorch]
+          pip list
+      - name: Run tests
+        working-directory: tests
+        run: |
+          RUN_SLOW=1 pytest executorch/export/test_*.py -s -vvvv --durations=0
diff --git a/.github/workflows/test_executorch_runtime.yml b/.github/workflows/test_executorch_runtime.yml
@@ -0,0 +1,42 @@
+name: ExecuTorch Runtime / Python - Test
+
+on:
+  push:
+    branches: [main]
+  pull_request:
+    branches: [main]
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
+  cancel-in-progress: true
+
+jobs:
+  build:
+    strategy:
+      fail-fast: false
+      matrix:
+        python-version: ['3.10', '3.11', '3.12']
+        os: [macos-15]
+        test-modeling:
+          - test_modeling_gemma2.py
+          - test_modeling_gemma.py
+          - test_modeling_llama.py
+          - test_modeling_olmo.py
+          - test_modeling.py
+          - test_modeling_qwen2.py
+
+    runs-on: ${{ matrix.os }}
+    steps:
+      - uses: actions/checkout@v2
+      - name: Setup Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v2
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Install dependencies for ExecuTorch
+        run: |
+          pip install .[tests,exporters-executorch]
+          pip list
+      - name: Run tests
+        working-directory: tests
+        run: |
+          RUN_SLOW=1 pytest executorch/runtime/${{ matrix.test-modeling }} -s -vvvv --durations=0
diff --git a/.github/workflows/test_onnxruntime.yml b/.github/workflows/test_onnxruntime.yml
@@ -18,7 +18,7 @@ jobs:
       fail-fast: false
       matrix:
         transformers-version: ["latest"]
-        os: [ubuntu-20.04, windows-2019, macos-15]
+        os: [ubuntu-20.04, windows-2019]  # TODO: re-add macos-15 after the MPS fix
         include:
           - transformers-version: "4.36.*"
             os: ubuntu-20.04

diff --git a/docs/Dockerfile b/docs/Dockerfile
@@ -1,4 +1,4 @@
-FROM nikolaik/python-nodejs:python3.9-nodejs18
+FROM nikolaik/python-nodejs:python3.11-nodejs23
 
 ARG commit_sha
 ARG clone_url
@@ -8,4 +8,4 @@ RUN python3 -m pip install --no-cache-dir --upgrade pip
 RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/doc-builder.git
 
 RUN git clone $clone_url && cd optimum && git checkout $commit_sha
-RUN python3 -m pip install --no-cache-dir ./optimum[onnxruntime,benchmark,quality,exporters-tf,doc-build,diffusers]
+RUN python3 -m pip install --no-cache-dir ./optimum[onnxruntime,benchmark,quality,exporters-executorch,doc-build,diffusers]
diff --git a/docs/source/_toctree.yml b/docs/source/_toctree.yml
@@ -81,6 +81,23 @@
       title: Reference
       isExpanded: false
     title: "ONNX"
+  - sections:
+    - local: exporters/executorch/overview
+      title: Overview
+    - sections:
+      - local: exporters/executorch/usage_guides/export_a_model
+        title: Export a model to ExecuTorch
+      - local: exporters/executorch/usage_guides/contribute
+        title: Add support for exporting an architecture to ExecuTorch
+      title: How-to guides
+    - sections:
+      - local: exporters/executorch/package_reference/configuration
+        title: ExecuTorch configurations
+      - local: exporters/executorch/package_reference/export
+        title: Export functions
+      title: Reference
+      isExpanded: false
+    title: "ExecuTorch"
   - sections:
     - local: exporters/tflite/overview
       title: Overview

diff --git a/docs/source/exporters/executorch/overview.mdx b/docs/source/exporters/executorch/overview.mdx
@@ -0,0 +1,26 @@
+<!--Copyright 2024 The HuggingFace Team. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
+the License. You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
+an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
+specific language governing permissions and limitations under the License.
+-->
+
+# Overview
+
+🤗 Optimum handles the export of PyTorch models to ExecuTorch in the `exporters.executorch` module. It provides classes, functions, and a command line interface to perform the export easily.
+
+Supported architectures from [🤗 Transformers](https://huggingface.co/docs/transformers/index):
+
+- Gemma
+- Gemma2
+- Llama2
+- Llama3 (Llama3.2)
+- OLMo
+- Qwen2 (Qwen2.5)
+
+Many more models are supported by ExecuTorch, and we will add them to Optimum over time. Read more at [pytorch/executorch/examples/](https://github.com/pytorch/executorch/tree/main/examples).
diff --git a/docs/source/exporters/executorch/package_reference/configuration.mdx b/docs/source/exporters/executorch/package_reference/configuration.mdx
@@ -0,0 +1,54 @@
+<!--Copyright 2024 The HuggingFace Team. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
+the License. You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
+an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
+specific language governing permissions and limitations under the License.
+-->
+
+# Configuration for ExecuTorch Export
+
+ExecuTorch export provides a flexible configuration mechanism through dynamic registration, enabling users to have
+complete control over the export process. The configuration system is divided into task configurations and recipe
+configurations, each addressing specific aspects of the export pipeline.
+
+
+## Task Configurations
+
+Task configurations determine how a Hugging Face model should be loaded and prepared for export, tailored to specific tasks.
+
+For instance, when exporting a model for a text generation task, the provided configuration utilizes **static caching** and
+**SDPA (Scaled Dot-Product Attention)** for inference optimization.
+
+By leveraging task configurations, users can ensure that their models are appropriately prepared for efficient execution on
+the ExecuTorch backend.
+
+[[autodoc]] exporters.executorch.task_registry.discover_tasks
+
+[[autodoc]] exporters.executorch.task_registry.register_task
+
+[[autodoc]] exporters.executorch.tasks.causal_lm.load_causal_lm_model
+
+
+## Recipe Configurations
+
+Recipe configurations control the specifics of lowering an eager PyTorch module to the ExecuTorch backend. These
+configurations allow users to:
+
+- Specify whether and how to **quantize** the model.
+- Delegate computation to various accelerators, such as **CPU**, **GPU**, **NPU**, **DSP**, and others.
+- Define **custom transformation passes**.
+- Implement advanced techniques like memory planning algorithms to optimize resource utilization.
+
+[[autodoc]] exporters.executorch.recipe_registry.discover_recipes
+
+[[autodoc]] exporters.executorch.recipe_registry.register_recipe
+
+[[autodoc]] exporters.executorch.recipes.xnnpack.export_to_executorch_with_xnnpack
+
+The combination of task and recipe configurations ensures that users can customize both the high-level task setup
+and the low-level export details to suit their deployment requirements.
diff --git a/docs/source/exporters/executorch/package_reference/export.mdx b/docs/source/exporters/executorch/package_reference/export.mdx
@@ -0,0 +1,26 @@
+<!--Copyright 2024 The HuggingFace Team. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
+the License. You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
+an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
+specific language governing permissions and limitations under the License.
+-->
+
+# Export functions
+
+## Main functions
+
+[[autodoc]] exporters.executorch.convert.export_to_executorch
+
+The primary export function is designed to be **model- and task-independent** as well as **optimization-agnostic**, providing a
+highly flexible and modular interface for exporting Hugging Face models to the ExecuTorch backend.
+
+This approach highlights the **composability** of the ExecuTorch export pipeline, where dynamically registered **task configurations**
+specify how a Hugging Face model is prepared, and **recipe configurations** encapsulate device-specific optimizations during export. This
+separation allows users to customize the export process without altering the core function.
+
+For more details on task and recipe configurations, see the [Configuration for ExecuTorch Export](./configuration.mdx).
diff --git a/docs/source/exporters/executorch/usage_guides/contribute.mdx b/docs/source/exporters/executorch/usage_guides/contribute.mdx
@@ -0,0 +1,57 @@
+<!--Copyright 2024 The HuggingFace Team. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
+the License. You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
+an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
+specific language governing permissions and limitations under the License.
+-->
+
+# Adding support for an unsupported architecture
+
+We welcome contributions to extend the functionality of ExecuTorch export. This guide provides high-level instructions for contributors who want to:
+
+1. Export a new model that is not currently supported.
+2. Add new recipes or support a new task for export.
+
+---
+
+## Exporting a New Model
+
+If you want to export a model that is not already supported by the library, follow these steps:
+
+### Step 1: Export and Test the Model
+1. Attempt to export and lower the model using an existing task and recipe. On success, it will store the exported model in a `.pte` file.
+2. Add a test case for the model in the appropriate test suite.
+   - For example, you can make sure tests pass for the new `my_new_model` by running:
+     ```bash
+     pytest tests/executorch/export/test_*.py -k "test_my_new_model"  # doctest: +SKIP
+     pytest tests/executorch/runtime/test_*.py -k "test_my_new_model"  # doctest: +SKIP
+     ```
+
+### Step 2: Handle Export Failures
+1. If the export fails in Step 1, report the issue by opening a GitHub issue.
+2. If the issue requires changes to the model’s architecture or its Hugging Face implementation, these modifications may be made upstream in the Hugging Face Transformers library.
+
+---
+
+## Adding New Recipes or Tasks
+
+To extend ExecuTorch with new recipes or tasks, follow these guidelines:
+
+### Registering a New Recipe
+You can add a custom recipe to define specific optimizations or configurations for exporting models. Below is an example:
+
+```python
+from exporters.executorch import register_recipe
+
+@register_recipe("my_custom_recipe")
+def export_with_custom_recipe(model, config, *args, **kwargs):
+    ...  # Example: apply a custom quantization here
+```
+
+### Registering a Task
+The task registration process is the same as adding a recipe. In addition, you may need to implement a new `ExecuTorchModelForXXX` class.