Commit: Merge branch 'main' into token-cache
paul-grundmann authored Jun 5, 2024
2 parents d69df32 + 0b4d12b commit c6b66dc
Showing 49 changed files with 1,080 additions and 291 deletions.
22 changes: 22 additions & 0 deletions .github/scripts/build_sdist_and_wheel.sh
@@ -0,0 +1,22 @@
#!/bin/bash

# Build sdist and wheel
python -m pip install -U pip
python -m pip install build
python -m build

# Check sdist install and imports
mkdir -p test-sdist
cd test-sdist
python -m venv venv-sdist
venv-sdist/bin/python -m pip install ../dist/outlines-*.tar.gz
venv-sdist/bin/python -c "import outlines"
cd ..

# Check wheel install and imports
mkdir -p test-wheel
cd test-wheel
python -m venv venv-wheel
venv-wheel/bin/python -m pip install ../dist/outlines-*.whl
venv-wheel/bin/python -c "import outlines"
cd ..
57 changes: 57 additions & 0 deletions .github/workflows/asv_benchmark_pr.yml
@@ -0,0 +1,57 @@
name: Benchmark PR

on:
  pull_request:
    branches: [main]
  workflow_dispatch:
env:
  PYTHON_VERSION: "3.10"
  WORKING_DIR: ${{ github.workspace }}/benchmarks
  BENCHMARKS_OUTPUT: ${{ github.workspace }}/benchmarks_output

jobs:
  benchmark-pr:
    runs-on: ubuntu-latest
    if: contains(github.event.pull_request.labels.*.name, 'run_benchmarks') || github.event_name == 'workflow_dispatch' || github.event_name == 'workflow_run'

    defaults:
      run:
        working-directory: ${{ env.WORKING_DIR }}

    steps:

      - name: Checkout repository
        uses: actions/checkout@v3
        with:
          fetch-depth: 0

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: ${{ env.PYTHON_VERSION }}

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install asv virtualenv lf-asv-formatter
      - name: Create ASV machine config file
        run: asv machine --machine gh-runner --yes

      - name: Run Benchmarks - `PR HEAD` vs `main`
        run: |
          # prepare main branch for comparison
          git remote add upstream https://github.com/${{ github.repository }}.git
          git fetch upstream main
          # Run benchmarks, allow errors, they will be caught in the next step
          asv continuous upstream/main HEAD \
            --no-stats --interleave-rounds -a repeat=3 || true
      - name: BENCHMARK RESULTS
        run: |
          asv compare --factor=1.1 --no-stats --split upstream/main HEAD | tee ${{ env.BENCHMARKS_OUTPUT }}
          if grep -q "Benchmarks that have got worse" "${{ env.BENCHMARKS_OUTPUT }}"; then
            echo "Performance degradation detected!"
            exit 1
          fi
21 changes: 2 additions & 19 deletions .github/workflows/release_pypi.yaml
@@ -15,28 +15,11 @@ jobs:
        uses: actions/setup-python@v2
        with:
          python-version: "3.10"
      - name: Build sdist and wheel
        run: |
          python -m pip install -U pip
          python -m pip install build
          python -m build
      - name: Build SDist and Wheel
        run: ./.github/scripts/build_sdist_and_wheel.sh
      - name: Check that the package version matches the Release name
        run: |
          grep -Rq "^Version: ${GITHUB_REF:10}$" outlines.egg-info/PKG-INFO
      - name: Check sdist install and imports
        run: |
          mkdir -p test-sdist
          cd test-sdist
          python -m venv venv-sdist
          venv-sdist/bin/python -m pip install ../dist/outlines-*.tar.gz
          venv-sdist/bin/python -c "import outlines"
      - name: Check wheel install and imports
        run: |
          mkdir -p test-wheel
          cd test-wheel
          python -m venv venv-wheel
          venv-wheel/bin/python -m pip install ../dist/outlines-*.whl
          venv-wheel/bin/python -c "import outlines"
      - name: Publish to PyPi
        uses: pypa/[email protected]
        with:
8 changes: 8 additions & 0 deletions .github/workflows/tests.yml
@@ -88,3 +88,11 @@ jobs:
name: html-report
path: htmlcov
if: ${{ failure() }}

  build-wheel:
    name: Build Wheel and Test SDist
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - name: Build SDist and Wheel
        run: ./.github/scripts/build_sdist_and_wheel.sh
1 change: 1 addition & 0 deletions .gitignore
@@ -6,3 +6,4 @@ docs/build
.idea/
*.gguf
.venv
benchmarks/results
1 change: 1 addition & 0 deletions .pre-commit-config.yaml
@@ -30,3 +30,4 @@ repos:
      - id: mypy
        args: [--allow-redefinition]
        exclude: ^examples/
        additional_dependencies: [types-tqdm]
2 changes: 1 addition & 1 deletion README.md
@@ -40,7 +40,7 @@ First time here? Go to our [setup guide](https://outlines-dev.github.io/outlines
- [x] 🚀 [Serve with vLLM](https://outlines-dev.github.io/outlines/reference/vllm), with official Docker image, [`outlinesdev/outlines`](https://hub.docker.com/r/outlinesdev/outlines)!


Outlines 〰 has new releases and features coming every week. Make sure to ⭐ star and 👀 watch this repository, follow [@dottxtai][twitter] to stay up to date!
Outlines 〰 has new releases and features coming every week. Make sure to ⭐ star and 👀 watch this repository, follow [@dottxtai][dottxt-twitter] to stay up to date!

## Why should I use structured generation?

Empty file added benchmarks/__init__.py
Empty file.
20 changes: 20 additions & 0 deletions benchmarks/asv.conf.json
@@ -0,0 +1,20 @@
{
"version": 1,
"project": "Outlines",
"project_url": "https://outlines-dev.github.io/outlines/",
"repo": "..",
"branches": [
"HEAD"
],
"build_command": [
"python -mpip install .[test]",
"PIP_NO_BUILD_ISOLATION=false python -mpip wheel --no-deps --no-index -w {build_cache_dir} {build_dir}",
],
"environment_type": "virtualenv",
"show_commit_url": "https://github.com/outlines-dev/outlines/commit/",
"benchmark_dir": ".",
"env_dir": "env",
"results_dir": "results",
"html_dir": "html",
"build_cache_size": 8
}
@@ -1,11 +1,8 @@
import pytest
from outlines.caching import cache_disabled
from outlines.fsm.guide import RegexGuide
from outlines.fsm.json_schema import build_regex_from_schema

import outlines

outlines.disable_cache()

from outlines.fsm.guide import RegexGuide # noqa: E402
from outlines.fsm.json_schema import build_regex_from_schema # noqa: E402
from .common import ensure_numba_compiled, setup_tokenizer # noqa: E402

simple_schema = """{
"$defs": {
@@ -63,30 +60,22 @@
"required": ["id", "work", "recording_artists"]
}"""


schemas = dict(simple_schema=simple_schema, complex_schema=complex_schema)


@pytest.mark.parametrize("schema_name", schemas.keys())
def test_benchmark_json_schema_to_regex(benchmark, ensure_numba_compiled, schema_name):
"""Benchmark convert json schema to regex"""
schema = schemas[schema_name]
benchmark.pedantic(
build_regex_from_schema,
args=(schema,),
rounds=8,
)
class JsonSchemaBenchmark:
params = schemas.keys()

def setup(self, schema_name):
self.tokenizer = setup_tokenizer()
self.schema = schemas[schema_name]
ensure_numba_compiled(self.tokenizer)

@cache_disabled()
def time_json_schema_to_regex(self, schema_name):
build_regex_from_schema(self.schema)

@pytest.mark.parametrize("schema_name", schemas.keys())
def test_benchmark_json_schema_to_fsm(
benchmark, tokenizer, ensure_numba_compiled, schema_name
):
"""Benchmark compile json schema as FSM"""
schema = schemas[schema_name]
regex = build_regex_from_schema(schema)
benchmark.pedantic(
RegexGuide,
args=(regex, tokenizer),
rounds=8,
)
@cache_disabled()
def time_json_schema_to_fsm(self, schema_name):
regex = build_regex_from_schema(self.schema)
RegexGuide(regex, self.tokenizer)
34 changes: 34 additions & 0 deletions benchmarks/bench_numba_compile.py
@@ -0,0 +1,34 @@
import importlib

import interegular
import numba

from outlines.caching import cache_disabled
from outlines.fsm import regex

from .common import setup_tokenizer


class NumbaCompileBenchmark:
    def setup(self):
        self.tokenizer = setup_tokenizer()
        self.regex = regex
        original_njit = numba.njit

        def mock_njit(*args, **kwargs):
            kwargs["cache"] = False
            return original_njit(*args, **kwargs)

        self.original_njit = original_njit
        numba.njit = mock_njit
        importlib.reload(self.regex)
        self.regex_pattern, _ = self.regex.make_deterministic_fsm(
            interegular.parse_pattern("a").to_fsm().reduce()
        )

    def teardown(self):
        numba.njit = self.original_njit

    @cache_disabled()
    def time_compile_numba(self):
        self.regex.create_fsm_index_tokenizer(self.regex_pattern, self.tokenizer)
@@ -1,10 +1,7 @@
import pytest
from outlines.caching import cache_disabled
from outlines.fsm.guide import RegexGuide

import outlines

outlines.disable_cache()

from outlines.fsm.guide import RegexGuide # noqa: E402
from .common import ensure_numba_compiled, setup_tokenizer

regex_samples = {
"email": r"[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?",
@@ -19,14 +16,27 @@
}


@pytest.mark.parametrize("regex_name", regex_samples.keys())
def test_benchmark_regex_to_fsm(
    benchmark, tokenizer, ensure_numba_compiled, regex_name
):
    """Benchmark converting regex to FSM"""
    regex_str = regex_samples[regex_name]
    benchmark.pedantic(
        RegexGuide,
        args=(regex_str, tokenizer),
        rounds=8,
    )
class RegexGuideBenchmark:
    params = regex_samples.keys()

    def setup(self, pattern_name):
        self.tokenizer = setup_tokenizer()
        ensure_numba_compiled(self.tokenizer)
        self.pattern = regex_samples[pattern_name]

    @cache_disabled()
    def time_regex_to_guide(self, pattern_name):
        RegexGuide(self.pattern, self.tokenizer)


class MemoryRegexGuideBenchmark:
params = ["simple_phone", "complex_span_constrained_relation_extraction"]

def setup(self, pattern_name):
self.tokenizer = setup_tokenizer()
ensure_numba_compiled(self.tokenizer)
self.pattern = regex_samples[pattern_name]

@cache_disabled()
def peakmem_regex_to_guide(self, pattern_name):
RegexGuide(self.pattern, self.tokenizer)
5 changes: 1 addition & 4 deletions tests/benchmark/conftest.py → benchmarks/common.py
@@ -1,17 +1,14 @@
import pytest
from transformers import AutoTokenizer

from outlines.fsm.guide import RegexGuide
from outlines.models.transformers import TransformerTokenizer


@pytest.fixture
def tokenizer():
def setup_tokenizer():
    tokenizer = AutoTokenizer.from_pretrained("gpt2")
    return TransformerTokenizer(tokenizer)


@pytest.fixture
def ensure_numba_compiled(tokenizer):
RegexGuide("a", tokenizer)
return True
36 changes: 31 additions & 5 deletions docs/community/contribute.md
@@ -39,7 +39,7 @@ source .venv/bin/activate
Then install the dependencies in editable mode, and install the pre-commit hooks:

```python
pip install -e .[test]
pip install -e ".[test]"
pre-commit install
```

@@ -57,12 +57,38 @@ And run the code style checks:
pre-commit run --all-files
```

When modifying the code related to the index compilation, we kindly ask you to
post benchmarks before and after your changes. You can run benchmarks using:
### Benchmarking

```python
pytest --benchmark-only
```
Outlines uses [asv](https://asv.readthedocs.io) for automated benchmark testing. Benchmarks are run automatically before pull requests are merged to prevent performance degradation.

You can run the benchmark test suite locally with the following command:
```
asv run --config benchmarks/asv.conf.json
```

Run a specific test:
```
asv run --config benchmarks/asv.conf.json -b bench_json_schema.JsonSchemaBenchmark.time_json_schema_to_fsm
```

Profile a specific test:
```
asv run --config benchmarks/asv.conf.json --profile -b bench_json_schema.JsonSchemaBenchmark.time_json_schema_to_fsm
```

Compare to `origin/main`:
```
git fetch origin
asv continuous origin/main HEAD --config benchmarks/asv.conf.json
```
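
New benchmarks follow asv's class-based convention used throughout `benchmarks/`: a `params` attribute, a `setup` method for one-time work such as tokenizer loading, and `time_*` / `peakmem_*` methods containing only the code to measure. The sketch below illustrates the pattern; the module name `bench_example.py`, the `ExampleBenchmark` class, and the `digits` pattern are placeholders, not files or benchmarks that exist in the repository:

```python
# benchmarks/bench_example.py -- illustrative template, not an actual file in the repo
from outlines.caching import cache_disabled
from outlines.fsm.guide import RegexGuide

from .common import ensure_numba_compiled, setup_tokenizer

patterns = {"digits": r"[0-9]{4}"}


class ExampleBenchmark:
    # asv runs each benchmark method once per entry in `params`
    params = patterns.keys()

    def setup(self, pattern_name):
        # One-time work (tokenizer loading, numba warm-up) goes here so it is
        # excluded from the timed section.
        self.tokenizer = setup_tokenizer()
        ensure_numba_compiled(self.tokenizer)
        self.pattern = patterns[pattern_name]

    @cache_disabled()
    def time_regex_to_guide(self, pattern_name):
        # `time_*` methods are timed; `peakmem_*` methods track peak memory.
        RegexGuide(self.pattern, self.tokenizer)
```

Decorating the measured methods with `cache_disabled()` mirrors the benchmarks above, so results reflect the compilation work itself rather than Outlines' on-disk cache.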

#### ASV PR Behavior

- **View ASV Benchmark Results:** open the workflow run and check the `BENCHMARK RESULTS` step.
- Merging is blocked unless benchmarks are run for the latest commit.
- Benchmarks fail if performance degrades by more than 10% for any individual benchmark.
- The "Benchmark PR" workflow runs when its manually dispatched, or if the `run_benchmarks` label is added to the PR they run for every commit.


### Contribute to the documentation

