add asv PR benchmark workflow, migrate benchmarks from pytest-benchmark to asv, add memory benchmarks
lapp0 committed May 29, 2024
1 parent 538f77a commit dddddbd
Showing 10 changed files with 199 additions and 78 deletions.
61 changes: 61 additions & 0 deletions .github/workflows/asv_benchmark_pr.yml
@@ -0,0 +1,61 @@
name: Benchmark PR

on:
pull_request:
branches: [main]
workflow_dispatch:
env:
PYTHON_VERSION: "3.10"
WORKING_DIR: ${{ github.workspace }}/benchmarks

  FAIL_THRESHOLD: 1.1

jobs:
benchmark-pr:
runs-on: ubuntu-latest
if: contains(github.event.pull_request.labels.*.name, 'run_benchmarks') || github.event_name == 'workflow_dispatch'

defaults:
run:
working-directory: ${{ env.WORKING_DIR }}

steps:

- name: Checkout repository
uses: actions/checkout@v3
with:
fetch-depth: 0

- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: ${{ env.PYTHON_VERSION }}

- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install asv virtualenv lf-asv-formatter
- name: Create ASV machine config file
run: asv machine --machine gh-runner --yes

- name: Save comparison of PR against main branch
run: |
# prepare main branch for comparison
git remote add upstream https://github.com/${{ github.repository }}.git
git fetch upstream main
# Run benchmarks, writing comment contents to ./output
asv continuous upstream/main HEAD \
--skip-existing --sort ratio --split --interleave-rounds -a repeat=1
asv compare upstream/main HEAD --sort ratio --split | tee output

- name: Upload benchmark results
uses: actions/upload-artifact@v2
with:
name: benchmark-results-${{ github.sha }}
path: output

- name: Set benchmark output
run: |
echo "Success: [Benchmark Results](../actions/runs/${{ github.run_id }})"
1 change: 1 addition & 0 deletions .gitignore
@@ -6,3 +6,4 @@ docs/build
.idea/
*.gguf
.venv
benchmarks/results
Empty file added benchmarks/__init__.py
20 changes: 20 additions & 0 deletions benchmarks/asv.conf.json
@@ -0,0 +1,20 @@
{
"version": 1,
"project": "Outlines",
"project_url": "https://outlines-dev.github.io/outlines/",
"repo": "..",
"branches": [
"HEAD"
],
"build_command": [
"pip install .[test]",
"python -m build --wheel -o {build_cache_dir} {build_dir}"
],
"environment_type": "virtualenv",
"show_commit_url": "https://github.com/lapp0/outlines/commit/",
"benchmark_dir": ".",
"env_dir": "env",
"results_dir": "results",
"html_dir": "html",
"build_cache_size": 8
}
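Because `repo` points at `..`, this config lives in a `benchmarks/` subdirectory of the project it measures, and asv builds the parent repo into its own virtualenv via `build_command`. A quick local smoke test of the config might look like this (a sketch, assuming `asv` is installed):

```
cd benchmarks
asv machine --yes            # one-time machine metadata
asv run                      # benchmark the checked-out HEAD
asv publish && asv preview   # optional: browse HTML results
```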
benchmarks/bench_json_schema.py
@@ -1,12 +1,16 @@
import pytest

import outlines

outlines.disable_cache()

from outlines.fsm.guide import RegexGuide # noqa: E402
from outlines.fsm.json_schema import build_regex_from_schema # noqa: E402

from .common import ( # noqa: E402
clear_outlines_cache,
ensure_numba_compiled,
setup_tokenizer,
)

simple_schema = """{
"$defs": {
"Armor": {
@@ -63,30 +67,21 @@
"required": ["id", "work", "recording_artists"]
}"""


schemas = dict(simple_schema=simple_schema, complex_schema=complex_schema)


@pytest.mark.parametrize("schema_name", schemas.keys())
def test_benchmark_json_schema_to_regex(benchmark, ensure_numba_compiled, schema_name):
"""Benchmark convert json schema to regex"""
schema = schemas[schema_name]
benchmark.pedantic(
build_regex_from_schema,
args=(schema,),
rounds=8,
)
class JsonSchemaBenchmark:
params = schemas.keys()

def setup(self, schema_name):
clear_outlines_cache()
self.tokenizer = setup_tokenizer()
self.schema = schemas[schema_name]
ensure_numba_compiled(self.tokenizer)

def time_json_schema_to_regex(self, schema_name):
build_regex_from_schema(self.schema)

@pytest.mark.parametrize("schema_name", schemas.keys())
def test_benchmark_json_schema_to_fsm(
benchmark, tokenizer, ensure_numba_compiled, schema_name
):
"""Benchmark compile json schema as FSM"""
schema = schemas[schema_name]
regex = build_regex_from_schema(schema)
benchmark.pedantic(
RegexGuide,
args=(regex, tokenizer),
rounds=8,
)
def time_json_schema_to_fsm(self, schema_name):
regex = build_regex_from_schema(self.schema)
RegexGuide(regex, self.tokenizer)
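For readers coming from pytest-benchmark: asv discovers benchmarks by naming convention rather than by decorators. Each value in `params` is passed to `setup` and to every benchmark method, and `time_`-prefixed methods are timed automatically. A minimal illustrative class (not part of the diff):

```python
class ExampleSuite:
    params = ["small", "large"]  # asv runs every method once per value

    def setup(self, size):
        # called before each measurement with the current param
        self.payload = "x" * (10 if size == "small" else 100_000)

    def time_upper(self, size):
        # asv records the wall-clock time of this body
        self.payload.upper()
```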
37 changes: 37 additions & 0 deletions benchmarks/bench_numba_compile.py
@@ -0,0 +1,37 @@
import importlib

import interegular
import numba

import outlines

from .common import clear_outlines_cache, setup_tokenizer

outlines.disable_cache()


class NumbaCompileBenchmark:
def setup(self):
clear_outlines_cache()
from outlines.fsm import regex

self.tokenizer = setup_tokenizer()
self.regex = regex
original_njit = numba.njit

def mock_njit(*args, **kwargs):
kwargs["cache"] = False
return original_njit(*args, **kwargs)

self.original_njit = original_njit
numba.njit = mock_njit
importlib.reload(self.regex)
self.regex_pattern, _ = self.regex.make_deterministic_fsm(
interegular.parse_pattern("a").to_fsm().reduce()
)

def teardown(self):
numba.njit = self.original_njit

def time_compile_numba(self):
self.regex.create_fsm_index_tokenizer(self.regex_pattern, self.tokenizer)
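The `mock_njit` indirection above is worth a note: numba caches compiled functions on disk when `cache=True`, so without forcing `cache=False` and reloading `outlines.fsm.regex`, only the first timing would include JIT compilation. A stripped-down sketch of the same pattern, using a hypothetical `my_module`:

```python
import importlib
import numba

import my_module  # hypothetical module whose @numba.njit functions we re-time

original_njit = numba.njit

def no_cache_njit(*args, **kwargs):
    kwargs["cache"] = False  # defeat numba's on-disk cache
    return original_njit(*args, **kwargs)

numba.njit = no_cache_njit
importlib.reload(my_module)        # re-apply @njit with caching disabled
try:
    my_module.compiled_function()  # hypothetical call; now pays full JIT cost
finally:
    numba.njit = original_njit     # always restore the real decorator
```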
benchmarks/bench_regex_guide.py
@@ -1,7 +1,7 @@
import pytest

import outlines

from .common import clear_outlines_cache, ensure_numba_compiled, setup_tokenizer

outlines.disable_cache()

from outlines.fsm.guide import RegexGuide # noqa: E402
@@ -19,14 +19,27 @@
}


@pytest.mark.parametrize("regex_name", regex_samples.keys())
def test_benchmark_regex_to_fsm(
benchmark, tokenizer, ensure_numba_compiled, regex_name
):
"""Benchmark converting regex to FSM"""
regex_str = regex_samples[regex_name]
benchmark.pedantic(
RegexGuide,
args=(regex_str, tokenizer),
rounds=8,
)
class RegexGuideBenchmark:
params = regex_samples.keys()

def setup(self, pattern_name):
clear_outlines_cache()
self.tokenizer = setup_tokenizer()
ensure_numba_compiled(self.tokenizer)
self.pattern = regex_samples[pattern_name]

def time_regex_to_guide(self, pattern_name):
RegexGuide(self.pattern, self.tokenizer)


class MemoryRegexGuideBenchmark:
params = ["simple_phone", "complex_span_constrained_relation_extraction"]

def setup(self, pattern_name):
clear_outlines_cache()
self.tokenizer = setup_tokenizer()
ensure_numba_compiled(self.tokenizer)
self.pattern = regex_samples[pattern_name]

def peakmem_regex_to_guide(self, pattern_name):
RegexGuide(self.pattern, self.tokenizer)
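`MemoryRegexGuideBenchmark` uses another asv naming convention: `peakmem_`-prefixed methods report the process's peak resident memory while the body runs, which is how this commit delivers the memory benchmarks promised in its title. To run only this suite locally (the command mirrors the contributing docs below):

```
asv run --config benchmarks/asv.conf.json -b MemoryRegexGuideBenchmark
```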
10 changes: 6 additions & 4 deletions tests/benchmark/conftest.py → benchmarks/common.py
@@ -1,17 +1,19 @@
import pytest
from transformers import AutoTokenizer

import outlines.caching
from outlines.fsm.guide import RegexGuide
from outlines.models.transformers import TransformerTokenizer


@pytest.fixture
def tokenizer():
def clear_outlines_cache():
outlines.caching.clear_cache()


def setup_tokenizer():
tokenizer = AutoTokenizer.from_pretrained("gpt2")
return TransformerTokenizer(tokenizer)


@pytest.fixture
def ensure_numba_compiled(tokenizer):
RegexGuide("a", tokenizer)
return True
33 changes: 29 additions & 4 deletions docs/community/contribute.md
@@ -57,12 +57,37 @@ And run the code style checks:
pre-commit run --all-files
```

When modifying the code related to the index compilation, we kindly ask you to
post benchmarks before and after your changes. You can run benchmarks using:
### Benchmarking

```python
pytest --benchmark-only
```
Outlines uses [asv](https://asv.readthedocs.io) for automated benchmark testing. Benchmarks are run automatically before pull requests are merged to prevent performance degradation.

You can run the benchmark test suite locally with the following command:
```
asv run --config benchmarks/asv.conf.json
```

Run a specific test:
```
asv run --config benchmarks/asv.conf.json -b bench_json_schema.JsonSchemaBenchmark.time_json_schema_to_fsm
```

Profile a specific test:
```
asv run --config benchmarks/asv.conf.json --profile -b bench_json_schema.JsonSchemaBenchmark.time_json_schema_to_fsm
```

Compare to `origin/main`:
```
git fetch origin
asv continuous origin/main HEAD --config benchmarks/asv.conf.json
```

#### ASV PR Behavior

- Merging is blocked unless benchmarks have been run for the latest commit.
- Benchmarks fail if performance degrades by more than 10% for any individual benchmark.
- The "Benchmark PR" workflow runs when it is manually dispatched; once the `run_benchmarks` label has been added to a PR, it runs for every commit (one way to add the label is sketched below).

### Contribute to the documentation

33 changes: 0 additions & 33 deletions tests/benchmark/test_benchmark_numba_compile.py

This file was deleted.
