Introduce PR Benchmark Workflow #903

Merged 4 commits on Jun 4, 2024

Changes from all commits
57 changes: 57 additions & 0 deletions .github/workflows/asv_benchmark_pr.yml
@@ -0,0 +1,57 @@
name: Benchmark PR

on:
  pull_request:
    branches: [main]
  workflow_dispatch:
env:
  PYTHON_VERSION: "3.10"
  WORKING_DIR: ${{ github.workspace }}/benchmarks
  BENCHMARKS_OUTPUT: ${{ github.workspace }}/benchmarks_output

jobs:
  benchmark-pr:
    runs-on: ubuntu-latest
    if: contains(github.event.pull_request.labels.*.name, 'run_benchmarks') || github.event_name == 'workflow_dispatch' || github.event_name == 'workflow_run'

    defaults:
      run:
        working-directory: ${{ env.WORKING_DIR }}

    steps:

      - name: Checkout repository
        uses: actions/checkout@v3
        with:
          fetch-depth: 0

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: ${{ env.PYTHON_VERSION }}

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install asv virtualenv lf-asv-formatter

      - name: Create ASV machine config file
        run: asv machine --machine gh-runner --yes

      - name: Run Benchmarks - `PR HEAD` vs `main`
        run: |
          # prepare main branch for comparison
          git remote add upstream https://github.com/${{ github.repository }}.git
          git fetch upstream main

          # Run benchmarks, allow errors, they will be caught in the next step
          asv continuous upstream/main HEAD \
            --no-stats --interleave-rounds -a repeat=3 || true

      - name: BENCHMARK RESULTS
        run: |
          asv compare --factor=1.1 --no-stats --split upstream/main HEAD | tee ${{ env.BENCHMARKS_OUTPUT }}
          if grep -q "Benchmarks that have got worse" "${{ env.BENCHMARKS_OUTPUT }}"; then
            echo "Performance degradation detected!"
            exit 1
          fi
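The gate in the final step is plain text matching: `asv compare --factor=1.1 --split` groups results into sections, and the job fails when the heading `Benchmarks that have got worse` appears in the report. A minimal Python sketch of the same gate (an illustration, not part of the PR; it assumes `asv` is installed and an `upstream` remote is configured as in the previous step):

```python
import subprocess
import sys

# Compare HEAD against upstream/main, flagging changes beyond the 1.1x
# factor and splitting the report into improved/worsened/unchanged sections.
result = subprocess.run(
    ["asv", "compare", "--factor=1.1", "--no-stats", "--split",
     "upstream/main", "HEAD"],
    capture_output=True,
    text=True,
    check=True,
)
print(result.stdout)

# asv prints regressions under this heading; treat its presence as failure.
if "Benchmarks that have got worse" in result.stdout:
    print("Performance degradation detected!")
    sys.exit(1)
```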
1 change: 1 addition & 0 deletions .gitignore
@@ -6,3 +6,4 @@ docs/build
 .idea/
 *.gguf
 .venv
+benchmarks/results
Empty file added benchmarks/__init__.py
20 changes: 20 additions & 0 deletions benchmarks/asv.conf.json
@@ -0,0 +1,20 @@
{
    "version": 1,
    "project": "Outlines",
    "project_url": "https://outlines-dev.github.io/outlines/",
    "repo": "..",
    "branches": [
        "HEAD"
    ],
    "build_command": [
        "python -mpip install .[test]",
        "PIP_NO_BUILD_ISOLATION=false python -mpip wheel --no-deps --no-index -w {build_cache_dir} {build_dir}",
    ],
    "environment_type": "virtualenv",
    "show_commit_url": "https://github.com/outlines-dev/outlines/commit/",
    "benchmark_dir": ".",
    "env_dir": "env",
    "results_dir": "results",
    "html_dir": "html",
    "build_cache_size": 8
}
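With `benchmark_dir` set to `.`, asv discovers the benchmark modules below by naming convention: a class's `params` attribute fans it out into one variant per value, `setup` runs before each measurement, `time_*` methods are timed, and `peakmem_*` methods report peak memory. A minimal, hypothetical benchmark file following those conventions (illustration only, not part of the PR):

```python
class ExampleBenchmark:
    # One benchmark variant is generated per entry; the value is passed
    # to `setup` and to every benchmark method.
    params = ["short", "long"]

    def setup(self, size):
        # Runs before each measurement and is excluded from the timing.
        self.data = ["x"] * (10 if size == "short" else 100_000)

    def time_join(self, size):
        # Wall-clock time of this body is what asv reports.
        "-".join(self.data)

    def peakmem_join(self, size):
        # Peak resident memory while this body runs is what asv reports.
        "-".join(self.data)
```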
tests/benchmark/… → benchmarks/bench_json_schema.py
@@ -1,11 +1,8 @@
-import pytest
+from outlines.caching import cache_disabled
+from outlines.fsm.guide import RegexGuide
+from outlines.fsm.json_schema import build_regex_from_schema
 
-import outlines
-
-outlines.disable_cache()
-
-from outlines.fsm.guide import RegexGuide  # noqa: E402
-from outlines.fsm.json_schema import build_regex_from_schema  # noqa: E402
+
+from .common import ensure_numba_compiled, setup_tokenizer  # noqa: E402
 
 simple_schema = """{
     "$defs": {
@@ -63,30 +60,22 @@
     "required": ["id", "work", "recording_artists"]
 }"""
 
 
 schemas = dict(simple_schema=simple_schema, complex_schema=complex_schema)
 
 
-@pytest.mark.parametrize("schema_name", schemas.keys())
-def test_benchmark_json_schema_to_regex(benchmark, ensure_numba_compiled, schema_name):
-    """Benchmark convert json schema to regex"""
-    schema = schemas[schema_name]
-    benchmark.pedantic(
-        build_regex_from_schema,
-        args=(schema,),
-        rounds=8,
-    )
+class JsonSchemaBenchmark:
+    params = schemas.keys()
+
+    def setup(self, schema_name):
+        self.tokenizer = setup_tokenizer()
+        self.schema = schemas[schema_name]
+        ensure_numba_compiled(self.tokenizer)
+
+    @cache_disabled()
+    def time_json_schema_to_regex(self, schema_name):
+        build_regex_from_schema(self.schema)
 
-
-@pytest.mark.parametrize("schema_name", schemas.keys())
-def test_benchmark_json_schema_to_fsm(
-    benchmark, tokenizer, ensure_numba_compiled, schema_name
-):
-    """Benchmark compile json schema as FSM"""
-    schema = schemas[schema_name]
-    regex = build_regex_from_schema(schema)
-    benchmark.pedantic(
-        RegexGuide,
-        args=(regex, tokenizer),
-        rounds=8,
-    )
+
+    @cache_disabled()
+    def time_json_schema_to_fsm(self, schema_name):
+        regex = build_regex_from_schema(self.schema)
+        RegexGuide(regex, self.tokenizer)
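Because asv benchmark classes are plain Python, they can also be exercised directly when debugging, without the asv runner. A sketch (an assumption, not part of the PR; run from the repository root with outlines installed):

```python
from benchmarks.bench_json_schema import JsonSchemaBenchmark

bench = JsonSchemaBenchmark()
bench.setup("simple_schema")  # builds the tokenizer and warms up numba
bench.time_json_schema_to_regex("simple_schema")
bench.time_json_schema_to_fsm("simple_schema")
```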
34 changes: 34 additions & 0 deletions benchmarks/bench_numba_compile.py
@@ -0,0 +1,34 @@
import importlib

import interegular
import numba

from outlines.caching import cache_disabled
from outlines.fsm import regex

from .common import setup_tokenizer


class NumbaCompileBenchmark:
    def setup(self):
        self.tokenizer = setup_tokenizer()
        self.regex = regex
        original_njit = numba.njit

        def mock_njit(*args, **kwargs):
            kwargs["cache"] = False
            return original_njit(*args, **kwargs)

        self.original_njit = original_njit
        numba.njit = mock_njit
        importlib.reload(self.regex)
        self.regex_pattern, _ = self.regex.make_deterministic_fsm(
            interegular.parse_pattern("a").to_fsm().reduce()
        )

    def teardown(self):
        numba.njit = self.original_njit

    @cache_disabled()
    def time_compile_numba(self):
        self.regex.create_fsm_index_tokenizer(self.regex_pattern, self.tokenizer)
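The `setup` above defeats numba's on-disk cache: it wraps `numba.njit` so every decoration passes `cache=False`, then reloads `outlines.fsm.regex` so its module-level decorators are re-applied through the patched function; without the reload, `time_compile_numba` would measure a cache load rather than a compilation. A self-contained toy (an illustration, not part of the PR) showing why the reload is needed:

```python
import importlib
import pathlib
import sys
import tempfile

# Create two tiny modules on disk: `deco.py` provides a decorator and
# `decorated_mod.py` applies it at import time.
tmp = tempfile.mkdtemp()
pathlib.Path(tmp, "deco.py").write_text("def deco(fn):\n    return fn\n")
pathlib.Path(tmp, "decorated_mod.py").write_text(
    "from deco import deco\n\n@deco\ndef f():\n    return 'hello'\n"
)
sys.path.insert(0, tmp)

import deco
import decorated_mod

def loud_deco(fn):
    def wrapper():
        return fn().upper()
    return wrapper

deco.deco = loud_deco            # swap the decorator, as setup swaps numba.njit
print(decorated_mod.f())         # 'hello': the old decoration is still active
importlib.reload(decorated_mod)  # re-runs the module body, re-applying decorators
print(decorated_mod.f())         # 'HELLO': the patched decorator now applies
```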
tests/benchmark/… → benchmarks/bench_regex_guide.py
@@ -1,10 +1,7 @@
-import pytest
+from outlines.caching import cache_disabled
+from outlines.fsm.guide import RegexGuide
 
-import outlines
-
-outlines.disable_cache()
-
-from outlines.fsm.guide import RegexGuide  # noqa: E402
+from .common import ensure_numba_compiled, setup_tokenizer
 
 regex_samples = {
     "email": r"[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?",
@@ -19,14 +16,27 @@
 }
 
 
-@pytest.mark.parametrize("regex_name", regex_samples.keys())
-def test_benchmark_regex_to_fsm(
-    benchmark, tokenizer, ensure_numba_compiled, regex_name
-):
-    """Benchmark converting regex to FSM"""
-    regex_str = regex_samples[regex_name]
-    benchmark.pedantic(
-        RegexGuide,
-        args=(regex_str, tokenizer),
-        rounds=8,
-    )
+class RegexGuideBenchmark:
+    params = regex_samples.keys()
+
+    def setup(self, pattern_name):
+        self.tokenizer = setup_tokenizer()
+        ensure_numba_compiled(self.tokenizer)
+        self.pattern = regex_samples[pattern_name]
+
+    @cache_disabled()
+    def time_regex_to_guide(self, pattern_name):
+        RegexGuide(self.pattern, self.tokenizer)
+
+
+class MemoryRegexGuideBenchmark:
+    params = ["simple_phone", "complex_span_constrained_relation_extraction"]
+
+    def setup(self, pattern_name):
+        self.tokenizer = setup_tokenizer()
+        ensure_numba_compiled(self.tokenizer)
+        self.pattern = regex_samples[pattern_name]
+
+    @cache_disabled()
+    def peakmem_regex_to_guide(self, pattern_name):
+        RegexGuide(self.pattern, self.tokenizer)
5 changes: 1 addition & 4 deletions tests/benchmark/conftest.py → benchmarks/common.py
@@ -1,17 +1,14 @@
-import pytest
 from transformers import AutoTokenizer
 
 from outlines.fsm.guide import RegexGuide
 from outlines.models.transformers import TransformerTokenizer
 
 
-@pytest.fixture
-def tokenizer():
+def setup_tokenizer():
     tokenizer = AutoTokenizer.from_pretrained("gpt2")
     return TransformerTokenizer(tokenizer)
 
 
-@pytest.fixture
 def ensure_numba_compiled(tokenizer):
     RegexGuide("a", tokenizer)
     return True
34 changes: 30 additions & 4 deletions docs/community/contribute.md
@@ -57,12 +57,38 @@ And run the code style checks:
 pre-commit run --all-files
 ```
 
-When modifying the code related to the index compilation, we kindly ask you to
-post benchmarks before and after your changes. You can run benchmarks using:
+### Benchmarking
 
-```python
-pytest --benchmark-only
-```
+Outlines uses [asv](https://asv.readthedocs.io) for automated benchmark testing. Benchmarks are run automatically before pull requests are merged to prevent performance degradation.
+
+You can run the benchmark test suite locally with the following command:
+```
+asv run --config benchmarks/asv.conf.json
+```
+
+Run a specific test:
+```
+asv run --config benchmarks/asv.conf.json -b bench_json_schema.JsonSchemaBenchmark.time_json_schema_to_fsm
+```
+
+Profile a specific test:
+```
+asv run --config benchmarks/asv.conf.json --profile -b bench_json_schema.JsonSchemaBenchmark.time_json_schema_to_fsm
+```
+
+Compare to `origin/main`:
+```
+git fetch origin
+asv continuous origin/main HEAD --config benchmarks/asv.conf.json
+```
+
+#### ASV PR Behavior
+
+- **View ASV Benchmark Results:** Open the workflow run and view the `BENCHMARK RESULTS` step.
+- Merging is blocked unless benchmarks have been run for the latest commit.
+- Benchmarks fail if performance degrades by more than 10% for any individual benchmark.
+- The "Benchmark PR" workflow runs when it is manually dispatched, or on every commit once the `run_benchmarks` label is added to the PR.
 
 
 ### Contribute to the documentation
13 changes: 13 additions & 0 deletions outlines/caching.py
@@ -1,4 +1,5 @@
 import asyncio
+import contextlib
 import functools
 import os
 from typing import Callable, Optional
@@ -164,3 +165,15 @@ def clear_cache():
     """Erase the cache completely."""
     memory = get_cache()
     memory.clear()
+
+
+@contextlib.contextmanager
+def cache_disabled():
+    # outlines.caching._caching_enabled
+    global _caching_enabled
+    original_state = _caching_enabled
+    _caching_enabled = False
+    try:
+        yield
+    finally:
+        _caching_enabled = original_state
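A usage sketch for the new context manager (an illustration, not from the PR, assuming a function memoized with this module's `cache()` decorator):

```python
from outlines.caching import cache, cache_disabled

@cache()
def square(x):
    print(f"computing {x}")
    return x * x

square(3)        # computed and stored in the cache
square(3)        # served from the cache; the body does not run

with cache_disabled():
    square(3)    # caching is off inside the block, so the body runs again
# On exit the previous caching state is restored, even if the block raised.
```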
33 changes: 0 additions & 33 deletions tests/benchmark/test_benchmark_numba_compile.py

This file was deleted.

31 changes: 31 additions & 0 deletions tests/test_cache.py
@@ -1,5 +1,6 @@
 import os
 import tempfile
+import unittest
 
 import diskcache
 import pytest
@@ -157,3 +158,33 @@ def foo():
 
     # assert with version upgrade, old cache is invalidated and new cache is used
     a, b = foo()
+
+
+def test_cache_disabled_decorator(test_cache):
+    """Ensure the cache can be disabled in a local scope"""
+
+    from outlines.caching import cache_disabled
+
+    mock = unittest.mock.MagicMock()
+
+    @test_cache
+    def fn():
+        mock()
+        return 1
+
+    # first call isn't cached
+    fn()
+    assert mock.call_count == 1
+
+    # second call doesn't run fn, uses cache
+    fn()
+    assert mock.call_count == 1
+
+    # the cache_disabled context manager disables the cache within its scope
+    with cache_disabled():
+        fn()
+    assert mock.call_count == 2  # called once in cache_disabled scope
+
+    # scope has exited, cache is enabled again
+    fn()
+    assert mock.call_count == 2