Use more performant fsm backend
lapp0 committed Sep 27, 2024
1 parent 6035e86 commit 857b902
Showing 14 changed files with 24 additions and 2,173 deletions.
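This commit swaps the numba-compiled FSM index for a more performant backend, so the benchmarks no longer need to warm up numba's JIT before timing guide construction; the ensure_numba_compiled helper and all of its call sites are removed below. A minimal sketch of the benchmark setup after this change, assuming the RegexGuide(regex, tokenizer) constructor shown in the deleted helper is otherwise unchanged:

    from transformers import AutoTokenizer

    from outlines.fsm.guide import RegexGuide
    from outlines.models.transformers import TransformerTokenizer

    # Same tokenizer setup as benchmarks/common.py.
    tokenizer = TransformerTokenizer(AutoTokenizer.from_pretrained("gpt2"))

    # No JIT warm-up call is needed any more; the regex index is built
    # directly by the new backend when the guide is constructed.
    guide = RegexGuide(r"[0-9]+", tokenizer)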
2 changes: 1 addition & 1 deletion .github/workflows/tests.yml
@@ -22,7 +22,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ["3.8", "3.10"]
+        python-version: ["3.10", "3.12"]
     steps:
       - uses: actions/checkout@v3
       - name: Set up Python ${{ matrix.python-version }}
5 changes: 0 additions & 5 deletions benchmarks/bench_cfg_guide.py
@@ -7,8 +7,6 @@
 from outlines.fsm.guide import CFGGuide
 from outlines.models.transformers import TransformerTokenizer
 
-from .common import ensure_numba_compiled
-
 random.seed(42)
@@ -30,9 +28,6 @@ class CFGGuideBenchmark:
 
     def setup(self, grammar_name):
         self.tokenizer = get_tiny_tokenizer()
-        ensure_numba_compiled(
-            self.tokenizer
-        )  # numba not currently used, but will be in the future
         self.prebuilt_cfg_guide = CFGGuide(
             benched_grammars[grammar_name], self.tokenizer
         )
3 changes: 1 addition & 2 deletions benchmarks/bench_json_schema.py
@@ -2,7 +2,7 @@
 from outlines.fsm.guide import RegexGuide
 from outlines.fsm.json_schema import build_regex_from_schema
 
-from .common import ensure_numba_compiled, setup_tokenizer  # noqa: E402
+from .common import setup_tokenizer  # noqa: E402
 
 simple_schema = """{
     "$defs": {
@@ -69,7 +69,6 @@ class JsonSchemaBenchmark:
     def setup(self, schema_name):
         self.tokenizer = setup_tokenizer()
         self.schema = schemas[schema_name]
-        ensure_numba_compiled(self.tokenizer)
 
     @cache_disabled()
     def time_json_schema_to_regex(self, schema_name):
34 changes: 0 additions & 34 deletions benchmarks/bench_numba_compile.py

This file was deleted.

4 changes: 1 addition & 3 deletions benchmarks/bench_regex_guide.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from outlines.caching import cache_disabled
from outlines.fsm.guide import RegexGuide

from .common import ensure_numba_compiled, setup_tokenizer
from .common import setup_tokenizer

regex_samples = {
"email": r"[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?",
@@ -21,7 +21,6 @@ class RegexGuideBenchmark:
 
     def setup(self, pattern_name):
         self.tokenizer = setup_tokenizer()
-        ensure_numba_compiled(self.tokenizer)
         self.pattern = regex_samples[pattern_name]
 
     @cache_disabled()
@@ -34,7 +33,6 @@ class MemoryRegexGuideBenchmark:
 
     def setup(self, pattern_name):
         self.tokenizer = setup_tokenizer()
-        ensure_numba_compiled(self.tokenizer)
         self.pattern = regex_samples[pattern_name]
 
     @cache_disabled()
6 changes: 0 additions & 6 deletions benchmarks/common.py
@@ -1,14 +1,8 @@
 from transformers import AutoTokenizer
 
-from outlines.fsm.guide import RegexGuide
 from outlines.models.transformers import TransformerTokenizer
 
 
 def setup_tokenizer():
     tokenizer = AutoTokenizer.from_pretrained("gpt2")
     return TransformerTokenizer(tokenizer)
-
-
-def ensure_numba_compiled(tokenizer):
-    RegexGuide("a", tokenizer)
-    return True
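
For context, the deleted ensure_numba_compiled helper built a throwaway RegexGuide purely to pay numba's one-time JIT compilation cost before any timed benchmark code ran. A hypothetical illustration of that old pattern (the date regex and timing harness are examples for this note, not code from the repository):

    import time

    from outlines.fsm.guide import RegexGuide

    from benchmarks.common import setup_tokenizer

    tokenizer = setup_tokenizer()
    RegexGuide("a", tokenizer)  # old warm-up: triggers JIT compilation once

    # Later constructions were then measured without one-time JIT noise.
    start = time.perf_counter()
    RegexGuide(r"[0-9]{4}-[0-9]{2}-[0-9]{2}", tokenizer)
    print(f"guide construction: {time.perf_counter() - start:.3f}s")

With the numba backend gone, the warm-up serves no purpose, which is why each setup() above simply drops the call.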
47 changes: 0 additions & 47 deletions outlines/fsm/fsm.py

This file was deleted.
