Use Faster FSM #1175

Merged: 1 commit, Sep 28, 2024
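
In short: this PR removes the numba warm-up path from the benchmark suite (the ensure_numba_compiled helper and benchmarks/bench_numba_compile.py), deletes the old outlines/fsm/fsm.py implementation, and narrows the CI Python matrix to 3.10 only, as the diffs below show.
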
2 changes: 1 addition & 1 deletion .github/workflows/tests.yml
@@ -22,7 +22,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ["3.8", "3.10"]
+        python-version: ["3.10"]
     steps:
       - uses: actions/checkout@v3
       - name: Set up Python ${{ matrix.python-version }}

5 changes: 0 additions & 5 deletions benchmarks/bench_cfg_guide.py
@@ -7,8 +7,6 @@
 from outlines.fsm.guide import CFGGuide
 from outlines.models.transformers import TransformerTokenizer
 
-from .common import ensure_numba_compiled
-
 random.seed(42)
 
 
@@ -30,9 +28,6 @@ class CFGGuideBenchmark:
 
     def setup(self, grammar_name):
         self.tokenizer = get_tiny_tokenizer()
-        ensure_numba_compiled(
-            self.tokenizer
-        )  # numba not currently used, but will be in the future
         self.prebuilt_cfg_guide = CFGGuide(
             benched_grammars[grammar_name], self.tokenizer
         )

3 changes: 1 addition & 2 deletions benchmarks/bench_json_schema.py
@@ -2,7 +2,7 @@
 from outlines.fsm.guide import RegexGuide
 from outlines.fsm.json_schema import build_regex_from_schema
 
-from .common import ensure_numba_compiled, setup_tokenizer  # noqa: E402
+from .common import setup_tokenizer  # noqa: E402
 
 simple_schema = """{
     "$defs": {
@@ -69,7 +69,6 @@ class JsonSchemaBenchmark:
     def setup(self, schema_name):
         self.tokenizer = setup_tokenizer()
         self.schema = schemas[schema_name]
-        ensure_numba_compiled(self.tokenizer)
 
     @cache_disabled()
     def time_json_schema_to_regex(self, schema_name):

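For context, the benchmark above times the JSON-Schema-to-regex conversion and the guide construction that follows it. A minimal sketch of that flow outside the benchmark harness, assuming the names imported above (build_regex_from_schema, RegexGuide, setup_tokenizer) keep the shapes shown in these diffs and that the benchmarks package is importable from the repository root; the schema is a toy example, not one from the PR:

    from outlines.fsm.guide import RegexGuide
    from outlines.fsm.json_schema import build_regex_from_schema
    from benchmarks.common import setup_tokenizer

    # Toy schema for illustration; the PR benchmarks larger ones.
    schema = '{"type": "object", "properties": {"name": {"type": "string"}}}'

    regex = build_regex_from_schema(schema)       # JSON Schema -> regex string
    guide = RegexGuide(regex, setup_tokenizer())  # regex -> token-level guide, no warm-up step
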
34 changes: 0 additions & 34 deletions benchmarks/bench_numba_compile.py

This file was deleted.

4 changes: 1 addition & 3 deletions benchmarks/bench_regex_guide.py
@@ -1,7 +1,7 @@
 from outlines.caching import cache_disabled
 from outlines.fsm.guide import RegexGuide
 
-from .common import ensure_numba_compiled, setup_tokenizer
+from .common import setup_tokenizer
 
 regex_samples = {
     "email": r"[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?",

@@ -21,7 +21,6 @@ class RegexGuideBenchmark:
 
     def setup(self, pattern_name):
         self.tokenizer = setup_tokenizer()
-        ensure_numba_compiled(self.tokenizer)
         self.pattern = regex_samples[pattern_name]
 
     @cache_disabled()
@@ -34,7 +33,6 @@ class MemoryRegexGuideBenchmark:
 
     def setup(self, pattern_name):
         self.tokenizer = setup_tokenizer()
-        ensure_numba_compiled(self.tokenizer)
         self.pattern = regex_samples[pattern_name]
 
     @cache_disabled()

6 changes: 0 additions & 6 deletions benchmarks/common.py
@@ -1,14 +1,8 @@
 from transformers import AutoTokenizer
 
-from outlines.fsm.guide import RegexGuide
 from outlines.models.transformers import TransformerTokenizer
 
 
 def setup_tokenizer():
     tokenizer = AutoTokenizer.from_pretrained("gpt2")
     return TransformerTokenizer(tokenizer)
-
-
-def ensure_numba_compiled(tokenizer):
-    RegexGuide("a", tokenizer)
-    return True
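
For reference, the deleted helper existed only to pay numba's JIT compilation cost before timing by building a throwaway guide; after this PR no warm-up step remains, so callers construct guides directly. A minimal sketch of before versus after, reconstructed from the diffs in this PR (the pattern is illustrative):

    from outlines.fsm.guide import RegexGuide
    from benchmarks.common import setup_tokenizer

    tokenizer = setup_tokenizer()

    # Before this PR: benchmarks triggered numba compilation outside the timed region.
    #   ensure_numba_compiled(tokenizer)  # internally built RegexGuide("a", tokenizer)

    # After this PR: the guide is built directly; no warm-up call exists.
    guide = RegexGuide(r"abc", tokenizer)  # hypothetical pattern for illustration
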
47 changes: 0 additions & 47 deletions outlines/fsm/fsm.py

This file was deleted.
