diff --git a/docs/reference/cfg.md b/docs/reference/cfg.md index 4f0285c11..cc62df3b8 100644 --- a/docs/reference/cfg.md +++ b/docs/reference/cfg.md @@ -30,9 +30,11 @@ print(sequence) # (8-2) ``` -!!! Note "Performance" +###### Disclaimer - The implementation of grammar-structured generation in Outlines is very naive. This does not reflect the performance of [.txt](https://dottxt.co)'s product, where we made grammar-structured generation as fast as regex-structured generation. +!!! Note "Experimental" + + Outlines' current **community-contributed** implementation of CFG-structured generation is experimental. This does not reflect the performance of [.txt](https://dottxt.co)'s product, where we have optimized grammar-structured generation to be as fast as regex-structured generation. Additionally, it does not fully align with the approach described in our [technical report](https://arxiv.org/pdf/2307.09702), aside from its use of incremental/partial parsing. This feature is still a work in progress, requiring performance enhancements and bug fixes for an ideal implementation. For more details, please see our [grammar-related open issues on GitHub](https://github.com/outlines-dev/outlines/issues?q=is%3Aissue+is%3Aopen+label%3Agrammar). ## Ready-to-use grammars diff --git a/outlines/fsm/guide.py b/outlines/fsm/guide.py index aa073d107..44a918494 100644 --- a/outlines/fsm/guide.py +++ b/outlines/fsm/guide.py @@ -1,5 +1,6 @@ import collections import copy +import warnings from dataclasses import dataclass from typing import ( TYPE_CHECKING, @@ -319,6 +320,11 @@ def __init__(self, cfg_string: str, tokenizer): """ Construct the PartialLark parser and set the empty initial_state (PartialParserState) """ + warnings.warn( + "Outlines' public *community-contributed* CFG structured generation is experimental. 
" + "Please review https://outlines-dev.github.io/outlines/reference/cfg#disclaimer" + ) + self.cfg_string = cfg_string self.tokenizer = tokenizer self.eos_token_id = self.tokenizer.eos_token_id