Skip to content

Commit

Permalink
Merge pull request #30 from parkervg/back-to-guidance
Browse files Browse the repository at this point in the history
Back to guidance
  • Loading branch information
parkervg authored Sep 1, 2024
2 parents 6818562 + c900e63 commit 36262c1
Show file tree
Hide file tree
Showing 40 changed files with 680 additions and 812 deletions.
7 changes: 3 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -139,11 +139,11 @@ For in-depth descriptions of the above queries, check out our [documentation](ht
- Supports many DBMS 💾
- SQLite, PostgreSQL, DuckDB, Pandas (aka duckdb in a trenchcoat)
- Supports many models ✨
- Transformers, Llama.cpp, OpenAI, Ollama
- Transformers, OpenAI, Anthropic, Ollama
- Easily extendable to [multi-modal use cases](./examples/vqa-ingredient.ipynb) 🖼️
- Smart parsing optimizes what is passed to external functions 🧠
- Traverses abstract syntax tree with [sqlglot](https://github.com/tobymao/sqlglot) to minimize LLM function calls 🌳
- Constrained decoding with [outlines](https://github.com/outlines-dev/outlines) 🚀
- Constrained decoding with [guidance](https://github.com/guidance-ai/guidance) 🚀
- LLM function caching, built on [diskcache](https://grantjenks.com/docs/diskcache/) 🔑

## Quickstart
Expand Down Expand Up @@ -246,5 +246,4 @@ Special thanks to those below for inspiring this project. Definitely recommend c
- As far as I can tell, the first publication to propose unifying model calls within SQL
- Served as the inspiration for the [vqa-ingredient.ipynb](./examples/vqa-ingredient.ipynb) example
- The authors of [Grammar Prompting for Domain-Specific Language Generation with Large Language Models](https://arxiv.org/abs/2305.19234)
- The maintainers of the [Outlines](https://github.com/outlines-dev/outlines) library for powering the constrained decoding capabilities of BlendSQL
- Paper at https://arxiv.org/abs/2307.09702
- The maintainers of the [Guidance](https://github.com/guidance-ai/guidance) library for powering the constrained decoding capabilities of BlendSQL
5 changes: 1 addition & 4 deletions benchmark/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,8 @@

from blendsql import blend
from blendsql.models import TransformersLLM
import outlines.caching

outlines.caching.clear_cache()

MODEL = TransformersLLM("hf-internal-testing/tiny-random-PhiForCausalLM", caching=False)
MODEL = TransformersLLM("HuggingFaceTB/SmolLM-135M", caching=False)
NUM_ITER_PER_QUERY = 5

if __name__ == "__main__":
Expand Down
3 changes: 0 additions & 3 deletions blendsql/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,2 @@
__version__ = "0.0.21"


from .ingredients.builtin import LLMMap, LLMQA, LLMJoin, LLMValidate, ImageCaption
from .blend import blend
2 changes: 1 addition & 1 deletion blendsql/_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ def __contains__(cls, item):

DEFAULT_ANS_SEP = ";"
DEFAULT_NAN_ANS = "-"
MAP_BATCH_SIZE = 5
MAP_BATCH_SIZE = 15


class IngredientType(str, Enum, metaclass=StrInMeta):
Expand Down
20 changes: 14 additions & 6 deletions blendsql/blend.py
Original file line number Diff line number Diff line change
Expand Up @@ -654,7 +654,7 @@ def _blend(
if kwargs_dict.get(IngredientKwarg.REGEX, None) is not None:
logger.debug(
Fore.LIGHTBLACK_EX
+ f"Using regex '{kwargs_dict[IngredientKwarg.REGEX](1)}'"
+ f"Using regex '{kwargs_dict[IngredientKwarg.REGEX]}'"
+ Fore.RESET
)
if table_to_title is not None:
Expand Down Expand Up @@ -822,17 +822,25 @@ def _blend(
column in x for x in [llm_out_df.columns, base_table.columns]
):
# Fill nan in llm_out_df with those values in base_table
pd.testing.assert_index_equal(
base_table.index, llm_out_df.index
)
try:
pd.testing.assert_index_equal(
base_table.index, llm_out_df.index
)
except AssertionError:
logger.debug(
Fore.RED + "pd.testing.assert_index_equal error"
)
llm_out_df[column] = llm_out_df[column].fillna(
base_table[column]
)
base_table = base_table.drop(columns=column)
llm_out_df = llm_out_df[
llm_out_df.columns.difference(base_table.columns)
]
pd.testing.assert_index_equal(base_table.index, llm_out_df.index)
try:
pd.testing.assert_index_equal(base_table.index, llm_out_df.index)
except AssertionError:
logger.debug(Fore.RED + "pd.testing.assert_index_equal error")
merged = base_table.merge(
llm_out_df, how="left", right_index=True, left_index=True
)
Expand Down Expand Up @@ -915,7 +923,7 @@ def blend(
For example, in `{{LLMMap('convert to date', 'w::listing date')}} <= '1960-12-31'`
We can infer the output format should look like '1960-12-31' and both:
1) Put this string in the `example_outputs` kwarg
2) If we have a LocalModel, pass the '\d{4}-\d{2}-\d{2}' pattern to outlines.generate.regex
2) If we have a LocalModel, pass the '\d{4}-\d{2}-\d{2}' pattern to guidance
table_to_title: Optional mapping from table name to title of table.
Useful for datasets like WikiTableQuestions, where relevant info is stored in table title.
schema_qualify: Optional bool, determines if we run qualify_columns() from sqlglot
Expand Down
2 changes: 0 additions & 2 deletions blendsql/blend_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
OpenaiLLM,
TransformersLLM,
AzureOpenaiLLM,
LlamaCppLLM,
OllamaLLM,
)
from blendsql.ingredients.builtin import LLMQA, LLMMap, LLMJoin
Expand All @@ -20,7 +19,6 @@
MODEL_TYPE_TO_CLASS = {
"openai": OpenaiLLM,
"azure_openai": AzureOpenaiLLM,
"llama_cpp": LlamaCppLLM,
"transformers": TransformersLLM,
"ollama": OllamaLLM,
}
Expand Down
3 changes: 0 additions & 3 deletions blendsql/generate/__init__.py

This file was deleted.

22 changes: 0 additions & 22 deletions blendsql/generate/choice.py

This file was deleted.

28 changes: 0 additions & 28 deletions blendsql/generate/regex.py

This file was deleted.

56 changes: 0 additions & 56 deletions blendsql/generate/text.py

This file was deleted.

Loading

0 comments on commit 36262c1

Please sign in to comment.