Skip to content

Commit

Permalink
Merge pull request #30 from parkervg/back-to-guidance
Browse files Browse the repository at this point in the history
Back to guidance
  • Loading branch information
parkervg authored Sep 1, 2024
2 parents 6818562 + c900e63 commit 36262c1
Show file tree
Hide file tree
Showing 40 changed files with 680 additions and 812 deletions.
7 changes: 3 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -139,11 +139,11 @@ For in-depth descriptions of the above queries, check out our [documentation](ht
- Supports many DBMS 💾
- SQLite, PostgreSQL, DuckDB, Pandas (aka duckdb in a trenchcoat)
- Supports many models ✨
- Transformers, Llama.cpp, OpenAI, Ollama
- Transformers, OpenAI, Anthropic, Ollama
- Easily extendable to [multi-modal use cases](./examples/vqa-ingredient.ipynb) 🖼️
- Smart parsing optimizes what is passed to external functions 🧠
- Traverses abstract syntax tree with [sqlglot](https://github.com/tobymao/sqlglot) to minimize LLM function calls 🌳
- Constrained decoding with [outlines](https://github.com/outlines-dev/outlines) 🚀
- Constrained decoding with [guidance](https://github.com/guidance-ai/guidance) 🚀
- LLM function caching, built on [diskcache](https://grantjenks.com/docs/diskcache/) 🔑

## Quickstart
Expand Down Expand Up @@ -246,5 +246,4 @@ Special thanks to those below for inspiring this project. Definitely recommend c
- As far as I can tell, the first publication to propose unifying model calls within SQL
- Served as the inspiration for the [vqa-ingredient.ipynb](./examples/vqa-ingredient.ipynb) example
- The authors of [Grammar Prompting for Domain-Specific Language Generation with Large Language Models](https://arxiv.org/abs/2305.19234)
- The maintainers of the [Outlines](https://github.com/outlines-dev/outlines) library for powering the constrained decoding capabilities of BlendSQL
- Paper at https://arxiv.org/abs/2307.09702
- The maintainers of the [Guidance](https://github.com/guidance-ai/guidance) library for powering the constrained decoding capabilities of BlendSQL
5 changes: 1 addition & 4 deletions benchmark/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,8 @@

from blendsql import blend
from blendsql.models import TransformersLLM
import outlines.caching

outlines.caching.clear_cache()

MODEL = TransformersLLM("hf-internal-testing/tiny-random-PhiForCausalLM", caching=False)
MODEL = TransformersLLM("HuggingFaceTB/SmolLM-135M", caching=False)
NUM_ITER_PER_QUERY = 5

if __name__ == "__main__":
Expand Down
3 changes: 0 additions & 3 deletions blendsql/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,2 @@
__version__ = "0.0.21"


from .ingredients.builtin import LLMMap, LLMQA, LLMJoin, LLMValidate, ImageCaption
from .blend import blend
2 changes: 1 addition & 1 deletion blendsql/_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ def __contains__(cls, item):

DEFAULT_ANS_SEP = ";"
DEFAULT_NAN_ANS = "-"
MAP_BATCH_SIZE = 5
MAP_BATCH_SIZE = 15


class IngredientType(str, Enum, metaclass=StrInMeta):
Expand Down
20 changes: 14 additions & 6 deletions blendsql/blend.py
Original file line number Diff line number Diff line change
Expand Up @@ -654,7 +654,7 @@ def _blend(
if kwargs_dict.get(IngredientKwarg.REGEX, None) is not None:
logger.debug(
Fore.LIGHTBLACK_EX
+ f"Using regex '{kwargs_dict[IngredientKwarg.REGEX](1)}'"
+ f"Using regex '{kwargs_dict[IngredientKwarg.REGEX]}'"
+ Fore.RESET
)
if table_to_title is not None:
Expand Down Expand Up @@ -822,17 +822,25 @@ def _blend(
column in x for x in [llm_out_df.columns, base_table.columns]
):
# Fill nan in llm_out_df with those values in base_table
pd.testing.assert_index_equal(
base_table.index, llm_out_df.index
)
try:
pd.testing.assert_index_equal(
base_table.index, llm_out_df.index
)
except AssertionError:
logger.debug(
Fore.RED + "pd.testing.assert_index_equal error"
)
llm_out_df[column] = llm_out_df[column].fillna(
base_table[column]
)
base_table = base_table.drop(columns=column)
llm_out_df = llm_out_df[
llm_out_df.columns.difference(base_table.columns)
]
pd.testing.assert_index_equal(base_table.index, llm_out_df.index)
try:
pd.testing.assert_index_equal(base_table.index, llm_out_df.index)
except AssertionError:
logger.debug(Fore.RED + "pd.testing.assert_index_equal error")
merged = base_table.merge(
llm_out_df, how="left", right_index=True, left_index=True
)
Expand Down Expand Up @@ -915,7 +923,7 @@ def blend(
For example, in `{{LLMMap('convert to date', 'w::listing date')}} <= '1960-12-31'`
We can infer the output format should look like '1960-12-31' and both:
1) Put this string in the `example_outputs` kwarg
2) If we have a LocalModel, pass the '\d{4}-\d{2}-\d{2}' pattern to outlines.generate.regex
2) If we have a LocalModel, pass the '\d{4}-\d{2}-\d{2}' pattern to guidance
table_to_title: Optional mapping from table name to title of table.
Useful for datasets like WikiTableQuestions, where relevant info is stored in table title.
schema_qualify: Optional bool, determines if we run qualify_columns() from sqlglot
Expand Down
2 changes: 0 additions & 2 deletions blendsql/blend_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
OpenaiLLM,
TransformersLLM,
AzureOpenaiLLM,
LlamaCppLLM,
OllamaLLM,
)
from blendsql.ingredients.builtin import LLMQA, LLMMap, LLMJoin
Expand All @@ -20,7 +19,6 @@
MODEL_TYPE_TO_CLASS = {
"openai": OpenaiLLM,
"azure_openai": AzureOpenaiLLM,
"llama_cpp": LlamaCppLLM,
"transformers": TransformersLLM,
"ollama": OllamaLLM,
}
Expand Down
3 changes: 0 additions & 3 deletions blendsql/generate/__init__.py

This file was deleted.

22 changes: 0 additions & 22 deletions blendsql/generate/choice.py

This file was deleted.

28 changes: 0 additions & 28 deletions blendsql/generate/regex.py

This file was deleted.

56 changes: 0 additions & 56 deletions blendsql/generate/text.py

This file was deleted.

Loading

0 comments on commit 36262c1

Please sign in to comment.