-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #18 from parkervg/joiner-heuristic-integration
Joiner heuristic integration
- Loading branch information
Showing
5 changed files
with
181 additions
and
29 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,83 @@ | ||
from blendsql import blend, LLMJoin, LLMMap, LLMQA | ||
from blendsql.db import SQLite | ||
from blendsql.models import TransformersLLM | ||
from blendsql.utils import fetch_from_hub | ||
from tqdm import tqdm | ||
|
||
# TEST_QUERIES = [ | ||
# """ | ||
# SELECT DISTINCT venue FROM w | ||
# WHERE city = 'sydney' AND {{ | ||
# LLMMap( | ||
# 'More than 30 total points?', | ||
# 'w::score' | ||
# ) | ||
# }} = TRUE | ||
# """, | ||
# """ | ||
# SELECT * FROM w | ||
# WHERE city = {{ | ||
# LLMQA( | ||
# 'Which city is located 120 miles west of Sydney?', | ||
# (SELECT * FROM documents WHERE documents MATCH 'sydney OR 120'), | ||
# options='w::city' | ||
# ) | ||
# }} | ||
# """, | ||
# """ | ||
# SELECT date, rival, score, documents.content AS "Team Description" FROM w | ||
# JOIN {{ | ||
# LLMJoin( | ||
# left_on='documents::title', | ||
# right_on='w::rival' | ||
# ) | ||
# }} | ||
# """ | ||
# ] | ||
|
||
# BlendSQL queries exercised by the benchmark loop in the `__main__` section. | ||
# Currently a single query: an LLMJoin between 'documents::title' and 'w::player'. | ||
TEST_QUERIES = [ | ||
""" | ||
SELECT title, player FROM w JOIN {{ | ||
LLMJoin( | ||
left_on='documents::title', | ||
right_on='w::player' | ||
) | ||
}} | ||
""" | ||
] | ||
# Script entry point: executes every query in TEST_QUERIES with `blend(...)`, | ||
# records `smoothie.meta.process_time_seconds` for each execution, and prints | ||
# the average of the recorded times. | ||
if __name__ == "__main__": | ||
# NOTE(review): the string literal below is a bare expression, not a docstring; | ||
# it appears to be a log of earlier timing results (units presumably seconds - confirm). | ||
""" | ||
Without cached LLM response (10 runs): | ||
before: 3.16 | ||
after: 1.91 | ||
With cached LLM response (100 runs): | ||
before: 0.0175 | ||
after: 0.0166 | ||
"Qwen -1.5-0.5B" | ||
With cached LLM response (30 runs): | ||
with fuzzy join: 0.431 | ||
without fuzzy join: 0.073 | ||
Without cached LLM response (30 runs): | ||
with fuzzy join: 0.286 | ||
without fuzzy join: 318.85 | ||
""" | ||
# Database file obtained through fetch_from_hub and wrapped in the SQLite connector. | ||
db = SQLite(fetch_from_hub("1966_NBA_Expansion_Draft_0.db")) | ||
#db = SQLite(fetch_from_hub("multi_table.db")) | ||
# NOTE(review): the model id suggests a tiny randomly-initialised test model - confirm. | ||
TEST_TRANSFORMERS_LLM = "hf-internal-testing/tiny-random-PhiForCausalLM" | ||
# caching=False: presumably makes every run call the model instead of reusing a | ||
# cached response - confirm against TransformersLLM. | ||
model = TransformersLLM(TEST_TRANSFORMERS_LLM, caching=False) | ||
|
||
# One entry per recorded execution, taken from the smoothie's metadata. | ||
times = [] | ||
# 30 repetitions over the query list; the loop index `i` is not used. | ||
for i in range(30): | ||
for q in TEST_QUERIES: | ||
|
||
# Make our smoothie - the executed BlendSQL script | ||
smoothie = blend( | ||
query=q, | ||
db=db, | ||
blender=model, | ||
verbose=False, | ||
ingredients={LLMJoin, LLMMap, LLMQA}, | ||
) | ||
times.append(smoothie.meta.process_time_seconds) | ||
# NOTE(review): indentation was lost in this rendering, so it is unclear whether | ||
# the result frame is printed on every run or only once after the loops - confirm. | ||
print(smoothie.df) | ||
# Mean of all recorded times; `len(times)` is the number of recorded executions. | ||
print(f"Average time across {len(times)} runs: {sum(times) / len(times)}") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -28,7 +28,7 @@ def find_version(*file_paths): | |
url="https://github.com/parkervg/blendsql", | ||
author="Parker Glenn", | ||
author_email="[email protected]", | ||
description="Query language to blend SQL logic and LLM reasoning across multi-modal data.", | ||
description="Query language for blending SQL logic and LLM reasoning across multi-modal data.", | ||
long_description=open("README.md").read(), | ||
long_description_content_type="text/markdown", | ||
license="Apache License 2.0", | ||
|
@@ -46,6 +46,12 @@ def find_version(*file_paths): | |
"python-dotenv==1.0.1", | ||
"sqlglot==18.13.0", | ||
"sqlalchemy>=2.0.0", | ||
# skrub doesn't currently support python<3.10: https://github.com/skrub-data/skrub/issues/815 | ||
"skrub==0.1.0 ; python_version>='3.10'", | ||
# For python<3.10 we instead fetch this branch, which removes the python 3.10 style type annotations | ||
"skrub @ git+https://github.com/jeromedockes/skrub.git@strip-type-annotations ; python_version<'3.10'", | ||
# https://github.com/skrub-data/skrub/issues/910 | ||
"scikit-learn==1.4.2", | ||
"huggingface_hub", | ||
"datasets", | ||
"lark", | ||
|
@@ -74,7 +80,7 @@ def find_version(*file_paths): | |
"recognizers-text-suite", | ||
"emoji==1.7.0", | ||
], | ||
"test": ["pytest", "huggingface_hub", "pre-commit"], | ||
"test": ["pytest", "pre-commit", "llama-cpp-python", "transformers", "torch"], | ||
"docs": [ | ||
"mkdocs-material", | ||
"mkdocstrings", | ||
|