Merge pull request #20 from parkervg/feature/benchmark

Adding `benchmark/` directory
parkervg · Jun 1, 2024 · 708b04f · 708b04f
2 parents d260e69 + 181ad26
commit 708b04f
Show file tree

Hide file tree

Showing 21 changed files with 222 additions and 0 deletions.
diff --git a/benchmark/1966_nba_draft/__init__.py b/benchmark/1966_nba_draft/__init__.py
diff --git a/benchmark/1966_nba_draft/load.py b/benchmark/1966_nba_draft/load.py
@@ -0,0 +1,13 @@
+from typing import Tuple, Type, Set
+
+from blendsql import LLMQA, LLMMap, LLMJoin
+from blendsql.utils import fetch_from_hub
+from blendsql.ingredients import Ingredient
+from blendsql.db import Database, SQLite
+
+
+def load_benchmark() -> Tuple[Database, Set[Type[Ingredient]]]:
+    """Build the inputs for the 1966 NBA expansion draft benchmark.
+
+    Returns:
+        A `(db, ingredients)` pair: a `SQLite` database built from the path
+        returned by `fetch_from_hub`, and the set of ingredient classes
+        `LLMQA`, `LLMMap` and `LLMJoin`.
+    """
+    db_path = fetch_from_hub("1966_NBA_Expansion_Draft_0.db")
+    ingredients: Set[Type[Ingredient]] = {LLMQA, LLMMap, LLMJoin}
+    return SQLite(db_path), ingredients
diff --git a/benchmark/1966_nba_draft/queries/q01.sql b/benchmark/1966_nba_draft/queries/q01.sql
@@ -0,0 +1,11 @@
+-- Rows of `w` whose previous team is 'new york knicks', restricted to more
+-- than one year of NBA experience and to rows where LLMMap's answer to
+-- 'Did they play more than one position?' over the `pos` column is TRUE.
+-- NOTE(review): the `not_used` CTE is never referenced by the final SELECT;
+-- presumably intentional for the benchmark - confirm.
+WITH knicks_players AS (
+    SELECT * FROM w WHERE "previous team" = 'new york knicks'
+), not_used AS (
+    SELECT * FROM w WHERE player = 'john barnhill'
+) SELECT * FROM knicks_players WHERE "years of nba experience" > 1
+AND {{
+        LLMMap(
+            'Did they play more than one position?',
+            'knicks_players::pos'
+        )
+    }} = TRUE
diff --git a/benchmark/1966_nba_draft/queries/q02.sql b/benchmark/1966_nba_draft/queries/q02.sql
@@ -0,0 +1,11 @@
+-- Selects title and player from `w` joined through LLMJoin
+-- (left_on documents::title, right_on w::player), keeping rows where LLMMap's
+-- answer to 'How many years with the franchise?' over the
+-- `career with the franchise` column is greater than 5.
+SELECT title, player FROM w JOIN {{
+    LLMJoin(
+        left_on='documents::title',
+        right_on='w::player'
+    )
+}} WHERE {{
+    LLMMap(
+       'How many years with the franchise?',
+       'w::career with the franchise'
+    )
+}} > 5
diff --git a/benchmark/__init__.py b/benchmark/__init__.py
diff --git a/benchmark/financials/__init__.py b/benchmark/financials/__init__.py
diff --git a/benchmark/financials/load.py b/benchmark/financials/load.py
@@ -0,0 +1,27 @@
+from typing import Tuple, Type, Set
+
+from tests.utils import (
+    starts_with,
+    get_length,
+    select_first_sorted,
+    do_join,
+    return_aapl,
+    get_table_size,
+)
+from blendsql.utils import fetch_from_hub
+from blendsql.ingredients import Ingredient
+from blendsql.db import Database, SQLite
+
+
+def load_benchmark() -> Tuple[Database, Set[Type[Ingredient]]]:
+    """Build the inputs for the financials (multi-table) benchmark.
+
+    Returns:
+        A `(db, ingredients)` pair: a `SQLite` database built from the path
+        returned by `fetch_from_hub("multi_table.db")`, and the set of
+        ingredients used by this task's queries.
+    """
+    db_path = fetch_from_hub("multi_table.db")
+    # NOTE(review): these ingredients are imported from `tests.utils`, so this
+    # loader only works where the `tests` package is importable - confirm that
+    # is the intended way to run the benchmark.
+    ingredients: Set[Type[Ingredient]] = {
+        starts_with,
+        get_length,
+        select_first_sorted,
+        do_join,
+        return_aapl,
+        get_table_size,
+    }
+    return SQLite(db_path), ingredients
diff --git a/benchmark/financials/queries/q01.sql b/benchmark/financials/queries/q01.sql
@@ -0,0 +1,9 @@
+-- Symbol, "North America" and "Japan" from `geographic` for symbols found in
+-- `portfolio` where starts_with('A', 'portfolio::Description') equals 1 and
+-- the symbol also appears in `constituents` with Sector
+-- 'Information Technology'.
+SELECT Symbol, "North America", "Japan" FROM geographic
+WHERE geographic.Symbol IN (
+    SELECT Symbol FROM portfolio
+    WHERE {{starts_with('A', 'portfolio::Description')}} = 1
+    AND portfolio.Symbol in (
+        SELECT Symbol FROM constituents
+        WHERE constituents.Sector = 'Information Technology'
+    )
+)
diff --git a/benchmark/financials/queries/q02.sql b/benchmark/financials/queries/q02.sql
@@ -0,0 +1,6 @@
+-- `account_history` rows left-joined to `constituents` on Symbol, filtered to
+-- Sector 'Information Technology', starts_with('A', 'constituents::Name')
+-- equal to 1, and a lower-cased Action containing 'dividend'.
+-- The Sector and Action references in the WHERE clause are table-qualified.
+SELECT "Run Date", Account, Action, ROUND("Amount ($)", 2) AS 'Total Dividend Payout ($$)', Name
+FROM account_history
+LEFT JOIN constituents ON account_history.Symbol = constituents.Symbol
+WHERE constituents.Sector = 'Information Technology'
+AND {{starts_with('A', 'constituents::Name')}} = 1
+AND lower(account_history.Action) like "%dividend%"
diff --git a/benchmark/financials/queries/q03.sql b/benchmark/financials/queries/q03.sql
@@ -0,0 +1,6 @@
+-- `account_history` rows left-joined to `constituents` on Symbol, filtered to
+-- Sector 'Information Technology', starts_with('A', 'constituents::Name')
+-- equal to 1, and a lower-cased Action containing 'dividend'.
+-- Here `Sector` and `Action` in the WHERE clause are written without a table
+-- qualifier.
+SELECT "Run Date", Account, Action, ROUND("Amount ($)", 2) AS 'Total Dividend Payout ($$)', Name
+FROM account_history
+LEFT JOIN constituents ON account_history.Symbol = constituents.Symbol
+WHERE Sector = 'Information Technology'
+AND {{starts_with('A', 'constituents::Name')}} = 1
+AND lower(Action) like "%dividend%"
diff --git a/benchmark/financials/queries/q04.sql b/benchmark/financials/queries/q04.sql
@@ -0,0 +1,7 @@
+-- Distinct (Symbol, Action) pairs from `constituents` left-joined to
+-- `account_history` and `portfolio` on Symbol, for runs after '2021-02-23'
+-- with a non-NULL portfolio symbol, where either get_length over
+-- constituents.Name exceeds 3 or starts_with('A', 'portfolio::Symbol') holds.
+-- Ordered by the length of constituents.Name and limited to one row.
+-- "Run Date" and Name are table-qualified in this variant.
+SELECT DISTINCT constituents.Symbol, Action FROM constituents
+LEFT JOIN account_history ON constituents.Symbol = account_history.Symbol
+LEFT JOIN portfolio on constituents.Symbol = portfolio.Symbol
+WHERE account_history."Run Date" > '2021-02-23'
+AND ({{get_length('n_length', 'constituents::Name')}} > 3 OR {{starts_with('A', 'portfolio::Symbol')}})
+AND portfolio.Symbol IS NOT NULL
+ORDER BY LENGTH(constituents.Name) LIMIT 1
diff --git a/benchmark/financials/queries/q05.sql b/benchmark/financials/queries/q05.sql
@@ -0,0 +1,7 @@
+-- Distinct (Symbol, Action) pairs from `constituents` left-joined to
+-- `account_history` and `portfolio` on Symbol, for runs after '2021-02-23'
+-- with a non-NULL portfolio symbol, where either get_length over
+-- constituents.Name exceeds 3 or starts_with('A', 'portfolio::Symbol') holds.
+-- Ordered by the length of Name and limited to one row.
+-- "Run Date" and Name are written without a table qualifier in this variant.
+SELECT DISTINCT constituents.Symbol, Action FROM constituents
+LEFT JOIN account_history ON constituents.Symbol = account_history.Symbol
+LEFT JOIN portfolio on constituents.Symbol = portfolio.Symbol
+WHERE "Run Date" > '2021-02-23'
+AND ({{get_length('n_length', 'constituents::Name')}} > 3 OR {{starts_with('A', 'portfolio::Symbol')}})
+AND portfolio.Symbol IS NOT NULL
+ORDER BY LENGTH(Name) LIMIT 1
diff --git a/benchmark/financials/queries/q06.sql b/benchmark/financials/queries/q06.sql
@@ -0,0 +1,10 @@
+-- The whole query is a single get_table_size call whose argument is a
+-- subquery: distinct `portfolio` rows (aliased `w`) where
+-- starts_with('F', 'w::Symbol') is TRUE, further restricted to symbols longer
+-- than 2 characters. Note the ingredient call nested inside the subquery.
+{{
+    get_table_size(
+        (
+            WITH a AS (
+                SELECT * FROM (SELECT DISTINCT * FROM portfolio) as w
+                    WHERE {{starts_with('F', 'w::Symbol')}} = TRUE
+            ) SELECT * FROM a WHERE LENGTH(a.Symbol) > 2
+        )
+    )
+}}
diff --git a/benchmark/financials/queries/q07.sql b/benchmark/financials/queries/q07.sql
@@ -0,0 +1,8 @@
+-- "Percent of Account" from a filtered `portfolio` subquery (aliased `w`)
+-- joined through do_join (left_on geographic::Symbol, right_on w::Symbol),
+-- keeping rows where starts_with('F', 'w::Symbol') holds and
+-- "Percent of Account" is below 0.2.
+-- NOTE(review): the doubled single quote inside the double-quoted identifier
+-- "Today''s Gain/Loss Percent" looks like a string-literal escape carried
+-- over from elsewhere; confirm it resolves to the intended column.
+SELECT w."Percent of Account" FROM (SELECT * FROM "portfolio" WHERE Quantity > 200 OR "Today''s Gain/Loss Percent" > 0.05) as w
+JOIN {{
+    do_join(
+        left_on='geographic::Symbol',
+        right_on='w::Symbol'
+    )
+}} WHERE {{starts_with('F', 'w::Symbol')}}
+AND w."Percent of Account" < 0.2
diff --git a/benchmark/rugby/__init__.py b/benchmark/rugby/__init__.py
diff --git a/benchmark/rugby/load.py b/benchmark/rugby/load.py
@@ -0,0 +1,15 @@
+from typing import Tuple, Type, Set
+
+from blendsql import LLMQA, LLMMap, LLMJoin
+from blendsql.utils import fetch_from_hub
+from blendsql.ingredients import Ingredient
+from blendsql.db import Database, SQLite
+
+
+def load_benchmark() -> Tuple[Database, Set[Type[Ingredient]]]:
+    """Build the inputs for the 1884 New Zealand rugby tour benchmark.
+
+    Returns:
+        A `(db, ingredients)` pair: a `SQLite` database built from the path
+        returned by `fetch_from_hub`, and the set of ingredient classes
+        `LLMQA`, `LLMMap` and `LLMJoin`.
+    """
+    db_path = fetch_from_hub(
+        "1884_New_Zealand_rugby_union_tour_of_New_South_Wales_1.db"
+    )
+    ingredients: Set[Type[Ingredient]] = {LLMQA, LLMMap, LLMJoin}
+    return SQLite(db_path), ingredients
diff --git a/benchmark/rugby/queries/q01.sql b/benchmark/rugby/queries/q01.sql
@@ -0,0 +1,8 @@
+-- Rows of `w` whose city equals the value produced by LLMQA for the question
+-- 'Which city is located 120 miles west of Sydney?'. LLMQA also receives the
+-- `documents` rows matching 'sydney OR 120' and options='w::city'.
+SELECT * FROM w
+    WHERE city = {{
+        LLMQA(
+            'Which city is located 120 miles west of Sydney?',
+            (SELECT * FROM documents WHERE documents MATCH 'sydney OR 120'),
+            options='w::city'
+        )
+    }}
diff --git a/benchmark/rugby/queries/q02.sql b/benchmark/rugby/queries/q02.sql
@@ -0,0 +1,7 @@
+-- Distinct venues from `w` where city is 'sydney' and LLMMap's answer to
+-- 'More than 30 total points?' over the `score` column is TRUE.
+SELECT DISTINCT venue FROM w
+    WHERE city = 'sydney' AND {{
+        LLMMap(
+            'More than 30 total points?',
+            'w::score'
+        )
+    }} = TRUE
diff --git a/benchmark/rugby/queries/q03.sql b/benchmark/rugby/queries/q03.sql
@@ -0,0 +1,7 @@
+-- date, rival, score and documents.content (aliased "Team Description") from
+-- `w` joined through LLMJoin (left_on documents::title, right_on w::rival),
+-- restricted to rival 'nsw waratahs'.
+SELECT date, rival, score, documents.content AS "Team Description" FROM w
+    JOIN {{
+        LLMJoin(
+            left_on='documents::title',
+            right_on='w::rival'
+        )
+    }} WHERE rival = 'nsw waratahs'
diff --git a/benchmark/rugby/queries/q04.sql b/benchmark/rugby/queries/q04.sql
@@ -0,0 +1,9 @@
+-- Rivals from `w` whose city equals the value produced by LLMQA for the
+-- question 'What city features the Mount Panorama racetrack?'. LLMQA also
+-- receives title and content of the `documents` rows matching
+-- 'mount panorama racetrack' and options='w::city'.
+SELECT rival
+FROM w
+WHERE city = {{
+    LLMQA(
+        'What city features the Mount Panorama racetrack?',
+        (SELECT title, content FROM documents WHERE documents MATCH 'mount panorama racetrack'),
+        options='w::city'
+    )
+}}
diff --git a/benchmark/run.py b/benchmark/run.py
@@ -0,0 +1,61 @@
+from pathlib import Path
+from colorama import Fore
+import importlib.util
+from typing import Callable
+import pandas as pd
+
+
+from blendsql import blend
+from blendsql.models import TransformersLLM
+import outlines.caching
+
+# Runs at import time, before any query is executed.
+# NOTE(review): presumably this keeps generations cached by earlier runs from
+# skewing the timings - confirm against the outlines caching documentation.
+outlines.caching.clear_cache()
+
+# Model passed to `blend()` as the `blender` for every query; it is built with
+# caching=False.
+MODEL = TransformersLLM("hf-internal-testing/tiny-random-PhiForCausalLM", caching=False)
+# Number of times each query file is executed; the reported runtime is the
+# mean over all of these runs.
+NUM_ITER_PER_QUERY = 5
+
+if __name__ == "__main__":
+    # Benchmark driver. Each sub-directory next to this file that holds a
+    # `load.py` and a `queries/` folder is one task: `load.py` supplies the
+    # database and ingredients, and every `.sql` file under `queries/` is run
+    # NUM_ITER_PER_QUERY times through `blend()`. The per-task mean of
+    # `smoothie.meta.process_time_seconds` is printed as a markdown table.
+    print(f"Averaging based on {NUM_ITER_PER_QUERY} iterations per query...")
+    print("Loading benchmarks...")
+    task_to_times = {}
+    # `iterdir()` yields entries in arbitrary order; sort so that the run order
+    # and the rows of the final table are stable across machines.
+    for task_dir in sorted(Path(__file__).parent.iterdir()):
+        if not task_dir.is_dir() or task_dir.name.startswith("__"):
+            continue
+        load_file = task_dir / "load.py"
+        queries_dir = task_dir / "queries"
+        if not (load_file.is_file() and queries_dir.is_dir()):
+            # Stray directory (tool cache, scratch folder, ...), not a task:
+            # skip it instead of crashing while trying to import load.py.
+            print(f"Skipping {task_dir.name}: no load.py or queries/ found")
+            continue
+        print()
+        print(f"Running {task_dir.name}...")
+        task_to_times[task_dir.name] = []
+        # Load the task's load.py straight from its path: directory names such
+        # as `1966_nba_draft` are not valid Python identifiers, so a regular
+        # `import` statement could not reach them.
+        spec = importlib.util.spec_from_file_location(
+            "load_benchmark", str(load_file)
+        )
+        load_module = importlib.util.module_from_spec(spec)
+        spec.loader.exec_module(load_module)
+        load_benchmark: Callable = load_module.load_benchmark
+        db, ingredients = load_benchmark()
+        # Only `.sql` files are queries; sorting keeps q01, q02, ... in order.
+        for query_file in sorted(queries_dir.glob("*.sql")):
+            # read_text() opens and closes the file, so no handle is leaked.
+            query = query_file.read_text()
+            for x in range(NUM_ITER_PER_QUERY):
+                # Lightweight progress indicator, overwritten in place.
+                print("." * x, end="\r")
+                smoothie = blend(
+                    query=query,
+                    db=db,
+                    blender=MODEL,
+                    verbose=False,
+                    ingredients=ingredients,
+                )
+                task_to_times[task_dir.name].append(smoothie.meta.process_time_seconds)
+    tasks, avg_runtime, num_queries = [], [], []
+    for task_name, times in task_to_times.items():
+        if not times:
+            # The task had no queries; leave it out rather than divide by zero.
+            continue
+        tasks.append(task_name)
+        avg_runtime.append(sum(times) / len(times))
+        # Every query contributes NUM_ITER_PER_QUERY timings.
+        num_queries.append(len(times) // NUM_ITER_PER_QUERY)
+    df = pd.DataFrame(
+        {"Task": tasks, "Average Runtime": avg_runtime, "# Unique Queries": num_queries}
+    )
+    print(
+        Fore.LIGHTCYAN_EX
+        + "Please paste this markdown table into your future PR"
+        + Fore.RESET
+    )
+    print(Fore.GREEN + df.to_markdown(index=False) + Fore.RESET)