WIP: Stuff #3 (Open)
wants to merge 2 commits into main

25 changes: 11 additions & 14 deletions README.md
@@ -20,8 +20,8 @@ Recommended to use "gpt-3.5-turbo" or higher as the model.

```python
from trilogy_public_models import models
from preql import Executor, Dialects
from preql_nlp import build_query
from preql import Dialects
from preql_nlp import build_query, NlpPreqlModelClient

# define the model we want to parse
environment = models["bigquery.stack_overflow"]
@@ -30,20 +30,17 @@ environment = models["bigquery.stack_overflow"]
# default bigquery executor requires local default credentials configured
executor = Dialects.BIGQUERY.default_executor(environment= environment)

# build a query off text and the selected model
processed_query = build_query(
"How many questions are asked per year?",
environment,
)
# build an NLP client for the preql model
client = NlpPreqlModelClient(openai_model="gpt-3.5-turbo", preql_model=environment, preql_executor=executor)

# make sure we got reasonable outputs
for concept in processed_query.output_columns:
print(concept.name)
# ask a data question about the model in natural language.
question = "How many questions are asked per year?"
results = client.answer(question)

# print the results
for r in results:
print(r)

# and run that to get our answer
results = executor.execute_query(processed_query)
for row in results:
print(row)
```


3 changes: 2 additions & 1 deletion preql_nlp/__init__.py
@@ -3,8 +3,9 @@
patch_promptimize()

from .main import build_query # noqa: E402
from .client import NlpPreqlModelClient # noqa: E402


__version__ = "0.0.5"

__all__ = ["build_query"]
__all__ = ["build_query", "NlpPreqlModelClient"]
37 changes: 37 additions & 0 deletions preql_nlp/client.py
@@ -0,0 +1,37 @@
from preql import Environment, Executor
from preql_nlp.main import build_query, answer_is_reasonable
from dataclasses import dataclass

from time import sleep
from typing import List

@dataclass
class NlpPreqlModelClient:
"""Client that answers natural-language data questions against a preql model using an OpenAI model."""

openai_model: str
preql_model: Environment
preql_executor: Executor

def answer(self, question: str) -> List[tuple]:
# generate a preql query from the question, execute it, and ask the LLM whether the
# result set looks reasonable; retry up to max_retries times before giving up
max_retries = 3
retries = 0
while retries < max_retries:
query = build_query(question, self.preql_model, debug=False, log_info=True, model=self.openai_model)
results = self.preql_executor.execute_query(query)
cols = results.keys()

res = []
for r in results:
res.append(r)

if self.answer_is_reasonable(question, res, cols):
return res
else:
retries += 1
sleep(1)

raise Exception(f"Answer not reasonable after {max_retries} retries")

def answer_is_reasonable(self, question, results, columns) -> bool:
return answer_is_reasonable(question, results, columns)
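
For reference, a minimal usage sketch of the new client. It assumes the same BigQuery `stack_overflow` model and local credentials as the README example; the try/except handling is illustrative, since `answer()` raises a plain `Exception` once the reasonableness check has failed three times.

```python
from trilogy_public_models import models
from preql import Dialects
from preql_nlp import NlpPreqlModelClient

# Same model/executor setup as the README example (requires local BigQuery credentials).
environment = models["bigquery.stack_overflow"]
executor = Dialects.BIGQUERY.default_executor(environment=environment)

client = NlpPreqlModelClient(
    openai_model="gpt-3.5-turbo", preql_model=environment, preql_executor=executor
)

try:
    rows = client.answer("How many questions are asked per year?")
except Exception as exc:  # raised after three answers judged unreasonable
    print(f"No reasonable answer: {exc}")
else:
    for row in rows:
        print(row)
```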

31 changes: 23 additions & 8 deletions preql_nlp/main.py
@@ -17,6 +17,7 @@
SemanticToTokensPromptCase,
SelectionPromptCase,
SemanticExtractionPromptCase,
CheckAnswerPromptCase,
)
from preql_nlp.constants import logger, DEFAULT_LIMIT

@@ -106,7 +107,7 @@ def coerce_list_str(input: Any) -> List[str]:


def discover_inputs(
input_text: str, input_environment: Environment, debug: bool = False, log_info: bool = True
input_text: str, input_environment: Environment, debug: bool = False, log_info: bool = True, model: str = "gpt-3.5-turbo"
) -> IntermediateParseResults:
# we work around prompt size issues and hallucination by doing a two phase discovery
# first we parse the question into metrics/dimensions
@@ -124,7 +125,7 @@ def discover_inputs(
session_uuid = uuid.uuid4()

parsed = coerce_list_dict(
run_prompt(SemanticExtractionPromptCase(input_text), debug=debug, log_info=log_info, session_uuid=session_uuid)
run_prompt(SemanticExtractionPromptCase(input_text, model=model), debug=debug, log_info=log_info, session_uuid=session_uuid)
)[0]
order = parsed.get("order", [])
token_inputs = {"metrics": metrics, "dimensions": dimensions}
@@ -138,7 +139,7 @@ def discover_inputs(
phrase_tokens = coerce_list_dict(
run_prompt(
SemanticToTokensPromptCase(
phrases=local_phrases, tokens=token_inputs[field]
phrases=local_phrases, tokens=token_inputs[field], model=model
),
debug=True,
session_uuid=session_uuid,
@@ -165,7 +166,7 @@ def discover_inputs(
f"Could not find concept for input {k} with tokens {v}"
)
selections = coerce_list_dict(
run_prompt(SelectionPromptCase(concepts=output, question=input_text), debug=debug, session_uuid=session_uuid, log_info=log_info)
run_prompt(SelectionPromptCase(concepts=output, question=input_text, model=model), debug=debug, session_uuid=session_uuid, log_info=log_info)
)[0]
final = list(set(selections.get("matches", [])))

@@ -216,9 +217,10 @@ def parse_query(
input_text: str,
input_environment: Environment,
debug: bool = False,
log_info: bool=True
log_info: bool=True,
model: str="gpt-3.5-turbo"
):
results = discover_inputs(input_text, input_environment, debug=debug, log_info=log_info)
results = discover_inputs(input_text, input_environment, debug=debug, log_info=log_info, model=model)
concepts = [input_environment.concepts[x] for x in results.select]
order = parse_order(concepts, results.order)
if debug:
@@ -234,7 +236,20 @@ def build_query(
input_text: str,
input_environment: Environment,
debug: bool = False,
log_info: bool = True
log_info: bool = True,
model: str = "gpt-3.5-turbo"
) -> ProcessedQuery:
query = parse_query(input_text, input_environment, debug=debug, log_info=log_info)
query = parse_query(input_text, input_environment, debug=debug, log_info=log_info, model=model)
return process_query_v2(statement=query, environment=input_environment)


def answer_is_reasonable(question, results, columns) -> bool:
prompt = CheckAnswerPromptCase(question=question, columns=columns, answer=results)
res = coerce_list_dict(
run_prompt(prompt, debug=True, log_info=False)
)
return res[0]["answer"] == "REASONABLE"
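
A sketch of calling the new `answer_is_reasonable` helper directly, with illustrative sample values shaped like what the client passes in (`columns` from `results.keys()`, rows from iterating the result set). It needs `OPENAI_API_KEY` set, since it runs the check-answer prompt.

```python
from preql_nlp.main import answer_is_reasonable

# Illustrative values only; in practice these come from executing a generated query.
question = "How many questions are asked per year?"
columns = ["question_count", "question_creation_date_year"]
rows = [(2200802, 2016), (2196676, 2015), (2137435, 2014)]

if answer_is_reasonable(question, rows, columns):
    print("LLM judged the result set reasonable")
else:
    print("LLM flagged the result set as unreasonable or was unsure")
```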

3 changes: 3 additions & 0 deletions preql_nlp/monkeypatch.py
@@ -1,3 +1,6 @@



from promptimize.prompt_cases import BasePromptCase, utils

# patched method while waiting for upstream PR to be merged
4 changes: 2 additions & 2 deletions preql_nlp/prompts/__init__.py
@@ -1,2 +1,2 @@
from .prompt_executor import run_prompt, SelectionPromptCase, SemanticExtractionPromptCase, SemanticToTokensPromptCase
__all__ = ["run_prompt", "SelectionPromptCase", "SemanticExtractionPromptCase", "SemanticToTokensPromptCase"]
from .prompt_executor import run_prompt, SelectionPromptCase, SemanticExtractionPromptCase, SemanticToTokensPromptCase, CheckAnswerPromptCase
__all__ = ["run_prompt", "SelectionPromptCase", "SemanticExtractionPromptCase", "SemanticToTokensPromptCase", "CheckAnswerPromptCase"]
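
With `CheckAnswerPromptCase` exported, the check-answer template added in `preql_nlp/prompts/check_answer.py` below can be inspected on its own. A minimal rendering sketch, assuming Jinja2 is available and using illustrative sample values (inside the package the template is rendered by the promptimize-based executor):

```python
from jinja2 import Template

from preql_nlp.prompts.check_answer import CHECK_ANSWER_PROMPT_V1

# Sample values for the template's `question`, `columns`, and `results` variables.
rendered = Template(CHECK_ANSWER_PROMPT_V1).render(
    question="How many questions are asked per year?",
    columns=["question_count", "question_creation_date_year"],
    results=[(2200802, 2016), (2196676, 2015), (2137435, 2014)],
)
print(rendered)
```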
41 changes: 41 additions & 0 deletions preql_nlp/prompts/check_answer.py
@@ -0,0 +1,41 @@
CHECK_ANSWER_PROMPT_V1 = """
You are a helpful system that determines whether a SQL result set makes sense as an answer to a business question. I will give a question, followed by a line break, followed by a list of columns, followed by a line break, followed by a result set written line by line
as a list of tuples. You will respond with either "REASONABLE", "UNREASONABLE", or "UNSURE" using the VALID json format shown below and NOTHING ELSE, depending on whether you think the result set is reasonable given
the question asked. Below is an example of a result set that seems reasonable given the question.

Prompt:
How many questions are asked per year?

RMKeyView(['question_count', 'question_creation_date_year'])

(2200802, 2016)
(2196676, 2015)
(2137435, 2014)
(2116212, 2017)
(2033690, 2013)
(1888989, 2018)
(1871695, 2020)
(1766933, 2019)
(1629580, 2021)
(1629386, 2012)
(1268788, 2022)
(1189881, 2011)
(690840, 2010)
(341651, 2009)
(57569, 2008)

Response:
{% raw %}{"answer": "REASONABLE"}{% endraw %}

Remember your response MUST BE VALID JSON. Complete the following:

{{ question }}

{{ columns }}

{% for res in results %}
{{ res }}
{% endfor %}

Response:
"""
45 changes: 34 additions & 11 deletions preql_nlp/prompts/prompt_executor.py
@@ -9,6 +9,8 @@
from preql_nlp.prompts.query_semantic_extraction import EXTRACTION_PROMPT_V1
from preql_nlp.prompts.semantic_to_tokens import STRUCTURED_PROMPT_V1
from preql_nlp.prompts.final_selection import SELECTION_TEMPLATE_V1
from preql_nlp.prompts.check_answer import CHECK_ANSWER_PROMPT_V1
from langchain.llms import OpenAI

from typing import List, Optional, Callable, Union
import uuid
@@ -21,12 +23,20 @@ def __init__(
self,
category: str,
evaluators: Optional[Union[Callable, List[Callable]]] = None,
model: Optional[str] = None
):
self.model = model or "gpt-3.5-turbo"
super().__init__(category=category, evaluators=evaluators)
self._prompt_hash = str(uuid.uuid4())

def get_extra_template_context(self):
raise NotImplementedError("This class can't be used directly.")

def get_prompt_executor(self):
model_name = self.model
openai_api_key = os.environ.get("OPENAI_API_KEY")
self.prompt_executor_kwargs = {"model_name": model_name}
return OpenAI(model_name=model_name, openai_api_key=openai_api_key)


class SemanticExtractionPromptCase(BasePreqlPromptCase):
@@ -36,9 +46,10 @@ def __init__(
self,
question: str,
evaluators: Optional[Union[Callable, List[Callable]]] = None,
model: Optional[str] = None
):
self.question = question
super().__init__(category="semantic_extraction", evaluators=evaluators)
super().__init__(category="semantic_extraction", evaluators=evaluators, model=model)

def get_extra_template_context(self):
return {"question": self.question}
@@ -52,10 +63,11 @@ def __init__(
tokens: List[str],
phrases: List[str],
evaluators: Optional[Union[Callable, List[Callable]]] = None,
model: Optional[str] = None
):
self.tokens = tokens
self.phrases = phrases
super().__init__(category="semantic_to_tokens", evaluators=evaluators)
super().__init__(category="semantic_to_tokens", evaluators=evaluators, model=model)

def get_extra_template_context(self):
return {"tokens": self.tokens, "phrase_str": ",".join(self.phrases)}
@@ -69,24 +81,39 @@ def __init__(
question: str,
concepts: List[str],
evaluators: Optional[Union[Callable, List[Callable]]] = None,
model: Optional[str] = None
):
self.question = question
self.concepts = concepts
super().__init__(evaluators=evaluators, category="selection")
self.execution.score = None
super().__init__(evaluators=evaluators, category="selection", model=model)

def get_extra_template_context(self):
return {"concept_string": ", ".join(self.concepts), "question": self.question}


class CheckAnswerPromptCase(BasePreqlPromptCase):
template = CHECK_ANSWER_PROMPT_V1

def __init__(self, question: str, columns: List[str], answer: List[tuple], evaluators: Optional[Union[Callable, List[Callable]]] = None,
model: Optional[str] = None):
self.question = question
self.columns = columns
self.answer = answer
super().__init__(evaluators=evaluators, category="check", model=model)

def get_extra_template_context(self):
return {"results": self.answer, "columns": self.columns, "question": self.question}



DATA_DIR = os.path.join(
os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "log_data"
)
if not os.path.exists(DATA_DIR):
os.makedirs(DATA_DIR)


def log_prompt_info(prompt: TemplatedPromptCase, session_uuid: uuid.UUID):
def log_prompt_info(prompt: BasePreqlPromptCase, session_uuid: uuid.UUID):
prompt_hash = prompt.prompt_hash
prompt_context = prompt.jinja_context
template = prompt.template
@@ -99,20 +126,16 @@ def log_prompt_info(prompt: TemplatedPromptCase, session_uuid: uuid.UUID):
"category": category,
"session_uuid": str(session_uuid),
"response": prompt.response,
"model": prompt.model
}
with open(
os.path.join(DATA_DIR, str(session_uuid), prompt_hash + ".json"), "w"
) as f:
print(
"printing to...{}".format(
os.path.join(DATA_DIR, str(session_uuid), prompt_hash + ".json")
)
)
json.dump(data, f)


def run_prompt(
prompt: TemplatedPromptCase,
prompt: BasePreqlPromptCase,
debug: bool = False,
log_info=True,
session_uuid: uuid.UUID | None = None,
2 changes: 0 additions & 2 deletions preql_nlp/prompts/query_semantic_extraction.py
@@ -1,5 +1,3 @@


EXTRACTION_PROMPT_V1 = """
System: You are a helpful AI that translates ambiguous business questions into structured outputs.
For a provided question, you will determine if there are metrics or aggregates or dimensions,