WIP: Stuff #3 (Open)
wants to merge 2 commits into main

25 changes: 11 additions & 14 deletions README.md
@@ -20,8 +20,8 @@ Recommended to use "gpt-3.5-turbo" or higher as the model.

```python
from trilogy_public_models import models
from preql import Executor, Dialects
from preql_nlp import build_query
from preql import Dialects
from preql_nlp import build_query, NlpPreqlModelClient

# define the model we want to parse
environment = models["bigquery.stack_overflow"]
@@ -30,20 +30,17 @@ environment = models["bigquery.stack_overflow"]
# default bigquery executor requires local default credentials configured
executor = Dialects.BIGQUERY.default_executor(environment= environment)

# build a query off text and the selected model
processed_query = build_query(
"How many questions are asked per year?",
environment,
)
# build an NLP client for the preql model
client = NlpPreqlModelClient(openai_model="gpt-3.5-turbo", preql_model=environment, preql_executor=executor)

# make sure we got reasonable outputs
for concept in processed_query.output_columns:
print(concept.name)
# ask a data question about the model in natural language.
question = "How many questions are asked per year?"
results = client.answer(question)

# print the results
for r in results:
print(r)

# and run that to get our answer
results = executor.execute_query(processed_query)
for row in results:
print(row)
```


3 changes: 2 additions & 1 deletion preql_nlp/__init__.py
@@ -3,8 +3,9 @@
patch_promptimize()

from .main import build_query # noqa: E402
from .client import NlpPreqlModelClient # noqa: E402


__version__ = "0.0.5"

__all__ = ["build_query"]
__all__ = ["build_query", "NlpPreqlModelClient"]
37 changes: 37 additions & 0 deletions preql_nlp/client.py
@@ -0,0 +1,37 @@
from preql import Environment, Executor
from preql_nlp.main import build_query, answer_is_reasonable
from dataclasses import dataclass

from time import sleep
from typing import List

@dataclass
class NlpPreqlModelClient:
"""Client that answers natural-language data questions against a preql model using an OpenAI model."""

openai_model: str
preql_model: Environment
preql_executor: Executor

def answer(self, question: str) -> List[tuple]:
# generate a preql query from the question, execute it, and ask the LLM whether the
# result set looks reasonable; retry up to max_retries times before giving up
max_retries = 3
retries = 0
while retries < max_retries:
query = build_query(question, self.preql_model, debug=False, log_info=True, model=self.openai_model)
results = self.preql_executor.execute_query(query)
cols = results.keys()

res = []
for r in results:
res.append(r)

if self.answer_is_reasonable(question, res, cols):
return res
else:
retries += 1
sleep(1)

raise Exception(f"Answer not reasonable after {max_retries} retries")

def answer_is_reasonable(self, question, results, columns) -> bool:
return answer_is_reasonable(question, results, columns)
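
For reference, a minimal usage sketch of the new client. It assumes the same BigQuery `stack_overflow` model and local credentials as the README example; the try/except handling is illustrative, since `answer()` raises a plain `Exception` once the reasonableness check has failed three times.

```python
from trilogy_public_models import models
from preql import Dialects
from preql_nlp import NlpPreqlModelClient

# Same model/executor setup as the README example (requires local BigQuery credentials).
environment = models["bigquery.stack_overflow"]
executor = Dialects.BIGQUERY.default_executor(environment=environment)

client = NlpPreqlModelClient(
    openai_model="gpt-3.5-turbo", preql_model=environment, preql_executor=executor
)

try:
    rows = client.answer("How many questions are asked per year?")
except Exception as exc:  # raised after three answers judged unreasonable
    print(f"No reasonable answer: {exc}")
else:
    for row in rows:
        print(row)
```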

31 changes: 23 additions & 8 deletions preql_nlp/main.py
@@ -17,6 +17,7 @@
SemanticToTokensPromptCase,
SelectionPromptCase,
SemanticExtractionPromptCase,
CheckAnswerPromptCase,
)
from preql_nlp.constants import logger, DEFAULT_LIMIT

@@ -106,7 +107,7 @@ def coerce_list_str(input: Any) -> List[str]:


def discover_inputs(
input_text: str, input_environment: Environment, debug: bool = False, log_info: bool = True
input_text: str, input_environment: Environment, debug: bool = False, log_info: bool = True, model: str = "gpt-3.5-turbo"
) -> IntermediateParseResults:
# we work around prompt size issues and hallucination by doing a two phase discovery
# first we parse the question into metrics/dimensions
@@ -124,7 +125,7 @@ def discover_inputs(
session_uuid = uuid.uuid4()

parsed = coerce_list_dict(
run_prompt(SemanticExtractionPromptCase(input_text), debug=debug, log_info=log_info, session_uuid=session_uuid)
run_prompt(SemanticExtractionPromptCase(input_text, model=model), debug=debug, log_info=log_info, session_uuid=session_uuid)
)[0]
order = parsed.get("order", [])
token_inputs = {"metrics": metrics, "dimensions": dimensions}
@@ -138,7 +139,7 @@ def discover_inputs(
phrase_tokens = coerce_list_dict(
run_prompt(
SemanticToTokensPromptCase(
phrases=local_phrases, tokens=token_inputs[field]
phrases=local_phrases, tokens=token_inputs[field], model=model
),
debug=True,
session_uuid=session_uuid,
@@ -165,7 +166,7 @@ def discover_inputs(
f"Could not find concept for input {k} with tokens {v}"
)
selections = coerce_list_dict(
run_prompt(SelectionPromptCase(concepts=output, question=input_text), debug=debug, session_uuid=session_uuid, log_info=log_info)
run_prompt(SelectionPromptCase(concepts=output, question=input_text, model=model), debug=debug, session_uuid=session_uuid, log_info=log_info)
)[0]
final = list(set(selections.get("matches", [])))

@@ -216,9 +217,10 @@ def parse_query(
input_text: str,
input_environment: Environment,
debug: bool = False,
log_info: bool=True
log_info: bool=True,
model: str="gpt-3.5-turbo"
):
results = discover_inputs(input_text, input_environment, debug=debug, log_info=log_info)
results = discover_inputs(input_text, input_environment, debug=debug, log_info=log_info, model=model)
concepts = [input_environment.concepts[x] for x in results.select]
order = parse_order(concepts, results.order)
if debug:
@@ -234,7 +236,20 @@ def build_query(
input_text: str,
input_environment: Environment,
debug: bool = False,
log_info: bool = True
log_info: bool = True,
model: str = "gpt-3.5-turbo"
) -> ProcessedQuery:
query = parse_query(input_text, input_environment, debug=debug, log_info=log_info)
query = parse_query(input_text, input_environment, debug=debug, log_info=log_info, model=model)
return process_query_v2(statement=query, environment=input_environment)


def answer_is_reasonable(question, results, columns) -> bool:
prompt = CheckAnswerPromptCase(question=question, columns=columns, answer=results)
res = coerce_list_dict(
run_prompt(prompt, debug=True, log_info=False)
)
return res[0]["answer"] == "REASONABLE"
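
A sketch of calling the new `answer_is_reasonable` helper directly, with illustrative sample values shaped like what the client passes in (`columns` from `results.keys()`, rows from iterating the result set). It needs `OPENAI_API_KEY` set, since it runs the check-answer prompt.

```python
from preql_nlp.main import answer_is_reasonable

# Illustrative values only; in practice these come from executing a generated query.
question = "How many questions are asked per year?"
columns = ["question_count", "question_creation_date_year"]
rows = [(2200802, 2016), (2196676, 2015), (2137435, 2014)]

if answer_is_reasonable(question, rows, columns):
    print("LLM judged the result set reasonable")
else:
    print("LLM flagged the result set as unreasonable or was unsure")
```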

3 changes: 3 additions & 0 deletions preql_nlp/monkeypatch.py
@@ -1,3 +1,6 @@



from promptimize.prompt_cases import BasePromptCase, utils

# patched method while waiting for upstream PR to be merged
4 changes: 2 additions & 2 deletions preql_nlp/prompts/__init__.py
@@ -1,2 +1,2 @@
from .prompt_executor import run_prompt, SelectionPromptCase, SemanticExtractionPromptCase, SemanticToTokensPromptCase
__all__ = ["run_prompt", "SelectionPromptCase", "SemanticExtractionPromptCase", "SemanticToTokensPromptCase"]
from .prompt_executor import run_prompt, SelectionPromptCase, SemanticExtractionPromptCase, SemanticToTokensPromptCase, CheckAnswerPromptCase
__all__ = ["run_prompt", "SelectionPromptCase", "SemanticExtractionPromptCase", "SemanticToTokensPromptCase", "CheckAnswerPromptCase"]
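
With `CheckAnswerPromptCase` exported, the check-answer template added in `preql_nlp/prompts/check_answer.py` below can be inspected on its own. A minimal rendering sketch, assuming Jinja2 is available and using illustrative sample values (inside the package the template is rendered by the promptimize-based executor):

```python
from jinja2 import Template

from preql_nlp.prompts.check_answer import CHECK_ANSWER_PROMPT_V1

# Sample values for the template's `question`, `columns`, and `results` variables.
rendered = Template(CHECK_ANSWER_PROMPT_V1).render(
    question="How many questions are asked per year?",
    columns=["question_count", "question_creation_date_year"],
    results=[(2200802, 2016), (2196676, 2015), (2137435, 2014)],
)
print(rendered)
```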
41 changes: 41 additions & 0 deletions preql_nlp/prompts/check_answer.py
@@ -0,0 +1,41 @@
CHECK_ANSWER_PROMPT_V1 = """
You are a helpful system that determines whether a SQL result set makes sense as an answer to a business question. I will give a question, followed by a line break, followed by a list of columns, followed by a line break, followed by a result set written line by line
as a list of tuples. You will respond with either "REASONABLE", "UNREASONABLE", or "UNSURE" using the VALID json format shown below and NOTHING ELSE, depending on whether you think the result set is reasonable given
the question asked. Below is an example of a result set that seems reasonable given the question.

Prompt:
How many questions are asked per year?

RMKeyView(['question_count', 'question_creation_date_year'])

(2200802, 2016)
(2196676, 2015)
(2137435, 2014)
(2116212, 2017)
(2033690, 2013)
(1888989, 2018)
(1871695, 2020)
(1766933, 2019)
(1629580, 2021)
(1629386, 2012)
(1268788, 2022)
(1189881, 2011)
(690840, 2010)
(341651, 2009)
(57569, 2008)

Response:
{% raw %}{"answer": "REASONABLE"}{% endraw %}

Remember your response MUST BE VALID JSON. Complete the following:

{{ question }}

{{ columns }}

{% for res in results %}
{{ res }}
{% endfor %}

Response:
"""
45 changes: 34 additions & 11 deletions preql_nlp/prompts/prompt_executor.py
@@ -9,6 +9,8 @@
from preql_nlp.prompts.query_semantic_extraction import EXTRACTION_PROMPT_V1
from preql_nlp.prompts.semantic_to_tokens import STRUCTURED_PROMPT_V1
from preql_nlp.prompts.final_selection import SELECTION_TEMPLATE_V1
from preql_nlp.prompts.check_answer import CHECK_ANSWER_PROMPT_V1
from langchain.llms import OpenAI

from typing import List, Optional, Callable, Union
import uuid
@@ -21,12 +23,20 @@ def __init__(
self,
category: str,
evaluators: Optional[Union[Callable, List[Callable]]] = None,
model: Optional[str] = None
):
self.model = model or "gpt-3.5-turbo"
super().__init__(category=category, evaluators=evaluators)
self._prompt_hash = str(uuid.uuid4())

def get_extra_template_context(self):
raise NotImplementedError("This class can't be used directly.")

def get_prompt_executor(self):
model_name = self.model
openai_api_key = os.environ.get("OPENAI_API_KEY")
self.prompt_executor_kwargs = {"model_name": model_name}
return OpenAI(model_name=model_name, openai_api_key=openai_api_key)


class SemanticExtractionPromptCase(BasePreqlPromptCase):
@@ -36,9 +46,10 @@ def __init__(
self,
question: str,
evaluators: Optional[Union[Callable, List[Callable]]] = None,
model: Optional[str] = None
):
self.question = question
super().__init__(category="semantic_extraction", evaluators=evaluators)
super().__init__(category="semantic_extraction", evaluators=evaluators, model=model)

def get_extra_template_context(self):
return {"question": self.question}
@@ -52,10 +63,11 @@ def __init__(
tokens: List[str],
phrases: List[str],
evaluators: Optional[Union[Callable, List[Callable]]] = None,
model: Optional[str] = None
):
self.tokens = tokens
self.phrases = phrases
super().__init__(category="semantic_to_tokens", evaluators=evaluators)
super().__init__(category="semantic_to_tokens", evaluators=evaluators, model=model)

def get_extra_template_context(self):
return {"tokens": self.tokens, "phrase_str": ",".join(self.phrases)}
@@ -69,24 +81,39 @@ def __init__(
question: str,
concepts: List[str],
evaluators: Optional[Union[Callable, List[Callable]]] = None,
model: Optional[str] = None
):
self.question = question
self.concepts = concepts
super().__init__(evaluators=evaluators, category="selection")
self.execution.score = None
super().__init__(evaluators=evaluators, category="selection", model=model)

def get_extra_template_context(self):
return {"concept_string": ", ".join(self.concepts), "question": self.question}


class CheckAnswerPromptCase(BasePreqlPromptCase):
template = CHECK_ANSWER_PROMPT_V1

def __init__(self, question: str, columns: List[str], answer: List[tuple], evaluators: Optional[Union[Callable, List[Callable]]] = None,
model: Optional[str] = None):
self.question = question
self.columns = columns
self.answer = answer
super().__init__(evaluators=evaluators, category="check", model=model)

def get_extra_template_context(self):
return {"results": self.answer, "columns": self.columns, "question": self.question}



DATA_DIR = os.path.join(
os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "log_data"
)
if not os.path.exists(DATA_DIR):
os.makedirs(DATA_DIR)


def log_prompt_info(prompt: TemplatedPromptCase, session_uuid: uuid.UUID):
def log_prompt_info(prompt: BasePreqlPromptCase, session_uuid: uuid.UUID):
prompt_hash = prompt.prompt_hash
prompt_context = prompt.jinja_context
template = prompt.template
@@ -99,20 +126,16 @@ def log_prompt_info(prompt: TemplatedPromptCase, session_uuid: uuid.UUID):
"category": category,
"session_uuid": str(session_uuid),
"response": prompt.response,
"model": prompt.model
}
with open(
os.path.join(DATA_DIR, str(session_uuid), prompt_hash + ".json"), "w"
) as f:
print(
"printing to...{}".format(
os.path.join(DATA_DIR, str(session_uuid), prompt_hash + ".json")
)
)
json.dump(data, f)


def run_prompt(
prompt: TemplatedPromptCase,
prompt: BasePreqlPromptCase,
debug: bool = False,
log_info=True,
session_uuid: uuid.UUID | None = None,
2 changes: 0 additions & 2 deletions preql_nlp/prompts/query_semantic_extraction.py
@@ -1,5 +1,3 @@


EXTRACTION_PROMPT_V1 = """
System: You are a helpful AI that translates ambiguous business questions into structured outputs.
For a provided question, you will determine if there are metrics or aggregates or dimensions,