Use prompty to store prompts (#2178)
* move-prompts-to-jinja-templates

* refactor: convert few_shots to JSON format and clean up comments

* Clean retrievethenread.py

* Port to prompty

* Configure Azure Developer Pipeline

* Refactor to use a PromptManager

* Inject followup at the end

* Make mypy so happy, remove conversation truncation for ask approaches

* Refactor text.py since it doesn't need to be its own very short file

* Fix Chat approach tests

* More prompty updates, test updates

* Fix type annotations

* Update more snapshots

* Add prompty metadata, revert some unneeded changes

* Fix thought process UI keys and data expectations

* Resolve issue with injected prompt, update test

* Pass in past messages to query rewrite prompt

* Update snapshots

* Updated prompty

* Removing prompty from types now that it has them

* Add samples to the non-vision approaches

* Rename the prompts to match the tabs

* Add back a comment about RAG flow

* Add prompty back to pyproject.toml

* Update the docs about customization

* Update to pyproject now that prompty is typed

* Fix vision approach missing past messages

---------

Co-authored-by: Jeannot Damoiseaux <[email protected]>
pamelafox and jeannotdamoiseaux authored Jan 14, 2025
1 parent 5f898db commit 3629df8
Showing 85 changed files with 643 additions and 432 deletions.
12 changes: 10 additions & 2 deletions app/backend/app.py
@@ -51,6 +51,7 @@
from approaches.approach import Approach
from approaches.chatreadretrieveread import ChatReadRetrieveReadApproach
from approaches.chatreadretrievereadvision import ChatReadRetrieveReadVisionApproach
from approaches.promptmanager import PromptyManager
from approaches.retrievethenread import RetrieveThenReadApproach
from approaches.retrievethenreadvision import RetrieveThenReadVisionApproach
from chat_history.cosmosdb import chat_history_cosmosdb_bp
@@ -642,8 +643,10 @@ async def setup_clients():
current_app.config[CONFIG_CHAT_HISTORY_BROWSER_ENABLED] = USE_CHAT_HISTORY_BROWSER
current_app.config[CONFIG_CHAT_HISTORY_COSMOS_ENABLED] = USE_CHAT_HISTORY_COSMOS

# Various approaches to integrate GPT and external knowledge, most applications will use a single one of these patterns
# or some derivative, here we include several for exploration purposes
prompt_manager = PromptyManager()

# Set up the two default RAG approaches for /ask and /chat
# RetrieveThenReadApproach is used by /ask for single-turn Q&A
current_app.config[CONFIG_ASK_APPROACH] = RetrieveThenReadApproach(
search_client=search_client,
openai_client=openai_client,
@@ -657,8 +660,10 @@
content_field=KB_FIELDS_CONTENT,
query_language=AZURE_SEARCH_QUERY_LANGUAGE,
query_speller=AZURE_SEARCH_QUERY_SPELLER,
prompt_manager=prompt_manager,
)

# ChatReadRetrieveReadApproach is used by /chat for multi-turn conversation
current_app.config[CONFIG_CHAT_APPROACH] = ChatReadRetrieveReadApproach(
search_client=search_client,
openai_client=openai_client,
@@ -672,6 +677,7 @@
content_field=KB_FIELDS_CONTENT,
query_language=AZURE_SEARCH_QUERY_LANGUAGE,
query_speller=AZURE_SEARCH_QUERY_SPELLER,
prompt_manager=prompt_manager,
)

if USE_GPT4V:
@@ -696,6 +702,7 @@
content_field=KB_FIELDS_CONTENT,
query_language=AZURE_SEARCH_QUERY_LANGUAGE,
query_speller=AZURE_SEARCH_QUERY_SPELLER,
prompt_manager=prompt_manager,
)

current_app.config[CONFIG_CHAT_VISION_APPROACH] = ChatReadRetrieveReadVisionApproach(
@@ -716,6 +723,7 @@
content_field=KB_FIELDS_CONTENT,
query_language=AZURE_SEARCH_QUERY_LANGUAGE,
query_speller=AZURE_SEARCH_QUERY_SPELLER,
prompt_manager=prompt_manager,
)


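Note: the new approaches/promptmanager.py module is among the 85 changed files but is not shown in this excerpt. Based on the call sites above and below (load_prompt, load_tools, render_prompt, and the rendered prompt's system_content / few_shot_messages / past_messages / new_user_content fields), here is a minimal sketch of what it might look like. The prompty.load / prompty.prepare calls and the message-splitting logic are assumptions, not the committed code:

import json
import pathlib
from dataclasses import dataclass, field

import prompty  # the package this PR adds back to pyproject.toml


@dataclass
class RenderedPrompt:
    system_content: str
    few_shot_messages: list = field(default_factory=list)
    past_messages: list = field(default_factory=list)
    new_user_content: str = ""


class PromptManager:
    """Interface the approaches depend on (see approach.py below)."""

    def load_prompt(self, path: str):
        raise NotImplementedError

    def load_tools(self, path: str) -> list:
        raise NotImplementedError

    def render_prompt(self, prompt, data: dict) -> RenderedPrompt:
        raise NotImplementedError


class PromptyManager(PromptManager):
    PROMPTS_DIRECTORY = pathlib.Path(__file__).parent / "prompts"

    def load_prompt(self, path: str):
        return prompty.load(self.PROMPTS_DIRECTORY / path)

    def load_tools(self, path: str) -> list:
        return json.loads((self.PROMPTS_DIRECTORY / path).read_text())

    def render_prompt(self, prompt, data: dict) -> RenderedPrompt:
        # prompty.prepare renders the .prompty Jinja template against `data`
        # and returns a list of {"role": ..., "content": ...} messages.
        messages = prompty.prepare(prompt, data)
        system_content = ""
        if messages and messages[0]["role"] == "system":
            system_content = messages.pop(0)["content"]
        new_user_content = ""
        if messages and messages[-1]["role"] == "user":
            new_user_content = messages.pop()["content"]
        return RenderedPrompt(
            system_content=system_content,
            past_messages=messages,
            new_user_content=new_user_content,
        )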
8 changes: 7 additions & 1 deletion app/backend/approaches/approach.py
@@ -24,8 +24,8 @@
from openai import AsyncOpenAI
from openai.types.chat import ChatCompletionMessageParam

from approaches.promptmanager import PromptManager
from core.authentication import AuthenticationHelper
from text import nonewlines


@dataclass
@@ -109,6 +109,7 @@ def __init__(
openai_host: str,
vision_endpoint: str,
vision_token_provider: Callable[[], Awaitable[str]],
prompt_manager: PromptManager,
):
self.search_client = search_client
self.openai_client = openai_client
@@ -121,6 +122,7 @@ def __init__(
self.openai_host = openai_host
self.vision_endpoint = vision_endpoint
self.vision_token_provider = vision_token_provider
self.prompt_manager = prompt_manager

def build_filter(self, overrides: dict[str, Any], auth_claims: dict[str, Any]) -> Optional[str]:
include_category = overrides.get("include_category")
@@ -205,6 +207,10 @@ async def search(
def get_sources_content(
self, results: List[Document], use_semantic_captions: bool, use_image_citation: bool
) -> list[str]:

def nonewlines(s: str) -> str:
return s.replace("\n", " ").replace("\r", " ")

if use_semantic_captions:
return [
(self.get_citation((doc.sourcepage or ""), use_image_citation))
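For context, nonewlines (formerly its own text.py module, per the commit messages) flattens each document onto a single line so that sources can later be joined with newline separators without ambiguity. A quick illustration with made-up data, not from the repo:

def nonewlines(s: str) -> str:
    return s.replace("\n", " ").replace("\r", " ")

# Each source stays on one line, so a newline-joined sources block is unambiguous:
sources = [
    "info1.txt: " + nonewlines("Plan A covers\ndental."),
    "info2.pdf: " + nonewlines("Plan B covers\r\nvision."),
]
print("\n".join(sources))
# info1.txt: Plan A covers dental.
# info2.pdf: Plan B covers  vision.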
44 changes: 6 additions & 38 deletions app/backend/approaches/chatapproach.py
@@ -9,53 +9,21 @@


class ChatApproach(Approach, ABC):
query_prompt_few_shots: list[ChatCompletionMessageParam] = [
{"role": "user", "content": "How did crypto do last year?"},
{"role": "assistant", "content": "Summarize Cryptocurrency Market Dynamics from last year"},
{"role": "user", "content": "What are my health plans?"},
{"role": "assistant", "content": "Show available health plans"},
]
NO_RESPONSE = "0"

follow_up_questions_prompt_content = """Generate 3 very brief follow-up questions that the user would likely ask next.
Enclose the follow-up questions in double angle brackets. Example:
<<Are there exclusions for prescriptions?>>
<<Which pharmacies can be ordered from?>>
<<What is the limit for over-the-counter medication?>>
Do not repeat questions that have already been asked.
Make sure the last question ends with ">>".
"""

query_prompt_template = """Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching in a knowledge base.
You have access to an Azure AI Search index with hundreds of documents.
Generate a search query based on the conversation and the new question.
Do not include cited source filenames and document names e.g info.txt or doc.pdf in the search query terms.
Do not include any text inside [] or <<>> in the search query terms.
Do not include any special characters like '+'.
If the question is not in English, translate the question to English before generating the search query.
If you cannot generate a search query, return just the number 0.
"""

@property
@abstractmethod
def system_message_chat_conversation(self) -> str:
pass
NO_RESPONSE = "0"

@abstractmethod
async def run_until_final_call(self, messages, overrides, auth_claims, should_stream) -> tuple:
pass

def get_system_prompt(self, override_prompt: Optional[str], follow_up_questions_prompt: str) -> str:
def get_system_prompt_variables(self, override_prompt: Optional[str]) -> dict[str, str]:
# Allows client to replace the entire prompt, or to inject into the existing prompt using >>>
if override_prompt is None:
return self.system_message_chat_conversation.format(
injected_prompt="", follow_up_questions_prompt=follow_up_questions_prompt
)
return {}
elif override_prompt.startswith(">>>"):
return self.system_message_chat_conversation.format(
injected_prompt=override_prompt[3:] + "\n", follow_up_questions_prompt=follow_up_questions_prompt
)
return {"injected_prompt": override_prompt[3:]}
else:
return override_prompt.format(follow_up_questions_prompt=follow_up_questions_prompt)
return {"override_prompt": override_prompt}

def get_search_query(self, chat_completion: ChatCompletion, user_query: str):
response_message = chat_completion.choices[0].message
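The refactored get_system_prompt_variables no longer formats the prompt string itself; it just returns template variables for the prompt manager to render. Reproduced standalone for illustration, with the three override modes exercised:

from typing import Optional

def get_system_prompt_variables(override_prompt: Optional[str]) -> dict[str, str]:
    if override_prompt is None:
        return {}  # render the default prompt unchanged
    elif override_prompt.startswith(">>>"):
        return {"injected_prompt": override_prompt[3:]}  # extend the default prompt
    else:
        return {"override_prompt": override_prompt}  # replace the prompt entirely

assert get_system_prompt_variables(None) == {}
assert get_system_prompt_variables(">>>Cite at most 3 sources.") == {
    "injected_prompt": "Cite at most 3 sources."
}
assert get_system_prompt_variables("You are a pirate.") == {
    "override_prompt": "You are a pirate."
}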
79 changes: 29 additions & 50 deletions app/backend/approaches/chatreadretrieveread.py
@@ -13,6 +13,7 @@

from approaches.approach import ThoughtStep
from approaches.chatapproach import ChatApproach
from approaches.promptmanager import PromptManager
from core.authentication import AuthenticationHelper


@@ -38,6 +39,7 @@ def __init__(
content_field: str,
query_language: str,
query_speller: str,
prompt_manager: PromptManager,
):
self.search_client = search_client
self.openai_client = openai_client
@@ -52,16 +54,10 @@ def __init__(
self.query_language = query_language
self.query_speller = query_speller
self.chatgpt_token_limit = get_token_limit(chatgpt_model, default_to_minimum=self.ALLOW_NON_GPT_MODELS)

@property
def system_message_chat_conversation(self):
return """Assistant helps the company employees with their healthcare plan questions, and questions about the employee handbook. Be brief in your answers.
Answer ONLY with the facts listed in the list of sources below. If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below. If asking a clarifying question to the user would help, ask the question.
If the question is not in English, answer in the language used in the question.
Each source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brackets to reference the source, for example [info1.txt]. Don't combine sources, list each source separately, for example [info1.txt][info2.pdf].
{follow_up_questions_prompt}
{injected_prompt}
"""
self.prompt_manager = prompt_manager
self.query_rewrite_prompt = self.prompt_manager.load_prompt("chat_query_rewrite.prompty")
self.query_rewrite_tools = self.prompt_manager.load_tools("chat_query_rewrite_tools.json")
self.answer_prompt = self.prompt_manager.load_prompt("chat_answer_question.prompty")

@overload
async def run_until_final_call(
@@ -101,37 +97,21 @@ async def run_until_final_call(
original_user_query = messages[-1]["content"]
if not isinstance(original_user_query, str):
raise ValueError("The most recent message content must be a string.")
user_query_request = "Generate search query for: " + original_user_query

tools: List[ChatCompletionToolParam] = [
{
"type": "function",
"function": {
"name": "search_sources",
"description": "Retrieve sources from the Azure AI Search index",
"parameters": {
"type": "object",
"properties": {
"search_query": {
"type": "string",
"description": "Query string to retrieve documents from azure search eg: 'Health care plan'",
}
},
"required": ["search_query"],
},
},
}
]

rendered_query_prompt = self.prompt_manager.render_prompt(
self.query_rewrite_prompt, {"user_query": original_user_query, "past_messages": messages[:-1]}
)
tools: List[ChatCompletionToolParam] = self.query_rewrite_tools

# STEP 1: Generate an optimized keyword search query based on the chat history and the last question
query_response_token_limit = 100
query_messages = build_messages(
model=self.chatgpt_model,
system_prompt=self.query_prompt_template,
system_prompt=rendered_query_prompt.system_content,
few_shots=rendered_query_prompt.few_shot_messages,
past_messages=rendered_query_prompt.past_messages,
new_user_content=rendered_query_prompt.new_user_content,
tools=tools,
few_shots=self.query_prompt_few_shots,
past_messages=messages[:-1],
new_user_content=user_query_request,
max_tokens=self.chatgpt_token_limit - query_response_token_limit,
fallback_to_default=self.ALLOW_NON_GPT_MODELS,
)
@@ -169,32 +149,31 @@ async def run_until_final_call(
minimum_reranker_score,
)

sources_content = self.get_sources_content(results, use_semantic_captions, use_image_citation=False)
content = "\n".join(sources_content)

# STEP 3: Generate a contextual and content specific answer using the search results and chat history

# Allow client to replace the entire prompt, or to inject into the existing prompt using >>>
system_message = self.get_system_prompt(
overrides.get("prompt_template"),
self.follow_up_questions_prompt_content if overrides.get("suggest_followup_questions") else "",
text_sources = self.get_sources_content(results, use_semantic_captions, use_image_citation=False)
rendered_answer_prompt = self.prompt_manager.render_prompt(
self.answer_prompt,
self.get_system_prompt_variables(overrides.get("prompt_template"))
| {
"include_follow_up_questions": bool(overrides.get("suggest_followup_questions")),
"past_messages": messages[:-1],
"user_query": original_user_query,
"text_sources": text_sources,
},
)

response_token_limit = 1024
messages = build_messages(
model=self.chatgpt_model,
system_prompt=system_message,
past_messages=messages[:-1],
# Model does not handle lengthy system messages well. Moving sources to latest user conversation to solve follow up questions prompt.
new_user_content=original_user_query + "\n\nSources:\n" + content,
system_prompt=rendered_answer_prompt.system_content,
past_messages=rendered_answer_prompt.past_messages,
new_user_content=rendered_answer_prompt.new_user_content,
max_tokens=self.chatgpt_token_limit - response_token_limit,
fallback_to_default=self.ALLOW_NON_GPT_MODELS,
)

data_points = {"text": sources_content}

extra_info = {
"data_points": data_points,
"data_points": {"text": text_sources},
"thoughts": [
ThoughtStep(
"Prompt to generate search query",
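The inline tool definition deleted above moves into chat_query_rewrite_tools.json, loaded via load_tools in __init__. Judging from the removed Python literal, that file would contain the equivalent JSON (reconstructed here from the deleted code, not copied from the commit):

[
  {
    "type": "function",
    "function": {
      "name": "search_sources",
      "description": "Retrieve sources from the Azure AI Search index",
      "parameters": {
        "type": "object",
        "properties": {
          "search_query": {
            "type": "string",
            "description": "Query string to retrieve documents from azure search eg: 'Health care plan'"
          }
        },
        "required": ["search_query"]
      }
    }
  }
]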