Use prompty to store prompts (#2178)
* move-prompts-to-jinja-templates

* refactor: convert few_shots to JSON format and clean up comments

* Clean retrievethenread.py

* Port to prompty

* Configure Azure Developer Pipeline

* Refactor to use a PromptManager

* Inject followup at the end

* Make mypy so happy, remove conversation truncation for ask approaches

* Refactor text.py since it doesn't need to be its own very short file

* Fix Chat approach tests

* More prompty updates, test updates

* Fix type annotations

* Update more snapshots

* Add prompty metadata, revert some unneeded changes

* Fix thought process UI keys and data expectations

* Resolve issue with injected prompt, update test

* Pass in past messages to query rewrite prompt

* Update snapshots

* Updated prompty

* Removing prompty from types now that it has them

* Add samples to the non-vision approaches

* Rename the prompts to match the tabs

* Add back a comment about RAG flow

* Add prompty back to pyproject.toml

* Update the docs about customization

* Update to pyproject now that prompty is typed

* Fix vision approach missing past messages

---------

Co-authored-by: Jeannot Damoiseaux <[email protected]>
pamelafox and jeannotdamoiseaux authored Jan 14, 2025
1 parent 5f898db commit 3629df8
Showing 85 changed files with 643 additions and 432 deletions.
12 changes: 10 additions & 2 deletions app/backend/app.py
@@ -51,6 +51,7 @@
from approaches.approach import Approach
from approaches.chatreadretrieveread import ChatReadRetrieveReadApproach
from approaches.chatreadretrievereadvision import ChatReadRetrieveReadVisionApproach
from approaches.promptmanager import PromptyManager
from approaches.retrievethenread import RetrieveThenReadApproach
from approaches.retrievethenreadvision import RetrieveThenReadVisionApproach
from chat_history.cosmosdb import chat_history_cosmosdb_bp
@@ -642,8 +643,10 @@ async def setup_clients():
current_app.config[CONFIG_CHAT_HISTORY_BROWSER_ENABLED] = USE_CHAT_HISTORY_BROWSER
current_app.config[CONFIG_CHAT_HISTORY_COSMOS_ENABLED] = USE_CHAT_HISTORY_COSMOS

# Various approaches to integrate GPT and external knowledge, most applications will use a single one of these patterns
# or some derivative, here we include several for exploration purposes
prompt_manager = PromptyManager()

# Set up the two default RAG approaches for /ask and /chat
# RetrieveThenReadApproach is used by /ask for single-turn Q&A
current_app.config[CONFIG_ASK_APPROACH] = RetrieveThenReadApproach(
search_client=search_client,
openai_client=openai_client,
@@ -657,8 +660,10 @@
content_field=KB_FIELDS_CONTENT,
query_language=AZURE_SEARCH_QUERY_LANGUAGE,
query_speller=AZURE_SEARCH_QUERY_SPELLER,
prompt_manager=prompt_manager,
)

# ChatReadRetrieveReadApproach is used by /chat for multi-turn conversation
current_app.config[CONFIG_CHAT_APPROACH] = ChatReadRetrieveReadApproach(
search_client=search_client,
openai_client=openai_client,
@@ -672,6 +677,7 @@
content_field=KB_FIELDS_CONTENT,
query_language=AZURE_SEARCH_QUERY_LANGUAGE,
query_speller=AZURE_SEARCH_QUERY_SPELLER,
prompt_manager=prompt_manager,
)

if USE_GPT4V:
@@ -696,6 +702,7 @@
content_field=KB_FIELDS_CONTENT,
query_language=AZURE_SEARCH_QUERY_LANGUAGE,
query_speller=AZURE_SEARCH_QUERY_SPELLER,
prompt_manager=prompt_manager,
)

current_app.config[CONFIG_CHAT_VISION_APPROACH] = ChatReadRetrieveReadVisionApproach(
@@ -716,6 +723,7 @@
content_field=KB_FIELDS_CONTENT,
query_language=AZURE_SEARCH_QUERY_LANGUAGE,
query_speller=AZURE_SEARCH_QUERY_SPELLER,
prompt_manager=prompt_manager,
)


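Note: the new approaches/promptmanager.py module is among the 85 changed files but is not shown in this excerpt. Based on the call sites above and below (load_prompt, load_tools, render_prompt, and the rendered prompt's system_content / few_shot_messages / past_messages / new_user_content fields), here is a minimal sketch of what it might look like. The prompty.load / prompty.prepare calls and the message-splitting logic are assumptions, not the committed code:

import json
import pathlib
from dataclasses import dataclass, field

import prompty  # the package this PR adds back to pyproject.toml


@dataclass
class RenderedPrompt:
    system_content: str
    few_shot_messages: list = field(default_factory=list)
    past_messages: list = field(default_factory=list)
    new_user_content: str = ""


class PromptManager:
    """Interface the approaches depend on (see approach.py below)."""

    def load_prompt(self, path: str):
        raise NotImplementedError

    def load_tools(self, path: str) -> list:
        raise NotImplementedError

    def render_prompt(self, prompt, data: dict) -> RenderedPrompt:
        raise NotImplementedError


class PromptyManager(PromptManager):
    PROMPTS_DIRECTORY = pathlib.Path(__file__).parent / "prompts"

    def load_prompt(self, path: str):
        return prompty.load(self.PROMPTS_DIRECTORY / path)

    def load_tools(self, path: str) -> list:
        return json.loads((self.PROMPTS_DIRECTORY / path).read_text())

    def render_prompt(self, prompt, data: dict) -> RenderedPrompt:
        # prompty.prepare renders the .prompty Jinja template against `data`
        # and returns a list of {"role": ..., "content": ...} messages.
        messages = prompty.prepare(prompt, data)
        system_content = ""
        if messages and messages[0]["role"] == "system":
            system_content = messages.pop(0)["content"]
        new_user_content = ""
        if messages and messages[-1]["role"] == "user":
            new_user_content = messages.pop()["content"]
        return RenderedPrompt(
            system_content=system_content,
            past_messages=messages,
            new_user_content=new_user_content,
        )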
8 changes: 7 additions & 1 deletion app/backend/approaches/approach.py
@@ -24,8 +24,8 @@
from openai import AsyncOpenAI
from openai.types.chat import ChatCompletionMessageParam

from approaches.promptmanager import PromptManager
from core.authentication import AuthenticationHelper
from text import nonewlines


@dataclass
@@ -109,6 +109,7 @@ def __init__(
openai_host: str,
vision_endpoint: str,
vision_token_provider: Callable[[], Awaitable[str]],
prompt_manager: PromptManager,
):
self.search_client = search_client
self.openai_client = openai_client
@@ -121,6 +122,7 @@ def __init__(
self.openai_host = openai_host
self.vision_endpoint = vision_endpoint
self.vision_token_provider = vision_token_provider
self.prompt_manager = prompt_manager

def build_filter(self, overrides: dict[str, Any], auth_claims: dict[str, Any]) -> Optional[str]:
include_category = overrides.get("include_category")
@@ -205,6 +207,10 @@ async def search(
def get_sources_content(
self, results: List[Document], use_semantic_captions: bool, use_image_citation: bool
) -> list[str]:

def nonewlines(s: str) -> str:
return s.replace("\n", " ").replace("\r", " ")

if use_semantic_captions:
return [
(self.get_citation((doc.sourcepage or ""), use_image_citation))
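For context, nonewlines (formerly its own text.py module, per the commit messages) flattens each document onto a single line so that sources can later be joined with newline separators without ambiguity. A quick illustration with made-up data, not from the repo:

def nonewlines(s: str) -> str:
    return s.replace("\n", " ").replace("\r", " ")

# Each source stays on one line, so a newline-joined sources block is unambiguous:
sources = [
    "info1.txt: " + nonewlines("Plan A covers\ndental."),
    "info2.pdf: " + nonewlines("Plan B covers\r\nvision."),
]
print("\n".join(sources))
# info1.txt: Plan A covers dental.
# info2.pdf: Plan B covers  vision.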
44 changes: 6 additions & 38 deletions app/backend/approaches/chatapproach.py
@@ -9,53 +9,21 @@


class ChatApproach(Approach, ABC):
query_prompt_few_shots: list[ChatCompletionMessageParam] = [
{"role": "user", "content": "How did crypto do last year?"},
{"role": "assistant", "content": "Summarize Cryptocurrency Market Dynamics from last year"},
{"role": "user", "content": "What are my health plans?"},
{"role": "assistant", "content": "Show available health plans"},
]
NO_RESPONSE = "0"

follow_up_questions_prompt_content = """Generate 3 very brief follow-up questions that the user would likely ask next.
Enclose the follow-up questions in double angle brackets. Example:
<<Are there exclusions for prescriptions?>>
<<Which pharmacies can be ordered from?>>
<<What is the limit for over-the-counter medication?>>
Do not repeat questions that have already been asked.
Make sure the last question ends with ">>".
"""

query_prompt_template = """Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching in a knowledge base.
You have access to an Azure AI Search index with hundreds of documents.
Generate a search query based on the conversation and the new question.
Do not include cited source filenames and document names e.g info.txt or doc.pdf in the search query terms.
Do not include any text inside [] or <<>> in the search query terms.
Do not include any special characters like '+'.
If the question is not in English, translate the question to English before generating the search query.
If you cannot generate a search query, return just the number 0.
"""

@property
@abstractmethod
def system_message_chat_conversation(self) -> str:
pass
NO_RESPONSE = "0"

@abstractmethod
async def run_until_final_call(self, messages, overrides, auth_claims, should_stream) -> tuple:
pass

def get_system_prompt(self, override_prompt: Optional[str], follow_up_questions_prompt: str) -> str:
def get_system_prompt_variables(self, override_prompt: Optional[str]) -> dict[str, str]:
# Allows client to replace the entire prompt, or to inject into the existing prompt using >>>
if override_prompt is None:
return self.system_message_chat_conversation.format(
injected_prompt="", follow_up_questions_prompt=follow_up_questions_prompt
)
return {}
elif override_prompt.startswith(">>>"):
return self.system_message_chat_conversation.format(
injected_prompt=override_prompt[3:] + "\n", follow_up_questions_prompt=follow_up_questions_prompt
)
return {"injected_prompt": override_prompt[3:]}
else:
return override_prompt.format(follow_up_questions_prompt=follow_up_questions_prompt)
return {"override_prompt": override_prompt}

def get_search_query(self, chat_completion: ChatCompletion, user_query: str):
response_message = chat_completion.choices[0].message
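The refactored get_system_prompt_variables no longer formats the prompt string itself; it just returns template variables for the prompt manager to render. Reproduced standalone for illustration, with the three override modes exercised:

from typing import Optional

def get_system_prompt_variables(override_prompt: Optional[str]) -> dict[str, str]:
    if override_prompt is None:
        return {}  # render the default prompt unchanged
    elif override_prompt.startswith(">>>"):
        return {"injected_prompt": override_prompt[3:]}  # extend the default prompt
    else:
        return {"override_prompt": override_prompt}  # replace the prompt entirely

assert get_system_prompt_variables(None) == {}
assert get_system_prompt_variables(">>>Cite at most 3 sources.") == {
    "injected_prompt": "Cite at most 3 sources."
}
assert get_system_prompt_variables("You are a pirate.") == {
    "override_prompt": "You are a pirate."
}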
79 changes: 29 additions & 50 deletions app/backend/approaches/chatreadretrieveread.py
@@ -13,6 +13,7 @@

from approaches.approach import ThoughtStep
from approaches.chatapproach import ChatApproach
from approaches.promptmanager import PromptManager
from core.authentication import AuthenticationHelper


@@ -38,6 +39,7 @@ def __init__(
content_field: str,
query_language: str,
query_speller: str,
prompt_manager: PromptManager,
):
self.search_client = search_client
self.openai_client = openai_client
@@ -52,16 +54,10 @@ def __init__(
self.query_language = query_language
self.query_speller = query_speller
self.chatgpt_token_limit = get_token_limit(chatgpt_model, default_to_minimum=self.ALLOW_NON_GPT_MODELS)

@property
def system_message_chat_conversation(self):
return """Assistant helps the company employees with their healthcare plan questions, and questions about the employee handbook. Be brief in your answers.
Answer ONLY with the facts listed in the list of sources below. If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below. If asking a clarifying question to the user would help, ask the question.
If the question is not in English, answer in the language used in the question.
Each source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brackets to reference the source, for example [info1.txt]. Don't combine sources, list each source separately, for example [info1.txt][info2.pdf].
{follow_up_questions_prompt}
{injected_prompt}
"""
self.prompt_manager = prompt_manager
self.query_rewrite_prompt = self.prompt_manager.load_prompt("chat_query_rewrite.prompty")
self.query_rewrite_tools = self.prompt_manager.load_tools("chat_query_rewrite_tools.json")
self.answer_prompt = self.prompt_manager.load_prompt("chat_answer_question.prompty")

@overload
async def run_until_final_call(
@@ -101,37 +97,21 @@ async def run_until_final_call(
original_user_query = messages[-1]["content"]
if not isinstance(original_user_query, str):
raise ValueError("The most recent message content must be a string.")
user_query_request = "Generate search query for: " + original_user_query

tools: List[ChatCompletionToolParam] = [
{
"type": "function",
"function": {
"name": "search_sources",
"description": "Retrieve sources from the Azure AI Search index",
"parameters": {
"type": "object",
"properties": {
"search_query": {
"type": "string",
"description": "Query string to retrieve documents from azure search eg: 'Health care plan'",
}
},
"required": ["search_query"],
},
},
}
]

rendered_query_prompt = self.prompt_manager.render_prompt(
self.query_rewrite_prompt, {"user_query": original_user_query, "past_messages": messages[:-1]}
)
tools: List[ChatCompletionToolParam] = self.query_rewrite_tools

# STEP 1: Generate an optimized keyword search query based on the chat history and the last question
query_response_token_limit = 100
query_messages = build_messages(
model=self.chatgpt_model,
system_prompt=self.query_prompt_template,
system_prompt=rendered_query_prompt.system_content,
few_shots=rendered_query_prompt.few_shot_messages,
past_messages=rendered_query_prompt.past_messages,
new_user_content=rendered_query_prompt.new_user_content,
tools=tools,
few_shots=self.query_prompt_few_shots,
past_messages=messages[:-1],
new_user_content=user_query_request,
max_tokens=self.chatgpt_token_limit - query_response_token_limit,
fallback_to_default=self.ALLOW_NON_GPT_MODELS,
)
@@ -169,32 +149,31 @@ async def run_until_final_call(
minimum_reranker_score,
)

sources_content = self.get_sources_content(results, use_semantic_captions, use_image_citation=False)
content = "\n".join(sources_content)

# STEP 3: Generate a contextual and content specific answer using the search results and chat history

# Allow client to replace the entire prompt, or to inject into the existing prompt using >>>
system_message = self.get_system_prompt(
overrides.get("prompt_template"),
self.follow_up_questions_prompt_content if overrides.get("suggest_followup_questions") else "",
text_sources = self.get_sources_content(results, use_semantic_captions, use_image_citation=False)
rendered_answer_prompt = self.prompt_manager.render_prompt(
self.answer_prompt,
self.get_system_prompt_variables(overrides.get("prompt_template"))
| {
"include_follow_up_questions": bool(overrides.get("suggest_followup_questions")),
"past_messages": messages[:-1],
"user_query": original_user_query,
"text_sources": text_sources,
},
)

response_token_limit = 1024
messages = build_messages(
model=self.chatgpt_model,
system_prompt=system_message,
past_messages=messages[:-1],
# Model does not handle lengthy system messages well. Moving sources to latest user conversation to solve follow up questions prompt.
new_user_content=original_user_query + "\n\nSources:\n" + content,
system_prompt=rendered_answer_prompt.system_content,
past_messages=rendered_answer_prompt.past_messages,
new_user_content=rendered_answer_prompt.new_user_content,
max_tokens=self.chatgpt_token_limit - response_token_limit,
fallback_to_default=self.ALLOW_NON_GPT_MODELS,
)

data_points = {"text": sources_content}

extra_info = {
"data_points": data_points,
"data_points": {"text": text_sources},
"thoughts": [
ThoughtStep(
"Prompt to generate search query",
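The inline tool definition deleted above moves into chat_query_rewrite_tools.json, loaded via load_tools in __init__. Judging from the removed Python literal, that file would contain the equivalent JSON (reconstructed here from the deleted code, not copied from the commit):

[
  {
    "type": "function",
    "function": {
      "name": "search_sources",
      "description": "Retrieve sources from the Azure AI Search index",
      "parameters": {
        "type": "object",
        "properties": {
          "search_query": {
            "type": "string",
            "description": "Query string to retrieve documents from azure search eg: 'Health care plan'"
          }
        },
        "required": ["search_query"]
      }
    }
  }
]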