ScottLogic · CLeopard99 · Oct 29, 2024 · Oct 30, 2024 · mic-smith · Oct 30, 2024
diff --git a/backend/src/prompts/templates/best-next-step.j2 b/backend/src/prompts/templates/best-next-step.j2
@@ -22,7 +22,7 @@ Here is the list of Agents you can choose from:
 AGENT LIST:
 {{ list_of_agents }}
 
-If the list of agents does not contain something suitable, you should say the agent is 'WebAgent'. ie. If question is 'general knowledge', 'personal' or a 'greeting'.
+If the list of agents does not contain something suitable, you should say the agent is 'none'. ie. If question is 'general knowledge', 'personal' or a 'greeting'.
 
 ## Determine the next best step
 Your task is to pick one of the mentioned agents above to complete the task.

diff --git a/backend/src/prompts/templates/intent-format.j2 b/backend/src/prompts/templates/intent-format.j2
@@ -3,15 +3,18 @@ Reply only in json with the following format:
 {
     "query": "string of the original query",
     "user_intent": "string of the overall intent of the user",
+    "result_type": "string of the type of result expected, this will be either 'text' or 'dataset'",
+    "query_type": "string of the type of query the user is asking, this will be either 'general' or 'data manipulation'",
     "questions": array of the following object: 
             {
                 "query": "string of the query for the individual question",
                 "question_intent": "string of the intent of the question",
                 "operation": "string of the operation to be performed",
-                "question_category": "string of the category of the question",
+                "question_category": "string of the category of the question, will be either 'data driven', 'search online', 'general knowledge', or 'chitchat'",
                 "parameters": "array of objects that have a type and value properties, both of which are strings",
                 "aggregation": "string of the aggregation to be performed or none if no aggregation is needed",
                 "sort_order": "string of the sort order to be performed or none if no sorting is needed",
                 "timeframe": "string of the timeframe to be considered or none if no timeframe is needed",
+                "query_type":"this is copied from the parent object query_type property",
             }
 }
diff --git a/backend/src/prompts/templates/intent.j2 b/backend/src/prompts/templates/intent.j2
@@ -65,18 +65,26 @@ Q: Find the schedule of the local train station.
 Response: 
 {"query":"Find the schedule of the local train station.","user_intent":"find train schedule","questions":[{"query":"Find the schedule of the local train station.","question_intent":"retrieve train schedule from web","operation":"online search","question_category":"search online","parameters":[{"type":"train station","value":"local"}],"sort_order":"none"}]} 
 
-Q: What are the different subscriptions with Netflix? Show me the results in a chart.
+Q: Tell me Apple's ESG scores.
 Response:
-{"query": "What are the different subscriptions with Netflix? Show me the results in a chart.", "user_intent": "find and display subscription information", "questions": [{"query": "What are the different subscriptions with Netflix?", "question_intent": "retrieve subscription information", "operation": "literal search", "question_category": "data driven", "parameters": [{"type": "company", "value": "Netflix"}], "aggregation": "none", "sort_order": "none", "timeframe": "none"}, {"query": "Show me the results in a chart", "question_intent": "display information in a chart", "operation": "data visualization", "question_category": "data presentation", "parameters": [], "aggregation": "none", "sort_order": "none", "timeframe": "none"}]}
+{"query":"Tell me Apple's ESG scores.","user_intent":"find ESG scores for a specific company","result_type":"text","query_type":"general","questions":[{"query": "Tell me Apple's ESG scores.","question_intent":"retrieve ESG scores","operation":"literal search","question_category":"data driven","parameters":[{"type":"company","value":"Apple"}]}]}
 
-Q: Show me a chart of different subscription prices with Netflix?
+Q: What are the average ESG scores per country?
 Response:
-{"query": "Show me a chart of different subscription prices with Netflix?", "user_intent": "retrieve and visualize subscription data", "questions": [{"query": "What are the different subscription prices with Netflix?", "question_intent": "retrieve subscription pricing information", "operation": "literal search", "question_category": "data driven", "parameters": [{"type": "company", "value": "Netflix"}], "aggregation": "none", "sort_order": "none", "timeframe": "none"}, {"query": "Show me the results in a chart", "question_intent": "display subscription pricing information in a chart", "operation": "data visualization", "question_category": "data presentation", "parameters": [], "aggregation": "none", "sort_order": "none", "timeframe": "none"}]}
+{"query":"What are the average ESG scores per country?","user_intent":"find average ESG scores by country","result_type":"dataset","query_type":"general","questions":[{"query":"What are the average ESG scores per country?","question_intent":"calculate average ESG scores","operation":"filter + aggregation + sort","question_category":"data driven","parameters":[{"type":"score","value":"ESG"}],"aggregation":"average","sort_order":"ascending"}]}
 
-Finally, if no tool fits the task, return the following:
+Q: Regarding ESG scores, what does Apple score for social over the past 6 months?
+Response:
+{"query":"Regarding ESG scores, what does Apple score for social over the past 6 months?","user_intent":"find historical ESG scores for a specific company","result_type":"dataset","query_type":"general","questions":[{"query":"What does Apple score for social over the past 6 months?","question_intent":"retrieve historical social ESG scores","operation":"filter + sort","question_category":"data driven","parameters":[{"type":"company","value":"Apple"}],"sort_order":"descending","timeframe":"past 6 months"}]}
+
+Q: Sort this data by social score descending.
+Response:
+{"query":"Sort this data by social score descending.","user_intent":"sort data by social score","result_type":"dataset","query_type":"data manipulation","questions":[{"query":"Sort this data by social score descending","question_intent":"sort data by social score","operation":"filter + sort","question_category":"data driven","parameters":[{"type": "sort_by", "value": "social score"}],"sort_order":"descending"}]}
 
-{
-    "tool_name":  "None",
-    "tool_parameters":  "{}",
-    "reasoning": "No tool was appropriate for the task"
-}
+Q: Group this data by country.
+Response:
+{"query":"Group this data by country.","user_intent":"group data by country","result_type":"dataset","query_type":"data manipulation","questions":[{"query":"Group this data by country.","question_intent":"group data by country","operation":"filter + aggregation + sort","question_category":"data driven","parameters":[{"type": "group_by", "value": "country"}],"aggregation":"average","sort_order":"ascending"}]}
+
+Q: Remove USA from this dataset.
+Response:
+{"query":"Remove USA from this dataset.","user_intent":"filter dataset","result_type":"dataset","query_type":"data manipulation","questions":[{"query":"Remove USA from this dataset.","question_intent":"filter dataset","operation":"filter + sort","question_category":"data driven","parameters":[{"type":"country","value":"not USA"}],"sort_order":"ascending"}]}
diff --git a/backend/src/prompts/templates/neo4j-graph-why.j2 b/backend/src/prompts/templates/neo4j-graph-why.j2
@@ -1,4 +1,2 @@
-The graph represents a hierarchy starting from a Parent Account,
-which is at the top level of the graph and goes down to Transaction.
-All Transactions belong to a group or Classification and each of them 
-is made and associated to a Merchant. 
+The graph represents Investment Funds and Businesses within the fund.
+Businesses relate to a specific Industry and have Environment, Social and Governance scores.
diff --git a/backend/src/prompts/templates/summariser.j2 b/backend/src/prompts/templates/summariser.j2
@@ -18,3 +18,10 @@ Reply only in json with the following format:
     "summary":  "The summary of the content that answers the user's query",
     "reasoning": "A sentence on why you chose that summary"
 }
+
+e.g.
+Task: What is the capital of England
+{
+    "summary": "The capital of England is London.",
+    "reasoning": "London is widely known as the capital of England, a fact mentioned in various authoritative sources and geographical references."
+}
diff --git a/backend/src/prompts/templates/tool-selection-format.j2 b/backend/src/prompts/templates/tool-selection-format.j2
@@ -1,5 +1,4 @@
-Reply only in json with the following format, in the tool_parameters please include the currency and measuring scale used in the content provided.:
-
+Reply only in json with the following format:
 
 {
     "tool_name":  "the exact string name of the tool chosen",

diff --git a/backend/src/prompts/templates/validator.j2 b/backend/src/prompts/templates/validator.j2
@@ -4,6 +4,8 @@ Your entire purpose is to return a boolean value to indicate if the answer has f
 
 You will be passed a task and an answer. You need to determine if the answer is correct or not.
 
+If the "query_type" is "data manipulation", return True, it's probably correct.
+
 Be lenient - if the answer looks reasonably right then return True
 
 e.g.
@@ -24,7 +26,8 @@ Answer: Microsoft's ESG (Environmental, Social, and Governance) scores area as f
 Response: False
 Reasoning: The answer is for Microsoft not Apple.
 
-You must always return a single boolean value as the response.
-Do not return any additional information, just the boolean value.
-
-Spending is negative
+You must return a json result:
+{
+    "is_valid": "boolean",
+    "reasoning": "string"
+}
diff --git a/backend/src/supervisors/supervisor.py b/backend/src/supervisors/supervisor.py
@@ -20,15 +20,13 @@
 
     for question in questions:
         try:
-            (agent_name, answer, status) = await solve_task(question, get_scratchpad())
+            (agent_name, answer) = await solve_task(question, get_scratchpad())
             update_scratchpad(agent_name, question, answer)
-            if status == "error":
-                raise Exception(answer)
         except Exception as error:
             update_scratchpad(error=error)
 
 
-async def solve_task(task, scratchpad, attempt=0) -> Tuple[str, str, str]:
+async def solve_task(task, scratchpad, attempt=0) -> Tuple[str, str]:
     if attempt == 5:
         raise Exception(unsolvable_response)
 
@@ -38,15 +36,26 @@
     logger.info(f"Agent selected: {agent.name}")
     logger.info(f"Task is {task}")
     answer = await agent.invoke(task)
-    parsed_json = json.loads(answer)
-    status = parsed_json.get('status', 'success')
-    ignore_validation = parsed_json.get('ignore_validation', '')
-    answer_content = parsed_json.get('content', '')
-    if(ignore_validation == 'true') or await is_valid_answer(answer_content, task):
-        return (agent.name, answer_content, status)
+    if await is_valid_answer(agent, answer, task):
+        return (agent.name, answer)
+
     return await solve_task(task, scratchpad, attempt + 1)
 
 
-async def is_valid_answer(answer, task) -> bool:
-    is_valid = (await get_validator_agent().invoke(f"Task: {task}  Answer: {answer}")).lower() == "true"
+async def is_valid_answer(agent, answer, task) -> bool:
+    is_valid_result = await get_validator_agent().invoke(f"Task: {task}  Answer: {answer}")
+    is_valid_result_json = json.loads(is_valid_result)
+    is_valid = is_valid_result_json["is_valid"]
+    if not is_valid:
+        logger.warning(f"Answer: {answer} for query: '{
+                       task['query']}' is not valid")
+
+    if not is_valid and agent.name == "DatastoreAgent":
+        if answer == 'No database query':
+            update_scratchpad(agent_name=agent.name, result=is_valid, error=f'The task "{
+                              task["query"]}" failed to generate cypher query, next time DO NOT use agent_name {agent.name} again')
+        else:
+            update_scratchpad(agent_name=agent.name, result=is_valid, error=f'The task "{
+                              task["query"]}" generated cypher query but resulted in an invalid answer, next time try to use agent_name {agent.name} again')
+
     return is_valid
diff --git a/backend/tests/prompts/prompting_test.py b/backend/tests/prompts/prompting_test.py
@@ -58,7 +58,7 @@ def test_load_best_next_step_template():
 AGENT LIST:
 
 
-If the list of agents does not contain something suitable, you should say the agent is 'WebAgent'. ie. If question is 'general knowledge', 'personal' or a 'greeting'.
+If the list of agents does not contain something suitable, you should say the agent is 'none'. ie. If question is 'general knowledge', 'personal' or a 'greeting'.
 
 ## Determine the next best step
 Your task is to pick one of the mentioned agents above to complete the task.
@@ -104,7 +104,7 @@ def test_load_best_next_step_with_history_template():
 AGENT LIST:
 
 
-If the list of agents does not contain something suitable, you should say the agent is 'WebAgent'. ie. If question is 'general knowledge', 'personal' or a 'greeting'.
+If the list of agents does not contain something suitable, you should say the agent is 'none'. ie. If question is 'general knowledge', 'personal' or a 'greeting'.
 
 ## Determine the next best step
 Your task is to pick one of the mentioned agents above to complete the task.
@@ -168,8 +168,7 @@ def test_best_tool_template():
 def test_tool_selection_format_template():
     engine = PromptEngine()
     try:
-        expected_string = """Reply only in json with the following format, in the tool_parameters please include the currency and measuring scale used in the content provided.:
-
+        expected_string = """Reply only in json with the following format:
 
 {
     \"tool_name\":  \"the exact string name of the tool chosen\",