Azure-Samples · changliu2 · Nov 19, 2024 · Nov 19, 2024 · Nov 19, 2024 · Nov 19, 2024
diff --git a/src/api/agents/product/product.prompty b/src/api/agents/product/product.prompty
@@ -7,12 +7,18 @@ model:
   api: chat
   configuration:
     type: azure_openai
-    azure_deployment: gpt-4-evals
-    api_version: 2023-07-01-preview
+    azure_deployment: gpt-4o-mini
+    api_version: 2024-06-01
   parameters:
     max_tokens: 1500
+    response_format:
+      type: json_object
 sample:
   context: Can you use a selection of sports and outdoor cooking gear as context?
+inputs:
+  context:
+    type: string
+
 ---
 system:
 
@@ -48,4 +54,5 @@ Only output the full array of specialized queries to make to the search index. L
 yoursef to 5 queries.
 
 user:
-{{context}}
+context: {{context}}
+queries:
diff --git a/src/api/agents/product/product.py b/src/api/agents/product/product.py
@@ -87,7 +87,11 @@ def retrieve_products(items, index_name):
 def find_products(context: str) -> Dict[str, any]:
     # Get product queries
     queries = prompty.execute("product.prompty", inputs={"context":context})
+
     qs = json.loads(queries)
+    if isinstance(qs, dict) and "queries" in qs:  # unwrap {"queries": [...]}; a bare JSON array passes through unchanged
+        qs = qs["queries"]
+    print("Agent suggested 5 product queries:", qs)
     # Generate embeddings
     items = generate_embeddings(qs)
     # Retrieve products

diff --git a/src/api/agents/researcher/researcher.prompty b/src/api/agents/researcher/researcher.prompty
@@ -9,13 +9,18 @@ model:
   api: chat
   configuration: 
     type: azure_openai
-    azure_deployment: gpt-35-turbo
-    api_version: 2023-07-01-preview
+    azure_deployment: gpt-4o-mini
+    api_version: 2024-06-01
   parameters:
     tools: ${file:functions.json}
 sample:
   instructions: Can you find the latest camping trends and what folks are doing in the winter?
   feedback: Can you dig find some information about the latest camping trends and what folks are doing in the winter?
+inputs:
+  instructions:
+    type: string
+  feedback:
+    type: string
 ---
 system:
 # Researcher Agent

diff --git a/src/api/agents/researcher/researcher.py b/src/api/agents/researcher/researcher.py
@@ -41,7 +41,10 @@ def find_information(query, market="en-US"):
         {"url": a["url"], "name": a["name"], "description": a["snippet"]}
         for a in items["webPages"]["value"]
     ]
-    related = [a["text"] for a in items["relatedSearches"]["value"]]
+    # default to no related searches when the response has no "relatedSearches" key
+    related = []
+    if "relatedSearches" in items:
+        related = [a["text"] for a in items["relatedSearches"]["value"]]
     return {"pages": pages, "related": related}
 
 

diff --git a/src/api/agents/writer/writer.prompty b/src/api/agents/writer/writer.prompty
@@ -7,8 +7,8 @@ model:
   api: chat
   configuration:
     type: azure_openai
-    azure_deployment: gpt-4-evals
-    api_version: 2023-07-01-preview
+    azure_deployment: gpt-4o-mini
+    api_version: 2024-06-01
   parameters:
     max_tokens: 2000
 sample:

diff --git a/src/api/evaluate/eval_inputs.jsonl b/src/api/evaluate/eval_inputs.jsonl
@@ -1,3 +1,11 @@
-{"research_context": "Can you find the latest camping trends and what folks are doing in the winter?", "product_context": "Can you use a selection of tents and sleeping bags as context?", "assignment_context": "Write a fun and engaging article that includes the research and product information. The article should be roughly 500 words long."}
-{"research_context": "Can you find the latest trends in hiking shoes?" , "product_context":"Can you use a selection of hiking shoes as context?", "assignment_context": "Write an article about the best kind of hiking shoes. The article should include the product information. The article should be roughly 500 words long."}
-{"research_context": "Find information about the best snow camping spots in the world","product_context":"Can you use a selection of tents that are good for snow as context?",  "assignment_context": "Write an article about the best kind of tents for snow camping. The article should be roughly 500 words long."}
+{"research_context": "Can you find the latest camping trends and what folks are doing in the winter?", "product_context": "Can you use a selection of tents and sleeping bags as context?", "assignment_context": "Write a fun and engaging article that includes the research and product information. The article should be between 800 and 1000 words. Make sure to cite sources in the article as you mention the research not at the end."}
+{"research_context": "Can you investigate the rise of glamping and its impact on traditional camping?", "product_context": "Include examples of luxury tents and high-end sleeping bags in your discussion.", "assignment_context": "Write a fun and engaging article that includes the research and product information. The article should be between 800 and 1000 words. Make sure to cite sources in the article as you mention the research not at the end."}
+{"research_context": "What are the latest trends in family camping and how are families choosing their gear?", "product_context": "Use a variety of family-sized tents and sleeping bags as context.", "assignment_context": "Write a fun and engaging article that includes the research and product information. The article should be between 800 and 1000 words. Make sure to cite sources in the article as you mention the research not at the end."}
+{"research_context": "Can you analyze the trend of digital detox camping and how it influences gear choices?", "product_context": "Include examples of tents and sleeping bags that promote a tech-free experience.", "assignment_context": "Write a fun and engaging article that includes the research and product information. The article should be between 800 and 1000 words. Make sure to cite sources in the article as you mention the research not at the end."}
+{"research_context": "Can you delve into the trend of adventure camping and how it influences gear choices for extreme conditions?", "product_context": "Use a selection of rugged tents and specialized sleeping bags as context.", "assignment_context": "Write a fun and engaging article that includes the research and product information. The article should be between 800 and 1000 words. Make sure to cite sources in the article as you mention the research not at the end."}
+{"research_context": "What are the current trends in pet-friendly camping and how are campers choosing gear that accommodates their furry friends?", "product_context": "Include examples of pet-friendly tents and sleeping bags.", "assignment_context": "Write a fun and engaging article that includes the research and product information. The article should be between 800 and 1000 words. Make sure to cite sources in the article as you mention the research not at the end."}
+{"research_context": "What are the emerging trends in camping technology and how are they changing the way campers choose their gear?", "product_context": "Include examples of tech-integrated tents and sleeping bags.", "assignment_context": "Write a fun and engaging article that includes the research and product information. The article should be between 800 and 1000 words. Make sure to cite sources in the article as you mention the research not at the end."}
+{"research_context": "Can you explore the trend of eco-conscious hiking and how it influences gear choices?", "product_context": "Use a selection of sustainable hiking apparel and gear as context.", "assignment_context": "Write a fun and engaging article that includes the research and product information. The article should be between 800 and 1000 words. Make sure to cite sources in the article as you mention the research not at the end."}
+{"research_context": "What are the current trends in solo hiking and how are individuals choosing their gear for safety and comfort?", "product_context": "Include examples of solo-friendly tents and compact cooking gear.", "assignment_context": "Write a fun and engaging article that includes the research and product information. The article should be between 800 and 1000 words. Make sure to cite sources in the article as you mention the research not at the end."}
+{"research_context": "Can you analyze the rise of family hiking and how families are selecting gear that accommodates all ages?", "product_context": "Use a variety of family-sized backpacks and kid-friendly hiking gear as examples.", "assignment_context": "Write a fun and engaging article that includes the research and product information. The article should be between 800 and 1000 words. Make sure to cite sources in the article as you mention the research not at the end."}
+{"research_context": "What are the emerging trends in hiking technology and how are they changing the way hikers choose their gear?", "product_context": "Include examples of GPS devices and smart hiking apparel.", "assignment_context": "Write a fun and engaging article that includes the research and product information. The article should be between 800 and 1000 words. Make sure to cite sources in the article as you mention the research not at the end."}
diff --git a/src/api/evaluate/evaluate.py b/src/api/evaluate/evaluate.py
@@ -43,7 +43,7 @@ def evaluate_remote(data_path):
     model_config = default_connection.to_evaluator_model_config(deployment_name=deployment_name, api_version=api_version)
     # Create an evaluation
     evaluation = Evaluation(
-        display_name="Remote Evaluation",
+        display_name="Cloud Evaluation",
         description="Evaluation of dataset",
         data=Dataset(id=data_id),
         evaluators={
@@ -141,15 +141,25 @@ def evaluate_orchestrator(model_config, project_scope,  data_path):
 
     data = []    
     eval_data = []
+    import time
+    start = time.time()
     print(f"\n===== Creating articles to evaluate using data provided in {data_path}")
     print("")
+    num_retries = 3  # attempts per input row; a row that fails every attempt adds nothing to eval_data
     with open(data_path) as f:
         for num, line in enumerate(f):
             row = json.loads(line)
             data.append(row)
             print(f"generating article {num +1}")
-            eval_data.append(run_orchestrator(row["research_context"], row["product_context"], row["assignment_context"]))
-
+            for i in range(num_retries):
+                try:
+                    eval_data.append(run_orchestrator(row["research_context"], row["product_context"], row["assignment_context"]))
+                    break
+                except Exception as e:
+                    print("Agents failed to produce an article. Examine trace for details. Error message:" + str(e) + f"\nRetrying {i+1}/{num_retries} times.")
+                    continue
+    end = time.time()
+    print(f"Agent finished writing articles in {end-start} seconds.")
     # write out eval data to a file so we can re-run evaluation on it
     with jsonlines.open(folder + '/eval_data.jsonl', 'w') as writer:
         for row in eval_data:
@@ -299,6 +309,7 @@ def make_image_message(url_path):
             resized_image_urls = []
             for image in image_path:
                 new_image = local_image_resize(image)
+                if new_image is None: continue
                 #get the file type
                 _, extension = os.path.splitext(new_image)
                 # Normalize the extension (e.g., .JPG -> jpg)
@@ -444,7 +455,7 @@ def make_image_message(url_path):
 
     img_paths = []
     # This is code to add an image from a file path
-    for image_num in range(1,4):
+    for image_num in range(1, 9):
         parent = pathlib.Path(__file__).parent.resolve()
         path = os.path.join(parent, "data")
         image_path = os.path.join(path, f"{image_num}.png")

diff --git a/src/api/evaluate/evaluators.py b/src/api/evaluate/evaluators.py
@@ -4,7 +4,7 @@
 import prompty
 from opentelemetry import trace
 from opentelemetry.trace import set_span_in_context
-from azure.ai.evaluation import RelevanceEvaluator, GroundednessEvaluator, FluencyEvaluator, CoherenceEvaluator
+from azure.ai.evaluation import RelevanceEvaluator, GroundednessEvaluator, FluencyEvaluator, CoherenceEvaluator, RetrievalEvaluator
 from azure.ai.evaluation import ViolenceEvaluator, HateUnfairnessEvaluator, SelfHarmEvaluator, SexualEvaluator
 from azure.ai.evaluation import evaluate
 from azure.ai.evaluation import ViolenceMultimodalEvaluator, SelfHarmMultimodalEvaluator, HateUnfairnessMultimodalEvaluator, SexualMultimodalEvaluator
@@ -56,14 +56,19 @@ def __call__(self, response):
 class ArticleEvaluator:
     def __init__(self, model_config, project_scope):
         self.evaluators = {
+            # RAG metrics
+            "groundedness": GroundednessEvaluator(model_config),
+            "retrieval": RetrievalEvaluator(model_config),
             "relevance": RelevanceEvaluator(model_config),
+            # business writing metrics
             "fluency": FluencyEvaluator(model_config),
             "coherence": CoherenceEvaluator(model_config),
-            "groundedness": GroundednessEvaluator(model_config),
+            # safety metrics
             "violence": ViolenceEvaluator(azure_ai_project=project_scope, credential=DefaultAzureCredential()),
             "hate_unfairness": HateUnfairnessEvaluator(azure_ai_project=project_scope, credential=DefaultAzureCredential()),
             "self_harm": SelfHarmEvaluator(azure_ai_project=project_scope, credential=DefaultAzureCredential()),
             "sexual": SexualEvaluator(azure_ai_project=project_scope, credential=DefaultAzureCredential()),
+            # custom evaluator for friendly tones
             "friendliness": FriendlinessEvaluator(),
         }
         self.project_scope = project_scope
@@ -73,6 +78,7 @@ def __call__(self, *, data_path, **kwargs):
         ## NOTE: - The following code expects that the user has Storage Blob Data Contributor permissions in order for the results to upload to the Azure AI Studio.
         result = evaluate(
             data=data_path,
+            evaluation_name="Local Evaluation",
             evaluators=self.evaluators,
             ## NOTE: If you do not have Storage Blob Data Contributor permissions, please comment out the below line of code. 
             azure_ai_project=self.project_scope,