diff --git a/docs/ai/tutorials/evaluate-with-reporting.md b/docs/ai/tutorials/evaluate-with-reporting.md
index 7f68c7d1dc015..10a5d804a6c6c 100644
--- a/docs/ai/tutorials/evaluate-with-reporting.md
+++ b/docs/ai/tutorials/evaluate-with-reporting.md
@@ -1,14 +1,14 @@
 ---
 title: Tutorial - Evaluate a model's response
 description: Create an MSTest app and add a custom evaluator to evaluate the AI chat response of a language model, and learn how to use the caching and reporting features of Microsoft.Extensions.AI.Evaluation.
-ms.date: 03/14/2025
+ms.date: 05/09/2025
 ms.topic: tutorial
 ms.custom: devx-track-dotnet-ai
 ---

 # Tutorial: Evaluate a model's response with response caching and reporting

-In this tutorial, you create an MSTest app to evaluate the chat response of an OpenAI model. The test app uses the [Microsoft.Extensions.AI.Evaluation](https://www.nuget.org/packages/Microsoft.Extensions.AI.Evaluation) libraries to perform the evaluations, cache the model responses, and create reports. The tutorial uses both a [built-in evaluator](xref:Microsoft.Extensions.AI.Evaluation.Quality.RelevanceTruthAndCompletenessEvaluator) and a custom evaluator.
+In this tutorial, you create an MSTest app to evaluate the chat response of an OpenAI model. The test app uses the [Microsoft.Extensions.AI.Evaluation](https://www.nuget.org/packages/Microsoft.Extensions.AI.Evaluation) libraries to perform the evaluations, cache the model responses, and create reports. The tutorial uses both built-in and custom evaluators.

 ## Prerequisites

@@ -25,32 +25,32 @@ Complete the following steps to create an MSTest project that connects to the `g

 1. In a terminal window, navigate to the directory where you want to create your app, and create a new MSTest app with the `dotnet new` command:

-    ```dotnetcli
-    dotnet new mstest -o TestAIWithReporting
-    ```
+    ```dotnetcli
+    dotnet new mstest -o TestAIWithReporting
+    ```

 1. Navigate to the `TestAIWithReporting` directory, and add the necessary packages to your app:

-    ```dotnetcli
-    dotnet add package Azure.AI.OpenAI
-    dotnet add package Azure.Identity
-    dotnet add package Microsoft.Extensions.AI.Abstractions --prerelease
-    dotnet add package Microsoft.Extensions.AI.Evaluation --prerelease
-    dotnet add package Microsoft.Extensions.AI.Evaluation.Quality --prerelease
-    dotnet add package Microsoft.Extensions.AI.Evaluation.Reporting --prerelease
-    dotnet add package Microsoft.Extensions.AI.OpenAI --prerelease
-    dotnet add package Microsoft.Extensions.Configuration
-    dotnet add package Microsoft.Extensions.Configuration.UserSecrets
-    ```
+    ```dotnetcli
+    dotnet add package Azure.AI.OpenAI
+    dotnet add package Azure.Identity
+    dotnet add package Microsoft.Extensions.AI.Abstractions --prerelease
+    dotnet add package Microsoft.Extensions.AI.Evaluation --prerelease
+    dotnet add package Microsoft.Extensions.AI.Evaluation.Quality --prerelease
+    dotnet add package Microsoft.Extensions.AI.Evaluation.Reporting --prerelease
+    dotnet add package Microsoft.Extensions.AI.OpenAI --prerelease
+    dotnet add package Microsoft.Extensions.Configuration
+    dotnet add package Microsoft.Extensions.Configuration.UserSecrets
+    ```

 1. Run the following commands to add [app secrets](/aspnet/core/security/app-secrets) for your Azure OpenAI endpoint, model name, and tenant ID:

-    ```bash
-    dotnet user-secrets init
-    dotnet user-secrets set AZURE_OPENAI_ENDPOINT <your-Azure-OpenAI-endpoint>
-    dotnet user-secrets set AZURE_OPENAI_GPT_NAME gpt-4o
-    dotnet user-secrets set AZURE_TENANT_ID <your-tenant-ID>
-    ```
+    ```bash
+    dotnet user-secrets init
+    dotnet user-secrets set AZURE_OPENAI_ENDPOINT <your-Azure-OpenAI-endpoint>
+    dotnet user-secrets set AZURE_OPENAI_GPT_NAME gpt-4o
+    dotnet user-secrets set AZURE_TENANT_ID <your-tenant-ID>
+    ```

    (Depending on your environment, the tenant ID might not be needed. In that case, remove it from the code that instantiates the <xref:Azure.Identity.DefaultAzureCredential>.)
diff --git a/docs/ai/tutorials/snippets/evaluate-with-reporting/MyTests.cs b/docs/ai/tutorials/snippets/evaluate-with-reporting/MyTests.cs
index 215a698cd9b90..2a803a672e2e6 100644
--- a/docs/ai/tutorials/snippets/evaluate-with-reporting/MyTests.cs
+++ b/docs/ai/tutorials/snippets/evaluate-with-reporting/MyTests.cs
@@ -59,10 +59,11 @@ private static ChatConfiguration GetAzureOpenAIChatConfiguration()

     //
     private static IEnumerable<IEvaluator> GetEvaluators()
     {
-        IEvaluator rtcEvaluator = new RelevanceTruthAndCompletenessEvaluator();
+        IEvaluator relevanceEvaluator = new RelevanceEvaluator();
+        IEvaluator coherenceEvaluator = new CoherenceEvaluator();
         IEvaluator wordCountEvaluator = new WordCountEvaluator();

-        return [rtcEvaluator, wordCountEvaluator];
+        return [relevanceEvaluator, coherenceEvaluator, wordCountEvaluator];
     }
     //

@@ -104,20 +105,15 @@ private static void Validate(EvaluationResult result)
     {
         // Retrieve the score for relevance from the EvaluationResult.
         NumericMetric relevance =
-            result.Get<NumericMetric>(RelevanceTruthAndCompletenessEvaluator.RelevanceMetricName);
+            result.Get<NumericMetric>(RelevanceEvaluator.RelevanceMetricName);
         Assert.IsFalse(relevance.Interpretation!.Failed, relevance.Reason);
         Assert.IsTrue(relevance.Interpretation.Rating is EvaluationRating.Good or EvaluationRating.Exceptional);

-        // Retrieve the score for truth from the EvaluationResult.
-        NumericMetric truth = result.Get<NumericMetric>(RelevanceTruthAndCompletenessEvaluator.TruthMetricName);
-        Assert.IsFalse(truth.Interpretation!.Failed, truth.Reason);
-        Assert.IsTrue(truth.Interpretation.Rating is EvaluationRating.Good or EvaluationRating.Exceptional);
-
-        // Retrieve the score for completeness from the EvaluationResult.
-        NumericMetric completeness =
-            result.Get<NumericMetric>(RelevanceTruthAndCompletenessEvaluator.CompletenessMetricName);
-        Assert.IsFalse(completeness.Interpretation!.Failed, completeness.Reason);
-        Assert.IsTrue(completeness.Interpretation.Rating is EvaluationRating.Good or EvaluationRating.Exceptional);
+        // Retrieve the score for coherence from the EvaluationResult.
+        NumericMetric coherence =
+            result.Get<NumericMetric>(CoherenceEvaluator.CoherenceMetricName);
+        Assert.IsFalse(coherence.Interpretation!.Failed, coherence.Reason);
+        Assert.IsTrue(coherence.Interpretation.Rating is EvaluationRating.Good or EvaluationRating.Exceptional);

         // Retrieve the word count from the EvaluationResult.
         NumericMetric wordCount = result.Get<NumericMetric>(WordCountEvaluator.WordCountMetricName);
@@ -135,7 +131,7 @@ public async Task SampleAndEvaluateResponse()
         // Create a ScenarioRun with the scenario name
         // set to the fully qualified name of the current test method.
         await using ScenarioRun scenarioRun =
-            await s_defaultReportingConfiguration.CreateScenarioRunAsync(this.ScenarioName);
+            await s_defaultReportingConfiguration.CreateScenarioRunAsync(ScenarioName);

         // Use the IChatClient that's included in the
         // ScenarioRun's ChatConfiguration to get the LLM response.
diff --git a/docs/ai/tutorials/snippets/evaluate-with-reporting/TestAIWithReporting.csproj b/docs/ai/tutorials/snippets/evaluate-with-reporting/TestAIWithReporting.csproj
index 988af261ef1df..08fa88a7fb8e4 100644
--- a/docs/ai/tutorials/snippets/evaluate-with-reporting/TestAIWithReporting.csproj
+++ b/docs/ai/tutorials/snippets/evaluate-with-reporting/TestAIWithReporting.csproj
@@ -11,11 +11,11 @@
-    
-    
-    
-    
-    
+    
+    
+    
+    
+    
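
For readers following the diff, the `WordCountEvaluator` that `GetEvaluators` and `Validate` keep referring to is the tutorial's custom evaluator; its implementation isn't part of this change. The sketch below shows one minimal way such an evaluator could be written against the prerelease `Microsoft.Extensions.AI.Evaluation` abstractions. The exact `EvaluateAsync` parameter list, the `"Words"` metric name, and the pass/fail thresholds are assumptions for illustration, not code taken from this PR.

```csharp
using System;
using System.Collections.Generic;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.AI;
using Microsoft.Extensions.AI.Evaluation;

// Minimal sketch of a custom evaluator; the interface shape is assumed from the
// prerelease Microsoft.Extensions.AI.Evaluation package, and thresholds are illustrative.
public class WordCountEvaluator : IEvaluator
{
    public const string WordCountMetricName = "Words";

    // The names of the metrics this evaluator produces.
    public IReadOnlyCollection<string> EvaluationMetricNames => [WordCountMetricName];

    public ValueTask<EvaluationResult> EvaluateAsync(
        IEnumerable<ChatMessage> messages,
        ChatResponse modelResponse,
        ChatConfiguration? chatConfiguration = null,
        IEnumerable<EvaluationContext>? additionalContext = null,
        CancellationToken cancellationToken = default)
    {
        // Count whitespace-separated words in the response text.
        int wordCount = string.IsNullOrWhiteSpace(modelResponse.Text)
            ? 0
            : modelResponse.Text.Split((char[]?)null, StringSplitOptions.RemoveEmptyEntries).Length;

        // Report the count as a NumericMetric and attach a simple pass/fail
        // interpretation (the 6-100 word range is an arbitrary example).
        bool inRange = wordCount is > 5 and < 100;
        var metric = new NumericMetric(WordCountMetricName, value: wordCount);
        metric.Interpretation = new EvaluationMetricInterpretation(
            rating: inRange ? EvaluationRating.Good : EvaluationRating.Unacceptable,
            failed: !inRange,
            reason: $"The response contains {wordCount} words.");

        return new ValueTask<EvaluationResult>(new EvaluationResult(metric));
    }
}
```

With an evaluator shaped like this, the `Validate` method in the diff can call `result.Get<NumericMetric>(WordCountEvaluator.WordCountMetricName)` to retrieve the metric and assert on its value and interpretation.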
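The test method in the diff also awaits `s_defaultReportingConfiguration.CreateScenarioRunAsync(ScenarioName)`, so the evaluators returned by `GetEvaluators` ultimately flow into a reporting configuration that provides the response caching and report storage the tutorial is named for. A rough sketch of how that field is typically wired up is below; the `DiskBasedReportingConfiguration.Create` parameter names, the storage path, the `partial` declaration, and the `ExecutionName` property are assumptions based on the prerelease `Microsoft.Extensions.AI.Evaluation.Reporting` package rather than code shown in this diff.

```csharp
using Microsoft.Extensions.AI.Evaluation.Reporting;
using Microsoft.Extensions.AI.Evaluation.Reporting.Storage;

public partial class MyTests
{
    // Sketch only: GetEvaluators and GetAzureOpenAIChatConfiguration appear in the
    // diff above; ExecutionName (for example, a timestamp that groups one run's
    // results) and the storage path are assumed for illustration.
    private static readonly ReportingConfiguration s_defaultReportingConfiguration =
        DiskBasedReportingConfiguration.Create(
            storageRootPath: "C:\\TestReports",             // where results and cached responses are written
            evaluators: GetEvaluators(),                     // relevance, coherence, and word-count evaluators
            chatConfiguration: GetAzureOpenAIChatConfiguration(),
            enableResponseCaching: true,                     // reuse cached LLM responses across runs
            executionName: ExecutionName);
}
```

Because response caching is enabled, repeated test runs within the cache lifetime reuse the stored LLM responses instead of calling the model again, which keeps the evaluation runs fast and inexpensive.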