# .Net: Azure OpenAI Connector - Adding new max completion tokens override parameter (#10533)

### Motivation and Context

- Fixes #10523 

The Azure SDK connector only sends the new `max_completion_tokens` parameter when it is enabled through a flag on `ChatCompletionOptions` via a temporary, experimental extension method. To expose this setting through Semantic Kernel, this change adds a matching temporary experimental flag, `AzureOpenAIPromptExecutionSettings.SetNewMaxCompletionTokensEnabled`, which triggers the new behavior when set to `true` (default: `false`).

See: [Azure OpenAI
Changelog](https://github.com/Azure/azure-sdk-for-net/blob/2fb24b98e5bf4b1277ab69181bf650a4afbb4d20/sdk/openai/Azure.AI.OpenAI/CHANGELOG.md?plain=1#L34)
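
Under the hood this maps to the Azure SDK's experimental `SetNewMaxCompletionTokensPropertyEnabled` extension on `ChatCompletionOptions`, as visible in the connector diff below. A minimal sketch of that SDK-level hook, assuming only the `Azure.AI.OpenAI` and `OpenAI` packages:

```csharp
using Azure.AI.OpenAI.Chat;
using OpenAI.Chat;

ChatCompletionOptions options = new();
options.MaxOutputTokenCount = 1000;

#pragma warning disable AOAI001 // Experimental Azure OpenAI API, subject to change or removal.
// Opt in: the request will serialize "max_completion_tokens" instead of the legacy "max_tokens".
options.SetNewMaxCompletionTokensPropertyEnabled(true);
#pragma warning restore AOAI001
```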

Usage:

```csharp
var result = await service.GetChatMessageContentAsync("my prompt", new AzureOpenAIPromptExecutionSettings
{
    SetNewMaxCompletionTokensEnabled = true,
    MaxTokens = 1000,
});
```
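
Leaving the flag at its default of `false` keeps the legacy `max_tokens` parameter on the wire, which the new unit test below asserts for both values. A contrasting sketch, reusing the `service` instance from the example above:

```csharp
// Default behavior: SetNewMaxCompletionTokensEnabled is false, so the
// request body carries "max_tokens": 1000 rather than "max_completion_tokens".
var legacyResult = await service.GetChatMessageContentAsync("my prompt", new AzureOpenAIPromptExecutionSettings
{
    MaxTokens = 1000,
});
```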
RogerBarreto authored Feb 14, 2025
1 parent d350b83 commit 5680206
Showing 4 changed files with 83 additions and 16 deletions.
@@ -273,6 +273,39 @@ public async Task GetChatMessageContentsHandlesResponseFormatCorrectlyAsync(obje
         Assert.Equal(expectedResponseType, content.GetProperty("response_format").GetProperty("type").GetString());
     }
 
+    [Theory]
+    [InlineData(true, "max_completion_tokens")]
+    [InlineData(false, "max_tokens")]
+    public async Task GetChatMessageContentsHandlesMaxTokensCorrectlyAsync(bool useNewMaxTokens, string expectedPropertyName)
+    {
+        // Arrange
+        var service = new AzureOpenAIChatCompletionService("deployment", "https://endpoint", "api-key", "model-id", this._httpClient);
+        var settings = new AzureOpenAIPromptExecutionSettings
+        {
+            SetNewMaxCompletionTokensEnabled = useNewMaxTokens,
+            MaxTokens = 123
+        };
+
+        using var responseMessage = new HttpResponseMessage(HttpStatusCode.OK)
+        {
+            Content = new StringContent(AzureOpenAITestHelper.GetTestResponse("chat_completion_test_response.json"))
+        };
+        this._messageHandlerStub.ResponsesToReturn.Add(responseMessage);
+
+        // Act
+        var result = await service.GetChatMessageContentsAsync(new ChatHistory("System message"), settings);
+
+        // Assert
+        var requestContent = this._messageHandlerStub.RequestContents[0];
+
+        Assert.NotNull(requestContent);
+
+        var content = JsonSerializer.Deserialize<JsonElement>(Encoding.UTF8.GetString(requestContent));
+
+        Assert.True(content.TryGetProperty(expectedPropertyName, out var propertyValue));
+        Assert.Equal(123, propertyValue.GetInt32());
+    }
+
     [Theory]
     [InlineData(null, null)]
     [InlineData("string", "low")]
@@ -35,6 +35,7 @@ public void ItCreatesOpenAIExecutionSettingsWithCorrectDefaults()
         Assert.Null(executionSettings.TopLogprobs);
         Assert.Null(executionSettings.Logprobs);
         Assert.Null(executionSettings.AzureChatDataSource);
+        Assert.False(executionSettings.SetNewMaxCompletionTokensEnabled);
         Assert.Equal(maxTokensSettings, executionSettings.MaxTokens);
         Assert.Null(executionSettings.Store);
         Assert.Null(executionSettings.Metadata);
@@ -58,7 +59,8 @@ public void ItUsesExistingOpenAIExecutionSettings()
             TokenSelectionBiases = new Dictionary<int, int>() { { 1, 2 }, { 3, 4 } },
             Seed = 123456,
             Store = true,
-            Metadata = new Dictionary<string, string>() { { "foo", "bar" } }
+            Metadata = new Dictionary<string, string>() { { "foo", "bar" } },
+            SetNewMaxCompletionTokensEnabled = true,
         };
 
         // Act
@@ -74,6 +76,7 @@ public void ItUsesExistingOpenAIExecutionSettings()
         Assert.Equal(actualSettings.Seed, executionSettings.Seed);
         Assert.Equal(actualSettings.Store, executionSettings.Store);
         Assert.Equal(actualSettings.Metadata, executionSettings.Metadata);
+        Assert.Equal(actualSettings.SetNewMaxCompletionTokensEnabled, executionSettings.SetNewMaxCompletionTokensEnabled);
     }
 
     [Fact]
@@ -259,6 +262,7 @@ public void PromptExecutionSettingsFreezeWorksAsExpected()
         Assert.Throws<NotSupportedException>(() => executionSettings.TokenSelectionBiases?.Add(5, 6));
         Assert.Throws<InvalidOperationException>(() => executionSettings.Store = false);
         Assert.Throws<NotSupportedException>(() => executionSettings.Metadata?.Add("bar", "foo"));
+        Assert.Throws<InvalidOperationException>(() => executionSettings.SetNewMaxCompletionTokensEnabled = true);
 
         executionSettings!.Freeze(); // idempotent
         Assert.True(executionSettings.IsFrozen);
@@ -1,5 +1,7 @@
 // Copyright (c) Microsoft. All rights reserved.
 
+using System;
+using System.ClientModel.Primitives;
 using System.Diagnostics;
 using Azure.AI.OpenAI.Chat;
 using Microsoft.SemanticKernel.ChatCompletion;
@@ -35,23 +37,29 @@ protected override ChatCompletionOptions CreateChatCompletionOptions(
         {
             return base.CreateChatCompletionOptions(executionSettings, chatHistory, toolCallingConfig, kernel);
         }
 
-        var options = new ChatCompletionOptions
-        {
-            MaxOutputTokenCount = executionSettings.MaxTokens,
-            Temperature = (float?)executionSettings.Temperature,
-            TopP = (float?)executionSettings.TopP,
-            FrequencyPenalty = (float?)executionSettings.FrequencyPenalty,
-            PresencePenalty = (float?)executionSettings.PresencePenalty,
+        ChatCompletionOptions options = ModelReaderWriter.Read<ChatCompletionOptions>(BinaryData.FromString("{}")!)!;
+        options.MaxOutputTokenCount = executionSettings.MaxTokens;
+        options.Temperature = (float?)executionSettings.Temperature;
+        options.TopP = (float?)executionSettings.TopP;
+        options.FrequencyPenalty = (float?)executionSettings.FrequencyPenalty;
+        options.PresencePenalty = (float?)executionSettings.PresencePenalty;
 #pragma warning disable OPENAI001 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed.
-            Seed = executionSettings.Seed,
+        options.Seed = executionSettings.Seed;
 #pragma warning restore OPENAI001 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed.
-            EndUserId = executionSettings.User,
-            TopLogProbabilityCount = executionSettings.TopLogprobs,
-            IncludeLogProbabilities = executionSettings.Logprobs,
-            StoredOutputEnabled = executionSettings.Store,
-            ReasoningEffortLevel = GetEffortLevel(executionSettings),
-        };
+        options.EndUserId = executionSettings.User;
+        options.TopLogProbabilityCount = executionSettings.TopLogprobs;
+        options.IncludeLogProbabilities = executionSettings.Logprobs;
+        options.StoredOutputEnabled = executionSettings.Store;
+        options.ReasoningEffortLevel = GetEffortLevel(executionSettings);
+        options.ResponseModalities = ChatResponseModalities.Default;
+
+        if (azureSettings.SetNewMaxCompletionTokensEnabled)
+        {
+#pragma warning disable AOAI001 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed.
+            options.SetNewMaxCompletionTokensPropertyEnabled(true);
+#pragma warning restore AOAI001 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed.
+        }
 
         var responseFormat = GetResponseFormat(executionSettings);
         if (responseFormat is not null)
Expand Down
@@ -16,6 +16,26 @@ namespace Microsoft.SemanticKernel.Connectors.AzureOpenAI;
 [JsonNumberHandling(JsonNumberHandling.AllowReadingFromString)]
 public sealed class AzureOpenAIPromptExecutionSettings : OpenAIPromptExecutionSettings
 {
+    /// <summary>
+    /// Enabling this property will enforce the new <c>max_completion_tokens</c> parameter to be sent to the Azure OpenAI API.
+    /// </summary>
+    /// <remarks>
+    /// This setting is temporary and flags the underlying Azure SDK to use the new <c>max_completion_tokens</c> parameter via the
+    /// <see href="https://github.com/Azure/azure-sdk-for-net/blob/c2aa8d8448bdb7378a5c1b7ba23aa75e39e6b425/sdk/openai/Azure.AI.OpenAI/CHANGELOG.md?plain=1#L34">
+    /// SetNewMaxCompletionTokensPropertyEnabled</see> extension method.
+    /// </remarks>
+    [Experimental("SKEXP0010")]
+    [JsonIgnore]
+    public bool SetNewMaxCompletionTokensEnabled
+    {
+        get => this._setNewMaxCompletionTokensEnabled;
+        set
+        {
+            this.ThrowIfFrozen();
+            this._setNewMaxCompletionTokensEnabled = value;
+        }
+    }
+
     /// <summary>
     /// An abstraction of additional settings for chat completion, see https://learn.microsoft.com/en-us/dotnet/api/azure.ai.openai.azurechatextensionsoptions.
     /// This property is compatible only with Azure OpenAI.
@@ -38,6 +58,7 @@ public override PromptExecutionSettings Clone()
     {
         var settings = base.Clone<AzureOpenAIPromptExecutionSettings>();
         settings.AzureChatDataSource = this.AzureChatDataSource;
+        settings.SetNewMaxCompletionTokensEnabled = this.SetNewMaxCompletionTokensEnabled;
         return settings;
     }
 
@@ -103,6 +124,7 @@ public static AzureOpenAIPromptExecutionSettings FromExecutionSettingsWithData(P
     #region private ================================================================================
     [Experimental("SKEXP0010")]
     private AzureSearchChatDataSource? _azureChatDataSource;
+    private bool _setNewMaxCompletionTokensEnabled;
 
     #endregion
 }
