# .Net: Azure OpenAI Connector - Adding new max completion tokens override parameter (#10533)

### Motivation and Context

- Fixes #10523 

The Azure SDK connector only sends the new `max_completion_tokens` parameter when it is enabled through a flag on `ChatCompletionOptions` via a temporary, experimental extension method. To expose this setting through Semantic Kernel, this change adds a matching temporary experimental flag, `AzureOpenAIPromptExecutionSettings.SetNewMaxCompletionTokensEnabled`, which triggers the new behavior when set to `true` (default: `false`).

See: [Azure OpenAI
Changelog](https://github.com/Azure/azure-sdk-for-net/blob/2fb24b98e5bf4b1277ab69181bf650a4afbb4d20/sdk/openai/Azure.AI.OpenAI/CHANGELOG.md?plain=1#L34)
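
Under the hood this maps to the Azure SDK's experimental `SetNewMaxCompletionTokensPropertyEnabled` extension on `ChatCompletionOptions`, as visible in the connector diff below. A minimal sketch of that SDK-level hook, assuming only the `Azure.AI.OpenAI` and `OpenAI` packages:

```csharp
using Azure.AI.OpenAI.Chat;
using OpenAI.Chat;

ChatCompletionOptions options = new();
options.MaxOutputTokenCount = 1000;

#pragma warning disable AOAI001 // Experimental Azure OpenAI API, subject to change or removal.
// Opt in: the request will serialize "max_completion_tokens" instead of the legacy "max_tokens".
options.SetNewMaxCompletionTokensPropertyEnabled(true);
#pragma warning restore AOAI001
```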

Usage:

```csharp
var result = await service.GetChatMessageContentAsync("my prompt", new AzureOpenAIPromptExecutionSettings
{
    SetNewMaxCompletionTokensEnabled = true,
    MaxTokens = 1000,
});
```
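
Leaving the flag at its default of `false` keeps the legacy `max_tokens` parameter on the wire, which the new unit test below asserts for both values. A contrasting sketch, reusing the `service` instance from the example above:

```csharp
// Default behavior: SetNewMaxCompletionTokensEnabled is false, so the
// request body carries "max_tokens": 1000 rather than "max_completion_tokens".
var legacyResult = await service.GetChatMessageContentAsync("my prompt", new AzureOpenAIPromptExecutionSettings
{
    MaxTokens = 1000,
});
```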
RogerBarreto authored Feb 14, 2025
1 parent d350b83 commit 5680206
Showing 4 changed files with 83 additions and 16 deletions.
@@ -273,6 +273,39 @@ public async Task GetChatMessageContentsHandlesResponseFormatCorrectlyAsync(obje
         Assert.Equal(expectedResponseType, content.GetProperty("response_format").GetProperty("type").GetString());
     }
 
+    [Theory]
+    [InlineData(true, "max_completion_tokens")]
+    [InlineData(false, "max_tokens")]
+    public async Task GetChatMessageContentsHandlesMaxTokensCorrectlyAsync(bool useNewMaxTokens, string expectedPropertyName)
+    {
+        // Arrange
+        var service = new AzureOpenAIChatCompletionService("deployment", "https://endpoint", "api-key", "model-id", this._httpClient);
+        var settings = new AzureOpenAIPromptExecutionSettings
+        {
+            SetNewMaxCompletionTokensEnabled = useNewMaxTokens,
+            MaxTokens = 123
+        };
+
+        using var responseMessage = new HttpResponseMessage(HttpStatusCode.OK)
+        {
+            Content = new StringContent(AzureOpenAITestHelper.GetTestResponse("chat_completion_test_response.json"))
+        };
+        this._messageHandlerStub.ResponsesToReturn.Add(responseMessage);
+
+        // Act
+        var result = await service.GetChatMessageContentsAsync(new ChatHistory("System message"), settings);
+
+        // Assert
+        var requestContent = this._messageHandlerStub.RequestContents[0];
+
+        Assert.NotNull(requestContent);
+
+        var content = JsonSerializer.Deserialize<JsonElement>(Encoding.UTF8.GetString(requestContent));
+
+        Assert.True(content.TryGetProperty(expectedPropertyName, out var propertyValue));
+        Assert.Equal(123, propertyValue.GetInt32());
+    }
+
     [Theory]
     [InlineData(null, null)]
     [InlineData("string", "low")]
@@ -35,6 +35,7 @@ public void ItCreatesOpenAIExecutionSettingsWithCorrectDefaults()
         Assert.Null(executionSettings.TopLogprobs);
         Assert.Null(executionSettings.Logprobs);
         Assert.Null(executionSettings.AzureChatDataSource);
+        Assert.False(executionSettings.SetNewMaxCompletionTokensEnabled);
         Assert.Equal(maxTokensSettings, executionSettings.MaxTokens);
         Assert.Null(executionSettings.Store);
         Assert.Null(executionSettings.Metadata);
@@ -58,7 +59,8 @@ public void ItUsesExistingOpenAIExecutionSettings()
             TokenSelectionBiases = new Dictionary<int, int>() { { 1, 2 }, { 3, 4 } },
             Seed = 123456,
             Store = true,
-            Metadata = new Dictionary<string, string>() { { "foo", "bar" } }
+            Metadata = new Dictionary<string, string>() { { "foo", "bar" } },
+            SetNewMaxCompletionTokensEnabled = true,
         };
 
         // Act
@@ -74,6 +76,7 @@ public void ItUsesExistingOpenAIExecutionSettings()
         Assert.Equal(actualSettings.Seed, executionSettings.Seed);
         Assert.Equal(actualSettings.Store, executionSettings.Store);
         Assert.Equal(actualSettings.Metadata, executionSettings.Metadata);
+        Assert.Equal(actualSettings.SetNewMaxCompletionTokensEnabled, executionSettings.SetNewMaxCompletionTokensEnabled);
     }
 
     [Fact]
@@ -259,6 +262,7 @@ public void PromptExecutionSettingsFreezeWorksAsExpected()
         Assert.Throws<NotSupportedException>(() => executionSettings.TokenSelectionBiases?.Add(5, 6));
         Assert.Throws<InvalidOperationException>(() => executionSettings.Store = false);
         Assert.Throws<NotSupportedException>(() => executionSettings.Metadata?.Add("bar", "foo"));
+        Assert.Throws<InvalidOperationException>(() => executionSettings.SetNewMaxCompletionTokensEnabled = true);
 
         executionSettings!.Freeze(); // idempotent
         Assert.True(executionSettings.IsFrozen);
@@ -1,5 +1,7 @@
 // Copyright (c) Microsoft. All rights reserved.
 
+using System;
+using System.ClientModel.Primitives;
 using System.Diagnostics;
 using Azure.AI.OpenAI.Chat;
 using Microsoft.SemanticKernel.ChatCompletion;
@@ -35,23 +37,29 @@ protected override ChatCompletionOptions CreateChatCompletionOptions(
         {
             return base.CreateChatCompletionOptions(executionSettings, chatHistory, toolCallingConfig, kernel);
         }
 
-        var options = new ChatCompletionOptions
-        {
-            MaxOutputTokenCount = executionSettings.MaxTokens,
-            Temperature = (float?)executionSettings.Temperature,
-            TopP = (float?)executionSettings.TopP,
-            FrequencyPenalty = (float?)executionSettings.FrequencyPenalty,
-            PresencePenalty = (float?)executionSettings.PresencePenalty,
+        ChatCompletionOptions options = ModelReaderWriter.Read<ChatCompletionOptions>(BinaryData.FromString("{}")!)!;
+        options.MaxOutputTokenCount = executionSettings.MaxTokens;
+        options.Temperature = (float?)executionSettings.Temperature;
+        options.TopP = (float?)executionSettings.TopP;
+        options.FrequencyPenalty = (float?)executionSettings.FrequencyPenalty;
+        options.PresencePenalty = (float?)executionSettings.PresencePenalty;
 #pragma warning disable OPENAI001 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed.
-            Seed = executionSettings.Seed,
+        options.Seed = executionSettings.Seed;
 #pragma warning restore OPENAI001 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed.
-            EndUserId = executionSettings.User,
-            TopLogProbabilityCount = executionSettings.TopLogprobs,
-            IncludeLogProbabilities = executionSettings.Logprobs,
-            StoredOutputEnabled = executionSettings.Store,
-            ReasoningEffortLevel = GetEffortLevel(executionSettings),
-        };
+        options.EndUserId = executionSettings.User;
+        options.TopLogProbabilityCount = executionSettings.TopLogprobs;
+        options.IncludeLogProbabilities = executionSettings.Logprobs;
+        options.StoredOutputEnabled = executionSettings.Store;
+        options.ReasoningEffortLevel = GetEffortLevel(executionSettings);
+        options.ResponseModalities = ChatResponseModalities.Default;
+
+        if (azureSettings.SetNewMaxCompletionTokensEnabled)
+        {
+#pragma warning disable AOAI001 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed.
+            options.SetNewMaxCompletionTokensPropertyEnabled(true);
+#pragma warning restore AOAI001 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed.
+        }
 
         var responseFormat = GetResponseFormat(executionSettings);
         if (responseFormat is not null)
Expand Down
@@ -16,6 +16,26 @@ namespace Microsoft.SemanticKernel.Connectors.AzureOpenAI;
 [JsonNumberHandling(JsonNumberHandling.AllowReadingFromString)]
 public sealed class AzureOpenAIPromptExecutionSettings : OpenAIPromptExecutionSettings
 {
+    /// <summary>
+    /// Enabling this property will enforce the new <c>max_completion_tokens</c> parameter to be sent to the Azure OpenAI API.
+    /// </summary>
+    /// <remarks>
+    /// This setting is temporary and flags the underlying Azure SDK to use the new <c>max_completion_tokens</c> parameter via the
+    /// <see href="https://github.com/Azure/azure-sdk-for-net/blob/c2aa8d8448bdb7378a5c1b7ba23aa75e39e6b425/sdk/openai/Azure.AI.OpenAI/CHANGELOG.md?plain=1#L34">
+    /// SetNewMaxCompletionTokensPropertyEnabled</see> extension method.
+    /// </remarks>
+    [Experimental("SKEXP0010")]
+    [JsonIgnore]
+    public bool SetNewMaxCompletionTokensEnabled
+    {
+        get => this._setNewMaxCompletionTokensEnabled;
+        set
+        {
+            this.ThrowIfFrozen();
+            this._setNewMaxCompletionTokensEnabled = value;
+        }
+    }
+
     /// <summary>
     /// An abstraction of additional settings for chat completion, see https://learn.microsoft.com/en-us/dotnet/api/azure.ai.openai.azurechatextensionsoptions.
     /// This property is compatible only with Azure OpenAI.
@@ -38,6 +58,7 @@ public override PromptExecutionSettings Clone()
     {
         var settings = base.Clone<AzureOpenAIPromptExecutionSettings>();
         settings.AzureChatDataSource = this.AzureChatDataSource;
+        settings.SetNewMaxCompletionTokensEnabled = this.SetNewMaxCompletionTokensEnabled;
         return settings;
     }
 
@@ -103,6 +124,7 @@ public static AzureOpenAIPromptExecutionSettings FromExecutionSettingsWithData(P
     #region private ================================================================================
     [Experimental("SKEXP0010")]
     private AzureSearchChatDataSource? _azureChatDataSource;
+    private bool _setNewMaxCompletionTokensEnabled;
 
     #endregion
 }
