diff --git a/OpenAI_API/Audio/AudioEndpoint.cs b/OpenAI_API/Audio/AudioEndpoint.cs
new file mode 100644
index 0000000..4ee9a44
--- /dev/null
+++ b/OpenAI_API/Audio/AudioEndpoint.cs
@@ -0,0 +1,75 @@
+using System.Net.Http;
+using System.Threading.Tasks;
+
+namespace OpenAI_API.Audio
+{
+	/// <summary>
+	/// You can use this endpoint for audio transcription or translation.
+	/// </summary>
+	public class AudioEndpoint : EndpointBase, IAudioEndpoint
+	{
+		/// <summary>
+		/// Creates the audio endpoint object.
+		/// </summary>
+		/// <param name="api"></param>
+		public AudioEndpoint(OpenAIAPI api) : base(api)
+		{
+		}
+
+		/// <summary>
+		/// Audio endpoint.
+		/// </summary>
+		protected override string Endpoint { get { return "audio"; } }
+
+		/// <summary>
+		/// Sends a transcription request to OpenAI and returns the verbose_json result.
+		/// </summary>
+		public Task<TranscriptionVerboseJsonResult> CreateTranscriptionAsync(TranscriptionRequest request)
+		{
+			return PostAudioAsync($"{Url}/transcriptions", request);
+		}
+
+		/// <summary>
+		/// Translates audio into English.
+		/// </summary>
+		public Task<TranscriptionVerboseJsonResult> CreateTranslationAsync(TranslationRequest request)
+		{
+			return PostAudioAsync($"{Url}/translations", new TranscriptionRequest
+			{
+				File = request.File,
+				Model = request.Model,
+				Prompt = request.Prompt,
+				ResponseFormat = request.ResponseFormat,
+				Temperature = request.Temperature
+			});
+		}
+
+		private Task<TranscriptionVerboseJsonResult> PostAudioAsync(string url, TranscriptionRequest request)
+		{
+			var content = new MultipartFormDataContent();
+
+			var fileContent = new StreamContent(request.File.File);
+			fileContent.Headers.ContentLength = request.File.ContentLength;
+			fileContent.Headers.ContentType =
+				new System.Net.Http.Headers.MediaTypeHeaderValue(request.File.ContentType);
+
+			content.Add(fileContent, "file", request.File.Name);
+			content.Add(new StringContent(request.Model), "model");
+
+			if (!IsNullOrWhiteSpace(request.Prompt))
+				content.Add(new StringContent(request.Prompt), "prompt");
+
+			if (!IsNullOrWhiteSpace(request.ResponseFormat))
+				content.Add(new StringContent(request.ResponseFormat), "response_format");
+
+			// Only send the temperature when one was actually supplied (the original check was inverted).
+			if (request.Temperature.HasValue)
+				content.Add(new StringContent(request.Temperature.Value.ToString()), "temperature");
+
+			if (!IsNullOrWhiteSpace(request.Language))
+				content.Add(new StringContent(request.Language), "language");
+
+			return HttpPost<TranscriptionVerboseJsonResult>(url, postData: content);
+		}
+
+		private bool IsNullOrWhiteSpace(string str) => string.IsNullOrWhiteSpace(str);
+	}
+}
diff --git a/OpenAI_API/Audio/AudioFile.cs b/OpenAI_API/Audio/AudioFile.cs
new file mode 100644
index 0000000..bafa851
--- /dev/null
+++ b/OpenAI_API/Audio/AudioFile.cs
@@ -0,0 +1,30 @@
+using System.IO;
+
+namespace OpenAI_API.Audio
+{
+	/// <summary>
+	/// Audio file object for transcription and translation requests.
+	/// </summary>
+	public class AudioFile
+	{
+		/// <summary>
+		/// Stream of the file.
+		/// </summary>
+		public Stream File { get; set; }
+
+		/// <summary>
+		/// Content length of the file.
+		/// </summary>
+		public long ContentLength { get { return File.Length; } }
+
+		/// <summary>
+		/// Content type of the audio file. Must be mp3, mp4, mpeg, mpga, m4a, wav, or webm.
+		/// </summary>
+		public string ContentType { get; set; }
+
+		/// <summary>
+		/// Full name of the file, such as test.mp3.
+		/// </summary>
+		public string Name { get; set; }
+	}
+}
diff --git a/OpenAI_API/Audio/IAudioEndpoint.cs b/OpenAI_API/Audio/IAudioEndpoint.cs
new file mode 100644
index 0000000..ac5e19a
--- /dev/null
+++ b/OpenAI_API/Audio/IAudioEndpoint.cs
@@ -0,0 +1,21 @@
+using System.Threading.Tasks;
+
+namespace OpenAI_API.Audio
+{
+	/// <summary>
+	/// You can use this endpoint for audio transcription or translation.
+	/// </summary>
+	public interface IAudioEndpoint
+	{
+		/// <summary>
+		/// Sends a transcription request to OpenAI and returns the verbose_json result.
+		/// </summary>
+		Task<TranscriptionVerboseJsonResult> CreateTranscriptionAsync(TranscriptionRequest request);
+
+		/// <summary>
+		/// Translates audio into English.
+		/// </summary>
+		Task<TranscriptionVerboseJsonResult> CreateTranslationAsync(TranslationRequest request);
+	}
+}
diff --git a/OpenAI_API/Audio/TranscriptionRequest.cs b/OpenAI_API/Audio/TranscriptionRequest.cs
new file mode 100644
index 0000000..0be8852
--- /dev/null
+++ b/OpenAI_API/Audio/TranscriptionRequest.cs
@@ -0,0 +1,14 @@
+namespace OpenAI_API.Audio
+{
+	/// <summary>
+	/// Transcribes audio into the input language.
+	/// </summary>
+	public class TranscriptionRequest : TranslationRequest
+	{
+		/// <summary>
+		/// The language of the input audio. Supplying the input language in ISO-639-1 format will improve accuracy and latency. See the list of ISO-639-1 codes for valid values.
+		/// </summary>
+		public string Language { get; set; }
+	}
+}
diff --git a/OpenAI_API/Audio/TranscriptionResult.cs b/OpenAI_API/Audio/TranscriptionResult.cs
new file mode 100644
index 0000000..5101e47
--- /dev/null
+++ b/OpenAI_API/Audio/TranscriptionResult.cs
@@ -0,0 +1,123 @@
+using Newtonsoft.Json;
+using System.Collections.Generic;
+
+namespace OpenAI_API.Audio
+{
+	/// <summary>
+	/// Default format for transcription results.
+	/// </summary>
+	public class TranscriptionResult : ApiResultBase
+	{
+		/// <summary>
+		/// Text of the transcription result.
+		/// </summary>
+		public string Text { get; set; }
+	}
+
+	/// <summary>
+	/// Transcription format for verbose_json results.
+	/// </summary>
+	public class TranscriptionVerboseJsonResult : TranscriptionResult
+	{
+		/// <summary>
+		/// Task type: translate or transcribe.
+		/// </summary>
+		[JsonProperty("task")]
+		public string Task { get; set; }
+
+		/// <summary>
+		/// Language of the audio.
+		/// </summary>
+		[JsonProperty("language")]
+		public string Language { get; set; }
+
+		/// <summary>
+		/// Audio duration.
+		/// </summary>
+		[JsonProperty("duration")]
+		public float Duration { get; set; }
+
+		/// <summary>
+		/// Audio segments.
+		/// </summary>
+		[JsonProperty("segments")]
+		public List<TranscriptionSegment> Segments { get; set; }
+
+		/// <summary>
+		/// Creates a verbose_json result object.
+		/// </summary>
+		public TranscriptionVerboseJsonResult()
+		{
+			Segments = new List<TranscriptionSegment>();
+		}
+	}
+
+	/// <summary>
+	/// Segment of the transcript.
+	/// </summary>
+	public class TranscriptionSegment
+	{
+		/// <summary>
+		/// Segment id.
+		/// </summary>
+		[JsonProperty("id")]
+		public int Id { get; set; }
+
+		/// <summary>
+		/// Start time.
+		/// </summary>
+		[JsonProperty("start")]
+		public float Start { get; set; }
+
+		/// <summary>
+		/// End time.
+		/// </summary>
+		[JsonProperty("end")]
+		public float End { get; set; }
+
+		/// <summary>
+		/// Segment text.
+		/// </summary>
+		[JsonProperty("text")]
+		public string Text { get; set; }
+
+		/// <summary>
+		/// Text tokens.
+		/// </summary>
+		[JsonProperty("tokens")]
+		public int[] Tokens { get; set; }
+
+		/// <summary>
+		/// Temperature.
+		/// </summary>
+		[JsonProperty("temperature")]
+		public double Temperature { get; set; }
+
+		/// <summary>
+		/// Average log probability of the text.
+		/// </summary>
+		[JsonProperty("avg_logprob")]
+		public double AvgLogProb { get; set; }
+
+		/// <summary>
+		/// Compression ratio.
+		/// </summary>
+		[JsonProperty("compression_ratio")]
+		public double CompressionRatio { get; set; }
+
+		/// <summary>
+		/// No-speech probability.
+		/// </summary>
+		[JsonProperty("no_speech_prob")]
+		public double NoSpeechProb { get; set; }
+
+		/// <summary>
+		/// Transient flag.
+		/// </summary>
+		[JsonProperty("transient")]
+		public bool Transient { get; set; }
+	}
+}
diff --git a/OpenAI_API/Audio/TranslationRequest.cs b/OpenAI_API/Audio/TranslationRequest.cs
new file mode 100644
index 0000000..ee4956f
--- /dev/null
+++ b/OpenAI_API/Audio/TranslationRequest.cs
@@ -0,0 +1,37 @@
+using System.IO;
+
+namespace OpenAI_API.Audio
+{
+	/// <summary>
+	/// Request object for translating audio into English.
+	/// </summary>
+	public class TranslationRequest
+	{
+		/// <summary>
+		/// The audio file to transcribe, in one of these formats: mp3, mp4, mpeg, mpga, m4a, wav, or webm.
+		/// </summary>
+		public AudioFile File { get; set; }
+
+		/// <summary>
+		/// ID of the model to use. Only whisper-1 is currently available.
+		/// </summary>
+		public string Model { get; set; } = Models.Model.Whisper_1;
+
+		/// <summary>
+		/// An optional text to guide the model's style or continue a previous audio segment. The prompt should match the audio language. See <see href="https://platform.openai.com/docs/guides/speech-to-text/prompting"/> for details.
+		/// </summary>
+		public string Prompt { get; set; }
+
+		/// <summary>
+		/// The format of the transcript output, in one of these options: json, text, srt, verbose_json, or vtt.
+		/// </summary>
+		public string ResponseFormat { get; set; } = "verbose_json";
+
+		/// <summary>
+		/// The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, the model will use log probability to automatically increase the temperature until certain thresholds are hit.
+		/// </summary>
+		public float? Temperature { get; set; }
+	}
+}
diff --git a/OpenAI_API/Chat/ChatResult.cs b/OpenAI_API/Chat/ChatResult.cs
index 9138d19..51addcb 100644
--- a/OpenAI_API/Chat/ChatResult.cs
+++ b/OpenAI_API/Chat/ChatResult.cs
@@ -1,14 +1,12 @@
 using Newtonsoft.Json;
-using System;
 using System.Collections.Generic;
-using System.Text;
 
 namespace OpenAI_API.Chat
 {
-	/// <summary>
-	/// Represents a result from calling the Chat API
-	/// </summary>
-	public class ChatResult : ApiResultBase
+	/// <summary>
+	/// Represents a result from calling the Chat API
+	/// </summary>
+	public class ChatResult : ApiResultBase
 	{
 		/// <summary>
 		/// The identifier of the result, which may be used during troubleshooting
diff --git a/OpenAI_API/Chat/Conversation.cs b/OpenAI_API/Chat/Conversation.cs
index babc9b1..a8be8dd 100644
--- a/OpenAI_API/Chat/Conversation.cs
+++ b/OpenAI_API/Chat/Conversation.cs
@@ -42,7 +42,7 @@ public OpenAI_API.Models.Model Model
 		/// <summary>
 		/// After calling <see cref="GetResponseFromChatbotAsync"/>, this contains the full response object, which can contain useful metadata like token usage, finish reason, etc. This is overwritten with every call to <see cref="GetResponseFromChatbotAsync"/> and only contains the most recent result.
 		/// </summary>
-		public ChatResult MostResentAPIResult { get; private set; }
+		public ChatResult MostRecentApiResult { get; private set; }
 
 		/// <summary>
 		/// Creates a new conversation with ChatGPT chat
@@ -71,7 +71,7 @@ public Conversation(ChatEndpoint endpoint, OpenAI_API.Models.Model model = null,
 		private List<ChatMessage> _Messages;
 
 		/// <summary>
-		/// Appends a <see cref="ChatMessage"/> to the chat hstory
+		/// Appends a <see cref="ChatMessage"/> to the chat history
 		/// </summary>
 		/// <param name="message">The <see cref="ChatMessage"/> to append to the chat history</param>
 		public void AppendMessage(ChatMessage message)
@@ -80,20 +80,20 @@ public void AppendMessage(ChatMessage message)
 		}
 
 		/// <summary>
-		/// Creates and appends a <see cref="ChatMessage"/> to the chat hstory
+		/// Creates and appends a <see cref="ChatMessage"/> to the chat history
 		/// </summary>
 		/// <param name="role">The <see cref="ChatMessageRole"/> for the message. Typically, a conversation is formatted with a system message first, followed by alternating user and assistant messages. See the OpenAI docs for more details about usage.</param>
 		/// <param name="content">The content of the message</param>
 		public void AppendMessage(ChatMessageRole role, string content) => this.AppendMessage(new ChatMessage(role, content));
 
 		/// <summary>
-		/// Creates and appends a <see cref="ChatMessage"/> to the chat hstory with the Role of <see cref="ChatMessageRole.User"/>. The user messages help instruct the assistant. They can be generated by the end users of an application, or set by a developer as an instruction.
+		/// Creates and appends a <see cref="ChatMessage"/> to the chat history with the Role of <see cref="ChatMessageRole.User"/>. The user messages help instruct the assistant. They can be generated by the end users of an application, or set by a developer as an instruction.
 		/// </summary>
 		/// <param name="content">Text content generated by the end users of an application, or set by a developer as an instruction</param>
 		public void AppendUserInput(string content) => this.AppendMessage(new ChatMessage(ChatMessageRole.User, content));
 
 		/// <summary>
-		/// Creates and appends a <see cref="ChatMessage"/> to the chat hstory with the Role of <see cref="ChatMessageRole.User"/>. The user messages help instruct the assistant. They can be generated by the end users of an application, or set by a developer as an instruction.
+		/// Creates and appends a <see cref="ChatMessage"/> to the chat history with the Role of <see cref="ChatMessageRole.User"/>. The user messages help instruct the assistant. They can be generated by the end users of an application, or set by a developer as an instruction.
 		/// </summary>
 		/// <param name="userName">The name of the user in a multi-user chat</param>
 		/// <param name="content">Text content generated by the end users of an application, or set by a developer as an instruction</param>
@@ -101,12 +101,12 @@ public void AppendMessage(ChatMessage message)
 
 		/// <summary>
-		/// Creates and appends a <see cref="ChatMessage"/> to the chat hstory with the Role of <see cref="ChatMessageRole.System"/>. The system message helps set the behavior of the assistant.
+		/// Creates and appends a <see cref="ChatMessage"/> to the chat history with the Role of <see cref="ChatMessageRole.System"/>. The system message helps set the behavior of the assistant.
 		/// </summary>
 		/// <param name="content">text content that helps set the behavior of the assistant</param>
 		public void AppendSystemMessage(string content) => this.AppendMessage(new ChatMessage(ChatMessageRole.System, content));
 		/// <summary>
-		/// Creates and appends a <see cref="ChatMessage"/> to the chat hstory with the Role of <see cref="ChatMessageRole.Assistant"/>. Assistant messages can be written by a developer to help give examples of desired behavior.
+		/// Creates and appends a <see cref="ChatMessage"/> to the chat history with the Role of <see cref="ChatMessageRole.Assistant"/>. Assistant messages can be written by a developer to help give examples of desired behavior.
 		/// </summary>
 		/// <param name="content">Text content written by a developer to help give examples of desired behavior</param>
 		public void AppendExampleChatbotOutput(string content) => this.AppendMessage(new ChatMessage(ChatMessageRole.Assistant, content));
@@ -123,7 +123,7 @@ public async Task<string> GetResponseFromChatbotAsync()
 			req.Messages = _Messages.ToList();
 
 			var res = await _endpoint.CreateChatCompletionAsync(req);
-			MostResentAPIResult = res;
+			MostRecentApiResult = res;
 
 			if (res.Choices.Count > 0)
 			{
@@ -201,7 +201,7 @@ public async IAsyncEnumerable<string> StreamResponseEnumerableFromChatbotAsync()
 					yield return deltaContent;
 				}
 			}
-			MostResentAPIResult = res;
+			MostRecentApiResult = res;
 		}
 
 		if (responseRole != null)
diff --git a/OpenAI_API/EndpointBase.cs b/OpenAI_API/EndpointBase.cs
index 792727a..c7b7ac1 100644
--- a/OpenAI_API/EndpointBase.cs
+++ b/OpenAI_API/EndpointBase.cs
@@ -294,7 +294,6 @@ internal async Task<T> HttpPut<T>(string url = null, object postData = null) whe
 		{
 
-
 		/*
 		/// <summary>
 		/// Sends an HTTP request and handles a streaming response. Does basic line splitting and error handling.
diff --git a/OpenAI_API/Model/Model.cs b/OpenAI_API/Model/Model.cs
index bdeeb2b..bf5b825 100644
--- a/OpenAI_API/Model/Model.cs
+++ b/OpenAI_API/Model/Model.cs
@@ -164,6 +164,10 @@ public Model()
 		/// </summary>
 		public static Model TextModerationLatest => new Model("text-moderation-latest") { OwnedBy = "openai" };
 
+		/// <summary>
+		/// Whisper model. This model generates a transcript from audio.
+		/// </summary>
+		public static Model Whisper_1 => new Model("whisper-1") { OwnedBy = "openai" };
 
 		/// <summary>
 		/// Gets more details about this Model from the API, specifically properties such as <see cref="OwnedBy"/> and permissions.
diff --git a/OpenAI_API/OpenAIAPI.cs b/OpenAI_API/OpenAIAPI.cs
index f1e2bda..622b78a 100644
--- a/OpenAI_API/OpenAIAPI.cs
+++ b/OpenAI_API/OpenAIAPI.cs
@@ -1,4 +1,5 @@
-using OpenAI_API.Chat;
+using OpenAI_API.Audio;
+using OpenAI_API.Chat;
 using OpenAI_API.Completions;
 using OpenAI_API.Embedding;
 using OpenAI_API.Files;
@@ -50,6 +51,7 @@ public OpenAIAPI(APIAuthentication apiKeys = null)
 			Chat = new ChatEndpoint(this);
 			Moderation = new ModerationEndpoint(this);
 			ImageGenerations = new ImageGenerationEndpoint(this);
+			Audio = new AudioEndpoint(this);
 		}
 
 		/// <summary>
@@ -101,5 +103,10 @@ public static OpenAIAPI ForAzure(string YourResourceName, string deploymentId, A
 		/// The API lets you do operations with images. Given a prompt and/or an input image, the model will generate a new image.
 		/// </summary>
 		public IImageGenerationEndpoint ImageGenerations { get; }
+
+		/// <summary>
+		/// Manages audio operations such as transcription and translation.
+		/// </summary>
+		public IAudioEndpoint Audio { get; }
 	}
 }
diff --git a/OpenAI_API/OpenAI_API.csproj b/OpenAI_API/OpenAI_API.csproj
index 00488b0..92d05fd 100644
--- a/OpenAI_API/OpenAI_API.csproj
+++ b/OpenAI_API/OpenAI_API.csproj
@@ -14,12 +14,12 @@
 		<PackageTags>OpenAI, AI, ML, API, ChatGPT, DALLE, GPT3, GPT-3, GPT4, GPT-4, DALL-E</PackageTags>
 		<Title>OpenAI API</Title>
-		<PackageReleaseNotes>Added support for GPT4, streaming conversations with ChatGPT, IHttpClientFactory, and numerous bug fixes.</PackageReleaseNotes>
+		<PackageReleaseNotes>Added support for GPT4, streaming conversations with ChatGPT, IHttpClientFactory, and various bug fixes.</PackageReleaseNotes>
 		<Company>OpenAI</Company>
-		<Version>1.7.1</Version>
-		<AssemblyVersion>1.7.1.0</AssemblyVersion>
-		<FileVersion>1.7.1.0</FileVersion>
+		<Version>1.7.2</Version>
+		<AssemblyVersion>1.7.2.0</AssemblyVersion>
+		<FileVersion>1.7.2.0</FileVersion>
 		<GeneratePackageOnBuild>True</GeneratePackageOnBuild>
 		<PackageReadmeFile>README.md</PackageReadmeFile>
 		<GenerateDocumentationFile>True</GenerateDocumentationFile>
diff --git a/OpenAI_Tests/AudioEndpointTests.cs b/OpenAI_Tests/AudioEndpointTests.cs
new file mode 100644
index 0000000..cae7d02
--- /dev/null
+++ b/OpenAI_Tests/AudioEndpointTests.cs
@@ -0,0 +1,45 @@
+using NUnit.Framework;
+using OpenAI_API.Audio;
+using System;
+using System.IO;
+using System.Threading.Tasks;
+
+namespace OpenAI_Tests
+{
+	public class AudioEndpointTests
+	{
+		private const string TEST_FILE_NAME = "audio_test.mp3";
+
+		[SetUp]
+		public void Setup()
+		{
+			OpenAI_API.APIAuthentication.Default = new OpenAI_API.APIAuthentication(Environment.GetEnvironmentVariable("TEST_OPENAI_SECRET_KEY"));
+		}
+
+		[Test]
+		public async Task Test_TranscriptionAsync()
+		{
+			var api = new OpenAI_API.OpenAIAPI();
+			var request = new TranscriptionRequest { File = new AudioFile { File = new FileStream(TEST_FILE_NAME, FileMode.Open), Name = TEST_FILE_NAME, ContentType = "audio/mp3" } };
+			var result = await api.Audio.CreateTranscriptionAsync(request);
+
+			Assert.IsNotNull(result);
+			Assert.IsNotNull(result.Text);
+			Assert.IsNotNull(result.Segments);
+			Assert.Greater(result.Segments.Count, 0);
+		}
+
+		[Test]
+		public async Task Test_TranslateAsync()
+		{
+			var api = new OpenAI_API.OpenAIAPI();
+			var request = new TranslationRequest { File = new AudioFile { File = new FileStream(TEST_FILE_NAME, FileMode.Open), Name = TEST_FILE_NAME, ContentType = "audio/mp3" } };
+			var result = await api.Audio.CreateTranslationAsync(request);
+
+			Assert.IsNotNull(result);
+			Assert.IsNotNull(result.Text);
+			Assert.IsNotNull(result.Segments);
+			Assert.Greater(result.Segments.Count, 0);
+		}
+	}
+}
diff --git a/OpenAI_Tests/OpenAI_Tests.csproj b/OpenAI_Tests/OpenAI_Tests.csproj
index f26766e..651b075 100644
--- a/OpenAI_Tests/OpenAI_Tests.csproj
+++ b/OpenAI_Tests/OpenAI_Tests.csproj
@@ -22,6 +22,9 @@
 			<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
 		</None>
+		<None Update="audio_test.mp3">
+			<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+		</None>
 	</ItemGroup>
 
diff --git a/OpenAI_Tests/audio_test.mp3 b/OpenAI_Tests/audio_test.mp3
new file mode 100644
index 0000000..f3ec79f
Binary files /dev/null and b/OpenAI_Tests/audio_test.mp3 differ
diff --git a/README.md b/README.md
index eb34df6..9afe53a 100644
--- a/README.md
+++ b/README.md
@@ -26,6 +26,7 @@ Console.WriteLine(result);
  * [Moderation API](#moderation)
  * [Files API](#files-for-fine-tuning)
  * [Image APIs (DALL-E)](#images)
+ * [Audio](#audio)
  * [Azure](#azure)
  * [Additonal Documentation](#documentation)
  * [License](#license)
@@ -277,9 +278,9 @@ The DALL-E Image Generation API is accessed via `OpenAIAPI.ImageGenerations`:
 async Task<ImageResult> CreateImageAsync(ImageGenerationRequest request);
 
 // for example
-var result = await api.Images.CreateImageAsync(new ImageGenerationRequest("A drawing of a computer writing a test", 1, ImageSize._512));
+var result = await api.ImageGenerations.CreateImageAsync(new ImageGenerationRequest("A drawing of a computer writing a test", 1, ImageSize._512));
 // or
-var result = await api.Images.CreateImageAsync("A drawing of a computer writing a test");
+var result = await api.ImageGenerations.CreateImageAsync("A drawing of a computer writing a test");
 
 Console.WriteLine(result.Data[0].Url);
 ```
@@ -288,6 +289,23 @@ The image result contains a URL for an online image or a base64-encoded image, d
 
 Image edits and variations are not yet implemented.
 
+## Audio
+The audio API endpoint is accessed via `OpenAIAPI.Audio`:
+
+```csharp
+// Transcribe
+Task<TranscriptionVerboseJsonResult> CreateTranscriptionAsync(TranscriptionRequest request);
+
+// Translate
+Task<TranscriptionVerboseJsonResult> CreateTranslationAsync(TranslationRequest request);
+
+// for example
+var request = new TranscriptionRequest { File = new AudioFile { File = new FileStream("audio_test.mp3", FileMode.Open), Name = "audio_test.mp3", ContentType = "audio/mp3" } };
+var result = await api.Audio.CreateTranscriptionAsync(request);
+
+Console.WriteLine(result.Text);
+```
+
 ## Azure
 
 For using the Azure OpenAI Service, you need to specify the name of your Azure OpenAI resource as well as your model deployment id.
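
The README example above only exercises transcription, so here is a minimal usage sketch of the translation path as well. It is not part of the patch itself; it assumes the `TranslationRequest`, `AudioFile`, and `OpenAIAPI.Audio` members introduced in this diff, a valid API key resolvable by the default `APIAuthentication`, and a local file named `audio_test.mp3` (both the key source and the file name are illustrative).

```csharp
// Usage sketch for the new translation endpoint added in this PR.
// Assumes a default API key is configured and "audio_test.mp3" exists on disk.
using System;
using System.IO;
using System.Threading.Tasks;
using OpenAI_API;
using OpenAI_API.Audio;

public static class AudioTranslationExample
{
    public static async Task Main()
    {
        var api = new OpenAIAPI(); // picks up the default APIAuthentication

        using (var stream = new FileStream("audio_test.mp3", FileMode.Open))
        {
            var request = new TranslationRequest
            {
                // AudioFile wraps the stream plus the metadata the multipart upload needs.
                File = new AudioFile
                {
                    File = stream,
                    Name = "audio_test.mp3",
                    ContentType = "audio/mp3"
                }
            };

            // Whisper translates the source audio into English text.
            var result = await api.Audio.CreateTranslationAsync(request);

            // The verbose_json default exposes metadata beyond the plain text.
            Console.WriteLine($"Detected language: {result.Language}, duration: {result.Duration}s");
            Console.WriteLine(result.Text);
        }
    }
}
```

Because `TranscriptionRequest` derives from `TranslationRequest`, both operations share one multipart POST path in `AudioEndpoint`; transcription only adds the optional `language` field.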