diff --git a/DiscoveryJson/texttospeech.v1beta1.json b/DiscoveryJson/texttospeech.v1beta1.json index c70b2d806b5..9c93a509129 100644 --- a/DiscoveryJson/texttospeech.v1beta1.json +++ b/DiscoveryJson/texttospeech.v1beta1.json @@ -237,6 +237,24 @@ }, "voices": { "methods": { + "generateVoiceCloningKey": { + "description": "Generates voice clone key given a short voice prompt. This method validates the voice prompts with a series of checks against the voice talent statement to verify the voice clone is safe to generate.", + "flatPath": "v1beta1/voices:generateVoiceCloningKey", + "httpMethod": "POST", + "id": "texttospeech.voices.generateVoiceCloningKey", + "parameterOrder": [], + "parameters": {}, + "path": "v1beta1/voices:generateVoiceCloningKey", + "request": { + "$ref": "GenerateVoiceCloningKeyRequest" + }, + "response": { + "$ref": "GenerateVoiceCloningKeyResponse" + }, + "scopes": [ + "https://www.googleapis.com/auth/cloud-platform" + ] + }, "list": { "description": "Returns a list of Voice supported for synthesis.", "flatPath": "v1beta1/voices", @@ -261,9 +279,20 @@ } } }, - "revision": "20240815", + "revision": "20241001", "rootUrl": "https://texttospeech.googleapis.com/", "schemas": { + "AdvancedVoiceOptions": { + "description": "Used for advanced voice options.", + "id": "AdvancedVoiceOptions", + "properties": { + "lowLatencyJourneySynthesis": { + "description": "Only for Journey voices. 
If false, the synthesis will be context aware and have higher latency.", + "type": "boolean" + } + }, + "type": "object" + }, "AudioConfig": { "description": "Description of audio data to be synthesized.", "id": "AudioConfig", @@ -320,6 +349,49 @@ }, "type": "object" }, + "CustomPronunciationParams": { + "description": "Pronunciation customization for a phrase.", + "id": "CustomPronunciationParams", + "properties": { + "phoneticEncoding": { + "description": "The phonetic encoding of the phrase.", + "enum": [ + "PHONETIC_ENCODING_UNSPECIFIED", + "PHONETIC_ENCODING_IPA", + "PHONETIC_ENCODING_X_SAMPA" + ], + "enumDescriptions": [ + "Not specified.", + "IPA. (e.g. apple -> \u02c8\u00e6p\u0259l ) https://en.wikipedia.org/wiki/International_Phonetic_Alphabet", + "X-SAMPA (e.g. apple -> \"{p@l\" ) https://en.wikipedia.org/wiki/X-SAMPA" + ], + "type": "string" + }, + "phrase": { + "description": "The phrase to which the customization will be applied. The phrase can be multiple words (in the case of proper nouns etc), but should not span to a whole sentence.", + "type": "string" + }, + "pronunciation": { + "description": "The pronunciation of the phrase. 
This must be in the phonetic encoding specified above.", + "type": "string" + } + }, + "type": "object" + }, + "CustomPronunciations": { + "description": "A collection of pronunciation customizations.", + "id": "CustomPronunciations", + "properties": { + "pronunciations": { + "description": "The pronunciation customizations to be applied.", + "items": { + "$ref": "CustomPronunciationParams" + }, + "type": "array" + } + }, + "type": "object" + }, "CustomVoiceParams": { "description": "Description of the custom voice to be synthesized.", "id": "CustomVoiceParams", @@ -346,6 +418,40 @@ }, "type": "object" }, + "GenerateVoiceCloningKeyRequest": { + "description": "Request message for the `GenerateVoiceCloningKey` method.", + "id": "GenerateVoiceCloningKeyRequest", + "properties": { + "consentScript": { + "description": "Required. The script used for the voice talent statement. The script will be provided to the caller through other channels. It must be returned unchanged in this field.", + "type": "string" + }, + "languageCode": { + "description": "Required. The language of the supplied audio as a [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag. Example: \"en-US\". See [Language Support](https://cloud.google.com/speech-to-text/docs/languages) for a list of the currently supported language codes.", + "type": "string" + }, + "referenceAudio": { + "$ref": "InputAudio", + "description": "Required. The training audio used to create voice clone. This is currently limited to LINEAR16 PCM WAV files mono audio with 24khz sample rate. This needs to be specified in [InputAudio.audio_config], other values will be explicitly rejected." + }, + "voiceTalentConsent": { + "$ref": "InputAudio", + "description": "Required. The voice talent audio used to verify consent to voice clone." 
+ } + }, + "type": "object" + }, + "GenerateVoiceCloningKeyResponse": { + "description": "Response message for the `GenerateVoiceCloningKey` method.", + "id": "GenerateVoiceCloningKeyResponse", + "properties": { + "voiceCloningKey": { + "description": "The voice clone key. Use it in the SynthesizeSpeechRequest by setting [voice.voice_clone.voice_cloning_key].", + "type": "string" + } + }, + "type": "object" + }, "GoogleCloudTexttospeechV1beta1SynthesizeLongAudioMetadata": { "description": "Metadata for response returned by the `SynthesizeLongAudio` method.", "id": "GoogleCloudTexttospeechV1beta1SynthesizeLongAudioMetadata", @@ -369,6 +475,56 @@ }, "type": "object" }, + "InputAudio": { + "description": "Holds audio content and config.", + "id": "InputAudio", + "properties": { + "audioConfig": { + "$ref": "InputAudioConfig", + "description": "Required. Provides information that specifies how to process content." + }, + "content": { + "description": "Required. The audio data bytes encoded as specified in `InputAudioConfig`. Note: as with all bytes fields, proto buffers use a pure binary representation, whereas JSON representations use base64. Audio samples should be between 5-25 seconds in length.", + "format": "byte", + "type": "string" + } + }, + "type": "object" + }, + "InputAudioConfig": { + "description": "Description of inputted audio data.", + "id": "InputAudioConfig", + "properties": { + "audioEncoding": { + "description": "Required. The format of the audio byte stream.", + "enum": [ + "AUDIO_ENCODING_UNSPECIFIED", + "LINEAR16", + "MP3", + "MP3_64_KBPS", + "OGG_OPUS", + "MULAW", + "ALAW" + ], + "enumDescriptions": [ + "Not specified. Will return result google.rpc.Code.INVALID_ARGUMENT.", + "Uncompressed 16-bit signed little-endian samples (Linear PCM). Audio content returned as LINEAR16 also contains a WAV header.", + "MP3 audio at 32kbps.", + "MP3 at 64kbps.", + "Opus encoded audio wrapped in an ogg container. 
The result will be a file which can be played natively on Android, and in browsers (at least Chrome and Firefox). The quality of the encoding is considerably higher than MP3 while using approximately the same bitrate.", + "8-bit samples that compand 14-bit audio samples using G.711 PCMU/mu-law. Audio content returned as MULAW also contains a WAV header.", + "8-bit samples that compand 14-bit audio samples using G.711 PCMU/A-law. Audio content returned as ALAW also contains a WAV header." + ], + "type": "string" + }, + "sampleRateHertz": { + "description": "Required. The sample rate (in hertz) for this audio.", + "format": "int32", + "type": "integer" + } + }, + "type": "object" + }, "ListOperationsResponse": { "description": "The response message for Operations.ListOperations.", "id": "ListOperationsResponse", @@ -467,6 +623,10 @@ "description": "Contains text input to be synthesized. Either `text` or `ssml` must be supplied. Supplying both or neither returns google.rpc.Code.INVALID_ARGUMENT. The input size is limited to 5000 bytes.", "id": "SynthesisInput", "properties": { + "customPronunciations": { + "$ref": "CustomPronunciations", + "description": "Optional. The pronunciation customizations to be applied to the input. If this is set, the input will be synthesized using the given pronunciation customizations. The initial support will be for EFIGS (English, French, Italian, German, Spanish) languages, as provided in VoiceSelectionParams. Journey and Instant Clone voices are not supported yet. In order to customize the pronunciation of a phrase, there must be an exact match of the phrase in the input types. If using SSML, the phrase must not be inside a phoneme tag (entirely or partially)." + }, "ssml": { "description": "The SSML document to be synthesized. The SSML document must be valid and well-formed. Otherwise the RPC will fail and return google.rpc.Code.INVALID_ARGUMENT. 
For more information, see [SSML](https://cloud.google.com/text-to-speech/docs/ssml).", "type": "string" @@ -528,6 +688,10 @@ "description": "The top-level message sent by the client for the `SynthesizeSpeech` method.", "id": "SynthesizeSpeechRequest", "properties": { + "advancedVoiceOptions": { + "$ref": "AdvancedVoiceOptions", + "description": "Advanced voice options." + }, "audioConfig": { "$ref": "AudioConfig", "description": "Required. The configuration of the synthesized audio." @@ -636,6 +800,17 @@ }, "type": "object" }, + "VoiceCloneParams": { + "description": "The configuration of Voice Clone feature.", + "id": "VoiceCloneParams", + "properties": { + "voiceCloningKey": { + "description": "Required. Created by GenerateVoiceCloningKey.", + "type": "string" + } + }, + "type": "object" + }, "VoiceSelectionParams": { "description": "Description of which voice to use for a synthesis request.", "id": "VoiceSelectionParams", @@ -667,6 +842,10 @@ "A gender-neutral voice. This voice is not yet supported." ], "type": "string" + }, + "voiceClone": { + "$ref": "VoiceCloneParams", + "description": "Optional. The configuration for a voice clone. If [VoiceCloneParams.voice_cloning_key] is set, the service will choose the voice clone matching the specified configuration." } }, "type": "object" diff --git a/Src/Generated/Google.Apis.Texttospeech.v1beta1/Google.Apis.Texttospeech.v1beta1.cs b/Src/Generated/Google.Apis.Texttospeech.v1beta1/Google.Apis.Texttospeech.v1beta1.cs index 6c775fda565..089e6e50a4d 100644 --- a/Src/Generated/Google.Apis.Texttospeech.v1beta1/Google.Apis.Texttospeech.v1beta1.cs +++ b/Src/Generated/Google.Apis.Texttospeech.v1beta1/Google.Apis.Texttospeech.v1beta1.cs @@ -586,6 +586,51 @@ public VoicesResource(Google.Apis.Services.IClientService service) this.service = service; } + /// + /// Generates voice clone key given a short voice prompt. 
This method validates the voice prompts with a series + /// of checks against the voice talent statement to verify the voice clone is safe to generate. + /// + /// The body of the request. + public virtual GenerateVoiceCloningKeyRequest GenerateVoiceCloningKey(Google.Apis.Texttospeech.v1beta1.Data.GenerateVoiceCloningKeyRequest body) + { + return new GenerateVoiceCloningKeyRequest(this.service, body); + } + + /// + /// Generates voice clone key given a short voice prompt. This method validates the voice prompts with a series + /// of checks against the voice talent statement to verify the voice clone is safe to generate. + /// + public class GenerateVoiceCloningKeyRequest : TexttospeechBaseServiceRequest + { + /// Constructs a new GenerateVoiceCloningKey request. + public GenerateVoiceCloningKeyRequest(Google.Apis.Services.IClientService service, Google.Apis.Texttospeech.v1beta1.Data.GenerateVoiceCloningKeyRequest body) : base(service) + { + Body = body; + InitParameters(); + } + + /// Gets or sets the body of this request. + Google.Apis.Texttospeech.v1beta1.Data.GenerateVoiceCloningKeyRequest Body { get; set; } + + /// Returns the body of the request. + protected override object GetBody() => Body; + + /// Gets the method name. + public override string MethodName => "generateVoiceCloningKey"; + + /// Gets the HTTP method. + public override string HttpMethod => "POST"; + + /// Gets the REST path. + public override string RestPath => "v1beta1/voices:generateVoiceCloningKey"; + + /// Initializes GenerateVoiceCloningKey parameter list. + protected override void InitParameters() + { + base.InitParameters(); + } + } + /// Returns a list of Voice supported for synthesis. public virtual ListRequest List() { @@ -638,6 +683,19 @@ protected override void InitParameters() } namespace Google.Apis.Texttospeech.v1beta1.Data { + /// Used for advanced voice options. 
+ public class AdvancedVoiceOptions : Google.Apis.Requests.IDirectResponseSchema + { + /// + /// Only for Journey voices. If false, the synthesis will be context aware and have higher latency. + /// + [Newtonsoft.Json.JsonPropertyAttribute("lowLatencyJourneySynthesis")] + public virtual System.Nullable LowLatencyJourneySynthesis { get; set; } + + /// The ETag of the item. + public virtual string ETag { get; set; } + } + /// Description of audio data to be synthesized. public class AudioConfig : Google.Apis.Requests.IDirectResponseSchema { @@ -694,6 +752,39 @@ public class AudioConfig : Google.Apis.Requests.IDirectResponseSchema public virtual string ETag { get; set; } } + /// Pronunciation customization for a phrase. + public class CustomPronunciationParams : Google.Apis.Requests.IDirectResponseSchema + { + /// The phonetic encoding of the phrase. + [Newtonsoft.Json.JsonPropertyAttribute("phoneticEncoding")] + public virtual string PhoneticEncoding { get; set; } + + /// + /// The phrase to which the customization will be applied. The phrase can be multiple words (in the case of + /// proper nouns etc), but should not span to a whole sentence. + /// + [Newtonsoft.Json.JsonPropertyAttribute("phrase")] + public virtual string Phrase { get; set; } + + /// The pronunciation of the phrase. This must be in the phonetic encoding specified above. + [Newtonsoft.Json.JsonPropertyAttribute("pronunciation")] + public virtual string Pronunciation { get; set; } + + /// The ETag of the item. + public virtual string ETag { get; set; } + } + + /// A collection of pronunciation customizations. + public class CustomPronunciations : Google.Apis.Requests.IDirectResponseSchema + { + /// The pronunciation customizations to be applied. + [Newtonsoft.Json.JsonPropertyAttribute("pronunciations")] + public virtual System.Collections.Generic.IList Pronunciations { get; set; } + + /// The ETag of the item. 
+ public virtual string ETag { get; set; } + } + /// Description of the custom voice to be synthesized. public class CustomVoiceParams : Google.Apis.Requests.IDirectResponseSchema { @@ -709,6 +800,54 @@ public class CustomVoiceParams : Google.Apis.Requests.IDirectResponseSchema public virtual string ETag { get; set; } } + /// Request message for the `GenerateVoiceCloningKey` method. + public class GenerateVoiceCloningKeyRequest : Google.Apis.Requests.IDirectResponseSchema + { + /// + /// Required. The script used for the voice talent statement. The script will be provided to the caller through + /// other channels. It must be returned unchanged in this field. + /// + [Newtonsoft.Json.JsonPropertyAttribute("consentScript")] + public virtual string ConsentScript { get; set; } + + /// + /// Required. The language of the supplied audio as a [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) + /// language tag. Example: "en-US". See [Language + /// Support](https://cloud.google.com/speech-to-text/docs/languages) for a list of the currently supported + /// language codes. + /// + [Newtonsoft.Json.JsonPropertyAttribute("languageCode")] + public virtual string LanguageCode { get; set; } + + /// + /// Required. The training audio used to create voice clone. This is currently limited to LINEAR16 PCM WAV files + /// mono audio with 24khz sample rate. This needs to be specified in [InputAudio.audio_config], other values + /// will be explicitly rejected. + /// + [Newtonsoft.Json.JsonPropertyAttribute("referenceAudio")] + public virtual InputAudio ReferenceAudio { get; set; } + + /// Required. The voice talent audio used to verify consent to voice clone. + [Newtonsoft.Json.JsonPropertyAttribute("voiceTalentConsent")] + public virtual InputAudio VoiceTalentConsent { get; set; } + + /// The ETag of the item. + public virtual string ETag { get; set; } + } + + /// Response message for the `GenerateVoiceCloningKey` method. 
+ public class GenerateVoiceCloningKeyResponse : Google.Apis.Requests.IDirectResponseSchema + { + /// + /// The voice clone key. Use it in the SynthesizeSpeechRequest by setting [voice.voice_clone.voice_cloning_key]. + /// + [Newtonsoft.Json.JsonPropertyAttribute("voiceCloningKey")] + public virtual string VoiceCloningKey { get; set; } + + /// The ETag of the item. + public virtual string ETag { get; set; } + } + /// Metadata for response returned by the `SynthesizeLongAudio` method. public class GoogleCloudTexttospeechV1beta1SynthesizeLongAudioMetadata : Google.Apis.Requests.IDirectResponseSchema { @@ -796,6 +935,40 @@ public virtual System.DateTimeOffset? StartTimeDateTimeOffset public virtual string ETag { get; set; } } + /// Holds audio content and config. + public class InputAudio : Google.Apis.Requests.IDirectResponseSchema + { + /// Required. Provides information that specifies how to process content. + [Newtonsoft.Json.JsonPropertyAttribute("audioConfig")] + public virtual InputAudioConfig AudioConfig { get; set; } + + /// + /// Required. The audio data bytes encoded as specified in `InputAudioConfig`. Note: as with all bytes fields, + /// proto buffers use a pure binary representation, whereas JSON representations use base64. Audio samples + /// should be between 5-25 seconds in length. + /// + [Newtonsoft.Json.JsonPropertyAttribute("content")] + public virtual string Content { get; set; } + + /// The ETag of the item. + public virtual string ETag { get; set; } + } + + /// Description of inputted audio data. + public class InputAudioConfig : Google.Apis.Requests.IDirectResponseSchema + { + /// Required. The format of the audio byte stream. + [Newtonsoft.Json.JsonPropertyAttribute("audioEncoding")] + public virtual string AudioEncoding { get; set; } + + /// Required. The sample rate (in hertz) for this audio. 
+ [Newtonsoft.Json.JsonPropertyAttribute("sampleRateHertz")] + public virtual System.Nullable SampleRateHertz { get; set; } + + /// The ETag of the item. + public virtual string ETag { get; set; } + } + /// The response message for Operations.ListOperations. public class ListOperationsResponse : Google.Apis.Requests.IDirectResponseSchema { @@ -900,6 +1073,17 @@ public class Status : Google.Apis.Requests.IDirectResponseSchema /// public class SynthesisInput : Google.Apis.Requests.IDirectResponseSchema { + /// + /// Optional. The pronunciation customizations to be applied to the input. If this is set, the input will be + /// synthesized using the given pronunciation customizations. The initial support will be for EFIGS (English, + /// French, Italian, German, Spanish) languages, as provided in VoiceSelectionParams. Journey and Instant Clone + /// voices are not supported yet. In order to customize the pronunciation of a phrase, there must be an exact + /// match of the phrase in the input types. If using SSML, the phrase must not be inside a phoneme tag (entirely + /// or partially). + /// + [Newtonsoft.Json.JsonPropertyAttribute("customPronunciations")] + public virtual CustomPronunciations CustomPronunciations { get; set; } + /// /// The SSML document to be synthesized. The SSML document must be valid and well-formed. Otherwise the RPC will /// fail and return google.rpc.Code.INVALID_ARGUMENT. For more information, see @@ -1032,6 +1216,10 @@ public class SynthesizeLongAudioRequest : Google.Apis.Requests.IDirectResponseSc /// The top-level message sent by the client for the `SynthesizeSpeech` method. public class SynthesizeSpeechRequest : Google.Apis.Requests.IDirectResponseSchema { + /// Advanced voice options. + [Newtonsoft.Json.JsonPropertyAttribute("advancedVoiceOptions")] + public virtual AdvancedVoiceOptions AdvancedVoiceOptions { get; set; } + /// Required. The configuration of the synthesized audio. 
[Newtonsoft.Json.JsonPropertyAttribute("audioConfig")] public virtual AudioConfig AudioConfig { get; set; } @@ -1121,6 +1309,17 @@ public class Voice : Google.Apis.Requests.IDirectResponseSchema public virtual string ETag { get; set; } } + /// The configuration of Voice Clone feature. + public class VoiceCloneParams : Google.Apis.Requests.IDirectResponseSchema + { + /// Required. Created by GenerateVoiceCloningKey. + [Newtonsoft.Json.JsonPropertyAttribute("voiceCloningKey")] + public virtual string VoiceCloningKey { get; set; } + + /// The ETag of the item. + public virtual string ETag { get; set; } + } + /// Description of which voice to use for a synthesis request. public class VoiceSelectionParams : Google.Apis.Requests.IDirectResponseSchema { @@ -1159,6 +1358,13 @@ public class VoiceSelectionParams : Google.Apis.Requests.IDirectResponseSchema [Newtonsoft.Json.JsonPropertyAttribute("ssmlGender")] public virtual string SsmlGender { get; set; } + /// + /// Optional. The configuration for a voice clone. If [VoiceCloneParams.voice_cloning_key] is set, the service + /// will choose the voice clone matching the specified configuration. + /// + [Newtonsoft.Json.JsonPropertyAttribute("voiceClone")] + public virtual VoiceCloneParams VoiceClone { get; set; } + /// The ETag of the item. public virtual string ETag { get; set; } } diff --git a/Src/Generated/Google.Apis.Texttospeech.v1beta1/Google.Apis.Texttospeech.v1beta1.csproj b/Src/Generated/Google.Apis.Texttospeech.v1beta1/Google.Apis.Texttospeech.v1beta1.csproj index fc52e764a70..66414df7db1 100644 --- a/Src/Generated/Google.Apis.Texttospeech.v1beta1/Google.Apis.Texttospeech.v1beta1.csproj +++ b/Src/Generated/Google.Apis.Texttospeech.v1beta1/Google.Apis.Texttospeech.v1beta1.csproj @@ -3,7 +3,7 @@ Google.Apis.Texttospeech.v1beta1 Client Library - 1.68.0.3514 + 1.68.0.3561 Google LLC Copyright 2024 Google LLC Google