From 5995fbcf7099c02629bb6e8b0eac320beec10bac Mon Sep 17 00:00:00 2001
From: Insidious Fiddler
Date: Mon, 11 Dec 2023 11:51:50 -0500
Subject: [PATCH] Feature: support Larynx TTS (#50)

* Added Larynx, need to fix issues

* Refactor generateWithLarynx method to improve error handling and file management

* Update voice for Larynx in TextToSpeech.php
---
 src/app/Custom/TextToSpeech.php               | 105 ++++++++++++++++--
 .../API/TextToSpeechController.php            |   2 +-
 2 files changed, 101 insertions(+), 6 deletions(-)

diff --git a/src/app/Custom/TextToSpeech.php b/src/app/Custom/TextToSpeech.php
index d677af1..cc62669 100644
--- a/src/app/Custom/TextToSpeech.php
+++ b/src/app/Custom/TextToSpeech.php
@@ -2,6 +2,7 @@
 
 namespace App\Custom;
 
+use Illuminate\Support\Facades\Log;
 use Exception;
 use FFMpeg\FFMpeg;
 use FFMpeg\Format\Audio\Mp3;
@@ -10,8 +11,9 @@
  * Text to Speech Generator.
  *
  * Supported TTS engines:
- *  - VoiceRSS
- *  - Eleven Labs
+ * - VoiceRSS
+ * - Eleven Labs
+ * - Larynx (Beta)
  */
 class TextToSpeech
 {
@@ -27,7 +29,7 @@ class TextToSpeech
      *
      * @param string $text The text to be converted to speech.
      * @param string $language The language of the text (currently only used with VoiceRSS).
-     * @param string $engine The TTS engine to be used ('VoiceRSS', 'ElevenLabs', ...).
+     * @param string $engine The TTS engine to be used ('Larynx', 'VoiceRSS', 'ElevenLabs', ...).
      * @param array $options Additional options for the TTS engine:
      *   - VoiceRSS:
      *     - format: 'MP3' (default), ...
@@ -41,8 +43,13 @@ class TextToSpeech
      *     - similarity_boost: int (default 0), ...
      *     - style: int (default 0), ...
      *     - use_speaker_boost: bool (default true), ...
-     *   - AnotherTTSAPI (if added later):
-     *     - ...
+     *   - Larynx:
+     *     - voice: 'en-us/cmu_rms-glow_tts' (default), ...
+     *     - vocoder: 'hifi_gan/universal_large' (default), ...
+     *     - denoiserStrength: float (default 0.002), ...
+     *     - noiseScale: float (default 0.667), ...
+     *     - lengthScale: float (default 0.85), ...
+     *     - ssml: bool|string (default 'false'), ...
      * @param array|null $apiKeys (Optional) The API keys for the TTS engines. Keys should be engine names with associated API key as value.
      * @throws Exception If the TTS engine is not supported.
      */
@@ -120,6 +127,14 @@ public function __construct($text, $language = 'en-us', $engine = 'VoiceRSS', $o
                     'use_speaker_boost' => true
                 ]
             ],
+            'Larynx' => [
+                'voice' => 'en-us/cmu_rms-glow_tts',
+                'vocoder' => 'hifi_gan/universal_large',
+                'denoiserStrength' => 0.002,
+                'noiseScale' => 0.667,
+                'lengthScale' => 0.85,
+                'ssml' => 'false',
+            ]
         ];
 
         // Check if the engine is supported
@@ -146,6 +161,9 @@ public function generateAudio(): bool|string
             // Eleven Labs
             case 'ElevenLabs':
                 return $this->generateWithElevenLabs();
+            // Larynx
+            case 'Larynx':
+                return $this->generateWithLarynx();
             // Unsupported engine
             default:
                 throw new Exception('Unsupported TTS Engine');
@@ -237,6 +255,71 @@ private function generateWithElevenLabs()
         throw new Exception('Eleven Labs API Error'); // Handle error as per your requirements
     }
 
+    /**
+     * Generate the audio file using Larynx.
+     *
+     * Sends the text and the configured Larynx options to the Larynx HTTP API
+     * and returns the raw audio bytes of the response body.
+     *
+     * @return string The generated audio data
+     * @throws Exception If the request fails, the API answers with an HTTP error, or the response is empty
+     */
+    private function generateWithLarynx()
+    {
+        $params = [
+            'text' => $this->text,
+            'voice' => $this->OPTIONS['voice'],
+            'vocoder' => $this->OPTIONS['vocoder'],
+            'denoiserStrength' => $this->OPTIONS['denoiserStrength'],
+            'noiseScale' => $this->OPTIONS['noiseScale'],
+            'lengthScale' => $this->OPTIONS['lengthScale'],
+            'ssml' => $this->OPTIONS['ssml'],
+        ];
+
+        // Percent-encode every value, not only the text: voice and vocoder names contain '/',
+        // and custom options must not be able to break or extend the query string.
+        $pairs = [];
+        foreach ($params as $name => $value) {
+            if (is_bool($value)) {
+                // A PHP boolean would otherwise be sent as '1' or an empty string
+                $value = $value ? 'true' : 'false';
+            }
+            $pairs[] = $name . '=' . rawurlencode((string) $value);
+        }
+        $requestUrl = 'http://voice.vmgware.dev/api/tts?' . implode('&', $pairs);
+
+        $ch = curl_init($requestUrl);
+        if ($ch === false) {
+            throw new Exception('Larynx API Error: Unable to initialise cURL');
+        }
+
+        try {
+            curl_setopt_array($ch, [
+                CURLOPT_RETURNTRANSFER => true, // Return the body instead of streaming it to a temporary file
+                CURLOPT_HEADER => false,
+                CURLOPT_FOLLOWLOCATION => true,
+                CURLOPT_FAILONERROR => true, // Treat HTTP status >= 400 as an error, not as audio data
+                CURLOPT_TIMEOUT => 60, // Adjust timeout as needed
+                CURLOPT_SSL_VERIFYPEER => true,
+                CURLOPT_SSL_VERIFYHOST => 2,
+            ]);
+
+            $audioData = curl_exec($ch);
+            if ($audioData === false) {
+                throw new Exception('Larynx API Error: ' . curl_error($ch));
+            }
+        } finally {
+            // Release the handle even when the request failed
+            curl_close($ch);
+        }
+
+        if ($audioData === '') {
+            throw new Exception('Larynx API Error: No response or empty file');
+        }
+
+        return $audioData;
+    }
+
     /**
      * Convert the provided audio data to MP3. (Beta)
      *
@@ -341,6 +424,18 @@ public static function validateCustomConfig(array $customConfig): bool
 
             return true; // Custom config is valid for ElevenLabs
         }
+        if ($engine === 'Larynx') {
+            // Validate custom config for Larynx engine (example rules)
+            $validOptions = ['voice', 'vocoder', 'denoiserStrength', 'noiseScale', 'lengthScale', 'ssml'];
+
+            foreach ($customConfig[$engine] as $key => $value) {
+                if (!in_array($key, $validOptions, true)) {
+                    return false; // Invalid option found
+                }
+            }
+
+            return true; // Custom config is valid for Larynx
+        }
 
         return false; // Unsupported engine
     }
diff --git a/src/app/Http/Controllers/API/TextToSpeechController.php b/src/app/Http/Controllers/API/TextToSpeechController.php
index 67cfc25..33faddf 100644
--- a/src/app/Http/Controllers/API/TextToSpeechController.php
+++ b/src/app/Http/Controllers/API/TextToSpeechController.php
@@ -141,7 +141,7 @@ public function generate(Request $request): JsonResponse
         }
 
         // Make sure at least one API key is set
-        if (!TextToSpeech::hasApiKey() && !$options) {
+        if (!TextToSpeech::hasApiKey() && !$options && $engine !== 'Larynx') {
             Log::error('Your server voice API configuration is incorrect. Please check your .env file.');
 
             // Return the response